Skip to content

Commit ce53cc0

Browse files
[V5] Return a BatchEncoding dict from apply_chat_template by default again (#42567)
* Flip the default return type for `apply_chat_template` to match the underlying tokenizer
* Remove test_tokenization_for_chat tests, which no longer do anything useful
* Fix test_encode_message tests
* nit fix
* Trigger tests
* make fixup
* Add a little test to make sure that doesn't happen again
1 parent a3e2d54 commit ce53cc0

File tree

7 files changed

+18
-136
lines changed

7 files changed

+18
-136
lines changed

src/transformers/tokenization_utils_base.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3195,7 +3195,7 @@ def apply_chat_template(
31953195
truncation: bool = False,
31963196
max_length: Optional[int] = None,
31973197
return_tensors: Optional[Union[str, TensorType]] = None,
3198-
return_dict: bool = False,
3198+
return_dict: bool = True,
31993199
return_assistant_tokens_mask: bool = False,
32003200
tokenizer_kwargs: Optional[dict[str, Any]] = None,
32013201
**kwargs,
@@ -3268,14 +3268,11 @@ def apply_chat_template(
32683268
set, will return a dict of tokenizer outputs instead.
32693269
"""
32703270

3271-
if return_dict and not tokenize:
3272-
raise ValueError(
3273-
"`return_dict=True` is incompatible with `tokenize=False`, because there is no dict "
3274-
"of tokenizer outputs to return."
3275-
)
3271+
if not tokenize:
3272+
return_dict = False # dicts are only returned by the tokenizer anyway
32763273

3277-
if return_assistant_tokens_mask and not return_dict:
3278-
raise ValueError("`return_assistant_tokens_mask=True` is incompatible with `return_dict=False`")
3274+
if return_assistant_tokens_mask and not (return_dict and tokenize):
3275+
raise ValueError("`return_assistant_tokens_mask=True` requires `return_dict=True` and `tokenize=True`")
32793276

32803277
if tokenizer_kwargs is None:
32813278
tokenizer_kwargs = {}
@@ -3390,13 +3387,17 @@ def encode_message_with_chat_template(
33903387
)
33913388

33923389
if conversation_history is None or len(conversation_history) == 0:
3393-
return self.apply_chat_template([message], add_generation_prompt=False, tokenize=True, **kwargs)
3390+
return self.apply_chat_template(
3391+
[message], add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
3392+
)
33943393

33953394
conversation = conversation_history + [message]
3396-
tokens = self.apply_chat_template(conversation, add_generation_prompt=False, tokenize=True, **kwargs)
3395+
tokens = self.apply_chat_template(
3396+
conversation, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
3397+
)
33973398

33983399
prefix_tokens = self.apply_chat_template(
3399-
conversation_history, add_generation_prompt=False, tokenize=True, **kwargs
3400+
conversation_history, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
34003401
)
34013402
# It's possible that the prefix tokens are not a prefix of the full list of tokens.
34023403
# For example, if the prefix is `<s>User: Hi` and the full conversation is `<s>User: Hi</s><s>Assistant: Hello`.

tests/models/blenderbot/test_tokenization_blenderbot.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,3 @@ def test_pretokenized_inputs(self, *args, **kwargs):
2121
# The issue is that when you have a sequence with leading spaces, splitting it
2222
# with .split() loses the leading spaces, so the tokenization results differ
2323
pass
24-
25-
def test_tokenization_for_chat(self):
26-
tok = self.get_tokenizer()
27-
test_chats = [
28-
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
29-
[
30-
{"role": "system", "content": "You are a helpful chatbot."},
31-
{"role": "user", "content": "Hello!"},
32-
{"role": "assistant", "content": "Nice to meet you."},
33-
],
34-
[{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
35-
]
36-
tokenized_chats = [tok.apply_chat_template(test_chat) for test_chat in test_chats]
37-
expected_tokens = [
38-
[553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 2],
39-
[553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 228, 3490, 287, 2273, 304, 21, 2],
40-
[3490, 287, 2273, 304, 21, 228, 228, 6950, 8, 2],
41-
]
42-
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
43-
self.assertListEqual(tokenized_chat, expected_tokens)

tests/models/bloom/test_tokenization_bloom.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from datasets import load_dataset
1818

1919
from transformers import TokenizersBackend
20-
from transformers.testing_utils import require_jinja, require_tokenizers, slow
20+
from transformers.testing_utils import require_tokenizers, slow
2121

2222
from ...test_tokenization_common import TokenizerTesterMixin
2323

@@ -129,28 +129,6 @@ def test_encodings_from_xnli_dataset(self):
129129
predicted_text = [tokenizer.decode(x, clean_up_tokenization_spaces=False) for x in output_tokens]
130130
self.assertListEqual(predicted_text, input_text)
131131

132-
@require_jinja
133-
def test_tokenization_for_chat(self):
134-
tokenizer = self.get_tokenizer()
135-
tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"
136-
test_chats = [
137-
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
138-
[
139-
{"role": "system", "content": "You are a helpful chatbot."},
140-
{"role": "user", "content": "Hello!"},
141-
{"role": "assistant", "content": "Nice to meet you."},
142-
],
143-
[{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
144-
]
145-
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
146-
expected_tokens = [
147-
[5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2],
148-
[5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2, 229126, 427, 11890, 1152, 17, 2],
149-
[229126, 427, 11890, 1152, 17, 2, 59414, 4, 2],
150-
]
151-
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
152-
self.assertListEqual(tokenized_chat, expected_tokens)
153-
154132
def test_add_prefix_space_fast(self):
155133
tokenizer_w_prefix = self.get_tokenizer(add_prefix_space=True)
156134
tokenizer_wo_prefix = self.get_tokenizer(add_prefix_space=False)

tests/models/cohere/test_tokenization_cohere.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -73,32 +73,6 @@ def test_pretrained_model_lists(self):
7373
self.assertGreaterEqual(len(self.tokenizer_class.pretrained_vocab_files_map), 1)
7474
self.assertGreaterEqual(len(list(self.tokenizer_class.pretrained_vocab_files_map.values())[0]), 1)
7575

76-
@require_jinja
77-
def test_tokenization_for_chat(self):
78-
tokenizer = self.get_tokenizer()
79-
test_chats = [
80-
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
81-
[
82-
{"role": "system", "content": "You are a helpful chatbot."},
83-
{"role": "user", "content": "Hello!"},
84-
{"role": "assistant", "content": "Nice to meet you."},
85-
],
86-
]
87-
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
88-
# fmt: off
89-
expected_tokens = [
90-
[5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65, 59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59, 45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8],
91-
[5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65,
92-
59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8,
93-
36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59,
94-
45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61,
95-
58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 43, 48, 41, 60, 42, 55, 60, 71, 60, 55, 51, 45, 54, 99, 38,
96-
54, 567, 235, 693, 276, 411, 243, 22, 8]
97-
]
98-
# fmt: on
99-
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
100-
self.assertListEqual(tokenized_chat, expected_tokens)
101-
10276
@require_jinja
10377
def test_tokenization_for_tool_use(self):
10478
tokenizer = self.get_tokenizer()

tests/models/gpt2/test_tokenization_gpt2.py

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import unittest
1717

1818
from transformers import AutoTokenizer, GPT2Tokenizer
19-
from transformers.testing_utils import require_jinja, require_tiktoken, require_tokenizers
19+
from transformers.testing_utils import require_tiktoken, require_tokenizers
2020

2121
from ...test_tokenization_common import TokenizerTesterMixin
2222

@@ -67,26 +67,6 @@ def test_special_tokens_mask_input_pairs_and_bos_token(self):
6767
filtered_sequence = [x for x in filtered_sequence if x is not None]
6868
self.assertEqual(encoded_sequence, filtered_sequence)
6969

70-
@require_jinja
71-
def test_tokenization_for_chat(self):
72-
tokenizer = GPT2Tokenizer.from_pretrained(self.tmpdirname)
73-
tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"
74-
test_chats = [
75-
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
76-
[
77-
{"role": "system", "content": "You are a helpful chatbot."},
78-
{"role": "user", "content": "Hello!"},
79-
{"role": "assistant", "content": "Nice to meet you."},
80-
],
81-
[{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
82-
]
83-
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
84-
# fmt: off
85-
expected_tokens = [[1639, 389, 257, 7613, 8537, 13645, 13, 50256, 15496, 0, 50256], [1639, 389, 257, 7613, 8537, 13645, 13, 50256, 15496, 0, 50256, 35284, 284, 1826, 345, 13, 50256], [35284, 284, 1826, 345, 13, 50256, 15496, 0, 50256]]
86-
# fmt: on
87-
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
88-
self.assertListEqual(tokenized_chat, expected_tokens)
89-
9070
@require_tiktoken
9171
def test_tokenization_tiktoken(self):
9272
from tiktoken import encoding_name_for_model

tests/models/gpt_sw3/test_tokenization_gpt_sw3.py

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import unittest
1616

1717
from transformers import GPTSw3Tokenizer
18-
from transformers.testing_utils import get_tests_dir, require_jinja, require_sentencepiece, require_tokenizers, slow
18+
from transformers.testing_utils import get_tests_dir, require_sentencepiece, require_tokenizers, slow
1919

2020
from ...test_tokenization_common import TokenizerTesterMixin
2121

@@ -129,36 +129,3 @@ def test_tokenizer_integration(self):
129129
model_name="AI-Sweden-Models/gpt-sw3-126m",
130130
sequences=sequences,
131131
)
132-
133-
@require_jinja
134-
def test_tokenization_for_chat(self):
135-
tokenizer = GPTSw3Tokenizer(SAMPLE_VOCAB, name_or_path="test")
136-
tokenizer.chat_template = (
137-
"{{ eos_token }}{{ bos_token }}"
138-
"{% for message in messages %}"
139-
"{% if message['role'] == 'user' %}{{ 'User: ' + message['content']}}"
140-
"{% else %}{{ 'Bot: ' + message['content']}}{% endif %}"
141-
"{{ message['text'] }}{{ bos_token }}"
142-
"{% endfor %}"
143-
"Bot:"
144-
)
145-
# This is in English, but it's just here to make sure the chat control tokens are being added properly
146-
test_chats = [
147-
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
148-
[
149-
{"role": "system", "content": "You are a helpful chatbot."},
150-
{"role": "user", "content": "Hello!"},
151-
{"role": "assistant", "content": "Nice to meet you."},
152-
],
153-
[{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
154-
]
155-
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
156-
# fmt: off
157-
expected_tokens = [
158-
[2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419],
159-
[2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 575, 541, 419],
160-
[2000, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419]
161-
]
162-
# fmt: on
163-
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
164-
self.assertListEqual(tokenized_chat, expected_tokens)

tests/test_tokenization_common.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,9 @@ def test_chat_template(self):
950950
dummy_conversation, chat_template=dummy_template, tokenize=True, return_dict=False
951951
)
952952
dict_output = tokenizer.apply_chat_template(
953-
dummy_conversation, chat_template=dummy_template, tokenize=True, return_dict=True
953+
dummy_conversation,
954+
chat_template=dummy_template,
955+
tokenize=True, # This also checks return_dict=True is the default
954956
)
955957
self.assertEqual(dict_output["input_ids"], output) # Test return_dict behaviour matches
956958

0 commit comments

Comments (0)