From 14a9768f0774b6abc63dce44daf08dc6364925c5 Mon Sep 17 00:00:00 2001 From: itazap Date: Fri, 5 Dec 2025 10:52:50 +0100 Subject: [PATCH] qwen2 tok regression --- src/transformers/models/qwen2/tokenization_qwen2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/qwen2/tokenization_qwen2.py b/src/transformers/models/qwen2/tokenization_qwen2.py index 48312e3cadf8..86c4c458606c 100644 --- a/src/transformers/models/qwen2/tokenization_qwen2.py +++ b/src/transformers/models/qwen2/tokenization_qwen2.py @@ -63,7 +63,10 @@ def __init__( self._vocab = { "<|endoftext|>": 0, } - self._merges = merges if merges is not None else generate_merges(self._vocab) + if merges is not None: + self._merges = [tuple(merge) if isinstance(merge, list) else merge for merge in merges] + else: + self._merges = generate_merges(self._vocab) self._tokenizer = Tokenizer( BPE(