diff --git a/src/transformers/models/qwen2/tokenization_qwen2.py b/src/transformers/models/qwen2/tokenization_qwen2.py index 48312e3cadf8..86c4c458606c 100644 --- a/src/transformers/models/qwen2/tokenization_qwen2.py +++ b/src/transformers/models/qwen2/tokenization_qwen2.py @@ -63,7 +63,10 @@ def __init__( self._vocab = { "<|endoftext|>": 0, } - self._merges = merges if merges is not None else generate_merges(self._vocab) + if merges is not None: + self._merges = [tuple(merge) if isinstance(merge, list) else merge for merge in merges] + else: + self._merges = generate_merges(self._vocab) self._tokenizer = Tokenizer( BPE(