From fc4db6ed8bd01ebc727728f3430ef15b7d947178 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 5 Dec 2025 13:09:26 +0100 Subject: [PATCH 1/3] Only default `rope_parameters` to empty `dict` if there is something to put in it Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- src/transformers/modeling_rope_utils.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/transformers/modeling_rope_utils.py b/src/transformers/modeling_rope_utils.py index 131b49a4ffa0..a0353d4ca60a 100644 --- a/src/transformers/modeling_rope_utils.py +++ b/src/transformers/modeling_rope_utils.py @@ -654,13 +654,19 @@ def standardize_rope_params(self): Helper to standardize the config's rope params field by ensuring the params are defined for each later type. For old model the fn will duplicate a single rope param in each layer type (backward compatibility) """ - # Move `rope_theta` and `partial_rotary_factor` to the params dict, if not there yet + # Move `rope_theta` and `partial_rotary_factor` to the `rope_parameters`, if not there yet rope_theta = getattr(self, "rope_theta", None) partial_rotary_factor = getattr(self, "partial_rotary_factor", None) - rope_parameters = getattr(self, "rope_parameters", None) or {} + rope_parameters = getattr(self, "rope_parameters", None) + # If `rope_parameters` is not defined, but either `rope_theta` or `partial_rotary_factor` are, create it + if rope_parameters is None and (rope_theta is not None or partial_rotary_factor is not None): + rope_parameters = {} + # Case 0: no RoPE params defined + if rope_parameters is None: + return # Case 1: RoPE param keys do not intersect with possible `layer_types` -> one global dict - if getattr(self, "layer_types", None) is None or not set(rope_parameters.keys()).issubset(self.layer_types): + elif getattr(self, "layer_types", None) is None or not set(rope_parameters.keys()).issubset(self.layer_types): rope_parameters.setdefault("rope_type", rope_parameters.get("type", "default")) rope_parameters.setdefault("rope_theta", rope_theta) if partial_rotary_factor is not None: From 8012d222d64f2be913d3a1ebf6c2d4d3018bbeb4 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 5 Dec 2025 13:57:49 +0100 Subject: [PATCH 2/3] Add warning Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- src/transformers/modeling_rope_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/modeling_rope_utils.py b/src/transformers/modeling_rope_utils.py index a0353d4ca60a..ef21df725568 100644 --- a/src/transformers/modeling_rope_utils.py +++ b/src/transformers/modeling_rope_utils.py @@ -664,6 +664,7 @@ def standardize_rope_params(self): # Case 0: no RoPE params defined if rope_parameters is None: + logger.warning("`standardize_rope_params` was called but no RoPE parameters were found.") return # Case 1: RoPE param keys do not intersect with possible `layer_types` -> one global dict elif getattr(self, "layer_types", None) is None or not set(rope_parameters.keys()).issubset(self.layer_types): From 10bfdf5f7b7a88e87a35de894870068d3217e8a1 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 5 Dec 2025 14:08:20 +0100 Subject: [PATCH 3/3] Also catch explicit `{}` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- src/transformers/modeling_rope_utils.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/transformers/modeling_rope_utils.py b/src/transformers/modeling_rope_utils.py index ef21df725568..b8b3c4873f3d 100644 --- a/src/transformers/modeling_rope_utils.py +++ b/src/transformers/modeling_rope_utils.py @@ -657,13 +657,11 @@ def standardize_rope_params(self): # Move `rope_theta` and `partial_rotary_factor` to the `rope_parameters`, if not there yet rope_theta = getattr(self, "rope_theta", None) partial_rotary_factor = getattr(self, "partial_rotary_factor", None) - rope_parameters = getattr(self, "rope_parameters", None) - # If `rope_parameters` is not defined, but either `rope_theta` or `partial_rotary_factor` are, create it - if rope_parameters is None and (rope_theta is not None or partial_rotary_factor is not None): - rope_parameters = {} + rope_parameters = getattr(self, "rope_parameters", None) or {} # Case 0: no RoPE params defined - if rope_parameters is None: + if not (rope_parameters or rope_theta): + # partial_rotary_factor without rope_theta is invalid, so we don't check for it here logger.warning("`standardize_rope_params` was called but no RoPE parameters were found.") return # Case 1: RoPE param keys do not intersect with possible `layer_types` -> one global dict