diff --git a/src/transformers/modeling_rope_utils.py b/src/transformers/modeling_rope_utils.py
index 131b49a4ffa0..b8b3c4873f3d 100644
--- a/src/transformers/modeling_rope_utils.py
+++ b/src/transformers/modeling_rope_utils.py
@@ -654,13 +654,18 @@ def standardize_rope_params(self):
     """
    Helper to standardize the config's rope params field by ensuring the params are defined for each later type.
     For old model the fn will duplicate a single rope param in each layer type (backward compatibility)
     """
-    # Move `rope_theta` and `partial_rotary_factor` to the params dict, if not there yet
+    # Move `rope_theta` and `partial_rotary_factor` to the `rope_parameters`, if not there yet
     rope_theta = getattr(self, "rope_theta", None)
     partial_rotary_factor = getattr(self, "partial_rotary_factor", None)
     rope_parameters = getattr(self, "rope_parameters", None) or {}
+    # Case 0: no RoPE params defined
+    if not (rope_parameters or rope_theta):
+        # partial_rotary_factor without rope_theta is invalid, so we don't check for it here
+        logger.warning("`standardize_rope_params` was called but no RoPE parameters were found.")
+        return
     # Case 1: RoPE param keys do not intersect with possible `layer_types` -> one global dict
-    if getattr(self, "layer_types", None) is None or not set(rope_parameters.keys()).issubset(self.layer_types):
+    elif getattr(self, "layer_types", None) is None or not set(rope_parameters.keys()).issubset(self.layer_types):
         rope_parameters.setdefault("rope_type", rope_parameters.get("type", "default"))
         rope_parameters.setdefault("rope_theta", rope_theta)
         if partial_rotary_factor is not None:
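
For context, here is a minimal standalone sketch of the standardization flow after this change. `SimpleConfig`, its constructor, and the logger wiring are illustrative stand-ins rather than the actual transformers API, and the `partial_rotary_factor` handling plus the final assignment to `self.rope_parameters` fall past the end of the hunk above, so they are reconstructed here as assumptions.

```python
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


class SimpleConfig:
    """Hypothetical stand-in for a model config carrying RoPE attributes."""

    def __init__(self, rope_theta=None, partial_rotary_factor=None, rope_parameters=None, layer_types=None):
        # Only set attributes that are actually provided, mimicking old configs
        # that may lack some or all of these fields.
        if rope_theta is not None:
            self.rope_theta = rope_theta
        if partial_rotary_factor is not None:
            self.partial_rotary_factor = partial_rotary_factor
        if rope_parameters is not None:
            self.rope_parameters = rope_parameters
        if layer_types is not None:
            self.layer_types = layer_types

    def standardize_rope_params(self):
        rope_theta = getattr(self, "rope_theta", None)
        partial_rotary_factor = getattr(self, "partial_rotary_factor", None)
        rope_parameters = getattr(self, "rope_parameters", None) or {}
        # Case 0 (new in this diff): no RoPE params at all -> warn and leave the config untouched
        if not (rope_parameters or rope_theta):
            logger.warning("`standardize_rope_params` was called but no RoPE parameters were found.")
            return
        # Case 1: keys are not per-layer-type -> fold everything into one global dict
        if getattr(self, "layer_types", None) is None or not set(rope_parameters.keys()).issubset(self.layer_types):
            rope_parameters.setdefault("rope_type", rope_parameters.get("type", "default"))
            rope_parameters.setdefault("rope_theta", rope_theta)
            if partial_rotary_factor is not None:
                # Assumed continuation of the hunk: carry the factor into the dict.
                rope_parameters.setdefault("partial_rotary_factor", partial_rotary_factor)
            # Assumed continuation: persist the standardized dict on the config.
            self.rope_parameters = rope_parameters


# Old-style config with only `rope_theta`: folded into a single global dict.
cfg = SimpleConfig(rope_theta=10000.0)
cfg.standardize_rope_params()
print(cfg.rope_parameters)  # {'rope_type': 'default', 'rope_theta': 10000.0}

# No RoPE attributes at all: the new Case 0 logs a warning and sets nothing.
empty = SimpleConfig()
empty.standardize_rope_params()
```

The intent of the Case 0 guard, as reflected in the diff, appears to be avoiding a half-populated `rope_parameters` dict on configs that never defined RoPE; with the early `return` in place, the switch from `if` to `elif` on Case 1 is a readability choice rather than a behavioral one.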