From 9c6357c7d22d6e35605a1a0c8bb9e9a4f2c054ed Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Fri, 5 Dec 2025 21:23:24 +0000
Subject: [PATCH 1/7] support saving/loading multiple sub_processors of the same
 kind

---
 src/transformers/processing_utils.py     | 110 ++++++++++++++++++----
 tests/models/auto/test_processor_auto.py | 113 +++++++++++++++++++++++
 2 files changed, 204 insertions(+), 19 deletions(-)

diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index f54ddeb1b2a6..85eb5cdd3f9d 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -130,6 +130,26 @@ def keys(self):
     "video_processor": "BaseVideoProcessor",
 }
 
+
+def _get_modality_for_attribute(attribute_name: str) -> str:
+    """
+    Get the canonical modality type for a given attribute name.
+
+    For example:
+    - "image_processor" -> "image_processor"
+    - "encoder_image_processor" -> "image_processor"
+    - "text_tokenizer" -> "tokenizer"
+    - "my_feature_extractor" -> "feature_extractor"
+    """
+    for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys():
+        if modality in attribute_name:
+            return modality
+    raise ValueError(
+        f"Cannot determine modality for attribute '{attribute_name}'. "
+        f"Attribute name must contain one of: {list(MODALITY_TO_AUTOPROCESSOR_MAPPING.keys())}"
+    )
+
+
 if sys.version_info >= (3, 11):
     Unpack = typing.Unpack
 else:
@@ -664,8 +684,10 @@ def check_argument_for_proper_class(self, argument_name, argument):
         mismatch between expected and actual class, an error is raise. Otherwise, the proper retrieved class
         is returned.
         """
-        if argument_name not in MODALITY_TO_BASE_CLASS_MAPPING and "tokenizer" in argument_name:
-            argument_name = "tokenizer"
+        # If the exact attribute name is not in the mapping, use its canonical modality
+        # (e.g., "encoder_tokenizer" -> "tokenizer")
+        if argument_name not in MODALITY_TO_BASE_CLASS_MAPPING:
+            argument_name = _get_modality_for_attribute(argument_name)
         class_name = MODALITY_TO_BASE_CLASS_MAPPING.get(argument_name)
         if isinstance(class_name, tuple):
             proper_class = tuple(self.get_possibly_dynamic_module(n) for n in class_name if n is not None)
@@ -696,9 +718,13 @@ def to_dict(self) -> dict[str, Any]:
         # extra attributes to be kept
         attrs_to_save += ["auto_map"]
 
+        # Remove tokenizers from output - they have their own vocab files and are saved separately.
+        # All other sub-processors (image_processor, feature_extractor, etc.) are kept in processor_config.json.
         for attribute in self.__class__.get_attributes():
-            if "tokenizer" in attribute and attribute in output:
-                del output[attribute]
+            if attribute in output:
+                modality = _get_modality_for_attribute(attribute)
+                if modality == "tokenizer":
+                    del output[attribute]
 
         if "chat_template" in output:
             del output["chat_template"]
@@ -820,13 +846,15 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
             if hasattr(attribute, "_set_processor_class"):
                 attribute._set_processor_class(self.__class__.__name__)
 
-            # Save the tokenizer in its own vocab file. The other attributes are saved as part of `processor_config.json`
-            if attribute_name == "tokenizer":
-                attribute.save_pretrained(save_directory)
-            # if a model has multiple tokenizers, save the additional tokenizers in their own folders.
-            # Note that the additional tokenizers must have "tokenizer" in their attribute name.
-            elif "tokenizer" in attribute_name:
-                attribute.save_pretrained(os.path.join(save_directory, attribute_name))
+            modality = _get_modality_for_attribute(attribute_name)
+            is_primary = attribute_name == modality
+            if modality == "tokenizer":
+                # Save the tokenizer in its own vocab file. The other attributes are saved as part of `processor_config.json`
+                if is_primary:
+                    attribute.save_pretrained(save_directory)
+                else:
+                    # if a model has multiple tokenizers, save the additional tokenizers in their own folders.
+                    attribute.save_pretrained(os.path.join(save_directory, attribute_name))
             elif attribute._auto_class is not None:
                 custom_object_save(attribute, save_directory, config=attribute)
 
@@ -1394,8 +1422,9 @@ def from_pretrained(
         if token is not None:
             kwargs["token"] = token
 
-        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
+        # Get processor_dict first so we can use it to instantiate non-tokenizer sub-processors
         processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
+        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
         return cls.from_args_and_dict(args, processor_dict, **kwargs)
 
     @classmethod
@@ -1406,7 +1435,7 @@ def get_attributes(cls):
             # don't treat audio_tokenizer as an attribute
             if sub_processor_type == "audio_tokenizer":
                 continue
-            if sub_processor_type in MODALITY_TO_AUTOPROCESSOR_MAPPING or "tokenizer" in sub_processor_type:
+            if any(modality in sub_processor_type for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
                 attributes.append(sub_processor_type)
 
         # Legacy processors may not override `__init__` and instead expose modality
@@ -1420,7 +1449,7 @@ def get_attributes(cls):
                 inferred_attribute = attribute_name[: -len("_class")]
                 if inferred_attribute == "audio_tokenizer":
                     continue
-                if inferred_attribute in MODALITY_TO_AUTOPROCESSOR_MAPPING or "tokenizer" in inferred_attribute:
+                if any(modality in inferred_attribute for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
                     attributes.append(inferred_attribute)
 
         return attributes
@@ -1448,20 +1477,36 @@ def register_for_auto_class(cls, auto_class="AutoProcessor"):
         cls._auto_class = auto_class
 
     @classmethod
-    def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+    def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, processor_dict=None, **kwargs):
         """
         Identify and instantiate the subcomponents of Processor classes, such as image processors, tokenizers,
         and feature extractors. This method inspects the processor's `__init__` signature to identify parameters
         that correspond to known modality types (image_processor, tokenizer, feature_extractor, etc.) or contain
-        "tokenizer" in their name. It then uses the appropriate Auto class (AutoImageProcessor, AutoTokenizer, etc.)
-        from `MODALITY_TO_AUTOPROCESSOR_MAPPING` to load each subcomponent via `.from_pretrained()`. For tokenizer-like
-        parameters not explicitly in the mapping, the method uses AutoTokenizer with a subfolder argument.
+        modality names in their attribute name.
+
+        For tokenizers: Uses the appropriate Auto class (AutoTokenizer) to load via `.from_pretrained()`.
+        Additional tokenizers (e.g., "decoder_tokenizer") are loaded from subfolders.
+
+        For other sub-processors (image_processor, feature_extractor, etc.): Primary ones are loaded via
+        Auto class. Additional ones are instantiated from the config stored in processor_config.json
+        (passed as processor_dict).
+
+        Args:
+            pretrained_model_name_or_path: Path or model id to load from.
+            processor_dict: Optional dict containing processor config (from processor_config.json).
+                Required when loading additional non-tokenizer sub-processors.
         """
         args = []
+        processor_dict = processor_dict if processor_dict is not None else {}
+
         # get args from processor init signature
         sub_processors = cls.get_attributes()
         for sub_processor_type in sub_processors:
-            if sub_processor_type in MODALITY_TO_AUTOPROCESSOR_MAPPING:
+            modality = _get_modality_for_attribute(sub_processor_type)
+            is_primary = sub_processor_type == modality
+
+            if is_primary:
+                # Primary non-tokenizer sub-processor: load via Auto class
                 auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[sub_processor_type]
                 sub_processor = auto_processor_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
                 args.append(sub_processor)
@@ -1474,6 +1519,33 @@ def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
                 )
                 args.append(sub_processor)
 
+            elif sub_processor_type in processor_dict:
+                # Additional non-tokenizer sub-processor: instantiate from config in processor_dict
+                sub_processor_config = processor_dict[sub_processor_type]
+                if isinstance(sub_processor_config, dict):
+                    # Determine the class to instantiate
+                    # Image processors have 'image_processor_type', feature extractors have 'feature_extractor_type'
+                    type_key = f"{modality}_type"
+                    class_name = sub_processor_config.get(type_key)
+                    if class_name is None:
+                        raise ValueError(
+                            f"Cannot instantiate {sub_processor_type}: missing '{type_key}' in config. "
+                            f"Config keys: {list(sub_processor_config.keys())}"
+                        )
+                    processor_class = cls.get_possibly_dynamic_module(class_name)
+                    sub_processor = processor_class(**sub_processor_config)
+                    args.append(sub_processor)
+                else:
+                    raise ValueError(
+                        f"Expected dict for {sub_processor_type} in processor_config.json, "
+                        f"got {type(sub_processor_config)}"
+                    )
+            else:
+                raise ValueError(
+                    f"Cannot find config for {sub_processor_type} in processor_config.json. "
+                    f"Available keys: {list(processor_dict.keys())}"
+                )
+
         return args
 
     @staticmethod
diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py
index 63f28d3dea9d..4e618ea0f9b5 100644
--- a/tests/models/auto/test_processor_auto.py
+++ b/tests/models/auto/test_processor_auto.py
@@ -35,6 +35,7 @@
     AutoTokenizer,
     BaseVideoProcessor,
     BertTokenizer,
+    CLIPImageProcessorFast,
     FeatureExtractionMixin,
     ImageProcessingMixin,
     LlamaTokenizer,
@@ -42,6 +43,7 @@
     LlavaProcessor,
     ProcessorMixin,
     SiglipImageProcessor,
+    SiglipImageProcessorFast,
     Wav2Vec2Config,
     Wav2Vec2FeatureExtractor,
     Wav2Vec2Processor,
@@ -431,6 +433,117 @@ def test_auto_processor_save_load(self):
             second_processor = AutoProcessor.from_pretrained(tmp_dir)
             self.assertEqual(second_processor.__class__.__name__, processor.__class__.__name__)
 
+    def test_processor_with_multiple_tokenizers_save_load(self):
+        """Test that processors with multiple tokenizers save and load correctly."""
+
+        class DualTokenizerProcessor(ProcessorMixin):
+            """A processor with two tokenizers and an image processor."""
+
+            def __init__(self, tokenizer, decoder_tokenizer, image_processor):
+                super().__init__(tokenizer, decoder_tokenizer, image_processor)
+
+        # Create processor with multiple tokenizers
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertForMaskedLM")
+        decoder_tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
+        image_processor = SiglipImageProcessor()
+
+        processor = DualTokenizerProcessor(
+            tokenizer=tokenizer,
+            decoder_tokenizer=decoder_tokenizer,
+            image_processor=image_processor,
+        )
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            processor.save_pretrained(tmp_dir)
+
+            # Verify directory structure: primary tokenizer in root, additional in subfolder
+            self.assertTrue(os.path.exists(os.path.join(tmp_dir, "tokenizer_config.json")))
+            self.assertTrue(os.path.isdir(os.path.join(tmp_dir, "decoder_tokenizer")))
+            self.assertTrue(os.path.exists(os.path.join(tmp_dir, "decoder_tokenizer", "tokenizer_config.json")))
+
+            # Verify processor_config.json contains image_processor but not tokenizers
+            with open(os.path.join(tmp_dir, "processor_config.json")) as f:
+                processor_config = json.load(f)
+            self.assertIn("image_processor", processor_config)
+            self.assertNotIn("tokenizer", processor_config)
+            self.assertNotIn("decoder_tokenizer", processor_config)
+
+            # Reload the full processor and verify all attributes
+            loaded_processor = DualTokenizerProcessor.from_pretrained(tmp_dir)
+
+            # Verify the processor has all expected attributes
+            self.assertTrue(hasattr(loaded_processor, "tokenizer"))
+            self.assertTrue(hasattr(loaded_processor, "decoder_tokenizer"))
+            self.assertTrue(hasattr(loaded_processor, "image_processor"))
+
+            # Verify tokenizers loaded correctly
+            self.assertEqual(loaded_processor.tokenizer.vocab_size, tokenizer.vocab_size)
+            self.assertEqual(loaded_processor.decoder_tokenizer.vocab_size, decoder_tokenizer.vocab_size)
+
+            # Verify image processor loaded correctly
+            self.assertEqual(loaded_processor.image_processor.size, image_processor.size)
+
+    def test_processor_with_multiple_image_processors_save_load(self):
+        """Test that processors with multiple image processors save and load correctly."""
+
+        class DualImageProcessorProcessor(ProcessorMixin):
+            """A processor with two image processors and a tokenizer."""
+
+            def __init__(self, tokenizer, image_processor, encoder_image_processor):
+                super().__init__(tokenizer, image_processor, encoder_image_processor)
+
+        # Create processor with multiple image processors
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertForMaskedLM")
+        image_processor = SiglipImageProcessorFast(size={"height": 224, "width": 224})
+        encoder_image_processor = CLIPImageProcessorFast(size={"height": 384, "width": 384})
+
+        processor = DualImageProcessorProcessor(
+            tokenizer=tokenizer,
+            image_processor=image_processor,
+            encoder_image_processor=encoder_image_processor,
+        )
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            processor.save_pretrained(tmp_dir)
+
+            # Verify processor_config.json contains both image processors
+            with open(os.path.join(tmp_dir, "processor_config.json")) as f:
+                processor_config = json.load(f)
+            self.assertIn("image_processor", processor_config)
+            self.assertIn("encoder_image_processor", processor_config)
+            self.assertNotIn("tokenizer", processor_config)
+
+            # Verify both image processors have the correct type key for instantiation
+            self.assertIn("image_processor_type", processor_config["image_processor"])
+            self.assertIn("image_processor_type", processor_config["encoder_image_processor"])
+            self.assertEqual(processor_config["image_processor"]["image_processor_type"], "SiglipImageProcessorFast")
+            self.assertEqual(
+                processor_config["encoder_image_processor"]["image_processor_type"], "CLIPImageProcessorFast"
+            )
+
+            # Verify the sizes are different (to ensure they're separate configs)
+            self.assertEqual(processor_config["image_processor"]["size"], {"height": 224, "width": 224})
+            self.assertEqual(processor_config["encoder_image_processor"]["size"], {"height": 384, "width": 384})
+
+            # Reload the full processor and verify all attributes
+            loaded_processor = DualImageProcessorProcessor.from_pretrained(tmp_dir)
+
+            # Verify the processor has all expected attributes
+            self.assertTrue(hasattr(loaded_processor, "tokenizer"))
+            self.assertTrue(hasattr(loaded_processor, "image_processor"))
+            self.assertTrue(hasattr(loaded_processor, "encoder_image_processor"))
+
+            # Verify tokenizer loaded correctly
+            self.assertEqual(loaded_processor.tokenizer.vocab_size, tokenizer.vocab_size)
+
+            # Verify image processors loaded correctly with their distinct sizes
+            self.assertEqual(loaded_processor.image_processor.size, {"height": 224, "width": 224})
+            self.assertEqual(loaded_processor.encoder_image_processor.size, {"height": 384, "width": 384})
+
+            # Verify they are different types
+            self.assertIsInstance(loaded_processor.image_processor, SiglipImageProcessorFast)
+            self.assertIsInstance(loaded_processor.encoder_image_processor, CLIPImageProcessorFast)
+
 
 @is_staging_test
 class ProcessorPushToHubTester(unittest.TestCase):

From f3bd01c9156559ce829a7ba3a7e14c82cecaa985 Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Fri, 5 Dec 2025 21:41:06 +0000
Subject: [PATCH 2/7] standardize all processors

---
 .../models/audioflamingo3/processing_audioflamingo3.py        | 4 ----
 src/transformers/models/auto/feature_extraction_auto.py       | 1 +
 src/transformers/models/auto/processing_auto.py               | 2 ++
 src/transformers/models/auto/tokenization_auto.py             | 3 +++
 .../models/phi4_multimodal/processing_phi4_multimodal.py      | 2 --
 src/transformers/models/pix2struct/processing_pix2struct.py   | 4 ----
 6 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/transformers/models/audioflamingo3/processing_audioflamingo3.py b/src/transformers/models/audioflamingo3/processing_audioflamingo3.py
index bc14f0d6cde4..b53dcd165464 100644
--- a/src/transformers/models/audioflamingo3/processing_audioflamingo3.py
+++ b/src/transformers/models/audioflamingo3/processing_audioflamingo3.py
@@ -74,10 +74,6 @@ class AudioFlamingo3Processor(ProcessorMixin):
             Special token used to represent audio inputs in the chat template.
     """
 
-    attributes = ["feature_extractor", "tokenizer"]
-    feature_extractor_class = "WhisperFeatureExtractor"
-    tokenizer_class = "Qwen2TokenizerFast"
-
     def __init__(
         self,
         feature_extractor,
diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py
index a9008af06ab6..6963447b5b6f 100644
--- a/src/transformers/models/auto/feature_extraction_auto.py
+++ b/src/transformers/models/auto/feature_extraction_auto.py
@@ -38,6 +38,7 @@
 FEATURE_EXTRACTOR_MAPPING_NAMES = OrderedDict(
     [
         ("audio-spectrogram-transformer", "ASTFeatureExtractor"),
+        ("audioflamingo3", "WhisperFeatureExtractor"),
         ("clap", "ClapFeatureExtractor"),
         ("clvp", "ClvpFeatureExtractor"),
         ("csm", "EncodecFeatureExtractor"),
diff --git a/src/transformers/models/auto/processing_auto.py b/src/transformers/models/auto/processing_auto.py
index 6d08bf37ebab..88dde801bba3 100644
--- a/src/transformers/models/auto/processing_auto.py
+++ b/src/transformers/models/auto/processing_auto.py
@@ -93,6 +93,8 @@
         ("kosmos-2", "Kosmos2Processor"),
         ("kosmos-2.5", "Kosmos2_5Processor"),
         ("kyutai_speech_to_text", "KyutaiSpeechToTextProcessor"),
+        ("lasr_ctc", "LasrProcessor"),
+        ("lasr_encoder", "LasrProcessor"),
         ("layoutlmv2", "LayoutLMv2Processor"),
         ("layoutlmv3", "LayoutLMv3Processor"),
         ("layoutxlm", "LayoutXLMProcessor"),
diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
index 31c6a783726b..bf4de43e30df 100644
--- a/src/transformers/models/auto/tokenization_auto.py
+++ b/src/transformers/models/auto/tokenization_auto.py
@@ -70,6 +70,7 @@
         ("align", "BertTokenizer" if is_tokenizers_available() else None),
         ("arcee", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("aria", "LlamaTokenizerFast" if is_tokenizers_available() else None),
+        ("audioflamingo3", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
         ("aya_vision", "CohereTokenizer" if is_tokenizers_available() else None),
         ("bark", "BertTokenizer" if is_tokenizers_available() else None),
         ("bart", "RobertaTokenizer" if is_tokenizers_available() else None),
@@ -183,6 +184,8 @@
         ("jetmoe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("kosmos-2", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
         ("kosmos-2.5", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
+        ("lasr_ctc", "ParakeetTokenizerFast" if is_tokenizers_available() else None),
+        ("lasr_encoder", "ParakeetTokenizerFast" if is_tokenizers_available() else None),
         ("layoutlm", "BertTokenizer" if is_tokenizers_available() else None),
         ("layoutlmv2", "LayoutLMv2Tokenizer" if is_tokenizers_available() else None),
         ("layoutlmv3", "LayoutLMv3Tokenizer" if is_tokenizers_available() else None),
diff --git a/src/transformers/models/phi4_multimodal/processing_phi4_multimodal.py b/src/transformers/models/phi4_multimodal/processing_phi4_multimodal.py
index 8eec69b0448e..cde089821878 100644
--- a/src/transformers/models/phi4_multimodal/processing_phi4_multimodal.py
+++ b/src/transformers/models/phi4_multimodal/processing_phi4_multimodal.py
@@ -58,8 +58,6 @@ class Phi4MultimodalProcessor(ProcessorMixin):
             The fake audio token pattern.
     """
 
-    audio_processor_class = "Phi4MultimodalFeatureExtractor"
-
     def __init__(
         self,
         image_processor,
diff --git a/src/transformers/models/pix2struct/processing_pix2struct.py b/src/transformers/models/pix2struct/processing_pix2struct.py
index 1fe236339a7c..3ce09bf9d7fc 100644
--- a/src/transformers/models/pix2struct/processing_pix2struct.py
+++ b/src/transformers/models/pix2struct/processing_pix2struct.py
@@ -61,10 +61,6 @@ class Pix2StructProcessor(ProcessorMixin):
             An instance of ['T5Tokenizer`]. The tokenizer is a required input.
     """
 
-    attributes = ["image_processor", "tokenizer"]
-    image_processor_class = "Pix2StructImageProcessor"
-    tokenizer_class = ("T5Tokenizer",)
-
     def __init__(self, image_processor, tokenizer):
         tokenizer.return_token_type_ids = False
         super().__init__(image_processor, tokenizer)

From c84b5642dace2eeb70e08c15e6eaa74dda492154 Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Fri, 5 Dec 2025 21:46:36 +0000
Subject: [PATCH 3/7] remove tokenizer_class from lasr

---
 src/transformers/models/lasr/processing_lasr.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/transformers/models/lasr/processing_lasr.py b/src/transformers/models/lasr/processing_lasr.py
index 3396986866e2..7a4661c6a6ce 100644
--- a/src/transformers/models/lasr/processing_lasr.py
+++ b/src/transformers/models/lasr/processing_lasr.py
@@ -47,8 +47,6 @@ class LasrProcessorKwargs(ProcessingKwargs, total=False):
 
 
 class LasrProcessor(ProcessorMixin):
-    tokenizer_class = "ParakeetTokenizerFast"
-
     def __init__(self, feature_extractor, tokenizer):
         super().__init__(feature_extractor, tokenizer)
 

From abd038d1886b7f759f593ece420a0df630bbde9c Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Fri, 5 Dec 2025 21:51:41 +0000
Subject: [PATCH 4/7] fix modular

---
 src/transformers/models/lasr/modular_lasr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/lasr/modular_lasr.py b/src/transformers/models/lasr/modular_lasr.py
index c02b2ae0f1c3..75170f0009a5 100644
--- a/src/transformers/models/lasr/modular_lasr.py
+++ b/src/transformers/models/lasr/modular_lasr.py
@@ -97,7 +97,7 @@ def _decode(
 
 
 class LasrProcessor(ParakeetProcessor):
-    tokenizer_class = "ParakeetTokenizerFast"
+    pass
 
 
 class LasrEncoderConfig(ParakeetEncoderConfig):

From 855627931f289da1a4b9d67dbc580fe28ebc0c42 Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Fri, 5 Dec 2025 22:20:08 +0000
Subject: [PATCH 5/7] refactor + check init of parent classes

---
 src/transformers/processing_utils.py | 124 +++++++++++++++++----------
 tests/test_processing_common.py      |  16 ++--
 2 files changed, 87 insertions(+), 53 deletions(-)

diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index 85eb5cdd3f9d..097d019051a6 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -119,9 +119,9 @@ def keys(self):
         return self._MAPPING_NAMES.keys()
 
 
-MODALITY_TO_AUTOPROCESSOR_MAPPING = _LazyAutoProcessorMapping()
+SUBPROCESSOR_TO_AUTO_CLASS_MAPPING = _LazyAutoProcessorMapping()
 
-MODALITY_TO_BASE_CLASS_MAPPING = {
+SUBPROCESSOR_TO_BASE_CLASS_MAPPING = {
     "audio_tokenizer": "DacModel",
     "audio_processor": "FeatureExtractionMixin",
     "tokenizer": ("PreTrainedTokenizerBase", "MistralCommonBackend"),
@@ -131,9 +131,9 @@ def keys(self):
 }
 
 
-def _get_modality_for_attribute(attribute_name: str) -> str:
+def _get_subprocessor_type(attribute_name: str) -> str:
     """
-    Get the canonical modality type for a given attribute name.
+    Get the canonical sub-processor type for a given attribute name.
 
     For example:
     - "image_processor" -> "image_processor"
@@ -141,12 +141,13 @@ def _get_modality_for_attribute(attribute_name: str) -> str:
     - "text_tokenizer" -> "tokenizer"
     - "my_feature_extractor" -> "feature_extractor"
     """
-    for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys():
-        if modality in attribute_name:
-            return modality
+    subprocessor_types = SUBPROCESSOR_TO_AUTO_CLASS_MAPPING.keys()
+    for subprocessor_type in subprocessor_types:
+        if subprocessor_type in attribute_name:
+            return subprocessor_type
     raise ValueError(
-        f"Cannot determine modality for attribute '{attribute_name}'. "
-        f"Attribute name must contain one of: {list(MODALITY_TO_AUTOPROCESSOR_MAPPING.keys())}"
+        f"Cannot determine sub-processor type for attribute '{attribute_name}'. "
+        f"Attribute name must contain one of: {list(subprocessor_types)}"
     )
 
 
@@ -684,11 +685,11 @@ def check_argument_for_proper_class(self, argument_name, argument):
         mismatch between expected and actual class, an error is raise. Otherwise, the proper retrieved class
         is returned.
         """
-        # If the exact attribute name is not in the mapping, use its canonical modality
+        # If the exact attribute name is not in the mapping, use its canonical sub-processor type
         # (e.g., "encoder_tokenizer" -> "tokenizer")
-        if argument_name not in MODALITY_TO_BASE_CLASS_MAPPING:
-            argument_name = _get_modality_for_attribute(argument_name)
-        class_name = MODALITY_TO_BASE_CLASS_MAPPING.get(argument_name)
+        if argument_name not in SUBPROCESSOR_TO_BASE_CLASS_MAPPING:
+            argument_name = _get_subprocessor_type(argument_name)
+        class_name = SUBPROCESSOR_TO_BASE_CLASS_MAPPING.get(argument_name)
         if isinstance(class_name, tuple):
             proper_class = tuple(self.get_possibly_dynamic_module(n) for n in class_name if n is not None)
         else:
@@ -722,8 +723,8 @@ def to_dict(self) -> dict[str, Any]:
         # All other sub-processors (image_processor, feature_extractor, etc.) are kept in processor_config.json.
         for attribute in self.__class__.get_attributes():
             if attribute in output:
-                modality = _get_modality_for_attribute(attribute)
-                if modality == "tokenizer":
+                subprocessor_type = _get_subprocessor_type(attribute)
+                if subprocessor_type == "tokenizer":
                     del output[attribute]
 
         if "chat_template" in output:
@@ -846,9 +847,9 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
             if hasattr(attribute, "_set_processor_class"):
                 attribute._set_processor_class(self.__class__.__name__)
 
-            modality = _get_modality_for_attribute(attribute_name)
-            is_primary = attribute_name == modality
-            if modality == "tokenizer":
+            subprocessor_type = _get_subprocessor_type(attribute_name)
+            is_primary = attribute_name == subprocessor_type
+            if subprocessor_type == "tokenizer":
                 # Save the tokenizer in its own vocab file. The other attributes are saved as part of `processor_config.json`
                 if is_primary:
                     attribute.save_pretrained(save_directory)
@@ -1429,30 +1430,63 @@ def from_pretrained(
 
     @classmethod
     def get_attributes(cls):
-        args_in_init = inspect.signature(cls.__init__).parameters.keys()
+        """
+        Detect the sub-processor attributes for this processor class.
+
+        Detection priority:
+        1. Auto-detection from `__init__` signature parameters across the full class hierarchy (MRO)
+        2. `<subprocessor_type>_class` class attributes (legacy pattern, checks full class hierarchy)
+
+        Returns:
+            List of attribute names corresponding to sub-processors.
+        """
+        subprocessor_types = SUBPROCESSOR_TO_AUTO_CLASS_MAPPING.keys()
+
+        # Priority 1: Auto-detect from __init__ signatures across the full MRO
+        # This handles inheritance where child classes use *args/**kwargs
         attributes = []
-        for sub_processor_type in args_in_init:
-            # don't treat audio_tokenizer as an attribute
-            if sub_processor_type == "audio_tokenizer":
+        seen_params = set()
+        for base_class in cls.__mro__:
+            if not hasattr(base_class, "__init__"):
                 continue
-            if any(modality in sub_processor_type for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
-                attributes.append(sub_processor_type)
+            sig = inspect.signature(base_class.__init__)
+            for param_name, param in sig.parameters.items():
+                # Skip self, *args, **kwargs
+                if param_name in ("self",) or param.kind in (
+                    inspect.Parameter.VAR_POSITIONAL,
+                    inspect.Parameter.VAR_KEYWORD,
+                ):
+                    continue
+                if param_name in seen_params or param_name == "audio_tokenizer":
+                    continue
+                seen_params.add(param_name)
+                if any(sp_type in param_name for sp_type in subprocessor_types):
+                    attributes.append(param_name)
 
+        if attributes:
+            return attributes
+
+        # Priority 2: Check for <subprocessor_type>_class attributes in the full class hierarchy
         # Legacy processors may not override `__init__` and instead expose modality
         # attributes via `<attribute>_class`. In that case, `args_in_init` only exposes
         # `*args`/`**kwargs`, so we need to infer the attributes from those class-level
         # hints to keep backward compatibility (e.g. dynamic processors stored on the Hub).
-        if not attributes:
-            for attribute_name, value in cls.__dict__.items():
-                if value is None or attribute_name == "audio_tokenizer_class" or not attribute_name.endswith("_class"):
+        attributes_from_class_hints = []
+        for base_class in cls.__mro__:
+            for attribute_name in base_class.__dict__:
+                if not attribute_name.endswith("_class") or attribute_name == "audio_tokenizer_class":
+                    continue
+                value = getattr(base_class, attribute_name, None)
+                if value is None:
                     continue
                 inferred_attribute = attribute_name[: -len("_class")]
                 if inferred_attribute == "audio_tokenizer":
                     continue
-                if any(modality in inferred_attribute for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
-                    attributes.append(inferred_attribute)
+                if any(sp_type in inferred_attribute for sp_type in subprocessor_types):
+                    if inferred_attribute not in attributes_from_class_hints:
+                        attributes_from_class_hints.append(inferred_attribute)
 
-        return attributes
+        return attributes_from_class_hints
 
     @classmethod
     def register_for_auto_class(cls, auto_class="AutoProcessor"):
@@ -1481,8 +1515,8 @@ def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, processor
         """
         Identify and instantiate the subcomponents of Processor classes, such as image processors, tokenizers,
         and feature extractors. This method inspects the processor's `__init__` signature to identify parameters
-        that correspond to known modality types (image_processor, tokenizer, feature_extractor, etc.) or contain
-        modality names in their attribute name.
+        that correspond to known sub-processor types (image_processor, tokenizer, feature_extractor, etc.) or
+        contain sub-processor type names in their attribute name.
 
         For tokenizers: Uses the appropriate Auto class (AutoTokenizer) to load via `.from_pretrained()`.
         Additional tokenizers (e.g., "decoder_tokenizer") are loaded from subfolders.
@@ -1501,35 +1535,35 @@ def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, processor
 
         # get args from processor init signature
         sub_processors = cls.get_attributes()
-        for sub_processor_type in sub_processors:
-            modality = _get_modality_for_attribute(sub_processor_type)
-            is_primary = sub_processor_type == modality
+        for sub_processor_name in sub_processors:
+            subprocessor_type = _get_subprocessor_type(sub_processor_name)
+            is_primary = sub_processor_name == subprocessor_type
 
             if is_primary:
                 # Primary non-tokenizer sub-processor: load via Auto class
-                auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[sub_processor_type]
+                auto_processor_class = SUBPROCESSOR_TO_AUTO_CLASS_MAPPING[sub_processor_name]
                 sub_processor = auto_processor_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
                 args.append(sub_processor)
-            elif "tokenizer" in sub_processor_type:
+            elif "tokenizer" in sub_processor_name:
                 # Special case: tokenizer-like parameters not in the mapping (e.g., "protein_tokenizer")
                 # Load using AutoTokenizer with subfolder
-                auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
+                auto_processor_class = SUBPROCESSOR_TO_AUTO_CLASS_MAPPING["tokenizer"]
                 sub_processor = auto_processor_class.from_pretrained(
-                    pretrained_model_name_or_path, subfolder=sub_processor_type, **kwargs
+                    pretrained_model_name_or_path, subfolder=sub_processor_name, **kwargs
                 )
                 args.append(sub_processor)
 
-            elif sub_processor_type in processor_dict:
+            elif sub_processor_name in processor_dict:
                 # Additional non-tokenizer sub-processor: instantiate from config in processor_dict
-                sub_processor_config = processor_dict[sub_processor_type]
+                sub_processor_config = processor_dict[sub_processor_name]
                 if isinstance(sub_processor_config, dict):
                     # Determine the class to instantiate
                     # Image processors have 'image_processor_type', feature extractors have 'feature_extractor_type'
-                    type_key = f"{modality}_type"
+                    type_key = f"{subprocessor_type}_type"
                     class_name = sub_processor_config.get(type_key)
                     if class_name is None:
                         raise ValueError(
-                            f"Cannot instantiate {sub_processor_type}: missing '{type_key}' in config. "
+                            f"Cannot instantiate {sub_processor_name}: missing '{type_key}' in config. "
                             f"Config keys: {list(sub_processor_config.keys())}"
                         )
                     processor_class = cls.get_possibly_dynamic_module(class_name)
@@ -1537,12 +1571,12 @@ def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, processor
                     args.append(sub_processor)
                 else:
                     raise ValueError(
-                        f"Expected dict for {sub_processor_type} in processor_config.json, "
+                        f"Expected dict for {sub_processor_name} in processor_config.json, "
                         f"got {type(sub_processor_config)}"
                     )
             else:
                 raise ValueError(
-                    f"Cannot find config for {sub_processor_type} in processor_config.json. "
+                    f"Cannot find config for {sub_processor_name} in processor_config.json. "
                     f"Available keys: {list(processor_dict.keys())}"
                 )
 
diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py
index 9e512f982049..d50c8e21c0c3 100644
--- a/tests/test_processing_common.py
+++ b/tests/test_processing_common.py
@@ -27,7 +27,7 @@
 from parameterized import parameterized
 
 from transformers.processing_utils import (
-    MODALITY_TO_AUTOPROCESSOR_MAPPING,
+    SUBPROCESSOR_TO_AUTO_CLASS_MAPPING,
     Unpack,
 )
 from transformers.testing_utils import (
@@ -264,7 +264,7 @@ def _get_component_class_from_processor(cls, attribute, use_fast: bool = True):
         config_class = CONFIG_MAPPING[model_type]
 
         # Now get the component class from the appropriate Auto mapping
-        if attribute in MODALITY_TO_AUTOPROCESSOR_MAPPING:
+        if attribute in SUBPROCESSOR_TO_AUTO_CLASS_MAPPING:
             mapping_name = attribute
         elif "tokenizer" in attribute:
             mapping_name = "tokenizer"
@@ -321,11 +321,11 @@ def prepare_processor_dict():
         return {}
 
     def get_component(self, attribute, **kwargs):
-        if attribute not in MODALITY_TO_AUTOPROCESSOR_MAPPING and "tokenizer" in attribute:
-            auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
+        if attribute not in SUBPROCESSOR_TO_AUTO_CLASS_MAPPING and "tokenizer" in attribute:
+            auto_processor_class = SUBPROCESSOR_TO_AUTO_CLASS_MAPPING["tokenizer"]
             component = auto_processor_class.from_pretrained(self.tmpdirname, subfolder=attribute, **kwargs)  # noqa
         else:
-            auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[attribute]
+            auto_processor_class = SUBPROCESSOR_TO_AUTO_CLASS_MAPPING[attribute]
             component = auto_processor_class.from_pretrained(self.tmpdirname, **kwargs)  # noqa
         if "tokenizer" in attribute and not component.pad_token:
             component.pad_token = "[TEST_PAD]"
@@ -443,11 +443,11 @@ def test_processor_from_and_save_pretrained_as_nested_dict(self):
 
             # Try to load each attribute separately from saved directory
             for attribute in processor_first.get_attributes():
-                if attribute not in MODALITY_TO_AUTOPROCESSOR_MAPPING and "tokenizer" in attribute:
-                    auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
+                if attribute not in SUBPROCESSOR_TO_AUTO_CLASS_MAPPING and "tokenizer" in attribute:
+                    auto_processor_class = SUBPROCESSOR_TO_AUTO_CLASS_MAPPING["tokenizer"]
                     attribute_reloaded = auto_processor_class.from_pretrained(tmpdirname, subfolder=attribute)
                 else:
-                    auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[attribute]
+                    auto_processor_class = SUBPROCESSOR_TO_AUTO_CLASS_MAPPING[attribute]
                     attribute_reloaded = auto_processor_class.from_pretrained(tmpdirname)
                 attribute_first = getattr(processor_first, attribute)
 

From 114a48bf658262b37aa2911cfacf0d71107547ac Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Fri, 5 Dec 2025 23:01:02 +0000
Subject: [PATCH 6/7] fix kwargs and subfolder handling when loading sub-processors

---
 src/transformers/processing_utils.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index 85eb5cdd3f9d..d42aa05bd4c9 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -1423,9 +1423,9 @@ def from_pretrained(
             kwargs["token"] = token
 
         # Get processor_dict first so we can use it to instantiate non-tokenizer sub-processors
-        processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
+        processor_dict, instantiation_kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
         args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
-        return cls.from_args_and_dict(args, processor_dict, **kwargs)
+        return cls.from_args_and_dict(args, processor_dict, **instantiation_kwargs)
 
     @classmethod
     def get_attributes(cls):
@@ -1498,6 +1498,8 @@ def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, processor
         """
         args = []
         processor_dict = processor_dict if processor_dict is not None else {}
+        # Remove subfolder from kwargs to avoid duplicate keyword arguments
+        subfolder = kwargs.pop("subfolder", "")
 
         # get args from processor init signature
         sub_processors = cls.get_attributes()
@@ -1508,14 +1510,17 @@ def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, processor
             if is_primary:
                 # Primary non-tokenizer sub-processor: load via Auto class
                 auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[sub_processor_type]
-                sub_processor = auto_processor_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
+                sub_processor = auto_processor_class.from_pretrained(
+                    pretrained_model_name_or_path, subfolder=subfolder, **kwargs
+                )
                 args.append(sub_processor)
             elif "tokenizer" in sub_processor_type:
                 # Special case: tokenizer-like parameters not in the mapping (e.g., "protein_tokenizer")
                 # Load using AutoTokenizer with subfolder
                 auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
+                tokenizer_subfolder = os.path.join(subfolder, sub_processor_type) if subfolder else sub_processor_type
                 sub_processor = auto_processor_class.from_pretrained(
-                    pretrained_model_name_or_path, subfolder=sub_processor_type, **kwargs
+                    pretrained_model_name_or_path, subfolder=tokenizer_subfolder, **kwargs
                 )
                 args.append(sub_processor)
 

From 69041020cc72b807b4a3794ff60f455d66f9c0f9 Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Fri, 5 Dec 2025 23:31:14 +0000
Subject: [PATCH 7/7] add test for sub-processor detection with inheritance

---
 tests/models/auto/test_processor_auto.py | 50 ++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py
index 4e618ea0f9b5..ae197961b199 100644
--- a/tests/models/auto/test_processor_auto.py
+++ b/tests/models/auto/test_processor_auto.py
@@ -544,6 +544,56 @@ def __init__(self, tokenizer, image_processor, encoder_image_processor):
             self.assertIsInstance(loaded_processor.image_processor, SiglipImageProcessorFast)
             self.assertIsInstance(loaded_processor.encoder_image_processor, CLIPImageProcessorFast)
 
+    def test_processor_inheritance_correctly_detects_subprocessors(self):
+        """Test that sub-processor detection works correctly with inheritance.
+
+        Checks that `get_attributes()` reports the sub-processors named in the parent `__init__` as well as
+        the one added by a child whose `__init__` forwards `*args`/`**kwargs`, then round-trips save/reload.
+        """
+
+        class BaseMultimodalProcessor(ProcessorMixin):
+            def __init__(self, tokenizer, image_processor):
+                super().__init__(tokenizer, image_processor)
+
+        class ExtendedMultimodalProcessor(BaseMultimodalProcessor):
+            def __init__(self, feature_extractor, *args, **kwargs):
+                ProcessorMixin.__init__(self, feature_extractor, *args, **kwargs)  # skips the parent's __init__
+
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertForMaskedLM")
+        image_processor = SiglipImageProcessor()
+        feature_extractor = Wav2Vec2FeatureExtractor()
+        # tokenizer and image_processor are named only in the parent's __init__ signature, not the child's.
+        attributes = ExtendedMultimodalProcessor.get_attributes()
+        self.assertIn("tokenizer", attributes)
+        self.assertIn("image_processor", attributes)
+        self.assertIn("feature_extractor", attributes)
+        self.assertEqual(len(attributes), 3)
+        # tokenizer/image_processor are passed by keyword, so they reach ProcessorMixin.__init__ via **kwargs.
+        processor = ExtendedMultimodalProcessor(
+            feature_extractor=feature_extractor,
+            tokenizer=tokenizer,
+            image_processor=image_processor,
+        )
+        # Round-trip through disk: save, inspect processor_config.json, then reload.
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            processor.save_pretrained(tmp_dir)
+            # Only the non-tokenizer sub-processors are expected as keys of processor_config.json.
+            with open(os.path.join(tmp_dir, "processor_config.json")) as f:
+                processor_config = json.load(f)
+            self.assertIn("image_processor", processor_config)
+            self.assertIn("feature_extractor", processor_config)
+            self.assertNotIn("tokenizer", processor_config)
+
+            loaded_processor = ExtendedMultimodalProcessor.from_pretrained(tmp_dir)
+
+            self.assertTrue(hasattr(loaded_processor, "tokenizer"))
+            self.assertTrue(hasattr(loaded_processor, "image_processor"))
+            self.assertTrue(hasattr(loaded_processor, "feature_extractor"))
+            # Each reloaded sub-processor must be an instance of the class that was saved.
+            self.assertIsInstance(loaded_processor.tokenizer, type(tokenizer))
+            self.assertIsInstance(loaded_processor.image_processor, SiglipImageProcessor)
+            self.assertIsInstance(loaded_processor.feature_extractor, Wav2Vec2FeatureExtractor)
+
 
 @is_staging_test
 class ProcessorPushToHubTester(unittest.TestCase):