Arm backend: Replace ±inf and FP limit values with ±255.0 (#15976)

YufengShi-dudu · web-flow · commit 4014597d79fe · 2025-12-05T14:21:32.000+01:00
- Rename ReplaceInfValuesPass -> ReplaceInfAndLimitValuesPass
- Extend the pass to also rewrite torch.finfo(torch.float32).{min,max} to ±255.0,
  to avoid generating TOSA RESCALE shifts < 2 (invalid per spec)

Signed-off-by: Yufeng Shi <yufeng.shi@arm.com>
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
@@ -117,5 +117,7 @@
 from .to_tosa_memory_format_pass import ToTosaMemoryFormatPass  # noqa
 from .unsqueeze_before_repeat_pass import UnsqueezeBeforeRepeatPass  # noqa
 from .unsqueeze_scalar_placeholders_pass import UnsqueezeScalarPlaceholdersPass  # noqa
-from .replace_inf_values_pass import ReplaceInfValuesPass  # noqa  # usort: skip
+from .replace_inf_and_limit_values_pass import (  # noqa  # usort: skip
+    ReplaceInfAndLimitValuesPass,
+)
 from .arm_pass_manager import ArmPassManager  # noqa  # usort: skip
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -99,7 +99,7 @@
     RemoveGetItemPass,
     RemoveGraphAssertsPass,
     RemoveNoopPass,
-    ReplaceInfValuesPass,
+    ReplaceInfAndLimitValuesPass,
     ReplaceScalarWithTensorByProfilePass,
     RewriteConv2dPass,
     RewriteMatmulPass,
@@ -385,7 +385,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         # Postprocessing passes
         self.add_passes(
             [
-                ReplaceInfValuesPass(),
+                ReplaceInfAndLimitValuesPass(),
                 DecomposeMaskedFillPass() if not self.tosa_spec.is_U55_subset else None,
             ]
         )
diff --git a/backends/arm/_passes/replace_inf_and_limit_values_pass.py b/backends/arm/_passes/replace_inf_and_limit_values_pass.py
@@ -14,9 +14,11 @@
 from executorch.exir.pass_base import ExportPass, PassResult
 
 
-class ReplaceInfValuesPass(ArmPass):
+class ReplaceInfAndLimitValuesPass(ArmPass):
     """
-    Due to limitation in Quantizer, we need to change inf/-inf to more quantizable values.
+    Rewrites +inf/-inf and floating-point limit values (e.g., torch.finfo(...).min/max)
+    to quantization-friendly values (±255 by default), improving quantizer stability
+    (notably for attention mask paths).
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
@@ -34,12 +36,12 @@ def call(self, graph_module: torch.fx.GraphModule):
         for node in graph_module.graph.nodes:
             arg_list = list(node.args)
             for index, arg in enumerate(arg_list):
-                if arg == float("-inf"):
+                if arg == float("-inf") or arg == torch.finfo(torch.float32).min:
                     modified = True
-                    arg_list[index] = -255
-                elif arg == float("inf"):
+                    arg_list[index] = -255.0
+                elif arg == float("inf") or arg == torch.finfo(torch.float32).max:
                     modified = True
-                    arg_list[index] = +255
+                    arg_list[index] = +255.0
             node.args = tuple(arg_list)
 
         if modified:
diff --git a/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py b/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py
@@ -42,7 +42,6 @@ class TestCLIPTextModelWithProjection:
 
     ops_after_partitioner_INT = {
         "executorch_exir_dialects_edge__ops_aten_argmax_default": 1,
-        "executorch_exir_dialects_edge__ops_aten_full_default": 1,
         "executorch_exir_dialects_edge__ops_aten_index_select_default": 1,
         "executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor": 1,
         "executorch_exir_dialects_edge__ops_aten_view_copy_default": 1,

Original file line number	Diff line number	Diff line change
`@@ -99,7 +99,7 @@`
`99`	`99`	`RemoveGetItemPass,`
`100`	`100`	`RemoveGraphAssertsPass,`
`101`	`101`	`RemoveNoopPass,`
`102`		`- ReplaceInfValuesPass,`
	`102`	`+ ReplaceInfAndLimitValuesPass,`
`103`	`103`	`ReplaceScalarWithTensorByProfilePass,`
`104`	`104`	`RewriteConv2dPass,`
`105`	`105`	`RewriteMatmulPass,`
`@@ -385,7 +385,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):`
`385`	`385`	`# Postprocessing passes`
`386`	`386`	`self.add_passes(`
`387`	`387`	`[`
`388`		`- ReplaceInfValuesPass(),`
	`388`	`+ ReplaceInfAndLimitValuesPass(),`
`389`	`389`	`DecomposeMaskedFillPass() if not self.tosa_spec.is_U55_subset else None,`
`390`	`390`	`]`
`391`	`391`	`)`