Add dict comprehension support (#1191)

oulgen · web-flow · commit e75c434f8156 · 2025-12-04T10:19:22.000-08:00
diff --git a/helion/_compiler/device_ir.py b/helion/_compiler/device_ir.py
@@ -54,6 +54,7 @@
 from .roll_reduction import ReductionRoller
 from .source_location import current_location
 from .type_propagation import CallableType
+from .type_propagation import DictType
 from .type_propagation import GridIndexType
 from .type_propagation import IterType
 from .type_propagation import LiteralType
@@ -1087,6 +1088,36 @@ def evaluate_expression() -> object:
         # Return as tuple to match the expected type for tuple unrolling
         return tuple(results)
 
+    def visit_DictComp(self, node: ast.DictComp) -> dict[object, object]:
+        """Unroll a dict comprehension into a dict of visited key/value results."""
+        assert isinstance(node, ExtendedAST)
+        # Only a single generator with no ``if`` filters is handled here.
+        if len(node.generators) != 1 or node.generators[0].ifs:
+            raise exc.StatementNotSupported(
+                "Complex dict comprehensions are not supported"
+            )
+
+        generator = node.generators[0]
+        assert isinstance(generator.iter, ExtendedAST)
+        iter_type = generator.iter._type_info
+        # The iterable must have been typed as a sequence to be unrolled.
+        if not isinstance(iter_type, SequenceType):
+            raise exc.StatementNotSupported(
+                "Dict comprehensions over non-sequence types are not supported"
+            )
+
+        result: dict[object, object] = {}
+        # Callback passed to the unrolling helper; records one key/value pair.
+        def evaluate_key_value() -> None:
+            key = self.visit(node.key)
+            value = self.visit(node.value)
+            result[key] = value
+
+        self._handle_sequence_unrolling(
+            generator.iter, generator.target, evaluate_key_value, preserve_scope=False
+        )
+        return result
+
     def visit_Dict(self, node: ast.Dict) -> dict[object, object]:
         keys = [self.visit(key) if key is not None else None for key in node.keys]
         values = [self.visit(value) for value in node.values]
@@ -1224,9 +1255,18 @@ def visit_Subscript(self, node: ast.Subscript) -> object:
                 # pyrefly: ignore [bad-index]
                 return self.visit(value)[index_value]
             raise exc.InvalidSequenceSubscription(node.slice)
+        # Check StackTensorType before DictType since StackTensorType inherits from DictType
         if isinstance(type_info, StackTensorType):
             # pyrefly: ignore [bad-argument-type]
             return hl.load(self.visit(value), self._subscript_slice_proxy(node.slice))
+        if isinstance(type_info, DictType):
+            key_value = self.visit(node.slice)
+            if isinstance(key_value, (str, int)):
+                # pyrefly: ignore [bad-index]
+                return self.visit(value)[key_value]
+            raise exc.TypeInferenceError(
+                f"Dict subscript must be a literal str or int, got {type(key_value).__name__}"
+            )
         if type_info is not None and type_info.origin.is_host():
             # pyrefly: ignore [bad-argument-type]
             return hl.load(self.visit(value), self._subscript_slice_proxy(node.slice))
diff --git a/helion/_compiler/type_propagation.py b/helion/_compiler/type_propagation.py
@@ -2445,11 +2445,64 @@ def visit_ListComp(self, node: ast.ListComp) -> TypeInfo:
     def visit_GeneratorExp(self, node: ast.GeneratorExp) -> TypeInfo:
         return self._visit_comprehension(node, "generator expression")
 
+    def visit_DictComp(self, node: ast.DictComp) -> TypeInfo:
+        """Infer a DictType by typing key and value once per unpacked element."""
+        if len(node.generators) != 1:
+            raise exc.StatementNotSupported(
+                "Dict comprehensions with multiple generators are not supported"
+            )
+
+        generator = node.generators[0]
+        iter_type = self.visit(generator.iter)
+
+        # Unpack the iterable into per-element types; give up if it cannot be
+        try:
+            iterable_elements = iter_type.unpack()
+        except NotImplementedError:
+            raise exc.StatementNotSupported(
+                "Dict comprehensions over non-unpackable iterables are not supported"
+            ) from None
+        # Maps each literal key to the type inferred for its value.
+        result_elements: dict[str | int, TypeInfo] = {}
+
+        def clear_type_info(n: ast.AST) -> None:
+            """Clear _type_info on AST nodes to allow re-visiting with different values."""
+            if isinstance(n, ExtendedAST):
+                n._type_info = None
+            for child in ast.iter_child_nodes(n):
+                clear_type_info(child)
+        # NOTE(review): `if` clauses are visited but never filter entries; confirm.
+        for element_type in iterable_elements:
+            self.push_scope()
+            try:
+                self._assign(generator.target, element_type)
+                for if_clause in generator.ifs:
+                    self.visit(if_clause)
+                # Clear type info before visiting to avoid merging with previous iteration
+                clear_type_info(node.key)
+                clear_type_info(node.value)
+                key_type = self.visit(node.key)
+                value_type = self.visit(node.value)
+                # Get the literal key value by evaluating with proxy
+                try:
+                    key = key_type.proxy()
+                except (NotImplementedError, TypeError):
+                    raise exc.StatementNotSupported(
+                        "Dict comprehension keys must evaluate to literals"
+                    ) from None
+                if not isinstance(key, (str, int)):
+                    raise exc.StatementNotSupported(
+                        f"Dict comprehension keys must be str or int, got {type(key).__name__}"
+                    )
+                result_elements[key] = value_type
+            finally:
+                self.pop_scope()
+
+        return DictType(self.origin(), result_elements)
+
     # TODO(jansel): need to implement these
     # pyrefly: ignore [bad-assignment, bad-param-name-override]
     visit_SetComp: _VisitMethod = _not_supported
-    # pyrefly: ignore [bad-assignment, bad-param-name-override]
-    visit_DictComp: _VisitMethod = _not_supported
 
     # TODO(jansel): support closure functions defined on host
     # pyrefly: ignore [bad-assignment, bad-param-name-override]
diff --git a/test/test_unroll_tuples.expected b/test/test_unroll_tuples.expected
@@ -88,6 +88,111 @@ def kernel_constants_iteration(x: torch.Tensor, *, _launcher=_default_launcher):
     # src[test_unroll_tuples.py:N]: return result
     return result
 
+--- assertExpectedJournal(TestUnrollTuples.test_dict_comprehension)
+from __future__ import annotations
+
+import torch
+import triton
+import triton.language as tl
+from helion.runtime import default_launcher as _default_launcher
+
+@triton.jit
+def _helion_kernel_dict_comprehension(x, result, _BLOCK_SIZE_0: tl.constexpr):
+    # src[test_unroll_tuples.py:N]: for tile_idx in hl.tile(result.size(0)):
+    pid_0 = tl.program_id(0)
+    offset_0 = pid_0 * _BLOCK_SIZE_0
+    indices_0 = (offset_0 + tl.arange(0, _BLOCK_SIZE_0)).to(tl.int32)
+    # src[test_unroll_tuples.py:N]: acc = torch.zeros([tile_idx], dtype=torch.float32, device=result.device)
+    acc = tl.full([_BLOCK_SIZE_0], 0, tl.float32)
+    # src[test_unroll_tuples.py:N]: acc += x[tile_idx] * multipliers[1]
+    load = tl.load(x + indices_0 * 1, None)
+    v_0 = 2.0
+    v_1 = load * v_0
+    v_2 = acc + v_1
+    # src[test_unroll_tuples.py:N]: acc += x[tile_idx] * multipliers[2]
+    load_1 = tl.load(x + indices_0 * 1, None)
+    v_3 = 4.0
+    v_4 = load_1 * v_3
+    v_5 = v_2 + v_4
+    # src[test_unroll_tuples.py:N]: acc += x[tile_idx] * multipliers[3]
+    load_2 = tl.load(x + indices_0 * 1, None)
+    v_6 = 6.0
+    v_7 = load_2 * v_6
+    v_8 = v_5 + v_7
+    # src[test_unroll_tuples.py:N]: result[tile_idx] = acc
+    tl.store(result + indices_0 * 1, v_8, None)
+
+def kernel_dict_comprehension(x: torch.Tensor, *, _launcher=_default_launcher):
+    """Test dict comprehension with constants."""
+    # src[test_unroll_tuples.py:N]: result = torch.zeros_like(x)
+    result = torch.zeros_like(x)
+    # src[test_unroll_tuples.py:N]: multipliers = {k: k * 2 for k in (1, 2, 3)}
+    multipliers = {k: k * 2 for k in (1, 2, 3)}
+    # src[test_unroll_tuples.py:N]: for tile_idx in hl.tile(result.size(0)):
+    _BLOCK_SIZE_0 = 16
+    # src[test_unroll_tuples.py:N]: for tile_idx in hl.tile(result.size(0)):
+    # src[test_unroll_tuples.py:N]:     acc = torch.zeros([tile_idx], dtype=torch.float32, device=result.device)
+    # src[test_unroll_tuples.py:N]:     # Access dict with literal keys
+    # src[test_unroll_tuples.py:N-N]: ...
+    _launcher(_helion_kernel_dict_comprehension, (triton.cdiv(16, _BLOCK_SIZE_0),), x, result, _BLOCK_SIZE_0, num_warps=4, num_stages=1)
+    # src[test_unroll_tuples.py:N]: return result
+    return result
+
+--- assertExpectedJournal(TestUnrollTuples.test_dict_comprehension_with_range)
+from __future__ import annotations
+
+import torch
+import triton
+import triton.language as tl
+from helion.runtime import default_launcher as _default_launcher
+
+@triton.jit
+def _helion_kernel_dict_comprehension_with_range(x, result, _BLOCK_SIZE_0: tl.constexpr):
+    # src[test_unroll_tuples.py:N]: for tile_idx in hl.tile(result.size(0)):
+    pid_0 = tl.program_id(0)
+    offset_0 = pid_0 * _BLOCK_SIZE_0
+    indices_0 = (offset_0 + tl.arange(0, _BLOCK_SIZE_0)).to(tl.int32)
+    # src[test_unroll_tuples.py:N]: acc = torch.zeros([tile_idx], dtype=torch.float32, device=result.device)
+    acc = tl.full([_BLOCK_SIZE_0], 0, tl.float32)
+    # src[test_unroll_tuples.py:N]: acc += x[tile_idx] * multipliers[0]
+    load = tl.load(x + indices_0 * 1, None)
+    v_0 = 2.0
+    v_1 = load * v_0
+    v_2 = acc + v_1
+    # src[test_unroll_tuples.py:N]: acc += x[tile_idx] * multipliers[1]
+    load_1 = tl.load(x + indices_0 * 1, None)
+    v_3 = 4.0
+    v_4 = load_1 * v_3
+    v_5 = v_2 + v_4
+    # src[test_unroll_tuples.py:N]: acc += x[tile_idx] * multipliers[2]
+    load_2 = tl.load(x + indices_0 * 1, None)
+    v_6 = 6.0
+    v_7 = load_2 * v_6
+    v_8 = v_5 + v_7
+    # src[test_unroll_tuples.py:N]: acc += x[tile_idx] * multipliers[3]
+    load_3 = tl.load(x + indices_0 * 1, None)
+    v_9 = 8.0
+    v_10 = load_3 * v_9
+    v_11 = v_8 + v_10
+    # src[test_unroll_tuples.py:N]: result[tile_idx] = acc
+    tl.store(result + indices_0 * 1, v_11, None)
+
+def kernel_dict_comprehension_with_range(x: torch.Tensor, *, _launcher=_default_launcher):
+    """Test dict comprehension with range for key generation."""
+    # src[test_unroll_tuples.py:N]: result = torch.zeros_like(x)
+    result = torch.zeros_like(x)
+    # src[test_unroll_tuples.py:N]: multipliers = {i: (i + 1) * 2 for i in range(4)}
+    multipliers = {i: (i + 1) * 2 for i in range(4)}
+    # src[test_unroll_tuples.py:N]: for tile_idx in hl.tile(result.size(0)):
+    _BLOCK_SIZE_0 = 16
+    # src[test_unroll_tuples.py:N]: for tile_idx in hl.tile(result.size(0)):
+    # src[test_unroll_tuples.py:N]:     acc = torch.zeros([tile_idx], dtype=torch.float32, device=result.device)
+    # src[test_unroll_tuples.py:N]:     # Access dict with literal keys
+    # src[test_unroll_tuples.py:N-N]: ...
+    _launcher(_helion_kernel_dict_comprehension_with_range, (triton.cdiv(16, _BLOCK_SIZE_0),), x, result, _BLOCK_SIZE_0, num_warps=4, num_stages=1)
+    # src[test_unroll_tuples.py:N]: return result
+    return result
+
 --- assertExpectedJournal(TestUnrollTuples.test_enumerate_constants)
 from __future__ import annotations
 
diff --git a/test/test_unroll_tuples.py b/test/test_unroll_tuples.py
@@ -278,6 +278,43 @@ def kernel_tuple_comprehension_with_tensors(
     return result
 
 
+@helion.kernel(autotune_effort="none")
+def kernel_dict_comprehension(
+    x: torch.Tensor,
+) -> torch.Tensor:
+    """Test dict comprehension with constants."""
+    result = torch.zeros_like(x)
+    # Create dict using comprehension
+    multipliers = {k: k * 2 for k in (1, 2, 3)}
+    for tile_idx in hl.tile(result.size(0)):
+        acc = torch.zeros([tile_idx], dtype=torch.float32, device=result.device)
+        # Access dict with literal keys
+        acc += x[tile_idx] * multipliers[1]
+        acc += x[tile_idx] * multipliers[2]
+        acc += x[tile_idx] * multipliers[3]
+        result[tile_idx] = acc
+    return result
+
+
+@helion.kernel(autotune_effort="none")
+def kernel_dict_comprehension_with_range(
+    x: torch.Tensor,
+) -> torch.Tensor:
+    """Test dict comprehension with range for key generation."""
+    result = torch.zeros_like(x)
+    # Create dict using comprehension with range
+    multipliers = {i: (i + 1) * 2 for i in range(4)}
+    for tile_idx in hl.tile(result.size(0)):
+        acc = torch.zeros([tile_idx], dtype=torch.float32, device=result.device)
+        # Access dict with literal keys
+        acc += x[tile_idx] * multipliers[0]
+        acc += x[tile_idx] * multipliers[1]
+        acc += x[tile_idx] * multipliers[2]
+        acc += x[tile_idx] * multipliers[3]
+        result[tile_idx] = acc
+    return result
+
+
 @helion.kernel(autotune_effort="none")
 def kernel_list_comprehension_with_function(
     x: torch.Tensor,
@@ -725,6 +762,36 @@ def test_tuple_comprehension_with_tensors(self):
         expected = tensor1 * 0.5 + tensor2 * 1.0 + tensor3 * 1.5
         torch.testing.assert_close(result, expected)
 
+    def test_dict_comprehension(self):
+        """Check code and output for a dict comprehension over a constant tuple."""
+        size = (16,)
+        x = torch.randn(size, device=DEVICE)
+
+        code, result = code_and_output(kernel_dict_comprehension, (x,))
+
+        # Compare the generated code against the recorded journal
+        self.assertExpectedJournal(code)
+
+        # Test correctness - multipliers = {1: 2, 2: 4, 3: 6}
+        # the kernel sums x * multipliers[k] for k = 1, 2, 3: x * (2 + 4 + 6) = x * 12
+        expected = x * 12
+        torch.testing.assert_close(result, expected)
+
+    def test_dict_comprehension_with_range(self):
+        """Check code and output for a dict comprehension iterating over range(4)."""
+        size = (16,)
+        x = torch.randn(size, device=DEVICE)
+
+        code, result = code_and_output(kernel_dict_comprehension_with_range, (x,))
+
+        # Compare the generated code against the recorded journal
+        self.assertExpectedJournal(code)
+
+        # Test correctness - multipliers = {0: 2, 1: 4, 2: 6, 3: 8}
+        # the kernel sums x * multipliers[i] for i = 0..3: x * (2 + 4 + 6 + 8) = x * 20
+        expected = x * 20
+        torch.testing.assert_close(result, expected)
+
     def test_list_comprehension_with_function(self):
         """Test list comprehension with expressions."""
         size = (14,)