Commit 75eb59d

[1.12] bridge: only lift up viewOp if used by one single Op (#909)
* bridge: lift up viewOp if not used by Ops requiring type promotion
* only rewrite if viewOp is used by one single Op
Parent: e7b925a
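
A minimal eager-mode sketch of the hazard this guard avoids (illustrative only; the tensor names, scale, and zero point below are assumptions, not taken from this commit). When a view-like op's f32 output has a second consumer such as aten::add, hoisting the quantize above the view would hand that consumer a quantized tensor:

    import torch

    x = torch.randn(1, 8, 8, 8)
    y = torch.randn(1, 8, 8, 8)

    # A view-like op whose output has two uses: one quantized, one f32.
    z = x.permute(0, 3, 1, 2)

    # Use 1: the quantization consumer -- the candidate for lifting up.
    q = torch.quantize_per_tensor(z, scale=0.1, zero_point=0, dtype=torch.qint8)

    # Use 2: an f32 arithmetic consumer. If the quantize were hoisted above
    # the permute, this add would receive a quantized tensor, and aten::add
    # cannot promote quantized with float ("promoteTypes with quantized
    # numbers is not handled").
    w = z + y

With the single-use check, the pass simply leaves such patterns alone.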

2 files changed: +123 −67 lines

intel_extension_for_pytorch/csrc/jit/codegen/onednn/lift_up_quant.cpp

79 additions, 67 deletions
@@ -7,91 +7,103 @@ namespace jit {
 namespace fuser {
 namespace onednn {
 
+bool usedBySingleOp(Value* v) {
+  return v->uses().size() == 1;
+}
+
 class QuantLifter {
  private:
   std::shared_ptr<Graph> graph_;
 
  public:
   QuantLifter(std::shared_ptr<Graph> graph) : graph_(std::move(graph)) {}
 
-  bool analyzeNode(Node* node) {
-    if (node->kind() != Symbol::aten("quantize_per_tensor") &&
-        node->kind() != aten::to) {
-      return false;
-    }
-
-    // TODO: only supported nb_uses to be 1 for now
-    auto* output_value = node->output(0);
-    auto& uses = output_value->uses();
-    if (uses.size() != 1) {
-      return false;
-    }
+  bool analyze(Block* block) {
+    bool changed = false;
+    for (auto it = block->nodes().begin(); it != block->nodes().end(); ++it) {
+      auto node = *it;
 
-    auto user = uses[0].user;
-    auto target = node;
-
-    auto prev_node = node->input(0)->node();
-
-    bool could_lift_up = true;
-    while (could_lift_up) {
-      if (utils::isViewOp(target->input(0)->node())) {
-        target = target->input(0)->node();
-
-        // After lifting up, need to fix the output type
-        auto prev_target_type = target->output(0)->type()->expect<TensorType>();
-        auto new_scalar_type =
-            node->output(0)->type()->expect<TensorType>()->scalarType();
-        auto new_target_type =
-            prev_target_type->withScalarType(new_scalar_type);
-        target->output(0)->setType(new_target_type);
-      } else {
-        could_lift_up = false;
+      if (node->kind() != Symbol::aten("quantize_per_tensor") &&
+          node->kind() != aten::to) {
+        continue;
       }
-    }
-
-    // No possible lift up, return directly
-    if (target == node) {
-      return false;
-    }
 
-    // From:
-    // linear -> view (target) -> permute -> transpose -> to (node) -> quant
-    // To:
-    // linear -> to -> view (target) -> permute -> transpose -> quant
-    // Finally:
-    // linear -> to -> quant -> view -> permute -> transpose
-    WithInsertPoint guard(target);
-    auto g = target->owningGraph();
-
-    // Construct lifted up node
-    std::vector<Value*> input_values;
-    input_values.push_back(target->input(0));
-    for (size_t i = 1; i < node->inputs().size(); i++) {
-      input_values.push_back(node->input(i));
-    }
-    auto new_node = g->create(node->kind(), input_values)->insertBefore(target);
+      // TODO: only supported nb_uses to be 1 for now
+      auto* output_value = node->output(0);
+      auto& uses = output_value->uses();
+      if (uses.size() != 1) {
+        continue;
+      }
 
-    // Fix type of the output of lifted up node
-    auto insert_point_output_type =
-        target->input(0)->type()->expect<TensorType>();
-    auto old_node_type = node->input(0)->type()->expect<TensorType>();
-    auto new_node_type =
-        insert_point_output_type->withScalarType(old_node_type->scalarType());
-    new_node->output(0)->setType(new_node_type);
+      auto user = uses[0].user;
+      auto target = node;
+
+      auto prev_node = node->input(0)->node();
+
+      bool could_lift_up = true;
+      while (could_lift_up) {
+        auto* target_value = target->input(0);
+        if (utils::isViewOp(target_value->node()) &&
+            (usedBySingleOp(target_value))) {
+          target = target_value->node();
+
+          // After lifting up, need to fix the output type
+          auto prev_target_type =
+              target->output(0)->type()->expect<TensorType>();
+          auto new_scalar_type =
+              node->output(0)->type()->expect<TensorType>()->scalarType();
+          auto new_target_type =
+              prev_target_type->withScalarType(new_scalar_type);
+          target->output(0)->setType(new_target_type);
+        } else {
+          could_lift_up = false;
+        }
+      }
 
-    target->replaceInputWith(target->input(0), new_node->output(0));
-    user->replaceInputWith(node->output(0), prev_node->output(0));
+      // No possible lift up, return directly
+      if (target == node) {
+        continue;
+      }
 
-    return true;
+      // From:
+      // linear -> view (target) -> permute -> transpose -> to (node) -> quant
+      // To:
+      // linear -> to -> view (target) -> permute -> transpose -> quant
+      // Finally:
+      // linear -> to -> quant -> view -> permute -> transpose
+      WithInsertPoint guard(target);
+      auto g = target->owningGraph();
+
+      // Construct lifted up node
+      std::vector<Value*> input_values;
+      input_values.push_back(target->input(0));
+      for (size_t i = 1; i < node->inputs().size(); i++) {
+        input_values.push_back(node->input(i));
+      }
+      auto new_node =
+          g->create(node->kind(), input_values)->insertBefore(target);
+
+      // Fix type of the output of lifted up node
+      auto insert_point_output_type =
+          target->input(0)->type()->expect<TensorType>();
+      auto old_node_type = node->input(0)->type()->expect<TensorType>();
+      auto new_node_type =
+          insert_point_output_type->withScalarType(old_node_type->scalarType());
+      new_node->output(0)->setType(new_node_type);
+
+      target->replaceInputWith(target->input(0), new_node->output(0));
+      user->replaceInputWith(node->output(0), prev_node->output(0));
+
+      it.destroyCurrent();
+      changed = true;
+    }
+    return changed;
   }
 
   void run() {
     bool changed = true;
     while (changed) {
-      changed = false;
-      for (Node* node : graph_->block()->nodes()) {
-        changed |= analyzeNode(node);
-      }
+      changed = analyze(graph_->block());
     }
   }
 };
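
The From/To/Finally comments above describe the intended reordering. A rough eager-mode analogue of that reordering, assuming per-tensor quantization so the layout-only ops commute with quantize (illustrative only; the pass itself rewrites JIT IR nodes, not eager tensors, and the shapes, scale, and zero point here are made up):

    import torch

    def before(x):
        v = x.view(4, -1)        # view (target)
        p = v.permute(1, 0)      # permute
        t = p.to(torch.float32)  # to (node)
        return torch.quantize_per_tensor(t, 0.1, 0, torch.qint8)

    def after(x):
        t = x.to(torch.float32)  # `to` lifted above the view chain
        q = torch.quantize_per_tensor(t, 0.1, 0, torch.qint8)
        return q.view(4, -1).permute(1, 0)

    # Per-tensor quantization is elementwise with a fixed scale/zero point,
    # so reordering it around view/permute should not change values.
    x = torch.randn(4, 4)
    assert torch.equal(before(x).dequantize(), after(x).dequantize())

Note that the output types still have to be patched along the lifted path (the setType calls above), since every view op between the new insertion point and the original node now carries the quantized/converted scalar type.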

tests/cpu/test_ao_jit_llga_quantization_fuser.py

44 additions, 0 deletions
@@ -1019,6 +1019,50 @@ def forward(self, x, y):
         self.assertFused(graph, ['aten::dequantize', 'aten::linear', 'aten::matmul'])
         self.checkPatterns(graph, patterns)
 
+    def test_lift_up_quant_unsupported(self):
+        # Original graph:
+        #            |
+        #           view
+        #      (f32)/  \(f32)
+        #       quant   add
+        #         |
+
+        # Lifting up in this case will raise:
+        # promoteTypes with quantized numbers is not handled in aten::add;
+        #            |
+        #          quant
+        #            |
+        #           view
+        #     (int8)\  /(f32)
+        #            add
+        class M(nn.Module):
+            def __init__(self):
+                super(M, self).__init__()
+                self.conv1 = nn.Conv2d(3, 8, 1)
+                self.conv2 = nn.Conv2d(8, 8, 1)
+
+            def forward(self, x, y):
+                x = self.conv1(x)
+                z1 = x.permute(0, 3, 1, 2)
+                z2 = self.conv2(z1)
+                z = z1 + y
+                output = z2 + z
+                return output
+
+        x = torch.randn(1, 3, 8, 8)
+        y = torch.randn(1, 8, 8, 8)
+        m = M()
+
+        patterns = [
+            ["aten::dequantize", "aten::_convolution"],
+            ["aten::dequantize", "aten::_convolution", "aten::add"],
+        ]
+
+        graph = self.checkQuantizeTrace(m, [x, y], atol=2e-1)
+        self.assertGraphContainsExactly(graph, LLGA_FUSION_GROUP, 2)
+        self.assertFused(graph, ['aten::_convolution', 'aten::dequantize'])
+        self.checkPatterns(graph, patterns)
+
     def test_wildcard(self):
         class M(nn.Module):
             def __init__(self):
