Commit c31d354

fix position inside subgraph in inplace check (#870)
1 parent d94ca6e

2 files changed: +48 -10 lines

intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_inplace_replace.cpp

Lines changed: 23 additions & 10 deletions
@@ -21,7 +21,6 @@ bool hasSideEffectInDefNode(Node* def_node, int position) {
         def_node->hasSideEffects() || (def_node->kind() == prim::Param);
     }
   }
-
   return checkresult;
 }
 
@@ -45,31 +44,45 @@ bool hasSideEffectInBlocks(Block* block, Value* v) {
 
 bool hasSideEffectOrAliasInSubgraphs(Node* node, Value* v) {
   bool checkresult = false;
-  // A LLGAFusionGroup must have its fallbackgraph, we only need to check one of
-  // them
+  // A LLGAFusionGroup or TensorExprGroup must have its fallbackgraph, we only
+  // need to check one of them
   if (node->kind().toQualString() ==
       Symbol::fromQualString("ipex::LlgaFusionGroup").toQualString()) {
     return false;
   }
+  if (node->kind().toQualString() ==
+      Symbol::fromQualString("prim::TensorExprGroup").toQualString()) {
+    return false;
+  }
+
   // get the subgraph of the def node
   auto subgraph = node->g(attr::Subgraph);
 
   // find the position of the target value in its def node in the subgraph
   // for example, here find (%input.1), and the position is 0:
   // graph(---),
   //   %input.1 : Tensor = Ops
-  //   return (%input.1)
-  int position = v->offset();
-  auto def_node = subgraph->outputs()[position]->node();
-  std::unique_ptr<AliasDb> aliasDb_ = std::make_unique<AliasDb>(subgraph);
+  //   %input.2 : Tensor = Ops
+  //   return (%input.1, %input.2)
 
-  checkresult = hasSideEffectInDefNode(def_node, position);
+  // position_in_subgraph is the graph-return position, e.g., 0 for %input.1
+  // and 1 for %input.2
+  int position_in_subgraph = v->offset();
+  auto def_node = subgraph->outputs()[position_in_subgraph]->node();
+  // position_in_def_node is the position among the def node's own outputs,
+  // e.g., 0 for both %input.1 and %input.2
+  int position_in_def_node =
+      subgraph->outputs()[position_in_subgraph]->offset();
+
+  checkresult = hasSideEffectInDefNode(def_node, position_in_def_node);
 
   // for the def node in the subgraph, its aliases have to be checked too:
+  // if the output isn't contained in or aliased by the inputs to its node,
+  // it's unique. No need to check for aliasing if the node is a ListConstruct.
+  std::unique_ptr<AliasDb> aliasDb_ = std::make_unique<AliasDb>(subgraph);
   bool mayAliasInputs = (def_node->kind() != prim::ListConstruct) &&
       aliasDb_->mayContainAlias(
-          def_node->inputs(), def_node->outputs()[position]);
-
+          def_node->inputs(), def_node->outputs()[position_in_def_node]);
   checkresult = checkresult || mayAliasInputs;
   return checkresult;
 }
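
The crux of the fix is that two different offsets were being conflated: `v->offset()` is the value's position in the subgraph's return tuple, while `hasSideEffectInDefNode` needs the value's position among its defining node's outputs. The two agree only when the subgraph returns a single value. Below is a minimal sketch in plain PyTorch (not part of the commit; the function name `two_outputs` is illustrative) showing a graph whose two outputs sit at graph-return positions 0 and 1 while each is output 0 of its own def node:

import torch

# A minimal sketch, assuming stock PyTorch: a scripted function whose graph
# returns two values defined by two different nodes, mirroring the
# %input.1 / %input.2 example in the comment above.
@torch.jit.script
def two_outputs(x: torch.Tensor):
    a = x + 1  # graph-return position 0
    b = x + 2  # graph-return position 1
    return a, b

graph = two_outputs.graph
for graph_pos, out in enumerate(graph.outputs()):
    def_node = out.node()
    # Position of this value among its defining node's own outputs:
    # aten::add has a single output, so this is 0 for both values,
    # even though their graph-return positions are 0 and 1.
    def_node_pos = [o.debugName() for o in def_node.outputs()].index(
        out.debugName())
    print(graph_pos, def_node.kind(), def_node_pos)

Before this commit, the graph-return position was also used to index the def node's outputs, which mis-indexes whenever the subgraph returns more than one value; the new `position_in_def_node` is taken from the output value itself.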

tests/cpu/test_softmax.py

Lines changed: 25 additions & 0 deletions
@@ -32,6 +32,20 @@ def forward(self, x):
         x2 = nn.Softmax(dim=-1)(x1)
         return x2
 
+class inplace_softmax_with_TE_group(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+    def forward(self, x):
+        x1 = x + 1
+        x2 = x + 2
+        x3 = x + 3
+        x4 = x + 4
+        x5 = x + 5
+        y1 = (x1 / x2).softmax(dim=-1)
+        y2 = ((x4 - x3) / x5).softmax(dim=-1)
+        return y1, y2
+
+
 class SoftmaxTester(JitTestCase):
     def test_softmax(self):
         for dtype in ["fp32", "bf16"]:
@@ -40,19 +54,22 @@ def test_softmax(self):
             test3 = torch.tensor([[1.0,1.0],[1.0,1.0]])
             test4 = torch.tensor([[1.0,1.0],[1.0,1.0]]).transpose(1,0)
             test5 = torch.tensor([[2.0,2.0],[2.0,2.0]]).transpose(1,0)
+            test6 = torch.tensor([[1.0,1.0],[1.0,1.0]])
 
             if dtype == "bf16":
                 test1 = test1.bfloat16()
                 test2 = test2.bfloat16()
                 test3 = test3.bfloat16()
                 test4 = test4.bfloat16()
                 test5 = test5.bfloat16()
+                test6 = test6.bfloat16()
 
             model1 = softmax_with_multiuse_input().eval()
             model2 = softmax_with_alias_input().eval()
             model3 = inplace_softmax().eval()
             model4 = inplace_softmax().eval()
             model5 = softmax_with_multiuse_input().eval()
+            model6 = inplace_softmax_with_TE_group().eval()
 
             with torch.no_grad():
                 model1 = torch.jit.trace(model1, test1)
@@ -65,6 +82,9 @@ def test_softmax(self):
                 res4 = model4(test4)
                 model5 = torch.jit.trace(model5, test5)
                 res5 = model5(test5)
+                model6_traced = torch.jit.trace(model6, test6)
+                res6_traced = model6_traced(test6)
+                res6 = model6(test6)
 
 
             # should be outplace since multi-use
@@ -82,12 +102,17 @@ def test_softmax(self):
             # outplace test, but should be aten::softmax due to non-contiguous input
             graph5 = model5.graph_for(test5)
             self.assertGraphContainsExactly(graph5, ATEN_SOFTMAX, 1)
+            # should be inplace
+            graph6 = model6_traced.graph_for(test6)
+            self.assertGraphContainsExactly(graph6, IPEX_SOFTMAX_, 2)
 
             # the output results of above inplace/outplace softmax should be the same
             self.assertEqual(res1[0], res2[1], 0)
             self.assertEqual(res1[0], res3, 0)
             self.assertEqual(res1[0], res4, 0)
             self.assertEqual(res1[0], res5[0], 0)
+            self.assertEqual(res6[0], res6_traced[0], 0)
+            self.assertEqual(res6[1], res6_traced[1], 0)
 
 
 if __name__ == '__main__':
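
The new test exercises exactly the case the C++ change targets: once the TensorExpr fuser kicks in, the elementwise chains in `inplace_softmax_with_TE_group` collapse into a `prim::TensorExprGroup` whose two outputs feed the two softmax calls, so the inplace-replacement pass must resolve each value's def node inside that subgraph. A hedged sketch of how such a group can be observed with stock PyTorch follows (fuser flags vary by build and may already be enabled by default; the helper `f` is illustrative, not from the commit):

import torch

# A hedged sketch, not part of the commit: force the NNC (TensorExpr) fuser
# on CPU so elementwise chains fuse into a prim::TensorExprGroup node.
torch._C._jit_set_texpr_fuser_enabled(True)
torch._C._jit_override_can_fuse_on_cpu(True)

def f(x):
    x1 = x + 1
    x2 = x + 2
    return (x1 / x2).softmax(dim=-1)

traced = torch.jit.trace(f, torch.randn(4, 4))
with torch.no_grad():
    for _ in range(3):  # warm up the profiling executor so fusion kicks in
        traced(torch.randn(4, 4))
# The optimized graph should now contain a prim::TensorExprGroup feeding
# aten::softmax.
print(traced.graph_for(torch.randn(4, 4)))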

0 commit comments