Commit 955bcff
quantization: disable deconvolution quantization, replace dropout with identity to enable more fusion (#872)

* quantization: disable deconvolution quantization
* quantization: replace dropout with identity to enable more fusion

1 parent 2c079e5 · commit 955bcff

File tree: 4 files changed, +20 -21 lines


intel_extension_for_pytorch/ao/quantization/_quantization_state_utils.py

Lines changed: 12 additions & 12 deletions
@@ -25,10 +25,10 @@
     F.conv3d,
     torch.conv2d,
     torch.conv3d,
-    F.conv_transpose2d,
-    F.conv_transpose3d,
-    torch.conv_transpose2d,
-    torch.conv_transpose3d,
+    #F.conv_transpose2d, #TODO
+    #F.conv_transpose3d, #TODO
+    #torch.conv_transpose2d, #TODO
+    #torch.conv_transpose3d, #TODO
     torch.relu,
     F.relu,
     #torch.sigmoid, # TODO
@@ -50,8 +50,8 @@
 module_types_supported_by_quantization = set([
     torch.nn.Conv2d,
     torch.nn.Conv3d,
-    torch.nn.ConvTranspose2d,
-    torch.nn.ConvTranspose3d,
+    #torch.nn.ConvTranspose2d,
+    #torch.nn.ConvTranspose3d,
     torch.nn.Linear,
     torch.nn.MaxPool2d,
     torch.nn.MaxPool3d,
@@ -90,10 +90,10 @@
     str(F.conv3d),
     str(torch.conv2d),
     str(torch.conv3d),
-    str(F.conv_transpose2d),
-    str(F.conv_transpose3d),
-    str(torch.conv_transpose2d),
-    str(torch.conv_transpose3d),
+    #str(F.conv_transpose2d),
+    #str(F.conv_transpose3d),
+    #str(torch.conv_transpose2d),
+    #str(torch.conv_transpose3d),
     str(F.linear),
     str(torch._C._nn.linear),
 ]
@@ -102,8 +102,8 @@
     #str(torch.nn.Conv1d) # it will be enabled at next step.
     str(torch.nn.Conv2d),
     str(torch.nn.Conv3d),
-    str(torch.nn.ConvTranspose2d),
-    str(torch.nn.ConvTranspose3d),
+    #str(torch.nn.ConvTranspose2d),
+    #str(torch.nn.ConvTranspose3d),
     str(torch.nn.Linear),
 ]
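These sets act as allow-lists: an op or module is observed and quantized during prepare only if its type (or its str() form) appears in one of them, so commenting out the conv_transpose entries leaves deconvolution in FP32. A minimal sketch of that gate; the set contents mirror the diff above, but the helper name and check are illustrative, not the file's own code:

    import torch.nn as nn

    # Illustrative allow-list mirroring module_types_supported_by_quantization
    # after this commit (the ConvTranspose2d/3d entries are commented out upstream).
    module_types_supported = {nn.Conv2d, nn.Conv3d, nn.Linear,
                              nn.MaxPool2d, nn.MaxPool3d}

    def needs_observer(module: nn.Module) -> bool:
        # Only allow-listed module types get observers during prepare();
        # deconvolution now falls through and stays FP32.
        return type(module) in module_types_supported

    print(needs_observer(nn.Conv2d(3, 8, kernel_size=3)))           # True
    print(needs_observer(nn.ConvTranspose2d(8, 3, kernel_size=3)))  # False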

intel_extension_for_pytorch/ao/quantization/_quantize.py

Lines changed: 2 additions & 0 deletions
@@ -38,6 +38,8 @@ def prepare(
     except:
         assert False, "The model's copy is failed, please try set inplace to True to do the prepare"
         warnings.warn("Conv BatchNorm folding failed during the prepare process.")
+    # replace dropout with identity to enable more fusion pattern.
+    nn.utils._model_convert.replace_dropout_with_identity(prepare_model)
     # Special case for common case of passing a single Tensor
     if isinstance(example_inputs, (torch.Tensor, dict)):
         example_inputs = (example_inputs,)
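The helper called here lives in nn.utils._model_convert. Its effect is to swap Dropout modules for Identity, so no aten::dropout node separates otherwise fusible quantized ops in the traced graph. A minimal sketch of that kind of transform, assuming a simple recursive module walk; the actual implementation may differ:

    import torch.nn as nn

    def replace_dropout_with_identity_sketch(model: nn.Module) -> None:
        # Dropout is a no-op in eval mode anyway; replacing it with Identity
        # removes the aten::dropout node that would otherwise sit between
        # fusible quantize/dequantize patterns in the traced graph.
        for name, child in model.named_children():
            if isinstance(child, nn.Dropout):
                setattr(model, name, nn.Identity())
            else:
                replace_dropout_with_identity_sketch(child)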

tests/cpu/test_ao_jit_llga_quantization_fuser.py

Lines changed: 2 additions & 2 deletions
@@ -755,7 +755,7 @@ def forward(self, x, y):
             ["aten::dequantize", "aten::linear"]
         ]
         for qconfig in static_qconfig:
-            graph = self.checkQuantizeTrace(m, [x, y], atol=2e-1, remove_dropout=True, qconfig=qconfig)
+            graph = self.checkQuantizeTrace(m, [x, y], atol=2e-1, qconfig=qconfig)
             self.assertGraphContainsExactly(graph, LLGA_FUSION_GROUP, 2)
             self.assertFused(graph, ['aten::linear', 'aten::add', 'aten::quantize_per_channel', 'aten::dequantize'])
             self.checkPatterns(graph, patterns)
@@ -806,7 +806,7 @@ def forward(self, x, y):
             ["aten::dequantize", "aten::to", "aten::linear", "aten::to", "aten::quantize_per_tensor"],
             ["aten::dequantize", "aten::to", "aten::linear", "aten::add"]
         ]
-        graph = self.checkQuantizeTrace(m, [x, y], atol=2e-1, remove_dropout=True, int8_bf16=True)
+        graph = self.checkQuantizeTrace(m, [x, y], atol=2e-1, int8_bf16=True)
         self.assertGraphContainsExactly(graph, LLGA_FUSION_GROUP, 4)
         self.assertFused(graph, ['aten::linear', 'aten::add', 'aten::dequantize'])
         self.checkPatterns(graph, patterns)
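With prepare() handling the dropout swap itself, the tests no longer pass remove_dropout=True. An end-to-end sketch of the behavior under test; the model and tensor shapes are made up for illustration, and the qconfig/prepare names follow the IPEX API visible in the diffs:

    import torch
    import torch.nn as nn
    import intel_extension_for_pytorch as ipex

    class TinyMLP(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(8, 8)
            self.dropout = nn.Dropout(0.5)

        def forward(self, x):
            return self.dropout(self.linear(x))

    m = TinyMLP().eval()
    x = torch.randn(2, 8)
    qconfig = ipex.quantization.default_static_qconfig
    # prepare() now swaps self.dropout for nn.Identity internally, so the
    # traced graph has no aten::dropout between linear and its consumers.
    prepared = ipex.quantization.prepare(m, qconfig, x, inplace=False)
    prepared(x)  # calibration pass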

tests/cpu/test_ao_jit_llga_utils.py

Lines changed: 4 additions & 7 deletions
@@ -102,9 +102,9 @@ def assertFused(self, graph, fused_patterns):
         for pat in fused_patterns:
             self.assertGraphContainsExactly(graph, pat, 0)
 
-    def checkQuantizeTrace(self, model, x, atol=1e-3, rtol=1e-2, remove_dropout=False, x_var=None,
+    def checkQuantizeTrace(self, model, x, atol=1e-3, rtol=1e-2, x_var=None,
                            qconfig=default_static_qconfig, int8_bf16=False):
-        graph, traced_model, fp32_model = self.prepareModel(model, x, remove_dropout, qconfig, int8_bf16)
+        graph, traced_model, fp32_model = self.prepareModel(model, x, qconfig, int8_bf16)
         with torch.no_grad():
             y = fp32_model(*x)
             y = y.to(torch.bfloat16) if int8_bf16 else y
@@ -120,14 +120,11 @@ def checkQuantizeTrace(self, model, x, atol=1e-3, rtol=1e-2, remove_dropout=Fals
 
         return graph
 
-    def prepareModel(self, model, x, remove_dropout=False, qconfig=default_static_qconfig,
-                     int8_bf16=False, prepare_inplace=True, convert_inplace=True,):
+    def prepareModel(self, model, x, qconfig=default_static_qconfig, int8_bf16=False,
+                     prepare_inplace=True, convert_inplace=True,):
         model.eval()
         fp32_model = copy.deepcopy(model)
         with torch.no_grad(), torch._jit_internal._disable_emit_hooks():
-            # fold conv bn
-            if remove_dropout:
-                ipex.nn.utils._model_convert.replace_dropout_with_identity(model)
             model = ipex.quantization.prepare(model, qconfig, x, inplace=prepare_inplace)
             # do calibration
             y = model(*x)
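Since prepareModel's remove_dropout parameter is gone, positional call sites shift by one argument, as in the checkQuantizeTrace change above. Continuing the TinyMLP sketch from earlier, the remaining convert/trace/freeze steps that prepareModel performs after calibration look roughly like this; the convert signature is assumed from the IPEX quantization API of this era:

    import torch
    # After the calibration pass, conversion and tracing proceed as in prepareModel().
    converted = ipex.quantization.convert(prepared)
    with torch.no_grad():
        traced = torch.jit.trace(converted, x)
        traced = torch.jit.freeze(traced)
    print(traced.graph)  # no aten::dropout nodes remain to block fusion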
