Commit 55676ed

Fix REQ checks & scale/zps value retrieval when FX is used with JIT (#1420) (#1432)
* Fix REQ checks & scale/zps value retrieval when FX is used with JIT
* Revise comments
* Refactor code
* Fix lint

Co-authored-by: Chunyuan WU <chunyuan.wu@intel.com>
1 parent 0aa4aa1 commit 55676ed
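
Background for the fix: when an FX reference-quantized model is subsequently traced with TorchScript, the quantization parameters (scale/zero_point) can appear in the traced graph as 0-dim tensor constants instead of Python float/int scalars, which broke the previous REQ checks and value retrieval. A minimal sketch of that composition (illustrative repro, not code from this commit; FX import paths and signatures vary across PyTorch releases):

import torch
import torch.nn as nn
from torch.ao.quantization.quantize_fx import prepare_fx, convert_to_reference_fx

class M(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 8)

    def forward(self, x):
        return self.linear(x)

m = M().eval()
example = torch.randn(1, 8)
qconfig_dict = {"": torch.quantization.get_default_qconfig("fbgemm")}
m = prepare_fx(m, qconfig_dict, example_inputs=example)
m(example)  # calibrate observers
m = convert_to_reference_fx(m)
# In the traced TorchScript graph, quantize_per_tensor's scale/zero_point
# inputs may be 0-dim Tensor constants rather than float/int scalars.
traced = torch.jit.trace(m, example)
print(traced.graph)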

5 files changed: +77 -8 lines changed

csrc/jit/codegen/onednn/graph_helper.cpp

Lines changed: 5 additions & 4 deletions

@@ -396,11 +396,12 @@ Operator LlgaGraphHelper::createOperator(Node* node) const {
     // ---/-----/-----\-----\---
     // dequant q_scale q_zp dtype
     // REQ(node->output(0)->uses().size() <= 2);
-    auto scale = toIValue(node->input(1));
-    REQ(scale.has_value() && scale->isDouble());
+    auto scale = node->input(1);
+    REQ(utils::isScaleSupported(scale));
+
+    auto zero_point = node->input(2);
+    REQ(utils::isZeroPointSupported(zero_point));
 
-    auto zero_point = toIValue(node->input(2));
-    REQ(zero_point.has_value() && zero_point->isInt());
     return Operator(node, opkind::Quantize)
         .setInput(0)
         .setOutput(0)

csrc/jit/codegen/onednn/operator.h

Lines changed: 14 additions & 3 deletions

@@ -64,12 +64,23 @@ class Operator {
   }
 
   static int64_t Int(const torch::jit::Node* node, size_t offset) {
-    return torch::jit::toIValue(node->input(offset))->toInt();
+    if (node->input(offset)->type()->isSubtypeOf(
+            torch::jit::TensorType::get())) {
+      // Composing FX with JIT tracing may cause scale/zps to be 0-dim tensors
+      return toIValue(node->input(offset)).value().toTensor().item().toInt();
+    } else {
+      return static_cast<int64_t>(toIValue(node->input(offset))->toInt());
+    }
   }
 
   static float Float(const torch::jit::Node* node, size_t offset) {
-    return static_cast<float>(
-        torch::jit::toIValue(node->input(offset))->toDouble());
+    if (node->input(offset)->type()->isSubtypeOf(
+            torch::jit::TensorType::get())) {
+      // Composing FX with JIT tracing may cause scale/zps to be 0-dim tensors
+      return toIValue(node->input(offset)).value().toTensor().item().toFloat();
+    } else {
+      return static_cast<float>(toIValue(node->input(offset))->toDouble());
+    }
   }
 
   static float ScalarToFloat(const torch::jit::Node* node, size_t offset) {
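
The tensor branch above relies on standard 0-dim tensor semantics; a small plain-PyTorch illustration (not part of this commit):

import torch

# A 0-dim tensor wraps a single scalar; item() extracts the Python value,
# which is what toTensor().item().toInt()/.toFloat() do on the C++ side.
zero_point = torch.tensor(128)  # 0-dim, dtype torch.int64 (Long)
scale = torch.tensor(0.25)      # 0-dim, dtype torch.float32 (Float)
assert zero_point.dim() == 0 and scale.dim() == 0
print(zero_point.item(), scale.item())  # 128 0.25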

csrc/jit/codegen/onednn/utils.cpp

Lines changed: 19 additions & 0 deletions

@@ -77,6 +77,25 @@ double getScale(Node* input_node) {
   return scale;
 }
 
+bool isZeroPointSupported(Value* zps) {
+  auto zps_value = toIValue(zps);
+  return (
+      zps_value.has_value() &&
+      (zps_value->isInt() ||
+       (zps_value->isTensor() &&
+        (zps_value.value().toTensor().scalar_type() == at::ScalarType::Long))));
+}
+
+bool isScaleSupported(Value* scale) {
+  auto scale_value = toIValue(scale);
+  return (
+      scale_value.has_value() &&
+      (scale_value->isDouble() ||
+       (scale_value->isTensor() &&
+        (scale_value.value().toTensor().scalar_type() ==
+         at::ScalarType::Float))));
+}
+
 } // namespace utils
 } // namespace onednn
 } // namespace fuser
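
To make the accepted encodings explicit, a rough Python analogue of the two new checks (a sketch only; the real logic is the C++ above):

import torch

def is_zero_point_supported(zp):
    # Python int, or a Long (int64) tensor as produced by FX + JIT tracing
    return isinstance(zp, int) or (
        torch.is_tensor(zp) and zp.dtype == torch.int64)

def is_scale_supported(scale):
    # Python float, or a Float (float32) tensor
    return isinstance(scale, float) or (
        torch.is_tensor(scale) and scale.dtype == torch.float32)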

csrc/jit/codegen/onednn/utils.h

Lines changed: 4 additions & 0 deletions

@@ -20,6 +20,10 @@ double getScale(torch::jit::Node* input_node);
 
 std::vector<int64_t> getZPSVector(torch::jit::Node* input_node);
 
+bool isZeroPointSupported(torch::jit::Value* zps);
+
+bool isScaleSupported(torch::jit::Value* scale);
+
 } // namespace utils
 } // namespace onednn
 } // namespace fuser

tests/cpu/test_ao_jit_llga_quantization_fuser.py

Lines changed: 35 additions & 1 deletion

@@ -1745,7 +1745,41 @@ def forward(self, x):
         m = convert_to_reference_fx(m)
         graph = self.checkQuantizeTrace(m, [x], atol=2e-1)
         # dequant -> linear should be mapped to LLGA
-        self.assertGraphContainsExactly(graph, LLGA_FUSION_GROUP, 1)
+        self.assertGraphContainsExactly(graph, LLGA_FUSION_GROUP, 1)
+
+    @unittest.skipIf(True, "Poor accuracy")
+    @skipIfNoTorchVision
+    def test_fx_ao_qat_model(self):
+        class M(nn.Module):
+            def __init__(self):
+                super(M, self).__init__()
+                self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
+                self.conv2 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
+                self.eltwise = torch.nn.ReLU()
+
+            def forward(self, x):
+                x = self.conv1(x)
+                x = self.eltwise(x)
+                x = self.conv2(x)
+                return x
+        data = torch.randn(1, 32, 224, 224).to(memory_format=torch.channels_last)
+        m = M()
+        m.eval()
+        #
+        # quantization aware training for static quantization
+        #
+        qconfig_dict = {"": torch.quantization.get_default_qat_qconfig('fbgemm')}
+        m.train()
+        model_prepared = prepare_qat_fx(m, qconfig_dict, example_inputs=data)
+        model_quantized = convert_to_reference_fx(model_prepared)
+        model_quantized = model_quantized.eval()
+        model = model_quantized.to(memory_format=torch.channels_last)
+        graph = self.checkQuantizeTrace(model, [data], atol=2e-1)
+        self.checkPatterns(graph, [['aten::dequantize', 'aten::quantize_per_channel', 'aten::_convolution',
+                                    'aten::relu', 'aten::quantize_per_tensor'],
+                                   ['aten::dequantize', 'aten::quantize_per_channel', 'aten::_convolution',
+                                    'aten::quantize_per_tensor']])
+
 
     def test_ffn_residual(self):
         class FFN_Residual(nn.Module):