Commit 60d0796

XiaobingSuper and Jiong Gong authored

quantization: introduce default qconfig for static and dynamic quantization (#886)

* quantization: introduce default qconfig for static and dynamic quantization
* update README
* change reduce_range to False
* Update intel_extension_for_pytorch/ao/quantization/README.md

Co-authored-by: Jiong Gong <jiong.gong@intel.com>

1 parent 9dcdb56 commit 60d0796

File tree

5 files changed: +65 -29 lines changed

intel_extension_for_pytorch/ao/quantization/README.md

Lines changed: 38 additions & 11 deletions
@@ -11,19 +11,31 @@ import intel_extension_for_pytorch as ipex
 from intel_extension_for_pytorch.quantization import prepare, convert
 ```

-### Define QConfig
+### Define qconfig
+
+Using the default qconfig (recommended):
+
+```python
+qconfig = ipex.quantization.default_static_qconfig
+# equal to
+# QConfig(activation=HistogramObserver.with_args(reduce_range=False),
+#         weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric))
+```

-Define a **QConfig** which sets the activation and weight observer methods:
+or define your own qconfig as:

 ```python
 from torch.ao.quantization import MinMaxObserver, PerChannelMinMaxObserver, QConfig
 qconfig = QConfig(activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8),
-        weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric))
+                  weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric))
 ```

 Note: we make full use of the PyTorch [observer methods](https://pytorch.org/docs/stable/quantization-support.html#torch-quantization-observer), so you can use a different PyTorch observer method to define the [QConfig](https://pytorch.org/docs/1.11/generated/torch.quantization.qconfig.QConfig.html). For the weight observer, we only support the **torch.qint8** dtype for now.

-**Suggestion**: For the activation observer, if you set **qscheme** to **torch.per_tensor_affine**, **torch.quint8** is preferred; if you set **qscheme** to **torch.per_tensor_symmetric**, **torch.qint8** is preferred. For the weight observer, setting **qscheme** to **torch.per_channel_symmetric** can give better accuracy.
+**Suggestion**:
+
+1. For the activation observer, if you set **qscheme** to **torch.per_tensor_affine**, **torch.quint8** is preferred; if you set **qscheme** to **torch.per_tensor_symmetric**, **torch.qint8** is preferred. For the weight observer, setting **qscheme** to **torch.per_channel_symmetric** can give better accuracy.
+2. If your CPU doesn't support VNNI (e.g. Skylake), setting the observer's **reduce_range** to **True** can give better accuracy.
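Editor's note: the **reduce_range** suggestion comes down to simple integer-range arithmetic. Below is a minimal, framework-free sketch in plain Python (not the IPEX or PyTorch observer API; `quant_range` and `affine_scale_zp` are hypothetical helper names) of the integer ranges involved and of how a per-tensor affine observer turns observed min/max into scale and zero_point:

```python
# Illustration only: with reduce_range=True an observer targets 7 bits instead
# of 8, halving the integer range, which avoids potential overflow in int8
# multiply-accumulate on CPUs without VNNI.

def quant_range(dtype: str, reduce_range: bool):
    """Return the (qmin, qmax) integer range an observer would target."""
    if dtype == "qint8":
        return (-64, 63) if reduce_range else (-128, 127)
    if dtype == "quint8":
        return (0, 127) if reduce_range else (0, 255)
    raise ValueError(dtype)

def affine_scale_zp(min_val: float, max_val: float, dtype: str, reduce_range: bool):
    """Per-tensor affine scale/zero_point from observed min/max (MinMaxObserver-style)."""
    qmin, qmax = quant_range(dtype, reduce_range)
    # the observed range must include zero so that 0.0 is exactly representable
    min_val, max_val = min(min_val, 0.0), max(max_val, 0.0)
    scale = (max_val - min_val) / (qmax - qmin)
    zero_point = round(qmin - min_val / scale)
    return scale, zero_point

print(quant_range("quint8", False))  # (0, 255)
print(quant_range("quint8", True))   # (0, 127)
print(affine_scale_zp(0.0, 2.55, "quint8", False))  # scale ~= 0.01, zero_point 0
```

Halving the range doubles the quantization step, so it trades a little precision for overflow safety; that is why the README only suggests it for non-VNNI CPUs.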

 ### Prepare Model and Do Calibration
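For context on what "do calibration" means here, a toy framework-free sketch (assumed behavior in plain Python; `ToyMinMaxObserver` is a hypothetical name, not the IPEX implementation): observers watch activations over representative calibration batches, record running min/max, and those statistics later determine the scale and zero_point used at inference.

```python
# Toy model of calibration: feed batches, accumulate min/max, derive qparams.

class ToyMinMaxObserver:
    def __init__(self):
        self.min_val = float("inf")
        self.max_val = float("-inf")

    def observe(self, batch):
        # running min/max across all calibration batches
        self.min_val = min(self.min_val, min(batch))
        self.max_val = max(self.max_val, max(batch))

    def qparams(self, qmin=0, qmax=255):  # quint8, per-tensor affine
        lo, hi = min(self.min_val, 0.0), max(self.max_val, 0.0)
        scale = (hi - lo) / (qmax - qmin)
        zero_point = round(qmin - lo / scale)
        return scale, zero_point

obs = ToyMinMaxObserver()
for batch in [[0.1, 1.2, -0.3], [2.2, 0.5, -0.1]]:  # calibration data
    obs.observe(batch)
print(obs.qparams())  # scale covers the full observed range [-0.3, 2.2]
```

This is why calibration data should be representative: the qparams are frozen from these statistics before inference.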

@@ -74,14 +86,29 @@ from intel_extension_for_pytorch.quantization import prepare, convert

 ### Define QConfig

+Using the default qconfig (recommended):
+
+```python
+dynamic_qconfig = ipex.quantization.default_dynamic_qconfig
+# equal to
+# QConfig(activation=PlaceholderObserver.with_args(dtype=torch.float, compute_dtype=torch.quint8),
+#         weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric))
+```
+
+or define your own qconfig as:
+
 ```python
 from torch.ao.quantization import MinMaxObserver, PlaceholderObserver, QConfig
-dynamic_qconfig = QConfig(
-    activation = PlaceholderObserver.with_args(dtype=torch.float, compute_dtype=torch.quint8),
-    weight = PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric))
+dynamic_qconfig = QConfig(activation = PlaceholderObserver.with_args(dtype=torch.float, compute_dtype=torch.quint8),
+                          weight = MinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric))
 ```

-Note: For the weight observer, only dtype **torch.qint8** is supported, and the qscheme can only be **torch.per_tensor_symmetric** or **torch.per_channel_symmetric**.
+Note: For the weight observer, only dtype **torch.qint8** is supported, and the qscheme can only be **torch.per_tensor_symmetric** or **torch.per_channel_symmetric**. For the activation observer, only dtype **torch.float** is supported, and the compute_dtype can be **torch.quint8** or **torch.qint8**.
+
+**Suggestion**:
+
+1. For the weight observer, setting **qscheme** to **torch.per_channel_symmetric** can give better accuracy.
+2. If your CPU doesn't support VNNI (e.g. Skylake), setting the observer's **reduce_range** to **True** can give better accuracy.
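Editor's note on the idea behind this qconfig: the `PlaceholderObserver.with_args(dtype=torch.float, compute_dtype=torch.quint8)` pairing says activations stay float until runtime, when each input is quantized with a scale computed from its own min/max, while weights are quantized once, offline and symmetrically. A framework-free sketch of that idea (assumed semantics in plain Python, not IPEX internals; `symmetric_qparams` and `quantize` are hypothetical helpers):

```python
# Dynamic quantization, conceptually: weight scale fixed at convert time,
# activation scale recomputed per batch.

def symmetric_qparams(values, qmax=127):
    """Symmetric qint8: zero_point is fixed at 0, scale comes from the absolute max."""
    amax = max(abs(v) for v in values)
    return amax / qmax if amax else 1.0

def quantize(values, scale, qmin=-128, qmax=127):
    return [max(qmin, min(qmax, round(v / scale))) for v in values]

weights = [0.5, -1.27, 0.02]
w_scale = symmetric_qparams(weights)        # computed once, offline
w_q = quantize(weights, w_scale)

for activations in ([0.1, 0.9], [10.0, -3.0]):
    a_scale = symmetric_qparams(activations)  # computed at runtime, per input
    print(quantize(activations, a_scale))
```

Because activation scales adapt to each input, dynamic quantization needs no calibration step, which is why the flow below goes straight from prepare to convert.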

 ### Prepare Model

@@ -106,13 +133,13 @@ convert_model = convert(prepared_model)
 # ...
 # for inference
 y = convert_model(x)
-
 ```

 Note: we only support the following ops for dynamic quantization:
+
 - torch.nn.Linear
-- torch.nn.LSTM
-- torch.nn.GRU
+- torch.nn.LSTM
+- torch.nn.GRU
 - torch.nn.LSTMCell
 - torch.nn.RNNCell
 - torch.nn.GRUCell
Lines changed: 1 addition & 0 deletions

@@ -1 +1,2 @@
 from ._quantize import prepare, convert
+from ._qconfig import default_static_qconfig, default_dynamic_qconfig
Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+import torch
+from torch.ao.quantization import PlaceholderObserver, PerChannelMinMaxObserver, HistogramObserver, QConfig
+
+
+_default_weight_observer = PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric)
+
+default_static_qconfig = QConfig(activation=HistogramObserver.with_args(reduce_range=False),
+                                 weight=_default_weight_observer)
+"""
+Default qconfig configuration for static quantization.
+"""
+
+default_dynamic_qconfig = QConfig(activation=PlaceholderObserver.with_args(dtype=torch.float, compute_dtype=torch.quint8),
+                                  weight=_default_weight_observer)
+"""
+Default qconfig configuration for dynamic quantization.
+"""
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-from ..ao.quantization import prepare, convert
+from ..ao.quantization import prepare, convert, default_static_qconfig, default_dynamic_qconfig

tests/cpu/test_ao_jit_ipex_quantization.py

Lines changed: 8 additions & 17 deletions

@@ -21,28 +21,19 @@
 default_weight_observer = PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric)

 static_qconfig = [
-    QConfig(
-        activation = MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8),
+    QConfig(activation = MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8),
             weight = default_weight_observer),
-    QConfig(
-        activation = MinMaxObserver.with_args(qscheme=torch.per_tensor_symmetric, dtype=torch.qint8),
+    QConfig(activation = MinMaxObserver.with_args(qscheme=torch.per_tensor_symmetric, dtype=torch.qint8),
             weight = default_weight_observer),
-    QConfig(
-        activation = HistogramObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8, reduce_range=True),
+    QConfig(activation = HistogramObserver.with_args(qscheme=torch.per_tensor_symmetric, dtype=torch.qint8, reduce_range=True),
             weight = default_weight_observer),
-    QConfig(
-        activation = HistogramObserver.with_args(qscheme=torch.per_tensor_symmetric, dtype=torch.qint8, reduce_range=True),
-        weight = default_weight_observer),
-]
+    ipex.quantization.default_static_qconfig]

 dynamic_qconfig = [
-    QConfig(
-        activation = PlaceholderObserver.with_args(dtype=torch.float, compute_dtype=torch.quint8),
-        weight = MinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)),
-    QConfig(
-        activation = PlaceholderObserver.with_args(dtype=torch.float, compute_dtype=torch.quint8),
-        weight = default_weight_observer),
-]
+    QConfig(activation = PlaceholderObserver.with_args(dtype=torch.float, compute_dtype=torch.quint8),
+            weight = MinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)),
+    ipex.quantization.default_dynamic_qconfig]


 class TestIpexOps(JitLlgaTestCase):
     def test_adaptive_avg_pool2d(self):
