
Commit d9a2680

Add _native_multi_head_attention to low precision cast policy of AutocastCPU (#860)
* add _native_multi_head_attention to low precision cast policy of AutocastCPU
* fix code format
1 parent 15678cc commit d9a2680
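
Context: AutocastCPU keeps per-operator cast policies; putting _native_multi_head_attention on the low precision list means its floating point inputs are cast to the autocast dtype (bfloat16 on CPU) inside an autocast region. A minimal sketch of what this enables, assuming an IPEX build that contains this commit — note that whether nn.MultiheadAttention actually dispatches to the fused _native_multi_head_attention kernel depends on the PyTorch version and its fast-path conditions (eval mode, batch_first=True, need_weights=False, etc.), and that bfloat16 is already the CPU autocast default, so the dtype argument is shown only for clarity:

    import torch
    import torch.nn as nn
    import intel_extension_for_pytorch  # registers the AutocastCPU policies

    # Hypothetical module; 768 / 12 mirror the shapes used in the new test entry.
    mha = nn.MultiheadAttention(embed_dim=768, num_heads=12, batch_first=True).eval()
    x = torch.randn(1, 1, 768)  # fp32 input

    with torch.no_grad(), torch.cpu.amp.autocast(dtype=torch.bfloat16):
        out, _ = mha(x, x, x, need_weights=False)

    # With the op on the low precision list, the fused path runs in bfloat16.
    print(out.dtype)  # expected: torch.bfloat16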

File tree

3 files changed, +36 -0 lines changed


intel_extension_for_pytorch/csrc/autocast/autocast_mode.cpp

Lines changed: 21 additions & 0 deletions
@@ -126,6 +126,7 @@ struct CPU_WrapFunction_<
   }
 };
 
+#define TUPLE_TWO_TENSORS std::tuple<Tensor, Tensor>
 #define ADD_NS(RAW_OP) at::RAW_OP
 
 #define MAKE_REGISTER_FUNC(FUNC, NAME, SIG, CAST_POLICY) \
@@ -153,9 +154,29 @@ MAKE_REGISTER_FUNC(
     bool),
     user_defined_dtype)
 
+MAKE_REGISTER_FUNC(
+    ADD_NS(_native_multi_head_attention),
+    "_native_multi_head_attention",
+    TUPLE_TWO_TENSORS(
+        const Tensor&,
+        const Tensor&,
+        const Tensor&,
+        int64_t,
+        int64_t,
+        const Tensor&,
+        const Tensor&,
+        const Tensor&,
+        const Tensor&,
+        const c10::optional<Tensor>&,
+        bool,
+        bool),
+    user_defined_dtype)
+
 // fp32 cast policy a.k.a BlackList
 MAKE_REGISTER_FUNC(ADD_NS(mish), "mish", Tensor(const Tensor&), fp32)
 
+#undef TUPLE_TWO_TENSORS
+
 IPEX_TORCH_LIBRARY_IMPL(aten, AutocastCPU, m) {
   m.impl(
       TORCH_SELECTIVE_NAME("aten::_convolution"),
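
Two details of the C++ change are worth noting: user_defined_dtype is the cast policy that downcasts floating point tensor arguments to the autocast dtype, and the TUPLE_TWO_TENSORS macro exists because the comma inside std::tuple<Tensor, Tensor> would otherwise be parsed as a macro argument separator in MAKE_REGISTER_FUNC; it is #undef'd once the registration is done. A hedged sketch of the resulting behavior, calling the private ATen op directly with the same argument layout as the new test entry (assuming a build containing this commit):

    import torch
    import intel_extension_for_pytorch  # assumption: a build containing this commit

    q, k, v = (torch.randn(1, 1, 768) for _ in range(3))  # fp32 inputs
    qkv_weight = torch.randn(2304, 768)   # fused q/k/v projection, 3 * embed_dim rows
    qkv_bias = torch.randn(2304)
    proj_weight = torch.randn(768, 768)   # output projection
    proj_bias = torch.randn(768)

    with torch.cpu.amp.autocast(dtype=torch.bfloat16):
        out, attn = torch._native_multi_head_attention(
            q, k, v, 768, 12, qkv_weight, qkv_bias, proj_weight, proj_bias,
            None, False, True)  # mask, need_weights, average_attn_weights

    # user_defined_dtype policy: fp32 tensor arguments are cast to bfloat16
    # before the kernel runs, so the returned tensors should be bfloat16 too.
    print(out.dtype)  # expected: torch.bfloat16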

tests/cpu/autocast_test_lists.py

Lines changed: 10 additions & 0 deletions
@@ -80,6 +80,16 @@ def __init__(self, dev):
         ("addbmm", mat0_fp32 + (torch.randn((n, n, n), device=dev, dtype=torch.float32),
                                 torch.randn((n, n, n), device=dev, dtype=torch.float32))),
     ]
+    self.torch_bf16_multi_output = [
+        ("_native_multi_head_attention", (torch.randn((1, 1, 768), device=dev, dtype=torch.float32),
+                                          torch.randn((1, 1, 768), device=dev, dtype=torch.float32),
+                                          torch.randn((1, 1, 768), device=dev, dtype=torch.float32),
+                                          768, 12, torch.randn((2304, 768), device=dev, dtype=torch.float32),
+                                          torch.randn((2304), device=dev, dtype=torch.float32),
+                                          torch.randn((768, 768), device=dev, dtype=torch.float32),
+                                          torch.randn((768), device=dev, dtype=torch.float32),
+                                          None, False, True)),
+    ]
     self.torch_fp32 = [
         ("conv_transpose1d", conv_args_bf16[0]),
         ("conv_transpose2d", conv_args_bf16[1]),

tests/cpu/test_autocast.py

Lines changed: 5 additions & 0 deletions
@@ -659,6 +659,11 @@ def test_autocast_blacklist_non_float_output(self):
         for op, args in self.autocast_lists.blacklist_non_float_output_pass_test:
             self._run_autocast_pass_test(op, args, torch.float32)
 
+    def test_autocast_torch_bf16_multi_output(self):
+        for op_with_args in self.autocast_lists.torch_bf16_multi_output:
+            op, args, maybe_kwargs = self.args_maybe_kwargs(op_with_args)
+            self._run_autocast_pass_test(op, args, torch.bfloat16, add_kwargs=maybe_kwargs)
+
     def test_autocast_torch_fp32_multi_output(self):
         for op_with_args in self.autocast_lists.torch_fp32_multi_output:
             op, args, maybe_kwargs = self.args_maybe_kwargs(op_with_args)
