Commit 31875e7

Change the OOB blocked format solution (#1628)
* Change the blocked-format policy for our oneDNN ops and expand the blocked-format semantics of IPEX_XPU_ONEDNN_LAYOUT.

1. Training: the blocked format is chosen only when IPEX_XPU_ONEDNN_LAYOUT=1; otherwise all ops use the plain format.
   * For now, this PR makes no change to the oneDNN backward integration code.
2. Inference:
   * Conv triggers the blocked format when IPEX_XPU_ONEDNN_LAYOUT=1, when src is already blocked, or when running ATSM inference.
   * Matmul chooses the blocked format only when IPEX_XPU_ONEDNN_LAYOUT=1 or src is already blocked.
   * Other ops choose the blocked format only when src is already blocked.

* This PR adds helper functions that encapsulate the block-suggestion conditions.

Signed-off-by: Chen, Zejun <zejun.chen@intel.com>
1 parent b86fb88 commit 31875e7
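
In short, the commit replaces the single global Settings::I().is_onednn_layout_enabled() switch with per-op block-format suggestion helpers. The following is a minimal sketch of the resulting policy, condensed from the helpers added in csrc/oneDNN/Utils.h below; it is not the literal implementation, and is_atsm_inference is a hypothetical stand-in for the FP64-capability probe used there.

// Sketch only: the real helpers are use_blocked_format_for_conv and
// use_blocked_format_for_matmul in csrc/oneDNN/Utils.h below.
bool suggest_block_for_conv(
    const at::Tensor& src, bool layout_env_on, bool is_atsm_inference) {
  if (!src.defined() || src.is_sparse())
    return false; // undefined/sparse tensors always stay plain
  if (layout_env_on)
    return true; // IPEX_XPU_ONEDNN_LAYOUT=1 forces the blocked format
  return is_atsm_inference; // ATSM inference also prefers the blocked format
}

bool suggest_block_for_matmul(const at::Tensor& src, bool layout_env_on) {
  if (!src.defined() || src.is_sparse())
    return false;
  if (layout_env_on)
    return true;
  // otherwise choose block only if src already carries a blocked oneDNN layout
  return !DPCPPTensorContext::get_tensor_ctx(src).is_plain();
}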

File tree

csrc/aten/operators/OpaqueTensorFactories.cpp
csrc/oneDNN/Conv.h
csrc/oneDNN/Eltwise.h
csrc/oneDNN/Matmul.h
csrc/oneDNN/Pooling.h
csrc/oneDNN/Utils.h

6 files changed (+74, −29 lines)

csrc/aten/operators/OpaqueTensorFactories.cpp

Lines changed: 4 additions & 6 deletions
@@ -94,9 +94,6 @@ Tensor empty_opaque_qtensor(
 }
 
 inline bool need_to_plain(const Tensor& tensor) {
-  if (!Settings::I().is_onednn_layout_enabled())
-    return false;
-
   if (!tensor.defined())
     return false;
 
@@ -108,6 +105,10 @@ inline bool need_to_plain(const Tensor& tensor) {
   if (tensor.is_sparse())
     return false;
 
+  auto tensor_ctx = DPCPPTensorContext::get_tensor_ctx(tensor);
+  if (tensor_ctx.is_plain())
+    return false;
+
   return true;
 }
 
@@ -133,9 +134,6 @@ Tensor to_plain_if_needed_(const Tensor& tensor) {
 }
 
 std::vector<Tensor> to_plain_if_needed(TensorList tensors) {
-  if (!Settings::I().is_onednn_layout_enabled())
-    return tensors.vec();
-
   std::vector<Tensor> _tensors;
   for (auto tensor : tensors) {
     _tensors.push_back(to_plain_if_needed(tensor));
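
Net effect of this change: need_to_plain now keys off the tensor's own layout context rather than the global IPEX_XPU_ONEDNN_LAYOUT flag, so a block-formatted tensor is reordered back to plain even when the flag is off. A hedged sketch of the resulting predicate (simplified; the real function carries additional checks in the unchanged context between the hunks):

// Sketch: reorder to plain whenever the tensor actually carries a blocked
// oneDNN layout, independent of the IPEX_XPU_ONEDNN_LAYOUT setting.
bool needs_plain_sketch(const at::Tensor& t) {
  if (!t.defined() || t.is_sparse())
    return false;
  return !DPCPPTensorContext::get_tensor_ctx(t).is_plain();
}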

csrc/oneDNN/Conv.h

Lines changed: 7 additions & 7 deletions
@@ -217,7 +217,8 @@ static at::Tensor convolution(
       padding_back_bottom_right,
       stride,
       dilation);
-  if (!Settings::I().is_onednn_layout_enabled() && !dst.defined()) {
+  auto is_suggested_block = use_blocked_format_for_conv(src);
+  if (!is_suggested_block && !dst.defined()) {
     auto dst_opt = src.options();
     if (src.is_quantized()) {
       dst_opt = attr.get_dst_dtype();
@@ -288,7 +289,7 @@ static at::Tensor convolution(
       : memory::desc();
 
   // block combination
-  if (Settings::I().is_onednn_layout_enabled()) {
+  if (is_suggested_block) {
     // In the blocked format scenario, oneDNN accepts the src in plain format
     // when src ic = 3
     if (ic == 3) {
@@ -396,7 +397,7 @@ static at::Tensor convolution(
       ? memory::desc(wgh_tz, wei_usr_data_t, fmt_wgh)
       : wgh_ctx.meta();
 
-  if (!Settings::I().is_onednn_layout_enabled()) {
+  if (!is_suggested_block) {
     src_usr_md = memory::desc(src_tz, src_data_t, fmt_src);
     dst_usr_md = memory::desc(dst_tz, dst_data_t, fmt_src);
   } else {
@@ -444,7 +445,7 @@ static at::Tensor convolution(
 
   auto weight_cache_optimization = [&]() {
     bool onoff = false;
-    onoff |= Settings::I().is_onednn_layout_enabled();
+    onoff |= is_suggested_block;
     onoff |= onednn_conv_use_channels_last(src, wgh);
     onoff &= !at::GradMode::is_enabled();
     return onoff;
@@ -494,7 +495,7 @@ static at::Tensor convolution(
   auto expected_dst_md = conv_fwd_pd.dst_desc();
   auto dst_m = dpcpp_onednn_memory(dst_usr_md, engine, dst.data_ptr());
   if (dst_usr_md != expected_dst_md) {
-    if (Settings::I().is_onednn_layout_enabled() && dst.is_quantized()) {
+    if (is_suggested_block && dst.is_quantized()) {
       auto quantizer = dpcpp_make_per_tensor_affine_quantizer(
           (get_onednn_dtype_include_double(dst) == memory::data_type::u8 &&
            dst.q_zero_point() == 128)
@@ -580,8 +581,7 @@ static at::Tensor convolution(
       {DNNL_ARG_DST, dst_m}});
 #endif
 
-  if (Settings::I().is_onednn_layout_enabled() &&
-      dst_.data_ptr() != dst.data_ptr()) {
+  if (is_suggested_block && dst_.data_ptr() != dst.data_ptr()) {
     auto blk_ctx = DPCPPTensorContext::release_tensor_ctx(dst_);
     DPCPPTensorContext::set_tensor_ctx(dst, std::move(blk_ctx));
   }

csrc/oneDNN/Eltwise.h

Lines changed: 4 additions & 4 deletions
@@ -42,13 +42,13 @@ static inline void eltwise(
       src.is_contiguous(at::MemoryFormat::ChannelsLast3d)));
   auto src_md = memory::desc({src_tz}, data_t, format_data);
 
+  auto src_ctx = at::AtenIpexTypeXPU::DPCPPTensorContext::get_tensor_ctx(src);
+
   memory src_memory;
-  if (!Settings::I().is_onednn_layout_enabled() ||
-      src.is_contiguous(at::MemoryFormat::ChannelsLast) ||
+  if (src_ctx.is_plain() || src.is_contiguous(at::MemoryFormat::ChannelsLast) ||
       src.is_contiguous(at::MemoryFormat::ChannelsLast3d)) {
     src_memory = dpcpp_onednn_memory(src_md, engine, src.data_ptr());
   } else {
-    auto src_ctx = at::AtenIpexTypeXPU::DPCPPTensorContext::get_tensor_ctx(src);
     src_md = src_ctx.is_plain() ? src_md : src_ctx.meta();
     src_memory = dpcpp_onednn_memory(src_md, engine, src.data_ptr());
   }
@@ -69,7 +69,7 @@ static inline void eltwise(
       eltwise_forward::primitive_desc(eltwise_eltwiseFwd_desc, attr, engine);
 
   memory dst_memory;
-  if (!Settings::I().is_onednn_layout_enabled()) {
+  if (src_ctx.is_plain()) {
     if (!dst.defined()) {
       dst = src.is_contiguous(at::MemoryFormat::ChannelsLast)
           ? at::empty_like(src, at::MemoryFormat::ChannelsLast)

csrc/oneDNN/Matmul.h

Lines changed: 5 additions & 5 deletions
@@ -281,13 +281,14 @@ static inline void matmul(
 #endif
 
   auto matmul_desc = matmul::desc(m1_md, m2_md, dst_md);
+  auto is_suggested_block = use_blocked_format_for_matmul(m1);
 
   if (with_bias && (!m1.is_quantized()) && (!m2.is_quantized())) {
     // ensure getting a valid oneDNN bias md here
     b_md = memory::desc(
         get_onednn_dims(b), get_onednn_dtype(b), get_onednn_strides(b));
 
-    if (dims == 2 && Settings::I().is_onednn_layout_enabled()) {
+    if (dims == 2 && is_suggested_block) {
       // attr + blk
 #ifdef USE_PRIMITIVE_CACHE
       create_key(
@@ -310,7 +311,7 @@ static inline void matmul(
       matmul_desc = matmul::desc(m1_md, m2_md, b_md, dst_md);
     }
   } else {
-    if (dims == 2 && Settings::I().is_onednn_layout_enabled()) {
+    if (dims == 2 && is_suggested_block) {
       // no attr + blk
 #ifdef USE_PRIMITIVE_CACHE
       create_key(
@@ -374,7 +375,7 @@ static inline void matmul(
 
   auto weight_cache_optimization = [&]() {
     bool onoff = false;
-    onoff |= Settings::I().is_onednn_layout_enabled();
+    onoff |= is_suggested_block;
     onoff &= c10::InferenceMode::is_enabled();
     return onoff;
   }();
@@ -441,8 +442,7 @@ static inline void matmul(
     });
   }
 
-  if (Settings::I().is_onednn_layout_enabled() && dst_m != dst_usr_m &&
-      dims == 2) {
+  if (is_suggested_block && dst_m != dst_usr_m && dims == 2) {
     auto blk_ctx = DPCPPTensorContext::release_tensor_ctx(dst_);
     DPCPPTensorContext::set_tensor_ctx(dst, std::move(blk_ctx));
   }

csrc/oneDNN/Pooling.h

Lines changed: 4 additions & 7 deletions
@@ -151,7 +151,7 @@ static at::Tensor pooling(
       pooling_forward::primitive_desc(pooling_fwd_desc, engine);
 
   memory src_m, dst_m;
-  if (!Settings::I().is_onednn_layout_enabled() || is_smf_channels_last(src)) {
+  if (src_ctx.is_plain()) {
     src_m = dpcpp_onednn_memory(src_md, engine, src.data_ptr());
     dst_m = dpcpp_onednn_memory(dst_md, engine, dst.data_ptr());
   } else {
@@ -310,8 +310,7 @@ static std::tuple<at::Tensor, at::Tensor> pooling(
   auto expected_dst_md = pooling_fwd_pd.dst_desc();
 
   memory src_usr_m, dst_usr_m;
-  if (!Settings::I().is_onednn_layout_enabled() ||
-      onednn_pool_use_channels_last(src)) {
+  if (src_ctx.is_plain() || onednn_pool_use_channels_last(src)) {
     src_usr_m = dpcpp_onednn_memory(src_md, engine, src.data_ptr());
     dst_usr_m = dpcpp_onednn_memory(dst_md, engine, dst.data_ptr());
   } else {
@@ -340,8 +339,7 @@ static std::tuple<at::Tensor, at::Tensor> pooling(
   if (prop_kind == dnnl::prop_kind::forward_training) {
     at::Tensor idx_;
     memory idx_m;
-    if (!Settings::I().is_onednn_layout_enabled() ||
-        onednn_pool_use_channels_last(src)) {
+    if (src_ctx.is_plain() || onednn_pool_use_channels_last(src)) {
       idx_ = at::empty({dst_tz}, at::TensorOptions(at::kXPU).dtype(at::kInt));
       idx_m = dpcpp_onednn_memory(idx_md, engine, idx_.data_ptr());
     } else {
@@ -366,8 +364,7 @@ static std::tuple<at::Tensor, at::Tensor> pooling(
         {DNNL_ARG_DST, dst_m},
         {DNNL_ARG_WORKSPACE, idx_m}});
 
-    if (!Settings::I().is_onednn_layout_enabled() ||
-        onednn_pool_use_channels_last(src)) {
+    if (src_ctx.is_plain() || onednn_pool_use_channels_last(src)) {
       dtype_convert_by_scalar(
           idx.data_ptr<int64_t>(), idx_.data_ptr<int32_t>(), idx_.numel());
     } else {

csrc/oneDNN/Utils.h

Lines changed: 50 additions & 0 deletions
@@ -4,6 +4,7 @@
 #include <core/MemoryFormat.h>
 #include <core/detail/TensorInfo.h>
 #include <oneapi/dnnl/dnnl.hpp>
+#include <runtime/Utils.h>
 #include <tensor/Context.h>
 #include <utils/Macros.h>
 #include <utils/Settings.h>
@@ -470,6 +471,55 @@ static inline bool cat_valid(const TensorList& tensors) {
   return true;
 }
 
+// Judge whether to use the blocked format for Conv
+static inline bool use_blocked_format_for_conv(const at::Tensor& src) {
+  if (!src.defined() || src.is_sparse()) {
+    // suggest plain
+    return false;
+  }
+
+  if (Settings::I().is_onednn_layout_enabled()) {
+    // suggest block
+    return true;
+  }
+
+  // Inference workloads on the ATSM platform use the blocked format for conv;
+  // double (FP64) support is used to distinguish whether the device is ATSM.
+  auto is_auto_transpose = !dpcppSupportFP64();
+  auto suggest_weight_block = is_auto_transpose &&
+      (c10::InferenceMode::is_enabled() || !at::GradMode::is_enabled()) &&
+      !is_smf_channels_last(src);
+  if (suggest_weight_block) {
+    // suggest block
+    return true;
+  }
+
+  // suggest plain
+  return false;
+}
+
+// Judge whether to use the blocked format for Matmul
+static inline bool use_blocked_format_for_matmul(const at::Tensor& src) {
+  if (!src.defined() || src.is_sparse()) {
+    // suggest plain
+    return false;
+  }
+
+  if (Settings::I().is_onednn_layout_enabled()) {
+    // suggest block
+    return true;
+  }
+
+  auto src_ctx = at::AtenIpexTypeXPU::DPCPPTensorContext::get_tensor_ctx(src);
+  if (!src_ctx.is_plain()) {
+    // suggest block
+    return true;
+  }
+
+  // suggest plain
+  return false;
+}
+
 static inline std::vector<int64_t> gen_dummy_input_size_for(
     const at::IntArrayRef weight_sizes,
     const int64_t groups) {
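
A note on the ATSM probe in use_blocked_format_for_conv: ATSM-class devices lack native FP64, so !dpcppSupportFP64() (the runtime query brought in by the new <runtime/Utils.h> include) doubles as the platform check. Restated in isolation as a hedged sketch, assuming the repo's dpcppSupportFP64 and is_smf_channels_last utilities:

// Sketch of the ATSM-inference condition used above: no FP64 support,
// an inference-style autograd state, and a non-channels-last src.
bool atsm_inference_candidate(const at::Tensor& src) {
  bool no_fp64 = !dpcppSupportFP64(); // ATSM-class parts have no FP64
  bool inference =
      c10::InferenceMode::is_enabled() || !at::GradMode::is_enabled();
  return no_fp64 && inference && !is_smf_channels_last(src);
}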
