
Commit a4ba9af

change amp policy of layer_norm to fall through (#1703)
1 parent a9c2646 commit a4ba9af

File tree

csrc/aten/amp/autocast_mode.cpp
docs/tutorials/features/amp.md

2 files changed: +1 -33 lines changed


csrc/aten/amp/autocast_mode.cpp

Lines changed: 0 additions & 32 deletions
@@ -595,17 +595,6 @@ TORCH_LIBRARY_IMPL(aten, AutocastXPU, m) {
       "softplus",
       Tensor(const Tensor&, const Scalar&, const Scalar&),
       fp32)
-  KERNEL_XPU(
-      ADD_NS(layer_norm),
-      "layer_norm",
-      Tensor(
-          const Tensor&,
-          IntArrayRef,
-          const c10::optional<Tensor>&,
-          const c10::optional<Tensor>&,
-          double,
-          bool),
-      fp32)
   KERNEL_XPU(
       ADD_NS(group_norm),
       "group_norm",
@@ -848,27 +837,6 @@ TORCH_LIBRARY_IMPL(aten, AutocastXPU, m) {
       int64_t,
       c10::optional<c10::string_view>),
       fp32)
-  // The macro doesn't like these (I think it chokes on commas inside <>) so
-  // write them manually
-  m.impl(
-      TORCH_SELECTIVE_NAME("aten::native_layer_norm"),
-      TORCH_FN((&WrapFunction<
-          CastPolicy::fp32,
-          DeviceType::XPU,
-          std::tuple<Tensor, Tensor, Tensor>(
-              const Tensor&,
-              IntArrayRef,
-              const c10::optional<Tensor>&,
-              const c10::optional<Tensor>&,
-              double),
-          std::tuple<Tensor, Tensor, Tensor>(
-              const Tensor&,
-              IntArrayRef,
-              const c10::optional<Tensor>&,
-              const c10::optional<Tensor>&,
-              double),
-          &ADD_NS(native_layer_norm)>::type::call)));
-
   // promote
   KERNEL_XPU(ADD_NS(cat), "cat", Tensor(TensorList, int64_t), promote)
   KERNEL_XPU(ADD_NS(stack), "stack", Tensor(TensorList, int64_t), promote)
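
With these registrations removed, `layer_norm` and `native_layer_norm` no longer carry an explicit fp32 cast policy on XPU: inside an autocast region they fall through and run in whatever dtype their inputs already have, instead of being forced to `float32`. A minimal sketch of the observable effect, assuming an available XPU device and the `torch.xpu.amp.autocast` context manager shipped with intel_extension_for_pytorch (the snippet is illustrative and not part of this commit):

import torch
import intel_extension_for_pytorch  # noqa: F401  (registers the "xpu" device)

# LayerNorm module and input both created in bfloat16 on the XPU device.
ln = torch.nn.LayerNorm(16, device="xpu", dtype=torch.bfloat16)
x = torch.randn(8, 16, device="xpu", dtype=torch.bfloat16)

with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
    y = ln(x)

# Under the old fp32 policy, autocast cast layer_norm's arguments to float32
# and y came back as float32; with the fall-through policy the op runs in the
# input dtype, so y is expected to stay bfloat16.
print(y.dtype)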

docs/tutorials/features/amp.md

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ If an op is unlisted, we assume it's numerically stable in `bfloat16` or `float16`
 
 #### Ops that can autocast to `float32`
 
-`binary_cross_entropy`, `binary_cross_entropy_with_logits`, `log_softmax`, `nll_loss`, `nll_loss2d`, `nll_loss_nd`, `cross_entropy_loss`, `fft_fft`, `fft_ifft`, `fft_fft2`, `fft_ifft2`, `fft_fftn`, `fft_ifftn`, `fft_rfft`, `fft_irfft`, `fft_rfft2`, `fft_irfft2`, `fft_rfftn`, `fft_irfftn`, `fft_hfft`, `fft_ihfft`, `acos`, `asin`, `cosh`, `erfinv`, `exp`, `expm1`, `log`, `log10`, `log2`, `log1p`, `reciprocal`, `rsqrt`, `sinh`, `tan`, `pow`, `softplus`, `layer_norm`, `group_norm`, `frobenius_norm`, `nuclear_norm`, `cosine_similarity`, `poisson_nll_loss`, `cosine_embedding_loss`, `hinge_embedding_loss`, `kl_div`, `l1_loss`, `smooth_l1_loss `, `huber_loss`, `mse_loss`, `margin_ranking_loss`, `multilabel_margin_loss`, `soft_margin_loss`, `triplet_margin_loss`, `multi_margin_loss`, `dist`, `pdist`, `cdist`, `renorm`, `native_layer_norm`
+`binary_cross_entropy`, `binary_cross_entropy_with_logits`, `log_softmax`, `nll_loss`, `nll_loss2d`, `nll_loss_nd`, `cross_entropy_loss`, `fft_fft`, `fft_ifft`, `fft_fft2`, `fft_ifft2`, `fft_fftn`, `fft_ifftn`, `fft_rfft`, `fft_irfft`, `fft_rfft2`, `fft_irfft2`, `fft_rfftn`, `fft_irfftn`, `fft_hfft`, `fft_ihfft`, `acos`, `asin`, `cosh`, `erfinv`, `exp`, `expm1`, `log`, `log10`, `log2`, `log1p`, `reciprocal`, `rsqrt`, `sinh`, `tan`, `pow`, `softplus`, `group_norm`, `frobenius_norm`, `nuclear_norm`, `cosine_similarity`, `poisson_nll_loss`, `cosine_embedding_loss`, `hinge_embedding_loss`, `kl_div`, `l1_loss`, `smooth_l1_loss `, `huber_loss`, `mse_loss`, `margin_ranking_loss`, `multilabel_margin_loss`, `soft_margin_loss`, `triplet_margin_loss`, `multi_margin_loss`, `dist`, `pdist`, `cdist`, `renorm`
 
 #### Ops that promote to the widest input type
 