1 | 1 | #include "torch_ipex/csrc/cpu/CustomOPs.h" |
2 | | -#include "torch_ipex/csrc/utils.h" |
3 | 2 | #include "Conv.h" |
| 3 | +#include "LayerNorm.h" |
4 | 4 | #include "Linear.h" |
5 | | -#include "Pooling.h" |
6 | 5 | #include "Matmul.h" |
| 6 | +#include "Pooling.h" |
7 | 7 | #include "Softmax.h" |
| 8 | +#include "torch_ipex/csrc/utils.h" |
8 | 9 |
9 | 10 | #include <ATen/Context.h> |
10 | 11 | #include <ATen/InferSize.h> |
@@ -357,5 +358,115 @@ at::Tensor AtenIpexJITDev::dil_softmax( |
357 | 358 | return softmax_impl(input, dim); |
358 | 359 | } |
359 | 360 |
 | 361 | +/**
 | 362 | + * Prepare the inputs for dil_layernorm.
 | 363 | + *
 | 364 | + * @param input: the source tensor to be layer-normalized
 | 365 | + * @param normalized_shape: the trailing input dimensions over which normalization is applied
 | 366 | + * @param weight: the scale (gamma) tensor for layernorm
 | 367 | + * @param bias: the shift (beta) tensor for layernorm
 | 368 | + *
 | 369 | + * @return contiguous input, weight and bias tensors, plus M and N for dil_layernorm.
 | 370 | + */
| 371 | +std::tuple<at::Tensor, at::Tensor, at::Tensor, int64_t, int64_t> |
| 372 | +_prepare_layer_norm_inputs(const at::Tensor &input, |
| 373 | + at::IntArrayRef normalized_shape, |
| 374 | + const at::Tensor &weight /* optional */, |
| 375 | + const at::Tensor &bias /* optional */) { |
| 376 | + |
| 377 | + const int normalized_ndim = normalized_shape.size(); |
| 378 | + TORCH_CHECK(normalized_ndim >= 1, |
| 379 | + "Expected normalized_shape to be at least 1-dimensional, i.e., ", |
| 380 | + "containing at least one element, but got normalized_shape = ", |
| 381 | + normalized_shape); |
| 382 | + TORCH_CHECK( |
| 383 | + !weight.defined() || weight.sizes().equals(normalized_shape), |
| 384 | + "Expected weight to be of same shape as normalized_shape, but got ", |
| 385 | + "weight of shape ", weight.sizes(), |
| 386 | + " and normalized_shape = ", normalized_shape); |
| 387 | + TORCH_CHECK(!bias.defined() || bias.sizes().equals(normalized_shape), |
| 388 | + "Expected bias to be of same shape as normalized_shape, but got ", |
| 389 | + "bias of shape ", bias.sizes(), |
| 390 | + " and normalized_shape = ", normalized_shape); |
| 391 | + |
| 392 | + const auto input_shape = input.sizes(); |
| 393 | + const auto input_ndim = input.dim(); |
| 394 | + |
| 395 | + if (input_ndim < normalized_ndim || |
| 396 | + !input_shape.slice(input_ndim - normalized_ndim) |
| 397 | + .equals(normalized_shape)) { |
| 398 | + std::stringstream ss; |
| 399 | + ss << "Given normalized_shape=" << normalized_shape |
| 400 | + << ", expected input with shape [*"; |
| 401 | + for (auto size : normalized_shape) { |
| 402 | + ss << ", " << size; |
| 403 | + } |
 | 404 | + ss << "], but got input of size " << input_shape;
| 405 | + AT_ERROR(ss.str()); |
| 406 | + } |
| 407 | + |
| 408 | + const int axis = input_ndim - normalized_ndim; |
| 409 | + const int64_t M = |
| 410 | + std::accumulate(input_shape.cbegin(), input_shape.cbegin() + axis, |
| 411 | + static_cast<int64_t>(1), std::multiplies<int64_t>()); |
| 412 | + const int64_t N = |
| 413 | + std::accumulate(input_shape.cbegin() + axis, input_shape.cend(), |
| 414 | + static_cast<int64_t>(1), std::multiplies<int64_t>()); |
 | 415 | +
| 416 | + |
| 417 | + const auto &X = input.is_contiguous() ? input : input.contiguous(); |
| 418 | + const auto &gamma = weight.is_contiguous() ? weight : weight.contiguous(); |
| 419 | + const auto &beta = bias.is_contiguous() ? bias : bias.contiguous(); |
| 420 | + return std::make_tuple(X, gamma, beta, M, N); |
| 421 | +} |
| 422 | + |
 | 423 | +/**
 | 424 | + * at::layer_norm performance dropped after
 | 425 | + * https://github.com/pytorch/pytorch/pull/59987 was merged.
 | 426 | + * This is a workaround for that layernorm regression: the JIT pass
 | 427 | + * replaces at::layer_norm with this IPEX layernorm for inference.
 | 428 | + * Currently the oneDNN kernel is used only when both weight and bias are provided.
 | 429 | + * TODO: cover more scenarios with oneDNN, or remove this pass once
 | 430 | + * at::layer_norm performance recovers to its level before
 | 431 | + * https://github.com/pytorch/pytorch/pull/59987 was merged.
 | 432 | + *
 | 433 | + * @param input: the source tensor to be layer-normalized
 | 434 | + * @param normalized_shape: the trailing input dimensions over which normalization is applied
 | 435 | + * @param weight_opt: optional scale (gamma) tensor for layernorm
 | 436 | + * @param bias_opt: optional shift (beta) tensor for layernorm
 | 437 | + * @param eps: a value added to the denominator for numerical stability.
 | 438 | + *             Default: 1e-5
 | 439 | + *
 | 440 | + * @return the layer-normalized output tensor
 | 441 | + */
| 442 | +at::Tensor AtenIpexJITDev::dil_layernorm( |
| 443 | + const at::Tensor &input, at::IntArrayRef normalized_shape, |
| 444 | + const c10::optional<at::Tensor> &weight_opt, |
| 445 | + const c10::optional<at::Tensor> &bias_opt, float eps, bool cudnn_enable) { |
| 446 | + |
| 447 | + if (weight_opt.has_value() && bias_opt.has_value()) { |
| 448 | +#if defined(IPEX_PROFILE_OP) |
| 449 | + RECORD_FUNCTION("AtenIpexJITDev::dil_layernorm", |
| 450 | + std::vector<c10::IValue>({})); |
| 451 | +#endif |
| 452 | + auto inputs = _prepare_layer_norm_inputs( |
| 453 | + input, normalized_shape, weight_opt.value(), bias_opt.value()); |
| 454 | + auto X = std::get<0>(inputs); |
| 455 | + auto gamma = std::get<1>(inputs); |
| 456 | + auto beta = std::get<2>(inputs); |
| 457 | + auto M = std::get<3>(inputs); |
| 458 | + auto N = std::get<4>(inputs); |
| 459 | + return std::get<0>(dil_native_layer_norm_impl(X, gamma, beta, M, N, eps)); |
| 460 | + } |
| 461 | + c10::MaybeOwned<at::Tensor> weight_maybe_owned = |
| 462 | + at::borrow_from_optional_tensor(weight_opt); |
| 463 | + const at::Tensor &weight = *weight_maybe_owned; |
| 464 | + c10::MaybeOwned<at::Tensor> bias_maybe_owned = |
| 465 | + at::borrow_from_optional_tensor(bias_opt); |
| 466 | + const at::Tensor &bias = *bias_maybe_owned; |
| 467 | + return std::get<0>( |
| 468 | + at::native_layer_norm(input, normalized_shape, weight, bias, eps)); |
| 469 | +} |
| 470 | + |
360 | 471 | } // namespace cpu |
361 | 472 | } // namespace torch_ipex |
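
For reference, a minimal standalone sketch of how _prepare_layer_norm_inputs in the diff above splits the input shape into M rows and N normalized elements. The shapes below are hypothetical; the snippet only mirrors the std::accumulate logic from the diff and does not depend on ATen.

#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  // Hypothetical example: input of shape [2, 3, 4, 5], normalized_shape = [4, 5].
  std::vector<int64_t> input_shape{2, 3, 4, 5};
  std::vector<int64_t> normalized_shape{4, 5};

  // axis separates the leading "batch" dims from the dims being normalized.
  const int axis = input_shape.size() - normalized_shape.size(); // 4 - 2 = 2

  // M = product of dims before axis, N = product of dims from axis onward,
  // matching the std::accumulate calls in _prepare_layer_norm_inputs.
  const int64_t M =
      std::accumulate(input_shape.cbegin(), input_shape.cbegin() + axis,
                      static_cast<int64_t>(1), std::multiplies<int64_t>());
  const int64_t N =
      std::accumulate(input_shape.cbegin() + axis, input_shape.cend(),
                      static_cast<int64_t>(1), std::multiplies<int64_t>());

  std::cout << "M = " << M << ", N = " << N << std::endl; // prints M = 6, N = 20
  return 0;
}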
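
A hedged usage sketch of the two dispatch cases in dil_layernorm follows. It assumes a libtorch build; since dil_layernorm itself is only reachable through the IPEX JIT pass, the sketch calls the public at::layer_norm entry point to illustrate which input patterns would take the oneDNN path versus the at::native_layer_norm fallback. Shapes are hypothetical.

#include <ATen/ATen.h>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical [batch, seq, hidden] activation, normalized over the hidden dim.
  at::Tensor input = at::randn({8, 16, 768});
  std::vector<int64_t> normalized_shape{768};

  // Case 1: weight and bias both defined -- after the JIT rewrite this is the
  // pattern dil_layernorm hands to the oneDNN kernel via dil_native_layer_norm_impl.
  at::Tensor weight = at::ones({768});
  at::Tensor bias = at::zeros({768});
  at::Tensor out_with_affine =
      at::layer_norm(input, normalized_shape, weight, bias, 1e-5);

  // Case 2: weight/bias omitted -- dil_layernorm falls back to at::native_layer_norm.
  at::Tensor out_no_affine =
      at::layer_norm(input, normalized_shape, {}, {}, 1e-5);

  std::cout << out_with_affine.sizes() << " " << out_no_affine.sizes() << std::endl;
  return 0;
}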