diff --git a/kernels/portable/cpu/op_constant_pad_nd.cpp b/kernels/portable/cpu/op_constant_pad_nd.cpp
index 7209e8e42e5..56f1283bf0f 100644
--- a/kernels/portable/cpu/op_constant_pad_nd.cpp
+++ b/kernels/portable/cpu/op_constant_pad_nd.cpp
@@ -83,6 +83,15 @@ void apply_padding_to_dim(
     size_t copy_nbytes = copy_len * sizeof(CTYPE);
     if (copy_nbytes > 0) {
+      // Check that out_data and self_data do not overlap.
+      ET_KERNEL_CHECK_MSG(
+          ctx,
+          out_data != self_data &&
+              ((out_data + copy_len < self_data) ||
+               (self_data + copy_len < out_data)),
+          InvalidArgument,
+          /* void */,
+          "Out tensor overlaps with the input tensor. This is not supported.");
       memcpy(out_data, self_data, copy_nbytes);
       out_data += copy_len;
       self_data += copy_len;
diff --git a/kernels/portable/cpu/op_linear_scratch_example.cpp b/kernels/portable/cpu/op_linear_scratch_example.cpp
deleted file mode 100644
index b7a263a199f..00000000000
--- a/kernels/portable/cpu/op_linear_scratch_example.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#include <executorch/runtime/kernel/kernel_includes.h>
-#include <executorch/runtime/platform/assert.h>
-
-/**
- * @file
- *
- * NOTE: This file is deprecated: no new code should be added to it, and its
- * contents should be split into per-operator files like op_add.cpp.
- */
-
-namespace torch {
-namespace executor {
-namespace native {
-
-using Tensor = executorch::aten::Tensor;
-
-template <typename T>
-using optional = std::optional<T>;
-
-// kernel for demonstration purpose only
-
-// Kernel implementation provided by user.
-// The schema is added by user to PyTorch native function DSL in a yaml file,
-// defined in
-// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/README.md
-// @lint-ignore-every CLANGTIDY
-
-namespace {
-bool check_linear_scratch_example_args(
-    const Tensor& input,
-    const Tensor& weight,
-    const optional<Tensor>& bias,
-    Tensor& out,
-    Tensor& scratch) {
-  ET_CHECK_OR_RETURN_FALSE(
-      input.size(1) == weight.size(1),
-      "Unexpected weight size 1; input.size(1) = %" ET_PRI_TENSOR_SIZE
-      ", weight.size(1) = %" ET_PRI_TENSOR_SIZE,
-      input.size(1),
-      weight.size(1));
-
-  ET_CHECK_OR_RETURN_FALSE(
-      scratch.size(0) == input.size(0),
-      "Unexpected scratch size 0; scratch.size(0) = %" ET_PRI_TENSOR_SIZE
-      ", input.size(0) = %" ET_PRI_TENSOR_SIZE,
-      scratch.size(0),
-      input.size(0));
-
-  ET_CHECK_OR_RETURN_FALSE(
-      scratch.size(1) == weight.size(0),
-      "Unexpected scratch size 1; scratch.size(1) = %" ET_PRI_TENSOR_SIZE
-      ", weight.size(0) = %" ET_PRI_TENSOR_SIZE,
-      scratch.size(1),
-      weight.size(0));
-
-  return true;
-}
-} // namespace
-
-/*
- * A simple example of using scratch tensor. In this specific case we could
- * also update the out tensor in place to avoid the scratch tensor.
- *
- * linear.scratch_example(Tensor input, Tensor weight, Tensor? bias=None, *,
- *     Tensor(a!) out, Tensor(b!) _scratch_tensor) -> Tensor(a!)
- */
-Tensor& linear_scratch_example(
-    const Tensor& input,
-    const Tensor& weight,
-    const optional<Tensor>& bias,
-    Tensor& out,
-    Tensor& scratch) {
-  size_t M, N, K;
-  M = input.size(0);
-  N = input.size(1);
-  K = weight.size(0);
-
-  // TODO: Update to use ET_KERNEL_CHECK when context is available in custom
-  // ops.
-  ET_CHECK(
-      check_linear_scratch_example_args(input, weight, bias, out, scratch));
-
-  // input @ weight -> scratch
-  // TODO: does not handle the case that accumulator has different type
-  // as input
-  // TODO: this is just some inefficient implementation to verify correctness
-  if (input.scalar_type() == ScalarType::Float) {
-    // only support float32 before D35829540 is landed
-    using scalar_t = float;
-    for (size_t i = 0; i < M; ++i) {
-      for (size_t j = 0; j < K; ++j) {
-        scalar_t* scratch_ptr =
-            scratch.mutable_data_ptr<scalar_t>() + (i * K + j);
-        *scratch_ptr = 0;
-        for (size_t k = 0; k < N; ++k) {
-          const scalar_t* const input_ptr =
-              input.const_data_ptr<scalar_t>() + (i * N + k);
-          // note it's transposed
-          // (j,k) element in the (K, N) array
-          const scalar_t* const weight_ptr =
-              weight.const_data_ptr<scalar_t>() + (j * N + k);
-          *scratch_ptr += *input_ptr * *weight_ptr;
-        }
-      }
-    }
-
-    // add the bias
-    if (bias.has_value()) {
-      ET_CHECK_MSG(
-          static_cast<ssize_t>(K) == bias.value().numel(),
-          "Unexpected numel for bias");
-      for (size_t i = 0; i < M; ++i) {
-        for (size_t j = 0; j < K; ++j) {
-          scalar_t* scratch_ptr =
-              scratch.mutable_data_ptr<scalar_t>() + (i * K + j);
-          scalar_t* out_ptr = out.mutable_data_ptr<scalar_t>() + (i * K + j);
-          scalar_t* bias_ptr = bias.value().mutable_data_ptr<scalar_t>() + j;
-          *out_ptr = *scratch_ptr + *bias_ptr;
-        }
-      }
-    }
-  }
-  return out;
-}
-
-Tensor& linear_scratch_example(
-    KernelRuntimeContext& ctx,
-    const Tensor& input,
-    const Tensor& weight,
-    const optional<Tensor>& bias,
-    Tensor& out,
-    Tensor& scratch) {
-  // TODO(larryliu): Add a context arg to the real op function and remove this
-  // wrapper
-  (void)ctx;
-  return linear_scratch_example(input, weight, bias, out, scratch);
-}
-
-} // namespace native
-} // namespace executor
-} // namespace torch
diff --git a/kernels/portable/custom_ops.yaml b/kernels/portable/custom_ops.yaml
index e8ae0812674..314852efd3c 100644
--- a/kernels/portable/custom_ops.yaml
+++ b/kernels/portable/custom_ops.yaml
@@ -30,8 +30,3 @@
   kernels:
     - arg_meta: null
       kernel_name: torch::executor::allclose_tensor
-
-- func: linear.scratch_example(Tensor input, Tensor weight, Tensor? bias=None, *, Tensor(a!) out, Tensor(b!) _scratch_tensor) -> Tensor(a!)
-  kernels:
-    - arg_meta: null
-      kernel_name: torch::executor::linear_scratch_example
diff --git a/shim_et/xplat/executorch/build/build_variables.bzl b/shim_et/xplat/executorch/build/build_variables.bzl
index 8d8893f7454..9b5d03e7977 100644
--- a/shim_et/xplat/executorch/build/build_variables.bzl
+++ b/shim_et/xplat/executorch/build/build_variables.bzl
@@ -140,7 +140,6 @@ PORTABLE_KERNELS_SRCS = [
     "kernels/portable/cpu/op_le.cpp",
     "kernels/portable/cpu/op_leaky_relu.cpp",
     "kernels/portable/cpu/op_lift_fresh_copy.cpp",
-    "kernels/portable/cpu/op_linear_scratch_example.cpp",
     "kernels/portable/cpu/op_log.cpp",
     "kernels/portable/cpu/op_log10.cpp",
     "kernels/portable/cpu/op_log1p.cpp",
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
index c42be80010b..78004a1fc1a 100644
--- a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
+++ b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -1367,9 +1367,6 @@ CUSTOM_OPS = (
     op_target(
         name = "op_allclose",
     ),
-    op_target(
-        name = "op_linear_scratch_example",
-    ),
 )
 
 def portable_source_list():
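
Note on the new overlap check in op_constant_pad_nd.cpp: the predicate accepts the copy only when one range ends strictly before the other begins, so it is conservative; because it compares with < rather than <=, exactly adjacent (non-overlapping) ranges are rejected as well. Units are consistent: out_data and self_data are CTYPE pointers, so offsetting them by copy_len elements spans the same copy_nbytes = copy_len * sizeof(CTYPE) bytes that the memcpy touches. Below is a minimal standalone sketch of the same predicate; the helper name regions_disjoint and the float buffer are illustrative only, not part of the patch.

#include <cstddef>
#include <cstdio>

// Mirrors the patch's condition: two length-`len` element ranges starting at
// `a` and `b` are accepted only if one ends strictly before the other begins.
static bool regions_disjoint(const float* a, const float* b, size_t len) {
  return a != b && ((a + len < b) || (b + len < a));
}

int main() {
  float buf[12] = {};
  printf("%d\n", regions_disjoint(buf, buf + 6, 4)); // 1: disjoint, copy proceeds
  printf("%d\n", regions_disjoint(buf, buf + 2, 4)); // 0: overlap, check fails with InvalidArgument
  printf("%d\n", regions_disjoint(buf, buf + 4, 4)); // 0: adjacent ranges are also rejected
  return 0;
}

The third case shows the conservatism: the ranges [buf, buf+4) and [buf+4, buf+8) do not actually overlap, but the strict comparison rejects them, which is safe (a false positive only refuses a legal copy, never permits an overlapping one).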