Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
ea16e15
[Executorch] Introduce caching cpu memory allocator
kimishpatel Nov 5, 2025
08ab552
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 6, 2025
f9ce984
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 6, 2025
0c23c32
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 10, 2025
79bb135
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 11, 2025
7939d44
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 14, 2025
1d02fb8
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 20, 2025
7c2efa1
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 20, 2025
1bdcf8a
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 20, 2025
02ef641
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 21, 2025
beaaabb
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 22, 2025
a287819
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 23, 2025
0a15a85
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 23, 2025
504bd01
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 24, 2025
6fe6af2
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 24, 2025
7ec81a0
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 24, 2025
dc4103e
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 24, 2025
2735447
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 25, 2025
664ceea
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Nov 25, 2025
5330158
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 4, 2025
d861760
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 4, 2025
76e005a
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 4, 2025
ff4370d
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 4, 2025
d601936
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 4, 2025
de8ebe9
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 4, 2025
dd01f59
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 4, 2025
55ac299
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 4, 2025
5c82b90
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 5, 2025
ceaf5c3
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 5, 2025
cb40d29
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 5, 2025
f6d5528
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 5, 2025
6b37360
Update on "[Executorch] Introduce caching cpu memory allocator"
kimishpatel Dec 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions extension/memory_allocator/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Please keep this file formatted by running:
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~

cmake_minimum_required(VERSION 3.19)

# Source root directory for executorch.
if(NOT EXECUTORCH_ROOT)
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

# Prefix every entry of the source list with the executorch root so the paths
# resolve from this directory. NOTE(review): _extension_memory_allocator__srcs
# is not set in this file; presumably it is provided by the including build -
# confirm.
list(TRANSFORM _extension_memory_allocator__srcs PREPEND "${EXECUTORCH_ROOT}/")
if(CMAKE_TOOLCHAIN_IOS
OR CMAKE_TOOLCHAIN_ANDROID
OR APPLE
)
# Building a shared library on iOS requires code signing. On Android we see
# duplicated registration when using a shared lib. Force a static library on
# these platforms.
add_library(
extension_memory_allocator STATIC ${_extension_memory_allocator__srcs}
)
else()
# No explicit library type here, so CMake picks static or shared based on
# BUILD_SHARED_LIBS.
add_library(extension_memory_allocator ${_extension_memory_allocator__srcs})
endif()
target_link_libraries(extension_memory_allocator PRIVATE executorch_core)
target_include_directories(
extension_memory_allocator PUBLIC ${_common_include_directories}
)
# Silence deprecation warnings (C4996 on MSVC, -Wno-deprecated-declarations
# elsewhere) and request position-independent code on non-MSVC compilers.
# These options are PUBLIC, so they also apply to targets linking this library.
target_compile_options(
extension_memory_allocator
PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/wd4996>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations -fPIC>
)

# Install libraries
install(
TARGETS extension_memory_allocator
EXPORT ExecuTorchTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}
INCLUDES
DESTINATION ${_common_include_directories}
)
97 changes: 97 additions & 0 deletions extension/memory_allocator/cpu_caching_malloc_allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include <cstdlib>

#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h>
#include <executorch/extension/memory_allocator/memory_allocator_utils.h>

namespace executorch::extension {

// Builds an allocator that starts with nothing allocated and nothing cached.
// The base MemoryAllocator is given an empty buffer (size 0, nullptr); all
// memory handed out by this class comes from malloc() instead.
// max_size is the threshold reset() compares current_size_ against.
CPUCachingAllocator::CPUCachingAllocator(uint32_t max_size)
    : MemoryAllocator(0, nullptr), max_size_(max_size), current_size_(0) {}

// Returns a pointer to at least `size` bytes aligned to `alignment`, or
// nullptr on failure (non-power-of-2 alignment, size overflow, or malloc
// failure).
//
// The request is first padded so that an aligned pointer always fits inside
// the underlying malloc() block; the padded size is the cache key. A cached
// block of exactly that padded size is reused when one is available,
// otherwise a new block is malloc()-ed and added to current_size_.
void* CPUCachingAllocator::allocate(size_t size, size_t alignment) {
  EXECUTORCH_TRACK_ALLOCATION(prof_id(), size);

  if (!isPowerOf2(alignment)) {
    ET_LOG(Error, "Alignment %zu is not a power of 2", alignment);
    return nullptr;
  }

  // Never align to less than the allocator-wide default.
  if (alignment < kCachingAllocatorDefaultAlignment) {
    alignment = kCachingAllocatorDefaultAlignment;
  }

  auto padded_size =
      executorch::extension::utils::get_aligned_size(size, alignment);
  if (!padded_size.ok()) {
    return nullptr;
  }
  const size_t block_size = padded_size.get();

  std::lock_guard<std::mutex> guard(mutex_);

  void* block = nullptr;
  auto cached = available_map_.find(block_size);
  if (cached != available_map_.end() && !cached->second.empty()) {
    // Reuse a previously cached block of the same padded size.
    block = cached->second.back();
    cached->second.pop_back();
  } else {
    // Nothing suitable cached: grow. This can push current_size_ above
    // max_size_; reset() deals with that case.
    block = std::malloc(block_size);
    if (block == nullptr) {
      ET_LOG(Error, "Failed to allocate memory");
      return nullptr;
    }
    current_size_ += block_size;
  }

  // Bookkeeping is keyed on the raw malloc() pointer, not the aligned pointer
  // handed to the caller, because the raw pointer is what gets cached/freed.
  allocation_map_[block] = block_size;
  return alignPointer(block, alignment);
}

// Returns every block owned by this allocator to the system: both the cached
// blocks in available_map_ and the blocks in allocation_map_ that are still
// handed out. Pointers previously returned by allocate() are dangling
// afterwards.
void CPUCachingAllocator::free_everything() {
// We don't lock mutex_ here: reset() calls this function while it already
// holds mutex_, so locking again here would deadlock.
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo in comment: "dont" should be "don't".

Suggested change
// We dont lock mutex_ here because it will cause deadlock otherwise
// We don't lock mutex_ here because it will cause deadlock otherwise

Copilot uses AI. Check for mistakes.
// We could use a recursive_mutex instead, but we just design this differently
// since free_everything is not a public API anyway.
// Free the cached (currently unused) blocks.
for (const auto& it : available_map_) {
for (const auto ptr : it.second) {
std::free(ptr);
}
}
available_map_.clear();
// Free the blocks that are still handed out; the map key is the raw malloc()
// pointer.
for (const auto& it : allocation_map_) {
void* ptr = it.first;
std::free(ptr);
}
allocation_map_.clear();
// Both the cached and the handed-out blocks were freed above, so nothing
// allocated by this allocator remains.
current_size_ = 0;
}

// Ends the lifetime of every outstanding allocation.
//
// Every block handed out by allocate() is either a reused cached block or a
// freshly malloc()-ed one that has not been cached yet. If the total
// malloc()-ed size has grown beyond max_size_, all memory is simply released.
// Otherwise the outstanding blocks are moved into the cache, keyed by their
// size, so later allocate() calls can reuse them.
void CPUCachingAllocator::reset() {
  std::lock_guard<std::mutex> guard(mutex_);

  if (current_size_ > max_size_) {
    // Over budget: drop everything rather than caching.
    free_everything();
    return;
  }

  for (const auto& [block, block_size] : allocation_map_) {
    available_map_[block_size].push_back(block);
  }
  allocation_map_.clear();
}

CPUCachingAllocator::~CPUCachingAllocator() {
// The destructor takes no lock of its own: the caller must make sure no other
// thread is still using this allocator while it is being destroyed.
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a potential race condition in the destructor. While the comment states "destructor must be called in thread safe manner", the destructor doesn't lock the mutex before calling reset() and free_cached(). If another thread is still executing methods on this object when the destructor is called, this could lead to undefined behavior. Consider adding a lock guard at the start of the destructor, or document that the caller must ensure no concurrent access during destruction.

Suggested change
// destructor must be called in thread safe manner
// destructor must be called in thread safe manner
std::lock_guard<std::mutex> guard(mutex_);

Copilot uses AI. Check for mistakes.
// reset() acquires mutex_ itself, and mutex_ is a non-recursive std::mutex, so
// mutex_ must not be held here when reset() is called.
reset();
// Release whatever reset() left in the cache (and anything still tracked).
free_everything();
}

} // namespace executorch::extension
90 changes: 90 additions & 0 deletions extension/memory_allocator/cpu_caching_malloc_allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#pragma once

#include <cstddef>
#include <mutex>

#include <executorch/runtime/core/memory_allocator.h>

#ifdef USE_C10_SMALL_VECTOR
#include <c10/util/SmallVector.h>
#else
#include <vector>
#endif

#ifdef USE_C10_FLAT_HASH_MAP
#include <c10/util/flat_hash_map.h>
#else
#include <unordered_map>
#endif

/*
* CPUCachingAllocator:
* This file is copied over from c10/mobile/CPUCachingAllocator.h
* It is a thread safe caching allocator.
*/

namespace executorch::extension {

// Container aliases used by CPUCachingAllocator. Each one resolves to a c10 /
// ska container when the corresponding USE_C10_* macro is defined, and to the
// standard-library equivalent otherwise.

// SmallVector<T, N>: with USE_C10_SMALL_VECTOR this is c10::SmallVector<T, N>.
// In the std::vector fallback the N parameter is accepted but ignored.
#ifdef USE_C10_SMALL_VECTOR
template <typename T, unsigned N>
using SmallVector = c10::SmallVector<T, N>;
#else
template <typename T, unsigned N>
using SmallVector = std::vector<T>;
#endif

// FlatHashMap<K, V>: with USE_C10_FLAT_HASH_MAP this is ska::flat_hash_map,
// otherwise std::unordered_map.
#ifdef USE_C10_FLAT_HASH_MAP
template <typename KeyType, typename ValueType>
using FlatHashMap = ska::flat_hash_map<KeyType, ValueType>;
#else
template <typename KeyType, typename ValueType>
using FlatHashMap = std::unordered_map<KeyType, ValueType>;
#endif

constexpr size_t kCachingAllocatorDefaultAlignment = 64;
class CPUCachingAllocator : public executorch::runtime::MemoryAllocator {
/*
* What it does:
* Caches all the allocations carried out by this allocator.
* Cache key is the size of the allocation.
* If requested size is found in the cache returns the cached pointer.
* What it does not do:
* No speculative allocation for any future allocations.
*/
private:
void free_everything();

protected:
// Invariants.
// New invariants must be written.
FlatHashMap<size_t, SmallVector<void*, 16>> available_map_;
FlatHashMap<void*, size_t> allocation_map_;
// Since allocation_map_ and other member variables are mutated/read via
// all public APIs, we need a mutex to protect concurrent access to these
// instance members.
std::mutex mutex_;
size_t max_size_;
size_t current_size_;

public:
/*
max_size: Maximum size of memory to cache. Never cache more than that.
*/
explicit CPUCachingAllocator(uint32_t max_size);
// No copies allowed
CPUCachingAllocator(const CPUCachingAllocator&) = delete;
CPUCachingAllocator& operator=(const CPUCachingAllocator&) = delete;
// No moves allowed
CPUCachingAllocator(CPUCachingAllocator&&) = delete;
CPUCachingAllocator& operator=(CPUCachingAllocator&&) = delete;
// Checks the cache to see if allocation of size bytes can be found.
// If so return cached memory, else
// allocates memory, records it for caching and returns.
void* allocate(
size_t size,
size_t alignment = kCachingAllocatorDefaultAlignment) override;
void reset() override;
~CPUCachingAllocator();
};

} // namespace executorch::extension
19 changes: 6 additions & 13 deletions extension/memory_allocator/malloc_memory_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <cstdlib>
#include <vector>

#include <executorch/extension/memory_allocator/memory_allocator_utils.h>
#include <executorch/runtime/core/memory_allocator.h>

namespace executorch {
Expand Down Expand Up @@ -51,20 +52,12 @@ class MallocMemoryAllocator : public executorch::runtime::MemoryAllocator {
return nullptr;
}

// The minimum alignment that malloc() is guaranteed to provide.
static constexpr size_t kMallocAlignment = alignof(std::max_align_t);
if (alignment > kMallocAlignment) {
// To get higher alignments, allocate extra and then align the returned
// pointer. This will waste an extra `alignment - 1` bytes every time, but
// this is the only portable way to get aligned memory from the heap.
const size_t extra = alignment - 1;
if ET_UNLIKELY (extra >= SIZE_MAX - size) {
ET_LOG(
Error, "Malloc size overflow: size=%zu + extra=%zu", size, extra);
return nullptr;
}
size += extra;
auto adjusted_size_value =
executorch::extension::utils::get_aligned_size(size, alignment);
if (!adjusted_size_value.ok()) {
return nullptr;
}
size = adjusted_size_value.get();
void* mem_ptr = std::malloc(size);
if (!mem_ptr) {
ET_LOG(Error, "Malloc failed to allocate %zu bytes", size);
Expand Down
41 changes: 41 additions & 0 deletions extension/memory_allocator/memory_allocator_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <cstddef>
#include <cstdint>
#include <cstdlib>

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/compiler.h>

using executorch::runtime::Error;
using executorch::runtime::Result;
namespace executorch::extension::utils {

// Returns the number of bytes to request from malloc() so that a pointer
// aligned to `alignment` holding `size` bytes fits inside the returned block.
//
// When `alignment` does not exceed what malloc() already guarantees, `size` is
// returned unchanged. Otherwise `alignment - 1` extra bytes are added. If that
// addition would overflow, an error is logged and Error::InvalidArgument is
// returned.
inline Result<size_t> get_aligned_size(size_t size, size_t alignment) {
  // The minimum alignment that malloc() is guaranteed to provide.
  static constexpr size_t kMallocAlignment = alignof(std::max_align_t);
  if (alignment <= kMallocAlignment) {
    return size;
  }

  // To get higher alignments, over-allocate and align the returned pointer
  // afterwards. This wastes up to `alignment - 1` bytes per allocation, but it
  // is the only portable way to get aligned memory from the heap.
  const size_t padding = alignment - 1;
  if ET_UNLIKELY (SIZE_MAX - size <= padding) {
    ET_LOG(Error, "Malloc size overflow: size=%zu + extra=%zu", size, padding);
    return Result<size_t>(Error::InvalidArgument);
  }
  return size + padding;
}

} // namespace executorch::extension::utils
19 changes: 19 additions & 0 deletions extension/memory_allocator/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,25 @@ def define_common_targets():
name = "malloc_memory_allocator",
exported_headers = [
"malloc_memory_allocator.h",
"memory_allocator_utils.h",
],
exported_deps = [
"//executorch/runtime/core:memory_allocator",
],
visibility = [
"//executorch/extension/memory_allocator/test/...",
"@EXECUTORCH_CLIENTS",
],
)

runtime.cxx_library(
name = "cpu_caching_allocator",
srcs = [
"cpu_caching_malloc_allocator.cpp",
],
exported_headers = [
"cpu_caching_malloc_allocator.h",
"memory_allocator_utils.h",
],
exported_deps = [
"//executorch/runtime/core:memory_allocator",
Expand Down
Loading
Loading