-
Notifications
You must be signed in to change notification settings - Fork 749
[Executorch] Introduce caching cpu memory allocator #15611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ea16e15
08ab552
f9ce984
0c23c32
79bb135
7939d44
1d02fb8
7c2efa1
1bdcf8a
02ef641
beaaabb
a287819
0a15a85
504bd01
6fe6af2
7ec81a0
dc4103e
2735447
664ceea
5330158
d861760
76e005a
ff4370d
d601936
de8ebe9
dd01f59
55ac299
5c82b90
ceaf5c3
cb40d29
f6d5528
6b37360
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| # All rights reserved. | ||
| # | ||
| # This source code is licensed under the BSD-style license found in the | ||
| # LICENSE file in the root directory of this source tree. | ||
|
|
||
| # Please keep this file formatted by running: | ||
| # ~~~ | ||
| # cmake-format -i CMakeLists.txt | ||
| # ~~~ | ||
|
|
||
| cmake_minimum_required(VERSION 3.19) | ||
|
|
||
| # Source root directory for executorch; defaults to two levels above this directory. | ||
| if(NOT EXECUTORCH_ROOT) | ||
| set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) | ||
| endif() | ||
|
|
||
| list(TRANSFORM _extension_memory_allocator__srcs PREPEND "${EXECUTORCH_ROOT}/") | ||
| if(CMAKE_TOOLCHAIN_IOS | ||
| OR CMAKE_TOOLCHAIN_ANDROID | ||
| OR APPLE | ||
| ) | ||
| # Building a shared library on iOS requires code signing, and on Android we | ||
| # see duplicated registration when using a shared lib, so build STATIC here. | ||
| add_library( | ||
| extension_memory_allocator STATIC ${_extension_memory_allocator__srcs} | ||
| ) | ||
| else() | ||
| add_library(extension_memory_allocator ${_extension_memory_allocator__srcs}) | ||
| endif() | ||
| target_link_libraries(extension_memory_allocator PRIVATE executorch_core) | ||
| target_include_directories( | ||
| extension_memory_allocator PUBLIC ${_common_include_directories} | ||
| ) | ||
| target_compile_options( | ||
| extension_memory_allocator | ||
| PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/wd4996> | ||
| $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations -fPIC> | ||
| ) | ||
|
|
||
| # Install the library and add it to the ExecuTorchTargets export set. | ||
| install( | ||
| TARGETS extension_memory_allocator | ||
| EXPORT ExecuTorchTargets | ||
| DESTINATION ${CMAKE_INSTALL_LIBDIR} | ||
| INCLUDES | ||
| DESTINATION ${_common_include_directories} | ||
| ) |
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,97 @@ | ||||||||
| #include <cstdlib> | ||||||||
|
|
||||||||
| #include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h> | ||||||||
| #include <executorch/extension/memory_allocator/memory_allocator_utils.h> | ||||||||
|
|
||||||||
| namespace executorch::extension { | ||||||||
|
|
||||||||
| CPUCachingAllocator::CPUCachingAllocator(uint32_t max_size) | ||||||||
| : MemoryAllocator(0, nullptr) { | ||||||||
| max_size_ = max_size; | ||||||||
| current_size_ = 0; | ||||||||
| } | ||||||||
|
|
||||||||
| void* CPUCachingAllocator::allocate(size_t size, size_t alignment) { | ||||||||
| EXECUTORCH_TRACK_ALLOCATION(prof_id(), size); | ||||||||
|
|
||||||||
| if (!isPowerOf2(alignment)) { | ||||||||
| ET_LOG(Error, "Alignment %zu is not a power of 2", alignment); | ||||||||
| return nullptr; | ||||||||
| } | ||||||||
| alignment = std::max(alignment, kCachingAllocatorDefaultAlignment); | ||||||||
| auto adjusted_size_value = | ||||||||
| executorch::extension::utils::get_aligned_size(size, alignment); | ||||||||
| if (!adjusted_size_value.ok()) { | ||||||||
| return nullptr; | ||||||||
| } | ||||||||
| size = adjusted_size_value.get(); | ||||||||
|
|
||||||||
| std::lock_guard<std::mutex> guard(mutex_); | ||||||||
| const auto& it = available_map_.find(size); | ||||||||
| // Two choices here. | ||||||||
| // 1. Return cached memory | ||||||||
| // 2. Allocate new memory | ||||||||
| // 2 can lead to current_size > max_size_ | ||||||||
| if (it == available_map_.end() || it->second.empty()) { | ||||||||
| void* ptr = std::malloc(size); | ||||||||
| if (ptr == nullptr) { | ||||||||
| ET_LOG(Error, "Failed to allocate memory"); | ||||||||
| return nullptr; | ||||||||
| } | ||||||||
| current_size_ += size; | ||||||||
| allocation_map_[ptr] = size; | ||||||||
| return alignPointer(ptr, alignment); | ||||||||
| } | ||||||||
| void* ptr = it->second.back(); | ||||||||
| it->second.pop_back(); | ||||||||
| allocation_map_[ptr] = size; | ||||||||
| return alignPointer(ptr, alignment); | ||||||||
| } | ||||||||
|
|
||||||||
| void CPUCachingAllocator::free_everything() { | ||||||||
| // We dont lock mutex_ here because it will cause deadlock otherwise | ||||||||
| // we could use recursive_mutex but we just design this differently since | ||||||||
| // free_cache is not a public API anyways | ||||||||
| for (const auto& it : available_map_) { | ||||||||
| for (const auto ptr : it.second) { | ||||||||
| std::free(ptr); | ||||||||
| } | ||||||||
| } | ||||||||
| available_map_.clear(); | ||||||||
| for (const auto& it : allocation_map_) { | ||||||||
| void* ptr = it.first; | ||||||||
| std::free(ptr); | ||||||||
| } | ||||||||
| allocation_map_.clear(); | ||||||||
| // Note that purely by the design, clearing available map does not | ||||||||
| // mean that our current allocated size is zero. | ||||||||
| current_size_ = 0; | ||||||||
| } | ||||||||
|
|
||||||||
| void CPUCachingAllocator::reset() { | ||||||||
| std::lock_guard<std::mutex> guard(mutex_); | ||||||||
| // We make the default allocations, via allcate to be either | ||||||||
| // a. gotten via cached memory OR | ||||||||
| // b. allocated via malloced and not yet cached | ||||||||
| // So if current_size_ (allocated) is larger than the max_size_ | ||||||||
| // for now we simply deallocate everything. | ||||||||
| if (current_size_ > max_size_) { | ||||||||
| free_everything(); | ||||||||
| } else { | ||||||||
| for (auto& it : allocation_map_) { | ||||||||
| void* ptr = it.first; | ||||||||
| size_t alloc_size = it.second; | ||||||||
| // Cache the memory | ||||||||
| available_map_[alloc_size].push_back(ptr); | ||||||||
| } | ||||||||
| allocation_map_.clear(); | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| CPUCachingAllocator::~CPUCachingAllocator() { | ||||||||
| // destructor must be called in thread safe manner | ||||||||
|
||||||||
| // destructor must be called in thread safe manner | |
| // destructor must be called in thread safe manner | |
| std::lock_guard<std::mutex> guard(mutex_); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| #pragma once | ||
|
|
||
| #include <cstddef> | ||
| #include <mutex> | ||
|
|
||
| #include <executorch/runtime/core/memory_allocator.h> | ||
|
|
||
| #ifdef USE_C10_SMALL_VECTOR | ||
| #include <c10/util/SmallVector.h> | ||
| #else | ||
| #include <vector> | ||
| #endif | ||
|
|
||
| #ifdef USE_C10_FLAT_HASH_MAP | ||
| #include <c10/util/flat_hash_map.h> | ||
| #else | ||
| #include <unordered_map> | ||
| #endif | ||
|
|
||
| /* | ||
| * CPUCachingAllocator: | ||
| * This file is copied over from c10/mobile/CPUCachingAllocator.h | ||
| * It is a thread safe caching allocator. | ||
| */ | ||
|
|
||
| namespace executorch::extension { | ||
|
|
||
| #ifdef USE_C10_SMALL_VECTOR | ||
| template <typename T, unsigned N> | ||
| using SmallVector = c10::SmallVector<T, N>; | ||
| #else | ||
| template <typename T, unsigned N> | ||
| using SmallVector = std::vector<T>; | ||
| #endif | ||
|
|
||
| #ifdef USE_C10_FLAT_HASH_MAP | ||
| template <typename KeyType, typename ValueType> | ||
| using FlatHashMap = ska::flat_hash_map<KeyType, ValueType>; | ||
| #else | ||
| template <typename KeyType, typename ValueType> | ||
| using FlatHashMap = std::unordered_map<KeyType, ValueType>; | ||
| #endif | ||
|
|
||
| constexpr size_t kCachingAllocatorDefaultAlignment = 64; | ||
| class CPUCachingAllocator : public executorch::runtime::MemoryAllocator { | ||
| /* | ||
| * What it does: | ||
| * Caches all the allocations carried out by this allocator. | ||
| * Cache key is the size of the allocation. | ||
| * If requested size is found in the cache returns the cached pointer. | ||
| * What it does not do: | ||
| * No speculative allocation for any future allocations. | ||
| */ | ||
| private: | ||
| void free_everything(); | ||
|
|
||
| protected: | ||
| // Invariants. | ||
| // New invariants must be written. | ||
| FlatHashMap<size_t, SmallVector<void*, 16>> available_map_; | ||
| FlatHashMap<void*, size_t> allocation_map_; | ||
| // Since allocation_map_ and other member variables are mutated/read via | ||
| // all public APIs, we need a mutex to protect concurrent access to these | ||
| // instance members. | ||
| std::mutex mutex_; | ||
| size_t max_size_; | ||
| size_t current_size_; | ||
|
|
||
| public: | ||
| /* | ||
| max_size: Maximum size of memory to cache. Never cache more than that. | ||
| */ | ||
| explicit CPUCachingAllocator(uint32_t max_size); | ||
| // No copies allowed | ||
| CPUCachingAllocator(const CPUCachingAllocator&) = delete; | ||
| CPUCachingAllocator& operator=(const CPUCachingAllocator&) = delete; | ||
| // No moves allowed | ||
| CPUCachingAllocator(CPUCachingAllocator&&) = delete; | ||
| CPUCachingAllocator& operator=(CPUCachingAllocator&&) = delete; | ||
| // Checks the cache to see if allocation of size bytes can be found. | ||
| // If so return cached memory, else | ||
| // allocates memory, records it for caching and returns. | ||
| void* allocate( | ||
| size_t size, | ||
| size_t alignment = kCachingAllocatorDefaultAlignment) override; | ||
| void reset() override; | ||
| ~CPUCachingAllocator(); | ||
| }; | ||
|
|
||
| } // namespace executorch::extension |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| /* | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * All rights reserved. | ||
| * | ||
| * This source code is licensed under the BSD-style license found in the | ||
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <cstddef> | ||
| #include <cstdint> | ||
| #include <cstdlib> | ||
|
|
||
| #include <executorch/runtime/core/error.h> | ||
| #include <executorch/runtime/core/result.h> | ||
| #include <executorch/runtime/platform/compiler.h> | ||
|
|
||
| using executorch::runtime::Error; | ||
| using executorch::runtime::Result; | ||
| namespace executorch::extension::utils { | ||
|
|
||
| // Util to get alighment adjusted allocation size | ||
| inline Result<size_t> get_aligned_size(size_t size, size_t alignment) { | ||
| // The minimum alignment that malloc() is guaranteed to provide. | ||
| static constexpr size_t kMallocAlignment = alignof(std::max_align_t); | ||
| if (alignment > kMallocAlignment) { | ||
| // To get higher alignments, allocate extra and then align the returned | ||
| // pointer. This will waste an extra `alignment - 1` bytes every time, but | ||
| // this is the only portable way to get aligned memory from the heap. | ||
| const size_t extra = alignment - 1; | ||
| if ET_UNLIKELY (extra >= SIZE_MAX - size) { | ||
| ET_LOG(Error, "Malloc size overflow: size=%zu + extra=%zu", size, extra); | ||
| return Result<size_t>(Error::InvalidArgument); | ||
| } | ||
| size += extra; | ||
| } | ||
| return size; | ||
| } | ||
|
|
||
| } // namespace executorch::extension::utils |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo in comment: "dont" should be "don't".