Commits (29 total; changes shown from 9 commits)
00fffa1  [Executorch][LLM] Use caching allocator for runner (kimishpatel, Nov 11, 2025)
5cecbfc  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 14, 2025)
e09bcd6  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 20, 2025)
356ec2f  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 20, 2025)
1f59722  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 20, 2025)
2aaf193  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 21, 2025)
e91d367  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 22, 2025)
7784291  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 23, 2025)
10c67dc  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 23, 2025)
a7be4da  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 23, 2025)
cc6beb5  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 24, 2025)
9b35c78  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 24, 2025)
4db1a94  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Nov 25, 2025)
ea7c837  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
b340181  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
af57723  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
e4845c5  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
1d85984  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
559d0d3  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
5198114  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
c2bbfbd  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
90d3d57  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
be88d80  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 4, 2025)
4082b28  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 5, 2025)
54f9381  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 5, 2025)
494bbd5  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 5, 2025)
4092750  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 5, 2025)
4e0b339  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 5, 2025)
d63ffbd  Update on "[Executorch][LLM] Use caching allocator for runner" (kimishpatel, Dec 6, 2025)
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -932,6 +932,8 @@ if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
endif()

if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/memory_allocator/runner)
Copilot AI commented on Nov 17, 2025:

The path extension/memory_allocator/runner does not exist in the repository. The memory allocator CMakeLists.txt is located at extension/memory_allocator/CMakeLists.txt. This line should be:

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/memory_allocator)
Suggested change:
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/memory_allocator/runner)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/memory_allocator)

+  list(APPEND _executorch_extensions extension_memory_allocator)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
  list(APPEND _executorch_extensions extension_llm_runner)
endif()
2 changes: 1 addition & 1 deletion extension/llm/runner/CMakeLists.txt
@@ -34,7 +34,7 @@ list(TRANSFORM _extension_llm_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})

set(runner_deps executorch_core extension_module extension_tensor
-    tokenizers::tokenizers
+    extension_memory_allocator tokenizers::tokenizers
)

# depend on arange_utils
20 changes: 18 additions & 2 deletions extension/llm/runner/llm_runner_helper.cpp
@@ -17,6 +17,7 @@
#include <executorch/extension/llm/runner/text_llm_runner.h>
#include <executorch/extension/llm/runner/text_prefiller.h>
#include <executorch/extension/llm/runner/text_token_generator.h>
+#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h>
#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/runtime.h>
#include <pytorch/tokenizers/hf_tokenizer.h>
@@ -209,11 +210,26 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(

  // Create the Module
  std::unique_ptr<Module> module;
+  uint32_t max_cached_memory_size_bytes_ = 1024 * 1024 * 10; // 10MB
Copilot AI commented on Nov 17, 2025:

The hardcoded value of 10MB for the caching allocator size should be documented or made configurable. According to the PR description, this improves performance by 6% on iOS for SDPA op temp allocations, but different models or use cases may benefit from different cache sizes. Consider:

  1. Adding a comment explaining why 10MB was chosen
  2. Making this value configurable through a parameter or constant
  3. Documenting the performance implications in code comments

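Following up on suggestion 2 above, a minimal sketch of pulling the budget out into a documented, overridable default. The helper make_temp_allocator and the constant kDefaultTempCacheBytes are illustrative assumptions, not existing ExecuTorch API; only CPUCachingAllocator, its header, and its byte-size constructor argument are taken from the diff itself.

// Hypothetical sketch (assumed names): centralize the cache budget so callers
// of create_text_llm_runner() could override it instead of relying on a value
// hardcoded inside the helper.
#include <cstdint>
#include <memory>

#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h>

// Default taken from this PR; per the PR description quoted in the comment
// above, a 10MB cache gave roughly a 6% improvement on iOS for SDPA op temp
// allocations.
constexpr uint32_t kDefaultTempCacheBytes = 10 * 1024 * 1024;

// Builds the temp allocator that is handed to the Module constructor.
inline std::unique_ptr<executorch::extension::CPUCachingAllocator>
make_temp_allocator(uint32_t cache_bytes = kDefaultTempCacheBytes) {
  return std::make_unique<executorch::extension::CPUCachingAllocator>(
      cache_bytes);
}

create_text_llm_runner() could then accept the byte budget (or the allocator itself) as an optional parameter and forward it to the Module constructors shown in this hunk.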
  if (data_files.size() > 0) {
    module = std::make_unique<Module>(
-        model_path, data_files, Module::LoadMode::File);
+        model_path,
+        data_files,
+        Module::LoadMode::File,
+        nullptr,
+        std::make_unique<
+            executorch::extension::CPUCachingAllocator>( // temp memory
+                                                         // allocator
+            max_cached_memory_size_bytes_));
  } else {
-    module = std::make_unique<Module>(model_path, Module::LoadMode::File);
+    module = std::make_unique<Module>(
+        model_path,
+        Module::LoadMode::File,
+        nullptr,
+        std::make_unique<
+            executorch::extension::CPUCachingAllocator>( // temp memory
+                                                         // allocator
+            max_cached_memory_size_bytes_));
  }

  // Get metadata from Module
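For context on the technique itself, here is a conceptual, self-contained sketch of what a size-bucketed CPU caching allocator does. This is not the ExecuTorch CPUCachingAllocator implementation, and its policy is assumed; it only illustrates why caching helps: temp buffers such as the SDPA scratch allocations mentioned above tend to be requested repeatedly with the same sizes during token generation, so handing back a previously freed block of the matching size avoids a malloc/free pair on every decode step.

// Conceptual illustration only; not ExecuTorch code. Freed blocks are kept in
// per-size buckets (up to a byte budget) and reused for later requests of the
// same size.
#include <cstddef>
#include <cstdlib>
#include <unordered_map>
#include <vector>

class NaiveCachingAllocator {
 public:
  explicit NaiveCachingAllocator(std::size_t max_cached_bytes)
      : max_cached_bytes_(max_cached_bytes) {}

  ~NaiveCachingAllocator() {
    for (auto& bucket : free_blocks_) {
      for (void* p : bucket.second) {
        std::free(p);
      }
    }
  }

  void* allocate(std::size_t nbytes) {
    auto& bucket = free_blocks_[nbytes];
    if (!bucket.empty()) {
      // Cache hit: reuse a block of exactly this size, no malloc call.
      void* p = bucket.back();
      bucket.pop_back();
      cached_bytes_ -= nbytes;
      live_sizes_[p] = nbytes;
      return p;
    }
    void* p = std::malloc(nbytes);
    live_sizes_[p] = nbytes;
    return p;
  }

  void deallocate(void* p) {
    const std::size_t nbytes = live_sizes_[p];
    live_sizes_.erase(p);
    if (cached_bytes_ + nbytes <= max_cached_bytes_) {
      // Keep the block for the next identically sized request.
      free_blocks_[nbytes].push_back(p);
      cached_bytes_ += nbytes;
    } else {
      std::free(p); // over budget: fall back to the system allocator
    }
  }

 private:
  std::size_t max_cached_bytes_;
  std::size_t cached_bytes_ = 0;
  std::unordered_map<std::size_t, std::vector<void*>> free_blocks_;
  std::unordered_map<void*, std::size_t> live_sizes_;
};

The real CPUCachingAllocator wired in above presumably enforces its max_cached_memory_size_bytes budget in a similar spirit, but its actual bucketing and eviction policy may differ; this sketch is only for intuition.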
1 change: 1 addition & 0 deletions extension/llm/runner/targets.bzl
@@ -148,6 +148,7 @@ def define_common_targets():
":text_prefiller" + aten_suffix,
":text_token_generator" + aten_suffix,
"//executorch/extension/llm/runner/io_manager:io_manager" + aten_suffix,
"//executorch/extension/memory_allocator:cpu_caching_allocator",
"//pytorch/tokenizers:hf_tokenizer",
"//pytorch/tokenizers:llama2c_tokenizer",
"//pytorch/tokenizers:sentencepiece",