3 files changed, +22 -2 lines changed
.gitmodules

 [submodule "vendor/llama.cpp"]
 	path = vendor/llama.cpp
-	url = https://github.com/ggerganov/llama.cpp.git
+	url = http://github.com/inference-sh/llama.cpp
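Existing checkouts do not pick up a re-pointed submodule URL automatically: running git submodule sync followed by git submodule update --init --recursive refreshes vendor/llama.cpp to track the new remote (standard git commands, not something this change adds).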
@@ -2,6 +2,7 @@
 
 import os
 import ctypes
+from enum import Enum
 
 from typing import (
     Dict,
@@ -24,7 +25,13 @@
 
 
 # Python wrappers over llama.h structs
-
+class LlamaBackendDev(Enum):
+    # CPU device using system memory
+    CPU = 0
+    # GPU device using dedicated memory
+    GPU = 1
+    # accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
+    ACCEL = 2
 
 class LlamaModel:
     """Intermediate Python wrapper for a llama.cpp llama_model.
@@ -88,6 +95,12 @@ def n_ctx_train(self) -> int:
 
     def n_embd(self) -> int:
         return llama_cpp.llama_n_embd(self.model)
+
+    def n_layer(self) -> int:
+        return llama_cpp.llama_n_layer(self.model)
+
+    def dev_layer(self, il: int) -> LlamaBackendDev:
+        return LlamaBackendDev(llama_cpp.llama_model_dev_layer(self.model, il))
 
     def rope_freq_scale_train(self) -> float:
         return llama_cpp.llama_model_rope_freq_scale_train(self.model)
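The enum values mirror ggml's backend device types, so the raw integer returned by the C-side llama_model_dev_layer call round-trips cleanly into the wrapper. A minimal sketch of that mapping (the llama_cpp._internals import path is an assumption about where the LlamaModel wrapper lives; it is not shown in this diff):

from llama_cpp._internals import LlamaBackendDev  # assumed module path, not part of this diff

dev = LlamaBackendDev(1)           # e.g. the raw int reported for an offloaded layer
assert dev is LlamaBackendDev.GPU  # 0 = CPU, 1 = GPU, 2 = ACCEL
print(dev.name, dev.value)         # "GPU 1"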
@@ -490,6 +490,13 @@ def ctx(self) -> llama_cpp.llama_context_p:
     @property
     def model(self) -> llama_cpp.llama_model_p:
         return self._model.model
+
+    @property
+    def n_layer(self) -> int:
+        return self._model.n_layer()
+
+    def dev_layer(self, il: int) -> internals.LlamaBackendDev:
+        return self._model.dev_layer(il)
 
     @property
     def _input_ids(self) -> npt.NDArray[np.intc]:
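Together, n_layer and dev_layer let callers check where each layer actually landed after loading, for example to confirm that n_gpu_layers took effect. A hedged usage sketch against this branch (the model path and n_gpu_layers value are placeholders, and the vendored llama.cpp must expose llama_model_dev_layer):

from collections import Counter
from llama_cpp import Llama

llm = Llama(model_path="./model.gguf", n_gpu_layers=20)  # placeholder path and offload count

# Tally how many layers ended up on each backend device type.
placement = Counter(llm.dev_layer(il).name for il in range(llm.n_layer))
for dev_name, count in sorted(placement.items()):
    print(f"{dev_name}: {count} layers")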