Skip to content

Commit d214754

Browse files
committed
switch to llama.cpp fork and llama : expose C API to get layer device type
1 parent 5fcd220 commit d214754

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[submodule "vendor/llama.cpp"]
22
path = vendor/llama.cpp
3-
url = https://github.com/ggerganov/llama.cpp.git
3+
url = http://github.com/inference-sh/llama.cpp

llama_cpp/_internals.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import os
44
import ctypes
5+
from enum import Enum
56

67
from typing import (
78
Dict,
@@ -24,7 +25,13 @@
2425

2526

2627
# Python wrappers over llama.h structs
27-
28+
class LlamaBackendDev(Enum):
    """Device type a model layer is placed on.

    Values mirror the integer codes returned by the llama.cpp C API
    (see ``llama_model_dev_layer``) — keep them in sync with the C enum.
    """

    # CPU device using system memory
    CPU = 0
    # GPU device using dedicated memory
    GPU = 1
    # accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
    ACCEL = 2
2835

2936
class LlamaModel:
3037
"""Intermediate Python wrapper for a llama.cpp llama_model.
@@ -88,6 +95,12 @@ def n_ctx_train(self) -> int:
8895

8996
def n_embd(self) -> int:
9097
return llama_cpp.llama_n_embd(self.model)
98+
99+
def n_layer(self) -> int:
    """Return the model's layer count via ``llama_cpp.llama_n_layer``."""
    handle = self.model
    return llama_cpp.llama_n_layer(handle)
101+
102+
def dev_layer(self, il: int) -> LlamaBackendDev:
    """Return the backend device type for layer *il*.

    Thin wrapper over ``llama_cpp.llama_model_dev_layer``; the raw integer
    code from the C API is converted to a :class:`LlamaBackendDev` member.
    """
    raw_code = llama_cpp.llama_model_dev_layer(self.model, il)
    return LlamaBackendDev(raw_code)
91104

92105
def rope_freq_scale_train(self) -> float:
93106
return llama_cpp.llama_model_rope_freq_scale_train(self.model)

llama_cpp/llama.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,13 @@ def ctx(self) -> llama_cpp.llama_context_p:
490490
@property
491491
def model(self) -> llama_cpp.llama_model_p:
492492
return self._model.model
493+
494+
@property
def n_layer(self) -> int:
    """Layer count of the loaded model (delegates to the wrapped ``LlamaModel``)."""
    wrapped = self._model
    return wrapped.n_layer()
497+
498+
def dev_layer(self, il: int) -> internals.LlamaBackendDev:
    """Return the backend device type for layer *il* (delegates to the wrapped ``LlamaModel``)."""
    wrapped = self._model
    return wrapped.dev_layer(il)
493500

494501
@property
495502
def _input_ids(self) -> npt.NDArray[np.intc]:

0 commit comments

Comments
 (0)