Commit 111dbdd

Use quantization-specific activation init in Llama models
1 parent 843e30c

File tree

1 file changed: +2 -8 lines changed

src/main/java/org/beehive/gpullama3/inference/state/LlamaState.java

Lines changed: 2 additions & 8 deletions
@@ -55,14 +55,8 @@ protected StateFields createStateFields(Configuration config) {
         fields.wrapHb2 = new FloatArray(config.hiddenDim());
 
         switch (config.modelType()) {
-            case "FP16" -> fields.embeddingX = new HalfFloatArray(config.dim());
-            case "Q8_0" -> {
-                int blockSize = 32;
-                int Q8_0_BLOCK_BYTES = 34; // 2 bytes scale + 32 bytes quants
-                int blocksNeeded = (config.dim() + blockSize - 1) / blockSize;
-                int q8BytesNeeded = blocksNeeded * Q8_0_BLOCK_BYTES;
-                fields.embeddingX = new ByteArray(q8BytesNeeded);
-            }
+            case "FP16" -> fields.createActivationFP16(config.dim());
+            case "Q8_0" -> fields.createActivationQ8_0(config.dim());
             default -> throw new UnsupportedOperationException("Quantization format " + config.modelType());
         }
 
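
For context, a minimal sketch of what the two new StateFields helpers might look like, assuming they simply encapsulate the allocation logic removed above. The helper bodies are not part of this diff, so the method shapes here are hypothetical; the Q8_0 sizing math and the HalfFloatArray/ByteArray types are taken from the removed code.

    // Hypothetical helper bodies in StateFields (not shown in this commit).
    // Q8_0 sizing mirrors the removed code: 32 quants per block,
    // 34 bytes per block (2-byte scale + 32 quantized bytes).
    void createActivationFP16(int dim) {
        this.embeddingX = new HalfFloatArray(dim);
    }

    void createActivationQ8_0(int dim) {
        int blockSize = 32;
        int q8BlockBytes = 34;                                // 2 bytes scale + 32 bytes quants
        int blocksNeeded = (dim + blockSize - 1) / blockSize; // ceil(dim / 32)
        this.embeddingX = new ByteArray(blocksNeeded * q8BlockBytes);
    }

With the sizing details owned by StateFields, LlamaState only has to pick the helper that matches the model's quantization format.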

0 commit comments
