Skip to content

Commit 4e30022

Browse files
Use quantization-specific activation init in Qwen2 and Deepseek models
1 parent ed5f882 commit 4e30022

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

src/main/java/org/beehive/gpullama3/inference/state/Qwen2State.java

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,11 @@ protected StateFields createStateFields(Configuration configuration) {
41 41
fields.valueCache = Stream.generate(() -> ArrayFloatTensor.allocate(config.contextLength(), nEmbdGqa)).limit(config.numberOfLayers()).toArray(FloatTensor[]::new);
42 42

43 43
// TornadoVM wrappers with Qwen2 dimensions
44-
fields.embeddingX = new HalfFloatArray(config.dim());
44+
switch (config.modelType()) {
45+
case "FP16" -> fields.createActivationFP16(config.dim());
46+
case "Q8_0" -> fields.createActivationQ8_0(config.dim());
47+
default -> throw new UnsupportedOperationException("Quantization format " + config.modelType());
48+
}
45 49
fields.wrapX = new FloatArray(config.dim());
46 50
fields.wrapXb = new FloatArray(config.dim());
47 51
fields.wrapXb2 = new FloatArray(config.dim());

0 commit comments

Comments
 (0)