File tree: 1 file changed (+5 -1 lines changed)
src/main/java/org/beehive/gpullama3/inference/state: 1 file changed (+5 -1 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -41,7 +41,11 @@ protected StateFields createStateFields(Configuration configuration) {
4141 fields .valueCache = Stream .generate (() -> ArrayFloatTensor .allocate (config .contextLength (), nEmbdGqa )).limit (config .numberOfLayers ()).toArray (FloatTensor []::new );
4242
4343 // TornadoVM wrappers with Qwen2 dimensions
44- fields .embeddingX = new HalfFloatArray (config .dim ());
44+ switch (config .modelType ()) {
45+ case "FP16" -> fields .createActivationFP16 (config .dim ());
46+ case "Q8_0" -> fields .createActivationQ8_0 (config .dim ());
47+ default -> throw new UnsupportedOperationException ("Quantization format " + config .modelType ());
48+ }
4549 fields .wrapX = new FloatArray (config .dim ());
4650 fields .wrapXb = new FloatArray (config .dim ());
4751 fields .wrapXb2 = new FloatArray (config .dim ());
You can’t perform that action at this time.
0 commit comments