File tree Expand file tree Collapse file tree 1 file changed +2
-8
lines changed
src/main/java/org/beehive/gpullama3/inference/state Expand file tree Collapse file tree 1 file changed +2
-8
lines changed Original file line number Diff line number Diff line change @@ -55,14 +55,8 @@ protected StateFields createStateFields(Configuration config) {
5555 fields .wrapHb2 = new FloatArray (config .hiddenDim ());
5656
5757 switch (config .modelType ()) {
58- case "FP16" -> fields .embeddingX = new HalfFloatArray (config .dim ());
59- case "Q8_0" -> {
60- int blockSize = 32 ;
61- int Q8_0_BLOCK_BYTES = 34 ; // 2 bytes scale + 32 bytes quants
62- int blocksNeeded = (config .dim () + blockSize - 1 ) / blockSize ;
63- int q8BytesNeeded = blocksNeeded * Q8_0_BLOCK_BYTES ;
64- fields .embeddingX = new ByteArray (q8BytesNeeded );
65- }
58+ case "FP16" -> fields .createActivationFP16 (config .dim ());
59+ case "Q8_0" -> fields .createActivationQ8_0 (config .dim ());
6660 default -> throw new UnsupportedOperationException ("Quantization format " + config .modelType ());
6761 }
6862
You can’t perform that action at this time.
0 commit comments