Skip to content

Commit 68729ee

Browse files
Replace getHalf with getHalfFloat for Q8_0 block scale loading in compute kernels
1 parent 9d0fb16 commit 68729ee

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

src/main/java/org/beehive/gpullama3/tornadovm/kernels/TransformerComputeKernels.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ public static void convertQ8_0toFP32(KernelContext context, ByteArray x, FloatAr
4747
int blockByteOffset = blockIdx * Q8_0_BLOCK_BYTES;
4848

4949
// Load scale (first 2 bytes of block as HalfFloat)
50-
HalfFloat scale = x.getHalf(blockByteOffset);
50+
HalfFloat scale = x.getHalfFloat(blockByteOffset);
5151
float scaleFloat = scale.getFloat32();
5252

5353
// Load quantized value (skip 2-byte scale, then index within block)

src/main/java/org/beehive/gpullama3/tornadovm/kernels/TransformerComputeKernelsLayered.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1056,7 +1056,7 @@ public static float matrixVectorRowMajorOptimizedQ8_0Byte(KernelContext context,
10561056
int blockByteOffset = (rowBlockOffset + blockIdx) * Q8_0_BLOCK_BYTES;
10571057

10581058
// Load scale (first 2 bytes of block as HalfFloat)
1059-
HalfFloat scale = q.getHalf(blockByteOffset);
1059+
HalfFloat scale = q.getHalfFloat(blockByteOffset);
10601060
float scaleFloat = scale.getFloat32();
10611061

10621062
// Load 4 consecutive quantized values
@@ -1084,7 +1084,7 @@ public static float matrixVectorRowMajorOptimizedQ8_0Byte(KernelContext context,
10841084
int blockByteOffset = (rowBlockOffset + blockIdx) * Q8_0_BLOCK_BYTES;
10851085

10861086
// Load scale
1087-
HalfFloat scale = q.getHalf(blockByteOffset);
1087+
HalfFloat scale = q.getHalfFloat(blockByteOffset);
10881088
float scaleFloat = scale.getFloat32();
10891089

10901090
// Load quantized value

0 commit comments

Comments
 (0)