
Commit cfe367e
[CI] Add complete CI testing for all supported models & quant types
1 parent e6735f9 commit cfe367e

4 files changed: +88 −58 lines changed

.github/workflows/build-and-run.yml

Lines changed: 80 additions & 50 deletions
@@ -7,7 +7,6 @@ on:
     branches: [ main ]
     types: [opened, synchronize, reopened]

-
 jobs:
   build-and-run:
     runs-on: self-hosted
@@ -26,11 +25,11 @@ jobs:
       - name: Check code formatting (Spotless)
         run: |
           cd ${{ github.workspace }}
-          #./mvnw -T12C -Pspotless spotless:check
+          # ./mvnw -T12C -Pspotless spotless:check

-      - name: Clone TornadoVM explicitly
+      - name: Clone Latest TornadoVM
         run: |
-          git clone --depth 1 --branch develop \
+          git clone --depth 1 --branch master \
             https://github.com/beehive-lab/TornadoVM.git \
             GPULlama3.java/external/tornadovm
       - name: Set up Python venv for TornadoVM
@@ -40,7 +39,6 @@ jobs:
           python --version
       - name: Build TornadoVM
         run: |
-          set -x
           cd GPULlama3.java/external/tornadovm
           source venv/bin/activate
           echo "=== Building TornadoVM ==="
@@ -66,59 +64,91 @@ jobs:
           echo "=== Checking tornado CLI ==="
           which tornado || { echo "::error::tornado not in PATH"; exit 1; }
           tornado --devices
-      - name: Build GPULlama3
+      - name: Build GPULlama3.java
         run: |
-          set -x
           cd ${{ github.workspace }}
           echo "Using TORNADO_SDK=$TORNADO_SDK"
           export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
           which tornado || { echo "::error::tornado unavailable during GPULlama3 build"; exit 1; }
           tornado --version
-          make
-
-  test-models:
-    runs-on: self-hosted
-    needs: build-and-run
-
-    strategy:
-      fail-fast: false
-      matrix:
-        model:
-          - /opt/models/DeepSeek-R1-Distill-Qwen-1.5B-F16.gguf
-          - /opt/models/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf
-          - /opt/models/Llama-3.2-1B-Instruct-F16.gguf
-          - /opt/models/Llama-3.2-1B-Instruct-Q8_0.gguf
-          - /opt/models/Llama-3.2-3B-Instruct-F16.gguf
-          - /opt/models/Llama-3.2-3B-Instruct-Q8_0.gguf
-          - /opt/models/Mistral-7B-Instruct-v0.3.fp16.gguf
-          - /opt/models/Mistral-7B-Instruct-v0.3.Q8_0.gguf
-          - /opt/models/Phi-3-mini-4k-instruct-fp16.gguf
-          - /opt/models/Phi-3-mini-4k-instruct-Q8_0.gguf
-          - /opt/models/Qwen2.5-0.5B-Instruct-f16.gguf
-          - /opt/models/Qwen2.5-0.5B-Instruct-Q8_0.gguf
-          - /opt/models/qwen2.5-1.5b-instruct-fp16.gguf
-          - /opt/models/qwen2.5-1.5b-instruct-q8_0.gguf
-          - /opt/models/Qwen3-0.6B-f16.gguf
-          - /opt/models/Qwen3-0.6B-Q8_0.gguf
-          - /opt/models/Qwen3-4B-f16.gguf
-          - /opt/models/Qwen3-4B-Q8_0.gguf
-
-    env:
-      JAVA_HOME: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3
-      TORNADO_SDK: ${{ needs.build-and-run.outputs.tornado_sdk }}
-
-    steps:
-      - name: Checkout GPULlama3
-        uses: actions/checkout@v4
-
-      - name: Run inference for ${{ matrix.model }}
+          ./mvnw clean package -DskipTests
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf
         run: |
-          set -x
           cd ${{ github.workspace }}
-
           export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
-          echo "Using Tornado SDK: $TORNADO_SDK"
-
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          ./llama-tornado --gpu --opencl \
+            --model /home/michalis/models/Llama-3.2-1B-Instruct-F16.gguf \
+            --prompt "Say hello"
+      - name: FP16 - Run Qwen3-4B-f16.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          ./llama-tornado --gpu --opencl \
+            --model /opt/models/Qwen3-4B-f16.gguf \
+            --prompt "Say hello"
+      - name: FP16 - Run Mistral-7B-Instruct-v0.3.fp16.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          ./llama-tornado --gpu --opencl \
+            --model /opt/models/Mistral-7B-Instruct-v0.3.fp16.gguf \
+            --prompt "Say hello"
+      - name: FP16 - Run Qwen2.5-1.5b-instruct-fp16.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          ./llama-tornado --gpu --opencl \
+            --model /opt/models/qwen2.5-1.5b-instruct-fp16.gguf \
+            --prompt "Say hello"
+      - name: FP16 - Run Phi-3-mini-4k-instruct-fp16.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          ./llama-tornado --gpu --opencl \
+            --model /opt/models/Phi-3-mini-4k-instruct-fp16.gguf \
+            --prompt "Say hello"
+      - name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          ./llama-tornado --gpu --opencl \
+            --model /opt/models/Llama-3.2-1B-Instruct-Q8_0.gguf \
+            --prompt "Say hello"
+      - name: Q8 - Run Qwen3-0.6B-Q8_0.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          ./llama-tornado --gpu --opencl \
+            --model /opt/models/Qwen3-0.6B-Q8_0.gguf \
+            --prompt "Say hello"
+      - name: Q8 - Run Phi-3-mini-4k-instruct-Q8_0.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          ./llama-tornado --gpu --opencl \
+            --model /opt/models/Phi-3-mini-4k-instruct-Q8_0.gguf \
+            --prompt "Say hello"
+      - name: Q8 - Run Qwen2.5-1.5b-instruct-q8_0.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          ./llama-tornado --gpu --opencl \
+            --model /opt/models/qwen2.5-1.5b-instruct-q8_0.gguf \
+            --prompt "Say hello"
+      - name: Q8 - Mistral-7B-Instruct-v0.3.Q8_0.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
+          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
           ./llama-tornado --gpu --opencl \
-            --model "${{ matrix.model }}" \
+            --model /opt/models/Mistral-7B-Instruct-v0.3.Q8_0.gguf \
            --prompt "Say hello"
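
Any of the inference steps above can be replayed outside CI. A minimal local sketch, assuming a TornadoVM build that exports $TORNADO_SDK, a compatible JDK on $JAVA_HOME, and a model downloaded to the illustrative path below:

    # Replay one CI inference step locally (paths are illustrative;
    # adjust TORNADO_SDK, JAVA_HOME, and the model location for your machine).
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    which tornado || { echo "tornado not in PATH"; exit 1; }
    ./llama-tornado --gpu --opencl \
        --model /opt/models/Llama-3.2-1B-Instruct-Q8_0.gguf \
        --prompt "Say hello"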

README.md

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-# GPULlama3.java powered by TornadoVM
+# GPULlama3.java powered by TornadoVM [![GPULlama3 Build & Run Inference](https://github.com/beehive-lab/GPULlama3.java/actions/workflows/build-and-run.yml/badge.svg)](https://github.com/beehive-lab/GPULlama3.java/actions/workflows/build-and-run.yml)
 ![Java Version](https://img.shields.io/badge/java-21+-blue?style=for-the-badge&logo=openjdk)
 ![OpenCL](https://img.shields.io/badge/OpenCL-supported-blue?style=for-the-badge&logo=khronos)
 ![CUDA](https://img.shields.io/badge/CUDA/PTX-supported-76B900?style=for-the-badge&logo=nvidia)

llama-tornado

Lines changed: 1 addition & 1 deletion
@@ -410,7 +410,7 @@ def create_parser() -> argparse.ArgumentParser:
         const=Backend.PTX,
         help="Use PTX/CUDA backend",
     )
-    hw_group.add_argument("--gpu-memory", default="7GB", help="GPU memory allocation")
+    hw_group.add_argument("--gpu-memory", default="14GB", help="GPU memory allocation")
     hw_group.add_argument("--heap-min", default="20g", help="Minimum JVM heap size")
     hw_group.add_argument("--heap-max", default="20g", help="Maximum JVM heap size")
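
The only behavioral change here is the --gpu-memory default doubling from 7GB to 14GB, so the larger FP16 checkpoints exercised by the new CI steps fit in the device allocation (Mistral-7B alone is roughly 14 GB at two bytes per parameter). The flag can still be passed explicitly on smaller GPUs; a sketch, with an illustrative value and model path:

    # Lower the allocation below the new 14GB default on a smaller GPU.
    ./llama-tornado --gpu --opencl --gpu-memory 8GB \
        --model /opt/models/Qwen2.5-0.5B-Instruct-Q8_0.gguf \
        --prompt "Say hello"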

src/main/java/org/beehive/gpullama3/tornadovm/layers/type/fp16/Phi3FP16FFNLayers.java

Lines changed: 6 additions & 6 deletions
@@ -156,12 +156,12 @@ TaskGraph setupSinglePhi3FFNLayer(Phi3TornadoWeights weights, int layerIndex) {
         unifiedLayer.consumeFromDevice(phi3State.wrapX);
         unifiedLayer.transferToDevice(DataTransferMode.FIRST_EXECUTION,
                 // Copy-in weights per layer for batched-layered layout
-                weights.rms_att_weightLayered[layerIndex],
-                weights.wqkvLayered[layerIndex],
-                weights.woLayered[layerIndex],
-                weights.rms_ffn_weightLayered[layerIndex],
-                weights.wUpLayered[layerIndex],
-                weights.wDownLayered[layerIndex]
+                weights.rms_att_weightLayered[layerIndex].asFloatArray(),
+                weights.wqkvLayered[layerIndex].asHalfFloatArray(),
+                weights.woLayered[layerIndex].asHalfFloatArray(),
+                weights.rms_ffn_weightLayered[layerIndex].asFloatArray(),
+                weights.wUpLayered[layerIndex].asHalfFloatArray(),
+                weights.wDownLayered[layerIndex].asHalfFloatArray()
         );
         unifiedLayer = configureLayerDataTransfers(unifiedLayer, layerIndex);
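
This fix unwraps each layered weight tensor to a TornadoVM-native array before registering it with the task graph: FP32 RMSNorm weights via asFloatArray(), FP16 projection weights via asHalfFloatArray(). A minimal sketch of the pattern, assuming TornadoVM's public TaskGraph API and import paths; the WeightTensor class below is hypothetical and stands in for the repo's layered-weight wrapper:

    import uk.ac.manchester.tornado.api.TaskGraph;
    import uk.ac.manchester.tornado.api.enums.DataTransferMode;
    import uk.ac.manchester.tornado.api.types.arrays.FloatArray;
    import uk.ac.manchester.tornado.api.types.arrays.HalfFloatArray;

    // Hypothetical stand-in for the project's tensor wrapper; only the two
    // unwrap accessors used in the diff above are modeled here.
    final class WeightTensor {
        private final FloatArray fp32;        // backing storage for FP32 tensors
        private final HalfFloatArray fp16;    // backing storage for FP16 tensors

        WeightTensor(FloatArray fp32, HalfFloatArray fp16) {
            this.fp32 = fp32;
            this.fp16 = fp16;
        }

        FloatArray asFloatArray() { return fp32; }
        HalfFloatArray asHalfFloatArray() { return fp16; }
    }

    final class LayerTransferSketch {
        // Register one layer's weights for a one-time host-to-device copy.
        // Passing the wrapper object itself would hand the runtime a type it
        // cannot directly map to a device buffer, which is presumably what
        // this commit corrects; the backing arrays are registered instead.
        static TaskGraph registerLayerWeights(TaskGraph graph, WeightTensor rmsNorm, WeightTensor projection) {
            return graph.transferToDevice(DataTransferMode.FIRST_EXECUTION,
                    rmsNorm.asFloatArray(),         // norm weights stay FP32
                    projection.asHalfFloatArray()); // matmul weights are FP16
        }
    }

Note the precision split the accessors encode: the large projection matrices travel as FP16 to halve transfer size and memory footprint, while the small RMSNorm vectors stay FP32, likely because normalization is more sensitive to reduced precision.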
