@@ -40,6 +40,106 @@ ensure_sharegpt_downloaded() {
4040 fi
4141}
4242
#######################################
# Build vLLM from source for AMD ROCm and install it into the currently
# active Python environment (all installs go through `uv pip`).
#
# Steps: validate the ROCm install, install build tooling, replace PyTorch
# with a ROCm build, build Triton at a pinned commit, clone vLLM, install its
# ROCm requirements, detect the GPU architecture and build vLLM in develop
# mode, then verify that `import vllm` works.
#
# Globals:
#   PYTORCH_ROCM_INDEX_URL (read)  - wheel index for ROCm PyTorch; defaults
#                                    to the rocm6.3 index.
#   ROCM_HOME (read, exported)     - ROCm install prefix; defaults to
#                                    /opt/rocm.
#   VLLM_TARGET_DEVICE, PYTORCH_ROCM_ARCH, HIP_PLATFORM, PATH,
#   LD_LIBRARY_PATH, PYTORCH_HIP_ALLOC_CONF, HIP_VISIBLE_DEVICES,
#   AMD_LOG_LEVEL (exported)
# Arguments: none
# Outputs:   progress and error messages on stdout.
# Returns:   0 on success. Any failed prerequisite or build step terminates
#            the calling shell with `exit 1`.
#
# NOTE(review): the exports below stay in effect for the caller after this
# function returns (it is not run in a subshell); in particular
# HIP_VISIBLE_DEVICES=0 will apply to anything launched afterwards - confirm
# that this is intended.
#######################################
build_vllm_from_source_for_rocm() {
  echo "Starting vLLM build for ROCm..."

  local rocm_home="${ROCM_HOME:-/opt/rocm}"
  local extra_index="${PYTORCH_ROCM_INDEX_URL:-https://download.pytorch.org/whl/rocm6.3}"
  local triton_commit="e5be006"
  local gpu_arch=""
  local pkg

  # Validate ROCm installation
  if ! command -v rocminfo >/dev/null 2>&1; then
    echo "Error: rocminfo not found. Please ensure ROCm is properly installed."
    exit 1
  fi

  if [[ ! -d "$rocm_home" ]]; then
    echo "Error: ROCm installation directory $rocm_home not found."
    exit 1
  fi

  # Tooling & base deps for building
  uv pip install --upgrade pip
  uv pip install cmake ninja packaging typing_extensions pybind11 wheel

  # Install ROCm PyTorch that matches the container ROCm.
  # Uninstall failures are deliberately ignored: the package may be absent.
  for pkg in torch torchvision torchaudio; do
    uv pip uninstall "$pkg" || true
  done
  uv pip install --no-cache-dir --pre torch torchvision torchaudio \
    --index-url "${extra_index}"

  # Install Triton flash attention for ROCm
  echo "Installing Triton flash attention for ROCm..."
  uv pip uninstall triton || true
  # A checkout left behind by an interrupted run would make the clone fail.
  rm -rf triton
  if ! git clone https://github.com/OpenAI/triton.git; then
    echo "Error: Failed to clone Triton repository"
    exit 1
  fi
  # Subshells keep the caller's working directory untouched on every path.
  if ! (cd triton && git checkout "$triton_commit"); then
    echo "Error: Failed to checkout Triton commit $triton_commit"
    exit 1
  fi
  if ! (cd triton/python && uv pip install .); then
    echo "Error: Failed to install Triton"
    exit 1
  fi
  rm -rf triton

  # Clone vLLM source
  rm -rf vllm
  if ! git clone https://github.com/vllm-project/vllm.git; then
    echo "Error: Failed to clone vLLM repository"
    exit 1
  fi
  if ! cd vllm; then
    echo "Error: Failed to enter vLLM source directory"
    exit 1
  fi

  # Build & install AMD SMI
  uv pip install "$rocm_home/share/amd_smi"

  # Install additional dependencies (the extras spec is quoted so the
  # brackets are never treated as a glob pattern)
  uv pip install --upgrade numba \
    scipy \
    "huggingface-hub[cli,hf_transfer]" \
    setuptools_scm
  uv pip install "numpy<2"

  # Install ROCm-specific Python requirements from the repo
  if [[ -f requirements/rocm.txt ]]; then
    uv pip install -r requirements/rocm.txt
  fi

  # Detect GPU architecture dynamically: second field of the first rocminfo
  # line mentioning "gfx". awk consumes the whole stream (no early exit), so
  # rocminfo never receives SIGPIPE; `|| true` keeps a failing rocminfo from
  # aborting under `set -e -o pipefail` so the default below can apply.
  gpu_arch=$(rocminfo | awk '/gfx/ && !found { print $2; found = 1 }') || true
  if [[ -z "$gpu_arch" ]]; then
    gpu_arch="gfx90a"
  fi
  echo "Detected GPU architecture: $gpu_arch"

  # Set ROCm environment variables
  export VLLM_TARGET_DEVICE=rocm
  export PYTORCH_ROCM_ARCH="$gpu_arch"
  export ROCM_HOME="$rocm_home"
  export HIP_PLATFORM="amd"
  export PATH="$ROCM_HOME/bin:$PATH"
  # Only append the previous value when it is non-empty: an empty entry in
  # LD_LIBRARY_PATH is interpreted as the current directory.
  export LD_LIBRARY_PATH="$ROCM_HOME/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

  # Additional ROCm stability settings
  export PYTORCH_HIP_ALLOC_CONF="expandable_segments:True"
  export HIP_VISIBLE_DEVICES="0"
  export AMD_LOG_LEVEL=1  # Reduce AMD driver logging

  # Build & install vLLM into this venv
  echo "Building vLLM for ROCm with architecture: $gpu_arch"
  if ! python3 setup.py develop; then
    echo "Error: Failed to build vLLM from source"
    exit 1
  fi

  # Verify vLLM installation
  echo "Verifying vLLM installation..."
  if ! python3 -c "import vllm; print(f'vLLM version: {vllm.__version__}')"; then
    echo "Error: vLLM installation verification failed"
    exit 1
  fi

  echo "vLLM build completed successfully!"
  # Return to the directory the function was called from.
  cd ..
}
43143
44144run_serving_tests () {
45145 # run serving tests using `sglang.bench_serving` command
@@ -74,12 +174,11 @@ run_serving_tests() {
74174 qps_list=$( echo " $qps_list " | jq -r ' .[] | @sh' )
75175 echo " Running over qps list $qps_list "
76176
77- # Extract only specific SGLang server parameters
177+ # Extract special parameters that need mapping or special handling
78178 model_path=$( echo " $server_params " | jq -r ' .model_path // .model' )
79- context_length =$( echo " $server_params " | jq -r ' .context_length // 4096 ' )
179+ tp =$( echo " $server_params " | jq -r ' .tp // .tensor_parallel_size // 1 ' )
80180
81181 # check if there is enough resources to run the test
82- tp=$( echo " $server_params " | jq -r ' .tp // 1' )
83182 if [ " $ON_CPU " == " 1" ]; then
84183 if [[ $numa_count -lt $tp ]]; then
85184 echo " Required tensor-parallel-size $tp but only $numa_count NUMA nodes found. Skip testcase $test_name ."
@@ -95,13 +194,28 @@ run_serving_tests() {
95194 # check if server model and client model is aligned
96195 server_model=" $model_path "
97196 client_model=$( echo " $client_params " | jq -r ' .model // .model_path' )
98- if [[ $server_model != " $client_model " ]]; then
197+ if [[ $server_model != " $client_model " ]] && [[ $server_model != * " gpt-oss " * ]] ; then
99198 echo " Server model and client model must be the same. Skip testcase $test_name ."
100199 continue
101200 fi
102201
103- server_command=" python -m sglang.launch_server --model-path $model_path --context-length $context_length --tp $tp "
104-
202+ # Remove the special parameters that we'll handle manually
203+ server_params_filtered=$( echo " $server_params " | jq ' del(.model, .model_path, .tensor_parallel_size, .tp)' )
204+
205+ # Use the json2args utility to convert the filtered params to command line arguments
206+ server_args=$( json2args " $server_params_filtered " )
207+
208+ # Build the server command with manually mapped parameters and auto-parsed ones
209+ server_command=" python3 -m sglang.launch_server --model-path $model_path --tp $tp $server_args "
210+
211+ # Model-specific environment variables (command-line flags can be added to JSON directly)
212+ if [[ " ${DEVICE_NAME:- } " == " rocm" ]]; then
213+ # GPT-OSS models on ROCm - set environment variables
214+ if [[ " $model_path " == * " gpt-oss" * ]]; then
215+ echo " Detected GPT-OSS model on ROCm, setting compatibility environment variables"
216+ export SGLANG_USE_AITER=0
217+ fi
218+ fi
105219 # run the server
106220 echo " Running test case $test_name "
107221 echo " Server command: $server_command "
@@ -119,14 +233,17 @@ run_serving_tests() {
119233 continue
120234 fi
121235
122- # Create a new uv environment for vllm client (once per test case)
123236 echo " Creating new uv environment for vllm client..."
124237 uv venv vllm_client_env
125238
126- # Activate the environment and install vllm
127239 echo " Installing vllm in the new environment..."
128240 source vllm_client_env/bin/activate
129- pip install vllm
241+
242+ if [[ " ${DEVICE_NAME:- } " == " rocm" ]]; then
243+ build_vllm_from_source_for_rocm
244+ else
245+ uv pip install vllm
246+ fi
130247
131248 # iterate over different QPS
132249 for qps in $qps_list ; do
@@ -192,6 +309,8 @@ main() {
192309 check_hf_token
193310 install_dependencies
194311
312+ pip install uv
313+
195314 # get the current IP address, required by SGLang bench commands
196315 export SGLANG_HOST_IP=$( hostname -I | awk ' {print $1}' )
197316 # turn off the reporting of the status of each request, to clean up the terminal output
0 commit comments