Skip to content

Commit 4d1bbbf

Browse files
[vLLM Profiling] Github workflow for vLLM profiling (#75)
* Created a workflow for vllm profiling * fix commit bug * fix issue * add the code for server start as well * fix server issue * added missing import statements * refactored script * try with diff command * fix directory issue * trying a different approach for directory * not working yet * removing logs * running generic tests * fix test * fix the path * fixing path issue * refactored code and split common functions * renamed file name * address review comments * fix a small bug
1 parent 774075f commit 4d1bbbf

File tree

6 files changed

+537
-115
lines changed

6 files changed

+537
-115
lines changed

.github/scripts/run-sglang-performance-benchmarks.sh

Lines changed: 7 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -9,31 +9,13 @@
99
set -x
1010
set -o pipefail
1111

12+
# Source common functions
13+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
14+
source "${SCRIPT_DIR}/utilities.sh"
15+
1216
# The helper functions and their implementations are referred from the implementation
1317
# of the run-performance-benchmarks.sh script in the official vllm repo
1418
# Path:- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
15-
check_gpus() {
16-
if command -v nvidia-smi; then
17-
# check the number of GPUs and GPU type.
18-
declare -g gpu_count=$(nvidia-smi --list-gpus | wc -l)
19-
elif command -v amd-smi; then
20-
declare -g gpu_count=$(amd-smi list | grep 'GPU' | wc -l)
21-
fi
22-
23-
if [[ $gpu_count -gt 0 ]]; then
24-
echo "GPU found."
25-
else
26-
echo "Need at least 1 GPU to run benchmarking."
27-
exit 1
28-
fi
29-
if command -v nvidia-smi; then
30-
declare -g gpu_type=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')
31-
elif command -v amd-smi; then
32-
declare -g gpu_type=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
33-
fi
34-
echo "GPU type is $gpu_type"
35-
}
36-
3719
check_cpus() {
3820
# check the number of CPUs and NUMA Node and GPU type.
3921
declare -g numa_count=$(lscpu | grep "NUMA node(s):" | awk '{print $3}')
@@ -48,18 +30,6 @@ check_cpus() {
4830
echo "GPU type is $gpu_type"
4931
}
5032

51-
# Validate that HF_TOKEN is present and looks like a Hugging Face token
# (prefix "hf_"), so the benchmark fails early instead of mid-download.
# Globals read: HF_TOKEN
# Exits: 1 when the token is missing or malformed.
check_hf_token() {
  # ${HF_TOKEN:-} avoids an "unbound variable" error if the script is ever
  # run with `set -u` and the token is not exported.
  if [[ -z "${HF_TOKEN:-}" ]]; then
    echo "Error: HF_TOKEN is not set."
    exit 1
  elif [[ ! "$HF_TOKEN" =~ ^hf_ ]]; then
    echo "Error: HF_TOKEN does not start with 'hf_'."
    exit 1
  else
    echo "HF_TOKEN is set and valid."
  fi
}
6333

6434
ensure_sharegpt_downloaded() {
6535
local FILE=ShareGPT_V3_unfiltered_cleaned_split.json
@@ -70,78 +40,6 @@ ensure_sharegpt_downloaded() {
7040
fi
7141
}
7242

73-
# Convert a flat JSON object into a "--key value" command-line flag string,
# mapping '_' in key names to '-'.
#   in:  { "model": "meta-llama/Llama-2-7b-chat-hf", "tensor_parallel_size": 1 }
#   out: --model meta-llama/Llama-2-7b-chat-hf --tensor-parallel-size 1
# $1 - JSON object as a string; result is written to stdout.
json2args() {
  local payload=$1
  # Feed the payload via a here-string; jq's -r output goes straight to
  # stdout, which is what the old intermediate-variable + echo produced.
  jq -r '
      to_entries |
      map("--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)) |
      join(" ")
    ' <<<"$payload"
}
88-
89-
# Convert a flat JSON object into space-separated "KEY=value" environment
# variable assignments.
#   in:  { "SGLANG_DISABLE_CUDA_GRAPH": 1 }
#   out: SGLANG_DISABLE_CUDA_GRAPH=1
# $1 - JSON object as a string; result is written to stdout.
json2envs() {
  local payload=$1
  # Here-string input; jq -r already emits the joined line, so no
  # intermediate variable is needed.
  jq -r '
      to_entries |
      map((.key ) + "=" + (.value | tostring)) |
      join(" ")
    ' <<<"$payload"
}
104-
105-
wait_for_server() {
106-
# wait for sglang server to start
107-
# return 1 if sglang server crashes
108-
timeout 1200 bash -c '
109-
until curl -s localhost:30000/v1/completions > /dev/null; do
110-
sleep 1
111-
done' && return 0 || return 1
112-
}
113-
114-
kill_processes_launched_by_current_bash() {
115-
# Kill all python processes launched from current bash script
116-
current_shell_pid=$$
117-
processes=$(ps -eo pid,ppid,command | awk -v ppid="$current_shell_pid" -v proc="$1" '$2 == ppid && $3 ~ proc {print $1}')
118-
if [ -n "$processes" ]; then
119-
echo "Killing the following processes matching '$1':"
120-
echo "$processes"
121-
echo "$processes" | xargs kill -9
122-
else
123-
echo "No processes found matching '$1'."
124-
fi
125-
}
126-
127-
kill_gpu_processes() {
128-
ps -aux
129-
lsof -t -i:30000 | xargs -r kill -9
130-
pgrep python3 | xargs -r kill -9
131-
pgrep python | xargs -r kill -9
132-
pgrep VLLM | xargs -r kill -9
133-
134-
# wait until GPU memory usage smaller than 1GB
135-
if command -v nvidia-smi; then
136-
while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
137-
sleep 1
138-
done
139-
elif command -v amd-smi; then
140-
while [ "$(amd-smi metric -g 0 | grep 'USED_VRAM' | awk '{print $2}')" -ge 1000 ]; do
141-
sleep 1
142-
done
143-
fi
144-
}
14543

14644
run_serving_tests() {
14745
# run serving tests using `sglang.bench_serving` command
@@ -211,7 +109,7 @@ run_serving_tests() {
211109
server_pid=$!
212110

213111
# wait until the server is alive
214-
if wait_for_server; then
112+
if wait_for_server "localhost:30000/v1/completions"; then
215113
echo ""
216114
echo "SGLang server is up and running."
217115
else
@@ -285,18 +183,14 @@ run_serving_tests() {
285183

286184
# clean up
287185
kill -9 $server_pid
288-
kill_gpu_processes
186+
kill_gpu_processes 30000
289187
done
290188
}
291189

292190
main() {
293191
check_gpus
294192
check_hf_token
295-
296-
# dependencies
297-
(which wget && which curl) || (apt-get update && apt-get install -y wget curl)
298-
(which jq) || (apt-get update && apt-get -y install jq)
299-
(which lsof) || (apt-get update && apt-get install -y lsof)
193+
install_dependencies
300194

301195
# get the current IP address, required by SGLang bench commands
302196
export SGLANG_HOST_IP=$(hostname -I | awk '{print $1}')
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#!/bin/bash
2+
set -eux
3+
4+
# Source common functions
5+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
6+
source "${SCRIPT_DIR}/utilities.sh"
7+
8+
# Log the profiling configuration in effect so CI output records exactly
# which settings this run used.
# Globals read: VLLM_TORCH_PROFILER_DIR, VLLM_USE_V1 (defaults shown inline).
print_configuration() {
  printf '%s\n' 'Running vLLM profiling with the following configuration:'
  printf '%s\n' " Profiler Dir: ${VLLM_TORCH_PROFILER_DIR:-not set}"
  printf '%s\n' " VLLM_USE_V1: ${VLLM_USE_V1:-1}"
}
13+
14+
# Prepare the working directory and the torch-profiler output directory.
# Globals:
#   WORKSPACE_DIR            (written) fixed workspace root, used by main()
#   VLLM_TORCH_PROFILER_DIR  (read)    required; profiler traces land here
# Exits non-zero if VLLM_TORCH_PROFILER_DIR is unset/empty or cd fails.
setup_workspace() {
  # Fail fast with a clear message instead of letting `mkdir -p ""` fail
  # obscurely further down.
  : "${VLLM_TORCH_PROFILER_DIR:?VLLM_TORCH_PROFILER_DIR must be set}"

  WORKSPACE_DIR="/tmp/workspace"
  cd "${WORKSPACE_DIR}"

  echo "Creating profiling directory: ${VLLM_TORCH_PROFILER_DIR}"
  mkdir -p "${VLLM_TORCH_PROFILER_DIR}"
  # World-readable so artifact-upload steps running as another user can read it.
  chmod 755 "${VLLM_TORCH_PROFILER_DIR}"
}
22+
23+
# Launch the vLLM OpenAI-compatible API server in the background and block
# until its endpoint answers (wait_for_server comes from utilities.sh).
# $1 - pre-built server flag string (word-split on purpose)
# Globals: server_pid (written; read later by cleanup_server),
#          SERVER_HOST / SERVER_PORT (read), VLLM_USE_V1 (read, default 1).
# Returns: 0 once the server is reachable, 1 on startup timeout.
start_vllm_server() {
  local launch_args="$1"

  echo "Starting vLLM server..."
  # launch_args is intentionally unquoted: it is a flag string built by
  # json2args and must word-split into individual arguments.
  VLLM_USE_V1=${VLLM_USE_V1:-1} python3 -m vllm.entrypoints.openai.api_server ${launch_args} &
  server_pid=$!
  echo "vLLM server started with PID: ${server_pid}"

  echo "Waiting for vLLM server to be ready..."
  if ! wait_for_server "${SERVER_HOST}:${SERVER_PORT}"; then
    echo "vLLM server failed to start within the timeout period."
    kill -9 $server_pid 2>/dev/null || true
    return 1
  fi

  echo "vLLM server is up and running!"
  return 0
}
43+
44+
# Drive load against the running server with `vllm bench serve` so the torch
# profiler captures traces under realistic traffic.
# $1 - pre-built flag string for the benchmark client (word-split on purpose)
run_profiling() {
  local bench_args="$1"

  echo "Starting load generation for profiling..."
  echo "Client command: vllm bench serve ${bench_args}"
  # Unquoted on purpose: bench_args is a whole flag string from json2args.
  vllm bench serve ${bench_args}
}
52+
53+
# Tear down the server started by start_vllm_server.
# Reads the global server_pid; leftover GPU/port processes are reaped by
# kill_gpu_processes (sourced from utilities.sh).
cleanup_server() {
  echo "Stopping vLLM server..."
  # || true: the PID may already be gone; that is not an error here.
  kill -9 "$server_pid" 2>/dev/null || true
  kill_gpu_processes
}
58+
59+
run_profiling_tests() {
60+
# run profiling tests using JSON configuration
61+
local profiling_test_file="$1"
62+
63+
if [[ ! -f "$profiling_test_file" ]]; then
64+
echo "Error: Profiling test file $profiling_test_file not found!"
65+
exit 1
66+
fi
67+
68+
# Iterate over profiling tests
69+
jq -c '.[]' "$profiling_test_file" | while read -r params; do
70+
# Get the test name
71+
TEST_NAME=$(echo "$params" | jq -r '.test_name')
72+
echo "Running profiling test case: $TEST_NAME"
73+
74+
75+
# Extract server and client parameters
76+
server_params=$(echo "$params" | jq -r '.server_parameters')
77+
client_params=$(echo "$params" | jq -r '.client_parameters')
78+
79+
# Convert JSON to command line arguments
80+
server_args=$(json2args "$server_params")
81+
client_args=$(json2args "$client_params")
82+
83+
# Extract host and port for server health check
84+
SERVER_HOST=$(echo "$server_params" | jq -r '.host // "::"')
85+
SERVER_PORT=$(echo "$server_params" | jq -r '.port // 8000')
86+
87+
# Convert :: to localhost for health check
88+
if [[ "$SERVER_HOST" == "::" ]]; then
89+
SERVER_HOST="localhost"
90+
fi
91+
92+
# Clean up any existing processes first
93+
kill_gpu_processes
94+
95+
# Run the profiling test
96+
if start_vllm_server "$server_args"; then
97+
run_profiling "$client_args"
98+
cleanup_server
99+
100+
# Debug: Check if profiling files were created
101+
echo "DEBUG: Checking profiling directory: ${VLLM_TORCH_PROFILER_DIR}"
102+
if [ -d "${VLLM_TORCH_PROFILER_DIR}" ]; then
103+
echo "DEBUG: Profiling directory exists for test $TEST_NAME"
104+
ls -la "${VLLM_TORCH_PROFILER_DIR}" || echo "DEBUG: Directory is empty or inaccessible"
105+
find "${VLLM_TORCH_PROFILER_DIR}" -type f 2>/dev/null | head -10 | while read file; do
106+
echo "DEBUG: Found profiling file: ${file}"
107+
done
108+
else
109+
echo "DEBUG: Profiling directory does not exist for test $TEST_NAME!"
110+
fi
111+
112+
echo "Profiling test $TEST_NAME completed successfully."
113+
else
114+
echo "Failed to start vLLM server for test $TEST_NAME."
115+
continue
116+
fi
117+
done
118+
}
119+
120+
# Entry point: print config, install dependencies, set up the workspace,
# locate the per-device test manifest under ${WORKSPACE_DIR}/vllm-profiling/,
# and run every profiling test in it.
# Globals read: DEVICE_NAME (default "cuda"), VLLM_USE_V1, WORKSPACE_DIR
# (set by setup_workspace), VLLM_TORCH_PROFILER_DIR.
main() {
  # Default to the V1 engine unless the caller overrides it.
  export VLLM_USE_V1=${VLLM_USE_V1:-1}

  print_configuration
  install_dependencies
  setup_workspace

  # The device type selects which manifest directory to use.
  local device="${DEVICE_NAME:-cuda}"
  local manifest="${WORKSPACE_DIR}/vllm-profiling/${device}/profiling-tests.json"

  echo "Looking for profiling test file: $manifest"

  # Guard clause: bail out with diagnostics when the manifest is missing.
  if [[ ! -f "$manifest" ]]; then
    echo "Error: No profiling test file found at $manifest"
    echo "Available files in ${WORKSPACE_DIR}/vllm-profiling/:"
    find "${WORKSPACE_DIR}/vllm-profiling/" -name "*.json" 2>/dev/null || echo "No JSON files found"
    exit 1
  fi

  echo "Found profiling test file: $manifest"
  run_profiling_tests "$manifest"

  echo "All profiling tests completed. Artifacts should be available in ${VLLM_TORCH_PROFILER_DIR:-default profiler directory}."
}
147+
148+
main "$@"

0 commit comments

Comments
 (0)