Skip to content

Commit 4d1bbbf

Browse files
[vLLM Profiling] Github workflow for vLLM profiling (#75)
* Created a workflow for vllm profiling * fix commit bug * fix issue * add the code for server start as well * fix server issue * added missing import statements * refactored script * try with diff command * fix directory issue * trying a different approach for directory * not working yet * removing logs * running generic tests * fix test * fix the path * fixing path issue * refactored code and split common functions * renamed file name * address review comments * fix a small bug
1 parent 774075f commit 4d1bbbf

File tree

6 files changed

+537
-115
lines changed

6 files changed

+537
-115
lines changed

.github/scripts/run-sglang-performance-benchmarks.sh

Lines changed: 7 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -9,31 +9,13 @@
99
set -x
1010
set -o pipefail
1111

12+
# Source common functions
13+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
14+
source "${SCRIPT_DIR}/utilities.sh"
15+
1216
# The helper functions and their implementations are referred from the implementation
1317
# of the run-performance-benchmarks.sh script in the official vllm repo
1418
# Path:- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
15-
check_gpus() {
16-
if command -v nvidia-smi; then
17-
# check the number of GPUs and GPU type.
18-
declare -g gpu_count=$(nvidia-smi --list-gpus | wc -l)
19-
elif command -v amd-smi; then
20-
declare -g gpu_count=$(amd-smi list | grep 'GPU' | wc -l)
21-
fi
22-
23-
if [[ $gpu_count -gt 0 ]]; then
24-
echo "GPU found."
25-
else
26-
echo "Need at least 1 GPU to run benchmarking."
27-
exit 1
28-
fi
29-
if command -v nvidia-smi; then
30-
declare -g gpu_type=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')
31-
elif command -v amd-smi; then
32-
declare -g gpu_type=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
33-
fi
34-
echo "GPU type is $gpu_type"
35-
}
36-
3719
check_cpus() {
3820
# check the number of CPUs and NUMA Node and GPU type.
3921
declare -g numa_count=$(lscpu | grep "NUMA node(s):" | awk '{print $3}')
@@ -48,18 +30,6 @@ check_cpus() {
4830
echo "GPU type is $gpu_type"
4931
}
5032

51-
# Validate that HF_TOKEN is present and looks like a Hugging Face token
# (prefix "hf_"), so the benchmark fails early instead of mid-download.
# Globals read: HF_TOKEN
# Exits: 1 when the token is missing or malformed.
check_hf_token() {
  # ${HF_TOKEN:-} avoids an "unbound variable" error if the script is ever
  # run with `set -u` and the token is not exported.
  if [[ -z "${HF_TOKEN:-}" ]]; then
    echo "Error: HF_TOKEN is not set."
    exit 1
  elif [[ ! "$HF_TOKEN" =~ ^hf_ ]]; then
    echo "Error: HF_TOKEN does not start with 'hf_'."
    exit 1
  else
    echo "HF_TOKEN is set and valid."
  fi
}
6333

6434
ensure_sharegpt_downloaded() {
6535
local FILE=ShareGPT_V3_unfiltered_cleaned_split.json
@@ -70,78 +40,6 @@ ensure_sharegpt_downloaded() {
7040
fi
7141
}
7242

73-
# Convert a flat JSON object into a "--key value" command-line flag string,
# mapping '_' in key names to '-'.
#   in:  { "model": "meta-llama/Llama-2-7b-chat-hf", "tensor_parallel_size": 1 }
#   out: --model meta-llama/Llama-2-7b-chat-hf --tensor-parallel-size 1
# $1 - JSON object as a string; result is written to stdout.
json2args() {
  local payload=$1
  # Feed the payload via a here-string; jq's -r output goes straight to
  # stdout, which is what the old intermediate-variable + echo produced.
  jq -r '
      to_entries |
      map("--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)) |
      join(" ")
    ' <<<"$payload"
}
88-
89-
# Convert a flat JSON object into space-separated "KEY=value" environment
# variable assignments.
#   in:  { "SGLANG_DISABLE_CUDA_GRAPH": 1 }
#   out: SGLANG_DISABLE_CUDA_GRAPH=1
# $1 - JSON object as a string; result is written to stdout.
json2envs() {
  local payload=$1
  # Here-string input; jq -r already emits the joined line, so no
  # intermediate variable is needed.
  jq -r '
      to_entries |
      map((.key ) + "=" + (.value | tostring)) |
      join(" ")
    ' <<<"$payload"
}
104-
105-
wait_for_server() {
106-
# wait for sglang server to start
107-
# return 1 if sglang server crashes
108-
timeout 1200 bash -c '
109-
until curl -s localhost:30000/v1/completions > /dev/null; do
110-
sleep 1
111-
done' && return 0 || return 1
112-
}
113-
114-
kill_processes_launched_by_current_bash() {
115-
# Kill all python processes launched from current bash script
116-
current_shell_pid=$$
117-
processes=$(ps -eo pid,ppid,command | awk -v ppid="$current_shell_pid" -v proc="$1" '$2 == ppid && $3 ~ proc {print $1}')
118-
if [ -n "$processes" ]; then
119-
echo "Killing the following processes matching '$1':"
120-
echo "$processes"
121-
echo "$processes" | xargs kill -9
122-
else
123-
echo "No processes found matching '$1'."
124-
fi
125-
}
126-
127-
kill_gpu_processes() {
128-
ps -aux
129-
lsof -t -i:30000 | xargs -r kill -9
130-
pgrep python3 | xargs -r kill -9
131-
pgrep python | xargs -r kill -9
132-
pgrep VLLM | xargs -r kill -9
133-
134-
# wait until GPU memory usage smaller than 1GB
135-
if command -v nvidia-smi; then
136-
while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
137-
sleep 1
138-
done
139-
elif command -v amd-smi; then
140-
while [ "$(amd-smi metric -g 0 | grep 'USED_VRAM' | awk '{print $2}')" -ge 1000 ]; do
141-
sleep 1
142-
done
143-
fi
144-
}
14543

14644
run_serving_tests() {
14745
# run serving tests using `sglang.bench_serving` command
@@ -211,7 +109,7 @@ run_serving_tests() {
211109
server_pid=$!
212110

213111
# wait until the server is alive
214-
if wait_for_server; then
112+
if wait_for_server "localhost:30000/v1/completions"; then
215113
echo ""
216114
echo "SGLang server is up and running."
217115
else
@@ -285,18 +183,14 @@ run_serving_tests() {
285183

286184
# clean up
287185
kill -9 $server_pid
288-
kill_gpu_processes
186+
kill_gpu_processes 30000
289187
done
290188
}
291189

292190
main() {
293191
check_gpus
294192
check_hf_token
295-
296-
# dependencies
297-
(which wget && which curl) || (apt-get update && apt-get install -y wget curl)
298-
(which jq) || (apt-get update && apt-get -y install jq)
299-
(which lsof) || (apt-get update && apt-get install -y lsof)
193+
install_dependencies
300194

301195
# get the current IP address, required by SGLang bench commands
302196
export SGLANG_HOST_IP=$(hostname -I | awk '{print $1}')
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#!/bin/bash
2+
set -eux
3+
4+
# Source common functions
5+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
6+
source "${SCRIPT_DIR}/utilities.sh"
7+
8+
# Log the profiling configuration in effect so CI output records exactly
# which settings this run used.
# Globals read: VLLM_TORCH_PROFILER_DIR, VLLM_USE_V1 (defaults shown inline).
print_configuration() {
  printf '%s\n' 'Running vLLM profiling with the following configuration:'
  printf '%s\n' " Profiler Dir: ${VLLM_TORCH_PROFILER_DIR:-not set}"
  printf '%s\n' " VLLM_USE_V1: ${VLLM_USE_V1:-1}"
}
13+
14+
# Prepare the working directory and the torch-profiler output directory.
# Globals:
#   WORKSPACE_DIR            (written) fixed workspace root, used by main()
#   VLLM_TORCH_PROFILER_DIR  (read)    required; profiler traces land here
# Exits non-zero if VLLM_TORCH_PROFILER_DIR is unset/empty or cd fails.
setup_workspace() {
  # Fail fast with a clear message instead of letting `mkdir -p ""` fail
  # obscurely further down.
  : "${VLLM_TORCH_PROFILER_DIR:?VLLM_TORCH_PROFILER_DIR must be set}"

  WORKSPACE_DIR="/tmp/workspace"
  cd "${WORKSPACE_DIR}"

  echo "Creating profiling directory: ${VLLM_TORCH_PROFILER_DIR}"
  mkdir -p "${VLLM_TORCH_PROFILER_DIR}"
  # World-readable so artifact-upload steps running as another user can read it.
  chmod 755 "${VLLM_TORCH_PROFILER_DIR}"
}
22+
23+
# Launch the vLLM OpenAI-compatible API server in the background and block
# until its endpoint answers (wait_for_server comes from utilities.sh).
# $1 - pre-built server flag string (word-split on purpose)
# Globals: server_pid (written; read later by cleanup_server),
#          SERVER_HOST / SERVER_PORT (read), VLLM_USE_V1 (read, default 1).
# Returns: 0 once the server is reachable, 1 on startup timeout.
start_vllm_server() {
  local launch_args="$1"

  echo "Starting vLLM server..."
  # launch_args is intentionally unquoted: it is a flag string built by
  # json2args and must word-split into individual arguments.
  VLLM_USE_V1=${VLLM_USE_V1:-1} python3 -m vllm.entrypoints.openai.api_server ${launch_args} &
  server_pid=$!
  echo "vLLM server started with PID: ${server_pid}"

  echo "Waiting for vLLM server to be ready..."
  if ! wait_for_server "${SERVER_HOST}:${SERVER_PORT}"; then
    echo "vLLM server failed to start within the timeout period."
    kill -9 $server_pid 2>/dev/null || true
    return 1
  fi

  echo "vLLM server is up and running!"
  return 0
}
43+
44+
# Drive load against the running server with `vllm bench serve` so the torch
# profiler captures traces under realistic traffic.
# $1 - pre-built flag string for the benchmark client (word-split on purpose)
run_profiling() {
  local bench_args="$1"

  echo "Starting load generation for profiling..."
  echo "Client command: vllm bench serve ${bench_args}"
  # Unquoted on purpose: bench_args is a whole flag string from json2args.
  vllm bench serve ${bench_args}
}
52+
53+
# Tear down the server started by start_vllm_server.
# Reads the global server_pid; leftover GPU/port processes are reaped by
# kill_gpu_processes (sourced from utilities.sh).
cleanup_server() {
  echo "Stopping vLLM server..."
  # || true: the PID may already be gone; that is not an error here.
  kill -9 "$server_pid" 2>/dev/null || true
  kill_gpu_processes
}
58+
59+
run_profiling_tests() {
60+
# run profiling tests using JSON configuration
61+
local profiling_test_file="$1"
62+
63+
if [[ ! -f "$profiling_test_file" ]]; then
64+
echo "Error: Profiling test file $profiling_test_file not found!"
65+
exit 1
66+
fi
67+
68+
# Iterate over profiling tests
69+
jq -c '.[]' "$profiling_test_file" | while read -r params; do
70+
# Get the test name
71+
TEST_NAME=$(echo "$params" | jq -r '.test_name')
72+
echo "Running profiling test case: $TEST_NAME"
73+
74+
75+
# Extract server and client parameters
76+
server_params=$(echo "$params" | jq -r '.server_parameters')
77+
client_params=$(echo "$params" | jq -r '.client_parameters')
78+
79+
# Convert JSON to command line arguments
80+
server_args=$(json2args "$server_params")
81+
client_args=$(json2args "$client_params")
82+
83+
# Extract host and port for server health check
84+
SERVER_HOST=$(echo "$server_params" | jq -r '.host // "::"')
85+
SERVER_PORT=$(echo "$server_params" | jq -r '.port // 8000')
86+
87+
# Convert :: to localhost for health check
88+
if [[ "$SERVER_HOST" == "::" ]]; then
89+
SERVER_HOST="localhost"
90+
fi
91+
92+
# Clean up any existing processes first
93+
kill_gpu_processes
94+
95+
# Run the profiling test
96+
if start_vllm_server "$server_args"; then
97+
run_profiling "$client_args"
98+
cleanup_server
99+
100+
# Debug: Check if profiling files were created
101+
echo "DEBUG: Checking profiling directory: ${VLLM_TORCH_PROFILER_DIR}"
102+
if [ -d "${VLLM_TORCH_PROFILER_DIR}" ]; then
103+
echo "DEBUG: Profiling directory exists for test $TEST_NAME"
104+
ls -la "${VLLM_TORCH_PROFILER_DIR}" || echo "DEBUG: Directory is empty or inaccessible"
105+
find "${VLLM_TORCH_PROFILER_DIR}" -type f 2>/dev/null | head -10 | while read file; do
106+
echo "DEBUG: Found profiling file: ${file}"
107+
done
108+
else
109+
echo "DEBUG: Profiling directory does not exist for test $TEST_NAME!"
110+
fi
111+
112+
echo "Profiling test $TEST_NAME completed successfully."
113+
else
114+
echo "Failed to start vLLM server for test $TEST_NAME."
115+
continue
116+
fi
117+
done
118+
}
119+
120+
# Entry point: print config, install dependencies, set up the workspace,
# locate the per-device test manifest under ${WORKSPACE_DIR}/vllm-profiling/,
# and run every profiling test in it.
# Globals read: DEVICE_NAME (default "cuda"), VLLM_USE_V1, WORKSPACE_DIR
# (set by setup_workspace), VLLM_TORCH_PROFILER_DIR.
main() {
  # Default to the V1 engine unless the caller overrides it.
  export VLLM_USE_V1=${VLLM_USE_V1:-1}

  print_configuration
  install_dependencies
  setup_workspace

  # The device type selects which manifest directory to use.
  local device="${DEVICE_NAME:-cuda}"
  local manifest="${WORKSPACE_DIR}/vllm-profiling/${device}/profiling-tests.json"

  echo "Looking for profiling test file: $manifest"

  # Guard clause: bail out with diagnostics when the manifest is missing.
  if [[ ! -f "$manifest" ]]; then
    echo "Error: No profiling test file found at $manifest"
    echo "Available files in ${WORKSPACE_DIR}/vllm-profiling/:"
    find "${WORKSPACE_DIR}/vllm-profiling/" -name "*.json" 2>/dev/null || echo "No JSON files found"
    exit 1
  fi

  echo "Found profiling test file: $manifest"
  run_profiling_tests "$manifest"

  echo "All profiling tests completed. Artifacts should be available in ${VLLM_TORCH_PROFILER_DIR:-default profiler directory}."
}
147+
148+
main "$@"

0 commit comments

Comments
 (0)