name: TritonBench

on:
  schedule:
    # Run every 12 hours
    - cron: '0 */12 * * *'
  workflow_dispatch:
    # NOTE(review): the original nested these keys directly under
    # workflow_dispatch; GitHub Actions requires them under `inputs:`.
    inputs:
      tritonbench_branch:
        description: TritonBench branch (main)
        required: true
        type: string
        default: main
      benchmarks:
        description: |
          A comma-separated list of benchmarks from tritonbench/benchmarks (optional, default to run nightly)
        required: false
        type: string
      runners:
        description: |
          A comma-separated list of runners from .github/scripts/generate_tritonbench_matrix.py to run the benchmark (optional, default to run b200)
        required: true
        type: string
        default: b200

# One run per trigger flavor; newer runs cancel older in-flight ones
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  # Builds the benchmark x runner matrix consumed by the benchmarks job below.
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      # NOTE(review): assumes generate_tritonbench_matrix.py writes
      # benchmark_matrix to $GITHUB_OUTPUT — confirm in the script.
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          # Both are empty on schedule events; the script is expected to
          # fall back to its nightly defaults in that case
          BENCHMARKS: ${{ inputs.benchmarks || '' }}
          RUNNERS: ${{ inputs.runners || '' }}
        run: |
          set -eux

          # The generated matrix is grouped by benchmark and runner
          python .github/scripts/generate_tritonbench_matrix.py \
            --benchmarks "${BENCHMARKS}" \
            --runners "${RUNNERS}"

| 58 | + benchmarks: |
| 59 | + name: Run TritonBench benchmarks |
| 60 | + needs: set-parameters |
| 61 | + if: ${{ !github.event.pull_request.head.repo.fork && github.repository_owner == 'pytorch' }} |
| 62 | + strategy: |
| 63 | + matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }} |
| 64 | + fail-fast: false |
| 65 | + runs-on: ${{ matrix.runner }} |
| 66 | + environment: pytorch-x-vllm |
| 67 | + permissions: |
| 68 | + id-token: write |
| 69 | + contents: read |
| 70 | + steps: |
| 71 | + - name: Checkout repository |
| 72 | + uses: actions/checkout@v4 |
| 73 | + |
| 74 | + - name: Install system dependencies |
| 75 | + shell: bash |
| 76 | + run: | |
| 77 | + sudo apt-get update |
| 78 | + sudo apt-get install -y libnuma-dev numactl |
| 79 | + |
| 80 | + - name: Checkout TritonBench repository |
| 81 | + uses: actions/checkout@v4 |
| 82 | + with: |
| 83 | + repository: meta-pytorch/tritonbench |
| 84 | + path: triton-benchmarks/tritonbench |
| 85 | + ref: ${{ inputs.tritonbench_branch || 'main' }} |
| 86 | + fetch-depth: 0 |
| 87 | + |
| 88 | + - uses: actions/setup-python@v5 |
| 89 | + # Amazon Linux fails on this step |
| 90 | + continue-on-error: true |
| 91 | + with: |
| 92 | + python-version: '3.12' |
| 93 | + cache: 'pip' |
| 94 | + |
| 95 | + - name: Check if the device is supported |
| 96 | + shell: bash |
| 97 | + run: | |
| 98 | + set -eux |
| 99 | +
|
| 100 | + if command -v nvidia-smi; then |
| 101 | + DEVICE_NAME=cuda |
| 102 | + nvidia-smi |
| 103 | + elif command -v rocm-smi; then |
| 104 | + DEVICE_NAME=rocm |
| 105 | + rocm-smi |
| 106 | + else |
| 107 | + DEVICE_NAME=cpu |
| 108 | + lscpu |
| 109 | + fi |
| 110 | + echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV |
| 111 | +
|
| 112 | + - name: Set GPU name and type |
| 113 | + shell: bash |
| 114 | + run: | |
| 115 | + set -eux |
| 116 | +
|
| 117 | + if [[ "${DEVICE_NAME}" == "cuda" ]]; then |
| 118 | + DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}') |
| 119 | + CUDA_HOME="/usr/local/cuda" |
| 120 | + echo "CUDA_HOME=$CUDA_HOME" >> $GITHUB_ENV |
| 121 | + elif [[ "${DEVICE_NAME}" == "rocm" ]]; then |
| 122 | + DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs) |
| 123 | + elif [[ "${DEVICE_NAME}" == "cpu" ]]; then |
| 124 | + DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ") |
| 125 | + fi |
| 126 | + echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV |
| 127 | +
|
| 128 | + - name: Setup CUDA GPU_FLAG for docker run |
| 129 | + if: env.DEVICE_NAME == 'cuda' |
| 130 | + run: | |
| 131 | + echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}" |
| 132 | +
|
| 133 | + - name: Select TritonBench Docker image |
| 134 | + shell: bash |
| 135 | + run: | |
| 136 | + set -eux |
| 137 | + # Determine image suffix based on device |
| 138 | + if [[ "${DEVICE_NAME}" == "cuda" ]]; then |
| 139 | + IMAGE_SUFFIX="latest" |
| 140 | + elif [[ "${DEVICE_NAME}" == "rocm" ]]; then |
| 141 | + IMAGE_SUFFIX="rocm-latest" |
| 142 | + else |
| 143 | + echo "TritonBench requires either CUDA or ROCm devices." |
| 144 | + exit 1 |
| 145 | + fi |
| 146 | +
|
| 147 | + DOCKER_IMAGE="meta-pytorch/tritonbench:${IMAGE_SUFFIX}" |
| 148 | + echo "DOCKER_IMAGE=$DOCKER_IMAGE" >> "$GITHUB_ENV" |
| 149 | + echo "CONDA_ENV=triton-main" >> "$GITHUB_ENV" |
| 150 | + echo "Using docker image: $DOCKER_IMAGE " |
| 151 | + echo "Using conda env: $CONDA_ENV " |
| 152 | +
|
| 153 | + - name: Run TritonBench benchmark |
| 154 | + run: | |
| 155 | + set -eux |
| 156 | +
|
| 157 | + container_name=$(docker run \ |
| 158 | + ${GPU_FLAG:-} \ |
| 159 | + -e DEVICE_NAME \ |
| 160 | + -e DEVICE_TYPE \ |
| 161 | + -e CONDA_ENV \ |
| 162 | + --ipc=host \ |
| 163 | + --tty \ |
| 164 | + --detach \ |
| 165 | + --security-opt seccomp=unconfined \ |
| 166 | + --shm-size=32g \ |
| 167 | + -v "${GITHUB_WORKSPACE}:/tmp/workspace" \ |
| 168 | + -w /tmp/workspace \ |
| 169 | + "${DOCKER_IMAGE}" |
| 170 | + ) |
| 171 | +
|
| 172 | + docker exec -t -w /tmp/workspace "${container_name}" bash -c " \ |
| 173 | + set -eux && cd /workspace/tritonbench && |
| 174 | + bash .ci/tritonbench/run-benchmark.sh ${{ matrix.BENCHMARKS }} --conda-env ${{ env.CONDA_ENV }} " |
| 175 | + |
| 176 | + docker exec -t -w /tmp/workspace "${container_name}" bash -c " \ |
| 177 | + set -eux && cd /workspace/tritonbench && mv .benchmarks /tmp/workspace/triton-benchmarks/tritonbench/results |
| 178 | + " |
| 179 | +
|
| 180 | + - name: Authenticate with AWS |
| 181 | + # AWS CUDA runners already have access to the bucket via its runner IAM role |
| 182 | + if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200') |
| 183 | + uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0 |
| 184 | + with: |
| 185 | + role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results |
| 186 | + # The max duration enforced by the server side |
| 187 | + role-duration-seconds: 18000 |
| 188 | + aws-region: us-east-1 |
| 189 | + |
| 190 | + # Keep a copy of the benchmark results on GitHub for reference |
| 191 | + - uses: actions/upload-artifact@v4 |
| 192 | + if: always() |
| 193 | + with: |
| 194 | + name: tritonbench-results |
| 195 | + path: triton-benchmarks/tritonbench/results |
| 196 | + retention-days: 30 |
| 197 | + |
| 198 | + - name: Upload result to Scribe |
| 199 | + working-directory: triton-benchmarks/tritonbench |
| 200 | + run: | |
| 201 | + latest_result_json=$(find ./results/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1) |
| 202 | + python3 ./.ci/upload/scribe.py --json ${latest_result_json} |
| 203 | +
|
| 204 | + - name: Rewrite Tritonbench result json to ClickHouse style |
| 205 | + working-directory: triton-benchmarks/tritonbench |
| 206 | + run: | |
| 207 | + latest_result_json=$(find ./results/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1) |
| 208 | + python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \ |
| 209 | + --output clickhouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json |
| 210 | +
|
| 211 | + - name: Upload result to ClickHouse |
| 212 | + uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main |
| 213 | + with: |
| 214 | + benchmark-results-dir: triton-benchmarks/tritonbench/clickhouse-results |
| 215 | + dry-run: false |
| 216 | + schema-version: v3 |
| 217 | + github-token: ${{ secrets.GITHUB_TOKEN }} |
| 218 | + |
| 219 | + - name: Kill the container |
| 220 | + if: always() |
| 221 | + run: | |
| 222 | + docker kill "${TRITONBENCH_CONTAINER_ID}" || true |
0 commit comments