Commit 9709270

[tritonbench] Add initial tritonbench benchmark config (#110)
1 parent 4033e32 commit 9709270

File tree

3 files changed: +356 -0 lines changed
.github/scripts/generate_tritonbench_matrix.py

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
#!/usr/bin/env python

import json
import logging
import os
from argparse import ArgumentParser, Namespace
from typing import Any, Dict, List


logging.basicConfig(level=logging.INFO)

# This mapping is needed to find out the platform of the runner
RUNNER_TO_PLATFORM_MAPPING = {
    "linux.dgx.b200": "cuda",
}

# TritonBench benchmarks
TRITONBENCH_BENCHMARKS = {
    "nightly",
}


def set_output(name: str, val: Any) -> None:
    """
    Set the output value to be used by other GitHub jobs.

    Args:
        name (str): The name of the output variable.
        val (Any): The value to set for the output variable.

    Example:
        set_output("benchmark_matrix", {"include": [...]})
    """
    github_output = os.getenv("GITHUB_OUTPUT")

    if not github_output:
        # Fall back to the legacy workflow command when GITHUB_OUTPUT is unset
        print(f"::set-output name={name}::{val}")
        return

    with open(github_output, "a") as env:
        env.write(f"{name}={val}\n")


def parse_args() -> Namespace:
    parser = ArgumentParser("Generate TritonBench benchmark CI matrix")

    parser.add_argument(
        "--benchmarks",
        type=str,
        default="nightly",
        help="the comma-separated list of benchmarks to run. Defaults to nightly.",
    )
    parser.add_argument(
        "--runners",
        type=str,
        default="",
        help="the comma-separated list of runners to run the benchmark. Required.",
        required=True,
    )

    return parser.parse_args()


def generate_benchmark_matrix(
    benchmarks: List[str], runners: List[str]
) -> Dict[str, Any]:
    benchmark_matrix: Dict[str, Any] = {
        "include": [],
    }
    if not runners:
        runners = list(RUNNER_TO_PLATFORM_MAPPING.keys())
    else:
        # Treat each requested runner as a substring filter over the known
        # runner names, e.g. "b200" selects "linux.dgx.b200"
        runner_args = runners.copy()
        runners = []
        for k in RUNNER_TO_PLATFORM_MAPPING:
            for r in runner_args:
                if r.lower() in k:
                    runners.append(k)

    if not benchmarks:
        benchmarks = sorted(TRITONBENCH_BENCHMARKS)

    # Gather all possible benchmarks
    for runner in runners:
        for benchmark in benchmarks:
            benchmark_matrix["include"].append(
                {
                    "runner": runner,
                    # A comma-separated list of benchmarks is kept here so
                    # that multiple benchmarks can run on the same runner
                    "benchmarks": benchmark,
                }
            )

    return benchmark_matrix


def main() -> None:
    args = parse_args()
    benchmarks = [b.strip().lower() for b in args.benchmarks.split(",") if b.strip()]
    runners = [r.strip().lower() for r in args.runners.split(",") if r.strip()]
    benchmark_matrix = generate_benchmark_matrix(benchmarks, runners)
    print(benchmark_matrix)
    # Serialize to JSON so that fromJson() in the workflow can parse the matrix
    set_output("benchmark_matrix", json.dumps(benchmark_matrix))


if __name__ == "__main__":
    main()
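For a quick sanity check, this is roughly how the generator behaves when imported directly (a local sketch; the inputs are illustrative, and the printed results follow from RUNNER_TO_PLATFORM_MAPPING and TRITONBENCH_BENCHMARKS above):

from generate_tritonbench_matrix import generate_benchmark_matrix

# "b200" is matched as a substring of the known runner name "linux.dgx.b200"
print(generate_benchmark_matrix(["nightly"], ["b200"]))
# {'include': [{'runner': 'linux.dgx.b200', 'benchmarks': 'nightly'}]}

# Empty lists fall back to every known runner and the default benchmark set
print(generate_benchmark_matrix([], []))
# {'include': [{'runner': 'linux.dgx.b200', 'benchmarks': 'nightly'}]}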
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
import json

from expecttest import assert_expected_inline

from generate_tritonbench_matrix import generate_benchmark_matrix


def test_generate_benchmark_matrix():
    # All combinations, no duplication
    benchmarks = []
    runners = []
    output = json.dumps(generate_benchmark_matrix(benchmarks, runners), indent=2)
    assert_expected_inline(
        output,
        """\
{
  "include": [
    {
      "runner": "linux.dgx.b200",
      "benchmarks": "nightly"
    }
  ]
}""",
    )
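The matrix reaches the workflow through the GITHUB_OUTPUT file, so the value must be valid JSON for fromJson() to parse downstream. A minimal sketch of that handoff, simulating the runner with a scratch file (the temp-file setup is illustrative):

import json
import os
import tempfile

from generate_tritonbench_matrix import generate_benchmark_matrix, set_output

# Point GITHUB_OUTPUT at a scratch file, the same way the GitHub runner does
fd, path = tempfile.mkstemp()
os.close(fd)
os.environ["GITHUB_OUTPUT"] = path

set_output("benchmark_matrix", json.dumps(generate_benchmark_matrix([], [])))

with open(path) as f:
    print(f.read())  # benchmark_matrix={"include": [...]}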

.github/workflows/tritonbench.yml

Lines changed: 222 additions & 0 deletions
@@ -0,0 +1,222 @@
1+
name: TritonBench
2+
3+
on:
4+
schedule:
5+
# Run every 12 hours
6+
- cron: '0 */12 * * *'
7+
workflow_dispatch:
8+
tritonbench_branch:
9+
description: TritonBench branch (main)
10+
required: true
11+
type: string
12+
default: main
13+
benchmarks:
14+
description: |
15+
A comma-separated list of benchmarks from tritonbench/benchmarks (optional, default to run nightly)
16+
required: false
17+
type: string
18+
runners:
19+
description: |
20+
A comma-separated list of runners from .github/scripts/genenerate_tritonbench_matrix.py to run the benchmark (optional, default to run b200)
21+
required: true
22+
type: string
23+
default: b200
24+
25+
concurrency:
26+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
27+
cancel-in-progress: true
28+
29+
30+
jobs:
31+
set-parameters:
32+
runs-on: ubuntu-latest
33+
outputs:
34+
benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
35+
steps:
36+
- name: Checkout repository
37+
uses: actions/checkout@v4
38+
39+
- uses: actions/setup-python@v5
40+
with:
41+
python-version: '3.12'
42+
43+
- name: Set parameters
44+
id: set-parameters
45+
shell: bash
46+
env:
47+
BENCHMARKS: ${{ inputs.benchmarks || '' }}
48+
RUNNERS: ${{ inputs.runners || '' }}
49+
run: |
50+
set -eux
51+
52+
# The generated matrix is grouped by benchmark and runner
53+
python .github/scripts/generate_tritonbench_matrix.py \
54+
--benchmarks "${BENCHMARKS}" \
55+
--runners "${RUNNERS}"
56+
57+
58+
benchmarks:
59+
name: Run TritonBench benchmarks
60+
needs: set-parameters
61+
if: ${{ !github.event.pull_request.head.repo.fork && github.repository_owner == 'pytorch' }}
62+
strategy:
63+
matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
64+
fail-fast: false
65+
runs-on: ${{ matrix.runner }}
66+
environment: pytorch-x-vllm
67+
permissions:
68+
id-token: write
69+
contents: read
70+
steps:
71+
- name: Checkout repository
72+
uses: actions/checkout@v4
73+
74+
- name: Install system dependencies
75+
shell: bash
76+
run: |
77+
sudo apt-get update
78+
sudo apt-get install -y libnuma-dev numactl
79+
80+
- name: Checkout TritonBench repository
81+
uses: actions/checkout@v4
82+
with:
83+
repository: meta-pytorch/tritonbench
84+
path: triton-benchmarks/tritonbench
85+
ref: ${{ inputs.tritonbench_branch || 'main' }}
86+
fetch-depth: 0
87+
88+
- uses: actions/setup-python@v5
89+
# Amazon Linux fails on this step
90+
continue-on-error: true
91+
with:
92+
python-version: '3.12'
93+
cache: 'pip'
94+
95+
- name: Check if the device is supported
96+
shell: bash
97+
run: |
98+
set -eux
99+
100+
if command -v nvidia-smi; then
101+
DEVICE_NAME=cuda
102+
nvidia-smi
103+
elif command -v rocm-smi; then
104+
DEVICE_NAME=rocm
105+
rocm-smi
106+
else
107+
DEVICE_NAME=cpu
108+
lscpu
109+
fi
110+
echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
111+
112+
- name: Set GPU name and type
113+
shell: bash
114+
run: |
115+
set -eux
116+
117+
if [[ "${DEVICE_NAME}" == "cuda" ]]; then
118+
DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
119+
CUDA_HOME="/usr/local/cuda"
120+
echo "CUDA_HOME=$CUDA_HOME" >> $GITHUB_ENV
121+
elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
122+
DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
123+
elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
124+
DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
125+
fi
126+
echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV
127+
128+
- name: Setup CUDA GPU_FLAG for docker run
129+
if: env.DEVICE_NAME == 'cuda'
130+
run: |
131+
echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
132+
133+
- name: Select TritonBench Docker image
134+
shell: bash
135+
run: |
136+
set -eux
137+
# Determine image suffix based on device
138+
if [[ "${DEVICE_NAME}" == "cuda" ]]; then
139+
IMAGE_SUFFIX="latest"
140+
elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
141+
IMAGE_SUFFIX="rocm-latest"
142+
else
143+
echo "TritonBench requires either CUDA or ROCm devices."
144+
exit 1
145+
fi
146+
147+
DOCKER_IMAGE="meta-pytorch/tritonbench:${IMAGE_SUFFIX}"
148+
echo "DOCKER_IMAGE=$DOCKER_IMAGE" >> "$GITHUB_ENV"
149+
echo "CONDA_ENV=triton-main" >> "$GITHUB_ENV"
150+
echo "Using docker image: $DOCKER_IMAGE "
151+
echo "Using conda env: $CONDA_ENV "
152+
153+
- name: Run TritonBench benchmark
154+
run: |
155+
set -eux
156+
157+
container_name=$(docker run \
158+
${GPU_FLAG:-} \
159+
-e DEVICE_NAME \
160+
-e DEVICE_TYPE \
161+
-e CONDA_ENV \
162+
--ipc=host \
163+
--tty \
164+
--detach \
165+
--security-opt seccomp=unconfined \
166+
--shm-size=32g \
167+
-v "${GITHUB_WORKSPACE}:/tmp/workspace" \
168+
-w /tmp/workspace \
169+
"${DOCKER_IMAGE}"
170+
)
171+
172+
docker exec -t -w /tmp/workspace "${container_name}" bash -c " \
173+
set -eux && cd /workspace/tritonbench &&
174+
bash .ci/tritonbench/run-benchmark.sh ${{ matrix.BENCHMARKS }} --conda-env ${{ env.CONDA_ENV }} "
175+
176+
docker exec -t -w /tmp/workspace "${container_name}" bash -c " \
177+
set -eux && cd /workspace/tritonbench && mv .benchmarks /tmp/workspace/triton-benchmarks/tritonbench/results
178+
"
179+
180+
- name: Authenticate with AWS
181+
# AWS CUDA runners already have access to the bucket via its runner IAM role
182+
if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
183+
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
184+
with:
185+
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
186+
# The max duration enforced by the server side
187+
role-duration-seconds: 18000
188+
aws-region: us-east-1
189+
190+
# Keep a copy of the benchmark results on GitHub for reference
191+
- uses: actions/upload-artifact@v4
192+
if: always()
193+
with:
194+
name: tritonbench-results
195+
path: triton-benchmarks/tritonbench/results
196+
retention-days: 30
197+
198+
- name: Upload result to Scribe
199+
working-directory: triton-benchmarks/tritonbench
200+
run: |
201+
latest_result_json=$(find ./results/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
202+
python3 ./.ci/upload/scribe.py --json ${latest_result_json}
203+
204+
- name: Rewrite Tritonbench result json to ClickHouse style
205+
working-directory: triton-benchmarks/tritonbench
206+
run: |
207+
latest_result_json=$(find ./results/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
208+
python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
209+
--output clickhouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json
210+
211+
- name: Upload result to ClickHouse
212+
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
213+
with:
214+
benchmark-results-dir: triton-benchmarks/tritonbench/clickhouse-results
215+
dry-run: false
216+
schema-version: v3
217+
github-token: ${{ secrets.GITHUB_TOKEN }}
218+
219+
- name: Kill the container
220+
if: always()
221+
run: |
222+
docker kill "${TRITONBENCH_CONTAINER_ID}" || true
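The two upload steps above select the newest result.json by reverse-sorting the result paths (find ... | sort -r | head -n 1). A rough Python equivalent for inspecting results locally (the directory layout is an assumption based on the artifact path above):

import glob

# Mirrors: find ./results -name "result.json" | sort -r | head -n 1
candidates = sorted(
    glob.glob("triton-benchmarks/tritonbench/results/**/result.json", recursive=True),
    reverse=True,
)
latest_result_json = candidates[0] if candidates else None
print(latest_result_json)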
