TritonBench #16
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: TritonBench

on:
  schedule:
    # Run every 12 hours
    - cron: '0 */12 * * *'
  workflow_dispatch:
    # BUG FIX: workflow_dispatch input definitions must be nested under an
    # `inputs:` key; previously they sat directly under `workflow_dispatch:`,
    # which GitHub rejects as an invalid workflow file.
    inputs:
      tritonbench_branch:
        description: TritonBench branch (main)
        required: true
        type: string
        default: main
      benchmarks:
        description: |
          A comma-separated list of benchmarks from tritonbench/benchmarks (optional, default to run nightly)
        required: false
        type: string
      runners:
        # Typo fixed: script is generate_tritonbench_matrix.py (see the
        # set-parameters job below), not genenerate_...
        description: |
          A comma-separated list of runners from .github/scripts/generate_tritonbench_matrix.py to run the benchmark (optional, default to run b200)
        required: true
        type: string
        default: b200

concurrency:
  # NOTE(review): this workflow only triggers on schedule/workflow_dispatch,
  # so github.event.pull_request.number is always empty and the group falls
  # back to github.sha; the pull_request component could likely be dropped.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
jobs:
  # Builds the benchmark x runner matrix consumed by the `benchmarks` job below.
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          # Empty string means "use the script's defaults" (nightly benchmark
          # set / b200 runner, per the input descriptions above)
          BENCHMARKS: ${{ inputs.benchmarks || '' }}
          RUNNERS: ${{ inputs.runners || '' }}
        run: |
          set -eux

          # The generated matrix is grouped by benchmark and runner
          python .github/scripts/generate_tritonbench_matrix.py \
            --benchmarks "${BENCHMARKS}" \
            --runners "${RUNNERS}"
  benchmarks:
    name: Run TritonBench benchmarks
    needs: set-parameters
    # Never run for fork PRs or outside the pytorch org: the environment,
    # secrets, and AWS IAM role used below only exist there.
    if: ${{ !github.event.pull_request.head.repo.fork && github.repository_owner == 'pytorch' }}
    strategy:
      # Matrix entries come from set-parameters (generate_tritonbench_matrix.py)
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write  # required for the OIDC-based AWS credentials step below
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install system dependencies
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y libnuma-dev numactl

      - name: Checkout TritonBench repository
        uses: actions/checkout@v4
        with:
          repository: meta-pytorch/tritonbench
          path: triton-benchmarks/tritonbench
          ref: ${{ inputs.tritonbench_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        # Amazon Linux fails on this step
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'

      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          # Probe for an accelerator; DEVICE_NAME is one of cuda|rocm|cpu and
          # is exported to all later steps via GITHUB_ENV.
          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV

      - name: Set GPU name and type
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            # Second whitespace-separated word of the GPU name (e.g.
            # "NVIDIA B200" -> "B200"); DEVICE_TYPE is later matched with
            # contains(env.DEVICE_TYPE, 'B200') in the AWS auth step.
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
            # NOTE(review): CUDA_HOME is exported here but no later step in
            # this file visibly reads it -- presumably consumed inside the
            # benchmark scripts; verify before removing.
            CUDA_HOME="/usr/local/cuda"
            echo "CUDA_HOME=$CUDA_HOME" >> $GITHUB_ENV
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV

      - name: Install dependencies
        shell: bash
        working-directory: triton-benchmarks/tritonbench
        run: |
          set -eux

          # Host-side deps for the upload/post-processing scripts only; the
          # benchmark itself runs inside the docker container.
          pip install -r .ci/upload/requirements.txt

      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        # NOTE(review): no equivalent device flags are added for rocm --
        # confirm the ROCm runners' docker daemon config exposes the GPUs by
        # default, otherwise the container will not see them.
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Select TritonBench Docker image
        shell: bash
        run: |
          set -eux

          # Determine image suffix based on device
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            IMAGE_SUFFIX="latest"
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            IMAGE_SUFFIX="rocm-latest"
          else
            echo "TritonBench requires either CUDA or ROCm devices."
            exit 1
          fi

          CONDA_ENV="triton-main"
          DOCKER_IMAGE="ghcr.io/meta-pytorch/tritonbench:${IMAGE_SUFFIX}"
          echo "DOCKER_IMAGE=$DOCKER_IMAGE" >> "$GITHUB_ENV"
          echo "CONDA_ENV=$CONDA_ENV" >> "$GITHUB_ENV"
          echo "Using docker image: $DOCKER_IMAGE "
          echo "Using conda env: $CONDA_ENV "
| - name: Run TritonBench benchmark | |
| working-directory: triton-benchmarks/tritonbench | |
| run: | | |
| set -eux | |
| container_name=$(docker run \ | |
| ${GPU_FLAG:-} \ | |
| -e DEVICE_NAME \ | |
| -e DEVICE_TYPE \ | |
| -e CONDA_ENV \ | |
| --ipc=host \ | |
| --tty \ | |
| --detach \ | |
| --security-opt seccomp=unconfined \ | |
| --shm-size=32g \ | |
| -v "${GITHUB_WORKSPACE}:/tmp/workspace" \ | |
| -w /tmp/workspace \ | |
| "${DOCKER_IMAGE}" | |
| ) | |
| docker exec -t -w /tmp/workspace "${container_name}" bash -c " \ | |
| set -eux && cd /workspace/tritonbench && \ | |
| bash .ci/tritonbench/run-benchmark.sh ${{ matrix.BENCHMARKS }} --conda-env ${{ env.CONDA_ENV }} && \ | |
| sudo mv /workspace/tritonbench/.benchmarks /tmp/workspace/triton-benchmarks/tritonbench/results-${{ env.CONDA_ENV }} " | |
| # post-process result.json | |
| latest_result_json=$(find ./results-${CONDA_ENV} -name "result.json" | sort -r | head -n 1) | |
| python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \ | |
| --add-github-env --output ${latest_result_json} | |
      - name: Authenticate with AWS
        # AWS CUDA runners already have access to the bucket via its runner IAM role
        if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      # Keep a copy of the benchmark results on GitHub for reference
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          # NOTE(review): the name keys only on the runner -- if the matrix
          # can schedule more than one benchmark per runner this will collide;
          # confirm the matrix emits one entry per runner.
          name: tritonbench-${{ matrix.runner }}-benchmark-single
          path: triton-benchmarks/tritonbench/results-${{ env.CONDA_ENV }}
          retention-days: 30

      - name: Upload result to Scribe
        # Skip when the Scribe token secret is not configured
        if: ${{ env.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN != '' }}
        working-directory: triton-benchmarks/tritonbench
        run: |
          # Pick the reverse-sort-first result.json (assumes result paths sort
          # newest-first -- TODO confirm naming scheme)
          latest_result_json=$(find ./results-${CONDA_ENV} -name "result.json" | sort -r | head -n 1)
          python3 ./.ci/upload/scribe.py --json ${latest_result_json}

      - name: Rewrite Tritonbench result json to ClickHouse style
        working-directory: triton-benchmarks/tritonbench
        run: |
          latest_result_json=$(find ./results-${CONDA_ENV} -name "result.json" | sort -r | head -n 1)
          python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
            --output clickhouse-results/result-${CONDA_ENV}.json

      - name: Upload result to ClickHouse
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: triton-benchmarks/tritonbench/clickhouse-results
          dry-run: false
          schema-version: v3
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Kill the container
        if: always()
        run: |
          # `|| true` keeps this cleanup step green even when the container id
          # variable is unset or the container has already exited.
          docker kill "${TRITONBENCH_CONTAINER_ID}" || true