Skip to content

Commit 8ed7006

Browse files
committed
Add nightly regression tests for CUDA 12.8 and 12.9 on H100/A100
1 parent aa21b80 commit 8ed7006

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
name: Run CUDA Nightly Regression Tests (12.8, 12.9)

# Triggers: one scheduled run per day, plus manual runs via workflow_dispatch.
on:
  schedule:
    # Cron schedules are evaluated in UTC: 15:00 UTC is 07:00 PST
    # (08:00 local time while daylight saving is in effect).
    - cron: '0 15 * * *'
  workflow_dispatch:

# On refs/heads/main the group name ends in the run number, so every run there
# gets its own group and is never cancelled by a newer one. On any other ref
# the group name ends in the ref, so a newer run cancels the one in progress.
concurrency:
  group: regression_test_cuda_nightly-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

# NOTE(review): GitHub documents that workflow-level `env` is not propagated
# into a called reusable workflow (a job that uses `uses:`). Confirm that
# HF_TOKEN actually reaches the test script.
env:
  HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
  # Installs a PyTorch nightly wheel, builds this project against it, and runs
  # the test suite once for every CUDA-version / GPU-runner pair listed below.
  test:
    strategy:
      # Let every matrix entry run to completion; one failing combination must
      # not cancel the others.
      fail-fast: false
      matrix:
        # `name` is a human-readable label only; nothing in this job reads
        # matrix.name. The remaining keys are consumed in `with:` below.
        include:
          # CUDA 12.8 on H100
          - name: CUDA 12.8 H100
            runs-on: linux.aws.h100
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu128'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.8"

          # CUDA 12.8 on A100
          - name: CUDA 12.8 A100
            runs-on: linux.aws.a100
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu128'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.8"

          # CUDA 12.9 on H100
          - name: CUDA 12.9 H100
            runs-on: linux.aws.h100
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu129'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.9"

          # CUDA 12.9 on A100
          - name: CUDA 12.9 A100
            runs-on: linux.aws.a100
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu129'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.9"

    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    # A caller's workflow-level `env` is not propagated into a reusable
    # workflow, so the Hugging Face token must be handed over as a secret.
    secrets: inherit
    with:
      timeout: 180
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      submodules: recursive
      # NOTE(review): linux_job_v2 is expected to expose each secret named here
      # to the script as SECRET_<NAME> - confirm against pytorch/test-infra.
      secrets-env: HF_TOKEN
      script: |
        conda create -n venv python=3.10 -y
        conda activate venv
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        # Re-export the token under the HF_TOKEN name; skipped when the secret
        # is absent or empty.
        if [ -n "${SECRET_HF_TOKEN:-}" ]; then
          export HF_TOKEN="${SECRET_HF_TOKEN}"
        fi
        python -m pip install --upgrade pip
        pip install ${{ matrix.torch-spec }}
        pip install -r dev-requirements.txt
        pip install . --no-build-isolation
        # Conda install prefix: two directory levels above the conda executable.
        export CONDA="$(dirname "$(dirname "$(which conda)")")"
        # Prepend conda's lib dir. The ":+" form avoids a trailing ':' (which
        # would put the current directory on the loader search path) when
        # LD_LIBRARY_PATH is empty or unset.
        export LD_LIBRARY_PATH="$CONDA/lib/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        pytest test --verbose -s

0 commit comments

Comments
 (0)