|
106 | 106 | key: "rocm-build-openmpi" |
107 | 107 | agents: |
108 | 108 | queue: "juliagpu" |
109 | | - rocm: "*" # todo fix ROCM version |
| 109 | + rocm: "*" |
110 | 110 | env: |
111 | | - OPENMPI_VER: "4.1" |
112 | | - OPENMPI_VER_FULL: "4.1.4" |
113 | | - UCX_VER: "1.13-rc1" |
| 111 | + OPENMPI_VER: "5.0" |
| 112 | + OPENMPI_VER_FULL: "5.0.3" |
| 113 | + UCX_VER: "1.17.0" |
114 | 114 | CCACHE_DIR: "/root/ccache" |
115 | 115 | commands: | |
116 | 116 | echo "--- Install packages" |
117 | 117 | apt-get install --yes --no-install-recommends curl ccache |
118 | 118 | export PATH="/usr/lib/ccache/:$$PATH" |
| 119 | +
|
119 | 120 | echo "--- Build UCX" |
120 | | - curl -L https://github.com/openucx/ucx/releases/download/v1.13.0-rc1/ucx-1.13.0.tar.gz --output ucx.tar.gz |
| 121 | + curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz |
121 | 122 | tar -zxf ucx.tar.gz |
122 | 123 | pushd ucx-* |
123 | 124 | ./configure --with-rocm --enable-mt --prefix=$$(realpath ../mpi-prefix) |
124 | 125 | make -j |
125 | 126 | make install |
126 | 127 | popd |
| 128 | +
|
127 | 129 | echo "--- Build OpenMPI" |
128 | 130 | curl -L https://download.open-mpi.org/release/open-mpi/v$${OPENMPI_VER}/openmpi-$${OPENMPI_VER_FULL}.tar.gz --output openmpi.tar.gz |
129 | 131 | tar -zxf openmpi.tar.gz |
130 | | - pushd openmpi-* |
131 | | - ./configure --with-ucx=$$(realpath ../mpi-prefix) --prefix=$$(realpath ../mpi-prefix) |
| 132 | + pushd openmpi-$${OPENMPI_VER_FULL} |
| 133 | + ./configure --with-ucx=$$(realpath ../mpi-prefix) --with-rocm --prefix=$$(realpath ../mpi-prefix) |
132 | 134 | make -j |
133 | 135 | make install |
134 | 136 | popd |
| 137 | +
|
135 | 138 | echo "--- Package prefix" |
136 | 139 | tar -zcf mpi-prefix.tar.gz mpi-prefix/ |
| 140 | +
|
137 | 141 | echo "--- ccache stats" |
138 | 142 | ccache -s |
139 | 143 | artifact_paths: |
140 | 144 | - "mpi-prefix.tar.gz" |
141 | 145 |
|
142 | 146 | - wait |
143 | 147 |
|
144 | | - - label: "Tests -- Julia latest" |
| 148 | + - label: "Tests -- Julia {{matrix.version}}" |
| 149 | + matrix: |
| 150 | + setup: |
| 151 | + version: |
| 152 | + - "1.10" |
| 153 | + concurrency: 1 |
| 154 | + concurrency_group: mpi_rocm |
145 | 155 | plugins: |
146 | 156 | - JuliaCI/julia#v1: |
147 | | - version: "1" # failing on 1.8 |
| 157 | + version: "{{matrix.version}}" |
148 | 158 | persist_depot_dirs: packages,artifacts,compiled |
149 | 159 | agents: |
150 | 160 | queue: "juliagpu" |
151 | | - rocm: "*" # todo fix ROCM version |
| 161 | + rocm: "*" |
152 | 162 | if: build.message !~ /\[skip tests\]/ |
153 | | - timeout_in_minutes: 60 |
154 | | - soft_fail: |
155 | | - - exit_status: 1 |
| 163 | + timeout_in_minutes: 90 |
156 | 164 | env: |
157 | 165 | JULIA_MPI_TEST_NPROCS: 2 |
158 | 166 | JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" |
|
182 | 190 | ' |
183 | 191 |
|
184 | 192 | echo "+++ Run tests" |
| 193 | + export JULIA_MPI_TEST_EXCLUDE="test_allreduce.jl,test_reduce.jl,test_scan.jl" |
185 | 194 | julia --color=yes --project=. -e ' |
186 | 195 | import Pkg |
187 | 196 | Pkg.test("MPI"; test_args=["--backend=AMDGPU"]) |
|
0 commit comments