23 changes: 13 additions & 10 deletions job_scripts/polaris/polaris.submit
@@ -1,28 +1,31 @@
#!/bin/sh
#PBS -l select=2:system=polaris
#PBS -l filesystems=home:eagle:grand
#PBS -l place=scatter
#PBS -l walltime=6:00:00
#PBS -l walltime=06:00:00
#PBS -q prod
#PBS -A AstroExplosions
#PBS -j eo

EXEC=./Castro2d.gnu.MPI.CUDA.SMPLSDC.ex
INPUTS=inputs_2d.N14.coarse

module swap PrgEnv-nvhpc PrgEnv-gnu
module load nvhpc-mixed

# Enable GPU-MPI (if supported by application)
##export MPICH_GPU_SUPPORT_ENABLED=1

module use /soft/modulefiles
module load PrgEnv-gnu
module load cudatoolkit-standalone
module load cpe-cuda
module load gcc-native/12.3
export CRAY_ACCEL_TARGET=nvidia80

# Change to working directory
cd ${PBS_O_WORKDIR}

# MPI and OpenMP settings
# MPI and OpenMP settings. Each Polaris node has 32 physical cores (64 logical).
NNODES=`wc -l < $PBS_NODEFILE`
NRANKS_PER_NODE=4
NDEPTH=8
NTHREADS=1
NRANKS_PER_NODE=$(nvidia-smi -L | wc -l)  # One MPI rank per GPU; Polaris nodes have 4 GPUs, so this evaluates to 4.
NDEPTH=8   # Number of logical CPU cores assigned to each MPI rank.
NTHREADS=1 # Number of OpenMP threads per MPI rank.

NTOTRANKS=$(( NNODES * NRANKS_PER_NODE ))

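For reference, the rank arithmetic above works out as follows -- a sketch assuming the 2 nodes requested by select=2 and the 4 GPUs per Polaris node:

# NNODES          = 2        (one line per node in $PBS_NODEFILE)
# NRANKS_PER_NODE = 4        (one MPI rank per GPU, counted via nvidia-smi -L)
# NTOTRANKS       = 2 * 4 = 8
# With NDEPTH=8, each node binds 4 ranks x 8 logical cores = 32 of its 64 logical cores.
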
21 changes: 14 additions & 7 deletions job_scripts/polaris/polaris_simple.submit
@@ -1,26 +1,33 @@
#!/bin/sh
#PBS -l select=2:system=polaris
#PBS -l filesystems=home:eagle:grand
#PBS -l place=scatter
#PBS -l walltime=0:30:00
#PBS -l walltime=00:30:00
#PBS -q debug
#PBS -A AstroExplosions

EXEC=./Castro2d.gnu.MPI.CUDA.SMPLSDC.ex
INPUTS=inputs_2d.N14.coarse

# Enable GPU-MPI (if supported by application)
##export MPICH_GPU_SUPPORT_ENABLED=1

module use /soft/modulefiles
module load PrgEnv-gnu
module load cudatoolkit-standalone
module load cpe-cuda
module load gcc-native/12.3
export CRAY_ACCEL_TARGET=nvidia80

# Change to working directory
cd ${PBS_O_WORKDIR}

# MPI and OpenMP settings
# MPI and OpenMP settings. Each Polaris node has 32 physical cores (64 logical).
NNODES=`wc -l < $PBS_NODEFILE`
NRANKS_PER_NODE=4
NDEPTH=8
NTHREADS=1
NRANKS_PER_NODE=$(nvidia-smi -L | wc -l)  # One MPI rank per GPU; Polaris nodes have 4 GPUs, so this evaluates to 4.
NDEPTH=8   # Number of logical CPU cores assigned to each MPI rank.
NTHREADS=1 # Number of OpenMP threads per MPI rank.

NTOTRANKS=$(( NNODES * NRANKS_PER_NODE ))

# For applications that need mpiexec to bind MPI ranks to GPUs
# For applications that need mpiexec to bind MPI ranks to GPUs; the binding is handled by the wrapper script set_affinity_gpu_polaris.sh.
mpiexec -n ${NTOTRANKS} --ppn ${NRANKS_PER_NODE} --depth=${NDEPTH} --cpu-bind depth --env OMP_NUM_THREADS=${NTHREADS} -env OMP_PLACES=threads ./set_affinity_gpu_polaris.sh ${EXEC} ${INPUTS}
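The wrapper set_affinity_gpu_polaris.sh is not included in this diff. A minimal sketch of what such a wrapper typically does on Polaris, assuming the node-local rank is exposed as PMI_LOCAL_RANK (an assumption, not taken from this PR):

#!/bin/bash
# Sketch of a GPU-affinity wrapper: give each node-local MPI rank its own GPU.
num_gpus=$(nvidia-smi -L | wc -l)
# PMI_LOCAL_RANK is assumed to be set by the MPI launcher (Cray MPICH / PALS).
gpu=$(( num_gpus - 1 - PMI_LOCAL_RANK % num_gpus ))
export CUDA_VISIBLE_DEVICES=${gpu}
echo "RANK=${PMI_RANK} LOCAL_RANK=${PMI_LOCAL_RANK} -> GPU=${gpu}"
# Hand off to the actual application (here: ${EXEC} ${INPUTS} passed by mpiexec).
exec "$@"

Once the wrapper is in the run directory and executable, the job is submitted with qsub polaris_simple.submit (debug queue) or qsub polaris.submit (prod queue).
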
11 changes: 7 additions & 4 deletions sphinx_docs/source/alcf.rst
@@ -23,13 +23,16 @@ there. This is read at the end of ``/etc/bash.bashrc``
Compiling
=========

Load the modules:
Load the modules and set the ``CRAY_ACCEL_TARGET`` environment variable:

.. prompt:: bash

module use /soft/modulefiles
module load PrgEnv-gnu
module load nvhpc-mixed
module load cudatoolkit-standalone
module load cpe-cuda
module load gcc-native/12.3
export CRAY_ACCEL_TARGET=nvidia80
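A quick sanity check before building -- a hedged sketch, not part of the diff, assuming the Cray compiler wrappers and CUDA toolkit are on PATH after the loads above:

CC --version              # Cray C++ wrapper; should report GCC 12.3
nvcc --version            # from cudatoolkit-standalone
echo $CRAY_ACCEL_TARGET   # should print nvidia80
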

Then you can compile via:

@@ -125,7 +128,7 @@ To create the virtual environment:
.. prompt:: bash

module use /soft/modulefiles
module load conda
module load conda
conda activate
VENV_DIR="venvs/polaris"
mkdir -p "${VENV_DIR}"
@@ -137,7 +140,7 @@ is loaded:

.. prompt:: bash

module load conda
module load conda
conda activate
VENV_DIR="venvs/polaris"
source "${VENV_DIR}/bin/activate"