23 changes: 13 additions & 10 deletions job_scripts/polaris/polaris.submit
@@ -1,28 +1,31 @@
#!/bin/sh
#PBS -l select=2:system=polaris
#PBS -l filesystems=home:eagle:grand
#PBS -l place=scatter
#PBS -l walltime=6:00:00
#PBS -l walltime=06:00:00
#PBS -q prod
#PBS -A AstroExplosions
#PBS -j eo

EXEC=./Castro2d.gnu.MPI.CUDA.SMPLSDC.ex
INPUTS=inputs_2d.N14.coarse

module swap PrgEnv-nvhpc PrgEnv-gnu
module load nvhpc-mixed

# Enable GPU-MPI (if supported by application)
##export MPICH_GPU_SUPPORT_ENABLED=1

module use /soft/modulefiles
module load PrgEnv-gnu
module load cudatoolkit-standalone
module load cpe-cuda
module load gcc-native/12.3
export CRAY_ACCEL_TARGET=nvidia80

# Change to working directory
cd ${PBS_O_WORKDIR}

# MPI and OpenMP settings
# MPI and OpenMP settings. Each Polaris node has 32 physical cores (64 logical).
NNODES=`wc -l < $PBS_NODEFILE`
NRANKS_PER_NODE=4
NDEPTH=8
NTHREADS=1
NRANKS_PER_NODE=$(nvidia-smi -L | wc -l)  # One MPI rank per GPU; Polaris nodes have 4 GPUs, so this evaluates to 4.
NDEPTH=8   # Number of logical CPU cores assigned to each MPI rank.
NTHREADS=1 # Number of OpenMP threads per MPI rank.

NTOTRANKS=$(( NNODES * NRANKS_PER_NODE ))

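For reference, the rank arithmetic above works out as follows -- a sketch assuming the 2 nodes requested by select=2 and the 4 GPUs per Polaris node:

# NNODES          = 2        (one line per node in $PBS_NODEFILE)
# NRANKS_PER_NODE = 4        (one MPI rank per GPU, counted via nvidia-smi -L)
# NTOTRANKS       = 2 * 4 = 8
# With NDEPTH=8, each node binds 4 ranks x 8 logical cores = 32 of its 64 logical cores.
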
21 changes: 14 additions & 7 deletions job_scripts/polaris/polaris_simple.submit
@@ -1,26 +1,33 @@
#!/bin/sh
#PBS -l select=2:system=polaris
#PBS -l filesystems=home:eagle:grand
#PBS -l place=scatter
#PBS -l walltime=0:30:00
#PBS -l walltime=00:30:00
#PBS -q debug
#PBS -A AstroExplosions

EXEC=./Castro2d.gnu.MPI.CUDA.SMPLSDC.ex
INPUTS=inputs_2d.N14.coarse

# Enable GPU-MPI (if supported by application)
##export MPICH_GPU_SUPPORT_ENABLED=1

module use /soft/modulefiles
module load PrgEnv-gnu
module load cudatoolkit-standalone
module load cpe-cuda
module load gcc-native/12.3
export CRAY_ACCEL_TARGET=nvidia80

# Change to working directory
cd ${PBS_O_WORKDIR}

# MPI and OpenMP settings
# MPI and OpenMP settings. Each Polaris node has 32 physical cores (64 logical).
NNODES=`wc -l < $PBS_NODEFILE`
NRANKS_PER_NODE=4
NDEPTH=8
NTHREADS=1
NRANKS_PER_NODE=$(nvidia-smi -L | wc -l)  # One MPI rank per GPU; Polaris nodes have 4 GPUs, so this evaluates to 4.
NDEPTH=8   # Number of logical CPU cores assigned to each MPI rank.
NTHREADS=1 # Number of OpenMP threads per MPI rank.

NTOTRANKS=$(( NNODES * NRANKS_PER_NODE ))

# For applications that need mpiexec to bind MPI ranks to GPUs
# For applications that need mpiexec to bind MPI ranks to GPUs; the binding is handled by the wrapper script set_affinity_gpu_polaris.sh.
mpiexec -n ${NTOTRANKS} --ppn ${NRANKS_PER_NODE} --depth=${NDEPTH} --cpu-bind depth --env OMP_NUM_THREADS=${NTHREADS} -env OMP_PLACES=threads ./set_affinity_gpu_polaris.sh ${EXEC} ${INPUTS}
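The wrapper set_affinity_gpu_polaris.sh is not included in this diff. A minimal sketch of what such a wrapper typically does on Polaris, assuming the node-local rank is exposed as PMI_LOCAL_RANK (an assumption, not taken from this PR):

#!/bin/bash
# Sketch of a GPU-affinity wrapper: give each node-local MPI rank its own GPU.
num_gpus=$(nvidia-smi -L | wc -l)
# PMI_LOCAL_RANK is assumed to be set by the MPI launcher (Cray MPICH / PALS).
gpu=$(( num_gpus - 1 - PMI_LOCAL_RANK % num_gpus ))
export CUDA_VISIBLE_DEVICES=${gpu}
echo "RANK=${PMI_RANK} LOCAL_RANK=${PMI_LOCAL_RANK} -> GPU=${gpu}"
# Hand off to the actual application (here: ${EXEC} ${INPUTS} passed by mpiexec).
exec "$@"

Once the wrapper is in the run directory and executable, the job is submitted with qsub polaris_simple.submit (debug queue) or qsub polaris.submit (prod queue).
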
11 changes: 7 additions & 4 deletions sphinx_docs/source/alcf.rst
@@ -23,13 +23,16 @@ there. This is read at the end of ``/etc/bash.bashrc``
Compiling
=========

Load the modules:
Load the modules and set the ``CRAY_ACCEL_TARGET`` environment variable:

.. prompt:: bash

module use /soft/modulefiles
module load PrgEnv-gnu
module load nvhpc-mixed
module load cudatoolkit-standalone
module load cpe-cuda
module load gcc-native/12.3
export CRAY_ACCEL_TARGET=nvidia80
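A quick sanity check before building -- a hedged sketch, not part of the diff, assuming the Cray compiler wrappers and CUDA toolkit are on PATH after the loads above:

CC --version              # Cray C++ wrapper; should report GCC 12.3
nvcc --version            # from cudatoolkit-standalone
echo $CRAY_ACCEL_TARGET   # should print nvidia80
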

Then you can compile via:

@@ -125,7 +128,7 @@ To create the virtual environment:
.. prompt:: bash

module use /soft/modulefiles
module load conda
module load conda
conda activate
VENV_DIR="venvs/polaris"
mkdir -p "${VENV_DIR}"
@@ -137,7 +140,7 @@ is loaded:

.. prompt:: bash

module load conda
module load conda
conda activate
VENV_DIR="venvs/polaris"
source "${VENV_DIR}/bin/activate"