diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index dd575099857f4..729def1264f5e 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -270,7 +270,8 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? ($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi - [[ $EPNSYNCMODE == 1 || ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && [[ ${DISABLE_MI100_SERIALIZATION:-0} != 1 ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" + # serialization workaround for MI100 nodes: remove it again if the problem will be fixed in ROCm, then also remove the DISABLE_MI100_SERIALIZATION flag in the O2DPG parse script + [[ $EPNSYNCMODE == 1 || ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && [[ ${DISABLE_MI100_SERIALIZATION:-0} != 1 ]] && GPU_CONFIG_KEY+="GPU_proc.amdMI100SerializationWorkaround=1;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;"