From 0ae8e24bf71fed11b4775f3a5fe1aaf1925c97b2 Mon Sep 17 00:00:00 2001
From: Ernst Hellbar
Date: Mon, 14 Apr 2025 11:17:23 +0200
Subject: [PATCH] GPU: remove automatic workaround for MI100 memory errors

---
 prodtests/full-system-test/dpl-workflow.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh
index 2dfc74e3ecfb3..bb2712bedd92e 100755
--- a/prodtests/full-system-test/dpl-workflow.sh
+++ b/prodtests/full-system-test/dpl-workflow.sh
@@ -265,7 +265,6 @@ if [[ $GPUTYPE == "HIP" ]]; then
     TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? ($NGPUS * $NUMAID) : 0)))
     GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\""
   fi
-  [[ $EPNSYNCMODE == 1 || ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;"
   #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2
 else
   GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;"