From f7f6cd01715084e19ffffbef0e92f1bb4cfba856 Mon Sep 17 00:00:00 2001
From: David Rohr
Date: Mon, 26 May 2025 11:22:48 +0200
Subject: [PATCH] GPU: Tentative workaround for MI100 serialization bug

---
 GPU/GPUTracking/Definitions/GPUSettingsList.h            | 1 +
 GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h
index f7ff14ea188c4..d8173a5b62a35 100644
--- a/GPU/GPUTracking/Definitions/GPUSettingsList.h
+++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h
@@ -370,6 +370,7 @@ AddOption(debugOnFailureMaxN, uint32_t, 1, "", 0, "Max number of times to run th
 AddOption(debugOnFailureMaxFiles, uint32_t, 0, "", 0, "Max number of files to have in the target folder")
 AddOption(debugOnFailureMaxSize, uint32_t, 0, "", 0, "Max size of existing dumps in the target folder in GB")
 AddOption(debugOnFailureDirectory, std::string, ".", "", 0, "Target folder for debug / dump")
+AddOption(amdMI100SerializationWorkaround, bool, false, "", 0, "Enable workaround that mitigates MI100 serialization bug")
 AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr)
 AddSubConfig(GPUSettingsProcessingRTC, rtc)
 AddSubConfig(GPUSettingsProcessingRTCtechnical, rtctech)
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx
index 67ef402961a20..708037239071e 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx
@@ -161,6 +161,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
     GPUTPCTracker& trk = processors()->tpcTrackers[iSector];
     GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSector] : trk;
     int32_t useStream = StreamForSector(iSector);
+    if (GetProcessingSettings().amdMI100SerializationWorkaround) {
+      SynchronizeStream(useStream); // TODO: Remove this workaround once fixed on MI100
+    }
     if (GetProcessingSettings().debugLevel >= 3) {
       GPUInfo("Creating Sector Data (Sector %d)", iSector);