From 507d171aa2ca4c9b12bc050e18d1c5151805e772 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 10 Jun 2025 11:48:56 +0200 Subject: [PATCH 1/4] Add some more O2 settings to --setO2Settings of standalone benchmark --- GPU/GPUTracking/Standalone/Benchmark/standalone.cxx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 95997a30034c9..089b3fecfba99 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -223,6 +223,15 @@ int32_t ReadConfiguration(int argc, char** argv) configStandalone.rec.tpc.nWaysOuter = 1; configStandalone.rec.tpc.trackReferenceX = 83; configStandalone.proc.outputSharedClusterMap = 1; + configStandalone.proc.clearO2OutputFromGPU = 1; + configStandalone.QA.clusterRejectionHistograms = 1; + configStandalone.proc.tpcIncreasedMinClustersPerRow = 500000; + configStandalone.proc.ignoreNonFatalGPUErrors=1; + // TODO: rundEdx=1 + // GPU_proc.qcRunFraction=$TPC_TRACKING_QC_RUN_FRACTION;" + // [[ $CTFINPUT == 1 ]] && GPU_CONFIG_KEY+="GPU_proc.tpcInputWithClusterRejection=1;" + // double pipeline / rtc + } if (configStandalone.outputcontrolmem) { From f4a6dd7ad2959b39b89e03ba33dab68fda1f9a55 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 10 Jun 2025 15:35:18 +0200 Subject: [PATCH 2/4] GPU Standalone: No need for warmup iteration of double-pipeline with debugging enabled --- GPU/GPUTracking/Standalone/Benchmark/standalone.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 089b3fecfba99..34716e623e3c2 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -902,7 +902,7 @@ int32_t main(int argc, char** argv) double pipelineWalltime = 1.; if (configStandalone.proc.doublePipeline) { HighResTimer timerPipeline; - if (RunBenchmark(rec, chainTracking, 1, iEvent, &nTracksTotal, &nClustersTotal) || RunBenchmark(recPipeline, chainTrackingPipeline, 2, iEvent, &nTracksTotal, &nClustersTotal)) { + if (configStandalone.proc.debugLevel < 2 && (RunBenchmark(rec, chainTracking, 1, iEvent, &nTracksTotal, &nClustersTotal) || RunBenchmark(recPipeline, chainTrackingPipeline, 2, iEvent, &nTracksTotal, &nClustersTotal))) { goto breakrun; } auto pipeline1 = std::async(std::launch::async, RunBenchmark, rec, chainTracking, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal, 0, &timerPipeline); From b17bb4a5e3d02a64b2df204e55a6f76d0717ebed Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 11 Jun 2025 09:26:49 +0200 Subject: [PATCH 3/4] GPU Standalone: CI build should use -Werror --- GPU/GPUTracking/Standalone/CMakeLists.txt | 7 ++++++- GPU/GPUTracking/Standalone/cmake/build.sh | 17 ++++++++++------- GPU/GPUTracking/Standalone/cmake/config.cmake | 1 + 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 0b37ce38b79f7..c0e3312201efb 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -65,7 +65,12 @@ if (GPUCA_BUILD_DEBUG_SANITIZE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") endif() endif() -string(APPEND CMAKE_CXX_FLAGS " -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") +if(GPUCA_CONFIG_WERROR) + string(APPEND CMAKE_CXX_FLAGS " -Werror") +else() + string(APPEND CMAKE_CXX_FLAGS " -Wno-error") +endif() +string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") string(APPEND CMAKE_SHARED_LINKER_FLAGS " -rdynamic -Wl,--no-undefined") if(CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") string(APPEND CMAKE_CXX_FLAGS " -Wno-vla-cxx-extension") diff --git a/GPU/GPUTracking/Standalone/cmake/build.sh b/GPU/GPUTracking/Standalone/cmake/build.sh index 9fe650fd30905..216efd4ebe408 100755 --- a/GPU/GPUTracking/Standalone/cmake/build.sh +++ b/GPU/GPUTracking/Standalone/cmake/build.sh @@ -7,13 +7,16 @@ set -e mkdir -p standalone/build pushd standalone/build cp $1/GPU/GPUTracking/Standalone/cmake/config.cmake . -cat >> config.cmake << "EOF" -set(ENABLE_CUDA 1) -set(ENABLE_HIP 1) -set(ENABLE_OPENCL 1) -set(GPUCA_CONFIG_ONNX 1) -set(GPUCA_BUILD_EVENT_DISPLAY 0) -EOF +if [[ $GPUCA_STANDALONE_CI == 1 ]]; then + cat >> config.cmake << "EOF" + set(ENABLE_CUDA 1) + set(ENABLE_HIP 1) + set(ENABLE_OPENCL 1) + set(GPUCA_CONFIG_ONNX 1) + set(GPUCA_BUILD_EVENT_DISPLAY 0) + set(GPUCA_CONFIG_WERROR 1) + EOF +fi cmake -DCMAKE_INSTALL_PREFIX=../ $1/GPU/GPUTracking/Standalone make ${JOBS+-j $JOBS} install popd diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index 77ce9c9e77fca..88fe418d40e5b 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -38,4 +38,5 @@ set(CUDA_COMPUTETARGET "default") # 86 89 #set(GPUCA_RTC_NO_COMPILED_KERNELS 1) #set(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE 1) #set(GPUCA_CONFIG_COMPILER gcc) # gcc / clang +#set(GPUCA_CONFIG_WERROR 1) #add_definitions(-DGPUCA_GPU_DEBUG_PRINT) From 8a09be314052c4e6bc3ea536482f1b77e1c38983 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Wed, 11 Jun 2025 07:28:22 +0000 Subject: [PATCH 4/4] Please consider the following formatting changes --- GPU/GPUTracking/Standalone/Benchmark/standalone.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 34716e623e3c2..a624e1e55ed4b 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -226,12 +226,11 @@ int32_t ReadConfiguration(int argc, char** argv) configStandalone.proc.clearO2OutputFromGPU = 1; configStandalone.QA.clusterRejectionHistograms = 1; configStandalone.proc.tpcIncreasedMinClustersPerRow = 500000; - configStandalone.proc.ignoreNonFatalGPUErrors=1; + configStandalone.proc.ignoreNonFatalGPUErrors = 1; // TODO: rundEdx=1 // GPU_proc.qcRunFraction=$TPC_TRACKING_QC_RUN_FRACTION;" // [[ $CTFINPUT == 1 ]] && GPU_CONFIG_KEY+="GPU_proc.tpcInputWithClusterRejection=1;" // double pipeline / rtc - } if (configStandalone.outputcontrolmem) {