From 6251aefd41b7a7780f47571c39553737eb22fabd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 10 Apr 2025 12:29:50 +0200 Subject: [PATCH] GPU: Fix typo in variable name, fix comments, fix debug messages --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 10 +++++----- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Global/GPUChainTracking.cxx | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index d714c6833d18d..ed47358cc9d5c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -68,7 +68,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu int32_t nThreads = getNKernelHostThreads(false); if (nThreads > 1) { if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d Threads\n", nThreads); + printf("Running %d Threads\n", mThreading->activeThreads->max_concurrency()); } tbb::this_task_arena::isolate([&] { mThreading->activeThreads->execute([&] { @@ -91,10 +91,10 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu template <> inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - int32_t nnThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); - if (nnThreads > 1) { - tbb::parallel_for(0, nnThreads, [&](int iThread) { - size_t threadSize = size / nnThreads; + int32_t nThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); + if (nThreads > 1) { + tbb::parallel_for(0, nThreads, [&](int iThread) { + size_t threadSize = size / nThreads; if (threadSize % 4096) { threadSize += 4096 - threadSize % 4096; } diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt 
b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index dd72119e3b56f..e17f1fcd7091e 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -77,8 +77,8 @@ add_custom_command( create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done - COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain standard headers 1>&2 && exit 1" - COMMENT "Checking CUDA RTC File ${GPU_RTC_BIN}.src" + COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain system headers 1>&2 && exit 1" + COMMENT "Checking CUDA RTC File ${GPU_RTC_BIN}.src for system headers" DEPENDS ${GPU_RTC_BIN}.src VERBATIM) add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 5a68df8ac9527..9a9b1e36a167c 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -115,8 +115,8 @@ add_custom_command( create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done - COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain standard headers 1>&2 && exit 1" - COMMENT "Checking HIP RTC File ${GPU_RTC_BIN}.src" + COMMAND ! 
grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain system headers 1>&2 && exit 1" + COMMENT "Checking HIP RTC File ${GPU_RTC_BIN}.src for system headers" DEPENDS ${GPU_RTC_BIN}.src VERBATIM) add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 37ad164d20a60..6753db280d5bf 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -267,7 +267,7 @@ bool GPUChainTracking::ValidateSettings() return false; } if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 1 : 0) > (int32_t)mRec->NStreams()) { - GPUError("NStreams (%d) must be > nTPCClustererLanes (%d)", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); + GPUError("NStreams of %d insufficient for %d nTPCClustererLanes", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); return false; } if (GetProcessingSettings().noGPUMemoryRegistration && GetProcessingSettings().tpcCompressionGatherMode != 3) { diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 3c1ad9658566b..7faab410d20ea 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -76,7 +76,6 @@ function(o2_gpu_add_kernel kernel_name kernel_files) set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_NAMES "${kernel_name}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_INCLUDES "${TMP_KERNEL_CLASS_FILE}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_FILES "${TMP_KERNEL_CLASS_FILE}.cxx") - # 
add_custom_command OUTPUT option does not support target-dependend generator expressions, thus this workaround set(O2_GPU_KERNEL_TEMPLATE_FILES "GPUConstantMem.h") if (GPUCA_BUILD_DEBUG) @@ -102,6 +101,7 @@ function(o2_gpu_add_kernel kernel_name kernel_files) list(TRANSFORM O2_GPU_KERNEL_TEMPLATE_FILES PREPEND "#include \"") list(JOIN O2_GPU_KERNEL_TEMPLATE_FILES "\n" O2_GPU_KERNEL_TEMPLATE_FILES) + # add_custom_command OUTPUT option does not support target-dependent generator expressions, thus this workaround to create CUDA and HIP files string(REPLACE ", " "_" TMP_FILENAME "${kernel_name}") if(CUDA_ENABLED) set(TMP_FILENAMEA "${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${TMP_FILENAME}.cu")