From 13290c44209724bd6ab6ce4df78217a136765aa2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 29 Mar 2025 20:41:57 +0100 Subject: [PATCH 1/6] GPU Standalone: Use LAZY CMake install message --- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index dfc8e8db3bc7a..fbc256d5d7f91 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -13,6 +13,8 @@ cmake_minimum_required(VERSION 3.27 FATAL_ERROR) project(GPUTrackingStandalone) include(FeatureSummary) +set(CMAKE_INSTALL_MESSAGE LAZY) + set(CMAKE_INSTALL_BINDIR "${CMAKE_INSTALL_PREFIX}") set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_PREFIX}") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}") From 4d2b855ab0039ff406d9870902a437adf3854376 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 29 Mar 2025 21:44:06 +0100 Subject: [PATCH 2/6] GPU RTC: Simplify the way LB preprocessor definitions are imported as parameter object, and replace explicit invalid list with CMake-auto-generated one --- .../Base/GPUReconstructionProcessing.cxx | 5 +- .../Base/cuda/GPUReconstructionCUDA.cu | 6 +- .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 5 +- .../Base/opencl/GPUReconstructionOCL.cxx | 2 - GPU/GPUTracking/CMakeLists.txt | 25 +- .../Definitions/GPUDefParametersDefault.h | 312 ++---------------- .../GPUDefParametersLoad.template.inc | 8 +- .../Standalone/tools/dumpGPUDefParam.C | 1 - 8 files changed, 54 insertions(+), 310 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 95a47dec946e6..d02309f66c762 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -12,12 +12,9 @@ /// \file GPUReconstructionProcessing.cxx /// \author David Rohr -#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS -#include 
"GPUDefParametersDefault.h" -#include "GPUDefParametersLoad.inc" - #include "GPUReconstructionProcessing.h" #include "GPUReconstructionThreading.h" +#include "GPUDefParametersLoad.inc" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 4cfdf7febabd7..1b830ca95d4fb 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -14,11 +14,6 @@ #define GPUCA_GPUCODE_HOSTONLY -#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS -#include "GPUReconstructionCUDADef.h" -#include "GPUDefParametersDefault.h" -#include "GPUDefParametersLoad.inc" - #include "GPUReconstructionCUDAIncludesSystem.h" #include "GPUReconstructionCUDADef.h" #include @@ -28,6 +23,7 @@ #include "GPUReconstructionIncludes.h" #include "GPUParamRTC.h" #include "GPUReconstructionCUDAHelpers.inc" +#include "GPUDefParametersLoad.inc" #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 #include "utils/qGetLdBinarySymbols.h" diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index c3d614b8d9f98..5f481d2cb9058 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -13,13 +13,10 @@ /// \author David Rohr #define GPUCA_GPUCODE_HOSTONLY -#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS -#include "GPUDefParametersDefault.h" -#include "GPUDefParametersLoad.inc" #include "GPUReconstructionCUDA.h" #include "GPUParamRTC.h" -#include "GPUDefMacros.h" +#include "GPUDefParametersLoad.inc" #include #include "Framework/SHA1.h" #include diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 7310b8b6041a9..af26bfc7aeca8 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ 
b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -12,9 +12,7 @@ /// \file GPUReconstructionOCL.cxx /// \author David Rohr -#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS #include "GPUReconstructionOCLIncludesHost.h" -#include "GPUDefParametersDefault.h" #include "GPUDefParametersLoad.inc" #include diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index fe2fefe2412c0..e722d375e4b93 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -231,7 +231,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") endif() file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) -file(GENERATE +file(GENERATE # TODO: Do this as a list OUTPUT include_gpu_onthefly/GPUReconstructionKernelList.h INPUT Base/GPUReconstructionKernelList.template.h ) @@ -255,10 +255,19 @@ file(GENERATE OUTPUT include_gpu_onthefly/GPUDefParametersLoad.inc INPUT Definitions/GPUDefParametersLoad.template.inc ) -if(NOT ALIGPU_BUILD_TYPE STREQUAL "O2") - include_directories(${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) -endif() -set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoad.inc) +file(GENERATE + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase + CONTENT "$,REPLACE,[^A-Za-z0-9]+,_>,\n>" +) +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h + COMMAND awk "{print(\"#ifndef GPUCA_LB_\" $0 \"\\n#define GPUCA_LB_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h + COMMENT "Generating GPUDefParametersLoadPrepare.h" + VERBATIM + COMMAND_EXPAND_LISTS +) + +set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h 
${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoad.inc ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) include(kernels.cmake) # Optional sources depending on optional dependencies @@ -396,6 +405,12 @@ set_source_files_properties(Base/GPUReconstructionLibrary.cxx PROPERTIES INCLUDE_DIRECTORIES "${CMAKE_CURRENT_BINARY_DIR}") +# Make sure header files generated with add_custom_command are built +target_sources(${targetName} + PRIVATE FILE_SET "generatedHeaders" + TYPE HEADERS + FILES ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h # TODO: build file list for this + BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}) # Add compile definitions and libraries depending on available optional dependencies if(GPUCA_QA) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h index 60403f3afbffc..4435e69c60ff6 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h @@ -492,6 +492,30 @@ #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT, 1 #endif #define GPUCA_GET_THREAD_COUNT(...) 
GPUCA_M_FIRST(__VA_ARGS__) + + // These kernel launch-bounds are derived from one of the constants set above + #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression + #define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression + + #ifdef GPUCA_HAS_ONNX + #define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels + #endif + + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN + #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER + #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER + #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER + #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER + #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER #else #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. #endif @@ -500,29 +524,6 @@ #define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! 
Must not be GPUTYPE-dependent right now! // TODO: Fix! -#define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression -#define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression - -#ifdef GPUCA_HAS_ONNX -#define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels -#define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels -#define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels -#define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels -#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels -#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels -#endif - -#define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN -#define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN -#define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN -#define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN -#define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN -#define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER -#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER -#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER -#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER -#define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER - #if defined(__CUDACC__) || defined(__HIPCC__) #define GPUCA_SPECIALIZE_THRUST_SORTS #endif @@ -622,270 +623,5 @@ #define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) #define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT -#ifdef GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS - // 
Invalid default values, must not be used, but needed for now to make the GPUDefParametersLoad() happy // TOCO: cleanup - #ifndef GPUCA_LB_GPUTPCCreateTrackingData - #define GPUCA_LB_GPUTPCCreateTrackingData 0 - #endif - #ifndef GPUCA_LB_GPUTPCTrackletConstructor - #define GPUCA_LB_GPUTPCTrackletConstructor 0 - #endif - #ifndef GPUCA_LB_GPUTPCTrackletSelector - #define GPUCA_LB_GPUTPCTrackletSelector 0 - #endif - #ifndef GPUCA_LB_GPUTPCNeighboursFinder - #define GPUCA_LB_GPUTPCNeighboursFinder 0 - #endif - #ifndef GPUCA_LB_GPUTPCNeighboursCleaner - #define GPUCA_LB_GPUTPCNeighboursCleaner 0 - #endif - #ifndef GPUCA_LB_GPUTPCExtrapolationTracking - #define GPUCA_LB_GPUTPCExtrapolationTracking 0 - #endif - #ifndef GPUCA_LB_GPUTRDTrackerKernels_gpuVersion - #define GPUCA_LB_GPUTRDTrackerKernels_gpuVersion 0 - #endif - #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fill - #define GPUCA_LB_GPUTPCCreateOccupancyMap_fill 0 - #endif - #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fold - #define GPUCA_LB_GPUTPCCreateOccupancyMap_fold 0 - #endif - #ifndef GPUCA_LB_GPUTRDTrackerKernels_o2Version - #define GPUCA_LB_GPUTRDTrackerKernels_o2Version 0 - #endif - #ifndef GPUCA_LB_GPUTPCConvertKernel - #define GPUCA_LB_GPUTPCConvertKernel 0 - #endif - #ifndef GPUCA_LB_GPUTPCCompressionKernels_step0attached - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 0 - #endif - #ifndef GPUCA_LB_GPUTPCCompressionKernels_step1unattached - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 0 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step0attached - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 0 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step1unattached - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 0 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow - #define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 0 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters - #define 
GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 0 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters - #define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFDecodeZS - #define GPUCA_LB_GPUTPCCFDecodeZS 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFDecodeZSLink - #define GPUCA_LB_GPUTPCCFDecodeZSLink 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFDecodeZSDenseLink - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFGather - #define GPUCA_LB_GPUTPCCFGather 0 - #endif - #ifndef GPUCA_LB_COMPRESSION_GATHER - #define GPUCA_LB_COMPRESSION_GATHER 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerTrackFit - #define GPUCA_LB_GPUTPCGMMergerTrackFit 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerFollowLoopers - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerSectorRefit - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerUnpackResetIds - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerUnpackGlobal - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step0 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step1 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step2 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step3 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step4 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerClearLinks - #define GPUCA_LB_GPUTPCGMMergerClearLinks 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare - #define 
GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeCE - #define GPUCA_LB_GPUTPCGMMergerMergeCE 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerCollect - #define GPUCA_LB_GPUTPCGMMergerCollect 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerSortTracksPrepare - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step0 - #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step1 - #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step2 - #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 - #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 - #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 - #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMO2Output_prepare - #define GPUCA_LB_GPUTPCGMO2Output_prepare 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMO2Output_output - #define GPUCA_LB_GPUTPCGMO2Output_output 0 - #endif - #ifndef GPUCA_LB_GPUITSFitterKernels - #define 
GPUCA_LB_GPUITSFitterKernels 0 - #endif - #ifndef GPUCA_LB_GPUTPCStartHitsFinder - #define GPUCA_LB_GPUTPCStartHitsFinder 0 - #endif - #ifndef GPUCA_LB_GPUTPCStartHitsSorter - #define GPUCA_LB_GPUTPCStartHitsSorter 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFCheckPadBaseline - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFPeakFinder - #define GPUCA_LB_GPUTPCCFPeakFinder 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFNoiseSuppression - #define GPUCA_LB_GPUTPCCFNoiseSuppression 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFDeconvolution - #define GPUCA_LB_GPUTPCCFDeconvolution 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFClusterizer - #define GPUCA_LB_GPUTPCCFClusterizer 0 - #endif - #ifndef GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCNNClusterizerKernels 0 - #endif - #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU - #define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 0 - #endif - #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov - #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 0 - #endif - #ifndef GPUCA_LB_GPUMemClean16 - #define GPUCA_LB_GPUMemClean16 0 - #endif - #ifndef GPUCA_LB_GPUitoa - #define GPUCA_LB_GPUitoa 0 - #endif - #ifndef GPUCA_LB_GPUTPCExtrapolationTrackingCopyNumbers - #define GPUCA_LB_GPUTPCExtrapolationTrackingCopyNumbers 0 - #endif - #ifndef GPUCA_LB_GPUTPCSectorDebugSortKernels_hitData - #define GPUCA_LB_GPUTPCSectorDebugSortKernels_hitData 0 - #endif - #ifndef GPUCA_LB_GPUTPCSectorDebugSortKernels_startHits - #define GPUCA_LB_GPUTPCSectorDebugSortKernels_startHits 0 - #endif - #ifndef 
GPUCA_LB_GPUTPCSectorDebugSortKernels_sectorTracks - #define GPUCA_LB_GPUTPCSectorDebugSortKernels_sectorTracks 0 - #endif - #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_clearIds - #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_clearIds 0 - #endif - #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_sectorTracks - #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_sectorTracks 0 - #endif - #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks1 - #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks1 0 - #endif - #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks2 - #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks2 0 - #endif - #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_borderTracks - #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_borderTracks 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerUnpackSaveNumber - #define GPUCA_LB_GPUTPCGMMergerUnpackSaveNumber 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step1 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step1 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_variant - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_variant 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerSortTracks - #define GPUCA_LB_GPUTPCGMMergerSortTracks 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerSortTracksQPt - #define GPUCA_LB_GPUTPCGMMergerSortTracksQPt 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMO2Output_sort - #define GPUCA_LB_GPUTPCGMO2Output_sort 0 - #endif - #ifndef GPUCA_LB_GPUTPCGMO2Output_mc - #define GPUCA_LB_GPUTPCGMO2Output_mc 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFMCLabelFlattener_setRowOffsets - #define GPUCA_LB_GPUTPCCFMCLabelFlattener_setRowOffsets 0 - #endif - #ifndef GPUCA_LB_GPUTPCCFMCLabelFlattener_flatten - #define GPUCA_LB_GPUTPCCFMCLabelFlattener_flatten 0 - #endif -#endif // GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS - // clang-format on #endif // GPUDEFPARAMETERSDEFAULT_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc 
b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc index cd1875ecefcf7..c17244572ee0c 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -12,6 +12,12 @@ /// \file GPUDefParametersLoad.inc /// \author David Rohr +#include "GPUDefParametersLoadPrepare.h" + +#define GPUCA_M_LB_EMPTY_(...) 0 +#define GPUCA_M_LB_EMPTY_1(...) __VA_ARGS__ +#define GPUCA_M_LB_EMPTY0(...) GPUCA_M_CAT(GPUCA_M_LB_EMPTY_, __VA_OPT__(1))(__VA_ARGS__) + #include "GPUDefParameters.h" #include "GPUDefMacros.h" #include @@ -24,7 +30,7 @@ static GPUDefParameters GPUDefParametersLoad() { return GPUDefParameters{ // clang-format off - {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_LB_>,APPEND,)>,$>}, + {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_LB_EMPTY0(GPUCA_M_FIRST(GPUCA_LB_>,APPEND,))>,$>}, {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0))>,$>}, {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0$0)))>,$>} // clang-format on diff --git a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C index 4a2575c40d79a..4a72b0cef31a3 100644 --- a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C +++ b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C @@ -18,7 +18,6 @@ // Logic for testing to load the default parameters /*#define GPUCA_GPUCODE #define GPUCA_GPUTYPE_AMPERE -#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS #define GPUCA_MAXN 40 #define GPUCA_ROW_COUNT 152 #define GPUCA_TPC_COMP_CHUNK_SIZE 1024 From 2265f9f3b2e049ff33b38d32bd03acae69dfa2d1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 31 Mar 2025 18:38:20 +0200 Subject: [PATCH 3/6] GPU CMake: fix some dependencies, which were just randomly not failing before --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 ++ GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 ++++ 2 files changed, 6 insertions(+) 
diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 843fc3464e151..ff7d45ac327fa 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -158,6 +158,7 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=1) target_compile_definitions(GPUTrackingCUDAKernels PRIVATE $) target_include_directories(GPUTrackingCUDAKernels PRIVATE $) + target_link_libraries(GPUTrackingCUDAKernels PRIVATE $) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o @@ -189,3 +190,4 @@ add_library(O2::GPUTrackingCUDAExternalProvider ALIAS GPUTrackingCUDAExternalPro set_property(TARGET GPUTrackingCUDAExternalProvider PROPERTY CUDA_SEPARABLE_COMPILATION ON) target_compile_definitions(GPUTrackingCUDAExternalProvider PRIVATE $) target_include_directories(GPUTrackingCUDAExternalProvider PRIVATE $) +add_dependencies(GPUTrackingCUDAExternalProvider O2::GPUTracking) # must not depend on GPU backend to avoid cyclic dependencies diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 5796c0c48686b..d785a8bf21d2d 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -184,6 +184,8 @@ target_compile_definitions(${targetName} PRIVATE $) target_include_directories(${MODULE}_CXX PRIVATE $) +target_link_libraries(${MODULE}_CXX PRIVATE $) +add_dependencies(${MODULE}_CXX O2::GPUTracking) target_link_libraries(${targetName} PRIVATE ${MODULE}_CXX) if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") @@ -213,6 +215,7 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=1) target_compile_definitions(GPUTrackingHIPKernels PRIVATE $) 
target_include_directories(GPUTrackingHIPKernels PRIVATE $) + target_link_libraries(GPUTrackingHIPKernels PRIVATE $) if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPKernels ${MODULE}_HIPIFIED) endif() @@ -249,6 +252,7 @@ target_compile_options(GPUTrackingHIPExternalProvider PRIVATE $<$:-fgpu-rdc>) target_compile_definitions(GPUTrackingHIPExternalProvider PRIVATE $) target_include_directories(GPUTrackingHIPExternalProvider PRIVATE $) +add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depend on GPU backend to avoid cyclic dependencies if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() From 1a51d382bad5ff54dbd7f30abb740ae5d9a55503 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 31 Mar 2025 18:45:25 +0200 Subject: [PATCH 4/6] GPU HIP CMake: Remove workaround to build HIP kernels in separate CMake directory, now that we can compile them using HIP language --- GPU/GPUTracking/Base/hip/CMakeLists.txt | 5 ++--- GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt | 13 ------------- 2 files changed, 2 insertions(+), 16 deletions(-) delete mode 100644 GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index d785a8bf21d2d..0387bb8559833 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -207,9 +207,8 @@ endif() if(GPUCA_HIP_COMPILE_MODE STREQUAL "onefile") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=0) elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel") - #add_library(GPUTrackingHIPKernels OBJECT $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.cu>, >) - #set_property(TARGET GPUTrackingHIPKernels PROPERTY HIP_FATBIN_COMPILATION ON) - add_subdirectory(per_kernel) + 
add_library(GPUTrackingHIPKernels OBJECT $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.hip>, >) + target_compile_options(GPUTrackingHIPKernels PRIVATE "--cuda-device-only") target_compile_options(GPUTrackingHIPKernels PRIVATE $<$:-fno-gpu-rdc>) target_link_options(GPUTrackingHIPKernels PRIVATE $<$:-fno-gpu-rdc>) target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=1) diff --git a/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt b/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt deleted file mode 100644 index 789333eea9f04..0000000000000 --- a/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2019-2020 CERN and copyright holders of ALICE O2. -# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -# All rights not expressly granted are reserved. -# -# This software is distributed under the terms of the GNU General Public -# License v3 (GPL Version 3), copied verbatim in the file "COPYING". -# -# In applying this license CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. 
- -add_library(GPUTrackingHIPKernels OBJECT $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.hip>, >) -set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} --cuda-device-only") From b481a3bcc829fe4e299c1c429ac9abfc99cf5e0d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 31 Mar 2025 22:28:52 +0200 Subject: [PATCH 5/6] GPU CMake: get rid of unnecessary temporary variable --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 6 ++---- GPU/GPUTracking/Base/hip/CMakeLists.txt | 12 +++++------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index ff7d45ac327fa..dd72119e3b56f 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -110,11 +110,10 @@ set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.co # -------------------------------- End RTC ------------------------------------------------------- if(ALIGPU_BUILD_TYPE STREQUAL "O2") - set(TMP_BASELIB O2::GPUTracking) o2_add_library( ${MODULE} SOURCES ${SRCS} - PUBLIC_LINK_LIBRARIES ${TMP_BASELIB} O2::ITStrackingCUDA + PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingCUDA PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -127,10 +126,9 @@ endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName "${MODULE}") - set(TMP_BASELIB GPUTracking) add_library(${MODULE} SHARED ${SRCS}) add_library(O2::${MODULE} ALIAS ${MODULE}) - target_link_libraries(${MODULE} PUBLIC ${TMP_BASELIB}) + target_link_libraries(${MODULE} PUBLIC O2::GPUTracking) install(TARGETS GPUTrackingCUDA) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 0387bb8559833..5a68df8ac9527 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -148,11 
+148,10 @@ set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.co # -------------------------------- End RTC ------------------------------------------------------- if(ALIGPU_BUILD_TYPE STREQUAL "O2") - set(TMP_BASELIB O2::GPUTracking) o2_add_library( ${MODULE} SOURCES ${SRCS} - PUBLIC_LINK_LIBRARIES ${TMP_BASELIB} O2::ITStrackingHIP + PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingHIP PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -171,10 +170,9 @@ endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName "${MODULE}") - set(TMP_BASELIB GPUTracking) add_library(${MODULE} SHARED ${SRCS}) add_library(O2::${MODULE} ALIAS ${MODULE}) - target_link_libraries(${MODULE} PUBLIC ${TMP_BASELIB}) + target_link_libraries(${MODULE} PUBLIC O2::GPUTracking) install(TARGETS GPUTrackingHIP) include_directories(${GPUCA_HIP_SOURCE_DIR}) endif() @@ -182,9 +180,9 @@ endif() target_compile_definitions(${targetName} PRIVATE $) add_library(${MODULE}_CXX OBJECT ${SRCS_CXX}) # Adding a C++ library for the .cxx code of the HIP library, such that it does not link to HIP libraries, and CMake HIP Language doesn't add HIP compile flags. 
-target_compile_definitions(${MODULE}_CXX PRIVATE $) -target_include_directories(${MODULE}_CXX PRIVATE $) -target_link_libraries(${MODULE}_CXX PRIVATE $) +target_compile_definitions(${MODULE}_CXX PRIVATE $) +target_include_directories(${MODULE}_CXX PRIVATE $) +target_link_libraries(${MODULE}_CXX PRIVATE $) add_dependencies(${MODULE}_CXX O2::GPUTracking) target_link_libraries(${targetName} PRIVATE ${MODULE}_CXX) From e597e734ed10ad259528d53f5e3bedd1fcf8b61e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 1 Apr 2025 07:19:58 +0200 Subject: [PATCH 6/6] GPU: Fix codechecker: Empty destructor should be default --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 1b830ca95d4fb..f475929d49d50 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -82,7 +82,7 @@ GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg mRtcBinExtension = ".o"; #endif } -GPUReconstructionCUDA::~GPUReconstructionCUDA() {} +GPUReconstructionCUDA::~GPUReconstructionCUDA() = default; GPUReconstruction* GPUReconstruction_Create_CUDA(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionCUDA(cfg); }