From 0261447414a2777aebff38c5498d3b3fa5cf8271 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 15 Apr 2025 14:10:51 +0200 Subject: [PATCH 1/2] Revert "GPU: Temporarily move some defines back to the wrapper, to be cleaned up in another PR" This reverts commit da00550e828dad4617bc4730797d154e4bf79858. --- .../Definitions/GPUDefParametersDefaults.h | 40 +++++++++++++++ .../Definitions/GPUDefParametersWrapper.h | 50 +------------------ 2 files changed, 41 insertions(+), 49 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index ce703e2ceba4a..83ef7d8cf810b 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -75,6 +75,16 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_VEGA) #define GPUCA_WARP_SIZE 64 #define GPUCA_THREAD_COUNT_DEFAULT 256 @@ -128,6 +138,16 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 512 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + 
#define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_AMPERE) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -181,6 +201,16 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 384 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_TURING) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -226,6 +256,16 @@ #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half // #define GPUCA_USE_TEXTURES #elif defined(GPUCA_GPUTYPE_OPENCL) #else diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index 8d8815d8a8044..beeefa4eb5f9d 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,55 +22,7 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#ifdef GPUCA_GPUCODE -#if defined(GPUCA_GPUTYPE_MI2xx) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define 
GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_VEGA) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_AMPERE) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_TURING) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half -#endif -#endif - -#ifdef GPUCA_GPUCODE +#if defined(GPUCA_GPUCODE) #include "GPUDefParametersDefaults.h" #endif #include "GPUDefParametersConstants.h" From 5106f10237645cc23d95283be7bd462f8ac2024f Mon Sep 17 00:00:00 2001 From: 
David Rohr Date: Tue, 15 Apr 2025 14:10:39 +0200 Subject: [PATCH 2/2] GPU: Move compile-time constant parameters to new scheme with runtimeParameter struct and automatic RTC-generated defines With this, we can revert the workaround to have the parameters as defines in the wrapper file --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 - GPU/GPUTracking/Base/GPUReconstruction.h | 3 +- .../Base/GPUReconstructionIncludes.h | 19 +- .../Base/GPUReconstructionProcessing.h | 1 + .../Base/cuda/GPUReconstructionCUDA.cu | 5 - .../Base/cuda/GPUReconstructionCUDA.h | 1 - .../Base/opencl/GPUReconstructionOCL.cxx | 5 - .../Base/opencl/GPUReconstructionOCL.h | 1 - GPU/GPUTracking/CMakeLists.txt | 9 +- .../DataCompression/GPUTPCCompression.cxx | 16 +- .../Definitions/GPUDefConstantsAndSettings.h | 2 +- .../Definitions/GPUDefParametersConstants.h | 45 +- .../Definitions/GPUDefParametersDefaults.h | 567 ++++++++++-------- .../GPUDefParametersLoad.template.inc | 35 +- .../GPUDefParametersRuntime.template.h | 4 + .../Definitions/GPUDefParametersWrapper.h | 6 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 8 +- .../Global/GPUChainTrackingCompression.cxx | 25 +- .../Global/GPUChainTrackingMerger.cxx | 10 +- .../Global/GPUChainTrackingSectorTracker.cxx | 5 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 10 +- .../SectorTracker/GPUTPCNeighboursFinder.cxx | 2 +- .../SectorTracker/GPUTPCNeighboursFinder.h | 10 +- .../SectorTracker/GPUTPCStartHitsFinder.cxx | 4 +- .../SectorTracker/GPUTPCTracker.cxx | 6 +- .../SectorTracker/GPUTPCTrackletSelector.cxx | 18 +- .../SectorTracker/GPUTPCTrackletSelector.h | 8 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 19 + GPU/GPUTracking/dEdx/GPUdEdx.cxx | 4 +- GPU/GPUTracking/dEdx/GPUdEdx.h | 14 +- GPU/GPUTracking/kernels.cmake | 13 + 34 files changed, 466 insertions(+), 419 deletions(-) diff --git 
a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 2f643706647ee..b4dac39ae1cd2 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -292,8 +292,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings.delayedOutput = false; } - UpdateAutomaticProcessingSettings(); - GPUCA_GPUReconstructionUpdateDefaults(); if (!mProcessingSettings.rtc.enable) { mProcessingSettings.rtc.optConstexpr = false; } diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 18098396e1349..23fb6e4d9ff06 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -48,6 +48,7 @@ struct GPUReconstructionPipelineContext; struct GPUReconstructionThreading; class GPUROOTDumpCore; class ThrustVolatileAllocator; +struct GPUDefParameters; namespace gpu_reconstruction_kernels { @@ -205,6 +206,7 @@ class GPUReconstruction GPUOutputControl& OutputControl() { return mOutputControl; } uint32_t NStreams() const { return mNStreams; } const void* DeviceMemoryBase() const { return mDeviceMemoryBase; } + virtual const GPUDefParameters& getGPUParameters(bool doGPU) const = 0; RecoStepField GetRecoSteps() const { return mRecoSteps.steps; } RecoStepField GetRecoStepsGPU() const { return mRecoSteps.stepsGPUMask; } @@ -239,7 +241,6 @@ class GPUReconstruction void FreeRegisteredMemory(GPUMemoryResource* res); GPUReconstruction(const GPUSettingsDeviceBackend& cfg); // Constructor int32_t InitPhaseBeforeDevice(); - virtual void UpdateAutomaticProcessingSettings() {} virtual int32_t InitDevice() = 0; int32_t InitPhasePermanentMemory(); int32_t InitPhaseAfterDevice(); diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index 6aba7e30a49d7..d3f11d86a731d 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ 
b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -29,21 +29,4 @@ #include #include -#define GPUCA_GPUReconstructionUpdateDefaults() \ - if (mProcessingSettings.alternateBorderSort < 0) { \ - mProcessingSettings.alternateBorderSort = GPUCA_ALTERNATE_BORDER_SORT; \ - } \ - if (mProcessingSettings.mergerSortTracks < 0) { \ - mProcessingSettings.mergerSortTracks = GPUCA_SORT_BEFORE_FIT; \ - } \ - if (param().rec.tpc.looperInterpolationInExtraPass < 0) { \ - param().rec.tpc.looperInterpolationInExtraPass = GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION; \ - } \ - if (mProcessingSettings.tpcCompressionGatherModeKernel < 0) { \ - mProcessingSettings.tpcCompressionGatherModeKernel = GPUCA_COMP_GATHER_KERNEL; \ - } \ - if (mProcessingSettings.tpcCompressionGatherMode < 0) { \ - mProcessingSettings.tpcCompressionGatherMode = GPUCA_COMP_GATHER_MODE; \ - } - -#endif +#endif // GPURECONSTRUCTIONINCLUDES_H diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 2428027118c0a..e8892c4be702b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -101,6 +101,7 @@ class GPUReconstructionProcessing : public GPUReconstruction uint32_t countToGPU = 0; uint32_t countToHost = 0; }; + const GPUDefParameters& getGPUParameters(bool doGPU) const override { return *(doGPU ? 
mParDevice : mParCPU); } protected: GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 47a9b675d27f6..3bea91994ba86 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -99,11 +99,6 @@ void GPUReconstructionCUDA::GetITSTraits(std::unique_ptr } } -void GPUReconstructionCUDA::UpdateAutomaticProcessingSettings() -{ - GPUCA_GPUReconstructionUpdateDefaults(); -} - int32_t GPUReconstructionCUDA::InitDevice_Runtime() { #ifndef __HIPCC__ // CUDA diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 3441c6b9a4fd6..b1a3a53a6a62f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -59,7 +59,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels GetThreadContext() override; void SynchronizeGPU() override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index af26bfc7aeca8..e276f83413bbc 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -60,11 +60,6 @@ int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, cons return error != CL_SUCCESS; } -void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() -{ - GPUCA_GPUReconstructionUpdateDefaults(); -} - int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() { if (mMaster == nullptr) { diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 16ef9b5e87fe8..abde42f01f073 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -37,7 +37,6 @@ class 
GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; - void UpdateAutomaticProcessingSettings() override; virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index f428d982394e0..eaeec508ff27a 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -247,14 +247,17 @@ foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST}) file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME} INPUT ${TEMPLATE_FILE}) list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME}) endforeach() +set(GPUDEFPARAMETERSLBLIST "$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,LB_>,\n>\n") +string(APPEND GPUDEFPARAMETERSLBLIST "$,PREPEND,PAR_>,\n>\n") +string(APPEND GPUDEFPARAMETERSLBLIST "$,PREPEND,PAR_>,\n>") file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase - CONTENT "$,REPLACE,[^A-Za-z0-9]+,_>,\n>" -) + CONTENT ${GPUDEFPARAMETERSLBLIST}) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h - COMMAND awk "{print(\"#ifndef GPUCA_LB_\" $0 \"\\n#define GPUCA_LB_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h + COMMAND awk "{print(\"#ifndef GPUCA_\" $0 \"\\n#define GPUCA_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h COMMENT "Generating GPUDefParametersLoadPrepare.h" + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase VERBATIM COMMAND_EXPAND_LISTS ) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx 
b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 335b201d11d07..8a22545314252 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -16,6 +16,7 @@ #include "GPUReconstruction.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" +#include "GPUDefParametersRuntime.h" using namespace o2::gpu; @@ -36,11 +37,12 @@ void* GPUTPCCompression::SetPointersOutputHost(void* mem) void* GPUTPCCompression::SetPointersScratch(void* mem) { + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; computePointerWithAlignment(mem, mClusterStatus, mMaxClusters); - if (mRec->GetProcessingSettings().tpcCompressionGatherMode >= 2) { + if (gatherMode >= 2) { computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTracks); } - if (mRec->GetProcessingSettings().tpcCompressionGatherMode != 1) { + if (gatherMode != 1) { SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false); } return mem; @@ -48,8 +50,9 @@ void* GPUTPCCompression::SetPointersScratch(void* mem) void* GPUTPCCompression::SetPointersOutput(void* mem) { + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? 
mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTrackClusters); - if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 1) { + if (gatherMode == 1) { SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false); } return mem; @@ -102,12 +105,13 @@ void* GPUTPCCompression::SetPointersMemory(void* mem) void GPUTPCCompression::RegisterMemoryAllocation() { AllocateAndInitializeLate(); + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; mMemoryResOutputHost = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputHost, GPUMemoryResource::MEMORY_OUTPUT_FLAG | GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_CUSTOM, "TPCCompressionOutputHost"); - if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { mMemoryResOutputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputGPU, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_STACK, "TPCCompressionOutputGPU"); } - uint32_t stackScratch = (mRec->GetProcessingSettings().tpcCompressionGatherMode != 3) ? GPUMemoryResource::MEMORY_STACK : 0; - if (mRec->GetProcessingSettings().tpcCompressionGatherMode < 2) { + uint32_t stackScratch = (gatherMode != 3) ? 
GPUMemoryResource::MEMORY_STACK : 0; + if (gatherMode < 2) { mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutput, GPUMemoryResource::MEMORY_OUTPUT | stackScratch, "TPCCompressionOutput"); } mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersScratch, GPUMemoryResource::MEMORY_SCRATCH | stackScratch, "TPCCompressionScratch"); diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index 2d7aca8d71b92..48218dd7859e6 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -13,7 +13,7 @@ /// \author David Rohr // This files contains compile-time constants affecting the GPU algorithms / reconstruction results. -// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefParameters.h +// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefParameters*.h #ifndef GPUDEFCONSTANTSANDSETTINGS_H #define GPUDEFCONSTANTSANDSETTINGS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h index 3a16d02ecf7c6..dd4a5dcbe7ba8 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h @@ -21,16 +21,12 @@ #define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! 
#if defined(__CUDACC__) || defined(__HIPCC__) - #define GPUCA_SPECIALIZE_THRUST_SORTS + #define GPUCA_SPECIALIZE_THRUST_SORTS // Not compiled with RTC, so must be compile-time constant #endif #define GPUCA_MAX_THREADS 1024 #define GPUCA_MAX_STREAMS 36 -#if defined(GPUCA_GPUCODE) - #define GPUCA_SORT_STARTHITS // Sort the start hits when running on GPU -#endif - #define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid #define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers #define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks @@ -44,44 +40,5 @@ #define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread #define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread -#ifdef GPUCA_GPUCODE - #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 - #endif - #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 - #endif - #ifndef GPUCA_ALTERNATE_BORDER_SORT - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #endif - #ifndef GPUCA_SORT_BEFORE_FIT - #define GPUCA_SORT_BEFORE_FIT 0 - #endif - #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #endif - #ifndef GPUCA_COMP_GATHER_KERNEL - #define GPUCA_COMP_GATHER_KERNEL 0 - #endif - #ifndef GPUCA_COMP_GATHER_MODE - #define GPUCA_COMP_GATHER_MODE 2 - #endif -#else - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #define GPUCA_SORT_BEFORE_FIT 0 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #define GPUCA_THREAD_COUNT_FINDER 1 - #define GPUCA_COMP_GATHER_KERNEL 0 - #define GPUCA_COMP_GATHER_MODE 0 -#endif -#ifndef GPUCA_DEDX_STORAGE_TYPE - #define GPUCA_DEDX_STORAGE_TYPE float -#endif -#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float -#endif - // clang-format on #endif // 
GPUDEFPARAMETERSCONSTANTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 83ef7d8cf810b..b212abbcd2707 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -14,266 +14,265 @@ // This file contains compile-time constants affecting the GPU performance. -#if !defined(GPUDEFPARAMETERSDEFAULTS_H) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. +#if !defined(GPUDEFPARAMETERSDEFAULTS_H) #define GPUDEFPARAMETERSDEFAULTS_H // clang-format off // Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds) // GPU Run Configuration -#ifdef GPUCA_GPUCODE -#if defined(GPUCA_GPUTYPE_MI2xx) - #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 256 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024 - #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 - #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 512 - #define 
GPUCA_LB_GPUTPCGMMergerResolve_step2 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 512 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define GPUCA_LB_GPUTPCCFPeakFinder 512 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512 - #define GPUCA_LB_GPUTPCCFClusterizer 448 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_VEGA) - #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 128 - #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 256, 8 - #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 512 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define 
GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define GPUCA_LB_GPUTPCCFPeakFinder 512 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512 - #define GPUCA_LB_GPUTPCCFClusterizer 512 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_AMPERE) - #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT_DEFAULT 512 - #define GPUCA_LB_GPUTPCCreateTrackingData 384 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 512 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4 - #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define 
GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64,8 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448 - #define GPUCA_LB_GPUTPCCFPeakFinder 128 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 
448 - #define GPUCA_LB_GPUTPCCFDeconvolution 384 - #define GPUCA_LB_GPUTPCCFClusterizer 448 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_TURING) - #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT_DEFAULT 512 - #define GPUCA_LB_GPUTPCCreateTrackingData 256 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 512 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 - #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 - 
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - // #define GPUCA_USE_TEXTURES -#elif defined(GPUCA_GPUTYPE_OPENCL) -#else - #error GPU TYPE NOT SET -#endif -#endif // GPUCA_GPUCODE +#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
+ // GPU-architecture-dependent default settings + #if defined(GPUCA_GPUTYPE_MI2xx) + #define GPUCA_WARP_SIZE 64 + #define GPUCA_THREAD_COUNT_DEFAULT 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 + #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 + #define GPUCA_LB_GPUTPCStartHitsFinder 1024 + #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 + #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 896 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 512 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define 
GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 + #define GPUCA_LB_GPUTPCCFPeakFinder 512 + #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 + #define GPUCA_LB_GPUTPCCFDeconvolution 512 + #define GPUCA_LB_GPUTPCCFClusterizer 448 + #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #elif defined(GPUCA_GPUTYPE_VEGA) + #define GPUCA_WARP_SIZE 64 + #define GPUCA_THREAD_COUNT_DEFAULT 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 128 + #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 + #define GPUCA_LB_GPUTPCStartHitsFinder 1024 + #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 + #define GPUCA_LB_GPUTPCTrackletSelector 256, 8 + #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 896 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define 
GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 512 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 + #define GPUCA_LB_GPUTPCCFPeakFinder 512 + #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 + #define GPUCA_LB_GPUTPCCFDeconvolution 512 + #define GPUCA_LB_GPUTPCCFClusterizer 512 + #define 
GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #elif defined(GPUCA_GPUTYPE_AMPERE) + #define GPUCA_WARP_SIZE 32 + #define GPUCA_THREAD_COUNT_DEFAULT 512 + #define GPUCA_LB_GPUTPCCreateTrackingData 384 + #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 + #define GPUCA_LB_GPUTPCStartHitsFinder 512 + #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4 + #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 512 + #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 + #define 
GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3 + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64,8 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448 + #define GPUCA_LB_GPUTPCCFPeakFinder 128 + #define GPUCA_LB_GPUTPCCFNoiseSuppression 448 + #define GPUCA_LB_GPUTPCCFDeconvolution 384 + #define GPUCA_LB_GPUTPCCFClusterizer 448 + #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #elif defined(GPUCA_GPUTYPE_TURING) + #define GPUCA_WARP_SIZE 32 + #define GPUCA_THREAD_COUNT_DEFAULT 512 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 + #define 
GPUCA_LB_GPUTPCStartHitsSorter 512, 1 + #define GPUCA_LB_GPUTPCStartHitsFinder 512 + #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 + #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 512 + #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 + #define 
GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 + #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + // #define GPUCA_USE_TEXTURES + #elif defined(GPUCA_GPUTYPE_OPENCL) + #else + #error GPU TYPE NOT SET + #endif -#ifdef GPUCA_GPUCODE // Default settings for GPU, if not already set for selected GPU type #ifndef GPUCA_WARP_SIZE #define GPUCA_WARP_SIZE 32 @@ -509,7 +508,67 @@ #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER -#endif + + // Defaults for non-LB parameters + #ifndef GPUCA_PAR_SORT_STARTHITS + #define GPUCA_PAR_SORT_STARTHITS 1 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 + #endif + #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 12 + #endif + #ifndef GPUCA_PAR_ALTERNATE_BORDER_SORT + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 0 + #endif + #ifndef GPUCA_PAR_SORT_BEFORE_FIT + #define GPUCA_PAR_SORT_BEFORE_FIT 0 + #endif + #ifndef GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_KERNEL + #define GPUCA_PAR_COMP_GATHER_KERNEL 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_MODE + #define GPUCA_PAR_COMP_GATHER_MODE 2 + #endif 
+#endif // defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) + +#ifndef GPUCA_GPUCODE_GENRTC + // Defaults (also for CPU) for non-LB parameters + #ifndef GPUCA_PAR_SORT_STARTHITS + #define GPUCA_PAR_SORT_STARTHITS 0 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 + #endif + #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 0 + #endif + #ifndef GPUCA_PAR_ALTERNATE_BORDER_SORT + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 0 + #endif + #ifndef GPUCA_PAR_SORT_BEFORE_FIT + #define GPUCA_PAR_SORT_BEFORE_FIT 0 + #endif + #ifndef GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_KERNEL + #define GPUCA_PAR_COMP_GATHER_KERNEL 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_MODE + #define GPUCA_PAR_COMP_GATHER_MODE 0 + #endif + #ifndef GPUCA_PAR_DEDX_STORAGE_TYPE + #define GPUCA_PAR_DEDX_STORAGE_TYPE float + #endif + #ifndef GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE float + #endif +#endif // GPUCA_GPUCODE_GENRTC // clang-format on #endif // GPUDEFPARAMETERSDEFAULTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc index 938cedbdacc93..ac71adc6232a6 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -32,29 +32,38 @@ static GPUDefParameters GPUDefParametersLoad() // clang-format off {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_LB_EMPTY0(GPUCA_M_FIRST(GPUCA_LB_>,APPEND,))>,$>}, {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0))>,$>}, - {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0$0)))>,$>} + 
{$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0$0)))>,$>}, + $,PREPEND,GPUCA_PAR_>,$>, + $,PREPEND,GPUCA_M_STR(GPUCA_PAR_>,APPEND,)>,$> // clang-format on }; } -#define GPUCA_EXPORT_KERNEL(name) \ - if (par.par_LB_maxThreads[i] > 0) { \ - o << "#define GPUCA_LB_" << GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ - if (par.par_LB_minBlocks[i] > 0) { \ - o << ", " << par.par_LB_minBlocks[i]; \ - } \ - if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ - o << ", " << par.par_LB_forceBlocks[i]; \ - } \ - o << "\n"; \ - } \ +#define GPUCA_EXPORT_KERNEL_LB(name) \ + if (par.par_LB_maxThreads[i] > 0) { \ + o << "#define GPUCA_LB_" GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ + if (par.par_LB_minBlocks[i] > 0) { \ + o << ", " << par.par_LB_minBlocks[i]; \ + } \ + if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ + o << ", " << par.par_LB_forceBlocks[i]; \ + } \ + o << "\n"; \ + } \ i++; +#define GPUCA_EXPORT_KERNEL_PARAM(name) \ + o << "#define GPUCA_PAR_" GPUCA_M_STR(name) " " << GPUCA_M_CAT(par.par_, name) << "\n"; + static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC) { std::stringstream o; // clang-format off int32_t i = 0; - $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_EXPORT_KERNEL(>,APPEND,)>, + $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_EXPORT_KERNEL_LB(>,APPEND,)>, + > + $,PREPEND,GPUCA_EXPORT_KERNEL_PARAM(>,APPEND,)>, + > + $,PREPEND,GPUCA_EXPORT_KERNEL_PARAM(>,APPEND,)>, > return o.str(); // clang-format on } diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h index f3537c058a824..d023de7916676 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h @@ -21,6 +21,10 @@ struct GPUDefParameters { // clang-format off int32_t par_LB_maxThreads[$>] = {}; int32_t par_LB_minBlocks[$>] = {}; int32_t 
par_LB_forceBlocks[$>] = {}; + $,PREPEND,int32_t par_>,APPEND, = 0>,$ + >; + $,PREPEND,char par_>,APPEND,[128] = "">,$ + >; }; // clang-format on } // namespace o2::gpu diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index beeefa4eb5f9d..b2c08d689aeb2 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,9 +22,7 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#if defined(GPUCA_GPUCODE) #include "GPUDefParametersDefaults.h" -#endif #include "GPUDefParametersConstants.h" namespace o2::gpu @@ -38,8 +36,8 @@ namespace o2::gpu #define GPUCA_GET_WARP_COUNT(...) 1 // since launch bound constants are not defined in host-code, and must evaluate to 1! #endif -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) -#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) +#define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE) +#define GPUCA_PAR_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_PAR_DEDX_STORAGE_TYPE) // #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 6753db280d5bf..43fa49ff74817 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -40,6 +40,7 @@ #include "GPUTrackingInputProvider.h" #include "GPUNewCalibValues.h" #include "GPUTriggerOutputs.h" +#include "GPUDefParametersRuntime.h" #include "GPUTPCClusterStatistics.h" #include "GPUHostDataTypes.h" @@ -254,6 +255,7 @@ bool GPUChainTracking::ValidateSteps() bool GPUChainTracking::ValidateSettings() { + int32_t gatherMode = 
mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; if ((param().rec.tpc.nWays & 1) == 0) { GPUError("nWay setting musst be odd number!"); return false; @@ -270,7 +272,7 @@ bool GPUChainTracking::ValidateSettings() GPUError("NStreams of %d insufficient for %d nTPCClustererLanes", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); return false; } - if (GetProcessingSettings().noGPUMemoryRegistration && GetProcessingSettings().tpcCompressionGatherMode != 3) { + if (GetProcessingSettings().noGPUMemoryRegistration && gatherMode != 3) { GPUError("noGPUMemoryRegistration only possible with gather mode 3"); return false; } @@ -286,7 +288,7 @@ bool GPUChainTracking::ValidateSettings() GPUError("Must use external output for double pipeline mode"); return false; } - if (GetProcessingSettings().tpcCompressionGatherMode == 1) { + if (gatherMode == 1) { GPUError("Double pipeline incompatible to compression mode 1"); return false; } @@ -295,7 +297,7 @@ bool GPUChainTracking::ValidateSettings() return false; } } - if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (GetProcessingSettings().tpcCompressionGatherMode == 1 || GetProcessingSettings().tpcCompressionGatherMode == 3)) { + if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (gatherMode == 1 || gatherMode == 3)) { GPUError("Invalid tpcCompressionGatherMode for compression on CPU"); return false; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 03d319f42fd6b..8fb6fc4771658 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ 
b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -18,6 +18,7 @@ #include "GPUTrackingInputProvider.h" #include "GPUTPCCFChainContext.h" #include "TPCClusterDecompressor.h" +#include "GPUDefParametersRuntime.h" #include "utils/strtag.h" #include @@ -30,6 +31,7 @@ int32_t GPUChainTracking::RunTPCCompression() mRec->PushNonPersistentMemory(qStr2Tag("TPCCOMPR")); RecoStep myStep = RecoStep::TPCCompression; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCCompression; + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(doGPU).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; GPUTPCCompression& Compressor = processors()->tpcCompressor; GPUTPCCompression& CompressorShadow = doGPU ? processorsShadow()->tpcCompressor : Compressor; const auto& threadContext = GetThreadContext(); @@ -37,7 +39,7 @@ int32_t GPUChainTracking::RunTPCCompression() RecordMarker(&mEvents->single, 0); } - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile } SetupGPUProcessor(&Compressor, true); @@ -70,7 +72,7 @@ int32_t GPUChainTracking::RunTPCCompression() Compressor.mOutputFlat->set(outputSize, *Compressor.mOutput); char* hostFlatPtr = (char*)Compressor.mOutput->qTotU; // First array as allocated in GPUTPCCompression::SetPointersCompressedClusters size_t copySize = 0; - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { CompressorShadow.mOutputA = Compressor.mOutput; copySize = AllocateRegisteredMemory(Compressor.mMemoryResOutputGPU); // We overwrite Compressor.mOutput with the allocated output pointers on the GPU } @@ -81,8 +83,8 @@ int32_t GPUChainTracking::RunTPCCompression() SynchronizeStream(OutputStream()); // Synchronize output copies running in parallel from memory that might be released, only the following async copy from stacked memory is 
safe after the chain finishes. outputStream = OutputStream(); } - if (GetProcessingSettings().tpcCompressionGatherMode >= 2) { - if (GetProcessingSettings().tpcCompressionGatherMode == 2) { + if (gatherMode >= 2) { + if (gatherMode == 2) { void* devicePtr = mRec->getGPUPointer(Compressor.mOutputFlat); if (devicePtr != Compressor.mOutputFlat) { CompressedClustersPtrs& ptrs = *Compressor.mOutput; // We need to update the ptrs with the gpu-mapped version of the host address space @@ -94,7 +96,8 @@ int32_t GPUChainTracking::RunTPCCompression() TransferMemoryResourcesToGPU(myStep, &Compressor, outputStream); constexpr uint32_t nBlocksDefault = 2; constexpr uint32_t nBlocksMulti = 1 + 2 * 200; - switch (GetProcessingSettings().tpcCompressionGatherModeKernel) { + int32_t gatherModeKernel = mRec->GetProcessingSettings().tpcCompressionGatherModeKernel == -1 ? mRec->getGPUParameters(doGPU).par_COMP_GATHER_KERNEL : mRec->GetProcessingSettings().tpcCompressionGatherModeKernel; /* -1 defers to par_COMP_GATHER_KERNEL from getGPUParameters(doGPU); any other value is the explicitly configured kernel */ + switch (gatherModeKernel) { case 0: runKernel(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression)); getKernelTimer(RecoStep::TPCCompression, 0, outputSize, false); @@ -117,10 +120,10 @@ int32_t GPUChainTracking::RunTPCCompression() getKernelTimer(RecoStep::TPCCompression, 0, outputSize, false); break; default: - GPUError("Invalid compression kernel %d selected.", (int32_t)GetProcessingSettings().tpcCompressionGatherModeKernel); + GPUError("Invalid compression kernel %d selected.", (int32_t)gatherModeKernel); return 1; } - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { RecordMarker(&mEvents->stream[outputStream], outputStream); char* deviceFlatPts = (char*)Compressor.mOutput->qTotU; if (GetProcessingSettings().doublePipeline) { @@ -135,9 +138,9 @@ int32_t GPUChainTracking::RunTPCCompression() } } else { int8_t direction = 0; - if (GetProcessingSettings().tpcCompressionGatherMode == 0) { + if (gatherMode == 0) { P = &CompressorShadow.mPtrs; - } else if 
(GetProcessingSettings().tpcCompressionGatherMode == 1) { + } else if (gatherMode == 1) { P = &Compressor.mPtrs; direction = -1; gatherTimer = &getTimer("GPUTPCCompression_GatherOnCPU", 0); @@ -181,11 +184,11 @@ int32_t GPUChainTracking::RunTPCCompression() GPUMemCpyAlways(myStep, O->timeA, P->timeA, O->nTracks * sizeof(O->timeA[0]), outputStream, direction); GPUMemCpyAlways(myStep, O->padA, P->padA, O->nTracks * sizeof(O->padA[0]), outputStream, direction); } - if (GetProcessingSettings().tpcCompressionGatherMode == 1) { + if (gatherMode == 1) { gatherTimer->Stop(); } mIOPtrs.tpcCompressedClusters = Compressor.mOutputFlat; - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { SynchronizeEventAndRelease(mEvents->stream[outputStream]); mRec->ReturnVolatileDeviceMemory(); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index ffab3ba0be063..a647c213660c9 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -14,6 +14,7 @@ #include "GPUChainTracking.h" #include "GPULogging.h" +#include "GPUDefParametersRuntime.h" #include "GPUO2DataTypes.h" #include "GPUQA.h" #include "utils/strtag.h" @@ -31,7 +32,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto runKernel({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0); } uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS; - if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) { + if (GetProcessingSettings().alternateBorderSort == -1 ? 
mRec->getGPUParameters(doGPU).par_ALTERNATE_BORDER_SORT : GetProcessingSettings().alternateBorderSort) { RecordMarker(&mEvents->single, 0); TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); for (uint32_t i = 0; i < n; i++) { @@ -176,7 +177,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) waitForTransfer = 1; } - if (GetProcessingSettings().mergerSortTracks) { + const bool mergerSortTracks = GetProcessingSettings().mergerSortTracks == -1 ? mRec->getGPUParameters(doGPU).par_SORT_BEFORE_FIT : GetProcessingSettings().mergerSortTracks; + if (mergerSortTracks) { runKernel(GetGridAuto(0, deviceType)); CondWaitEvent(waitForTransfer, &mEvents->single); runKernel(GetGridAuto(0, deviceType)); @@ -212,11 +214,11 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mOutputQueue.clear(); } - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), GetProcessingSettings().mergerSortTracks ? 1 : 0); + runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); if (param().rec.tpc.retryRefit == 1) { runKernel(GetGridAuto(0), -1); } - if (param().rec.tpc.looperInterpolationInExtraPass) { + if (param().rec.tpc.looperInterpolationInExtraPass == -1 ? 
mRec->getGPUParameters(doGPU).par_MERGER_SPLIT_LOOP_INTERPOLATION : param().rec.tpc.looperInterpolationInExtraPass) { runKernel(GetGridAuto(0)); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 3e7447892307a..64a9179baf0e6 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -19,6 +19,7 @@ #include "GPUTPCClusterData.h" #include "GPUTrackingInputProvider.h" #include "GPUTPCClusterOccupancyMap.h" +#include "GPUDefParametersRuntime.h" #include "utils/strtag.h" #include @@ -200,11 +201,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}}); -#ifdef GPUCA_SORT_STARTHITS_GPU - if (doGPU) { + if (mRec->getGPUParameters(doGPU).par_SORT_STARTHITS) { runKernel({GetGridAuto(useStream), {iSector}}); } -#endif if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGrid(1, 1, useStream), {iSector}}); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index bdf60f744b9ca..f42e5f35b1dc9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -37,6 +37,7 @@ #include "TPCFastTransform.h" #include "GPUTPCConvertImpl.h" #include "GPUTPCGeometry.h" +#include "GPUDefParametersRuntime.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" @@ -288,7 +289,8 @@ void* GPUTPCGMMerger::SetPointersMemory(void* mem) void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) { computePointerWithAlignment(mem, mTrackOrderAttach, mNMaxTracks); - if (mRec->GetProcessingSettings().mergerSortTracks) { + const bool mergerSortTracks = mRec->GetProcessingSettings().mergerSortTracks == -1 ? 
mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging).par_SORT_BEFORE_FIT : mRec->GetProcessingSettings().mergerSortTracks; + if (mergerSortTracks) { computePointerWithAlignment(mem, mTrackOrderProcess, mNMaxTracks); } return mem; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h index 238b04510862e..ba251ce34a3eb 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h @@ -32,7 +32,7 @@ enum attachTypes { attachAttached = 0x40000000, struct InterpolationErrorHit { float posY, posZ; - GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A errorY, errorZ; + GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A errorY, errorZ; }; struct InterpolationErrors { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index f1aac3da9a7a2..1617ac7b828af 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -663,7 +663,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, GPUCA_DEBUG_STREAMER_CHECK(if (debugVals) { debugVals->err2Y = err2Y; debugVals->err2Z = err2Z; }); if (rejectChi2 >= rejectInterFill) { - if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { + if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { rejectChi2 = rejectDirect; } else { int32_t retVal = InterpolateReject(param, posY, posZ, clusterState, rejectChi2, inter, err2Y, err2Z); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 260c64db052af..77453a87b3763 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -309,7 +309,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } else { int8_t rejectChi2 = 
attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? (GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); #if EXTRACT_RESIDUALS == 1 - if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { + if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { const float Iz0 = interpolation.hit[ihit].posY - mP[0]; const float Iz1 = interpolation.hit[ihit].posZ - mP[1]; float Iw0 = mC[2] + (float)interpolation.hit[ihit].errorZ; @@ -631,7 +631,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric for (uint32_t ih = hitFst; ih < hitLst; ih++) { int32_t id = idOffset + ids[ih]; GPUAtomic(uint32_t)* const weight = weights + id; -#if !defined(GPUCA_NO_ATOMIC_PRECHECK) && GPUCA_NO_ATOMIC_PRECHECK < 1 +#if GPUCA_NO_ATOMIC_PRECHECK == 0 if (myWeight <= *weight) { continue; } @@ -757,7 +757,8 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr if (Merger->Param().rec.tpc.disableRefitAttachment & 4) { return 1; } - if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { + const bool inExtraPass = Merger->Param().rec.tpc.looperInterpolationInExtraPass == -1 ? GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION : Merger->Param().rec.tpc.looperInterpolationInExtraPass; + if (inExtraPass && phase2 == false) { StoreAttachMirror(Merger, sector, iRow, iTrack, toAlpha, toY, toX, toSector, toRow, inFlyDirection, prop.GetAlpha()); return 1; } @@ -862,7 +863,8 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU if (Merger->Param().rec.tpc.disableRefitAttachment & 8) { return; } - if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { + const bool inExtraPass = Merger->Param().rec.tpc.looperInterpolationInExtraPass == -1 ? 
GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION : Merger->Param().rec.tpc.looperInterpolationInExtraPass; + if (inExtraPass && phase2 == false) { StoreAttachMirror(Merger, sector, iRow, iTrack, 0, toY, 0, -1, 0, 0, prop.GetAlpha()); return; } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index ec348b59ce7a5..d76c079bb406f 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -76,7 +76,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } #define UnrollGlobal 4 -#define MaxShared GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP +#define MaxShared GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP #if MaxShared < GPUCA_MAXN #define MaxGlobal ((GPUCA_MAXN - MaxShared - 1) / UnrollGlobal + 1) * UnrollGlobal #else diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 1bf5000cfbe5c..0ecd230a67415 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -40,11 +40,11 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate int32_t mIRow; // row number int32_t mIRowUp; // next row number int32_t mIRowDn; // previous row number -#if GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP); - float mA1[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; - float mA2[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; - calink mB[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; +#if GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 + static_assert(GPUCA_MAXN >= GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP); + float 
mA1[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; + float mA2[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; + calink mB[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; #endif GPUTPCRow mRow, mRowUp, mRowDown; }; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx index af79dddae554e..20dfd69864816 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx @@ -39,7 +39,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr uint32_t linkUpData = tracker.mData.mLinkUpData[lHitNumberOffset + ih]; if (tracker.mData.mLinkDownData[lHitNumberOffset + ih] == CALINK_INVAL && linkUpData != CALINK_INVAL && tracker.mData.mLinkUpData[rowUp.mHitNumberOffset + linkUpData] != CALINK_INVAL) { -#ifdef GPUCA_SORT_STARTHITS +#if GPUCA_PAR_SORT_STARTHITS > 0 GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletTmpStartHits + s.mIRow * tracker.mNMaxRowStartHits; uint32_t nextRowStartHits = CAMath::AtomicAddShared(&s.mNRowStartHits, 1u); if (nextRowStartHits >= tracker.mNMaxRowStartHits) { @@ -61,7 +61,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr } GPUbarrier(); -#ifdef GPUCA_SORT_STARTHITS +#if GPUCA_PAR_SORT_STARTHITS > 0 if (iThread == 0) { uint32_t nOffset = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, s.mNRowStartHits); tracker.mRowStartHitCountOffset[s.mIRow] = s.mNRowStartHits; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index 6c1b4eda0d7f5..e923e126e1841 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -22,6 +22,7 @@ #include "GPUTPCTrackParam.h" 
#include "GPUParam.inc" #include "GPUTPCConvertImpl.h" +#include "GPUDefParametersRuntime.h" #if !defined(GPUCA_GPUCODE) #include @@ -143,13 +144,12 @@ void GPUTPCTracker::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxRowHits = mRec->MemoryScalers()->NTPCTrackletHits(mData.NumberOfHits()); mNMaxTracks = mRec->MemoryScalers()->NTPCSectorTracks(mData.NumberOfHits()); mNMaxTrackHits = mRec->MemoryScalers()->NTPCSectorTrackHits(mData.NumberOfHits(), mRec->GetProcessingSettings().tpcInputWithClusterRejection); -#ifdef GPUCA_SORT_STARTHITS_GPU - if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) { + + if (mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking).par_SORT_STARTHITS) { if (mNMaxStartHits > mNMaxRowStartHits * GPUCA_ROW_COUNT) { mNMaxStartHits = mNMaxRowStartHits * GPUCA_ROW_COUNT; } } -#endif mData.SetMaxData(); } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx index 8810b692e1377..e27a8f66ae754 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx @@ -33,7 +33,7 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread } GPUbarrier(); - GPUTPCHitId trackHits[GPUCA_ROW_COUNT - GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE]; + GPUTPCHitId trackHits[GPUCA_ROW_COUNT - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; const float maxSharedFrac = tracker.Param().rec.tpc.trackletMaxSharedFraction; for (int32_t itr = s.mItr0 + iThread; itr < s.mNTracklets; itr += s.mNThreadsTotal) { @@ -67,13 +67,13 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxShared : nHits * maxSharedFrac); if (own || sharedOK) { // SG!!! 
gap = 0; -#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (nHits < GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE) { +#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + if (nHits < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { s.mHits[nHits][iThread].Set(irow, ih); } else -#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 +#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 { - trackHits[nHits - GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE].Set(irow, ih); + trackHits[nHits - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE].Set(irow, ih); } nHits++; if (!own) { @@ -101,13 +101,13 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread tracker.Tracks()[itrout].SetFirstHitID(nFirstTrackHit); tracker.Tracks()[itrout].SetNHits(nHits); for (int32_t jh = 0; jh < nHits; jh++) { -#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (jh < GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE) { +#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + if (jh < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { tracker.TrackHits()[nFirstTrackHit + jh] = s.mHits[jh][iThread]; } else -#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 +#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 { - tracker.TrackHits()[nFirstTrackHit + jh] = trackHits[jh - GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE]; + tracker.TrackHits()[nFirstTrackHit + jh] = trackHits[jh - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; } } } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index f487931bdaf4b..e5a28c80f37f9 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -36,10 +36,10 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate int32_t mNThreadsTotal; // total n threads int32_t mNTracklets; // n of tracklets int32_t mReserved; // for alignment reasons -#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - static_assert(GPUCA_ROW_COUNT >= 
GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE); - GPUTPCHitId mHits[GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; -#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 +#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + static_assert(GPUCA_ROW_COUNT >= GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE); + GPUTPCHitId mHits[GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; +#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 }; typedef GPUconstantref() GPUTPCTracker processorType; diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 7faab410d20ea..35f2915d9486a 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -18,6 +18,8 @@ define_property(TARGET PROPERTY O2_GPU_KERNEL_NAMES) define_property(TARGET PROPERTY O2_GPU_KERNEL_INCLUDES) define_property(TARGET PROPERTY O2_GPU_KERNEL_FILES) define_property(TARGET PROPERTY O2_GPU_KERNEL_NO_FAST_MATH) +define_property(TARGET PROPERTY O2_GPU_KERNEL_PARAMS) +define_property(TARGET PROPERTY O2_GPU_KERNEL_STRING_PARAMS) set(O2_GPU_KERNEL_WRAPPER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/GPU/include_gpu_onthefly") file(MAKE_DIRECTORY ${O2_GPU_KERNEL_WRAPPER_FOLDER}) set(O2_GPU_BASE_DIR "${CMAKE_CURRENT_LIST_DIR}/../") @@ -167,3 +169,20 @@ function(o2_gpu_kernel_set_deterministic) endif() endforeach() endfunction() + +function(o2_gpu_kernel_add_parameter) + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + message(STATUS "Adding ${ARGV${i}}") + set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_PARAMS "${ARGV${i}}") + endforeach() +endfunction() +function(o2_gpu_kernel_add_string_parameter) + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + message(STATUS "Adding ${ARGV${i}}") + set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_STRING_PARAMS "${ARGV${i}}") + 
endforeach() +endfunction() diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.cxx b/GPU/GPUTracking/dEdx/GPUdEdx.cxx index fd2aeda2828e3..340463b9ec7f7 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.cxx +++ b/GPU/GPUTracking/dEdx/GPUdEdx.cxx @@ -55,7 +55,7 @@ GPUd() void GPUdEdx::computedEdx(GPUdEdxInfo& GPUrestrict() output, const GPUPar output.NHitsSubThresholdOROC3 = countOROC3; } -GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) +GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_PAR_DEDX_STORAGE_TYPE_A* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) { trunclow = count * trunclow / 128; trunchigh = count * trunchigh / 128; @@ -65,7 +65,7 @@ GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* GPUrestrict() CAAlgo::sort(array, array + count); float mean = 0; for (int32_t i = trunclow; i < trunchigh; i++) { - mean += (float)array[i] * (1.f / scalingFactor::factor); + mean += (float)array[i] * (1.f / scalingFactor::factor); } return (mean / (trunchigh - trunclow)); } diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 4d3b652bdc5d1..e556fd3845d42 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -37,7 +37,7 @@ class GPUdEdx GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param); private: - GPUd() float GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* array, int32_t count, int32_t trunclow, int32_t trunchigh); + GPUd() float GetSortTruncMean(GPUCA_PAR_DEDX_STORAGE_TYPE_A* array, int32_t count, int32_t trunclow, int32_t trunchigh); GPUd() void checkSubThresh(int32_t roc); template @@ -62,8 +62,8 @@ class GPUdEdx static constexpr int32_t MAX_NCL = GPUCA_ROW_COUNT; // Must fit in mNClsROC (uint8_t)! 
- GPUCA_DEDX_STORAGE_TYPE_A mChargeTot[MAX_NCL]; // No need for default, just some memory - GPUCA_DEDX_STORAGE_TYPE_A mChargeMax[MAX_NCL]; // No need for default, just some memory + GPUCA_PAR_DEDX_STORAGE_TYPE_A mChargeTot[MAX_NCL]; // No need for default, just some memory + GPUCA_PAR_DEDX_STORAGE_TYPE_A mChargeMax[MAX_NCL]; // No need for default, just some memory float mSubThreshMinTot = 0.f; float mSubThreshMinMax = 0.f; uint8_t mNClsROC[4] = {0}; @@ -78,8 +78,8 @@ GPUdi() void GPUdEdx::checkSubThresh(int32_t roc) if (roc != mLastROC) { if (mNSubThresh && mCount + mNSubThresh <= MAX_NCL) { for (int32_t i = 0; i < mNSubThresh; i++) { - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); } mNClsROC[mLastROC] += mNSubThresh; mNClsROCSubThresh[mLastROC] += mNSubThresh; @@ -151,8 +151,8 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint qmax /= residualGainMapGain; qtot /= residualGainMapGain; - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(qtot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(qmax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(qtot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(qmax * scalingFactor::factor + scalingFactor::round); mNClsROC[roc]++; if (qtot < mSubThreshMinTot) { mSubThreshMinTot = qtot; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 994f10a516b10..ee3af2b87d925 100644 --- 
a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -134,3 +134,16 @@ o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFD o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) + +o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP + TRACKLET_SELECTOR_HITS_REG_SIZE + ALTERNATE_BORDER_SORT + SORT_BEFORE_FIT + MERGER_SPLIT_LOOP_INTERPOLATION + NO_ATOMIC_PRECHECK + COMP_GATHER_KERNEL + COMP_GATHER_MODE + SORT_STARTHITS) + +o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE + MERGER_INTERPOLATION_ERROR_TYPE)