diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index a03d9de13ef8f..cc1c62bed507d 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -32,6 +32,21 @@ #define GPUCA_M_KRNL_NAME(...) GPUCA_M_KRNL_NAME_A(GPUCA_M_STRIP(__VA_ARGS__)) #if defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_HOSTONLY) + +#if defined(__HIPCC__) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) + static_assert(GPUCA_PAR_AMD_EUS_PER_CU > 0); + #define GPUCA_MIN_WARPS_PER_EU(maxThreadsPerBlock, minBlocksPerCU) GPUCA_CEIL_INT_DIV((minBlocksPerCU) * (maxThreadsPerBlock), (GPUCA_WARP_SIZE * GPUCA_PAR_AMD_EUS_PER_CU)) + + #define GPUCA_LB_ARGS_1(maxThreadsPerBlock) maxThreadsPerBlock + #define GPUCA_LB_ARGS_2(maxThreadsPerBlock, minBlocksPerCU) maxThreadsPerBlock, GPUCA_MIN_WARPS_PER_EU(maxThreadsPerBlock, minBlocksPerCU) + + #define GPUCA_LAUNCH_BOUNDS_SELECT(n, ...) GPUCA_M_CAT(GPUCA_LB_ARGS_, n)(__VA_ARGS__) + #define GPUCA_LAUNCH_BOUNDS_DISP(...) GPUCA_LAUNCH_BOUNDS_SELECT(GPUCA_M_COUNT(__VA_ARGS__), __VA_ARGS__) + #define GPUCA_KRNL_REG_DEFAULT(args) __launch_bounds__(GPUCA_LAUNCH_BOUNDS_DISP(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))) +#elif !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) + #define GPUCA_KRNL_REG_DEFAULT(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) +#endif + #ifndef GPUCA_KRNL_REG #define GPUCA_KRNL_REG(...) #endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 8c3fb92c11c9e..dba7e680d0b2c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -74,7 +74,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } fclose(fp); } - const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true) + + const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true, mParDevice->par_AMD_EUS_PER_CU ? (mParDevice->par_AMD_EUS_PER_CU * mWarpSize) : 0) + "#define GPUCA_WARP_SIZE " + std::to_string(mWarpSize) + "\n"; if (GetProcessingSettings().rtctech.printLaunchBounds || GetProcessingSettings().debugLevel >= 3) { GPUInfo("RTC Launch Bounds:\n%s", launchBounds.c_str()); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index e6ed94bba2cec..e8779415f0ea4 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -74,7 +74,7 @@ inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, c } #undef GPUCA_KRNL_REG -#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) +#define GPUCA_KRNL_REG(...) GPUCA_KRNL_REG_DEFAULT(__VA_ARGS__) // clang-format off #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE != 1 // ---------- COMPILE_MODE = perkernel ---------- diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu index 3e4d3113fb995..571428dc39e21 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu @@ -15,7 +15,7 @@ #define GPUCA_GPUCODE_HOSTONLY #define GPUCA_GPUCODE_NO_LAUNCH_BOUNDS -#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) +#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_STRIP(args)) #include "GPUReconstructionCUDAIncludesSystem.h" #include "GPUReconstructionCUDADef.h" diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu index 847011a70f7f9..82759aab48d70 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu @@ -14,7 +14,7 @@ #define GPUCA_GPUCODE_COMPILEKERNELS #include "GPUReconstructionCUDAIncludesSystem.h" -#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) +#define GPUCA_KRNL_REG(...) GPUCA_KRNL_REG_DEFAULT(__VA_ARGS__) #define GPUCA_KRNL(...) GPUCA_KRNLGPU(__VA_ARGS__); #include "GPUReconstructionCUDADef.h" #include "GPUReconstructionKernelMacros.h" diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip index 30a84dfa135eb..7cb895cadd770 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip @@ -14,7 +14,7 @@ #define GPUCA_GPUCODE_COMPILEKERNELS #include "GPUReconstructionHIPIncludesSystem.h" -#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) +#define GPUCA_KRNL_REG(...) GPUCA_KRNL_REG_DEFAULT(__VA_ARGS__) #define GPUCA_KRNL(...) GPUCA_KRNLGPU(__VA_ARGS__); #include "GPUReconstructionHIPDef.h" #include "GPUReconstructionKernelMacros.h" diff --git a/GPU/GPUTracking/Definitions/GPUDefMacros.h b/GPU/GPUTracking/Definitions/GPUDefMacros.h index caf2d1670f84e..ea62d7f34edb0 100644 --- a/GPU/GPUTracking/Definitions/GPUDefMacros.h +++ b/GPU/GPUTracking/Definitions/GPUDefMacros.h @@ -50,5 +50,7 @@ #define GPUCA_UNROLL(...) #endif +#define GPUCA_CEIL_INT_DIV(a, b) (((a) + (b) - 1) / (b)) + #endif // clang-format on diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 29aa3808506dc..46316b8a62fe1 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -25,22 +25,23 @@ // GPU-architecture-dependent default settings #if defined(GPUCA_GPUTYPE_MI2xx) #define GPUCA_WARP_SIZE 64 + #define GPUCA_PAR_AMD_EUS_PER_CU 4 #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 256 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024 - #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 - #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 256, 7 + #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 5 + #define GPUCA_LB_GPUTPCStartHitsFinder 1024, 2 + #define GPUCA_LB_GPUTPCTrackletConstructor 768, 8 + #define GPUCA_LB_GPUTPCTrackletSelector 384, 5 + #define GPUCA_LB_GPUTPCNeighboursFinder 192, 8 + #define GPUCA_LB_GPUTPCNeighboursCleaner 128, 5 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256, 7 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE, 4 + #define GPUCA_LB_GPUTPCCFGather 1024, 5 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 192, 2 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 5 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 4 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 @@ -55,7 +56,7 @@ #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 512 + #define GPUCA_LB_GPUTPCGMMergerCollect 768, 1 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 @@ -63,21 +64,23 @@ #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128, 1 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64, 10 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define GPUCA_LB_GPUTPCCFPeakFinder 512 + #define GPUCA_LB_GPUTPCCFPeakFinder 512, 9 #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512 - #define GPUCA_LB_GPUTPCCFClusterizer 448 + #define GPUCA_LB_GPUTPCCFDeconvolution 512, 5 + #define GPUCA_LB_GPUTPCCFClusterizer 448, 3 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 10 + #define PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 4 + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 0 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 9 #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 #define GPUCA_PAR_SORT_BEFORE_FIT 1 #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 @@ -87,22 +90,23 @@ #define GPUCA_PAR_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_VEGA) #define GPUCA_WARP_SIZE 64 + #define GPUCA_PAR_AMD_EUS_PER_CU 4 #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 128 - #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 256, 8 - #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 + #define GPUCA_LB_GPUTPCCreateTrackingData 192, 2 + #define GPUCA_LB_GPUTPCStartHitsSorter 512, 7 + #define GPUCA_LB_GPUTPCStartHitsFinder 1024, 7 + #define GPUCA_LB_GPUTPCTrackletConstructor 512, 10 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 10 + #define GPUCA_LB_GPUTPCNeighboursFinder 960, 8 + #define GPUCA_LB_GPUTPCNeighboursCleaner 384, 9 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256, 2 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 1 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE, 14 #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 7 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 5 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256, 2 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 @@ -117,7 +121,7 @@ #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 512 + #define GPUCA_LB_GPUTPCGMMergerCollect 1024, 1 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 @@ -125,21 +129,23 @@ #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64, 2 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define GPUCA_LB_GPUTPCCFPeakFinder 512 + #define GPUCA_LB_GPUTPCCFPeakFinder 512, 4 #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512 - #define GPUCA_LB_GPUTPCCFClusterizer 512 + #define GPUCA_LB_GPUTPCCFDeconvolution 512, 5 + #define GPUCA_LB_GPUTPCCFClusterizer 512, 2 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 2 + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 0 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 27 #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 #define GPUCA_PAR_SORT_BEFORE_FIT 1 #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 @@ -272,6 +278,9 @@ #ifndef GPUCA_WARP_SIZE #define GPUCA_WARP_SIZE 32 #endif + #ifndef GPUCA_PAR_AMD_EUS_PER_CU + #define GPUCA_PAR_AMD_EUS_PER_CU 0 + #endif #ifndef GPUCA_THREAD_COUNT_DEFAULT #define GPUCA_THREAD_COUNT_DEFAULT 256 #endif diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc index ac71adc6232a6..8b7a79a9e48bf 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -39,23 +39,23 @@ static GPUDefParameters GPUDefParametersLoad() }; } -#define GPUCA_EXPORT_KERNEL_LB(name) \ - if (par.par_LB_maxThreads[i] > 0) { \ - o << "#define GPUCA_LB_" GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ - if (par.par_LB_minBlocks[i] > 0) { \ - o << ", " << par.par_LB_minBlocks[i]; \ - } \ - if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ - o << ", " << par.par_LB_forceBlocks[i]; \ - } \ - o << "\n"; \ - } \ +#define GPUCA_EXPORT_KERNEL_LB(name) \ + if (par.par_LB_maxThreads[i] > 0) { \ + o << "#define GPUCA_LB_" GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ + if (par.par_LB_minBlocks[i] > 0) { \ + o << ", " << GPUCA_CEIL_INT_DIV(par.par_LB_maxThreads[i] * par.par_LB_minBlocks[i], (minBlockFactor ? minBlockFactor : par.par_LB_maxThreads[i])); \ + } \ + if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ + o << ", " << par.par_LB_forceBlocks[i]; \ + } \ + o << "\n"; \ + } \ i++; #define GPUCA_EXPORT_KERNEL_PARAM(name) \ o << "#define GPUCA_PAR_" GPUCA_M_STR(name) " " << GPUCA_M_CAT(par.par_, name) << "\n"; -static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC) +static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC, int32_t minBlockFactor = 0) { std::stringstream o; // clang-format off int32_t i = 0; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index c8ddcd2e9d81d..0635b89eda61a 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -145,7 +145,8 @@ o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP # Number of neighhbo COMP_GATHER_KERNEL # Default kernel to use for Compression Gather Operation [0 - 4] COMP_GATHER_MODE # TPC Compression Gather Mode [0 - 3] SORT_STARTHITS # Sort start hits to improve cache locality during tracklet construction [0/1] - CF_SCAN_WORKGROUP_SIZE) # Work group size to use in clusterizer scan operation + CF_SCAN_WORKGROUP_SIZE # Work group size to use in clusterizer scan operation + AMD_EUS_PER_CU) # Number of SIMD units per Compute Unit (only for AMD GPUs) o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE # Data type to use for intermediate storage of dEdx truncated mean inputs MERGER_INTERPOLATION_ERROR_TYPE) # Data type for storing intermediate track residuals for interpolation