diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index a8061e872c029..82eb48a8a7663 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -80,8 +80,8 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i mTimeFrameGPU->getPositionResolutions(), this->mTrkParams[iteration].LayerRadii, mTimeFrameGPU->getMSangles(), - conf.nBlocks, - conf.nThreads, + conf.nBlocksLayerTracklets[iteration], + conf.nThreadsLayerTracklets[iteration], mTimeFrameGPU->getStreams()); mTimeFrameGPU->createTrackletsBuffers(); computeTrackletsInROFsHandler(mTimeFrameGPU->getDeviceIndexTableUtils(), @@ -113,8 +113,8 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i mTimeFrameGPU->getPositionResolutions(), this->mTrkParams[iteration].LayerRadii, mTimeFrameGPU->getMSangles(), - conf.nBlocks, - conf.nThreads, + conf.nBlocksLayerTracklets[iteration], + conf.nThreadsLayerTracklets[iteration], mTimeFrameGPU->getStreams()); } @@ -144,8 +144,8 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) this->mTrkParams[iteration].MaxChi2ClusterAttachment, this->mTrkParams[iteration].CellDeltaTanLambdaSigma, this->mTrkParams[iteration].NSigmaCut, - conf.nBlocks, - conf.nThreads); + conf.nBlocksLayerCells[iteration], + conf.nThreadsLayerCells[iteration]); mTimeFrameGPU->createCellsBuffers(iLayer); computeCellsHandler(mTimeFrameGPU->getDeviceArrayClusters(), mTimeFrameGPU->getDeviceArrayUnsortedClusters(), @@ -161,8 +161,8 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) this->mTrkParams[iteration].MaxChi2ClusterAttachment, this->mTrkParams[iteration].CellDeltaTanLambdaSigma, this->mTrkParams[iteration].NSigmaCut, - conf.nBlocks, - conf.nThreads); + conf.nBlocksLayerCells[iteration], + conf.nThreadsLayerCells[iteration]); } } @@ -191,8 +191,8 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) currentLayerCellsNum, nextLayerCellsNum, 1e2, - conf.nBlocks, - conf.nThreads); + conf.nBlocksFindNeighbours[iteration], + conf.nThreadsFindNeighbours[iteration]); mTimeFrameGPU->createNeighboursDevice(iLayer, nNeigh); @@ -207,8 +207,8 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) currentLayerCellsNum, nextLayerCellsNum, 1e2, - conf.nBlocks, - conf.nThreads); + conf.nBlocksFindNeighbours[iteration], + conf.nThreadsFindNeighbours[iteration]); nNeigh = filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), mTimeFrameGPU->getDeviceNeighbours(iLayer), @@ -247,8 +247,8 @@ void TrackerTraitsGPU::findRoads(const int iteration) this->mTrkParams[0].MaxChi2NDF, mTimeFrameGPU->getDevicePropagator(), this->mTrkParams[0].CorrType, - conf.nBlocks, - conf.nThreads); + conf.nBlocksProcessNeighbours[iteration], + conf.nThreadsProcessNeighbours[iteration]); } // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory. if (trackSeeds.empty()) { @@ -269,8 +269,8 @@ void TrackerTraitsGPU::findRoads(const int iteration) this->mTrkParams[0].MaxChi2NDF, // float maxChi2NDF mTimeFrameGPU->getDevicePropagator(), // const o2::base::Propagator* propagator this->mTrkParams[0].CorrType, // o2::base::PropagatorImpl::MatCorrType - conf.nBlocks, - conf.nThreads); + conf.nBlocksTracksSeeds[iteration], + conf.nThreadsTracksSeeds[iteration]); mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds); diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index ef6f925f1c29f..5b4b1aca1dfb8 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -107,9 +107,32 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper { - // GPU-specific parameters - int nBlocks = 20; - int nThreads = 256; + static constexpr int MaxIter = TrackerParamConfig::MaxIter; + + /// Set nBlocks/nThreads to summarily override all kernel launch parameters in each iteration. + /// Parameters must start with nBlocks/nThreads. + static constexpr int OverrideValue{-1}; + static constexpr char const* BlocksName = "nBlocks"; + static constexpr char const* ThreadsName = "nThreads"; + int nBlocks = OverrideValue; + int nThreads = OverrideValue; + void maybeOverride() const; + + /// Individual kernel launch parameter for each iteration + int nBlocksLayerTracklets[MaxIter] = {30, 30, 30, 30}; + int nThreadsLayerTracklets[MaxIter] = {256, 256, 256, 256}; + + int nBlocksLayerCells[MaxIter] = {30, 30, 30, 30}; + int nThreadsLayerCells[MaxIter] = {256, 256, 256, 256}; + + int nBlocksFindNeighbours[MaxIter] = {30, 30, 30, 30}; + int nThreadsFindNeighbours[MaxIter] = {256, 256, 256, 256}; + + int nBlocksProcessNeighbours[MaxIter] = {30, 30, 30, 30}; + int nThreadsProcessNeighbours[MaxIter] = {256, 256, 256, 256}; + + int nBlocksTracksSeeds[MaxIter] = {30, 30, 30, 30}; + int nThreadsTracksSeeds[MaxIter] = {256, 256, 256, 256}; O2ParamDef(ITSGpuTrackingParamConfig, "ITSGpuTrackingParam"); }; diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index b0add9881d01b..c8bf39142e019 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -39,6 +39,7 @@ Tracker::Tracker(TrackerTraits7* traits) : mTraits(traits) /// Initialise standard configuration with 1 iteration mTrkParams.resize(1); if (traits->isGPU()) { + ITSGpuTrackingParamConfig::Instance().maybeOverride(); ITSGpuTrackingParamConfig::Instance().printKeyValues(true, true); } } diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingConfigParam.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingConfigParam.cxx index b5fbedcc89339..3101c34d4ab8f 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingConfigParam.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingConfigParam.cxx @@ -9,8 +9,36 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. +#include + +#include "Framework/Logger.h" #include "ITStracking/TrackingConfigParam.h" O2ParamImpl(o2::its::VertexerParamConfig); O2ParamImpl(o2::its::TrackerParamConfig); O2ParamImpl(o2::its::ITSGpuTrackingParamConfig); + +namespace o2::its +{ + +void ITSGpuTrackingParamConfig::maybeOverride() const +{ + if (nBlocks == OverrideValue && nThreads == OverrideValue) { + return; + } + const auto name = getName(); + auto members = getDataMembers(); + for (auto member : *members) { + if (!member.name.ends_with(BlocksName) && !member.name.ends_with(ThreadsName)) { + if (nBlocks != OverrideValue && member.name.starts_with(BlocksName) && (member.value != nBlocks)) { + o2::conf::ConfigurableParam::setValue(name, member.name, nBlocks); + } + if (nThreads != OverrideValue && member.name.starts_with(ThreadsName) && (member.value != nThreads)) { + o2::conf::ConfigurableParam::setValue(name, member.name, nThreads); + } + } + } + LOGP(info, "Overwriting gpu threading parameters"); +} // namespace o2::its + +} // namespace o2::its