From 45d6ec99761c38fc47594328b2813ab02a431f04 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 15 Aug 2025 15:59:50 +0200 Subject: [PATCH 1/2] ITS: template Tracker Signed-off-by: Felix Schlepper --- .../tracking/include/ITStracking/Tracker.h | 15 ++++++----- .../include/ITStracking/TrackingInterface.h | 11 ++++---- Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 25 +++++++++++++------ .../ITS/tracking/src/TrackingInterface.cxx | 6 ++--- 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index 3f80d239946b1..642717bd09596 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -51,17 +51,15 @@ class GPUChainITS; namespace its { +template class Tracker { - static constexpr int NLayers{7}; - using TrackerTraits7 = TrackerTraits; - using TimeFrame7 = TimeFrame; using LogFunc = std::function; public: - Tracker(TrackerTraits* traits); + Tracker(TrackerTraits* traits); - void adoptTimeFrame(TimeFrame& tf); + void adoptTimeFrame(TimeFrame& tf); void clustersToTracks( const LogFunc& = [](const std::string& s) { std::cout << s << '\n'; }, @@ -92,8 +90,8 @@ class Tracker template float evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, LogFunc logger, F&&... args); - TrackerTraits7* mTraits = nullptr; /// Observer pointer, not owned by this class - TimeFrame7* mTimeFrame = nullptr; /// Observer pointer, not owned by this class + TrackerTraits* mTraits = nullptr; /// Observer pointer, not owned by this class + TimeFrame* mTimeFrame = nullptr; /// Observer pointer, not owned by this class std::vector mTrkParams; o2::gpu::GPUChainITS* mRecoChain = nullptr; @@ -115,8 +113,9 @@ class Tracker static constexpr std::array StateNames{"TimeFrame initialisation", "Tracklet finding", "Cell finding", "Neighbour finding", "Road finding"}; }; +template template -float Tracker::evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, LogFunc logger, F&&... args) +float Tracker::evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, LogFunc logger, F&&... args) { float diff{0.f}; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h index f123a2a9a1d80..9d927e21202cc 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h @@ -35,8 +35,9 @@ namespace o2::its class ITSTrackingInterface { static constexpr int NLayers{7}; - using TrackerTraits7 = TrackerTraits; - using TimeFrame7 = TimeFrame; + using TrackerN = Tracker; + using TrackerTraitsN = TrackerTraits; + using TimeFrameN = TimeFrame; public: ITSTrackingInterface(bool isMC, @@ -66,13 +67,13 @@ class ITSTrackingInterface virtual void finaliseCCDB(framework::ConcreteDataMatcher& matcher, void* obj); // Custom - void setTraitsFromProvider(VertexerTraits*, TrackerTraits7*, TimeFrame7*); + void setTraitsFromProvider(VertexerTraits*, TrackerTraitsN*, TimeFrameN*); void setTrackingMode(TrackingMode::Type mode = TrackingMode::Unset) { mMode = mode; } auto getTracker() const { return mTracker.get(); } auto getVertexer() const { return mVertexer.get(); } - TimeFrame7* mTimeFrame = nullptr; + TimeFrameN* mTimeFrame = nullptr; protected: virtual void loadROF(gsl::span& trackROFspan, @@ -88,7 +89,7 @@ class ITSTrackingInterface TrackingMode::Type mMode = TrackingMode::Unset; bool mOverrideBeamEstimation = false; const o2::itsmft::TopologyDictionary* mDict = nullptr; - std::unique_ptr mTracker = nullptr; + std::unique_ptr mTracker = nullptr; std::unique_ptr mVertexer = nullptr; const o2::dataformats::MeanVertexObject* mMeanVertex; std::shared_ptr mMemoryPool; diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index c8bf39142e019..5d0fe459f44a5 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -34,7 +34,8 @@ namespace o2::its { using o2::its::constants::GB; -Tracker::Tracker(TrackerTraits7* traits) : mTraits(traits) +template +Tracker::Tracker(TrackerTraits* traits) : mTraits(traits) { /// Initialise standard configuration with 1 iteration mTrkParams.resize(1); @@ -44,7 +45,8 @@ Tracker::Tracker(TrackerTraits7* traits) : mTraits(traits) } } -void Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& error) +template +void Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& error) { LogFunc evalLog = [](const std::string&) {}; @@ -158,7 +160,8 @@ void Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& error) } } -void Tracker::computeRoadsMClabels() +template +void Tracker::computeRoadsMClabels() { /// Moore's Voting Algorithm if (!mTimeFrame->hasMCinformation()) { @@ -171,7 +174,7 @@ void Tracker::computeRoadsMClabels() for (int iRoad{0}; iRoad < roadsNum; ++iRoad) { - Road<5>& currentRoad{mTimeFrame->getRoads()[iRoad]}; + auto& currentRoad{mTimeFrame->getRoads()[iRoad]}; std::vector> occurrences; bool isFakeRoad{false}; bool isFirstRoadCell{true}; @@ -262,7 +265,8 @@ void Tracker::computeRoadsMClabels() } } -void Tracker::computeTracksMClabels() +template +void Tracker::computeTracksMClabels() { for (int iROF{0}; iROF < mTimeFrame->getNrof(); ++iROF) { for (auto& track : mTimeFrame->getTracks(iROF)) { @@ -320,7 +324,8 @@ void Tracker::computeTracksMClabels() } } -void Tracker::rectifyClusterIndices() +template +void Tracker::rectifyClusterIndices() { for (int iROF{0}; iROF < mTimeFrame->getNrof(); ++iROF) { for (auto& track : mTimeFrame->getTracks(iROF)) { @@ -334,17 +339,21 @@ void Tracker::rectifyClusterIndices() } } -void Tracker::adoptTimeFrame(TimeFrame7& tf) +template +void Tracker::adoptTimeFrame(TimeFrame& tf) { mTimeFrame = &tf; mTraits->adoptTimeFrame(&tf); } -void Tracker::printSummary() const +template +void Tracker::printSummary() const { auto avgTF = mTotalTime * 1.e-3 / ((mTimeFrameCounter > 0) ? (double)mTimeFrameCounter : -1.0); auto avgTFwithDropped = mTotalTime * 1.e-3 / (((mTimeFrameCounter + mNumberOfDroppedTFs) > 0) ? (double)(mTimeFrameCounter + mNumberOfDroppedTFs) : -1.0); LOGP(info, "Tracker summary: Processed {} TFs (dropped {}) in TOT={:.2f} s, AVG/TF={:.2f} ({:.2f}) s", mTimeFrameCounter, mNumberOfDroppedTFs, mTotalTime * 1.e-3, avgTF, avgTFwithDropped); } +template class Tracker<7>; + } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 3b05a7655d68c..d2e96aad40a9e 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -383,11 +383,11 @@ void ITSTrackingInterface::printSummary() const } void ITSTrackingInterface::setTraitsFromProvider(VertexerTraits* vertexerTraits, - TrackerTraits7* trackerTraits, - TimeFrame7* frame) + TrackerTraitsN* trackerTraits, + TimeFrameN* frame) { mVertexer = std::make_unique(vertexerTraits); - mTracker = std::make_unique(trackerTraits); + mTracker = std::make_unique(trackerTraits); mTimeFrame = frame; mVertexer->adoptTimeFrame(*mTimeFrame); mTracker->adoptTimeFrame(*mTimeFrame); From 83fd86c9f5b8c49f05a0246efff56fbac1a9efe0 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 15 Aug 2025 16:40:56 +0200 Subject: [PATCH 2/2] ITS: template CellSeed and Road Signed-off-by: Felix Schlepper --- .../GPU/ITStrackingGPU/TimeFrameGPU.h | 20 +- .../GPU/ITStrackingGPU/TrackerTraitsGPU.h | 2 +- .../GPU/ITStrackingGPU/TrackingKernels.h | 65 ++----- .../ITS/tracking/GPU/cuda/TimeFrameGPU.cu | 34 ++-- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 142 +++++++------- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 173 ++++++++++++++---- .../ITS/tracking/include/ITStracking/Cell.h | 17 +- .../tracking/include/ITStracking/Constants.h | 34 +++- .../ITS/tracking/include/ITStracking/Road.h | 6 +- .../tracking/include/ITStracking/TimeFrame.h | 5 +- .../include/ITStracking/TrackerTraits.h | 5 +- .../ITSMFT/ITS/tracking/src/TimeFrame.cxx | 2 +- Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 2 +- .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 20 +- 14 files changed, 320 insertions(+), 207 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 27b987fb9a84a..9731ad5c5db67 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h @@ -27,6 +27,8 @@ namespace o2::its::gpu template class TimeFrameGPU : public TimeFrame { + using typename TimeFrame::CellSeedN; + public: TimeFrameGPU(); ~TimeFrameGPU() = default; @@ -64,7 +66,7 @@ class TimeFrameGPU : public TimeFrame void loadTrackSeedsDevice(); void loadTrackSeedsChi2Device(); void loadRoadsDevice(); - void loadTrackSeedsDevice(bounded_vector&); + void loadTrackSeedsDevice(bounded_vector&); void createTrackletsBuffers(const int); void createTrackletsBuffersArray(const int); void createCellsBuffers(const int); @@ -75,8 +77,8 @@ class TimeFrameGPU : public TimeFrame void createNeighboursIndexTablesDevice(const int); void createNeighboursDevice(const unsigned int layer); void createNeighboursLUTDevice(const int, const unsigned int); - void createTrackITSExtDevice(bounded_vector&); - void downloadTrackITSExtDevice(bounded_vector&); + void createTrackITSExtDevice(bounded_vector&); + void downloadTrackITSExtDevice(bounded_vector&); void downloadCellsNeighboursDevice(std::vector>>&, const int); void downloadNeighboursLUTDevice(bounded_vector&, const int); void downloadCellsDevice(); @@ -125,8 +127,8 @@ class TimeFrameGPU : public TimeFrame int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; } int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; } int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; } - CellSeed** getDeviceArrayCells() { return mCellsDeviceArray; } - CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; } + CellSeedN** getDeviceArrayCells() { return mCellsDeviceArray; } + CellSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; } o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; } float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; } int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; } @@ -145,7 +147,7 @@ class TimeFrameGPU : public TimeFrame gsl::span getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; } gsl::span getDeviceCellLUTs() { return mCellsLUTDevice; } gsl::span getDeviceTracklets() { return mTrackletsDevice; } - gsl::span getDeviceCells() { return mCellsDevice; } + gsl::span getDeviceCells() { return mCellsDevice; } // Overridden getters int getNumberOfTracklets() const final; @@ -189,10 +191,10 @@ class TimeFrameGPU : public TimeFrame int** mNeighboursCellDeviceArray{nullptr}; int** mNeighboursCellLUTDeviceArray{nullptr}; int** mTrackletsLUTDeviceArray{nullptr}; - std::array mCellsDevice; - CellSeed** mCellsDeviceArray; + std::array mCellsDevice; + CellSeedN** mCellsDeviceArray; std::array mNeighboursIndexTablesDevice; - CellSeed* mTrackSeedsDevice{nullptr}; + CellSeedN* mTrackSeedsDevice{nullptr}; std::array mCellSeedsDevice; o2::track::TrackParCovF** mCellSeedsDeviceArray; std::array mCellSeedsChi2Device; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h index d5c3e8ac74925..f8eedb33d91eb 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h @@ -49,7 +49,7 @@ class TrackerTraitsGPU final : public TrackerTraits private: IndexTableUtils* mDeviceIndexTableUtils; - gpu::TimeFrameGPU<7>* mTimeFrameGPU; + gpu::TimeFrameGPU* mTimeFrameGPU; }; } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index 23b23d2b3f3ab..53e680d474f6f 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -16,51 +16,21 @@ #include #include "ITStracking/BoundedAllocator.h" +#include "ITStracking/Definitions.h" #include "ITStrackingGPU/Utils.h" #include "DetectorsBase/Propagator.h" #include "GPUCommonDef.h" namespace o2::its { +template class CellSeed; +class TrackingFrameInfo; +class Tracklet; +class IndexTableUtils; +class Cluster; +class TrackITSExt; class ExternalAllocator; -namespace gpu -{ - -#ifdef GPUCA_GPUCODE // GPUg() global kernels must only when compiled by GPU compiler - -GPUdii() int4 getEmptyBinsRect() -{ - return int4{0, 0, 0, 0}; -} - -GPUdii() bool fitTrack(TrackITSExt& track, - int start, - int end, - int step, - float chi2clcut, - float chi2ndfcut, - float maxQoverPt, - int nCl, - float Bz, - TrackingFrameInfo** tfInfos, - const o2::base::Propagator* prop, - o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrNONE); - -template -GPUg() void fitTrackSeedsKernel(CellSeed* trackSeeds, - const TrackingFrameInfo** foundTrackingFrameInfo, - o2::its::TrackITSExt* tracks, - const float* minPts, - const unsigned int nSeeds, - const float Bz, - const int startLevel, - float maxChi2ClusterAttachment, - float maxChi2NDF, - const o2::base::Propagator* propagator, - const o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorF::MatCorrType::USEMatCorrLUT); -#endif -} // namespace gpu template void countTrackletsInROFsHandler(const IndexTableUtils* utils, @@ -131,6 +101,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, const int nThreads, gpu::Streams& streams); +template void countCellsHandler(const Cluster** sortedClusters, const Cluster** unsortedClusters, const TrackingFrameInfo** tfInfo, @@ -138,7 +109,7 @@ void countCellsHandler(const Cluster** sortedClusters, int** trackletsLUT, const int nTracklets, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTsDeviceArray, int* cellsLUTsHost, const int deltaROF, @@ -151,6 +122,7 @@ void countCellsHandler(const Cluster** sortedClusters, const int nThreads, gpu::Streams& streams); +template void computeCellsHandler(const Cluster** sortedClusters, const Cluster** unsortedClusters, const TrackingFrameInfo** tfInfo, @@ -158,7 +130,7 @@ void computeCellsHandler(const Cluster** sortedClusters, int** trackletsLUT, const int nTracklets, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTsDeviceArray, int* cellsLUTsHost, const int deltaROF, @@ -170,7 +142,8 @@ void computeCellsHandler(const Cluster** sortedClusters, const int nThreads, gpu::Streams& streams); -void countCellNeighboursHandler(CellSeed** cellsLayersDevice, +template +void countCellNeighboursHandler(CellSeed** cellsLayersDevice, int* neighboursLUTs, int** cellsLUTs, gpuPair* cellNeighbours, @@ -188,7 +161,8 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nThreads, gpu::Stream& stream); -void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, +template +void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, int* neighboursLUTs, int** cellsLUTs, gpuPair* cellNeighbours, @@ -214,14 +188,14 @@ int filterCellNeighboursHandler(gpuPair*, template void processNeighboursHandler(const int startLayer, const int startLevel, - CellSeed** allCellSeeds, - CellSeed* currentCellSeeds, + CellSeed** allCellSeeds, + CellSeed* currentCellSeeds, std::array& nCells, const unsigned char** usedClusters, std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, - bounded_vector& seedsHost, + bounded_vector>& seedsHost, const float bz, const float MaxChi2ClusterAttachment, const float maxChi2NDF, @@ -231,7 +205,8 @@ void processNeighboursHandler(const int startLayer, const int nBlocks, const int nThreads); -void trackSeedHandler(CellSeed* trackSeeds, +template +void trackSeedHandler(CellSeed* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, o2::its::TrackITSExt* tracks, std::vector& minPtsHost, diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu index 4f3b52d56a793..d834f28e09db0 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu @@ -355,11 +355,11 @@ void TimeFrameGPU::loadCellsDevice() { GPUTimer timer(mGpuStreams, "loading cell seeds", nLayers - 2); for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) { - GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeed) / constants::MB); - allocMemAsync(reinterpret_cast(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeed), mGpuStreams[iLayer], this->getExtAllocator()); + GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeedN) / constants::MB); + allocMemAsync(reinterpret_cast(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeedN), mGpuStreams[iLayer], this->getExtAllocator()); allocMemAsync(reinterpret_cast(&mNeighboursIndexTablesDevice[iLayer]), (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->getExtAllocator()); // accessory for the neigh. finding. GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[iLayer], 0, (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer].get())); - GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeed), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get())); + GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeedN), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get())); } } @@ -387,8 +387,8 @@ void TimeFrameGPU::createCellsBuffersArray(const int iteration) { if (!iteration) { GPUTimer timer("creating cells buffers array"); - allocMem(reinterpret_cast(&mCellsDeviceArray), (nLayers - 2) * sizeof(CellSeed*), this->getExtAllocator()); - GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice)); + allocMem(reinterpret_cast(&mCellsDeviceArray), (nLayers - 2) * sizeof(CellSeedN*), this->getExtAllocator()); + GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeedN*), cudaMemcpyHostToDevice)); } } @@ -399,9 +399,9 @@ void TimeFrameGPU::createCellsBuffers(const int layer) mNCells[layer] = 0; GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); mGpuStreams[layer].sync(); // ensure number of cells is correct - GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB); - allocMemAsync(reinterpret_cast(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer], this->getExtAllocator()); - GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeed*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); + GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeedN) / constants::MB); + allocMemAsync(reinterpret_cast(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer], this->getExtAllocator()); + GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeedN*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } template @@ -426,13 +426,13 @@ void TimeFrameGPU::loadRoadsDevice() } template -void TimeFrameGPU::loadTrackSeedsDevice(bounded_vector& seeds) +void TimeFrameGPU::loadTrackSeedsDevice(bounded_vector& seeds) { GPUTimer timer("loading track seeds"); - GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeed) / constants::MB); - allocMem(reinterpret_cast(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeed), this->getExtAllocator()); - GPUChkErrS(cudaHostRegister(seeds.data(), seeds.size() * sizeof(CellSeed), cudaHostRegisterPortable)); - GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeed), cudaMemcpyHostToDevice)); + GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeedN) / constants::MB); + allocMem(reinterpret_cast(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeedN), this->getExtAllocator()); + GPUChkErrS(cudaHostRegister(seeds.data(), seeds.size() * sizeof(CellSeedN), cudaHostRegisterPortable)); + GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeedN), cudaMemcpyHostToDevice)); } template @@ -450,7 +450,7 @@ void TimeFrameGPU::createNeighboursDevice(const unsigned int layer) } template -void TimeFrameGPU::createTrackITSExtDevice(bounded_vector& seeds) +void TimeFrameGPU::createTrackITSExtDevice(bounded_vector& seeds) { GPUTimer timer("reserving tracks"); mTrackITSExt = bounded_vector(seeds.size(), {}, this->getMemoryPool().get()); @@ -465,9 +465,9 @@ void TimeFrameGPU::downloadCellsDevice() { GPUTimer timer(mGpuStreams, "downloading cells", nLayers - 2); for (int iLayer{0}; iLayer < nLayers - 2; ++iLayer) { - GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / constants::MB); + GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeedN) / constants::MB); this->mCells[iLayer].resize(mNCells[iLayer]); - GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeed), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get())); + GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeedN), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get())); } } @@ -499,7 +499,7 @@ void TimeFrameGPU::downloadNeighboursLUTDevice(bounded_vector& lut } template -void TimeFrameGPU::downloadTrackITSExtDevice(bounded_vector& seeds) +void TimeFrameGPU::downloadTrackITSExtDevice(bounded_vector& seeds) { GPUTimer timer("downloading tracks"); GPULog("gpu-transfer: downloading {} tracks, for {:.2f} MB.", mTrackITSExt.size(), mTrackITSExt.size() * sizeof(o2::its::TrackITSExt) / constants::MB); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index a6dfc041e4c71..0b71a7c85e61f 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -168,47 +168,47 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) mTimeFrameGPU->createCellsLUTDevice(iLayer); mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available mTimeFrameGPU->waitEvent(iLayer, iLayer + 2); // wait stream until all data is available - countCellsHandler(mTimeFrameGPU->getDeviceArrayClusters(), - mTimeFrameGPU->getDeviceArrayUnsortedClusters(), - mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), - mTimeFrameGPU->getDeviceArrayTracklets(), - mTimeFrameGPU->getDeviceArrayTrackletsLUT(), - currentLayerTrackletsNum, - iLayer, - nullptr, - mTimeFrameGPU->getDeviceArrayCellsLUT(), - mTimeFrameGPU->getDeviceCellLUTs()[iLayer], - this->mTrkParams[iteration].DeltaROF, - this->mBz, - this->mTrkParams[iteration].MaxChi2ClusterAttachment, - this->mTrkParams[iteration].CellDeltaTanLambdaSigma, - this->mTrkParams[iteration].NSigmaCut, - mTimeFrameGPU->getExternalAllocator(), - conf.nBlocksLayerCells[iteration], - conf.nThreadsLayerCells[iteration], - mTimeFrameGPU->getStreams()); + countCellsHandler(mTimeFrameGPU->getDeviceArrayClusters(), + mTimeFrameGPU->getDeviceArrayUnsortedClusters(), + mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), + mTimeFrameGPU->getDeviceArrayTracklets(), + mTimeFrameGPU->getDeviceArrayTrackletsLUT(), + currentLayerTrackletsNum, + iLayer, + nullptr, + mTimeFrameGPU->getDeviceArrayCellsLUT(), + mTimeFrameGPU->getDeviceCellLUTs()[iLayer], + this->mTrkParams[iteration].DeltaROF, + this->mBz, + this->mTrkParams[iteration].MaxChi2ClusterAttachment, + this->mTrkParams[iteration].CellDeltaTanLambdaSigma, + this->mTrkParams[iteration].NSigmaCut, + mTimeFrameGPU->getExternalAllocator(), + conf.nBlocksLayerCells[iteration], + conf.nThreadsLayerCells[iteration], + mTimeFrameGPU->getStreams()); mTimeFrameGPU->createCellsBuffers(iLayer); if (mTimeFrameGPU->getNCells()[iLayer] == 0) { return; } - computeCellsHandler(mTimeFrameGPU->getDeviceArrayClusters(), - mTimeFrameGPU->getDeviceArrayUnsortedClusters(), - mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), - mTimeFrameGPU->getDeviceArrayTracklets(), - mTimeFrameGPU->getDeviceArrayTrackletsLUT(), - currentLayerTrackletsNum, - iLayer, - mTimeFrameGPU->getDeviceCells()[iLayer], - mTimeFrameGPU->getDeviceArrayCellsLUT(), - mTimeFrameGPU->getDeviceCellLUTs()[iLayer], - this->mTrkParams[iteration].DeltaROF, - this->mBz, - this->mTrkParams[iteration].MaxChi2ClusterAttachment, - this->mTrkParams[iteration].CellDeltaTanLambdaSigma, - this->mTrkParams[iteration].NSigmaCut, - conf.nBlocksLayerCells[iteration], - conf.nThreadsLayerCells[iteration], - mTimeFrameGPU->getStreams()); + computeCellsHandler(mTimeFrameGPU->getDeviceArrayClusters(), + mTimeFrameGPU->getDeviceArrayUnsortedClusters(), + mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), + mTimeFrameGPU->getDeviceArrayTracklets(), + mTimeFrameGPU->getDeviceArrayTrackletsLUT(), + currentLayerTrackletsNum, + iLayer, + mTimeFrameGPU->getDeviceCells()[iLayer], + mTimeFrameGPU->getDeviceArrayCellsLUT(), + mTimeFrameGPU->getDeviceCellLUTs()[iLayer], + this->mTrkParams[iteration].DeltaROF, + this->mBz, + this->mTrkParams[iteration].MaxChi2ClusterAttachment, + this->mTrkParams[iteration].CellDeltaTanLambdaSigma, + this->mTrkParams[iteration].NSigmaCut, + conf.nBlocksLayerCells[iteration], + conf.nThreadsLayerCells[iteration], + mTimeFrameGPU->getStreams()); } } @@ -226,43 +226,43 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) } mTimeFrameGPU->createNeighboursIndexTablesDevice(iLayer); mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum); - countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), - mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here. - mTimeFrameGPU->getDeviceArrayCellsLUT(), - mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), - mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer), - (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(), - this->mTrkParams[0].DeltaROF, - this->mTrkParams[0].MaxChi2ClusterAttachment, - this->mBz, - iLayer, - currentLayerCellsNum, - nextLayerCellsNum, - 1e2, - mTimeFrameGPU->getExternalAllocator(), - conf.nBlocksFindNeighbours[iteration], - conf.nThreadsFindNeighbours[iteration], - mTimeFrameGPU->getStream(iLayer)); + countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), + mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here. + mTimeFrameGPU->getDeviceArrayCellsLUT(), + mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), + mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer), + (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(), + this->mTrkParams[0].DeltaROF, + this->mTrkParams[0].MaxChi2ClusterAttachment, + this->mBz, + iLayer, + currentLayerCellsNum, + nextLayerCellsNum, + 1e2, + mTimeFrameGPU->getExternalAllocator(), + conf.nBlocksFindNeighbours[iteration], + conf.nThreadsFindNeighbours[iteration], + mTimeFrameGPU->getStream(iLayer)); mTimeFrameGPU->createNeighboursDevice(iLayer); if (mTimeFrameGPU->getNNeighbours()[iLayer] == 0) { continue; } - computeCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), - mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), - mTimeFrameGPU->getDeviceArrayCellsLUT(), - mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), - mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer), - (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(), - this->mTrkParams[0].DeltaROF, - this->mTrkParams[0].MaxChi2ClusterAttachment, - this->mBz, - iLayer, - currentLayerCellsNum, - nextLayerCellsNum, - 1e2, - conf.nBlocksFindNeighbours[iteration], - conf.nThreadsFindNeighbours[iteration], - mTimeFrameGPU->getStream(iLayer)); + computeCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), + mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), + mTimeFrameGPU->getDeviceArrayCellsLUT(), + mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), + mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer), + (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(), + this->mTrkParams[0].DeltaROF, + this->mTrkParams[0].MaxChi2ClusterAttachment, + this->mBz, + iLayer, + currentLayerCellsNum, + nextLayerCellsNum, + 1e2, + conf.nBlocksFindNeighbours[iteration], + conf.nThreadsFindNeighbours[iteration], + mTimeFrameGPU->getStream(iLayer)); mTimeFrameGPU->getArrayNNeighbours()[iLayer] = filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), mTimeFrameGPU->getDeviceNeighbours(iLayer), mTimeFrameGPU->getArrayNNeighbours()[iLayer], @@ -278,7 +278,7 @@ void TrackerTraitsGPU::findRoads(const int iteration) auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) { const int minimumLayer{startLevel - 1}; - bounded_vector trackSeeds(this->getMemoryPool().get()); + bounded_vector> trackSeeds(this->getMemoryPool().get()); for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) { if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { continue; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 9b3df193abe34..694b598334be3 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -90,6 +90,11 @@ struct TypedAllocator { ExternalAllocator* mInternalAllocator; }; +GPUdii() int4 getEmptyBinsRect() +{ + return int4{0, 0, 0, 0}; +} + GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex, const o2::its::IndexTableUtils& utils, const float z1, const float z2, float maxdeltaz, float maxdeltaphi) @@ -247,12 +252,13 @@ struct is_valid_pair { } }; +template struct seed_selector { float maxQ2Pt; float maxChi2; GPUhd() seed_selector(float maxQ2Pt, float maxChi2) : maxQ2Pt(maxQ2Pt), maxChi2(maxChi2) {} - GPUhd() bool operator()(const CellSeed& seed) const + GPUhd() bool operator()(const CellSeed& seed) const { return !(seed.getQ2Pt() > maxQ2Pt || seed.getChi2() > maxChi2); } @@ -305,7 +311,7 @@ GPUdii() gpuSpan getClustersOnLayer(const int rof, template GPUg() void fitTrackSeedsKernel( - CellSeed* trackSeeds, + CellSeed* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, o2::its::TrackITSExt* tracks, const float* minPts, @@ -324,7 +330,7 @@ GPUg() void fitTrackSeedsKernel( temporaryTrack.resetCovariance(); temporaryTrack.setChi2(0); - int* clusters = seed.getClusters(); + auto& clusters = seed.getClusters(); for (int iL{0}; iL < 7; ++iL) { temporaryTrack.setExternalClusterIndex(iL, clusters[iL], clusters[iL] != constants::UnusedIndex); } @@ -368,7 +374,7 @@ GPUg() void fitTrackSeedsKernel( template GPUg() void computeLayerCellNeighboursKernel( - CellSeed** cellSeedArray, + CellSeed** cellSeedArray, int* neighboursLUT, int* neighboursIndexTable, int** cellsLUTs, @@ -388,7 +394,7 @@ GPUg() void computeLayerCellNeighboursKernel( const int nextLayerLastCellIndex{cellsLUTs[layerIndex + 1][nextLayerTrackletIndex + 1]}; int foundNeighbours{0}; for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) { - CellSeed nextCellSeed{cellSeedArray[layerIndex + 1][iNextCell]}; // Copy + auto nextCellSeed{cellSeedArray[layerIndex + 1][iNextCell]}; // Copy if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex) { // Check if cells share the same tracklet break; } @@ -430,7 +436,7 @@ GPUg() void computeLayerCellNeighboursKernel( } } -template +template GPUg() void computeLayerCellsKernel( const Cluster** sortedClusters, const Cluster** unsortedClusters, @@ -439,7 +445,7 @@ GPUg() void computeLayerCellsKernel( int** trackletsLUT, const int nTrackletsCurrent, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTs, const int deltaROF, const float bz, @@ -506,7 +512,7 @@ GPUg() void computeLayerCellsKernel( continue; } if constexpr (!initRun) { - new (cells + cellsLUTs[layer][iCurrentTrackletIndex] + foundCells) CellSeed{layer, clusId[0], clusId[1], clusId[2], iCurrentTrackletIndex, iNextTrackletIndex, track, chi2}; + new (cells + cellsLUTs[layer][iCurrentTrackletIndex] + foundCells) CellSeed{layer, clusId[0], clusId[1], clusId[2], iCurrentTrackletIndex, iNextTrackletIndex, track, chi2}; } ++foundCells; if constexpr (initRun) { @@ -654,14 +660,14 @@ GPUg() void compileTrackletsLookupTableKernel(const Tracklet* tracklets, } } -template +template GPUg() void processNeighboursKernel(const int layer, const int level, - CellSeed** allCellSeeds, - CellSeed* currentCellSeeds, + CellSeed** allCellSeeds, + CellSeed* currentCellSeeds, const int* currentCellIds, const unsigned int nCurrentCells, - CellSeed* updatedCellSeeds, + CellSeed* updatedCellSeeds, int* updatedCellsIds, int* foundSeedsTable, // auxiliary only in GPU code to compute the number of cells per iteration const unsigned char** usedClusters, // Used clusters @@ -692,7 +698,7 @@ GPUg() void processNeighboursKernel(const int layer, for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) { const int neighbourCellId = neighbours[iNeighbourCell]; - const CellSeed& neighbourCell = allCellSeeds[layer - 1][neighbourCellId]; + const auto& neighbourCell = allCellSeeds[layer - 1][neighbourCellId]; if (neighbourCell.getSecondTrackletIndex() != currentCell.getFirstTrackletIndex()) { continue; @@ -703,7 +709,7 @@ GPUg() void processNeighboursKernel(const int layer, if (currentCell.getLevel() - 1 != neighbourCell.getLevel()) { continue; } - CellSeed seed{currentCell}; + auto seed{currentCell}; auto& trHit = foundTrackingFrameInfo[layer - 1][neighbourCell.getFirstClusterIndex()]; if (!seed.rotate(trHit.alphaTrackingFrame)) { @@ -904,6 +910,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, } } +template void countCellsHandler( const Cluster** sortedClusters, const Cluster** unsortedClusters, @@ -912,7 +919,7 @@ void countCellsHandler( int** trackletsLUT, const int nTracklets, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, const int deltaROF, @@ -944,6 +951,7 @@ void countCellsHandler( thrust::exclusive_scan(nosync_policy, cellsLUTsHost, cellsLUTsHost + nTracklets + 1, cellsLUTsHost); } +template void computeCellsHandler( const Cluster** sortedClusters, const Cluster** unsortedClusters, @@ -952,7 +960,7 @@ void computeCellsHandler( int** trackletsLUT, const int nTracklets, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, const int deltaROF, @@ -981,7 +989,8 @@ void computeCellsHandler( nSigmaCut); // const float } -void countCellNeighboursHandler(CellSeed** cellsLayersDevice, +template +void countCellNeighboursHandler(CellSeed** cellsLayersDevice, int* neighboursLUT, int** cellsLUTs, gpuPair* cellNeighbours, @@ -1017,7 +1026,8 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice, thrust::exclusive_scan(nosync_policy, neighboursIndexTable, neighboursIndexTable + nCells + 1, neighboursIndexTable); } -void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, +template +void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, int* neighboursLUT, int** cellsLUTs, gpuPair* cellNeighbours, @@ -1068,14 +1078,14 @@ int filterCellNeighboursHandler(gpuPair* cellNeighbourPairs, template void processNeighboursHandler(const int startLayer, const int startLevel, - CellSeed** allCellSeeds, - CellSeed* currentCellSeeds, + CellSeed** allCellSeeds, + CellSeed* currentCellSeeds, std::array& nCells, const unsigned char** usedClusters, std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, - bounded_vector& seedsHost, + bounded_vector>& seedsHost, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, @@ -1086,11 +1096,11 @@ void processNeighboursHandler(const int startLayer, const int nThreads) { auto allocInt = gpu::TypedAllocator(alloc); - auto allocCellSeed = gpu::TypedAllocator(alloc); + auto allocCellSeed = gpu::TypedAllocator>(alloc); thrust::device_vector> foundSeedsTable(nCells[startLayer] + 1, 0, allocInt); auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(gpu::Stream::DefaultStream); - gpu::processNeighboursKernel<<>>( + gpu::processNeighboursKernel<<>>( startLayer, startLevel, allCellSeeds, @@ -1111,8 +1121,8 @@ void processNeighboursHandler(const int startLayer, thrust::exclusive_scan(nosync_policy, foundSeedsTable.begin(), foundSeedsTable.end(), foundSeedsTable.begin()); thrust::device_vector> updatedCellId(foundSeedsTable.back(), 0, allocInt); - thrust::device_vector> updatedCellSeed(foundSeedsTable.back(), allocCellSeed); - gpu::processNeighboursKernel<<>>( + thrust::device_vector, gpu::TypedAllocator>> updatedCellSeed(foundSeedsTable.back(), allocCellSeed); + gpu::processNeighboursKernel<<>>( startLayer, startLevel, allCellSeeds, @@ -1134,17 +1144,17 @@ void processNeighboursHandler(const int startLayer, int level = startLevel; thrust::device_vector> lastCellId(allocInt); - thrust::device_vector> lastCellSeed(allocCellSeed); + thrust::device_vector, gpu::TypedAllocator>> lastCellSeed(allocCellSeed); for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { lastCellSeed.swap(updatedCellSeed); lastCellId.swap(updatedCellId); - thrust::device_vector>(allocCellSeed).swap(updatedCellSeed); + thrust::device_vector, gpu::TypedAllocator>>(allocCellSeed).swap(updatedCellSeed); thrust::device_vector>(allocInt).swap(updatedCellId); auto lastCellSeedSize{lastCellSeed.size()}; foundSeedsTable.resize(lastCellSeedSize + 1); thrust::fill(nosync_policy, foundSeedsTable.begin(), foundSeedsTable.end(), 0); - gpu::processNeighboursKernel<<>>( + gpu::processNeighboursKernel<<>>( iLayer, --level, allCellSeeds, @@ -1168,9 +1178,9 @@ void processNeighboursHandler(const int startLayer, updatedCellId.resize(foundSeeds); thrust::fill(nosync_policy, updatedCellId.begin(), updatedCellId.end(), 0); updatedCellSeed.resize(foundSeeds); - thrust::fill(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed()); + thrust::fill(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed()); - gpu::processNeighboursKernel<<>>( + gpu::processNeighboursKernel<<>>( iLayer, level, allCellSeeds, @@ -1190,14 +1200,15 @@ void processNeighboursHandler(const int startLayer, matCorrType); } GPUChkErrS(cudaStreamSynchronize(gpu::Stream::DefaultStream)); - thrust::device_vector> outSeeds(updatedCellSeed.size(), allocCellSeed); - auto end = thrust::copy_if(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); + thrust::device_vector, gpu::TypedAllocator>> outSeeds(updatedCellSeed.size(), allocCellSeed); + auto end = thrust::copy_if(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); auto s{end - outSeeds.begin()}; seedsHost.reserve(seedsHost.size() + s); thrust::copy(outSeeds.begin(), outSeeds.begin() + s, std::back_inserter(seedsHost)); } -void trackSeedHandler(CellSeed* trackSeeds, +template +void trackSeedHandler(CellSeed* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, o2::its::TrackITSExt* tracks, std::vector& minPtsHost, @@ -1229,6 +1240,7 @@ void trackSeedHandler(CellSeed* trackSeeds, GPUChkErrS(cudaStreamSynchronize(gpu::Stream::DefaultStream)); } +/// Explicit instantiation of ITS2 handlers template void countTrackletsInROFsHandler<7>(const IndexTableUtils* utils, const uint8_t* multMask, const int layer, @@ -1296,16 +1308,90 @@ template void computeTrackletsInROFsHandler<7>(const IndexTableUtils* utils, const int nThreads, gpu::Streams& streams); +template void countCellsHandler<7>(const Cluster** sortedClusters, + const Cluster** unsortedClusters, + const TrackingFrameInfo** tfInfo, + Tracklet** tracklets, + int** trackletsLUT, + const int nTracklets, + const int layer, + CellSeed<7>* cells, + int** cellsLUTsArrayDevice, + int* cellsLUTsHost, + const int deltaROF, + const float bz, + const float maxChi2ClusterAttachment, + const float cellDeltaTanLambdaSigma, + const float nSigmaCut, + o2::its::ExternalAllocator* alloc, + const int nBlocks, + const int nThreads, + gpu::Streams& streams); + +template void computeCellsHandler<7>(const Cluster** sortedClusters, + const Cluster** unsortedClusters, + const TrackingFrameInfo** tfInfo, + Tracklet** tracklets, + int** trackletsLUT, + const int nTracklets, + const int layer, + CellSeed<7>* cells, + int** cellsLUTsArrayDevice, + int* cellsLUTsHost, + const int deltaROF, + const float bz, + const float maxChi2ClusterAttachment, + const float cellDeltaTanLambdaSigma, + const float nSigmaCut, + const int nBlocks, + const int nThreads, + gpu::Streams& streams); + +template void countCellNeighboursHandler<7>(CellSeed<7>** cellsLayersDevice, + int* neighboursLUT, + int** cellsLUTs, + gpuPair* cellNeighbours, + int* neighboursIndexTable, + const Tracklet** tracklets, + const int deltaROF, + const float maxChi2ClusterAttachment, + const float bz, + const int layerIndex, + const unsigned int nCells, + const unsigned int nCellsNext, + const int maxCellNeighbours, + o2::its::ExternalAllocator* alloc, + const int nBlocks, + const int nThreads, + gpu::Stream& stream); + +template void computeCellNeighboursHandler(CellSeed<7>** cellsLayersDevice, + int* neighboursLUT, + int** cellsLUTs, + gpuPair* cellNeighbours, + int* neighboursIndexTable, + const Tracklet** tracklets, + const int deltaROF, + const float maxChi2ClusterAttachment, + const float bz, + const int layerIndex, + const unsigned int nCells, + const unsigned int nCellsNext, + const int maxCellNeighbours, + const int nBlocks, + const int nThreads, + gpu::Stream& stream); + template void processNeighboursHandler<7>(const int startLayer, const int startLevel, - CellSeed** allCellSeeds, - CellSeed* currentCellSeeds, + CellSeed<7>** allCellSeeds, + CellSeed<7>* currentCellSeeds, std::array& nCells, const unsigned char** usedClusters, std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, - bounded_vector& seedsHost, + bounded_vector>& seedsHost, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, @@ -1314,4 +1400,19 @@ template void processNeighboursHandler<7>(const int startLayer, o2::its::ExternalAllocator* alloc, const int nBlocks, const int nThreads); + +template void trackSeedHandler(CellSeed<7>* trackSeeds, + const TrackingFrameInfo** foundTrackingFrameInfo, + o2::its::TrackITSExt* tracks, + std::vector& minPtsHost, + const unsigned int nSeeds, + const float bz, + const int startLevel, + float maxChi2ClusterAttachment, + float maxChi2NDF, + const o2::base::Propagator* propagator, + const o2::base::PropagatorF::MatCorrType matCorrType, + const int nBlocks, + const int nThreads); + } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h index fcea96abbfa82..902092a510eb0 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h @@ -43,12 +43,14 @@ class Cell final int mLevel{constants::UnusedIndex}; }; +template class CellSeed final : public o2::track::TrackParCovF { public: GPUhdDefault() CellSeed() = default; GPUhd() CellSeed(int innerL, int cl0, int cl1, int cl2, int trkl0, int trkl1, o2::track::TrackParCovF& tpc, float chi2) : o2::track::TrackParCovF(tpc), mChi2(chi2), mLevel(1) { + mClusters.fill(constants::UnusedIndex); setUserField(innerL); mClusters[innerL + 0] = cl0; mClusters[innerL + 1] = cl1; @@ -74,18 +76,25 @@ class CellSeed final : public o2::track::TrackParCovF GPUhd() int getLevel() const { return mLevel; }; GPUhd() void setLevel(int level) { mLevel = level; }; GPUhd() int* getLevelPtr() { return &mLevel; } - GPUhd() int* getClusters() { return mClusters; } + GPUhd() auto& getClusters() { return mClusters; } GPUhd() int getCluster(int i) const { return mClusters[i]; } GPUhd() void printCell() const { - printf("trkl: %d, %d\t lvl: %d\t chi2: %f\tcls: [%d | %d | %d | %d | %d | %d | %d]\n", mTracklets[0], mTracklets[1], mLevel, mChi2, mClusters[0], mClusters[1], mClusters[2], mClusters[3], mClusters[4], mClusters[5], mClusters[6]); + printf("cell: %d, %d\t lvl: %d\t chi2: %f\tcls: [", mTracklets[0], mTracklets[1], mLevel, mChi2); + for (int i = 0; i < nLayers; ++i) { + printf("%d", mClusters[i]); + if (i < nLayers - 1) { + printf(" | "); + } + } + printf("]\n"); } private: float mChi2 = -999.f; int mLevel = constants::UnusedIndex; - int mTracklets[2] = {constants::UnusedIndex, constants::UnusedIndex}; - int mClusters[7] = {constants::UnusedIndex, constants::UnusedIndex, constants::UnusedIndex, constants::UnusedIndex, constants::UnusedIndex, constants::UnusedIndex, constants::UnusedIndex}; + std::array mTracklets = constants::helpers::initArray(); + std::array mClusters = constants::helpers::initArray(); }; } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h index ab9d0c2e4d1a6..22642f2e23229 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h @@ -16,21 +16,39 @@ #ifndef TRACKINGITSU_INCLUDE_CONSTANTS_H_ #define TRACKINGITSU_INCLUDE_CONSTANTS_H_ +#include +#include + #include "ITStracking/Definitions.h" +#include "GPUCommonDefAPI.h" namespace o2::its::constants { -constexpr float MB = 1024.f * 1024.f; -constexpr float GB = 1024.f * 1024.f * 1024.f; + +constexpr float KB = 1024.f; +constexpr float MB = KB * KB; +constexpr float GB = MB * KB; constexpr bool DoTimeBenchmarks = true; constexpr bool SaveTimeBenchmarks = false; -constexpr float Tolerance{1e-12}; // numerical tolerance -constexpr int ClustersPerCell{3}; -constexpr int UnusedIndex{-1}; -constexpr float Resolution{0.0005f}; -constexpr float Radl = 9.36f; // Radiation length of Si [cm] -constexpr float Rho = 2.33f; // Density of Si [g/cm^3] +GPUconstexpr() float Tolerance{1e-12}; // numerical tolerance +GPUconstexpr() int ClustersPerCell{3}; +GPUconstexpr() int UnusedIndex{-1}; +GPUconstexpr() float Resolution{0.0005f}; +GPUconstexpr() float Radl = 9.36f; // Radiation length of Si [cm] +GPUconstexpr() float Rho = 2.33f; // Density of Si [g/cm^3] + +namespace helpers +{ + +// initialize a std::array at compile time fully with T +template +constexpr std::array initArray() +{ + return [](std::index_sequence) { return std::array{(static_cast(Is), Value)...}; }(std::make_index_sequence{}); +} + +} // namespace helpers } // namespace o2::its::constants #endif /* TRACKINGITSU_INCLUDE_CONSTANTS_H_ */ diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Road.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Road.h index 75f187f31652b..009f3a1b5b146 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Road.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Road.h @@ -16,13 +16,15 @@ #ifndef TRACKINGCA_INCLUDE_ROAD_H #define TRACKINGCA_INCLUDE_ROAD_H +#include + #include "ITStracking/Constants.h" #include "GPUCommonDef.h" namespace o2::its { -template +template class Road final { public: @@ -60,7 +62,7 @@ class Road final } private: - int mCellIds[maxRoadSize]{constants::UnusedIndex}; + std::array mCellIds = constants::helpers::initArray(); unsigned char mRoadSize{0}; bool mIsFakeRoad{false}; }; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index d9d89e8149e62..ebc885a3a35cf 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -65,9 +65,12 @@ class TimeFrameGPU; template struct TimeFrame { + using CellSeedN = CellSeed; friend class gpu::TimeFrameGPU; + TimeFrame(); virtual ~TimeFrame(); + const Vertex& getPrimaryVertex(const int ivtx) const { return mPrimaryVertices[ivtx]; } gsl::span getPrimaryVertices(int rofId) const; gsl::span getPrimaryVertices(int romin, int romax) const; @@ -298,7 +301,7 @@ struct TimeFrame { std::array, nLayers> mUnsortedClusters; std::vector> mTracklets; - std::vector> mCells; + std::vector> mCells; bounded_vector> mRoads; std::vector> mTracks; std::vector> mCellsNeighbours; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index 08961a2c8c1ee..8647236b4f7e7 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -22,6 +22,7 @@ #include "ITStracking/Configuration.h" #include "ITStracking/MathUtils.h" #include "ITStracking/TimeFrame.h" +#include "ITStracking/Cell.h" #include "ITStracking/BoundedAllocator.h" // #define OPTIMISATION_OUTPUT @@ -39,6 +40,8 @@ class TrackITSExt; template class TrackerTraits { + using CellSeedN = CellSeed; + public: virtual ~TrackerTraits() = default; virtual void adoptTimeFrame(TimeFrame* tf) { mTimeFrame = tf; } @@ -55,7 +58,7 @@ class TrackerTraits virtual void findShortPrimaries(); virtual bool trackFollowing(TrackITSExt* track, int rof, bool outward, const int iteration); - virtual void processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeed, bounded_vector& updatedCellId); + virtual void processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeed, bounded_vector& updatedCellId); void updateTrackingParameters(const std::vector& trkPars) { mTrkParams = trkPars; } TimeFrame* getTimeFrame() { return mTimeFrame; } diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx index a59c51949b9f9..18da81d25fbd8 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx @@ -382,7 +382,7 @@ unsigned long TimeFrame::getArtefactsMemory() const size += sizeof(Tracklet) * trkl.size(); } for (const auto& cells : mCells) { - size += sizeof(CellSeed) * cells.size(); + size += sizeof(CellSeedN) * cells.size(); } for (const auto& cellsN : mCellsNeighbours) { size += sizeof(int) * cellsN.size(); diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index 5d0fe459f44a5..746a15287e994 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -190,7 +190,7 @@ void Tracker::computeRoadsMClabels() } } - const CellSeed& currentCell{mTimeFrame->getCells()[iCell][currentCellIndex]}; + const auto& currentCell{mTimeFrame->getCells()[iCell][currentCellIndex]}; if (isFirstRoadCell) { diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 2ebfbfe235c5b..fc43a09694dd8 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -319,7 +319,7 @@ void TrackerTraits::computeLayerCells(const int iteration) } mTaskArena->execute([&] { - auto forTrackletCells = [&](auto Tag, int iLayer, bounded_vector& layerCells, int iTracklet, int offset = 0) -> int { + auto forTrackletCells = [&](auto Tag, int iLayer, bounded_vector& layerCells, int iTracklet, int offset = 0) -> int { const Tracklet& currentTracklet{mTimeFrame->getTracklets()[iLayer][iTracklet]}; const int nextLayerClusterIndex{currentTracklet.secondClusterIndex}; const int nextLayerFirstTrackletIndex{mTimeFrame->getTrackletsLookupTable()[iLayer][nextLayerClusterIndex]}; @@ -392,7 +392,7 @@ void TrackerTraits::computeLayerCells(const int iteration) } else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) { ++foundCells; } else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) { - layerCells[offset++] = CellSeed(iLayer, clusId[0], clusId[1], clusId[2], iTracklet, iNextTracklet, track, chi2); + layerCells[offset++] = CellSeedN(iLayer, clusId[0], clusId[1], clusId[2], iTracklet, iNextTracklet, track, chi2); } else { static_assert(false, "Unknown mode!"); } @@ -615,7 +615,7 @@ void TrackerTraits::findCellsNeighbours(const int iteration) } template -void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeeds, bounded_vector& updatedCellsIds) +void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeeds, bounded_vector& updatedCellsIds) { CA_DEBUGGER(std::cout << "Processing neighbours layer " << iLayer << " level " << iLevel << ", size of the cell seeds: " << currentCellSeed.size() << std::endl); auto propagator = o2::base::Propagator::Instance(); @@ -626,7 +626,7 @@ void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bou mTaskArena->execute([&] { auto forCellNeighbours = [&](auto Tag, int iCell, int offset = 0) -> int { - const CellSeed& currentCell{currentCellSeed[iCell]}; + const auto& currentCell{currentCellSeed[iCell]}; if constexpr (decltype(Tag)::value != PassMode::TwoPassInsert::value) { if (currentCell.getLevel() != iLevel) { @@ -646,7 +646,7 @@ void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bou for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) { CA_DEBUGGER(attempts++); const int neighbourCellId = mTimeFrame->getCellsNeighbours()[iLayer - 1][iNeighbourCell]; - const CellSeed& neighbourCell = mTimeFrame->getCells()[iLayer - 1][neighbourCellId]; + const auto& neighbourCell = mTimeFrame->getCells()[iLayer - 1][neighbourCellId]; if (neighbourCell.getSecondTrackletIndex() != currentCell.getFirstTrackletIndex()) { CA_DEBUGGER(failedByMismatch++); continue; @@ -660,7 +660,7 @@ void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bou } /// Let's start the fitting procedure - CellSeed seed{currentCell}; + CellSeedN seed{currentCell}; const auto& trHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer - 1)[neighbourCell.getFirstClusterIndex()]; if (!seed.rotate(trHit.alphaTrackingFrame)) { @@ -767,17 +767,17 @@ void TrackerTraits::findRoads(const int iteration) for (int startLevel{mTrkParams[iteration].CellsPerRoad()}; startLevel >= mTrkParams[iteration].CellMinimumLevel(); --startLevel) { CA_DEBUGGER(std::cout << "\t > Processing level " << startLevel << std::endl); - auto seedFilter = [&](const CellSeed& seed) { + auto seedFilter = [&](const auto& seed) { return seed.getQ2Pt() <= 1.e3 && seed.getChi2() <= mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5); }; - bounded_vector trackSeeds(mMemoryPool.get()); + bounded_vector trackSeeds(mMemoryPool.get()); for (int startLayer{mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= startLevel - 1; --startLayer) { if ((mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { continue; } CA_DEBUGGER(std::cout << "\t\t > Starting processing layer " << startLayer << std::endl); bounded_vector lastCellId(mMemoryPool.get()), updatedCellId(mMemoryPool.get()); - bounded_vector lastCellSeed(mMemoryPool.get()), updatedCellSeed(mMemoryPool.get()); + bounded_vector lastCellSeed(mMemoryPool.get()), updatedCellSeed(mMemoryPool.get()); processNeighbours(startLayer, startLevel, mTimeFrame->getCells()[startLayer], lastCellId, updatedCellSeed, updatedCellId); @@ -805,7 +805,7 @@ void TrackerTraits::findRoads(const int iteration) bounded_vector tracks(mMemoryPool.get()); mTaskArena->execute([&] { auto forSeed = [&](auto Tag, int iSeed, int offset = 0) { - const CellSeed& seed{trackSeeds[iSeed]}; + const auto& seed{trackSeeds[iSeed]}; TrackITSExt temporaryTrack{seed}; temporaryTrack.resetCovariance(); temporaryTrack.setChi2(0);