diff --git a/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt index f9565307d35f6..291ddffbf9475 100644 --- a/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt +++ b/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt @@ -9,7 +9,6 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. -#add_compile_options(-O0 -g -fPIC -fno-omit-frame-pointer) o2_add_library(ITStracking TARGETVARNAME targetName SOURCES src/ClusterLines.cxx @@ -37,6 +36,7 @@ o2_add_library(ITStracking PRIVATE_LINK_LIBRARIES O2::Steer TBB::tbb) +# target_compile_options(${targetName} PRIVATE -O0 -g -fPIC -fno-omit-frame-pointer) o2_add_library(ITSTrackingInterface TARGETVARNAME targetName diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 8b3e9bddd18d6..0ad08fd88ccf5 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h @@ -23,11 +23,6 @@ namespace o2::its::gpu { -class DefaultGPUAllocator : public ExternalAllocator -{ - void* allocate(size_t size) override; -}; - template class TimeFrameGPU : public TimeFrame { @@ -84,7 +79,7 @@ class TimeFrameGPU : public TimeFrame return mGpuStreams[stream]; } auto& getStreams() { return mGpuStreams; } - void wipe(const int); + virtual void wipe() final; /// interface int getNClustersInRofSpan(const int, const int, const int) const; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h index 454e39e04a661..802973d5f4000 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h @@ -21,6 +21,12 @@ #include "GPUCommonDef.h" #include "GPUCommonHelpers.h" +#ifndef __HIPCC__ +#define THRUST_NAMESPACE thrust::cuda +#else +#define THRUST_NAMESPACE thrust::hip +#endif + namespace o2::its { diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu index d5ea573a2f0e8..f6d9157b0da68 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu @@ -21,8 +21,6 @@ #include "ITStrackingGPU/TracerGPU.h" #include -#include -#include #include #include @@ -31,14 +29,7 @@ #include "GPUCommonLogger.h" #include "GPUCommonHelpers.h" -namespace o2 -{ -namespace its -{ -using constants::GB; -using constants::MB; - -namespace gpu +namespace o2::its::gpu { #ifdef ITS_MEASURE_GPU_TIME @@ -96,14 +87,19 @@ class GPUTimer std::vector mStarts, mStops; std::vector mStreams; }; + +#define GPULog(...) LOGP(info, __VA_ARGS__) #else // ITS_MEASURE_GPU_TIME not defined class GPUTimer { public: - GPUTimer(Stream&, const std::string&) {} - GPUTimer(Streams&, const std::string&) {} - GPUTimer(Streams&, const std::string&, int, int = 0) {} + template + GPUTimer(Args&&...) + { + } }; + +#define GPULog(...) #endif template @@ -121,7 +117,7 @@ void TimeFrameGPU::allocMemAsync(void** ptr, size_t size, Stream& strea if (extAllocator) { *ptr = this->mAllocator->allocate(size); } else { - LOGP(debug, "Calling default CUDA allocator"); + GPULog("Calling default CUDA allocator"); GPUChkErrS(cudaMallocAsync(reinterpret_cast(ptr), size, stream.get())); } } @@ -137,10 +133,10 @@ void TimeFrameGPU::loadIndexTableUtils(const int iteration) { GPUTimer timer(mGpuStreams[0], "loading indextable utils"); if (!iteration) { - LOGP(debug, "gpu-allocation: allocating IndexTableUtils buffer, for {} MB.", sizeof(IndexTableUtils) / MB); + GPULog("gpu-allocation: allocating IndexTableUtils buffer, for {:.2f} MB.", sizeof(IndexTableUtils) / constants::MB); allocMemAsync(reinterpret_cast(&mIndexTableUtilsDevice), sizeof(IndexTableUtils), mGpuStreams[0], this->getExtAllocator()); } - LOGP(debug, "gpu-transfer: loading IndexTableUtils object, for {} MB.", sizeof(IndexTableUtils) / MB); + GPULog("gpu-transfer: loading IndexTableUtils object, for {:.2f} MB.", sizeof(IndexTableUtils) / constants::MB); GPUChkErrS(cudaMemcpyAsync(mIndexTableUtilsDevice, &(this->mIndexTableUtils), sizeof(IndexTableUtils), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } @@ -149,11 +145,11 @@ void TimeFrameGPU::loadUnsortedClustersDevice(const int iteration) { if (!iteration) { GPUTimer timer(mGpuStreams[0], "loading unsorted clusters"); - for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - LOGP(debug, "gpu-transfer: loading {} unsorted clusters on layer {}, for {} MB.", this->mUnsortedClusters[iLayer].size(), iLayer, this->mUnsortedClusters[iLayer].size() * sizeof(Cluster) / MB); - allocMemAsync(reinterpret_cast(&mUnsortedClustersDevice[iLayer]), this->mUnsortedClusters[iLayer].size() * sizeof(Cluster), mGpuStreams[0], this->getExtAllocator()); + for (int iLayer{0}; iLayer < nLayers; ++iLayer) { + GPULog("gpu-transfer: loading {} unsorted clusters on layer {}, for {:.2f} MB.", this->mUnsortedClusters[iLayer].size(), iLayer, this->mUnsortedClusters[iLayer].size() * sizeof(Cluster) / constants::MB); + allocMemAsync(reinterpret_cast(&mUnsortedClustersDevice[iLayer]), this->mUnsortedClusters[iLayer].size() * sizeof(Cluster), mGpuStreams[iLayer], this->getExtAllocator()); GPUChkErrS(cudaHostRegister(this->mUnsortedClusters[iLayer].data(), this->mUnsortedClusters[iLayer].size() * sizeof(Cluster), cudaHostRegisterPortable)); - GPUChkErrS(cudaMemcpyAsync(mUnsortedClustersDevice[iLayer], this->mUnsortedClusters[iLayer].data(), this->mUnsortedClusters[iLayer].size() * sizeof(Cluster), cudaMemcpyHostToDevice, mGpuStreams[0].get())); + GPUChkErrS(cudaMemcpyAsync(mUnsortedClustersDevice[iLayer], this->mUnsortedClusters[iLayer].data(), this->mUnsortedClusters[iLayer].size() * sizeof(Cluster), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get())); } allocMemAsync(reinterpret_cast(&mUnsortedClustersDeviceArray), nLayers * sizeof(Cluster*), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaHostRegister(mUnsortedClustersDevice.data(), nLayers * sizeof(Cluster*), cudaHostRegisterPortable)); @@ -167,7 +163,7 @@ void TimeFrameGPU::loadClustersDevice(const int iteration) if (!iteration) { GPUTimer timer(mGpuStreams[0], "loading sorted clusters"); for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - LOGP(debug, "gpu-transfer: loading {} clusters on layer {}, for {} MB.", this->mClusters[iLayer].size(), iLayer, this->mClusters[iLayer].size() * sizeof(Cluster) / MB); + GPULog("gpu-transfer: loading {} clusters on layer {}, for {:.2f} MB.", this->mClusters[iLayer].size(), iLayer, this->mClusters[iLayer].size() * sizeof(Cluster) / constants::MB); allocMemAsync(reinterpret_cast(&mClustersDevice[iLayer]), this->mClusters[iLayer].size() * sizeof(Cluster), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaHostRegister(this->mClusters[iLayer].data(), this->mClusters[iLayer].size() * sizeof(Cluster), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpyAsync(mClustersDevice[iLayer], this->mClusters[iLayer].data(), this->mClusters[iLayer].size() * sizeof(Cluster), cudaMemcpyHostToDevice, mGpuStreams[0].get())); @@ -184,7 +180,7 @@ void TimeFrameGPU::loadClustersIndexTables(const int iteration) if (!iteration) { GPUTimer timer(mGpuStreams[0], "loading sorted clusters"); for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - LOGP(debug, "gpu-transfer: loading clusters indextable for layer {} with {} elements, for {} MB.", iLayer, this->mIndexTables[iLayer].size(), this->mIndexTables[iLayer].size() * sizeof(int) / MB); + GPULog("gpu-transfer: loading clusters indextable for layer {} with {} elements, for {:.2f} MB.", iLayer, this->mIndexTables[iLayer].size(), this->mIndexTables[iLayer].size() * sizeof(int) / constants::MB); allocMemAsync(reinterpret_cast(&mClustersIndexTablesDevice[iLayer]), this->mIndexTables[iLayer].size() * sizeof(int), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemcpyAsync(mClustersIndexTablesDevice[iLayer], this->mIndexTables[iLayer].data(), this->mIndexTables[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } @@ -199,7 +195,7 @@ void TimeFrameGPU::createUsedClustersDevice(const int iteration) if (!iteration) { GPUTimer timer(mGpuStreams[0], "creating used clusters flags"); for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - LOGP(debug, "gpu-transfer: creating {} used clusters flags on layer {}, for {} MB.", this->mUsedClusters[iLayer].size(), iLayer, this->mUsedClusters[iLayer].size() * sizeof(unsigned char) / MB); + GPULog("gpu-transfer: creating {} used clusters flags on layer {}, for {:.2f} MB.", this->mUsedClusters[iLayer].size(), iLayer, this->mUsedClusters[iLayer].size() * sizeof(unsigned char) / constants::MB); allocMemAsync(reinterpret_cast(&mUsedClustersDevice[iLayer]), this->mUsedClusters[iLayer].size() * sizeof(unsigned char), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemsetAsync(mUsedClustersDevice[iLayer], 0, this->mUsedClusters[iLayer].size() * sizeof(unsigned char), mGpuStreams[0].get())); } @@ -213,7 +209,7 @@ void TimeFrameGPU::loadUsedClustersDevice() { GPUTimer timer(mGpuStreams[0], "loading used clusters flags"); for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - LOGP(debug, "gpu-transfer: loading {} used clusters flags on layer {}, for {} MB.", this->mUsedClusters[iLayer].size(), iLayer, this->mClusters[iLayer].size() * sizeof(unsigned char) / MB); + GPULog("gpu-transfer: loading {} used clusters flags on layer {}, for {:.2f} MB.", this->mUsedClusters[iLayer].size(), iLayer, this->mClusters[iLayer].size() * sizeof(unsigned char) / constants::MB); GPUChkErrS(cudaMemcpyAsync(mUsedClustersDevice[iLayer], this->mUsedClusters[iLayer].data(), this->mUsedClusters[iLayer].size() * sizeof(unsigned char), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } } @@ -224,7 +220,7 @@ void TimeFrameGPU::loadROframeClustersDevice(const int iteration) if (!iteration) { GPUTimer timer(mGpuStreams[0], "loading ROframe clusters"); for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - LOGP(debug, "gpu-transfer: loading {} ROframe clusters info on layer {}, for {} MB.", this->mROFramesClusters[iLayer].size(), iLayer, this->mROFramesClusters[iLayer].size() * sizeof(int) / MB); + GPULog("gpu-transfer: loading {} ROframe clusters info on layer {}, for {:.2f} MB.", this->mROFramesClusters[iLayer].size(), iLayer, this->mROFramesClusters[iLayer].size() * sizeof(int) / constants::MB); allocMemAsync(reinterpret_cast(&mROFramesClustersDevice[iLayer]), this->mROFramesClusters[iLayer].size() * sizeof(int), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemcpyAsync(mROFramesClustersDevice[iLayer], this->mROFramesClusters[iLayer].data(), this->mROFramesClusters[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } @@ -239,7 +235,7 @@ void TimeFrameGPU::loadTrackingFrameInfoDevice(const int iteration) GPUTimer timer(mGpuStreams[0], "loading trackingframeinfo"); if (!iteration) { for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - LOGP(debug, "gpu-transfer: loading {} tfinfo on layer {}, for {} MB.", this->mTrackingFrameInfo[iLayer].size(), iLayer, this->mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo) / MB); + GPULog("gpu-transfer: loading {} tfinfo on layer {}, for {:.2f} MB.", this->mTrackingFrameInfo[iLayer].size(), iLayer, this->mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo) / constants::MB); allocMemAsync(reinterpret_cast(&mTrackingFrameInfoDevice[iLayer]), this->mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaHostRegister(this->mTrackingFrameInfo[iLayer].data(), this->mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpyAsync(mTrackingFrameInfoDevice[iLayer], this->mTrackingFrameInfo[iLayer].data(), this->mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaMemcpyHostToDevice, mGpuStreams[0].get())); @@ -253,10 +249,12 @@ void TimeFrameGPU::loadTrackingFrameInfoDevice(const int iteration) template void TimeFrameGPU::loadMultiplicityCutMask(const int iteration) { - if (!iteration) { + if (!iteration || iteration == 3) { // we need to re-load the swapped mult-mask in upc iteration GPUTimer timer(mGpuStreams[0], "loading multiplicity cut mask"); - LOGP(debug, "gpu-transfer: loading multiplicity cut mask with {} elements, for {} MB.", this->mMultiplicityCutMask.size(), this->mMultiplicityCutMask.size() * sizeof(bool) / MB); - allocMemAsync(reinterpret_cast(&mMultMaskDevice), this->mMultiplicityCutMask.size() * sizeof(uint8_t), mGpuStreams[0], this->getExtAllocator()); + GPULog("gpu-transfer: iteration {} loading multiplicity cut mask with {} elements, for {:.2f} MB.", iteration, this->mMultiplicityCutMask.size(), this->mMultiplicityCutMask.size() * sizeof(bool) / constants::MB); + if (!iteration) { // only allocate on first call + allocMemAsync(reinterpret_cast(&mMultMaskDevice), this->mMultiplicityCutMask.size() * sizeof(uint8_t), mGpuStreams[0], this->getExtAllocator()); + } GPUChkErrS(cudaMemcpyAsync(mMultMaskDevice, this->mMultiplicityCutMask.data(), this->mMultiplicityCutMask.size() * sizeof(uint8_t), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } } @@ -266,10 +264,10 @@ void TimeFrameGPU::loadVertices(const int iteration) { if (!iteration) { GPUTimer timer(mGpuStreams[0], "loading seeding vertices"); - LOGP(debug, "gpu-transfer: loading {} ROframes vertices, for {} MB.", this->mROFramesPV.size(), this->mROFramesPV.size() * sizeof(int) / MB); + GPULog("gpu-transfer: loading {} ROframes vertices, for {:.2f} MB.", this->mROFramesPV.size(), this->mROFramesPV.size() * sizeof(int) / constants::MB); allocMemAsync(reinterpret_cast(&mROFramesPVDevice), this->mROFramesPV.size() * sizeof(int), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemcpyAsync(mROFramesPVDevice, this->mROFramesPV.data(), this->mROFramesPV.size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get())); - LOGP(debug, "gpu-transfer: loading {} seeding vertices, for {} MB.", this->mPrimaryVertices.size(), this->mPrimaryVertices.size() * sizeof(Vertex) / MB); + GPULog("gpu-transfer: loading {} seeding vertices, for {:.2f} MB.", this->mPrimaryVertices.size(), this->mPrimaryVertices.size() * sizeof(Vertex) / constants::MB); allocMemAsync(reinterpret_cast(&mPrimaryVerticesDevice), this->mPrimaryVertices.size() * sizeof(Vertex), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemcpyAsync(mPrimaryVerticesDevice, this->mPrimaryVertices.data(), this->mPrimaryVertices.size() * sizeof(Vertex), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } @@ -278,13 +276,14 @@ void TimeFrameGPU::loadVertices(const int iteration) template void TimeFrameGPU::createTrackletsLUTDevice(const int iteration) { - GPUTimer timer(mGpuStreams, "creating tracklets LUTs", nLayers - 1); + GPUTimer timer(mGpuStreams[0], "creating tracklets LUTs"); for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) { + const int ncls = this->mClusters[iLayer].size() + 1; if (!iteration) { - LOGP(debug, "gpu-transfer: creating tracklets LUT for {} elements on layer {}, for {} MB.", this->mClusters[iLayer].size() + 1, iLayer, (this->mClusters[iLayer].size() + 1) * sizeof(int) / MB); - allocMemAsync(reinterpret_cast(&mTrackletsLUTDevice[iLayer]), (this->mClusters[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->getExtAllocator()); + GPULog("gpu-transfer: creating tracklets LUT for {} elements on layer {}, for {:.2f} MB.", ncls, iLayer, ncls * sizeof(int) / constants::MB); + allocMemAsync(reinterpret_cast(&mTrackletsLUTDevice[iLayer]), ncls * sizeof(int), mGpuStreams[iLayer], this->getExtAllocator()); } - GPUChkErrS(cudaMemsetAsync(mTrackletsLUTDevice[iLayer], 0, (this->mClusters[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer].get())); + GPUChkErrS(cudaMemsetAsync(mTrackletsLUTDevice[iLayer], 0, ncls * sizeof(int), mGpuStreams[iLayer].get())); } if (!iteration) { allocMemAsync(reinterpret_cast(&mTrackletsLUTDeviceArray), (nLayers - 1) * sizeof(int*), mGpuStreams[0], this->getExtAllocator()); @@ -295,11 +294,11 @@ void TimeFrameGPU::createTrackletsLUTDevice(const int iteration) template void TimeFrameGPU::createTrackletsBuffers() { - GPUTimer timer(mGpuStreams, "creating cells buffers", nLayers - 1); - for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) { + GPUTimer timer(mGpuStreams[0], "creating tracklet buffers"); + for (int iLayer{0}; iLayer < nLayers - 1; ++iLayer) { mNTracklets[iLayer] = 0; - GPUChkErrS(cudaMemcpyAsync(&mNTracklets[iLayer], mTrackletsLUTDevice[iLayer] + this->mClusters[iLayer].size(), sizeof(int), cudaMemcpyDeviceToHost)); - LOGP(debug, "gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {} MB.", mNTracklets[iLayer], iLayer, mNTracklets[iLayer] * sizeof(Tracklet) / MB); + GPUChkErrS(cudaMemcpyAsync(&mNTracklets[iLayer], mTrackletsLUTDevice[iLayer] + this->mClusters[iLayer].size(), sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get())); + GPULog("gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {:.2f} MB.", mNTracklets[iLayer], iLayer, mNTracklets[iLayer] * sizeof(Tracklet) / constants::MB); allocMemAsync(reinterpret_cast(&mTrackletsDevice[iLayer]), mNTracklets[iLayer] * sizeof(Tracklet), mGpuStreams[iLayer], this->getExtAllocator()); } allocMemAsync(reinterpret_cast(&mTrackletsDeviceArray), (nLayers - 1) * sizeof(Tracklet*), mGpuStreams[0], this->getExtAllocator()); @@ -312,7 +311,7 @@ void TimeFrameGPU::loadTrackletsDevice() { GPUTimer timer(mGpuStreams, "loading tracklets", nLayers - 1); for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) { - LOGP(debug, "gpu-transfer: loading {} tracklets on layer {}, for {} MB.", this->mTracklets[iLayer].size(), iLayer, this->mTracklets[iLayer].size() * sizeof(Tracklet) / MB); + GPULog("gpu-transfer: loading {} tracklets on layer {}, for {:.2f} MB.", this->mTracklets[iLayer].size(), iLayer, this->mTracklets[iLayer].size() * sizeof(Tracklet) / constants::MB); GPUChkErrS(cudaHostRegister(this->mTracklets[iLayer].data(), this->mTracklets[iLayer].size() * sizeof(Tracklet), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpyAsync(mTrackletsDevice[iLayer], this->mTracklets[iLayer].data(), this->mTracklets[iLayer].size() * sizeof(Tracklet), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get())); } @@ -323,12 +322,12 @@ void TimeFrameGPU::loadTrackletsLUTDevice() { GPUTimer timer(mGpuStreams, "loading tracklets", nLayers - 2); for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) { - LOGP(debug, "gpu-transfer: loading tracklets LUT for {} elements on layer {}, for {} MB", this->mTrackletsLookupTable[iLayer].size(), iLayer + 1, this->mTrackletsLookupTable[iLayer].size() * sizeof(int) / MB); + GPULog("gpu-transfer: loading tracklets LUT for {} elements on layer {}, for {:.2f} MB", this->mTrackletsLookupTable[iLayer].size(), iLayer + 1, this->mTrackletsLookupTable[iLayer].size() * sizeof(int) / constants::MB); GPUChkErrS(cudaHostRegister(this->mTrackletsLookupTable[iLayer].data(), this->mTrackletsLookupTable[iLayer].size() * sizeof(int), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpyAsync(mTrackletsLUTDevice[iLayer + 1], this->mTrackletsLookupTable[iLayer].data(), this->mTrackletsLookupTable[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get())); } GPUChkErrS(cudaHostRegister(mTrackletsLUTDevice.data(), (nLayers - 1) * sizeof(int*), cudaHostRegisterPortable)); - GPUChkErrS(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), (nLayers - 1) * sizeof(int*), cudaMemcpyHostToDevice)); + GPUChkErrS(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), (nLayers - 1) * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } template @@ -340,7 +339,7 @@ void TimeFrameGPU::createNeighboursIndexTablesDevice() GPUChkErrS(cudaHostRegister(mCellsDevice.data(), (nLayers - 2) * sizeof(CellSeed*), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpyAsync(mCellsDeviceArray, mCellsDevice.data(), (nLayers - 2) * sizeof(CellSeed*), cudaMemcpyHostToDevice, mGpuStreams[0].get())); for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) { - LOGP(debug, "gpu-transfer: loading neighbours LUT for {} elements on layer {}, for {} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / MB); + GPULog("gpu-transfer: loading neighbours LUT for {} elements on layer {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / constants::MB); allocMemAsync(reinterpret_cast(&mNeighboursIndexTablesDevice[iLayer]), (mNCells[iLayer] + 1) * sizeof(int), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[iLayer], 0, (mNCells[iLayer] + 1) * sizeof(int), mGpuStreams[0].get())); if (iLayer < nLayers - 3) { @@ -353,7 +352,7 @@ template void TimeFrameGPU::createNeighboursLUTDevice(const int layer, const unsigned int nCells) { GPUTimer timer(mGpuStreams[0], "reserving neighboursLUT"); - LOGP(debug, "gpu-allocation: reserving neighbours LUT for {} elements on layer {} , for {} MB.", nCells + 1, layer, (nCells + 1) * sizeof(int) / MB); + GPULog("gpu-allocation: reserving neighbours LUT for {} elements on layer {} , for {:.2f} MB.", nCells + 1, layer, (nCells + 1) * sizeof(int) / constants::MB); allocMemAsync(reinterpret_cast(&mNeighboursLUTDevice[layer]), (nCells + 1) * sizeof(int), mGpuStreams[0], this->getExtAllocator()); // We need one element more to move exc -> inc GPUChkErrS(cudaMemsetAsync(mNeighboursLUTDevice[layer], 0, (nCells + 1) * sizeof(int), mGpuStreams[0].get())); } @@ -363,7 +362,7 @@ void TimeFrameGPU::loadCellsDevice() { GPUTimer timer(mGpuStreams, "loading cell seeds", nLayers - 2); for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) { - LOGP(debug, "gpu-transfer: loading {} cell seeds on layer {}, for {} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeed) / MB); + GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeed) / constants::MB); allocMemAsync(reinterpret_cast(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeed), mGpuStreams[iLayer], this->getExtAllocator()); allocMemAsync(reinterpret_cast(&mNeighboursIndexTablesDevice[iLayer]), (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->getExtAllocator()); // accessory for the neigh. finding. GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[iLayer], 0, (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer].get())); @@ -378,7 +377,7 @@ void TimeFrameGPU::createCellsLUTDevice() { GPUTimer timer(mGpuStreams, "creating cells LUTs", nLayers - 2); for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) { - LOGP(debug, "gpu-transfer: creating cell LUT for {} elements on layer {}, for {} MB.", mNTracklets[iLayer] + 1, iLayer, (mNTracklets[iLayer] + 1) * sizeof(int) / MB); + GPULog("gpu-transfer: creating cell LUT for {} elements on layer {}, for {:.2f} MB.", mNTracklets[iLayer] + 1, iLayer, (mNTracklets[iLayer] + 1) * sizeof(int) / constants::MB); allocMemAsync(reinterpret_cast(&mCellsLUTDevice[iLayer]), (mNTracklets[iLayer] + 1) * sizeof(int), mGpuStreams[iLayer], this->getExtAllocator()); GPUChkErrS(cudaMemsetAsync(mCellsLUTDevice[iLayer], 0, (mNTracklets[iLayer] + 1) * sizeof(int), mGpuStreams[iLayer].get())); } @@ -391,9 +390,9 @@ void TimeFrameGPU::createCellsBuffers(const int layer) { GPUTimer timer(mGpuStreams[0], "creating cells buffers"); mNCells[layer] = 0; - GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost)); - LOGP(debug, "gpu-transfer: creating cell buffer for {} elements on layer {}, for {} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / MB); - allocMemAsync(reinterpret_cast(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[0], this->getExtAllocator()); + GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); + GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB); + allocMemAsync(reinterpret_cast(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer], this->getExtAllocator()); } template @@ -401,7 +400,7 @@ void TimeFrameGPU::loadCellsLUTDevice() { GPUTimer timer(mGpuStreams, "loading cells LUTs", nLayers - 3); for (auto iLayer{0}; iLayer < nLayers - 3; ++iLayer) { - LOGP(debug, "gpu-transfer: loading cell LUT for {} elements on layer {}, for {} MB.", this->mCellsLookupTable[iLayer].size(), iLayer, this->mCellsLookupTable[iLayer].size() * sizeof(int) / MB); + GPULog("gpu-transfer: loading cell LUT for {} elements on layer {}, for {:.2f} MB.", this->mCellsLookupTable[iLayer].size(), iLayer, this->mCellsLookupTable[iLayer].size() * sizeof(int) / constants::MB); GPUChkErrS(cudaHostRegister(this->mCellsLookupTable[iLayer].data(), this->mCellsLookupTable[iLayer].size() * sizeof(int), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpyAsync(mCellsLUTDevice[iLayer + 1], this->mCellsLookupTable[iLayer].data(), this->mCellsLookupTable[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get())); } @@ -411,7 +410,7 @@ template void TimeFrameGPU::loadRoadsDevice() { GPUTimer timer(mGpuStreams[0], "loading roads device"); - LOGP(debug, "gpu-transfer: loading {} roads, for {} MB.", this->mRoads.size(), this->mRoads.size() * sizeof(Road) / MB); + GPULog("gpu-transfer: loading {} roads, for {:.2f} MB.", this->mRoads.size(), this->mRoads.size() * sizeof(Road) / constants::MB); allocMemAsync(reinterpret_cast(&mRoadsDevice), this->mRoads.size() * sizeof(Road), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaHostRegister(this->mRoads.data(), this->mRoads.size() * sizeof(Road), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpyAsync(mRoadsDevice, this->mRoads.data(), this->mRoads.size() * sizeof(Road), cudaMemcpyHostToDevice, mGpuStreams[0].get())); @@ -421,7 +420,7 @@ template void TimeFrameGPU::loadTrackSeedsDevice(bounded_vector& seeds) { GPUTimer timer(mGpuStreams[0], "loading track seeds"); - LOGP(debug, "gpu-transfer: loading {} track seeds, for {} MB.", seeds.size(), seeds.size() * sizeof(CellSeed) / MB); + GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeed) / constants::MB); allocMemAsync(reinterpret_cast(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeed), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaHostRegister(seeds.data(), seeds.size() * sizeof(CellSeed), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpyAsync(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeed), cudaMemcpyHostToDevice, mGpuStreams[0].get())); @@ -431,10 +430,10 @@ template void TimeFrameGPU::createNeighboursDevice(const unsigned int layer, const unsigned int nNeighbours) { GPUTimer timer(mGpuStreams[0], "reserving neighbours"); - LOGP(debug, "gpu-allocation: reserving {} neighbours (pairs), for {} MB.", nNeighbours, nNeighbours * sizeof(gpuPair) / MB); + GPULog("gpu-allocation: reserving {} neighbours (pairs), for {:.2f} MB.", nNeighbours, nNeighbours * sizeof(gpuPair) / constants::MB); allocMemAsync(reinterpret_cast(&mNeighbourPairsDevice[layer]), nNeighbours * sizeof(gpuPair), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemsetAsync(mNeighbourPairsDevice[layer], -1, nNeighbours * sizeof(gpuPair), mGpuStreams[0].get())); - LOGP(debug, "gpu-allocation: reserving {} neighbours, for {} MB.", nNeighbours, nNeighbours * sizeof(gpuPair) / MB); + GPULog("gpu-allocation: reserving {} neighbours, for {:.2f} MB.", nNeighbours, nNeighbours * sizeof(gpuPair) / constants::MB); allocMemAsync(reinterpret_cast(&mNeighboursDevice[layer]), nNeighbours * sizeof(int), mGpuStreams[0], this->getExtAllocator()); } @@ -444,10 +443,10 @@ void TimeFrameGPU::createNeighboursDevice(const unsigned int layer, std GPUTimer timer(mGpuStreams[0], "reserving neighbours"); this->mCellsNeighbours[layer].clear(); this->mCellsNeighbours[layer].resize(neighbours.size()); - LOGP(debug, "gpu-allocation: reserving {} neighbours (pairs), for {} MB.", neighbours.size(), neighbours.size() * sizeof(gpuPair) / MB); + GPULog("gpu-allocation: reserving {} neighbours (pairs), for {:.2f} MB.", neighbours.size(), neighbours.size() * sizeof(gpuPair) / constants::MB); allocMemAsync(reinterpret_cast(&mNeighbourPairsDevice[layer]), neighbours.size() * sizeof(gpuPair), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemsetAsync(mNeighbourPairsDevice[layer], -1, neighbours.size() * sizeof(gpuPair), mGpuStreams[0].get())); - LOGP(debug, "gpu-allocation: reserving {} neighbours, for {} MB.", neighbours.size(), neighbours.size() * sizeof(gpuPair) / MB); + GPULog("gpu-allocation: reserving {} neighbours, for {:.2f} MB.", neighbours.size(), neighbours.size() * sizeof(gpuPair) / constants::MB); allocMemAsync(reinterpret_cast(&mNeighboursDevice[layer]), neighbours.size() * sizeof(int), mGpuStreams[0], this->getExtAllocator()); } @@ -464,7 +463,7 @@ void TimeFrameGPU::createTrackITSExtDevice(bounded_vector& se { GPUTimer timer(mGpuStreams[0], "reserving tracks"); mTrackITSExt = bounded_vector(seeds.size(), {}, this->getMemoryPool().get()); - LOGP(debug, "gpu-allocation: reserving {} tracks, for {} MB.", seeds.size(), seeds.size() * sizeof(o2::its::TrackITSExt) / MB); + GPULog("gpu-allocation: reserving {} tracks, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(o2::its::TrackITSExt) / constants::MB); allocMemAsync(reinterpret_cast(&mTrackITSExtDevice), seeds.size() * sizeof(o2::its::TrackITSExt), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemsetAsync(mTrackITSExtDevice, 0, seeds.size() * sizeof(o2::its::TrackITSExt), mGpuStreams[0].get())); GPUChkErrS(cudaHostRegister(mTrackITSExt.data(), seeds.size() * sizeof(o2::its::TrackITSExt), cudaHostRegisterPortable)); @@ -475,7 +474,7 @@ void TimeFrameGPU::downloadCellsDevice() { GPUTimer timer(mGpuStreams, "downloading cells", nLayers - 2); for (int iLayer{0}; iLayer < nLayers - 2; ++iLayer) { - LOGP(debug, "gpu-transfer: downloading {} cells on layer: {}, for {} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / MB); + GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / constants::MB); this->mCells[iLayer].resize(mNCells[iLayer]); GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeed), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get())); } @@ -486,7 +485,7 @@ void TimeFrameGPU::downloadCellsLUTDevice() { GPUTimer timer(mGpuStreams, "downloading cell luts", nLayers - 3); for (auto iLayer{0}; iLayer < nLayers - 3; ++iLayer) { - LOGP(debug, "gpu-transfer: downloading cells lut on layer {} for {} elements", iLayer, (mNTracklets[iLayer + 1] + 1)); + GPULog("gpu-transfer: downloading cells lut on layer {} for {} elements", iLayer, (mNTracklets[iLayer + 1] + 1)); this->mCellsLookupTable[iLayer].resize(mNTracklets[iLayer + 1] + 1); GPUChkErrS(cudaMemcpyAsync(this->mCellsLookupTable[iLayer].data(), mCellsLUTDevice[iLayer + 1], (mNTracklets[iLayer + 1] + 1) * sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get())); } @@ -496,7 +495,7 @@ template void TimeFrameGPU::downloadCellsNeighboursDevice(std::vector>>& neighbours, const int layer) { GPUTimer timer(mGpuStreams[0], fmt::format("downloading neighbours from layer {}", layer)); - LOGP(debug, "gpu-transfer: downloading {} neighbours, for {} MB.", neighbours[layer].size(), neighbours[layer].size() * sizeof(std::pair) / MB); + GPULog("gpu-transfer: downloading {} neighbours, for {:.2f} MB.", neighbours[layer].size(), neighbours[layer].size() * sizeof(std::pair) / constants::MB); // TODO: something less dangerous than assuming the same memory layout of std::pair and gpuPair... or not? :) GPUChkErrS(cudaMemcpyAsync(neighbours[layer].data(), mNeighbourPairsDevice[layer], neighbours[layer].size() * sizeof(gpuPair), cudaMemcpyDeviceToHost, mGpuStreams[0].get())); } @@ -505,7 +504,7 @@ template void TimeFrameGPU::downloadNeighboursLUTDevice(bounded_vector& lut, const int layer) { GPUTimer timer(mGpuStreams[0], fmt::format("downloading neighbours LUT from layer {}", layer)); - LOGP(debug, "gpu-transfer: downloading neighbours LUT for {} elements on layer {}, for {} MB.", lut.size(), layer, lut.size() * sizeof(int) / MB); + GPULog("gpu-transfer: downloading neighbours LUT for {} elements on layer {}, for {:.2f} MB.", lut.size(), layer, lut.size() * sizeof(int) / constants::MB); GPUChkErrS(cudaMemcpyAsync(lut.data(), mNeighboursLUTDevice[layer], lut.size() * sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[0].get())); } @@ -513,7 +512,7 @@ template void TimeFrameGPU::downloadTrackITSExtDevice(bounded_vector& seeds) { GPUTimer timer(mGpuStreams[0], "downloading tracks"); - LOGP(debug, "gpu-transfer: downloading {} tracks, for {} MB.", mTrackITSExt.size(), mTrackITSExt.size() * sizeof(o2::its::TrackITSExt) / MB); + GPULog("gpu-transfer: downloading {} tracks, for {:.2f} MB.", mTrackITSExt.size(), mTrackITSExt.size() * sizeof(o2::its::TrackITSExt) / constants::MB); GPUChkErrS(cudaMemcpyAsync(mTrackITSExt.data(), mTrackITSExtDevice, seeds.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyDeviceToHost, mGpuStreams[0].get())); GPUChkErrS(cudaHostUnregister(mTrackITSExt.data())); GPUChkErrS(cudaHostUnregister(seeds.data())); @@ -523,7 +522,7 @@ template void TimeFrameGPU::unregisterRest() { GPUTimer timer(mGpuStreams[0], "unregistering rest of the host memory"); - LOGP(debug, "unregistering rest of the host memory..."); + GPULog("unregistering rest of the host memory..."); GPUChkErrS(cudaHostUnregister(mCellsDevice.data())); GPUChkErrS(cudaHostUnregister(mTrackletsDevice.data())); } @@ -531,6 +530,8 @@ void TimeFrameGPU::unregisterRest() template void TimeFrameGPU::unregisterHostMemory(const int maxLayers) { + GPUTimer timer(mGpuStreams[0], "unregistering host memory"); + GPULog("unregistering host memory"); for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { GPUChkErrS(cudaHostUnregister(this->mUnsortedClusters[iLayer].data())); GPUChkErrS(cudaHostUnregister(this->mClusters[iLayer].data())); @@ -552,7 +553,12 @@ void TimeFrameGPU::initialise(const int iteration, o2::its::TimeFrame::initialise(iteration, trkParam, maxLayers); } +template +void TimeFrameGPU::wipe() +{ + unregisterHostMemory(0); + o2::its::TimeFrame::wipe(); +} + template class TimeFrameGPU<7>; -} // namespace gpu -} // namespace its -} // namespace o2 +} // namespace o2::its::gpu diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 1b1c4af682dc5..a8061e872c029 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -18,10 +18,10 @@ #include "ITStrackingGPU/TrackerTraitsGPU.h" #include "ITStrackingGPU/TrackingKernels.h" #include "ITStracking/TrackingConfigParam.h" +#include "ITStracking/Constants.h" namespace o2::its { -constexpr int UnusedIndex{-1}; template void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) @@ -48,10 +48,8 @@ void TrackerTraitsGPU::adoptTimeFrame(TimeFrame* tf) template void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int iROFslice, int iVertex) { - auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); + const auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); - const Vertex diamondVert({this->mTrkParams[iteration].Diamond[0], this->mTrkParams[iteration].Diamond[1], this->mTrkParams[iteration].Diamond[2]}, {25.e-6f, 0.f, 0.f, 25.e-6f, 0.f, 36.f}, 1, 1.f); - gsl::span diamondSpan(&diamondVert, 1); int startROF{this->mTrkParams[iteration].nROFsPerIterations > 0 ? iROFslice * this->mTrkParams[iteration].nROFsPerIterations : 0}; int endROF{o2::gpu::CAMath::Min(this->mTrkParams[iteration].nROFsPerIterations > 0 ? (iROFslice + 1) * this->mTrkParams[iteration].nROFsPerIterations + this->mTrkParams[iteration].DeltaROF : mTimeFrameGPU->getNrof(), mTimeFrameGPU->getNrof())}; @@ -128,6 +126,7 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) for (int iLayer = 0; iLayer < this->mTrkParams[iteration].CellsPerRoad(); ++iLayer) { if (!mTimeFrameGPU->getNTracklets()[iLayer + 1] || !mTimeFrameGPU->getNTracklets()[iLayer]) { + mTimeFrameGPU->getNCells()[iLayer] = 0; continue; } const int currentLayerTrackletsNum{static_cast(mTimeFrameGPU->getNTracklets()[iLayer])}; @@ -173,9 +172,10 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) mTimeFrameGPU->createNeighboursIndexTablesDevice(); const auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); for (int iLayer{0}; iLayer < this->mTrkParams[iteration].CellsPerRoad() - 1; ++iLayer) { + const int currentLayerCellsNum{static_cast(mTimeFrameGPU->getNCells()[iLayer])}; const int nextLayerCellsNum{static_cast(mTimeFrameGPU->getNCells()[iLayer + 1])}; - - if (!nextLayerCellsNum) { + if (!nextLayerCellsNum || !currentLayerCellsNum) { + mTimeFrameGPU->getNNeighbours()[iLayer] = 0; continue; } @@ -188,7 +188,7 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) this->mTrkParams[0].MaxChi2ClusterAttachment, this->mBz, iLayer, - mTimeFrameGPU->getNCells()[iLayer], + currentLayerCellsNum, nextLayerCellsNum, 1e2, conf.nBlocks, @@ -204,7 +204,7 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) this->mTrkParams[0].MaxChi2ClusterAttachment, this->mBz, iLayer, - mTimeFrameGPU->getNCells()[iLayer], + currentLayerCellsNum, nextLayerCellsNum, 1e2, conf.nBlocks, @@ -251,8 +251,8 @@ void TrackerTraitsGPU::findRoads(const int iteration) conf.nThreads); } // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory. - if (!trackSeeds.size()) { - LOGP(info, "No track seeds found, skipping track finding"); + if (trackSeeds.empty()) { + LOGP(debug, "No track seeds found, skipping track finding"); continue; } mTimeFrameGPU->createTrackITSExtDevice(trackSeeds); @@ -283,7 +283,7 @@ void TrackerTraitsGPU::findRoads(const int iteration) int nShared = 0; bool isFirstShared{false}; for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) { - if (track.getClusterIndex(iLayer) == UnusedIndex) { + if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { continue; } nShared += int(mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer))); @@ -296,7 +296,7 @@ void TrackerTraitsGPU::findRoads(const int iteration) std::array rofs{INT_MAX, INT_MAX, INT_MAX}; for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) { - if (track.getClusterIndex(iLayer) == UnusedIndex) { + if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { continue; } mTimeFrameGPU->markUsedCluster(iLayer, track.getClusterIndex(iLayer)); @@ -320,9 +320,6 @@ void TrackerTraitsGPU::findRoads(const int iteration) } mTimeFrameGPU->loadUsedClustersDevice(); } - if (iteration == this->mTrkParams.size() - 1) { - mTimeFrameGPU->unregisterHostMemory(0); - } }; template diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 38c59d520aa76..fb75764da2e36 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -36,15 +36,6 @@ #include "ITStrackingGPU/TrackingKernels.h" #include "ITStrackingGPU/Utils.h" -#ifndef __HIPCC__ -#define THRUST_NAMESPACE thrust::cuda -#else -#define THRUST_NAMESPACE thrust::hip -#endif - -#define GPU_BLOCKS GPUCA_DETERMINISTIC_CODE(1, 99999) -#define GPU_THREADS GPUCA_DETERMINISTIC_CODE(1, 99999) - // O2 track model #include "ReconstructionDataFormats/Track.h" #include "DetectorsBase/Propagator.h" @@ -112,7 +103,6 @@ GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerInde if (zRangeMax < -utils.getLayerZ(layerIndex) || zRangeMin > utils.getLayerZ(layerIndex) || zRangeMin > zRangeMax) { - return getEmptyBinsRect(); } @@ -398,22 +388,20 @@ GPUg() void computeLayerCellNeighboursKernel( } if constexpr (initRun) { atomicAdd(neighboursLUT + iNextCell, 1); - foundNeighbours++; neighboursIndexTable[iCurrentCellIndex]++; } else { cellNeighbours[neighboursIndexTable[iCurrentCellIndex] + foundNeighbours] = {iCurrentCellIndex, iNextCell}; foundNeighbours++; - // FIXME: this is prone to race conditions: check on level is not atomic const int currentCellLevel{currentCellSeed.getLevel()}; if (currentCellLevel >= nextCellSeed.getLevel()) { - cellSeedArray[layerIndex + 1][iNextCell].setLevel(currentCellLevel + 1); + atomicMax(cellSeedArray[layerIndex + 1][iNextCell].getLevelPtr(), currentCellLevel + 1); } } } } } -template +template GPUg() void computeLayerCellsKernel( const Cluster** sortedClusters, const Cluster** unsortedClusters, @@ -530,8 +518,14 @@ GPUg() void computeLayerTrackletsMultiROFKernel( for (unsigned int iROF{blockIdx.x}; iROF < endROF - startROF; iROF += gridDim.x) { const short rof0 = iROF + startROF; auto primaryVertices = getPrimaryVertices(rof0, rofPV, totalROFs, multMask, vertices); + if (primaryVertices.empty()) { + continue; + } const auto startVtx{vertexId >= 0 ? vertexId : 0}; const auto endVtx{vertexId >= 0 ? o2::gpu::CAMath::Min(vertexId + 1, static_cast(primaryVertices.size())) : static_cast(primaryVertices.size())}; + if ((endVtx - startVtx) <= 0) { + continue; + } const short minROF = o2::gpu::CAMath::Max(startROF, static_cast(rof0 - deltaROF)); const short maxROF = o2::gpu::CAMath::Min(endROF - 1, static_cast(rof0 + deltaROF)); auto clustersCurrentLayer = getClustersOnLayer(rof0, totalROFs, layerIndex, ROFClusters, clusters); @@ -541,7 +535,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel( for (int currentClusterIndex = threadIdx.x; currentClusterIndex < clustersCurrentLayer.size(); currentClusterIndex += blockDim.x) { unsigned int storedTracklets{0}; - auto currentCluster{clustersCurrentLayer[currentClusterIndex]}; + const auto& currentCluster{clustersCurrentLayer[currentClusterIndex]}; const int currentSortedIndex{ROFClusters[layerIndex][rof0] + currentClusterIndex}; if (usedClusters[layerIndex][currentCluster.clusterId]) { continue; @@ -550,14 +544,15 @@ GPUg() void computeLayerTrackletsMultiROFKernel( const float inverseR0{1.f / currentCluster.radius}; for (int iV{startVtx}; iV < endVtx; ++iV) { auto& primaryVertex{primaryVertices[iV]}; - if (primaryVertex.isFlagSet(2) && iteration != 3) { + if ((primaryVertex.isFlagSet(Vertex::Flags::UPCMode) && iteration != 3) || (iteration == 3 && !primaryVertex.isFlagSet(Vertex::Flags::UPCMode))) { continue; } + const float resolution = o2::gpu::CAMath::Sqrt(math_utils::Sq(resolutionPV) / primaryVertex.getNContributors() + math_utils::Sq(positionResolution)); const float tanLambda{(currentCluster.zCoordinate - primaryVertex.getZ()) * inverseR0}; const float zAtRmin{tanLambda * (minR - currentCluster.radius) + currentCluster.zCoordinate}; const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate}; - const float sqInverseDeltaZ0{1.f / (math_utils::Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution + const float sqInverseDeltaZ0{1.f / (math_utils::Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + constants::Tolerance)}; /// protecting from overflows adding the detector resolution const float sigmaZ{o2::gpu::CAMath::Sqrt(math_utils::Sq(resolution) * math_utils::Sq(tanLambda) * ((math_utils::Sq(inverseR0) + sqInverseDeltaZ0) * math_utils::Sq(meanDeltaR) + 1.f) + math_utils::Sq(meanDeltaR * MSAngle))}; const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex + 1, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { @@ -579,8 +574,8 @@ GPUg() void computeLayerTrackletsMultiROFKernel( int iPhiBin = (selectedBinsRect.y + iPhiCount) % phiBins; const int firstBinIndex{utils->getBinIndex(selectedBinsRect.x, iPhiBin)}; const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1}; - const int firstRowClusterIndex = indexTables[layerIndex + 1][(rof1 - startROF) * tableSize + firstBinIndex]; - const int maxRowClusterIndex = indexTables[layerIndex + 1][(rof1 - startROF) * tableSize + maxBinIndex]; + const int firstRowClusterIndex = indexTables[layerIndex + 1][(rof1)*tableSize + firstBinIndex]; + const int maxRowClusterIndex = indexTables[layerIndex + 1][(rof1)*tableSize + maxBinIndex]; for (int nextClusterIndex{firstRowClusterIndex}; nextClusterIndex < maxRowClusterIndex; ++nextClusterIndex) { if (nextClusterIndex >= clustersNextLayer.size()) { break; @@ -591,13 +586,13 @@ GPUg() void computeLayerTrackletsMultiROFKernel( } const float deltaPhi{o2::gpu::CAMath::Abs(currentCluster.phi - nextCluster.phi)}; const float deltaZ{o2::gpu::CAMath::Abs(tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate)}; - const int nextSortedIndex{ROFClusters[layerIndex + 1][rof1] + nextClusterIndex}; if (deltaZ / sigmaZ < NSigmaCut && (deltaPhi < phiCut || o2::gpu::CAMath::Abs(deltaPhi - o2::constants::math::TwoPI) < phiCut)) { if constexpr (initRun) { trackletsLUT[layerIndex][currentSortedIndex]++; // we need l0 as well for usual exclusive sums. } else { const float phi{o2::gpu::CAMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, currentCluster.xCoordinate - nextCluster.xCoordinate)}; const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / (currentCluster.radius - nextCluster.radius)}; + const int nextSortedIndex{ROFClusters[layerIndex + 1][rof1] + nextClusterIndex}; new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, rof0, rof1}; } ++storedTracklets; @@ -841,11 +836,9 @@ GPUhi() void cubExclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stre { void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, - in_out, num_items, stream)); + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, in_out, num_items, stream)); GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, - in_out, num_items, stream)); + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, in_out, num_items, stream)); GPUChkErrS(cudaFree(d_temp_storage)); } @@ -860,11 +853,9 @@ GPUhi() void cubInclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stre { void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, - in_out, num_items, stream)); + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, in_out, num_items, stream)); GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, - in_out, num_items, stream)); + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, in_out, num_items, stream)); GPUChkErrS(cudaFree(d_temp_storage)); } @@ -907,10 +898,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, gpu::Streams& streams) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>( + gpu::computeLayerTrackletsMultiROFKernel<<>>( utils, multMask, iLayer, @@ -976,10 +964,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, gpu::Streams& streams) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>( + gpu::computeLayerTrackletsMultiROFKernel<<>>( utils, multMask, iLayer, @@ -1013,10 +998,10 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, nTracklets[iLayer] = unique_end - tracklets_ptr; if (iLayer > 0) { GPUChkErrS(cudaMemsetAsync(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int), streams[iLayer].get())); - gpu::compileTrackletsLookupTableKernel<<>>(spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]); + gpu::compileTrackletsLookupTableKernel<<>>( + spanTracklets[iLayer], + trackletsLUTsHost[iLayer], + nTracklets[iLayer]); gpu::cubExclusiveScanInPlace(trackletsLUTsHost[iLayer], nClusters[iLayer] + 1, streams[iLayer].get()); } } @@ -1040,8 +1025,7 @@ void countCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -1076,8 +1060,7 @@ void computeCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -1107,8 +1090,7 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nBlocks, const int nThreads) { - gpu::computeLayerCellNeighboursKernel<<>>( + gpu::computeLayerCellNeighboursKernel<<>>( cellsLayersDevice, neighboursLUT, neighboursIndexTable, @@ -1142,8 +1124,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nThreads) { - gpu::computeLayerCellNeighboursKernel<<>>( + gpu::computeLayerCellNeighboursKernel<<>>( cellsLayersDevice, neighboursLUT, neighboursIndexTable, @@ -1198,8 +1179,7 @@ void processNeighboursHandler(const int startLayer, thrust::device_vector> foundSeedsTable(nCells[startLayer] + 1, 0, allocInt); // Shortcut: device_vector skips central memory management, we are relying on the contingency. // TODO: fix this. - gpu::processNeighboursKernel<<>>( + gpu::processNeighboursKernel<<>>( startLayer, startLevel, allCellSeeds, @@ -1221,8 +1201,7 @@ void processNeighboursHandler(const int startLayer, thrust::device_vector> updatedCellId(foundSeedsTable.back(), 0, allocInt); thrust::device_vector> updatedCellSeed(foundSeedsTable.back(), allocCellSeed); - gpu::processNeighboursKernel<<>>( + gpu::processNeighboursKernel<<>>( startLayer, startLevel, allCellSeeds, @@ -1255,8 +1234,7 @@ void processNeighboursHandler(const int startLayer, foundSeedsTable.resize(lastCellSeedSize + 1); thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0); - gpu::processNeighboursKernel<<>>( + gpu::processNeighboursKernel<<>>( iLayer, --level, allCellSeeds, @@ -1282,8 +1260,7 @@ void processNeighboursHandler(const int startLayer, updatedCellSeed.resize(foundSeeds); thrust::fill(updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed()); - gpu::processNeighboursKernel<<>>( + gpu::processNeighboursKernel<<>>( iLayer, level, allCellSeeds, @@ -1326,8 +1303,7 @@ void trackSeedHandler(CellSeed* trackSeeds, const int nThreads) { thrust::device_vector minPts(minPtsHost); - gpu::fitTrackSeedsKernel<<>>( + gpu::fitTrackSeedsKernel<<>>( trackSeeds, // CellSeed* foundTrackingFrameInfo, // TrackingFrameInfo** tracks, // TrackITSExt* diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/hip/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/GPU/hip/CMakeLists.txt index cc43b6845a714..e8e475f2232c8 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/hip/CMakeLists.txt +++ b/Detectors/ITSMFT/ITS/tracking/GPU/hip/CMakeLists.txt @@ -12,6 +12,7 @@ if(HIP_ENABLED) message(STATUS "Building ITS HIP tracker") set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -fgpu-rdc") + # set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -O0 -g -ggdb -fno-inline -fno-omit-frame-pointer -D__HIP_ENABLE_DEVICE_ASSERT__") o2_add_hipified_library(ITStrackingHIP SOURCES ../cuda/ClusterLinesGPU.cu ../cuda/TimeFrameGPU.cu @@ -28,4 +29,4 @@ if(HIP_ENABLED) hip::host PRIVATE_LINK_LIBRARIES O2::GPUTrackingHIPExternalProvider TARGETVARNAME targetName) -endif() \ No newline at end of file +endif() diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index 3f0d291d5e51d..5e08e6c48e03a 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -308,7 +308,7 @@ struct TimeFrame { const o2::base::PropagatorImpl* mPropagatorDevice = nullptr; // Needed only for GPU - void wipe(); + virtual void wipe(); private: void prepareClusters(const TrackingParameters& trkParam, const int maxLayers = nLayers); @@ -531,8 +531,8 @@ inline gsl::span TimeFrame::getIndexTable(int rofId, int layer) if (rofId < 0 || rofId >= mNrof) { return {}; } - return {&mIndexTables[layer][rofId * (mIndexTableUtils.getNphiBins() * mIndexTableUtils.getNzBins() + 1)], - static_cast::size_type>(mIndexTableUtils.getNphiBins() * mIndexTableUtils.getNzBins() + 1)}; + const int tableSize = mIndexTableUtils.getNphiBins() * mIndexTableUtils.getNzBins() + 1; + return {&mIndexTables[layer][rofId * tableSize], static_cast::size_type>(tableSize)}; } template diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h index bf9cb79169566..f123a2a9a1d80 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h @@ -61,7 +61,6 @@ class ITSTrackingInterface void initialise(); void run(framework::ProcessingContext& pc); void printSummary() const; - void end(); virtual void updateTimeDependentParams(framework::ProcessingContext& pc); virtual void finaliseCCDB(framework::ConcreteDataMatcher& matcher, void* obj); diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx index 2e9ce23719f90..a59c51949b9f9 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx @@ -583,7 +583,6 @@ void TimeFrame::printSliceInfo(const int startROF, const int sliceSize) template void TimeFrame::setMemoryPool(std::shared_ptr& pool) { - wipe(); mMemoryPool = pool; auto initVector = [&](bounded_vector& vec) { diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index ba722c410f95c..938356050262d 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -65,7 +65,6 @@ void Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& error) (double)mTimeFrame->getArtefactsMemory() / GB, (double)mTrkParams[iteration].MaxMemory / GB); LOGP(error, "Exception: {}", err.what()); if (mTrkParams[iteration].DropTFUponFailure) { - mTimeFrame->wipe(); mMemoryPool->print(); ++mNumberOfDroppedTFs; error("...Dropping Timeframe..."); @@ -144,17 +143,17 @@ void Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& error) error("Uncaught exception, all bets are off..."); } - if (mTrkParams[0].PrintMemory) { - mTimeFrame->printArtefactsMemory(); - mMemoryPool->print(); - } - if (mTimeFrame->hasMCinformation()) { computeTracksMClabels(); } rectifyClusterIndices(); ++mTimeFrameCounter; mTotalTime += total; + + if (mTrkParams[0].PrintMemory) { + mTimeFrame->printArtefactsMemory(); + mMemoryPool->print(); + } } void Tracker::computeRoadsMClabels() diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 136ebc647cc38..b46e7a68875e6 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -107,9 +107,10 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROF for (int iV = startVtx; iV < endVtx; ++iV) { const auto& pv = primaryVertices[iV]; - if (pv.isFlagSet(Vertex::Flags::UPCMode) && iteration != 3) { + if ((pv.isFlagSet(Vertex::Flags::UPCMode) && iteration != 3) || (iteration == 3 && !pv.isFlagSet(Vertex::Flags::UPCMode))) { continue; } + const float resolution = o2::gpu::CAMath::Sqrt(math_utils::Sq(mTimeFrame->getPositionResolution(iLayer)) + math_utils::Sq(mTrkParams[iteration].PVres) / float(pv.getNContributors())); const float tanLambda = (currentCluster.zCoordinate - pv.getZ()) * inverseR0; const float zAtRmin = tanLambda * (mTimeFrame->getMinR(iLayer + 1) - currentCluster.radius) + currentCluster.zCoordinate; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 00a69a37cb51a..3b05a7655d68c 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -310,6 +310,7 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) LOGP(info, "ITSTracker pushed {} vertex purities", allVerticesPurities.size()); } } + mTimeFrame->wipe(); } void ITSTrackingInterface::updateTimeDependentParams(framework::ProcessingContext& pc) @@ -381,11 +382,6 @@ void ITSTrackingInterface::printSummary() const mTracker->printSummary(); } -void ITSTrackingInterface::end() -{ - mTimeFrame->wipe(); -} - void ITSTrackingInterface::setTraitsFromProvider(VertexerTraits* vertexerTraits, TrackerTraits7* trackerTraits, TimeFrame7* frame) diff --git a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx index c825ec39d3499..be7750964b3e7 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx @@ -75,7 +75,6 @@ void TrackerDPL::endOfStream(EndOfStreamContext& ec) void TrackerDPL::end() { - mITSTrackingInterface.end(); mITSTrackingInterface.printSummary(); LOGF(info, "ITS CA-Tracker total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); }