From bf7cbab2262d973c745bbd20d362912b28977e33 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Thu, 22 May 2025 11:08:49 +0200 Subject: [PATCH 1/2] ITS: track and hard limit memory allocations + tbb Signed-off-by: Felix Schlepper --- Detectors/ITSMFT/ITS/tracking/CMakeLists.txt | 14 +- .../GPU/ITStrackingGPU/TimeFrameGPU.h | 15 +- .../GPU/ITStrackingGPU/TrackingKernels.h | 14 +- .../ITS/tracking/GPU/cuda/TimeFrameGPU.cu | 13 +- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 2 +- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 33 +- .../include/ITStracking/BoundedAllocator.h | 179 +++++ .../include/ITStracking/Configuration.h | 21 +- .../tracking/include/ITStracking/Constants.h | 4 +- .../tracking/include/ITStracking/TimeFrame.h | 167 ++-- .../tracking/include/ITStracking/Tracker.h | 34 +- .../include/ITStracking/TrackerTraits.h | 10 +- .../include/ITStracking/TrackingConfigParam.h | 30 +- .../include/ITStracking/TrackingInterface.h | 2 + .../tracking/include/ITStracking/Vertexer.h | 31 +- .../include/ITStracking/VertexerTraits.h | 28 +- .../ITSMFT/ITS/tracking/src/TimeFrame.cxx | 212 ++++-- Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 161 ++-- .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 716 +++++++++--------- .../ITS/tracking/src/TrackingInterface.cxx | 42 +- .../ITSMFT/ITS/tracking/src/Vertexer.cxx | 66 +- .../ITS/tracking/src/VertexerTraits.cxx | 322 ++++---- .../ITS3/reconstruction/src/IOUtils.cxx | 4 +- 23 files changed, 1273 insertions(+), 847 deletions(-) create mode 100644 Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h diff --git a/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt index f8c71e27d0058..d6540cdeaf910 100644 --- a/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt +++ b/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt @@ -9,6 +9,7 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. +#add_compile_options(-O0 -g -fPIC -fno-omit-frame-pointer) o2_add_library(ITStracking TARGETVARNAME targetName SOURCES src/ClusterLines.cxx @@ -35,12 +36,8 @@ o2_add_library(ITStracking O2::ITSBase O2::ITSReconstruction O2::ITSMFTReconstruction - O2::DataFormatsITS) - -if (OpenMP_CXX_FOUND) - target_compile_definitions(${targetName} PRIVATE WITH_OPENMP) - target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX) -endif() + O2::DataFormatsITS + PRIVATE_LINK_LIBRARIES TBB::tbb) o2_add_library(ITSTrackingInterface TARGETVARNAME targetName @@ -50,11 +47,6 @@ o2_add_library(ITSTrackingInterface O2::Framework O2::GPUTracking) -if (OpenMP_CXX_FOUND) - target_compile_definitions(${targetName} PRIVATE WITH_OPENMP) - target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX) -endif() - o2_target_root_dictionary(ITStracking HEADERS include/ITStracking/ClusterLines.h include/ITStracking/Tracklet.h diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 14aa0ea5d67fb..82101dba4c02d 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h @@ -13,6 +13,7 @@ #ifndef TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H #define TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H +#include "ITStracking/BoundedAllocator.h" #include "ITStracking/TimeFrame.h" #include "ITStracking/Configuration.h" #include "ITStrackingGPU/Utils.h" @@ -62,7 +63,7 @@ class TimeFrameGPU : public TimeFrame void loadTrackSeedsDevice(); void loadTrackSeedsChi2Device(); void loadRoadsDevice(); - void loadTrackSeedsDevice(std::vector&); + void loadTrackSeedsDevice(bounded_vector&); void createTrackletsBuffers(); void createCellsBuffers(const int); void createCellsDevice(); @@ -72,10 +73,10 @@ class TimeFrameGPU : public TimeFrame void createNeighboursDevice(const unsigned int layer, std::vector>& neighbours); void createNeighboursLUTDevice(const int, const unsigned int); void createNeighboursDeviceArray(); - void createTrackITSExtDevice(std::vector&); - void downloadTrackITSExtDevice(std::vector&); - void downloadCellsNeighboursDevice(std::vector>>&, const int); - void downloadNeighboursLUTDevice(std::vector&, const int); + void createTrackITSExtDevice(bounded_vector&); + void downloadTrackITSExtDevice(bounded_vector&); + void downloadCellsNeighboursDevice(std::vector>>&, const int); + void downloadNeighboursLUTDevice(bounded_vector&, const int); void downloadCellsDevice(); void downloadCellsLUTDevice(); void unregisterRest(); @@ -90,7 +91,7 @@ class TimeFrameGPU : public TimeFrame int getNClustersInRofSpan(const int, const int, const int) const; IndexTableUtils* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; } int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; } - std::vector& getTrackITSExt() { return mTrackITSExt; } + auto& getTrackITSExt() { return mTrackITSExt; } Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; } int* getDeviceROFramesPV() { return mROFramesPVDevice; } unsigned char* getDeviceUsedClusters(const int); @@ -199,7 +200,7 @@ class TimeFrameGPU : public TimeFrame bool mFirstInit = true; // Temporary buffer for storing output tracks from GPU tracking - std::vector mTrackITSExt; + bounded_vector mTrackITSExt; }; template diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index aabd3a940a532..54aa0e01c8a78 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -71,13 +71,13 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const int iteration, const float NSigmaCut, - std::vector& phiCuts, + bounded_vector& phiCuts, const float resolutionPV, std::array& minR, std::array& maxR, - std::vector& resolutions, + bounded_vector& resolutions, std::vector& radii, - std::vector& mulScatAng, + bounded_vector& mulScatAng, const int nBlocks, const int nThreads); @@ -104,13 +104,13 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const int iteration, const float NSigmaCut, - std::vector& phiCuts, + bounded_vector& phiCuts, const float resolutionPV, std::array& minR, std::array& maxR, - std::vector& resolutions, + bounded_vector& resolutions, std::vector& radii, - std::vector& mulScatAng, + bounded_vector& mulScatAng, const int nBlocks, const int nThreads); @@ -190,7 +190,7 @@ void processNeighboursHandler(const int startLayer, std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, - std::vector& seedsHost, + bounded_vector& seedsHost, const float bz, const float MaxChi2ClusterAttachment, const float maxChi2NDF, diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu index 5a1e9148c0548..8380533a28e04 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu @@ -420,7 +420,7 @@ void TimeFrameGPU::loadRoadsDevice() } template -void TimeFrameGPU::loadTrackSeedsDevice(std::vector& seeds) +void TimeFrameGPU::loadTrackSeedsDevice(bounded_vector& seeds) { START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), "loading track seeds"); LOGP(debug, "gpu-transfer: loading {} track seeds, for {} MB.", seeds.size(), seeds.size() * sizeof(CellSeed) / MB); @@ -466,11 +466,10 @@ void TimeFrameGPU::createNeighboursDeviceArray() } template -void TimeFrameGPU::createTrackITSExtDevice(std::vector& seeds) +void TimeFrameGPU::createTrackITSExtDevice(bounded_vector& seeds) { START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), "reserving tracks"); - mTrackITSExt.clear(); - mTrackITSExt.resize(seeds.size()); + mTrackITSExt = bounded_vector(seeds.size(), {}, this->getMemoryPool().get()); LOGP(debug, "gpu-allocation: reserving {} tracks, for {} MB.", seeds.size(), seeds.size() * sizeof(o2::its::TrackITSExt) / MB); allocMemAsync(reinterpret_cast(&mTrackITSExtDevice), seeds.size() * sizeof(o2::its::TrackITSExt), mGpuStreams[0], this->getExtAllocator()); GPUChkErrS(cudaMemsetAsync(mTrackITSExtDevice, 0, seeds.size() * sizeof(o2::its::TrackITSExt), mGpuStreams[0]->get())); @@ -503,7 +502,7 @@ void TimeFrameGPU::downloadCellsLUTDevice() } template -void TimeFrameGPU::downloadCellsNeighboursDevice(std::vector>>& neighbours, const int layer) +void TimeFrameGPU::downloadCellsNeighboursDevice(std::vector>>& neighbours, const int layer) { START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), fmt::format("downloading neighbours from layer {}", layer)); LOGP(debug, "gpu-transfer: downloading {} neighbours, for {} MB.", neighbours[layer].size(), neighbours[layer].size() * sizeof(std::pair) / MB); @@ -512,7 +511,7 @@ void TimeFrameGPU::downloadCellsNeighboursDevice(std::vector -void TimeFrameGPU::downloadNeighboursLUTDevice(std::vector& lut, const int layer) +void TimeFrameGPU::downloadNeighboursLUTDevice(bounded_vector& lut, const int layer) { START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), fmt::format("downloading neighbours LUT from layer {}", layer)); LOGP(debug, "gpu-transfer: downloading neighbours LUT for {} elements on layer {}, for {} MB.", lut.size(), layer, lut.size() * sizeof(int) / MB); @@ -521,7 +520,7 @@ void TimeFrameGPU::downloadNeighboursLUTDevice(std::vector& lut, c } template -void TimeFrameGPU::downloadTrackITSExtDevice(std::vector& seeds) +void TimeFrameGPU::downloadTrackITSExtDevice(bounded_vector& seeds) { START_GPU_STREAM_TIMER(mGpuStreams[0]->get(), "downloading tracks"); LOGP(debug, "gpu-transfer: downloading {} tracks, for {} MB.", mTrackITSExt.size(), mTrackITSExt.size() * sizeof(o2::its::TrackITSExt) / MB); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 8741b42a697a8..871fd7a95f890 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -221,7 +221,7 @@ void TrackerTraitsGPU::findRoads(const int iteration) auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) { const int minimumLayer{startLevel - 1}; - std::vector trackSeeds; + bounded_vector trackSeeds(this->getMemoryPool().get()); for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) { if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { continue; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 0dc5c28b3bc65..301f37767c160 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -862,13 +862,13 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const int iteration, const float NSigmaCut, - std::vector& phiCuts, + bounded_vector& phiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, - std::vector& resolutions, + bounded_vector& resolutions, std::vector& radii, - std::vector& mulScatAng, + bounded_vector& mulScatAng, const int nBlocks, const int nThreads) { @@ -928,13 +928,13 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const int iteration, const float NSigmaCut, - std::vector& phiCuts, + bounded_vector& phiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, - std::vector& resolutions, + bounded_vector& resolutions, std::vector& radii, - std::vector& mulScatAng, + bounded_vector& mulScatAng, const int nBlocks, const int nThreads) { @@ -1139,7 +1139,7 @@ void processNeighboursHandler(const int startLayer, std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, - std::vector& seedsHost, + bounded_vector& seedsHost, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, @@ -1257,9 +1257,8 @@ void processNeighboursHandler(const int startLayer, thrust::device_vector outSeeds(updatedCellSeed.size()); auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); auto s{end - outSeeds.begin()}; - std::vector outSeedsHost(s); - thrust::copy(outSeeds.begin(), outSeeds.begin() + s, outSeedsHost.begin()); - seedsHost.insert(seedsHost.end(), outSeedsHost.begin(), outSeedsHost.end()); + seedsHost.reserve(seedsHost.size() + s); + thrust::copy(outSeeds.begin(), outSeeds.begin() + s, std::back_inserter(seedsHost)); } void trackSeedHandler(CellSeed* trackSeeds, @@ -1316,13 +1315,13 @@ template void countTrackletsInROFsHandler<7>(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const int iteration, const float NSigmaCut, - std::vector& phiCuts, + bounded_vector& phiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, - std::vector& resolutions, + bounded_vector& resolutions, std::vector& radii, - std::vector& mulScatAng, + bounded_vector& mulScatAng, const int nBlocks, const int nThreads); @@ -1348,13 +1347,13 @@ template void computeTrackletsInROFsHandler<7>(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const int iteration, const float NSigmaCut, - std::vector& phiCuts, + bounded_vector& phiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, - std::vector& resolutions, + bounded_vector& resolutions, std::vector& radii, - std::vector& mulScatAng, + bounded_vector& mulScatAng, const int nBlocks, const int nThreads); @@ -1367,7 +1366,7 @@ template void processNeighboursHandler<7>(const int startLayer, std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, - std::vector& seedsHost, + bounded_vector& seedsHost, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h new file mode 100644 index 0000000000000..eced0c64c73a5 --- /dev/null +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h @@ -0,0 +1,179 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. +/// +/// \file BoundedAllocator.h +/// \brief +/// + +#ifndef TRACKINGITSU_INCLUDE_BOUNDEDALLOCATOR_H_ +#define TRACKINGITSU_INCLUDE_BOUNDEDALLOCATOR_H_ + +#include +#include +#include +#include +#include + +#include "GPUCommonLogger.h" + +namespace o2::its +{ + +class BoundedMemoryResource final : public std::pmr::memory_resource +{ + public: + class MemoryLimitExceeded final : public std::bad_alloc + { + public: + MemoryLimitExceeded(size_t attempted, size_t used, size_t max) + : mAttempted(attempted), mUsed(used), mMax(max) {} + const char* what() const noexcept final + { + static thread_local char msg[256]; + if (mAttempted != 0) { + snprintf(msg, sizeof(msg), + "Reached set memory limit (attempted: %zu, used: %zu, max: %zu)", + mAttempted, mUsed, mMax); + } else { + snprintf(msg, sizeof(msg), + "New set maximum below current used (newMax: %zu, used: %zu)", + mMax, mUsed); + } + return msg; + } + + private: + size_t mAttempted{0}, mUsed{0}, mMax{0}; + }; + + BoundedMemoryResource(size_t maxBytes = std::numeric_limits::max(), std::pmr::memory_resource* upstream = std::pmr::get_default_resource()) + : mMaxMemory(maxBytes), mUpstream(upstream) {} + + void* do_allocate(size_t bytes, size_t alignment) final + { + size_t new_used{0}, current_used{mUsedMemory.load(std::memory_order_relaxed)}; + do { + new_used = current_used + bytes; + if (new_used > mMaxMemory) { + ++mCountThrow; + throw MemoryLimitExceeded(new_used, current_used, mMaxMemory); + } + } while (!mUsedMemory.compare_exchange_weak(current_used, new_used, + std::memory_order_acq_rel, + std::memory_order_relaxed)); + return mUpstream->allocate(bytes, alignment); + } + + void do_deallocate(void* p, size_t bytes, size_t alignment) final + { + mUpstream->deallocate(p, bytes, alignment); + mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed); + } + + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept final + { + return this == &other; + } + + size_t getUsedMemory() const noexcept { return mUsedMemory.load(); } + size_t getMaxMemory() const noexcept { return mMaxMemory; } + void setMaxMemory(size_t max) + { + if (mUsedMemory > max) { + ++mCountThrow; + throw MemoryLimitExceeded(0, mUsedMemory, max); + } + mMaxMemory = max; + } + + void print() const + { +#if !defined(GPUCA_GPUCODE_DEVICE) + constexpr double GB{1024 * 1024 * 1024}; + auto throw_ = mCountThrow.load(std::memory_order_relaxed); + auto used = static_cast(mUsedMemory.load(std::memory_order_relaxed)); + LOGP(info, "maxthrow={} maxmem={:.2f} GB used={:.2f} ({:.2f}%)", + throw_, (double)mMaxMemory / GB, used / GB, 100. * used / (double)mMaxMemory); +#endif + } + + private: + size_t mMaxMemory{std::numeric_limits::max()}; + std::atomic mCountThrow{0}; + std::atomic mUsedMemory{0}; + std::pmr::memory_resource* mUpstream; +}; + +template +using bounded_vector = std::pmr::vector; + +template +void deepVectorClear(std::vector& vec) +{ + std::vector().swap(vec); +} + +template +inline void deepVectorClear(bounded_vector& vec, BoundedMemoryResource* bmr = nullptr) +{ + vec.~bounded_vector(); + if (bmr == nullptr) { + auto alloc = vec.get_allocator().resource(); + new (&vec) bounded_vector(alloc); + } else { + new (&vec) bounded_vector(bmr); + } +} + +template +void deepVectorClear(std::vector>& vec, BoundedMemoryResource* bmr = nullptr) +{ + for (auto& v : vec) { + deepVectorClear(v, bmr); + } +} + +template +void deepVectorClear(std::array, S>& arr, BoundedMemoryResource* bmr = nullptr) +{ + for (size_t i{0}; i < S; ++i) { + deepVectorClear(arr[i], bmr); + } +} + +template +void clearResizeBoundedVector(bounded_vector& vec, size_t size, BoundedMemoryResource* bmr, T def = T()) +{ + vec.~bounded_vector(); + new (&vec) bounded_vector(size, def, bmr); +} + +template +void clearResizeBoundedVector(std::vector>& vec, size_t size, BoundedMemoryResource* bmr) +{ + vec.clear(); + vec.reserve(size); + for (size_t i{0}; i < size; ++i) { + vec.emplace_back(bmr); + } +} + +template +void clearResizeBoundedArray(std::array, S>& arr, size_t size, BoundedMemoryResource* bmr, T def = T()) +{ + for (size_t i{0}; i < S; ++i) { + clearResizeBoundedVector(arr[i], size, bmr, def); + } +} + +} // namespace o2::its + +#endif diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h index b63a01cb8cd29..d6c552935dd93 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h @@ -58,9 +58,7 @@ class Configuration : public Param }; struct TrackingParameters { - TrackingParameters& operator=(const TrackingParameters& t) = default; - - int CellMinimumLevel(); + int CellMinimumLevel() { return MinTrackLength - constants::its::ClustersPerCell + 1; } int CellsPerRoad() const { return NLayers - 2; } int TrackletsPerRoad() const { return NLayers - 1; } std::string asString() const; @@ -92,7 +90,6 @@ struct TrackingParameters { float CellsPerClusterLimit = 2.f; /// Fitter parameters o2::base::PropagatorImpl::MatCorrType CorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrNONE; - unsigned long MaxMemory = 12000000000UL; float MaxChi2ClusterAttachment = 60.f; float MaxChi2NDF = 30.f; std::vector MinPt = {0.f, 0.f, 0.f, 0.f}; @@ -102,7 +99,6 @@ struct TrackingParameters { bool SaveTimeBenchmarks = false; bool DoUPCIteration = false; bool FataliseUponFailure = true; - bool DropTFUponFailure = false; /// Cluster attachment bool UseTrackFollower = false; bool UseTrackFollowerTop = false; @@ -110,12 +106,11 @@ struct TrackingParameters { bool UseTrackFollowerMix = false; float TrackFollowerNSigmaCutZ = 1.f; float TrackFollowerNSigmaCutPhi = 1.f; -}; -inline int TrackingParameters::CellMinimumLevel() -{ - return MinTrackLength - constants::its::ClustersPerCell + 1; -} + bool PrintMemory = false; // print allocator usage in epilog report + size_t MaxMemory = 12000000000UL; + bool DropTFUponFailure = false; +}; struct VertexingParameters { int nIterations = 1; // Number of vertexing passes to perform @@ -141,13 +136,15 @@ struct VertexingParameters { int maxTrackletsPerCluster = 2e3; int phiSpan = -1; int zSpan = -1; + bool SaveTimeBenchmarks = false; int nThreads = 1; + bool PrintMemory = false; // print allocator usage in epilog report + size_t MaxMemory = 12000000000UL; + bool DropTFUponFailure = false; }; struct TimeFrameGPUParameters { - TimeFrameGPUParameters() = default; - size_t tmpCUBBufferSize = 1e5; // In average in pp events there are required 4096 bytes size_t maxTrackletsPerCluster = 1e2; size_t clustersPerLayerCapacity = 2.5e5; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h index ec075b0f10d04..c29ad2e01c588 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h @@ -84,8 +84,8 @@ GPUhdi() constexpr std::array InverseZBinSize() { constexpr auto zSize = LayersZCoordinate(); return std::array{0.5f * ZBins / (zSize[0]), 0.5f * ZBins / (zSize[1]), 0.5f * ZBins / (zSize[2]), - 0.5f * ZBins / (zSize[3]), 0.5f * ZBins / (zSize[4]), 0.5f * ZBins / (zSize[5]), - 0.5f * ZBins / (zSize[6])}; + 0.5f * ZBins / (zSize[3]), 0.5f * ZBins / (zSize[4]), 0.5f * ZBins / (zSize[5]), + 0.5f * ZBins / (zSize[6])}; } GPUhdi() constexpr float getInverseZCoordinate(const int layerIndex) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index 19c24c743ff07..f6bb9a9b11e66 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -32,6 +32,7 @@ #include "ITStracking/Tracklet.h" #include "ITStracking/IndexTableUtils.h" #include "ITStracking/ExternalAllocator.h" +#include "ITStracking/BoundedAllocator.h" #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/MCTruthContainer.h" @@ -75,12 +76,12 @@ struct TimeFrame { gsl::span> getPrimaryVerticesXAlpha(int rofId) const; void fillPrimaryVerticesXandAlpha(); int getPrimaryVerticesNum(int rofId = -1) const; - void addPrimaryVertices(const std::vector& vertices); - void addPrimaryVerticesLabels(std::vector>& labels); - void addPrimaryVertices(const std::vector& vertices, const int rofId, const int iteration); + void addPrimaryVertices(const bounded_vector& vertices); + void addPrimaryVerticesLabels(bounded_vector>& labels); + void addPrimaryVertices(const bounded_vector& vertices, const int rofId, const int iteration); void addPrimaryVertices(const gsl::span& vertices, const int rofId, const int iteration); - void addPrimaryVerticesInROF(const std::vector& vertices, const int rofId, const int iteration); - void addPrimaryVerticesLabelsInROF(const std::vector>& labels, const int rofId); + void addPrimaryVerticesInROF(const bounded_vector& vertices, const int rofId, const int iteration); + void addPrimaryVerticesLabelsInROF(const bounded_vector>& labels, const int rofId); void removePrimaryVerticesInROf(const int rofId); int loadROFrameData(const o2::itsmft::ROFRecord& rof, gsl::span clusters, const dataformats::MCTruthContainer* mcLabels = nullptr); @@ -92,7 +93,7 @@ struct TimeFrame { const dataformats::MCTruthContainer* mcLabels = nullptr); int getTotalClusters() const; - std::vector& getTotVertIteration() { return mTotVertPerIteration; } + auto& getTotVertIteration() { return mTotVertPerIteration; } bool empty() const { return getTotalClusters() == 0; } int getSortedIndex(int rofId, int layer, int idx) const { return mROFramesClusters[layer][rofId] + idx; } int getSortedStartIndex(const int rofId, const int layer) const { return mROFramesClusters[layer][rofId]; } @@ -112,35 +113,35 @@ struct TimeFrame { float getMinR(int layer) const { return mMinR[layer]; } float getMaxR(int layer) const { return mMaxR[layer]; } float getMSangle(int layer) const { return mMSangles[layer]; } - std::vector& getMSangles() { return mMSangles; } + auto& getMSangles() { return mMSangles; } float getPhiCut(int layer) const { return mPhiCuts[layer]; } - std::vector& getPhiCuts() { return mPhiCuts; } + auto& getPhiCuts() { return mPhiCuts; } float getPositionResolution(int layer) const { return mPositionResolution[layer]; } - std::vector& getPositionResolutions() { return mPositionResolution; } + auto& getPositionResolutions() { return mPositionResolution; } gsl::span getClustersOnLayer(int rofId, int layerId); gsl::span getClustersOnLayer(int rofId, int layerId) const; gsl::span getClustersPerROFrange(int rofMin, int range, int layerId) const; gsl::span getUnsortedClustersOnLayer(int rofId, int layerId) const; - gsl::span getUsedClustersROF(int rofId, int layerId); - gsl::span getUsedClustersROF(int rofId, int layerId) const; + gsl::span getUsedClustersROF(int rofId, int layerId); + gsl::span getUsedClustersROF(int rofId, int layerId) const; gsl::span getROFramesClustersPerROFrange(int rofMin, int range, int layerId) const; gsl::span getROFrameClusters(int layerId) const; gsl::span getNClustersROFrange(int rofMin, int range, int layerId) const; gsl::span getIndexTablePerROFrange(int rofMin, int range, int layerId) const; gsl::span getIndexTable(int rofId, int layerId); - std::vector& getIndexTableWhole(int layerId) { return mIndexTables[layerId]; } - const std::vector& getTrackingFrameInfoOnLayer(int layerId) const { return mTrackingFrameInfo[layerId]; } + auto& getIndexTableWhole(int layerId) { return mIndexTables[layerId]; } + const auto& getTrackingFrameInfoOnLayer(int layerId) const { return mTrackingFrameInfo[layerId]; } const TrackingFrameInfo& getClusterTrackingFrameInfo(int layerId, const Cluster& cl) const; - const gsl::span getClusterLabels(int layerId, const Cluster& cl) const { return getClusterLabels(layerId, cl.clusterId); } - const gsl::span getClusterLabels(int layerId, const int clId) const { return mClusterLabels->getLabels(mClusterExternalIndices[layerId][clId]); } + gsl::span getClusterLabels(int layerId, const Cluster& cl) const { return getClusterLabels(layerId, cl.clusterId); } + gsl::span getClusterLabels(int layerId, const int clId) const { return mClusterLabels->getLabels(mClusterExternalIndices[layerId][clId]); } int getClusterExternalIndex(int layerId, const int clId) const { return mClusterExternalIndices[layerId][clId]; } int getClusterSize(int clusterId) const { return mClusterSize[clusterId]; } - void setClusterSize(const std::vector& v) { mClusterSize = v; } + void setClusterSize(const bounded_vector& v) { mClusterSize = v; } - std::vector& getTrackletsLabel(int layer) { return mTrackletLabels[layer]; } - std::vector& getCellsLabel(int layer) { return mCellLabels[layer]; } + auto& getTrackletsLabel(int layer) { return mTrackletLabels[layer]; } + auto& getCellsLabel(int layer) { return mCellLabels[layer]; } bool hasMCinformation() const { return mClusterLabels; } void initialise(const int iteration, const TrackingParameters& trkParam, const int maxLayers = 7, bool resetVertices = true); @@ -166,11 +167,11 @@ struct TimeFrame { auto& getCellsLookupTable() { return mCellsLookupTable; } auto& getCellsNeighbours() { return mCellsNeighbours; } auto& getCellsNeighboursLUT() { return mCellsNeighboursLUT; } - std::vector>& getRoads() { return mRoads; } - std::vector& getTracks(int rofId) { return mTracks[rofId]; } - std::vector& getTracksLabel(const int rofId) { return mTracksLabel[rofId]; } - std::vector& getLinesLabel(const int rofId) { return mLinesLabels[rofId]; } - std::vector>& getVerticesMCRecInfo() { return mVerticesMCRecInfo; } + auto& getRoads() { return mRoads; } + auto& getTracks(int rofId) { return mTracks[rofId]; } + auto& getTracksLabel(const int rofId) { return mTracksLabel[rofId]; } + auto& getLinesLabel(const int rofId) { return mLinesLabels[rofId]; } + auto& getVerticesMCRecInfo() { return mVerticesMCRecInfo; } int getNumberOfClusters() const; int getNumberOfCells() const; @@ -181,22 +182,28 @@ struct TimeFrame { auto getNumberOfExtendedTracks() const { return mNExtendedTracks; } auto getNumberOfUsedExtendedClusters() const { return mNExtendedUsedClusters; } + /// memory management + void setMemoryPool(std::shared_ptr& pool); + auto& getMemoryPool() const noexcept { return mMemoryPool; } bool checkMemory(unsigned long max) { return getArtefactsMemory() < max; } - unsigned long getArtefactsMemory(); - int getROFCutClusterMult() const { return mCutClusterMult; } - int getROFCutVertexMult() const { return mCutVertexMult; } + unsigned long getArtefactsMemory() const; + void printArtefactsMemory() const; + + /// ROF cuts + int getROFCutClusterMult() const { return mCutClusterMult; }; + int getROFCutVertexMult() const { return mCutVertexMult; }; int getROFCutAllMult() const { return mCutClusterMult + mCutVertexMult; } // Vertexer void computeTrackletsPerROFScans(); void computeTracletsPerClusterScans(); int& getNTrackletsROF(int rofId, int combId) { return mNTrackletsPerROF[combId][rofId]; } - std::vector& getLines(int rofId) { return mLines[rofId]; } + auto& getLines(int rofId) { return mLines[rofId]; } int getNLinesTotal() const { return std::accumulate(mLines.begin(), mLines.end(), 0, [](int sum, const auto& l) { return sum + l.size(); }); } - std::vector& getTrackletClusters(int rofId) { return mTrackletClusters[rofId]; } + auto& getTrackletClusters(int rofId) { return mTrackletClusters[rofId]; } gsl::span getFoundTracklets(int rofId, int combId) const; gsl::span getFoundTracklets(int rofId, int combId); gsl::span getLabelsFoundTracklets(int rofId, int combId) const; @@ -233,6 +240,7 @@ struct TimeFrame { void addClusterExternalIndexToLayer(int layer, const int idx) { mClusterExternalIndices[layer].push_back(idx); } void resetVectors(); + void resetTracklets(); /// Debug and printing void checkTrackletLUTs(); @@ -249,22 +257,22 @@ struct TimeFrame { bool mIsGPU = false; - std::array, nLayers> mClusters; - std::array, nLayers> mTrackingFrameInfo; - std::array, nLayers> mClusterExternalIndices; - std::array, nLayers> mROFramesClusters; + std::array, nLayers> mClusters; + std::array, nLayers> mTrackingFrameInfo; + std::array, nLayers> mClusterExternalIndices; + std::array, nLayers> mROFramesClusters; const dataformats::MCTruthContainer* mClusterLabels = nullptr; - std::array, 2> mNTrackletsPerCluster; - std::array, 2> mNTrackletsPerClusterSum; - std::array, nLayers> mNClustersPerROF; - std::vector> mIndexTables; - std::vector> mTrackletsLookupTable; - std::array, nLayers> mUsedClusters; + std::array, 2> mNTrackletsPerCluster; + std::array, 2> mNTrackletsPerClusterSum; + std::array, nLayers> mNClustersPerROF; + std::array, nLayers> mIndexTables; + std::vector> mTrackletsLookupTable; + std::array, nLayers> mUsedClusters; int mNrof = 0; int mNExtendedTracks{0}; int mNExtendedUsedClusters{0}; - std::vector mROFramesPV = {0}; - std::vector mPrimaryVertices; + bounded_vector mROFramesPV; + bounded_vector mPrimaryVertices; // State if memory will be externally managed. bool mExtAllocator = false; @@ -282,31 +290,20 @@ struct TimeFrame { void setExtAllocator(bool ext) { mExtAllocator = ext; } bool getExtAllocator() const { return mExtAllocator; } - std::array, nLayers> mUnsortedClusters; - std::vector> mTracklets; - std::vector> mCells; - std::vector> mCellSeeds; - std::vector> mCellSeedsChi2; - std::vector> mRoads; - std::vector> mTracks; - std::vector> mCellsNeighbours; - std::vector> mCellsLookupTable; + std::array, nLayers> mUnsortedClusters; + std::vector> mTracklets; + std::vector> mCells; + std::vector> mCellSeeds; + std::vector> mCellSeedsChi2; + bounded_vector> mRoads; + std::vector> mTracks; + std::vector> mCellsNeighbours; + std::vector> mCellsLookupTable; std::vector mMultiplicityCutMask; const o2::base::PropagatorImpl* mPropagatorDevice = nullptr; // Needed only for GPU - void dropTracks() - { - for (auto& v : mTracks) { - deepVectorClear(v); - } - } - protected: - template - void deepVectorClear(std::vector& vec) - { - std::vector().swap(vec); - } + void wipe(); private: void prepareClusters(const TrackingParameters& trkParam, const int maxLayers); @@ -317,39 +314,44 @@ struct TimeFrame { bool isBeamPositionOverridden = false; std::array mMinR; std::array mMaxR; - std::vector mMSangles; - std::vector mPhiCuts; - std::vector mPositionResolution; - std::vector mClusterSize; + bounded_vector mMSangles; + bounded_vector mPhiCuts; + bounded_vector mPositionResolution; + bounded_vector mClusterSize; std::vector mROFMask; - std::vector> mPValphaX; /// PV x and alpha for track propagation - std::vector> mTrackletLabels; - std::vector> mCellLabels; - std::vector> mCellsNeighboursLUT; - std::vector> mTracksLabel; - std::vector mBogusClusters; /// keep track of clusters with wild coordinates - - std::vector> mRoadLabels; + bounded_vector> mPValphaX; /// PV x and alpha for track propagation + std::vector> mTrackletLabels; + std::vector> mCellLabels; + std::vector> mCellsNeighboursLUT; + std::vector> mTracksLabel; + bounded_vector mBogusClusters; /// keep track of clusters with wild coordinates + + bounded_vector> mRoadLabels; int mCutClusterMult; int mCutVertexMult; // Vertexer - std::vector> mNTrackletsPerROF; - std::vector> mLines; - std::vector> mTrackletClusters; - std::array, 2> mTrackletsIndexROF; - std::vector> mLinesLabels; + std::vector> mNTrackletsPerROF; + std::vector> mLines; + std::vector> mTrackletClusters; + std::array, 2> mTrackletsIndexROF; + std::vector> mLinesLabels; std::vector> mVerticesMCRecInfo; std::array mTotalTracklets = {0, 0}; unsigned int mNoVertexROF = 0; - std::vector mTotVertPerIteration; + bounded_vector mTotVertPerIteration; // \Vertexer + + std::shared_ptr mMemoryPool; }; template inline gsl::span TimeFrame::getPrimaryVertices(int rofId) const { + if (mPrimaryVertices.empty()) { + return {}; + } const int start = mROFramesPV[rofId]; const int stop_idx = rofId >= mNrof - 1 ? mNrof : rofId + 1; int delta = mMultiplicityCutMask[rofId] ? mROFramesPV[stop_idx] - start : 0; // return empty span if Rof is excluded @@ -368,6 +370,9 @@ inline gsl::span> TimeFrame::getPri template inline gsl::span TimeFrame::getPrimaryVertices(int romin, int romax) const { + if (mPrimaryVertices.empty()) { + return {}; + } return {&mPrimaryVertices[mROFramesPV[romin]], static_cast::size_type>(mROFramesPV[romax + 1] - mROFramesPV[romin])}; } @@ -417,7 +422,7 @@ inline gsl::span TimeFrame::getClustersOnLayer(int rofId return {}; } int startIdx{mROFramesClusters[layerId][rofId]}; - return {&mClusters[layerId][startIdx], static_cast::size_type>(mROFramesClusters[layerId][rofId + 1] - startIdx)}; + return {&mClusters[layerId][startIdx], static_cast::size_type>(mROFramesClusters[layerId][rofId + 1] - startIdx)}; } template @@ -437,7 +442,7 @@ inline gsl::span TimeFrame::getUsedClustersROF(int rofId return {}; } int startIdx{mROFramesClusters[layerId][rofId]}; - return {&mUsedClusters[layerId][startIdx], static_cast::size_type>(mROFramesClusters[layerId][rofId + 1] - startIdx)}; + return {&mUsedClusters[layerId][startIdx], static_cast::size_type>(mROFramesClusters[layerId][rofId + 1] - startIdx)}; } template diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index 2e535e2b6a644..d8236fed51240 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -34,6 +35,7 @@ #include "ITStracking/TimeFrame.h" #include "ITStracking/TrackerTraits.h" #include "ITStracking/Road.h" +#include "ITStracking/BoundedAllocator.h" #include "DataFormatsITS/TrackITS.h" #include "SimulationDataFormat/MCCompLabel.h" @@ -65,6 +67,7 @@ class Tracker LogFunc = [](const std::string& s) { std::cerr << s << '\n'; }); void setParameters(const std::vector& p) { mTrkParams = p; } + void setMemoryPool(std::shared_ptr& pool) { mMemoryPool = pool; } std::vector& getParameters() { return mTrkParams; } void getGlobalConfiguration(); void setBz(float bz) { mTraits->setBz(bz); } @@ -89,7 +92,7 @@ class Tracker void rectifyClusterIndices(); template - float evaluateTask(void (Tracker::*task)(T...), const char* taskName, LogFunc logger, F&&... args); + float evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, LogFunc logger, F&&... args); TrackerTraits7* mTraits = nullptr; /// Observer pointer, not owned by this class TimeFrame7* mTimeFrame = nullptr; /// Observer pointer, not owned by this class @@ -100,23 +103,35 @@ class Tracker unsigned int mNumberOfDroppedTFs{0}; unsigned int mTimeFrameCounter{0}; double mTotalTime{0}; + std::shared_ptr mMemoryPool; + + enum State { + TFInit = 0, + Trackleting, + Celling, + Neighbouring, + Roading, + NStates, + }; + State mCurState; + static constexpr std::array StateNames{"TimeFrame initialisation", "Tracklet finding", "Cell finding", "Neighbour finding", "Road finding"}; }; template -float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, LogFunc logger, F&&... args) +float Tracker::evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, LogFunc logger, F&&... args) { float diff{0.f}; if constexpr (constants::DoTimeBenchmarks) { auto start = std::chrono::high_resolution_clock::now(); - (this->*task)(std::forward(args)...); + (this->*task)(std::forward(args)...); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration diff_t{end - start}; diff = diff_t.count(); std::stringstream sstream; - if (taskName == nullptr) { + if (taskName.empty()) { sstream << diff << "\t"; } else { sstream << std::setw(2) << " - " << taskName << " completed in: " << diff << " ms"; @@ -124,20 +139,17 @@ float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, L logger(sstream.str()); if (mTrkParams[0].SaveTimeBenchmarks) { - std::stringstream str2file; std::string taskNameStr(taskName); std::transform(taskNameStr.begin(), taskNameStr.end(), taskNameStr.begin(), [](unsigned char c) { return std::tolower(c); }); std::replace(taskNameStr.begin(), taskNameStr.end(), ' ', '_'); - str2file << taskNameStr << "\t" << diff; - std::ofstream file; - file.open("its_time_benchmarks.txt", std::ios::app); - file << str2file.str() << std::endl; - file.close(); + if (std::ofstream file{"its_time_benchmarks.txt", std::ios::app}) { + file << "trk:" << iteration << '\t' << taskNameStr << '\t' << diff << '\n'; + } } } else { - (this->*task)(std::forward(args)...); + (this->*task)(std::forward(args)...); } return diff; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index 7340f247e4cf2..22af80e544c86 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -22,6 +22,10 @@ #include "ITStracking/Configuration.h" #include "ITStracking/MathUtils.h" #include "ITStracking/TimeFrame.h" +#include "ITStracking/BoundedAllocator.h" + +#include +#include // #define OPTIMISATION_OUTPUT @@ -54,7 +58,7 @@ class TrackerTraits virtual void findShortPrimaries(); virtual bool trackFollowing(TrackITSExt* track, int rof, bool outward, const int iteration); - virtual void processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeed, std::vector& updatedCellId); + virtual void processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeed, bounded_vector& updatedCellId); void updateTrackingParameters(const std::vector& trkPars) { mTrkParams = trkPars; } TimeFrame* getTimeFrame() { return mTimeFrame; } @@ -65,6 +69,8 @@ class TrackerTraits bool isMatLUT() const; virtual const char* getName() const noexcept { return "CPU"; } virtual bool isGPU() const noexcept { return false; } + void setMemoryPool(std::shared_ptr& pool) noexcept { mMemoryPool = pool; } + auto getMemoryPool() const noexcept { return mMemoryPool; } // Others GPUhd() static consteval int4 getEmptyBinsRect() { return int4{0, 0, 0, 0}; } @@ -92,6 +98,8 @@ class TrackerTraits int mNThreads = 1; bool mApplySmoothing = false; + std::shared_ptr mMemoryPool; + tbb::task_arena mTaskArena; protected: o2::base::PropagatorImpl::MatCorrType mCorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrNONE; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index cb6512248bf0c..72301cd2851b3 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -15,12 +15,11 @@ #include "CommonUtils/ConfigurableParam.h" #include "CommonUtils/ConfigurableParamHelper.h" -namespace o2 -{ -namespace its +namespace o2::its { struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper { + bool saveTimeBenchmarks = false; // dump metrics on file int nIterations = 1; // Number of vertexing passes to perform. int vertPerRofThreshold = 0; // Maximum number of vertices per ROF to trigger second a iteration. @@ -49,6 +48,9 @@ struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper0, otherwise use code defaults uint8_t startLayerMask[MaxIter] = {}; // mask of start layer for this iteration (if >0) float minPtIterLgt[MaxIter * (MaxTrackLength - MinTrackLength + 1)] = {}; // min.pT for given track length at this iteration, used only if >0, otherwise use code defaults - float sysErrY2[7] = {0}; // systematic error^2 in Y per layer - float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer + float sysErrY2[7] = {0}; // systematic error^2 in Y per layer + float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer float maxChi2ClusterAttachment = -1.f; float maxChi2NDF = -1.f; float nSigmaCut = -1.f; @@ -76,14 +78,12 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper 0 off float trackFollowerNSigmaZ = 1.f; // sigma in z-cut for track-following search rectangle float trackFollowerNSigmaPhi = 1.f; // sigma in phi-cut for track-following search rectangle float cellsPerClusterLimit = -1.f; float trackletsPerClusterLimit = -1.f; int findShortTracks = -1; - int nThreads = 1; // number of threads to perform the operations in parallel. int nROFsPerIterations = 0; // size of the slice of ROFs to be processed at a time, preferably integer divisors of nROFs per TF, to balance the iterations. int nOrbitsPerIterations = 0; // not implemented: size of the slice of ROFs to be processed at a time, computed using the number of ROFs per orbit. bool perPrimaryVertexProcessing = false; // perform the full tracking considering the vertex hypotheses one at the time. @@ -91,21 +91,23 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper { // GPU-specific parameters - unsigned int tmpCUBBufferSize = 1e5; // In average in pp events there are required 4096 bytes int nBlocks = 20; int nThreads = 256; O2ParamDef(ITSGpuTrackingParamConfig, "ITSGpuTrackingParam"); }; -} // namespace its -} // namespace o2 +} // namespace o2::its #endif diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h index 89b5527f5b2c9..cff6d215e5e3b 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h @@ -19,6 +19,7 @@ #include "ITStracking/TrackerTraits.h" #include "ITStracking/Vertexer.h" #include "ITStracking/VertexerTraits.h" +#include "ITStracking/BoundedAllocator.h" #include "DataFormatsParameters/GRPObject.h" #include "DataFormatsITSMFT/TopologyDictionary.h" #include "DataFormatsCalibration/MeanVertexObject.h" @@ -95,6 +96,7 @@ class ITSTrackingInterface std::unique_ptr mTracker = nullptr; std::unique_ptr mVertexer = nullptr; const o2::dataformats::MeanVertexObject* mMeanVertex; + std::shared_ptr mMemoryPool; }; } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h index 56cd286b509ad..77b21b58b967b 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h @@ -27,6 +27,7 @@ #include "ITStracking/Configuration.h" #include "ITStracking/TimeFrame.h" #include "ITStracking/VertexerTraits.h" +#include "ITStracking/BoundedAllocator.h" #include "ReconstructionDataFormats/Vertex.h" #include "ITStracking/ClusterLines.h" @@ -56,6 +57,7 @@ class Vertexer auto& getVertParameters() const { return mTraits->getVertexingParameters(); } void setParameters(const std::vector& vertParams) { mVertParams = vertParams; } void getGlobalConfiguration(); + void setMemoryPool(std::shared_ptr& pool) { mMemoryPool = pool; } std::vector exportVertices(); VertexerTraits* getTraits() const { return mTraits; }; @@ -80,7 +82,8 @@ class Vertexer // Utils void dumpTraits() { mTraits->dumpVertexerTraits(); } template - float evaluateTask(void (Vertexer::*)(T...), const char*, LogFunc logger, T&&... args); + float evaluateTask(void (Vertexer::*task)(T...), std::string_view taskName, int iteration, LogFunc& logger, T&&... args); + void printEpilog(LogFunc& logger, const unsigned int trackletN01, const unsigned int trackletN12, const unsigned selectedN, const unsigned int vertexN, const float initT, @@ -93,6 +96,17 @@ class Vertexer TimeFrame7* mTimeFrame = nullptr; /// Observer pointer, not owned by this class std::vector mVertParams; + std::shared_ptr mMemoryPool; + + enum State { + Init = 0, + Trackleting, + Validating, + Finding, + NStates, + }; + State mCurState; + static constexpr std::array StateNames{"Initialisation", "Tracklet finding", "Tracklet validation", "Vertex finding"}; }; template @@ -120,8 +134,7 @@ inline void Vertexer::findVertices(T&&... args) } template -float Vertexer::evaluateTask(void (Vertexer::*task)(T...), const char* taskName, LogFunc logger, - T&&... args) +float Vertexer::evaluateTask(void (Vertexer::*task)(T...), std::string_view taskName, int iteration, LogFunc& logger, T&&... args) { float diff{0.f}; @@ -134,12 +147,22 @@ float Vertexer::evaluateTask(void (Vertexer::*task)(T...), const char* taskName, diff = diff_t.count(); std::stringstream sstream; - if (taskName == nullptr) { + if (taskName.empty()) { sstream << diff << "\t"; } else { sstream << std::setw(2) << " - " << taskName << " completed in: " << diff << " ms"; } logger(sstream.str()); + + if (mVertParams[0].SaveTimeBenchmarks) { + std::string taskNameStr(taskName); + std::transform(taskNameStr.begin(), taskNameStr.end(), taskNameStr.begin(), + [](unsigned char c) { return std::tolower(c); }); + std::replace(taskNameStr.begin(), taskNameStr.end(), ' ', '_'); + if (std::ofstream file{"its_time_benchmarks.txt", std::ios::app}) { + file << "vtx:" << iteration << '\t' << taskNameStr << '\t' << diff << '\n'; + } + } } else { (this->*task)(std::forward(args)...); } diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/VertexerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/VertexerTraits.h index bc7daf5e3ab24..ae92fda292df3 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/VertexerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/VertexerTraits.h @@ -20,6 +20,7 @@ #include #include +#include "ITStracking/BoundedAllocator.h" #include "ITStracking/Cluster.h" #include "ITStracking/ClusterLines.h" #include "ITStracking/Configuration.h" @@ -31,6 +32,8 @@ #include "GPUCommonDef.h" #include "GPUCommonMath.h" +#include + namespace o2 { class MCCompLabel; @@ -68,23 +71,23 @@ class VertexerTraits virtual void computeTracklets(const int iteration = 0); virtual void computeTrackletMatching(const int iteration = 0); virtual void computeVertices(const int iteration = 0); - virtual void adoptTimeFrame(TimeFrame7* tf) { mTimeFrame = tf; } + virtual void adoptTimeFrame(TimeFrame7* tf) noexcept { mTimeFrame = tf; } virtual void updateVertexingParameters(const std::vector& vrtPar, const TimeFrameGPUParameters& gpuTfPar); void computeVerticesInRof(int, gsl::span&, - std::vector&, - std::vector&, + bounded_vector&, + bounded_vector&, std::array&, - std::vector&, - std::vector&, + bounded_vector&, + bounded_vector&, TimeFrame7*, - std::vector*, + bounded_vector*, const int iteration = 0); - static const std::vector> selectClusters(const int* indexTable, - const std::array& selectedBinsRect, - const IndexTableUtils& utils); + const bounded_vector> selectClusters(const int* indexTable, + const std::array& selectedBinsRect, + const IndexTableUtils& utils); // utils auto& getVertexingParameters() { return mVrtParams; } @@ -95,9 +98,11 @@ class VertexerTraits int getNThreads() const { return mNThreads; } virtual bool isGPU() const noexcept { return false; } virtual const char* getName() const noexcept { return "CPU"; } + virtual bool usesMemoryPool() const noexcept { return true; } + void setMemoryPool(std::shared_ptr& pool) { mMemoryPool = pool; } template - static std::pair computeMain(const std::vector& elements) + static std::pair computeMain(const bounded_vector& elements) { T elem; size_t maxCount = 0; @@ -119,6 +124,9 @@ class VertexerTraits // Frame related quantities TimeFrame7* mTimeFrame = nullptr; // observer ptr + private: + std::shared_ptr mMemoryPool; + tbb::task_arena mTaskArena; }; inline void VertexerTraits::initialise(const TrackingParameters& trackingParams, const int iteration) diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx index 80a6ef481fde6..eb7834da740d0 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx @@ -20,14 +20,11 @@ #include "DataFormatsITSMFT/TopologyDictionary.h" #include "ITSBase/GeometryTGeo.h" #include "ITSMFTBase/SegmentationAlpide.h" +#include "ITStracking/BoundedAllocator.h" #include "ITStracking/TrackingConfigParam.h" #include -#ifdef WITH_OPENMP -#include -#endif - namespace { struct ClusterHelper { @@ -71,7 +68,7 @@ TimeFrame::~TimeFrame() } template -void TimeFrame::addPrimaryVertices(const std::vector& vertices) +void TimeFrame::addPrimaryVertices(const bounded_vector& vertices) { for (const auto& vertex : vertices) { mPrimaryVertices.emplace_back(vertex); @@ -86,19 +83,19 @@ void TimeFrame::addPrimaryVertices(const std::vector& vertices) } template -void TimeFrame::addPrimaryVertices(const std::vector& vertices, const int rofId, const int iteration) +void TimeFrame::addPrimaryVertices(const bounded_vector& vertices, const int rofId, const int iteration) { addPrimaryVertices(gsl::span(vertices), rofId, iteration); } template -void TimeFrame::addPrimaryVerticesLabels(std::vector>& labels) +void TimeFrame::addPrimaryVerticesLabels(bounded_vector>& labels) { mVerticesMCRecInfo.insert(mVerticesMCRecInfo.end(), labels.begin(), labels.end()); } template -void TimeFrame::addPrimaryVerticesInROF(const std::vector& vertices, const int rofId, const int iteration) +void TimeFrame::addPrimaryVerticesInROF(const bounded_vector& vertices, const int rofId, const int iteration) { mPrimaryVertices.insert(mPrimaryVertices.begin() + mROFramesPV[rofId], vertices.begin(), vertices.end()); for (int i = rofId + 1; i < mROFramesPV.size(); ++i) { @@ -108,7 +105,7 @@ void TimeFrame::addPrimaryVerticesInROF(const std::vector& vert } template -void TimeFrame::addPrimaryVerticesLabelsInROF(const std::vector>& labels, const int rofId) +void TimeFrame::addPrimaryVerticesLabelsInROF(const bounded_vector>& labels, const int rofId) { mVerticesMCRecInfo.insert(mVerticesMCRecInfo.begin() + mROFramesPV[rofId], labels.begin(), labels.end()); } @@ -116,7 +113,7 @@ void TimeFrame::addPrimaryVerticesLabelsInROF(const std::vector void TimeFrame::addPrimaryVertices(const gsl::span& vertices, const int rofId, const int iteration) { - std::vector futureVertices; + bounded_vector futureVertices(mMemoryPool.get()); for (const auto& vertex : vertices) { if (vertex.getTimeStamp().getTimeStamp() < rofId) { // put a copy in the past insertPastVertex(vertex, iteration); @@ -148,16 +145,16 @@ int TimeFrame::loadROFrameData(gsl::span rofs, const itsmft::TopologyDictionary* dict, const dataformats::MCTruthContainer* mcLabels) { - for (int iLayer{0}; iLayer < mUnsortedClusters.size(); ++iLayer) { - deepVectorClear(mUnsortedClusters[iLayer]); - deepVectorClear(mTrackingFrameInfo[iLayer]); - deepVectorClear(mClusterExternalIndices[iLayer]); - mROFramesClusters[iLayer].resize(1, 0); + for (int iLayer{0}; iLayer < nLayers; ++iLayer) { + deepVectorClear(mUnsortedClusters[iLayer], mMemoryPool.get()); + deepVectorClear(mTrackingFrameInfo[iLayer], mMemoryPool.get()); + deepVectorClear(mClusterExternalIndices[iLayer], mMemoryPool.get()); + clearResizeBoundedVector(mROFramesClusters[iLayer], 1, mMemoryPool.get(), 0); if (iLayer < 2) { - deepVectorClear(mTrackletsIndexROF[iLayer]); - deepVectorClear(mNTrackletsPerCluster[iLayer]); - deepVectorClear(mNTrackletsPerClusterSum[iLayer]); + deepVectorClear(mTrackletsIndexROF[iLayer], mMemoryPool.get()); + deepVectorClear(mNTrackletsPerCluster[iLayer], mMemoryPool.get()); + deepVectorClear(mNTrackletsPerClusterSum[iLayer], mMemoryPool.get()); } } @@ -165,8 +162,7 @@ int TimeFrame::loadROFrameData(gsl::span rofs, geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::L2G)); mNrof = 0; - deepVectorClear(mClusterSize); - mClusterSize.reserve(clusters.size()); + clearResizeBoundedVector(mClusterSize, clusters.size(), mMemoryPool.get()); for (auto& rof : rofs) { for (int clusterId{rof.getFirstEntry()}; clusterId < rof.getFirstEntry() + rof.getNEntries(); ++clusterId) { auto& c = clusters[clusterId]; @@ -193,11 +189,7 @@ int TimeFrame::loadROFrameData(gsl::span rofs, locXYZ = dict->getClusterCoordinates(c, patt, false); clusterSize = patt.getNPixels(); } - if (clusterSize < 255) { - mClusterSize.push_back(clusterSize); - } else { - mClusterSize.push_back(255); - } + mClusterSize.push_back(std::clamp(clusterSize, 0u, 255u)); auto sensorID = c.getSensorID(); // Inverse transformation to the local --> tracking auto trkXYZ = geom->getMatrixT2L(sensorID) ^ locXYZ; @@ -226,14 +218,15 @@ int TimeFrame::loadROFrameData(gsl::span rofs, if (mcLabels != nullptr) { mClusterLabels = mcLabels; } + return mNrof; } template void TimeFrame::prepareClusters(const TrackingParameters& trkParam, const int maxLayers) { - std::vector cHelper; - std::vector clsPerBin(trkParam.PhiBins * trkParam.ZBins, 0); + bounded_vector cHelper(mMemoryPool.get()); + bounded_vector clsPerBin(trkParam.PhiBins * trkParam.ZBins, 0, mMemoryPool.get()); for (int rof{0}; rof < mNrof; ++rof) { if ((int)mMultiplicityCutMask.size() == mNrof && !mMultiplicityCutMask[rof]) { continue; @@ -270,7 +263,7 @@ void TimeFrame::prepareClusters(const TrackingParameters& trkParam, con h.bin = bin; h.ind = clsPerBin[bin]++; } - std::vector lutPerBin(clsPerBin.size()); + bounded_vector lutPerBin(clsPerBin.size(), 0, mMemoryPool.get()); lutPerBin[0] = 0; for (unsigned int iB{1}; iB < lutPerBin.size(); ++iB) { lutPerBin[iB] = lutPerBin[iB - 1] + clsPerBin[iB - 1]; @@ -286,7 +279,6 @@ void TimeFrame::prepareClusters(const TrackingParameters& trkParam, con c.radius = h.r; c.indexTableBinIndex = h.bin; } - for (unsigned int iB{0}; iB < clsPerBin.size(); ++iB) { mIndexTables[iLayer][rof * (trkParam.ZBins * trkParam.PhiBins + 1) + iB] = lutPerBin[iB]; } @@ -307,37 +299,34 @@ void TimeFrame::initialise(const int iteration, const TrackingParameter } deepVectorClear(mTracks); deepVectorClear(mTracksLabel); + deepVectorClear(mLines); deepVectorClear(mLinesLabels); if (resetVertices) { deepVectorClear(mVerticesMCRecInfo); } - mTracks.resize(mNrof); - mTracksLabel.resize(mNrof); - mLinesLabels.resize(mNrof); - mCells.resize(trkParam.CellsPerRoad()); - mCellsLookupTable.resize(trkParam.CellsPerRoad() - 1); - mCellsNeighbours.resize(trkParam.CellsPerRoad() - 1); - mCellsNeighboursLUT.resize(trkParam.CellsPerRoad() - 1); - mCellLabels.resize(trkParam.CellsPerRoad()); - mTracklets.resize(std::min(trkParam.TrackletsPerRoad(), maxLayers - 1)); - mTrackletLabels.resize(trkParam.TrackletsPerRoad()); - mTrackletsLookupTable.resize(trkParam.CellsPerRoad()); + clearResizeBoundedVector(mTracks, mNrof, mMemoryPool.get()); + clearResizeBoundedVector(mTracksLabel, mNrof, mMemoryPool.get()); + clearResizeBoundedVector(mLinesLabels, mNrof, mMemoryPool.get()); + clearResizeBoundedVector(mCells, trkParam.CellsPerRoad(), mMemoryPool.get()); + clearResizeBoundedVector(mCellsLookupTable, trkParam.CellsPerRoad() - 1, mMemoryPool.get()); + clearResizeBoundedVector(mCellsNeighbours, trkParam.CellsPerRoad() - 1, mMemoryPool.get()); + clearResizeBoundedVector(mCellsNeighboursLUT, trkParam.CellsPerRoad() - 1, mMemoryPool.get()); + clearResizeBoundedVector(mCellLabels, trkParam.CellsPerRoad(), mMemoryPool.get()); + clearResizeBoundedVector(mTracklets, std::min(trkParam.TrackletsPerRoad(), maxLayers - 1), mMemoryPool.get()); + clearResizeBoundedVector(mTrackletLabels, trkParam.TrackletsPerRoad(), mMemoryPool.get()); + clearResizeBoundedVector(mTrackletsLookupTable, trkParam.TrackletsPerRoad(), mMemoryPool.get()); mIndexTableUtils.setTrackingParameters(trkParam); - mPositionResolution.resize(trkParam.NLayers); - mBogusClusters.resize(trkParam.NLayers, 0); - deepVectorClear(mLines); + clearResizeBoundedVector(mPositionResolution, trkParam.NLayers, mMemoryPool.get()); + clearResizeBoundedVector(mBogusClusters, trkParam.NLayers, mMemoryPool.get()); deepVectorClear(mTrackletClusters); for (unsigned int iLayer{0}; iLayer < std::min((int)mClusters.size(), maxLayers); ++iLayer) { - deepVectorClear(mClusters[iLayer]); - mClusters[iLayer].resize(mUnsortedClusters[iLayer].size()); - deepVectorClear(mUsedClusters[iLayer]); - mUsedClusters[iLayer].resize(mUnsortedClusters[iLayer].size(), false); + clearResizeBoundedVector(mClusters[iLayer], mUnsortedClusters[iLayer].size(), mMemoryPool.get()); + clearResizeBoundedVector(mUsedClusters[iLayer], mUnsortedClusters[iLayer].size(), mMemoryPool.get()); mPositionResolution[iLayer] = o2::gpu::CAMath::Sqrt(0.5 * (trkParam.SystErrorZ2[iLayer] + trkParam.SystErrorY2[iLayer]) + trkParam.LayerResolution[iLayer] * trkParam.LayerResolution[iLayer]); } - deepVectorClear(mIndexTables); - mIndexTables.resize(mClusters.size(), std::vector(mNrof * (trkParam.ZBins * trkParam.PhiBins + 1), 0)); - mLines.resize(mNrof); - mTrackletClusters.resize(mNrof); + clearResizeBoundedArray(mIndexTables, mNrof * (trkParam.ZBins * trkParam.PhiBins + 1), mMemoryPool.get()); + clearResizeBoundedVector(mLines, mNrof, mMemoryPool.get()); + clearResizeBoundedVector(mTrackletClusters, mNrof, mMemoryPool.get()); for (int iLayer{0}; iLayer < trkParam.NLayers; ++iLayer) { if (trkParam.SystErrorY2[iLayer] > 0.f || trkParam.SystErrorZ2[iLayer] > 0.f) { @@ -351,7 +340,7 @@ void TimeFrame::initialise(const int iteration, const TrackingParameter } mNTrackletsPerROF.resize(2); for (auto& v : mNTrackletsPerROF) { - v = std::vector(mNrof + 1, 0); + v = bounded_vector(mNrof + 1, 0, mMemoryPool.get()); } if (iteration == 0 || iteration == 3) { prepareClusters(trkParam, maxLayers); @@ -360,7 +349,7 @@ void TimeFrame::initialise(const int iteration, const TrackingParameter if (maxLayers < trkParam.NLayers) { // Vertexer only, but in both iterations for (size_t iLayer{0}; iLayer < maxLayers; ++iLayer) { deepVectorClear(mUsedClusters[iLayer]); - mUsedClusters[iLayer].resize(mUnsortedClusters[iLayer].size(), false); + clearResizeBoundedVector(mUsedClusters[iLayer], mUnsortedClusters[iLayer].size(), mMemoryPool.get()); } } @@ -408,7 +397,7 @@ void TimeFrame::initialise(const int iteration, const TrackingParameter } template -unsigned long TimeFrame::getArtefactsMemory() +unsigned long TimeFrame::getArtefactsMemory() const { unsigned long size{0}; for (auto& trkl : mTracklets) { @@ -423,6 +412,12 @@ unsigned long TimeFrame::getArtefactsMemory() return size + sizeof(Road) * mRoads.size(); } +template +void TimeFrame::printArtefactsMemory() const +{ + LOGP(info, "TimeFrame: Artefacts occupy {:.2f} MB", getArtefactsMemory() / constants::MB); +} + template void TimeFrame::fillPrimaryVerticesXandAlpha() { @@ -501,6 +496,15 @@ void TimeFrame::resetVectors() } } +template +void TimeFrame::resetTracklets() +{ + for (auto& trkl : mTracklets) { + deepVectorClear(trkl); + } + deepVectorClear(mTrackletsLookupTable); +} + template void TimeFrame::printTrackletLUTonLayer(int i) { @@ -598,6 +602,106 @@ void TimeFrame::printSliceInfo(const int startROF, const int sliceSize) } } +template +void TimeFrame::setMemoryPool(std::shared_ptr& pool) +{ + wipe(); + mMemoryPool = pool; + + auto initVector = [&](bounded_vector& vec) { + auto alloc = vec.get_allocator().resource(); + if (alloc != mMemoryPool.get()) { + vec = bounded_vector(mMemoryPool.get()); + } + }; + auto initArrays = [&](std::array, S>& arr) { + for (size_t i{0}; i < S; ++i) { + auto alloc = arr[i].get_allocator().resource(); + if (alloc != mMemoryPool.get()) { + arr[i] = bounded_vector(mMemoryPool.get()); + } + } + }; + auto initVectors = [&](std::vector>& vec) { + for (size_t i{0}; i < vec.size(); ++i) { + auto alloc = vec[i].get_allocator().resource(); + if (alloc != mMemoryPool.get()) { + vec[i] = bounded_vector(mMemoryPool.get()); + } + } + }; + + initVector(mTotVertPerIteration); + initVector(mPrimaryVertices); + initVector(mROFramesPV); + initArrays(mClusters); + initArrays(mTrackingFrameInfo); + initArrays(mClusterExternalIndices); + initArrays(mROFramesClusters); + initArrays(mNTrackletsPerCluster); + initArrays(mNTrackletsPerClusterSum); + initArrays(mNClustersPerROF); + initArrays(mIndexTables); + initArrays(mUsedClusters); + initArrays(mUnsortedClusters); + initVector(mROFramesPV); + initVector(mPrimaryVertices); + initVector(mRoads); + initVector(mRoadLabels); + initVector(mMSangles); + initVector(mPhiCuts); + initVector(mPositionResolution); + initVector(mClusterSize); + initVector(mPValphaX); + initVector(mBogusClusters); + initArrays(mTrackletsIndexROF); + initVectors(mTracks); + initVectors(mTracklets); + initVectors(mCells); + initVectors(mCellSeeds); + initVectors(mCellSeedsChi2); + initVectors(mCellsNeighbours); + initVectors(mCellsLookupTable); +} + +template +void TimeFrame::wipe() +{ + deepVectorClear(mUnsortedClusters); + deepVectorClear(mTracks); + deepVectorClear(mTracklets); + deepVectorClear(mCells); + deepVectorClear(mCellSeeds); + deepVectorClear(mCellSeedsChi2); + deepVectorClear(mRoads); + deepVectorClear(mCellsNeighbours); + deepVectorClear(mCellsLookupTable); + deepVectorClear(mTotVertPerIteration); + deepVectorClear(mPrimaryVertices); + deepVectorClear(mROFramesPV); + deepVectorClear(mClusters); + deepVectorClear(mTrackingFrameInfo); + deepVectorClear(mClusterExternalIndices); + deepVectorClear(mROFramesClusters); + deepVectorClear(mNTrackletsPerCluster); + deepVectorClear(mNTrackletsPerClusterSum); + deepVectorClear(mNClustersPerROF); + deepVectorClear(mIndexTables); + deepVectorClear(mUsedClusters); + deepVectorClear(mUnsortedClusters); + deepVectorClear(mROFramesPV); + deepVectorClear(mPrimaryVertices); + deepVectorClear(mRoads); + deepVectorClear(mRoadLabels); + deepVectorClear(mMSangles); + deepVectorClear(mPhiCuts); + deepVectorClear(mPositionResolution); + deepVectorClear(mClusterSize); + deepVectorClear(mPValphaX); + deepVectorClear(mBogusClusters); + deepVectorClear(mTrackletsIndexROF); +} + template class TimeFrame<7>; } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index c452e20f7f707..e354792620436 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -15,6 +15,7 @@ #include "ITStracking/Tracker.h" +#include "ITStracking/BoundedAllocator.h" #include "ITStracking/Cell.h" #include "ITStracking/Constants.h" #include "ITStracking/IndexTableUtils.h" @@ -53,90 +54,96 @@ void Tracker::clustersToTracks(LogFunc logger, LogFunc error) } } - bool dropTF = false; - for (int iteration = 0; iteration < (int)mTrkParams.size(); ++iteration) { - if (iteration == 3 && mTrkParams[0].DoUPCIteration) { - mTimeFrame->swapMasks(); + int iteration{0}, iROFs{0}, iVertex{0}; + auto handleException = [&](const auto& err) { + LOGP(error, "Too much memory used during {} in iteration {} in ROF span {}-{} iVtx={}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + StateNames[mCurState], iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, iVertex, + (double)mTimeFrame->getArtefactsMemory() / GB, (double)mTrkParams[iteration].MaxMemory / GB); + LOGP(error, "Exception: {}", err.what()); + if (mTrkParams[iteration].DropTFUponFailure) { + mTimeFrame->wipe(); + mMemoryPool->print(); + ++mNumberOfDroppedTFs; + error("...Dropping Timeframe..."); + } else { + throw err; } - double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; - int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; - int nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; - int iVertex{std::min(maxNvertices, 0)}; - logger(std::format("==== ITS {} Tracking iteration {} summary ====", mTraits->getName(), iteration)); - - total += evaluateTask(&Tracker::initialiseTimeFrame, "Timeframe initialisation", logger, iteration); - do { - for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask(&Tracker::computeTracklets, "Tracklet finding", evalLog, iteration, iROFs, iVertex); - nTracklets += mTraits->getTFNumberOfTracklets(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(std::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", - iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); - if (mTrkParams[iteration].DropTFUponFailure) { - dropTF = true; - } - break; - } - float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; - if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { - error(std::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}", - trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit)); - break; - } + }; - timeCells += evaluateTask(&Tracker::computeCells, "Cell finding", evalLog, iteration); - nCells += mTraits->getTFNumberOfCells(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(std::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", - iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); - if (mTrkParams[iteration].DropTFUponFailure) { - dropTF = true; + try { + for (iteration = 0; iteration < (int)mTrkParams.size(); ++iteration) { + mMemoryPool->setMaxMemory(mTrkParams[iteration].MaxMemory); + if (iteration == 3 && mTrkParams[0].DoUPCIteration) { + mTimeFrame->swapMasks(); + } + double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; + int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; + int nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; + int iVertex{std::min(maxNvertices, 0)}; + logger(std::format("==== ITS {} Tracking iteration {} summary ====", mTraits->getName(), iteration)); + + total += evaluateTask(&Tracker::initialiseTimeFrame, StateNames[mCurState = TFInit], iteration, logger, iteration); + do { + for (iROFs = 0; iROFs < nROFsIterations; ++iROFs) { + timeTracklets += evaluateTask(&Tracker::computeTracklets, StateNames[mCurState = Trackleting], iteration, evalLog, iteration, iROFs, iVertex); + nTracklets += mTraits->getTFNumberOfTracklets(); + float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / float(mTraits->getTFNumberOfClusters()) : 0.f; + if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { + error(std::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}", + trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit)); + break; } - break; - } - float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; - if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(std::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}", - cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit)); - break; + timeCells += evaluateTask(&Tracker::computeCells, StateNames[mCurState = Celling], iteration, evalLog, iteration); + nCells += mTraits->getTFNumberOfCells(); + float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / float(mTraits->getTFNumberOfClusters()) : 0.f; + if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { + error(std::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}", + cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit)); + break; + } + timeNeighbours += evaluateTask(&Tracker::findCellsNeighbours, StateNames[mCurState = Neighbouring], iteration, evalLog, iteration); + nNeighbours += mTimeFrame->getNumberOfNeighbours(); + timeRoads += evaluateTask(&Tracker::findRoads, StateNames[mCurState = Roading], iteration, evalLog, iteration); } - - timeNeighbours += evaluateTask(&Tracker::findCellsNeighbours, "Neighbour finding", evalLog, iteration); - nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask(&Tracker::findRoads, "Road finding", evalLog, iteration); + iVertex++; + } while (iVertex < maxNvertices); + logger(std::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); + logger(std::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); + logger(std::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); + logger(std::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); + total += timeTracklets + timeCells + timeNeighbours + timeRoads; + if (mTraits->supportsExtendTracks() && mTrkParams[iteration].UseTrackFollower) { + int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; + auto timeExtending = evaluateTask(&Tracker::extendTracks, "Extending tracks", iteration, evalLog, iteration); + total += timeExtending; + logger(std::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); } - iVertex++; - } while (iVertex < maxNvertices && !dropTF); - logger(std::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); - logger(std::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(std::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); - logger(std::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); - total += timeTracklets + timeCells + timeNeighbours + timeRoads; - if (mTraits->supportsExtendTracks() && mTrkParams[iteration].UseTrackFollower && !dropTF) { - int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; - auto timeExtending = evaluateTask(&Tracker::extendTracks, "Extending tracks", [](const std::string&) {}, iteration); - total += timeExtending; - logger(std::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); } - if (dropTF) { - error("...Dropping Timeframe..."); - mTimeFrame->dropTracks(); - ++mNumberOfDroppedTFs; - return; + if (mTraits->supportsFindShortPrimaries() && mTrkParams[0].FindShortTracks) { + auto nTracksB = mTimeFrame->getNumberOfTracks(); + total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", 0, logger); + auto nTracksA = mTimeFrame->getNumberOfTracks(); + logger(std::format(" `-> found {} additional tracks", nTracksA - nTracksB)); } + if (mTrkParams[iteration].PrintMemory) { + mMemoryPool->print(); + } + if constexpr (constants::DoTimeBenchmarks) { + logger(std::format("=== TimeFrame {} processing completed in: {:.2f} ms using {} thread(s) ===", mTimeFrameCounter, total, mTraits->getNThreads())); + } + } catch (const BoundedMemoryResource::MemoryLimitExceeded& err) { + handleException(err); + return; + } catch (const std::bad_alloc& err) { + handleException(err); + return; + } catch (...) { + error("Uncaught exception, all bets are off..."); } - if (mTraits->supportsFindShortPrimaries() && mTrkParams[0].FindShortTracks) { - auto nTracksB = mTimeFrame->getNumberOfTracks(); - total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger); - auto nTracksA = mTimeFrame->getNumberOfTracks(); - logger(std::format(" `-> found {} additional tracks", nTracksA - nTracksB)); - } - - if constexpr (constants::DoTimeBenchmarks) { - logger(std::format("=== TimeFrame {} processing completed in: {:.2f} ms using {} thread(s) ===", mTimeFrameCounter, total, mTraits->getNThreads())); + if (mTrkParams[0].PrintMemory) { + mTimeFrame->printArtefactsMemory(); + mMemoryPool->print(); } if (mTimeFrame->hasMCinformation()) { @@ -398,7 +405,9 @@ void Tracker::adoptTimeFrame(TimeFrame7& tf) void Tracker::printSummary() const { - LOGP(info, "Tracker summary: Processed {} TFs (dropped {}) in TOT={:.2f} s, AVG/TF={:.2f} s", mTimeFrameCounter, mNumberOfDroppedTFs, mTotalTime * 1.e-3, mTotalTime * 1.e-3 / ((mTimeFrameCounter > 0) ? (double)mTimeFrameCounter : -1.0)); + auto avgTF = mTotalTime * 1.e-3 / ((mTimeFrameCounter > 0) ? (double)mTimeFrameCounter : -1.0); + auto avgTFwithDropped = mTotalTime * 1.e-3 / (((mTimeFrameCounter + mNumberOfDroppedTFs) > 0) ? (double)(mTimeFrameCounter + mNumberOfDroppedTFs) : -1.0); + LOGP(info, "Tracker summary: Processed {} TFs (dropped {}) in TOT={:.2f} s, AVG/TF={:.2f} ({:.2f}) s", mTimeFrameCounter, mNumberOfDroppedTFs, mTotalTime * 1.e-3, avgTF, avgTFwithDropped); } } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index da0eeb52156be..75d7f9519fbef 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -12,9 +12,6 @@ /// \file TrackerTraits.cxx /// \brief /// - -#include "ITStracking/TrackerTraits.h" - #include #include #include @@ -23,19 +20,20 @@ #include #endif +#include +#include + #include "CommonConstants/MathConstants.h" #include "DetectorsBase/Propagator.h" #include "GPUCommonMath.h" #include "ITStracking/Cell.h" #include "ITStracking/Constants.h" +#include "ITStracking/TrackerTraits.h" +#include "ITStracking/BoundedAllocator.h" #include "ITStracking/IndexTableUtils.h" #include "ITStracking/Tracklet.h" #include "ReconstructionDataFormats/Track.h" -#ifdef WITH_OPENMP -#include -#endif - using o2::base::PropagatorF; namespace @@ -78,168 +76,168 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROF const int endVtx{iVertex >= 0 ? o2::gpu::CAMath::Min(iVertex + 1, static_cast(primaryVertices.size())) : static_cast(primaryVertices.size())}; int minRof = o2::gpu::CAMath::Max(startROF, rof0 - mTrkParams[iteration].DeltaROF); int maxRof = o2::gpu::CAMath::Min(endROF - 1, rof0 + mTrkParams[iteration].DeltaROF); -#pragma omp parallel for num_threads(mNThreads) - for (int iLayer = 0; iLayer < mTrkParams[iteration].TrackletsPerRoad(); ++iLayer) { - gsl::span layer0 = mTimeFrame->getClustersOnLayer(rof0, iLayer); - if (layer0.empty()) { - continue; - } - float meanDeltaR{mTrkParams[iteration].LayerRadii[iLayer + 1] - mTrkParams[iteration].LayerRadii[iLayer]}; - const int currentLayerClustersNum{static_cast(layer0.size())}; - for (int iCluster{0}; iCluster < currentLayerClustersNum; ++iCluster) { - const Cluster& currentCluster{layer0[iCluster]}; - const int currentSortedIndex{mTimeFrame->getSortedIndex(rof0, iLayer, iCluster)}; - - if (mTimeFrame->isClusterUsed(iLayer, currentCluster.clusterId)) { - continue; - } - const float inverseR0{1.f / currentCluster.radius}; - - for (int iV{startVtx}; iV < endVtx; ++iV) { - auto& primaryVertex{primaryVertices[iV]}; - if (primaryVertex.isFlagSet(2) && iteration != 3) { - continue; - } - const float resolution = o2::gpu::CAMath::Sqrt(Sq(mTrkParams[iteration].PVres) / primaryVertex.getNContributors() + Sq(mTimeFrame->getPositionResolution(iLayer))); + mTaskArena.execute([&] { + tbb::parallel_for( + tbb::blocked_range(0, mTrkParams[iteration].TrackletsPerRoad()), + [&](const tbb::blocked_range& Layers) { + for (int iLayer = Layers.begin(); iLayer < Layers.end(); ++iLayer) { + gsl::span layer0 = mTimeFrame->getClustersOnLayer(rof0, iLayer); + if (layer0.empty()) { + continue; + } + float meanDeltaR{mTrkParams[iteration].LayerRadii[iLayer + 1] - mTrkParams[iteration].LayerRadii[iLayer]}; - const float tanLambda{(currentCluster.zCoordinate - primaryVertex.getZ()) * inverseR0}; + const int currentLayerClustersNum{static_cast(layer0.size())}; + for (int iCluster{0}; iCluster < currentLayerClustersNum; ++iCluster) { + const Cluster& currentCluster{layer0[iCluster]}; + const int currentSortedIndex{mTimeFrame->getSortedIndex(rof0, iLayer, iCluster)}; - const float zAtRmin{tanLambda * (mTimeFrame->getMinR(iLayer + 1) - currentCluster.radius) + currentCluster.zCoordinate}; - const float zAtRmax{tanLambda * (mTimeFrame->getMaxR(iLayer + 1) - currentCluster.radius) + currentCluster.zCoordinate}; + if (mTimeFrame->isClusterUsed(iLayer, currentCluster.clusterId)) { + continue; + } + const float inverseR0{1.f / currentCluster.radius}; - const float sqInverseDeltaZ0{1.f / (Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution - const float sigmaZ{o2::gpu::CAMath::Sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * mTimeFrame->getMSangle(iLayer)))}; + for (int iV{startVtx}; iV < endVtx; ++iV) { + auto& primaryVertex{primaryVertices[iV]}; + if (primaryVertex.isFlagSet(2) && iteration != 3) { + continue; + } + const float resolution = o2::gpu::CAMath::Sqrt(Sq(mTrkParams[iteration].PVres) / primaryVertex.getNContributors() + Sq(mTimeFrame->getPositionResolution(iLayer))); - const int4 selectedBinsRect{getBinsRect(currentCluster, iLayer + 1, zAtRmin, zAtRmax, sigmaZ * mTrkParams[iteration].NSigmaCut, mTimeFrame->getPhiCut(iLayer))}; - if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { - continue; - } + const float tanLambda{(currentCluster.zCoordinate - primaryVertex.getZ()) * inverseR0}; - int phiBinsNum{selectedBinsRect.w - selectedBinsRect.y + 1}; + const float zAtRmin{tanLambda * (mTimeFrame->getMinR(iLayer + 1) - currentCluster.radius) + currentCluster.zCoordinate}; + const float zAtRmax{tanLambda * (mTimeFrame->getMaxR(iLayer + 1) - currentCluster.radius) + currentCluster.zCoordinate}; - if (phiBinsNum < 0) { - phiBinsNum += mTrkParams[iteration].PhiBins; - } + const float sqInverseDeltaZ0{1.f / (Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution + const float sigmaZ{o2::gpu::CAMath::Sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * mTimeFrame->getMSangle(iLayer)))}; - for (int rof1{minRof}; rof1 <= maxRof; ++rof1) { - auto layer1 = mTimeFrame->getClustersOnLayer(rof1, iLayer + 1); - if (layer1.empty()) { - continue; - } - for (int iPhiCount{0}; iPhiCount < phiBinsNum; iPhiCount++) { - int iPhiBin = (selectedBinsRect.y + iPhiCount) % mTrkParams[iteration].PhiBins; - const int firstBinIndex{mTimeFrame->mIndexTableUtils.getBinIndex(selectedBinsRect.x, iPhiBin)}; - const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1}; - if constexpr (debugLevel) { - if (firstBinIndex < 0 || firstBinIndex > mTimeFrame->getIndexTable(rof1, iLayer + 1).size() || - maxBinIndex < 0 || maxBinIndex > mTimeFrame->getIndexTable(rof1, iLayer + 1).size()) { - std::cout << iLayer << "\t" << iCluster << "\t" << zAtRmin << "\t" << zAtRmax << "\t" << sigmaZ * mTrkParams[iteration].NSigmaCut << "\t" << mTimeFrame->getPhiCut(iLayer) << std::endl; - std::cout << currentCluster.zCoordinate << "\t" << primaryVertex.getZ() << "\t" << currentCluster.radius << std::endl; - std::cout << mTimeFrame->getMinR(iLayer + 1) << "\t" << currentCluster.radius << "\t" << currentCluster.zCoordinate << std::endl; - std::cout << "Illegal access to IndexTable " << firstBinIndex << "\t" << maxBinIndex << "\t" << selectedBinsRect.z << "\t" << selectedBinsRect.x << std::endl; - exit(1); - } - } - const int firstRowClusterIndex = mTimeFrame->getIndexTable(rof1, iLayer + 1)[firstBinIndex]; - const int maxRowClusterIndex = mTimeFrame->getIndexTable(rof1, iLayer + 1)[maxBinIndex]; - for (int iNextCluster{firstRowClusterIndex}; iNextCluster < maxRowClusterIndex; ++iNextCluster) { - if (iNextCluster >= (int)layer1.size()) { - break; + const int4 selectedBinsRect{getBinsRect(currentCluster, iLayer + 1, zAtRmin, zAtRmax, sigmaZ * mTrkParams[iteration].NSigmaCut, mTimeFrame->getPhiCut(iLayer))}; + if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { + continue; } - const Cluster& nextCluster{layer1[iNextCluster]}; - if (mTimeFrame->isClusterUsed(iLayer + 1, nextCluster.clusterId)) { - continue; + int phiBinsNum{selectedBinsRect.w - selectedBinsRect.y + 1}; + + if (phiBinsNum < 0) { + phiBinsNum += mTrkParams[iteration].PhiBins; } - const float deltaPhi{o2::gpu::GPUCommonMath::Abs(currentCluster.phi - nextCluster.phi)}; - const float deltaZ{o2::gpu::GPUCommonMath::Abs(tanLambda * (nextCluster.radius - currentCluster.radius) + - currentCluster.zCoordinate - nextCluster.zCoordinate)}; + for (int rof1{minRof}; rof1 <= maxRof; ++rof1) { + auto layer1 = mTimeFrame->getClustersOnLayer(rof1, iLayer + 1); + if (layer1.empty()) { + continue; + } + for (int iPhiCount{0}; iPhiCount < phiBinsNum; iPhiCount++) { + int iPhiBin = (selectedBinsRect.y + iPhiCount) % mTrkParams[iteration].PhiBins; + const int firstBinIndex{mTimeFrame->mIndexTableUtils.getBinIndex(selectedBinsRect.x, iPhiBin)}; + const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1}; + if constexpr (debugLevel) { + if (firstBinIndex < 0 || firstBinIndex > mTimeFrame->getIndexTable(rof1, iLayer + 1).size() || + maxBinIndex < 0 || maxBinIndex > mTimeFrame->getIndexTable(rof1, iLayer + 1).size()) { + std::cout << iLayer << "\t" << iCluster << "\t" << zAtRmin << "\t" << zAtRmax << "\t" << sigmaZ * mTrkParams[iteration].NSigmaCut << "\t" << mTimeFrame->getPhiCut(iLayer) << std::endl; + std::cout << currentCluster.zCoordinate << "\t" << primaryVertex.getZ() << "\t" << currentCluster.radius << std::endl; + std::cout << mTimeFrame->getMinR(iLayer + 1) << "\t" << currentCluster.radius << "\t" << currentCluster.zCoordinate << std::endl; + std::cout << "Illegal access to IndexTable " << firstBinIndex << "\t" << maxBinIndex << "\t" << selectedBinsRect.z << "\t" << selectedBinsRect.x << std::endl; + exit(1); + } + } + const int firstRowClusterIndex = mTimeFrame->getIndexTable(rof1, iLayer + 1)[firstBinIndex]; + const int maxRowClusterIndex = mTimeFrame->getIndexTable(rof1, iLayer + 1)[maxBinIndex]; + for (int iNextCluster{firstRowClusterIndex}; iNextCluster < maxRowClusterIndex; ++iNextCluster) { + if (iNextCluster >= (int)layer1.size()) { + break; + } + + const Cluster& nextCluster{layer1[iNextCluster]}; + if (mTimeFrame->isClusterUsed(iLayer + 1, nextCluster.clusterId)) { + continue; + } + + const float deltaPhi{o2::gpu::GPUCommonMath::Abs(currentCluster.phi - nextCluster.phi)}; + const float deltaZ{o2::gpu::GPUCommonMath::Abs(tanLambda * (nextCluster.radius - currentCluster.radius) + + currentCluster.zCoordinate - nextCluster.zCoordinate)}; #ifdef OPTIMISATION_OUTPUT - MCCompLabel label; - int currentId{currentCluster.clusterId}; - int nextId{nextCluster.clusterId}; - for (auto& lab1 : mTimeFrame->getClusterLabels(iLayer, currentId)) { - for (auto& lab2 : mTimeFrame->getClusterLabels(iLayer + 1, nextId)) { - if (lab1 == lab2 && lab1.isValid()) { - label = lab1; - break; - } - } - if (label.isValid()) { - break; - } - } - off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, label.isValid(), (tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate) / sigmaZ, tanLambda, resolution, sigmaZ) << std::endl; + MCCompLabel label; + int currentId{currentCluster.clusterId}; + int nextId{nextCluster.clusterId}; + for (auto& lab1 : mTimeFrame->getClusterLabels(iLayer, currentId)) { + for (auto& lab2 : mTimeFrame->getClusterLabels(iLayer + 1, nextId)) { + if (lab1 == lab2 && lab1.isValid()) { + label = lab1; + break; + } + } + if (label.isValid()) { + break; + } + } + off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, label.isValid(), (tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate) / sigmaZ, tanLambda, resolution, sigmaZ) << std::endl; #endif - if (deltaZ / sigmaZ < mTrkParams[iteration].NSigmaCut && - (deltaPhi < mTimeFrame->getPhiCut(iLayer) || - o2::gpu::GPUCommonMath::Abs(deltaPhi - constants::math::TwoPi) < mTimeFrame->getPhiCut(iLayer))) { - if (iLayer > 0) { - mTimeFrame->getTrackletsLookupTable()[iLayer - 1][currentSortedIndex]++; + if (deltaZ / sigmaZ < mTrkParams[iteration].NSigmaCut && + (deltaPhi < mTimeFrame->getPhiCut(iLayer) || + o2::gpu::GPUCommonMath::Abs(deltaPhi - constants::math::TwoPi) < mTimeFrame->getPhiCut(iLayer))) { + if (iLayer > 0) { + mTimeFrame->getTrackletsLookupTable()[iLayer - 1][currentSortedIndex]++; + } + const float phi{o2::gpu::GPUCommonMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, + currentCluster.xCoordinate - nextCluster.xCoordinate)}; + const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / + (currentCluster.radius - nextCluster.radius)}; + mTimeFrame->getTracklets()[iLayer].emplace_back(currentSortedIndex, mTimeFrame->getSortedIndex(rof1, iLayer + 1, iNextCluster), tanL, phi, rof0, rof1); + } + } } - const float phi{o2::gpu::GPUCommonMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, - currentCluster.xCoordinate - nextCluster.xCoordinate)}; - const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / - (currentCluster.radius - nextCluster.radius)}; - mTimeFrame->getTracklets()[iLayer].emplace_back(currentSortedIndex, mTimeFrame->getSortedIndex(rof1, iLayer + 1, iNextCluster), tanL, phi, rof0, rof1); } } } } - } - } - } - } - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - return; - } - -#pragma omp parallel for num_threads(mNThreads) - for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) { - /// Sort tracklets - auto& trkl{mTimeFrame->getTracklets()[iLayer + 1]}; - std::sort(trkl.begin(), trkl.end(), [](const Tracklet& a, const Tracklet& b) { - return a.firstClusterIndex < b.firstClusterIndex || (a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex < b.secondClusterIndex); + }); }); - /// Remove duplicates - auto& lut{mTimeFrame->getTrackletsLookupTable()[iLayer]}; - int id0{-1}, id1{-1}; - std::vector newTrk; - newTrk.reserve(trkl.size()); - for (auto& trk : trkl) { - if (trk.firstClusterIndex == id0 && trk.secondClusterIndex == id1) { - lut[id0]--; - } else { - id0 = trk.firstClusterIndex; - id1 = trk.secondClusterIndex; - newTrk.push_back(trk); - } - } - trkl.swap(newTrk); - - /// Compute LUT - std::exclusive_scan(lut.begin(), lut.end(), lut.begin(), 0); - lut.push_back(trkl.size()); } - /// Layer 0 is done outside the loop - std::sort(mTimeFrame->getTracklets()[0].begin(), mTimeFrame->getTracklets()[0].end(), [](const Tracklet& a, const Tracklet& b) { + + auto sortTracklets = [](const Tracklet& a, const Tracklet& b) -> bool { return a.firstClusterIndex < b.firstClusterIndex || (a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex < b.secondClusterIndex); + }; + auto equalTracklets = [](const Tracklet& a, const Tracklet& b) -> bool { + return a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex == b.secondClusterIndex; + }; + + mTaskArena.execute([&] { + tbb::parallel_for( + tbb::blocked_range(0, mTrkParams[iteration].CellsPerRoad()), + [&](const tbb::blocked_range& Layers) { + for (int iLayer = Layers.begin(); iLayer < Layers.end(); ++iLayer) { + /// Sort tracklets + auto& trkl{mTimeFrame->getTracklets()[iLayer + 1]}; + tbb::parallel_sort(trkl.begin(), trkl.end(), sortTracklets); + /// Remove duplicates + trkl.erase(std::unique(trkl.begin(), trkl.end(), equalTracklets), trkl.end()); + trkl.shrink_to_fit(); + /// recalculate lut + auto& lut{mTimeFrame->getTrackletsLookupTable()[iLayer]}; + std::fill(lut.begin(), lut.end(), 0); + if (trkl.empty()) { + return; + } + for (const auto& tkl : trkl) { + lut[tkl.firstClusterIndex]++; + } + std::exclusive_scan(lut.begin(), lut.end(), lut.begin(), 0); + lut.push_back(trkl.size()); + } + }); }); - int id0{-1}, id1{-1}; - std::vector newTrk; - newTrk.reserve(mTimeFrame->getTracklets()[0].size()); - for (auto& trk : mTimeFrame->getTracklets()[0]) { - if (trk.firstClusterIndex != id0 || trk.secondClusterIndex != id1) { - id0 = trk.firstClusterIndex; - id1 = trk.secondClusterIndex; - newTrk.push_back(trk); - } - } - mTimeFrame->getTracklets()[0].swap(newTrk); + + /// Layer 0 is done outside the loop + // in-place deduplication + auto& trklt0 = mTimeFrame->getTracklets()[0]; + mTaskArena.execute([&] { tbb::parallel_sort(trklt0.begin(), trklt0.end(), sortTracklets); }); + trklt0.erase(std::unique(trklt0.begin(), trklt0.end(), equalTracklets), trklt0.end()); + trklt0.shrink_to_fit(); /// Create tracklets labels if (mTimeFrame->hasMCinformation()) { @@ -281,104 +279,107 @@ void TrackerTraits::computeLayerCells(const int iteration) } } -#pragma omp parallel for num_threads(mNThreads) - for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) { + mTaskArena.execute([&] { + tbb::parallel_for( + tbb::blocked_range(0, mTrkParams[iteration].CellsPerRoad()), + [&](const tbb::blocked_range& Layers) { + for (int iLayer = Layers.begin(); iLayer < Layers.end(); ++iLayer) { - if (mTimeFrame->getTracklets()[iLayer + 1].empty() || - mTimeFrame->getTracklets()[iLayer].empty()) { - continue; - } + if (mTimeFrame->getTracklets()[iLayer + 1].empty() || + mTimeFrame->getTracklets()[iLayer].empty()) { + continue; + } #ifdef OPTIMISATION_OUTPUT - float resolution{o2::gpu::CAMath::Sqrt(0.5f * (mTrkParams[iteration].SystErrorZ2[iLayer] + mTrkParams[iteration].SystErrorZ2[iLayer + 1] + mTrkParams[iteration].SystErrorZ2[iLayer + 2] + mTrkParams[iteration].SystErrorY2[iLayer] + mTrkParams[iteration].SystErrorY2[iLayer + 1] + mTrkParams[iteration].SystErrorY2[iLayer + 2])) / mTrkParams[iteration].LayerResolution[iLayer]}; - resolution = resolution > 1.e-12 ? resolution : 1.f; + float resolution{o2::gpu::CAMath::Sqrt(0.5f * (mTrkParams[iteration].SystErrorZ2[iLayer] + mTrkParams[iteration].SystErrorZ2[iLayer + 1] + mTrkParams[iteration].SystErrorZ2[iLayer + 2] + mTrkParams[iteration].SystErrorY2[iLayer] + mTrkParams[iteration].SystErrorY2[iLayer + 1] + mTrkParams[iteration].SystErrorY2[iLayer + 2])) / mTrkParams[iteration].LayerResolution[iLayer]}; + resolution = resolution > 1.e-12 ? resolution : 1.f; #endif - const int currentLayerTrackletsNum{static_cast(mTimeFrame->getTracklets()[iLayer].size())}; - for (int iTracklet{0}; iTracklet < currentLayerTrackletsNum; ++iTracklet) { - const Tracklet& currentTracklet{mTimeFrame->getTracklets()[iLayer][iTracklet]}; - const int nextLayerClusterIndex{currentTracklet.secondClusterIndex}; - const int nextLayerFirstTrackletIndex{ - mTimeFrame->getTrackletsLookupTable()[iLayer][nextLayerClusterIndex]}; - const int nextLayerLastTrackletIndex{ - mTimeFrame->getTrackletsLookupTable()[iLayer][nextLayerClusterIndex + 1]}; + const int currentLayerTrackletsNum{static_cast(mTimeFrame->getTracklets()[iLayer].size())}; + for (int iTracklet{0}; iTracklet < currentLayerTrackletsNum; ++iTracklet) { - if (nextLayerFirstTrackletIndex == nextLayerLastTrackletIndex) { - continue; - } + const Tracklet& currentTracklet{mTimeFrame->getTracklets()[iLayer][iTracklet]}; + const int nextLayerClusterIndex{currentTracklet.secondClusterIndex}; + const int nextLayerFirstTrackletIndex{ + mTimeFrame->getTrackletsLookupTable()[iLayer][nextLayerClusterIndex]}; + const int nextLayerLastTrackletIndex{ + mTimeFrame->getTrackletsLookupTable()[iLayer][nextLayerClusterIndex + 1]}; - for (int iNextTracklet{nextLayerFirstTrackletIndex}; iNextTracklet < nextLayerLastTrackletIndex; ++iNextTracklet) { - if (mTimeFrame->getTracklets()[iLayer + 1][iNextTracklet].firstClusterIndex != nextLayerClusterIndex) { - break; - } - const Tracklet& nextTracklet{mTimeFrame->getTracklets()[iLayer + 1][iNextTracklet]}; - const float deltaTanLambda{std::abs(currentTracklet.tanLambda - nextTracklet.tanLambda)}; + if (nextLayerFirstTrackletIndex == nextLayerLastTrackletIndex) { + continue; + } + + for (int iNextTracklet{nextLayerFirstTrackletIndex}; iNextTracklet < nextLayerLastTrackletIndex; ++iNextTracklet) { + if (mTimeFrame->getTracklets()[iLayer + 1][iNextTracklet].firstClusterIndex != nextLayerClusterIndex) { + break; + } + const Tracklet& nextTracklet{mTimeFrame->getTracklets()[iLayer + 1][iNextTracklet]}; + const float deltaTanLambda{std::abs(currentTracklet.tanLambda - nextTracklet.tanLambda)}; #ifdef OPTIMISATION_OUTPUT - bool good{mTimeFrame->getTrackletsLabel(iLayer)[iTracklet] == mTimeFrame->getTrackletsLabel(iLayer + 1)[iNextTracklet]}; - float signedDelta{currentTracklet.tanLambda - nextTracklet.tanLambda}; - off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, good, signedDelta, signedDelta / (mTrkParams[iteration].CellDeltaTanLambdaSigma), tanLambda, resolution) << std::endl; + bool good{mTimeFrame->getTrackletsLabel(iLayer)[iTracklet] == mTimeFrame->getTrackletsLabel(iLayer + 1)[iNextTracklet]}; + float signedDelta{currentTracklet.tanLambda - nextTracklet.tanLambda}; + off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, good, signedDelta, signedDelta / (mTrkParams[iteration].CellDeltaTanLambdaSigma), tanLambda, resolution) << std::endl; #endif - if (deltaTanLambda / mTrkParams[iteration].CellDeltaTanLambdaSigma < mTrkParams[iteration].NSigmaCut) { + if (deltaTanLambda / mTrkParams[iteration].CellDeltaTanLambdaSigma < mTrkParams[iteration].NSigmaCut) { - /// Track seed preparation. Clusters are numbered progressively from the innermost going outward. - const int clusId[3]{ - mTimeFrame->getClusters()[iLayer][currentTracklet.firstClusterIndex].clusterId, - mTimeFrame->getClusters()[iLayer + 1][nextTracklet.firstClusterIndex].clusterId, - mTimeFrame->getClusters()[iLayer + 2][nextTracklet.secondClusterIndex].clusterId}; - const auto& cluster1_glo = mTimeFrame->getUnsortedClusters()[iLayer].at(clusId[0]); - const auto& cluster2_glo = mTimeFrame->getUnsortedClusters()[iLayer + 1].at(clusId[1]); - const auto& cluster3_tf = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer + 2).at(clusId[2]); - auto track{buildTrackSeed(cluster1_glo, cluster2_glo, cluster3_tf)}; + /// Track seed preparation. Clusters are numbered progressively from the innermost going outward. + const int clusId[3]{ + mTimeFrame->getClusters()[iLayer][currentTracklet.firstClusterIndex].clusterId, + mTimeFrame->getClusters()[iLayer + 1][nextTracklet.firstClusterIndex].clusterId, + mTimeFrame->getClusters()[iLayer + 2][nextTracklet.secondClusterIndex].clusterId}; + const auto& cluster1_glo = mTimeFrame->getUnsortedClusters()[iLayer].at(clusId[0]); + const auto& cluster2_glo = mTimeFrame->getUnsortedClusters()[iLayer + 1].at(clusId[1]); + const auto& cluster3_tf = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer + 2).at(clusId[2]); + auto track{buildTrackSeed(cluster1_glo, cluster2_glo, cluster3_tf)}; - float chi2{0.f}; - bool good{false}; - for (int iC{2}; iC--;) { - const TrackingFrameInfo& trackingHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer + iC).at(clusId[iC]); + float chi2{0.f}; + bool good{false}; + for (int iC{2}; iC--;) { + const TrackingFrameInfo& trackingHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer + iC).at(clusId[iC]); - if (!track.rotate(trackingHit.alphaTrackingFrame)) { - break; - } + if (!track.rotate(trackingHit.alphaTrackingFrame)) { + break; + } - if (!track.propagateTo(trackingHit.xTrackingFrame, getBz())) { - break; - } + if (!track.propagateTo(trackingHit.xTrackingFrame, getBz())) { + break; + } - constexpr float radl = 9.36f; // Radiation length of Si [cm] - constexpr float rho = 2.33f; // Density of Si [g/cm^3] - if (!track.correctForMaterial(mTrkParams[0].LayerxX0[iLayer + iC], mTrkParams[0].LayerxX0[iLayer] * radl * rho, true)) { - break; - } + constexpr float radl = 9.36f; // Radiation length of Si [cm] + constexpr float rho = 2.33f; // Density of Si [g/cm^3] + if (!track.correctForMaterial(mTrkParams[0].LayerxX0[iLayer + iC], mTrkParams[0].LayerxX0[iLayer] * radl * rho, true)) { + break; + } - auto predChi2{track.getPredictedChi2Quiet(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)}; - if (!track.o2::track::TrackParCov::update(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)) { - break; - } - if (!iC && predChi2 > mTrkParams[iteration].MaxChi2ClusterAttachment) { - break; + auto predChi2{track.getPredictedChi2Quiet(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)}; + if (!track.o2::track::TrackParCov::update(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)) { + break; + } + if (!iC && predChi2 > mTrkParams[iteration].MaxChi2ClusterAttachment) { + break; + } + good = !iC; + chi2 += predChi2; + } + if (!good) { + continue; + } + if (iLayer > 0 && (int)mTimeFrame->getCellsLookupTable()[iLayer - 1].size() <= iTracklet) { + mTimeFrame->getCellsLookupTable()[iLayer - 1].resize(iTracklet + 1, mTimeFrame->getCells()[iLayer].size()); + } + mTimeFrame->getCells()[iLayer].emplace_back(iLayer, clusId[0], clusId[1], clusId[2], + iTracklet, iNextTracklet, track, chi2); + } } - good = !iC; - chi2 += predChi2; } - if (!good) { - continue; - } - if (iLayer > 0 && (int)mTimeFrame->getCellsLookupTable()[iLayer - 1].size() <= iTracklet) { - mTimeFrame->getCellsLookupTable()[iLayer - 1].resize(iTracklet + 1, mTimeFrame->getCells()[iLayer].size()); + if (iLayer > 0) { + mTimeFrame->getCellsLookupTable()[iLayer - 1].resize(currentLayerTrackletsNum + 1, mTimeFrame->getCells()[iLayer].size()); } - mTimeFrame->getCells()[iLayer].emplace_back(iLayer, clusId[0], clusId[1], clusId[2], - iTracklet, iNextTracklet, track, chi2); } - } - } - if (iLayer > 0) { - mTimeFrame->getCellsLookupTable()[iLayer - 1].resize(currentLayerTrackletsNum + 1, mTimeFrame->getCells()[iLayer].size()); - } - } - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - return; - } + }); + }); /// Create cells labels if (mTimeFrame->hasMCinformation()) { @@ -415,7 +416,7 @@ void TrackerTraits::findCellsNeighbours(const int iteration) } int layerCellsNum{static_cast(mTimeFrame->getCells()[iLayer].size())}; - std::vector> cellsNeighbours; + bounded_vector> cellsNeighbours(mMemoryPool.get()); cellsNeighbours.reserve(nextLayerCellsNum); for (int iCell{0}; iCell < layerCellsNum; ++iCell) { @@ -467,13 +468,8 @@ void TrackerTraits::findCellsNeighbours(const int iteration) } template -void TrackerTraits::processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeeds, std::vector& updatedCellsIds) +void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeeds, bounded_vector& updatedCellsIds) { - bool print = iLayer == 3 && iLevel == 2; - if (iLevel < 2 || iLayer < 1) { - std::cout << "Error: layer " << iLayer << " or level " << iLevel << " cannot be processed by processNeighbours" << std::endl; - exit(1); - } CA_DEBUGGER(std::cout << "Processing neighbours layer " << iLayer << " level " << iLevel << ", size of the cell seeds: " << currentCellSeed.size() << std::endl); updatedCellSeeds.reserve(mTimeFrame->getCellsNeighboursLUT()[iLayer - 1].size()); /// This is not the correct value, we could do a loop to count the number of neighbours updatedCellsIds.reserve(updatedCellSeeds.size()); @@ -482,79 +478,98 @@ void TrackerTraits::processNeighbours(int iLayer, int iLevel, const std int failed[5]{0, 0, 0, 0, 0}, attempts{0}, failedByMismatch{0}; #endif -#pragma omp parallel for num_threads(mNThreads) - for (unsigned int iCell = 0; iCell < currentCellSeed.size(); ++iCell) { - const CellSeed& currentCell{currentCellSeed[iCell]}; - if (currentCell.getLevel() != iLevel) { - continue; - } - if (currentCellId.empty() && (mTimeFrame->isClusterUsed(iLayer, currentCell.getFirstClusterIndex()) || - mTimeFrame->isClusterUsed(iLayer + 1, currentCell.getSecondClusterIndex()) || - mTimeFrame->isClusterUsed(iLayer + 2, currentCell.getThirdClusterIndex()))) { - continue; /// this we do only on the first iteration, hence the check on currentCellId - } - const int cellId = currentCellId.empty() ? iCell : currentCellId[iCell]; - const int startNeighbourId{cellId ? mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId - 1] : 0}; - const int endNeighbourId{mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId]}; - - for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) { - CA_DEBUGGER(attempts++); - const int neighbourCellId = mTimeFrame->getCellsNeighbours()[iLayer - 1][iNeighbourCell]; - const CellSeed& neighbourCell = mTimeFrame->getCells()[iLayer - 1][neighbourCellId]; - if (neighbourCell.getSecondTrackletIndex() != currentCell.getFirstTrackletIndex()) { - CA_DEBUGGER(failedByMismatch++); - continue; - } - if (mTimeFrame->isClusterUsed(iLayer - 1, neighbourCell.getFirstClusterIndex())) { - continue; - } - if (currentCell.getLevel() - 1 != neighbourCell.getLevel()) { - CA_DEBUGGER(failed[0]++); - continue; - } - /// Let's start the fitting procedure - CellSeed seed{currentCell}; - auto& trHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer - 1).at(neighbourCell.getFirstClusterIndex()); + mTaskArena.execute([&] { + // TODO better to use concurrent vector? + tbb::combinable, bounded_vector>> locUpdatedData([&] { + return std::make_pair(bounded_vector(mMemoryPool.get()), bounded_vector(mMemoryPool.get())); + }); - if (!seed.rotate(trHit.alphaTrackingFrame)) { - CA_DEBUGGER(failed[1]++); - continue; - } + tbb::parallel_for( + tbb::blocked_range(0, (int)currentCellSeed.size()), + [&](const tbb::blocked_range& Cells) { + auto& [locUpdatedCellsIds, locUpdatedCellSeeds] = locUpdatedData.local(); - if (!propagator->propagateToX(seed, trHit.xTrackingFrame, getBz(), o2::base::PropagatorImpl::MAX_SIN_PHI, o2::base::PropagatorImpl::MAX_STEP, mCorrType)) { - CA_DEBUGGER(failed[2]++); - continue; - } + for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) { + const CellSeed& currentCell{currentCellSeed[iCell]}; + if (currentCell.getLevel() != iLevel) { + continue; + } + if (currentCellId.empty() && (mTimeFrame->isClusterUsed(iLayer, currentCell.getFirstClusterIndex()) || + mTimeFrame->isClusterUsed(iLayer + 1, currentCell.getSecondClusterIndex()) || + mTimeFrame->isClusterUsed(iLayer + 2, currentCell.getThirdClusterIndex()))) { + continue; /// this we do only on the first iteration, hence the check on currentCellId + } + const int cellId = currentCellId.empty() ? iCell : currentCellId[iCell]; + const int startNeighbourId{cellId ? mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId - 1] : 0}; + const int endNeighbourId{mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId]}; + + for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) { + CA_DEBUGGER(attempts++); + const int neighbourCellId = mTimeFrame->getCellsNeighbours()[iLayer - 1][iNeighbourCell]; + const CellSeed& neighbourCell = mTimeFrame->getCells()[iLayer - 1][neighbourCellId]; + if (neighbourCell.getSecondTrackletIndex() != currentCell.getFirstTrackletIndex()) { + CA_DEBUGGER(failedByMismatch++); + continue; + } + if (mTimeFrame->isClusterUsed(iLayer - 1, neighbourCell.getFirstClusterIndex())) { + continue; + } + if (currentCell.getLevel() - 1 != neighbourCell.getLevel()) { + CA_DEBUGGER(failed[0]++); + continue; + } + /// Let's start the fitting procedure + CellSeed seed{currentCell}; + auto& trHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer - 1).at(neighbourCell.getFirstClusterIndex()); - if (mCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { - float radl = 9.36f; // Radiation length of Si [cm] - float rho = 2.33f; // Density of Si [g/cm^3] - if (!seed.correctForMaterial(mTrkParams[0].LayerxX0[iLayer - 1], mTrkParams[0].LayerxX0[iLayer - 1] * radl * rho, true)) { - continue; + if (!seed.rotate(trHit.alphaTrackingFrame)) { + CA_DEBUGGER(failed[1]++); + continue; + } + + if (!propagator->propagateToX(seed, trHit.xTrackingFrame, getBz(), o2::base::PropagatorImpl::MAX_SIN_PHI, o2::base::PropagatorImpl::MAX_STEP, mCorrType)) { + CA_DEBUGGER(failed[2]++); + continue; + } + + if (mCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { + float radl = 9.36f; // Radiation length of Si [cm] + float rho = 2.33f; // Density of Si [g/cm^3] + if (!seed.correctForMaterial(mTrkParams[0].LayerxX0[iLayer - 1], mTrkParams[0].LayerxX0[iLayer - 1] * radl * rho, true)) { + continue; + } + } + + auto predChi2{seed.getPredictedChi2Quiet(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)}; + if ((predChi2 > mTrkParams[0].MaxChi2ClusterAttachment) || predChi2 < 0.f) { + CA_DEBUGGER(failed[3]++); + continue; + } + seed.setChi2(seed.getChi2() + predChi2); + if (!seed.o2::track::TrackParCov::update(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)) { + CA_DEBUGGER(failed[4]++); + continue; + } + seed.getClusters()[iLayer - 1] = neighbourCell.getFirstClusterIndex(); + seed.setLevel(neighbourCell.getLevel()); + seed.setFirstTrackletIndex(neighbourCell.getFirstTrackletIndex()); + seed.setSecondTrackletIndex(neighbourCell.getSecondTrackletIndex()); + + locUpdatedCellSeeds.push_back(seed); + locUpdatedCellsIds.push_back(neighbourCellId); + } } - } + }); + + locUpdatedData.combine_each([&](const auto& localData) { + const auto& [ids, seeds] = localData; + updatedCellsIds.insert(updatedCellsIds.begin(), ids.begin(), ids.end()); + updatedCellSeeds.insert(updatedCellSeeds.begin(), seeds.begin(), seeds.end()); + }); + }); + updatedCellSeeds.shrink_to_fit(); + updatedCellsIds.shrink_to_fit(); - auto predChi2{seed.getPredictedChi2Quiet(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)}; - if ((predChi2 > mTrkParams[0].MaxChi2ClusterAttachment) || predChi2 < 0.f) { - CA_DEBUGGER(failed[3]++); - continue; - } - seed.setChi2(seed.getChi2() + predChi2); - if (!seed.o2::track::TrackParCov::update(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)) { - CA_DEBUGGER(failed[4]++); - continue; - } - seed.getClusters()[iLayer - 1] = neighbourCell.getFirstClusterIndex(); - seed.setLevel(neighbourCell.getLevel()); - seed.setFirstTrackletIndex(neighbourCell.getFirstTrackletIndex()); - seed.setSecondTrackletIndex(neighbourCell.getSecondTrackletIndex()); -#pragma omp critical - { - updatedCellsIds.push_back(neighbourCellId); - updatedCellSeeds.push_back(seed); - } - } - } #ifdef CA_DEBUG std::cout << "\t\t- Found " << updatedCellSeeds.size() << " cell seeds out of " << attempts << " attempts" << std::endl; std::cout << "\t\t\t> " << failed[0] << " failed because of level" << std::endl; @@ -570,17 +585,17 @@ template void TrackerTraits::findRoads(const int iteration) { CA_DEBUGGER(std::cout << "Finding roads, iteration " << iteration << std::endl); + for (int startLevel{mTrkParams[iteration].CellsPerRoad()}; startLevel >= mTrkParams[iteration].CellMinimumLevel(); --startLevel) { CA_DEBUGGER(std::cout << "\t > Processing level " << startLevel << std::endl); - const int minimumLayer{startLevel - 1}; - std::vector trackSeeds; - for (int startLayer{mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) { + bounded_vector trackSeeds(mMemoryPool.get()); + for (int startLayer{mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= startLevel - 1; --startLayer) { if ((mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { continue; } CA_DEBUGGER(std::cout << "\t\t > Starting processing layer " << startLayer << std::endl); - std::vector lastCellId, updatedCellId; - std::vector lastCellSeed, updatedCellSeed; + bounded_vector lastCellId(mMemoryPool.get()), updatedCellId(mMemoryPool.get()); + bounded_vector lastCellSeed(mMemoryPool.get()), updatedCellSeed(mMemoryPool.get()); processNeighbours(startLayer, startLevel, mTimeFrame->getCells()[startLayer], lastCellId, updatedCellSeed, updatedCellId); @@ -588,47 +603,64 @@ void TrackerTraits::findRoads(const int iteration) for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { lastCellSeed.swap(updatedCellSeed); lastCellId.swap(updatedCellId); - std::vector().swap(updatedCellSeed); /// tame the memory peaks - updatedCellId.clear(); + deepVectorClear(updatedCellSeed); /// tame the memory peaks + deepVectorClear(updatedCellId); /// tame the memory peaks processNeighbours(iLayer, --level, lastCellSeed, lastCellId, updatedCellSeed, updatedCellId); } - for (auto& seed : updatedCellSeed) { - if (seed.getQ2Pt() > 1.e3 || seed.getChi2() > mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5)) { - continue; - } - trackSeeds.push_back(seed); - } + std::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), std::back_inserter(trackSeeds), [&](const CellSeed& seed) { + return seed.getQ2Pt() <= 1.e3 && seed.getChi2() <= mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5); + }); } - std::vector tracks(trackSeeds.size()); - std::atomic trackIndex{0}; -#pragma omp parallel for num_threads(mNThreads) - for (size_t seedId = 0; seedId < trackSeeds.size(); ++seedId) { - const CellSeed& seed{trackSeeds[seedId]}; - TrackITSExt temporaryTrack{seed}; - temporaryTrack.resetCovariance(); - temporaryTrack.setChi2(0); - for (int iL{0}; iL < 7; ++iL) { - temporaryTrack.setExternalClusterIndex(iL, seed.getCluster(iL), seed.getCluster(iL) != constants::its::UnusedIndex); - } - - bool fitSuccess = fitTrack(temporaryTrack, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF); - if (!fitSuccess) { - continue; - } - temporaryTrack.getParamOut() = temporaryTrack.getParamIn(); - temporaryTrack.resetCovariance(); - temporaryTrack.setChi2(0); - fitSuccess = fitTrack(temporaryTrack, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f); - if (!fitSuccess || temporaryTrack.getPt() < mTrkParams[iteration].MinPt[mTrkParams[iteration].NLayers - temporaryTrack.getNClusters()]) { - continue; - } - tracks[trackIndex++] = temporaryTrack; + if (trackSeeds.empty()) { + continue; } - tracks.resize(trackIndex); - std::sort(tracks.begin(), tracks.end(), [](const TrackITSExt& a, const TrackITSExt& b) { - return a.getChi2() < b.getChi2(); + bounded_vector tracks(mMemoryPool.get()); + tracks.reserve(trackSeeds.size()); + mTaskArena.execute([&] { + tbb::combinable> locTracksData([&] { + return bounded_vector(mMemoryPool.get()); + }); + + tbb::parallel_for( + tbb::blocked_range(size_t(0), trackSeeds.size()), + [&](const tbb::blocked_range& Seeds) { + for (int iSeed = Seeds.begin(); iSeed < Seeds.end(); ++iSeed) { + auto& localTracks = locTracksData.local(); + localTracks.reserve(Seeds.size()); + + const CellSeed& seed{trackSeeds[iSeed]}; + TrackITSExt temporaryTrack{seed}; + temporaryTrack.resetCovariance(); + temporaryTrack.setChi2(0); + for (int iL{0}; iL < 7; ++iL) { + temporaryTrack.setExternalClusterIndex(iL, seed.getCluster(iL), seed.getCluster(iL) != constants::its::UnusedIndex); + } + + bool fitSuccess = fitTrack(temporaryTrack, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF); + if (!fitSuccess) { + continue; + } + temporaryTrack.getParamOut() = temporaryTrack.getParamIn(); + temporaryTrack.resetCovariance(); + temporaryTrack.setChi2(0); + fitSuccess = fitTrack(temporaryTrack, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f); + if (!fitSuccess || temporaryTrack.getPt() < mTrkParams[iteration].MinPt[mTrkParams[iteration].NLayers - temporaryTrack.getNClusters()]) { + continue; + } + localTracks.push_back(temporaryTrack); + } + }); + + locTracksData.combine_each([&](const bounded_vector& localTracks) { + tracks.insert(tracks.end(), localTracks.begin(), localTracks.end()); + }); + tracks.shrink_to_fit(); + + tbb::parallel_sort(tracks.begin(), tracks.end(), [](const auto& a, const auto& b) { + return a.getChi2() < b.getChi2(); + }); }); for (auto& track : tracks) { @@ -859,7 +891,7 @@ bool TrackerTraits::trackFollowing(TrackITSExt* track, int rof, bool ou auto propInstance = o2::base::Propagator::Instance(); const int step = -1 + outward * 2; const int end = outward ? mTrkParams[iteration].NLayers - 1 : 0; - std::vector hypotheses(1, *track); // possibly avoid reallocation + bounded_vector hypotheses(1, *track, mMemoryPool.get()); // possibly avoid reallocation for (size_t iHypo{0}; iHypo < hypotheses.size(); ++iHypo) { auto hypo{hypotheses[iHypo]}; int iLayer = static_cast(outward ? hypo.getLastClusterLayer() : hypo.getFirstClusterLayer()); @@ -1017,11 +1049,15 @@ bool TrackerTraits::isMatLUT() const template void TrackerTraits::setNThreads(int n) { -#ifdef WITH_OPENMP + if (mNThreads == n) { + return; + } mNThreads = n > 0 ? n : 1; -#else - mNThreads = 1; +#if defined(OPTIMISATION_OUTPUT) || defined(CA_DEBUG) + mNThreads = 1; // only works while serial #endif + mTaskArena.initialize(mNThreads); + LOGP(info, "Setting tracker with {} threads.", mNThreads); } template class TrackerTraits<7>; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index b3bdd62e072fd..0907253e24580 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -16,26 +16,27 @@ #include "ITSReconstruction/FastMultEst.h" #include "ITStracking/TrackingInterface.h" +#include #include "DataFormatsITSMFT/ROFRecord.h" #include "DataFormatsITSMFT/PhysTrigger.h" #include "DataFormatsTRD/TriggerRecord.h" #include "CommonDataFormat/IRFrame.h" #include "DetectorsBase/GRPGeomHelper.h" +#include "ITStracking/BoundedAllocator.h" #include "ITStracking/TrackingConfigParam.h" #include "Framework/DeviceSpec.h" -namespace o2 -{ -using namespace framework; -namespace its -{ +using namespace o2::framework; +using namespace o2::its; + void ITSTrackingInterface::initialise() { mRunVertexer = true; mCosmicsProcessing = false; std::vector vertParams; std::vector trackParams; + const auto& vertConf = o2::its::VertexerParamConfig::Instance(); const auto& trackConf = o2::its::TrackerParamConfig::Instance(); float bFactor = std::abs(o2::base::Propagator::Instance()->getNominalBz()) / 5.0066791; float bFactorTracklets = bFactor < 0.01 ? 1. : bFactor; // for tracklets only @@ -121,6 +122,18 @@ void ITSTrackingInterface::initialise() throw std::runtime_error(fmt::format("Unsupported ITS tracking mode {:s} ", asString(mMode))); } + // TODO this imposes the same memory limits on each iteration + for (auto& p : vertParams) { + p.PrintMemory = vertConf.printMemory; + p.MaxMemory = vertConf.maxMemory; + p.DropTFUponFailure = vertConf.dropTFUponFailure; + } + for (auto& p : trackParams) { + p.PrintMemory = trackConf.printMemory; + p.MaxMemory = trackConf.maxMemory; + p.DropTFUponFailure = trackConf.dropTFUponFailure; + } + for (auto& params : trackParams) { params.CorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrLUT; } @@ -237,7 +250,7 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) vMCRecInfo = mTimeFrame->getPrimaryVerticesMCRecInfo(iRof); } if (o2::its::TrackerParamConfig::Instance().doUPCIteration) { - if (vtxSpan.size()) { + if (!vtxSpan.empty()) { if (vtxSpan[0].isFlagSet(Vertex::UPCMode) == 1) { // at least one vertex in this ROF and it is from second vertex iteration LOGP(debug, "ROF {} rejected as vertices are from the UPC iteration", iRof); processUPCMask[iRof] = true; @@ -253,7 +266,7 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) vtxROF.setFlag(o2::itsmft::ROFRecord::VtxStdMode); } vtxROF.setNEntries(vtxSpan.size()); - bool selROF = vtxSpan.size() == 0; + bool selROF = vtxSpan.empty(); for (auto iV{0}; iV < vtxSpan.size(); ++iV) { auto& v = vtxSpan[iV]; if (multEstConf.isVtxMultCutRequested() && !multEstConf.isPassingVtxMultCut(v.getNContributors())) { @@ -272,7 +285,7 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) cutVertexMult++; } } else { // cosmics - vtxVecLoc.emplace_back(Vertex()); + vtxVecLoc.emplace_back(); vtxVecLoc.back().setNContributors(1); vtxROF.setNEntries(vtxVecLoc.size()); for (auto& v : vtxVecLoc) { @@ -425,6 +438,7 @@ void ITSTrackingInterface::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) void ITSTrackingInterface::printSummary() const { + mMemoryPool->print(); mTracker->printSummary(); } @@ -437,6 +451,16 @@ void ITSTrackingInterface::setTraitsFromProvider(VertexerTraits* vertexerTraits, mTimeFrame = frame; mVertexer->adoptTimeFrame(*mTimeFrame); mTracker->adoptTimeFrame(*mTimeFrame); + + // set common memory resource + if (!mMemoryPool) { + mMemoryPool = std::make_shared(); + } + vertexerTraits->setMemoryPool(mMemoryPool); + trackerTraits->setMemoryPool(mMemoryPool); + mTimeFrame->setMemoryPool(mMemoryPool); + mTracker->setMemoryPool(mMemoryPool); + mVertexer->setMemoryPool(mMemoryPool); } void ITSTrackingInterface::loadROF(gsl::span& trackROFspan, @@ -446,5 +470,3 @@ void ITSTrackingInterface::loadROF(gsl::span& trackROFspan, { mTimeFrame->loadROFrameData(trackROFspan, clusters, pattIt, mDict, mcLabels); } -} // namespace its -} // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx b/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx index 0b8d59f61eb8d..8ab8c1a9e2e3c 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx @@ -14,6 +14,7 @@ /// #include "ITStracking/Vertexer.h" +#include "ITStracking/BoundedAllocator.h" #include "ITStracking/Cluster.h" #include "ITStracking/ROframe.h" #include "ITStracking/ClusterLines.h" @@ -38,27 +39,46 @@ float Vertexer::clustersToVertices(LogFunc logger) TrackingParameters trkPars; TimeFrameGPUParameters tfGPUpar; mTraits->updateVertexingParameters(mVertParams, tfGPUpar); + + auto handleException = [&](const auto& err) { + LOGP(error, "Encountered critical error in step {}, stopping further processing of this TF: {}", StateNames[mCurState], err.what()); + if (!mVertParams[0].DropTFUponFailure) { + throw err; + } else { + LOGP(error, "Dropping this TF!"); + mTimeFrame->resetTracklets(); + } + }; + float timeTracklet{0.f}, timeSelection{0.f}, timeVertexing{0.f}, timeInit{0.f}; - for (int iteration = 0; iteration < std::min(mVertParams[0].nIterations, (int)mVertParams.size()); ++iteration) { - unsigned int nTracklets01{0}, nTracklets12{0}; - logger(fmt::format("=== ITS {} Seeding vertexer iteration {} summary:", mTraits->getName(), iteration)); - trkPars.PhiBins = mTraits->getVertexingParameters()[0].PhiBins; - trkPars.ZBins = mTraits->getVertexingParameters()[0].ZBins; - auto timeInitIteration = evaluateTask( - &Vertexer::initialiseVertexer, " - Vertexer initialisation", evalLog, trkPars, iteration); - auto timeTrackletIteration = evaluateTask( - &Vertexer::findTracklets, " - Vertexer tracklet finding", evalLog, iteration); - nTracklets01 = mTimeFrame->getTotalTrackletsTF(0); - nTracklets12 = mTimeFrame->getTotalTrackletsTF(1); - auto timeSelectionIteration = evaluateTask( - &Vertexer::validateTracklets, " - Vertexer tracklets validation", evalLog, iteration); - auto timeVertexingIteration = evaluateTask( - &Vertexer::findVertices, " - Vertexer vertex finding", evalLog, iteration); - printEpilog(logger, nTracklets01, nTracklets12, mTimeFrame->getNLinesTotal(), mTimeFrame->getTotVertIteration()[iteration], timeInitIteration, timeTrackletIteration, timeSelectionIteration, timeVertexingIteration); - timeInit += timeInitIteration; - timeTracklet += timeTrackletIteration; - timeSelection += timeSelectionIteration; - timeVertexing += timeVertexingIteration; + try { + for (int iteration = 0; iteration < std::min(mVertParams[0].nIterations, (int)mVertParams.size()); ++iteration) { + mMemoryPool->setMaxMemory(mVertParams[iteration].MaxMemory); + unsigned int nTracklets01{0}, nTracklets12{0}; + logger(fmt::format("=== ITS {} Seeding vertexer iteration {} summary:", mTraits->getName(), iteration)); + trkPars.PhiBins = mTraits->getVertexingParameters()[0].PhiBins; + trkPars.ZBins = mTraits->getVertexingParameters()[0].ZBins; + auto timeInitIteration = evaluateTask( + &Vertexer::initialiseVertexer, StateNames[mCurState = Init], iteration, evalLog, trkPars, iteration); + auto timeTrackletIteration = evaluateTask( + &Vertexer::findTracklets, StateNames[mCurState = Trackleting], iteration, evalLog, iteration); + nTracklets01 = mTimeFrame->getTotalTrackletsTF(0); + nTracklets12 = mTimeFrame->getTotalTrackletsTF(1); + auto timeSelectionIteration = evaluateTask( + &Vertexer::validateTracklets, StateNames[mCurState = Validating], iteration, evalLog, iteration); + auto timeVertexingIteration = evaluateTask(&Vertexer::findVertices, StateNames[mCurState = Finding], iteration, evalLog, iteration); + printEpilog(logger, nTracklets01, nTracklets12, mTimeFrame->getNLinesTotal(), mTimeFrame->getTotVertIteration()[iteration], timeInitIteration, timeTrackletIteration, timeSelectionIteration, timeVertexingIteration); + timeInit += timeInitIteration; + timeTracklet += timeTrackletIteration; + timeSelection += timeSelectionIteration; + timeVertexing += timeVertexingIteration; + } + } catch (const BoundedMemoryResource::MemoryLimitExceeded& err) { + handleException(err); + } catch (const std::bad_alloc& err) { + handleException(err); + } catch (...) { + LOGP(fatal, "Uncaught exception!"); } return timeInit + timeTracklet + timeSelection + timeVertexing; @@ -91,6 +111,7 @@ void Vertexer::getGlobalConfiguration() mVertParams[0].nThreads = vc.nThreads; mVertParams[0].ZBins = vc.ZBins; mVertParams[0].PhiBins = vc.PhiBins; + mVertParams[0].SaveTimeBenchmarks = vc.saveTimeBenchmarks; } void Vertexer::adoptTimeFrame(TimeFrame7& tf) @@ -108,7 +129,10 @@ void Vertexer::printEpilog(LogFunc& logger, logger(fmt::format(" - {} Vertexer: found {} | {} tracklets in: {} ms", mTraits->getName(), trackletN01, trackletN12, trackletT)); logger(fmt::format(" - {} Vertexer: selected {} tracklets in: {} ms", mTraits->getName(), selectedN, selecT)); logger(fmt::format(" - {} Vertexer: found {} vertices in: {} ms", mTraits->getName(), vertexN, vertexT)); - // logger(fmt::format(" - Timeframe {} vertexing completed in: {} ms, using {} thread(s).", mTimeFrameCounter++, total, mTraits->getNThreads())); + if (mVertParams[0].PrintMemory) { + mTimeFrame->printArtefactsMemory(); + mMemoryPool->print(); + } } } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx index 71b54d95d3f8d..035ba7adffda6 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx @@ -10,13 +10,15 @@ // or submit itself to any jurisdiction. /// -#include -#include #include #include #include +#include +#include + #include "ITStracking/VertexerTraits.h" +#include "ITStracking/BoundedAllocator.h" #include "ITStracking/ClusterLines.h" #include "ITStracking/Tracklet.h" @@ -27,16 +29,7 @@ #include #endif -#ifdef WITH_OPENMP -#include -#endif - -namespace o2 -{ -namespace its -{ -using boost::histogram::indexed; -using constants::math::TwoPi; +using namespace o2::its; float smallestAngleDifference(float a, float b) { @@ -46,12 +39,12 @@ float smallestAngleDifference(float a, float b) template void trackleterKernelHost( - const gsl::span& clustersNextLayer, // 0 2 - const gsl::span& clustersCurrentLayer, // 1 1 - const gsl::span& usedClustersNextLayer, // 0 2 + const gsl::span& clustersNextLayer, // 0 2 + const gsl::span& clustersCurrentLayer, // 1 1 + const gsl::span& usedClustersNextLayer, // 0 2 int* indexTableNext, const float phiCut, - std::vector& tracklets, + bounded_vector& tracklets, gsl::span foundTracklets, const IndexTableUtils& utils, const short pivotRof, @@ -112,12 +105,12 @@ void trackletSelectionKernelHost( gsl::span usedClusters2, // Layer 2 const gsl::span& tracklets01, const gsl::span& tracklets12, - std::vector& usedTracklets, + bounded_vector& usedTracklets, const gsl::span foundTracklets01, const gsl::span foundTracklets12, - std::vector& lines, - const gsl::span& trackletLabels, - std::vector& linesLabels, + bounded_vector& lines, + const gsl::span& trackletLabels, + bounded_vector& linesLabels, const short pivotRofId, const short targetRofId, const float tanLambdaCut = 0.025f, @@ -141,7 +134,7 @@ void trackletSelectionKernelHost( usedClusters2[tracklet12.secondClusterIndex] = true; usedTracklets[iTracklet01] = true; lines.emplace_back(tracklet01, clusters0.data(), clusters1.data()); - if (trackletLabels.size()) { + if (!trackletLabels.empty()) { linesLabels.emplace_back(trackletLabels[iTracklet01]); } ++validTracklets; @@ -153,11 +146,11 @@ void trackletSelectionKernelHost( } } -const std::vector> VertexerTraits::selectClusters(const int* indexTable, - const std::array& selectedBinsRect, - const IndexTableUtils& utils) +const bounded_vector> VertexerTraits::selectClusters(const int* indexTable, + const std::array& selectedBinsRect, + const IndexTableUtils& utils) { - std::vector> filteredBins{}; + bounded_vector> filteredBins{mMemoryPool.get()}; int phiBinsNum{selectedBinsRect[3] - selectedBinsRect[1] + 1}; if (phiBinsNum < 0) { phiBinsNum += utils.getNphiBins(); @@ -187,93 +180,98 @@ void VertexerTraits::updateVertexingParameters(const std::vectorgetNrof(); ++pivotRofId) { // Pivot rofId: the rof for which the tracklets are computed - bool skipROF = iteration && (int)mTimeFrame->getPrimaryVertices(pivotRofId).size() > mVrtParams[iteration].vertPerRofThreshold; - short startROF{std::max((short)0, static_cast(pivotRofId - mVrtParams[iteration].deltaRof))}; - short endROF{std::min(static_cast(mTimeFrame->getNrof()), static_cast(pivotRofId + mVrtParams[iteration].deltaRof + 1))}; - for (auto targetRofId = startROF; targetRofId < endROF; ++targetRofId) { - trackleterKernelHost( - !skipROF ? mTimeFrame->getClustersOnLayer(targetRofId, 0) : gsl::span(), // Clusters to be matched with the next layer in target rof - !skipROF ? mTimeFrame->getClustersOnLayer(pivotRofId, 1) : gsl::span(), // Clusters to be matched with the current layer in pivot rof - mTimeFrame->getUsedClustersROF(targetRofId, 0), // Span of the used clusters in the target rof - mTimeFrame->getIndexTable(targetRofId, 0).data(), // Index table to access the data on the next layer in target rof - mVrtParams[iteration].phiCut, - mTimeFrame->getTracklets()[0], // Flat tracklet buffer - mTimeFrame->getNTrackletsCluster(pivotRofId, 0), // Span of the number of tracklets per each cluster in pivot rof - mIndexTableUtils, - pivotRofId, - targetRofId, - gsl::span(), // Offset in the tracklet buffer - mVrtParams[iteration].maxTrackletsPerCluster); - trackleterKernelHost( - !skipROF ? mTimeFrame->getClustersOnLayer(targetRofId, 2) : gsl::span(), - !skipROF ? mTimeFrame->getClustersOnLayer(pivotRofId, 1) : gsl::span(), - mTimeFrame->getUsedClustersROF(targetRofId, 2), - mTimeFrame->getIndexTable(targetRofId, 2).data(), - mVrtParams[iteration].phiCut, - mTimeFrame->getTracklets()[1], - mTimeFrame->getNTrackletsCluster(pivotRofId, 1), // Span of the number of tracklets per each cluster in pivot rof - mIndexTableUtils, - pivotRofId, - targetRofId, - gsl::span(), // Offset in the tracklet buffer - mVrtParams[iteration].maxTrackletsPerCluster); - } - mTimeFrame->getNTrackletsROF(pivotRofId, 0) = std::accumulate(mTimeFrame->getNTrackletsCluster(pivotRofId, 0).begin(), mTimeFrame->getNTrackletsCluster(pivotRofId, 0).end(), 0); - mTimeFrame->getNTrackletsROF(pivotRofId, 1) = std::accumulate(mTimeFrame->getNTrackletsCluster(pivotRofId, 1).begin(), mTimeFrame->getNTrackletsCluster(pivotRofId, 1).end(), 0); - } -#pragma omp single - mTimeFrame->computeTrackletsPerROFScans(); -#pragma omp single - mTimeFrame->getTracklets()[0].resize(mTimeFrame->getTotalTrackletsTF(0)); -#pragma omp single - mTimeFrame->getTracklets()[1].resize(mTimeFrame->getTotalTrackletsTF(1)); + mTaskArena.execute([&] { + tbb::parallel_for( + tbb::blocked_range(0, (short)mTimeFrame->getNrof()), + [&](const tbb::blocked_range& Rofs) { + for (short pivotRofId = Rofs.begin(); pivotRofId < Rofs.end(); ++pivotRofId) { + bool skipROF = iteration && (int)mTimeFrame->getPrimaryVertices(pivotRofId).size() > mVrtParams[iteration].vertPerRofThreshold; + short startROF{std::max((short)0, static_cast(pivotRofId - mVrtParams[iteration].deltaRof))}; + short endROF{std::min(static_cast(mTimeFrame->getNrof()), static_cast(pivotRofId + mVrtParams[iteration].deltaRof + 1))}; + for (auto targetRofId = startROF; targetRofId < endROF; ++targetRofId) { + trackleterKernelHost( + !skipROF ? mTimeFrame->getClustersOnLayer(targetRofId, 0) : gsl::span(), // Clusters to be matched with the next layer in target rof + !skipROF ? mTimeFrame->getClustersOnLayer(pivotRofId, 1) : gsl::span(), // Clusters to be matched with the current layer in pivot rof + mTimeFrame->getUsedClustersROF(targetRofId, 0), // Span of the used clusters in the target rof + mTimeFrame->getIndexTable(targetRofId, 0).data(), // Index table to access the data on the next layer in target rof + mVrtParams[iteration].phiCut, + mTimeFrame->getTracklets()[0], // Flat tracklet buffer + mTimeFrame->getNTrackletsCluster(pivotRofId, 0), // Span of the number of tracklets per each cluster in pivot rof + mIndexTableUtils, + pivotRofId, + targetRofId, + gsl::span(), // Offset in the tracklet buffer + mVrtParams[iteration].maxTrackletsPerCluster); + trackleterKernelHost( + !skipROF ? mTimeFrame->getClustersOnLayer(targetRofId, 2) : gsl::span(), + !skipROF ? mTimeFrame->getClustersOnLayer(pivotRofId, 1) : gsl::span(), + mTimeFrame->getUsedClustersROF(targetRofId, 2), + mTimeFrame->getIndexTable(targetRofId, 2).data(), + mVrtParams[iteration].phiCut, + mTimeFrame->getTracklets()[1], + mTimeFrame->getNTrackletsCluster(pivotRofId, 1), // Span of the number of tracklets per each cluster in pivot rof + mIndexTableUtils, + pivotRofId, + targetRofId, + gsl::span(), // Offset in the tracklet buffer + mVrtParams[iteration].maxTrackletsPerCluster); + } + mTimeFrame->getNTrackletsROF(pivotRofId, 0) = std::accumulate(mTimeFrame->getNTrackletsCluster(pivotRofId, 0).begin(), mTimeFrame->getNTrackletsCluster(pivotRofId, 0).end(), 0); + mTimeFrame->getNTrackletsROF(pivotRofId, 1) = std::accumulate(mTimeFrame->getNTrackletsCluster(pivotRofId, 1).begin(), mTimeFrame->getNTrackletsCluster(pivotRofId, 1).end(), 0); + } + }); + }); -#pragma omp for schedule(dynamic) - for (int pivotRofId = 0; pivotRofId < mTimeFrame->getNrof(); ++pivotRofId) { - bool skipROF = iteration && (int)mTimeFrame->getPrimaryVertices(pivotRofId).size() > mVrtParams[iteration].vertPerRofThreshold; - short startROF{std::max((short)0, static_cast(pivotRofId - mVrtParams[iteration].deltaRof))}; - short endROF{std::min(static_cast(mTimeFrame->getNrof()), static_cast(pivotRofId + mVrtParams[iteration].deltaRof + 1))}; - auto mobileOffset0 = mTimeFrame->getNTrackletsROF(pivotRofId, 0); - auto mobileOffset1 = mTimeFrame->getNTrackletsROF(pivotRofId, 1); - for (auto targetRofId = startROF; targetRofId < endROF; ++targetRofId) { - trackleterKernelHost( - !skipROF ? mTimeFrame->getClustersOnLayer(targetRofId, 0) : gsl::span(), - !skipROF ? mTimeFrame->getClustersOnLayer(pivotRofId, 1) : gsl::span(), - mTimeFrame->getUsedClustersROF(targetRofId, 0), - mTimeFrame->getIndexTable(targetRofId, 0).data(), - mVrtParams[iteration].phiCut, - mTimeFrame->getTracklets()[0], - mTimeFrame->getNTrackletsCluster(pivotRofId, 0), - mIndexTableUtils, - pivotRofId, - targetRofId, - mTimeFrame->getExclusiveNTrackletsCluster(pivotRofId, 0), - mVrtParams[iteration].maxTrackletsPerCluster); - trackleterKernelHost( - !skipROF ? mTimeFrame->getClustersOnLayer(targetRofId, 2) : gsl::span(), - !skipROF ? mTimeFrame->getClustersOnLayer(pivotRofId, 1) : gsl::span(), - mTimeFrame->getUsedClustersROF(targetRofId, 2), - mTimeFrame->getIndexTable(targetRofId, 2).data(), - mVrtParams[iteration].phiCut, - mTimeFrame->getTracklets()[1], - mTimeFrame->getNTrackletsCluster(pivotRofId, 1), - mIndexTableUtils, - pivotRofId, - targetRofId, - mTimeFrame->getExclusiveNTrackletsCluster(pivotRofId, 1), - mVrtParams[iteration].maxTrackletsPerCluster); - } - } - } + mTimeFrame->computeTrackletsPerROFScans(); + mTimeFrame->getTracklets()[0].resize(mTimeFrame->getTotalTrackletsTF(0)); + mTimeFrame->getTracklets()[1].resize(mTimeFrame->getTotalTrackletsTF(1)); + + mTaskArena.execute([&] { + tbb::parallel_for( + tbb::blocked_range(0, (short)mTimeFrame->getNrof()), + [&](const tbb::blocked_range& Rofs) { + for (short pivotRofId = Rofs.begin(); pivotRofId < Rofs.end(); ++pivotRofId) { + bool skipROF = iteration && (int)mTimeFrame->getPrimaryVertices(pivotRofId).size() > mVrtParams[iteration].vertPerRofThreshold; + short startROF{std::max((short)0, static_cast(pivotRofId - mVrtParams[iteration].deltaRof))}; + short endROF{std::min(static_cast(mTimeFrame->getNrof()), static_cast(pivotRofId + mVrtParams[iteration].deltaRof + 1))}; + auto mobileOffset0 = mTimeFrame->getNTrackletsROF(pivotRofId, 0); + auto mobileOffset1 = mTimeFrame->getNTrackletsROF(pivotRofId, 1); + for (auto targetRofId = startROF; targetRofId < endROF; ++targetRofId) { + trackleterKernelHost( + !skipROF ? mTimeFrame->getClustersOnLayer(targetRofId, 0) : gsl::span(), + !skipROF ? mTimeFrame->getClustersOnLayer(pivotRofId, 1) : gsl::span(), + mTimeFrame->getUsedClustersROF(targetRofId, 0), + mTimeFrame->getIndexTable(targetRofId, 0).data(), + mVrtParams[iteration].phiCut, + mTimeFrame->getTracklets()[0], + mTimeFrame->getNTrackletsCluster(pivotRofId, 0), + mIndexTableUtils, + pivotRofId, + targetRofId, + mTimeFrame->getExclusiveNTrackletsCluster(pivotRofId, 0), + mVrtParams[iteration].maxTrackletsPerCluster); + trackleterKernelHost( + !skipROF ? mTimeFrame->getClustersOnLayer(targetRofId, 2) : gsl::span(), + !skipROF ? mTimeFrame->getClustersOnLayer(pivotRofId, 1) : gsl::span(), + mTimeFrame->getUsedClustersROF(targetRofId, 2), + mTimeFrame->getIndexTable(targetRofId, 2).data(), + mVrtParams[iteration].phiCut, + mTimeFrame->getTracklets()[1], + mTimeFrame->getNTrackletsCluster(pivotRofId, 1), + mIndexTableUtils, + pivotRofId, + targetRofId, + mTimeFrame->getExclusiveNTrackletsCluster(pivotRofId, 1), + mVrtParams[iteration].maxTrackletsPerCluster); + } + } + }); + }); /// Create tracklets labels for L0-L1, information is as flat as in tracklets vector (no rofId) if (mTimeFrame->hasMCinformation()) { for (auto& trk : mTimeFrame->getTracklets()[0]) { - MCCompLabel label; + o2::MCCompLabel label; int sortedId0{mTimeFrame->getSortedIndex(trk.rof[0], 0, trk.firstClusterIndex)}; int sortedId1{mTimeFrame->getSortedIndex(trk.rof[1], 1, trk.secondClusterIndex)}; for (auto& lab0 : mTimeFrame->getClusterLabels(0, mTimeFrame->getClusters()[0][sortedId0].clusterId)) { @@ -333,39 +331,44 @@ void VertexerTraits::computeTracklets(const int iteration) out01.close(); out12.close(); #endif -} // namespace its +} void VertexerTraits::computeTrackletMatching(const int iteration) { -#pragma omp parallel for num_threads(mNThreads) schedule(dynamic) - for (int pivotRofId = 0; pivotRofId < mTimeFrame->getNrof(); ++pivotRofId) { - if (iteration && (int)mTimeFrame->getPrimaryVertices(pivotRofId).size() > mVrtParams[iteration].vertPerRofThreshold) { - continue; - } - mTimeFrame->getLines(pivotRofId).reserve(mTimeFrame->getNTrackletsCluster(pivotRofId, 0).size()); - std::vector usedTracklets(mTimeFrame->getFoundTracklets(pivotRofId, 0).size(), false); - int startROF{std::max((short)0, static_cast(pivotRofId - mVrtParams[iteration].deltaRof))}; - int endROF{std::min(static_cast(mTimeFrame->getNrof()), static_cast(pivotRofId + mVrtParams[iteration].deltaRof + 1))}; - for (auto targetRofId = startROF; targetRofId < endROF; ++targetRofId) { - trackletSelectionKernelHost( - mTimeFrame->getClustersOnLayer(targetRofId, 0), - mTimeFrame->getClustersOnLayer(pivotRofId, 1), - mTimeFrame->getUsedClustersROF(targetRofId, 0), - mTimeFrame->getUsedClustersROF(targetRofId, 2), - mTimeFrame->getFoundTracklets(pivotRofId, 0), - mTimeFrame->getFoundTracklets(pivotRofId, 1), - usedTracklets, - mTimeFrame->getNTrackletsCluster(pivotRofId, 0), - mTimeFrame->getNTrackletsCluster(pivotRofId, 1), - mTimeFrame->getLines(pivotRofId), - mTimeFrame->getLabelsFoundTracklets(pivotRofId, 0), - mTimeFrame->getLinesLabel(pivotRofId), - pivotRofId, - targetRofId, - mVrtParams[iteration].tanLambdaCut, - mVrtParams[iteration].phiCut); - } - } + mTaskArena.execute([&] { + tbb::parallel_for( + tbb::blocked_range(0, (short)mTimeFrame->getNrof()), + [&](const tbb::blocked_range& Rofs) { + for (short pivotRofId = Rofs.begin(); pivotRofId < Rofs.end(); ++pivotRofId) { + if (iteration && (int)mTimeFrame->getPrimaryVertices(pivotRofId).size() > mVrtParams[iteration].vertPerRofThreshold) { + continue; + } + mTimeFrame->getLines(pivotRofId).reserve(mTimeFrame->getNTrackletsCluster(pivotRofId, 0).size()); + bounded_vector usedTracklets(mTimeFrame->getFoundTracklets(pivotRofId, 0).size(), false, mMemoryPool.get()); + short startROF{std::max((short)0, static_cast(pivotRofId - mVrtParams[iteration].deltaRof))}; + short endROF{std::min(static_cast(mTimeFrame->getNrof()), static_cast(pivotRofId + mVrtParams[iteration].deltaRof + 1))}; + for (short targetRofId = startROF; targetRofId < endROF; ++targetRofId) { + trackletSelectionKernelHost( + mTimeFrame->getClustersOnLayer(targetRofId, 0), + mTimeFrame->getClustersOnLayer(pivotRofId, 1), + mTimeFrame->getUsedClustersROF(targetRofId, 0), + mTimeFrame->getUsedClustersROF(targetRofId, 2), + mTimeFrame->getFoundTracklets(pivotRofId, 0), + mTimeFrame->getFoundTracklets(pivotRofId, 1), + usedTracklets, + mTimeFrame->getNTrackletsCluster(pivotRofId, 0), + mTimeFrame->getNTrackletsCluster(pivotRofId, 1), + mTimeFrame->getLines(pivotRofId), + mTimeFrame->getLabelsFoundTracklets(pivotRofId, 0), + mTimeFrame->getLinesLabel(pivotRofId), + pivotRofId, + targetRofId, + mVrtParams[iteration].tanLambdaCut, + mVrtParams[iteration].phiCut); + } + } + }); + }); #ifdef VTX_DEBUG TFile* trackletFile = TFile::Open("artefacts_tf.root", "update"); @@ -401,19 +404,19 @@ void VertexerTraits::computeTrackletMatching(const int iteration) void VertexerTraits::computeVertices(const int iteration) { auto nsigmaCut{std::min(mVrtParams[iteration].vertNsigmaCut * mVrtParams[iteration].vertNsigmaCut * (mVrtParams[iteration].vertRadiusSigma * mVrtParams[iteration].vertRadiusSigma + mVrtParams[iteration].trackletSigma * mVrtParams[iteration].trackletSigma), 1.98f)}; - std::vector vertices; - std::vector> polls; + bounded_vector vertices(mMemoryPool.get()); + bounded_vector> polls(mMemoryPool.get()); #ifdef VTX_DEBUG std::vector> dbg_clusLines(mTimeFrame->getNrof()); #endif - std::vector noClustersVec(mTimeFrame->getNrof(), 0); + bounded_vector noClustersVec(mTimeFrame->getNrof(), 0, mMemoryPool.get()); for (int rofId{0}; rofId < mTimeFrame->getNrof(); ++rofId) { if (iteration && (int)mTimeFrame->getPrimaryVertices(rofId).size() > mVrtParams[iteration].vertPerRofThreshold) { continue; } const int numTracklets{static_cast(mTimeFrame->getLines(rofId).size())}; - std::vector usedTracklets(numTracklets, false); + bounded_vector usedTracklets(numTracklets, false, mMemoryPool.get()); for (int line1{0}; line1 < numTracklets; ++line1) { if (usedTracklets[line1]) { continue; @@ -522,7 +525,7 @@ void VertexerTraits::computeVertices(const int iteration) } vertices.back().setTimeStamp(mTimeFrame->getTrackletClusters(rofId)[iCluster].getROF()); if (mTimeFrame->hasMCinformation()) { - std::vector labels; + bounded_vector labels(mMemoryPool.get()); for (auto& index : mTimeFrame->getTrackletClusters(rofId)[iCluster].getLabels()) { labels.push_back(mTimeFrame->getLinesLabel(rofId)[index]); // then we can use nContributors from vertices to get the labels } @@ -569,25 +572,15 @@ void VertexerTraits::computeVertices(const int iteration) #endif } -void VertexerTraits::setNThreads(int n) -{ -#ifdef WITH_OPENMP - mNThreads = n > 0 ? n : 1; -#else - mNThreads = 1; -#endif - LOGP(info, "Setting seeding vertexer with {} threads.", mNThreads); -} - void VertexerTraits::computeVerticesInRof(int rofId, gsl::span& lines, - std::vector& usedLines, - std::vector& clusterLines, + bounded_vector& usedLines, + bounded_vector& clusterLines, std::array& beamPosXY, - std::vector& vertices, - std::vector& verticesInRof, + bounded_vector& vertices, + bounded_vector& verticesInRof, TimeFrame7* tf, - std::vector* labels, + bounded_vector* labels, const int iteration) { int foundVertices{0}; @@ -699,5 +692,16 @@ void VertexerTraits::computeVerticesInRof(int rofId, } verticesInRof.push_back(foundVertices); } -} // namespace its -} // namespace o2 + +void VertexerTraits::setNThreads(int n) +{ + if (mNThreads == n) { + return; + } + mNThreads = n > 0 ? n : 1; +#if defined(VTX_DEBUG) + mNThreads = 1; +#endif + mTaskArena.initialize(mNThreads); + LOGP(info, "Setting seeding vertexer with {} threads.", mNThreads); +} diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx index 5c3b9670fbdb9..a01eb77af1677 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx @@ -12,6 +12,7 @@ #include "ITS3Reconstruction/IOUtils.h" #include "ITStracking/IOUtils.h" #include "ITStracking/TimeFrame.h" +#include "ITStracking/BoundedAllocator.h" #include "DataFormatsITSMFT/CompCluster.h" #include "DataFormatsITSMFT/ROFRecord.h" #include "ITS3Reconstruction/TopologyDictionary.h" @@ -68,8 +69,7 @@ int loadROFrameDataITS3(its::TimeFrame<7>* tf, tf->mNrof = 0; - std::vector clusterSizeVec; - clusterSizeVec.reserve(clusters.size()); + its::bounded_vector clusterSizeVec(clusters.size(), tf->getMemoryPool().get()); for (auto& rof : rofs) { for (int clusterId{rof.getFirstEntry()}; clusterId < rof.getFirstEntry() + rof.getNEntries(); ++clusterId) { From e331961524787cfd2f53541e55ceedd839f78277 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Mon, 2 Jun 2025 18:16:41 +0200 Subject: [PATCH 2/2] ITS: fix single threaded Signed-off-by: Felix Schlepper --- Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 2 +- Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 75d7f9519fbef..97a679689e4a9 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -1049,7 +1049,7 @@ bool TrackerTraits::isMatLUT() const template void TrackerTraits::setNThreads(int n) { - if (mNThreads == n) { + if (mNThreads == n && mTaskArena.is_active()) { return; } mNThreads = n > 0 ? n : 1; diff --git a/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx index 035ba7adffda6..6fd3d99ffe58d 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx @@ -695,7 +695,7 @@ void VertexerTraits::computeVerticesInRof(int rofId, void VertexerTraits::setNThreads(int n) { - if (mNThreads == n) { + if (mNThreads == n && mTaskArena.is_active()) { return; } mNThreads = n > 0 ? n : 1;