diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 8796f063abdc5..85567d70d70d6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -14,88 +14,6 @@ #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct MergeBorderTracks_compMax { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); - } -}; -struct MergeBorderTracks_compMin { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); - } -}; - -struct GPUTPCGMMergerSortTracks_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerSortTracksQPt_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerMergeLoopers_comp { - GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b) - { - return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); - } -}; - -struct GPUTPCGMO2OutputSort_comp { - GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) - { - return (a.y > b.y); - } -}; - -} // anonymous namespace -} // namespace o2::gpu::internal - template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { @@ -109,13 +27,13 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.MergedTracks())); } template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.MergedTracks())); } template <> diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 82834a694d0ba..ec1636dfe7f59 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -125,7 +125,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include - mMaxTracks = mRec->GetConstantMem().tpcMerger.NOutputTracks(); + mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); } diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index bba97e9eace9b..73b195e8f4fe4 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -201,7 +201,7 @@ GPUdii() void GPUTPCCompressionKernels::ThreadclusterOffset[iSector][iRow]; - const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; + const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; // 32 bit enough for number of clusters per row * 1024 const uint32_t idOffsetOutMax = ((const uint32_t*)clusters->clusterOffset[iSector])[iRow + 1] * compressor.mMaxClusterFactorBase1024 / 1024; // Array out of bounds access is ok, since it goes to the correct nClustersTotal if (iThread == nThreads - 1) { smem.nCount = 0; @@ -214,7 +214,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->nClusters[iSector][iRow]); for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) { const int32_t idx = idOffset + i; - int32_t cidx = 0; + int32_t storeCluster = 0; do { if (i >= clusters->nClusters[iSector][iRow]) { break; @@ -239,13 +239,13 @@ GPUdii() void GPUTPCCompressionKernels::Thread diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 00cf127162b94..f72943e6bcd5a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -153,7 +153,7 @@ void GPUChainTracking::PrintMemoryStatistics() addToMap("TPC Sector TrackHits", usageMap, *processors()->tpcTrackers[i].NTrackHits(), processors()->tpcTrackers[i].NMaxTrackHits()); } addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits)); - addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NOutputTracks(), processors()->tpcMerger.NMaxTracks()); + addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks()); addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NOutputTrackClusters(), processors()->tpcMerger.NMaxOutputTrackClusters()); if (mRec->GetProcessingSettings().createO2Output) { @@ -181,7 +181,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL SectorTracks NCl %d NTrk %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTracks()); GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits()); } - GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTracks()); + GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks()); GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 6e86be03e7950..2b3d719a27dea 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -143,7 +143,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -158,7 +158,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -202,7 +202,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingPrepareFit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); if (doGPU) { CondWaitEvent(waitForTransfer, &mEvents->single); @@ -220,7 +220,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mOutputQueue.clear(); } - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); + runKernel(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); if (param().rec.tpc.retryRefit == 1) { runKernel(GetGridAuto(0), -1); } @@ -233,13 +233,14 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); if (param().rec.tpc.mergeLoopersAfterburner) { - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); + runKernel(doGPU ? GetGrid(Merger.NMergedTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); if (doGPU) { TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0); SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel } runKernel(GetGridAuto(0, deviceType)); runKernel(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType)); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingLoopers, Merger, &GPUTPCGMMerger::DumpLoopers, *mDebugFile); } DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); @@ -255,10 +256,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) throw std::runtime_error("QA Scratch buffer exceeded"); } } - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); + GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent); waitEvent = nullptr; if (param().dodEdxEnabled) { - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { @@ -325,8 +326,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mRec->ReturnVolatileDeviceMemory(); } - mIOPtrs.mergedTracks = Merger.OutputTracks(); - mIOPtrs.nMergedTracks = Merger.NOutputTracks(); + mIOPtrs.mergedTracks = Merger.MergedTracks(); + mIOPtrs.nMergedTracks = Merger.NMergedTracks(); mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); @@ -339,8 +340,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mIOPtrs.outputTracksTPCO2MC = Merger.OutputTracksTPCO2MC(); if (doGPU) { - processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks(); - processorsShadow()->ioPtrs.nMergedTracks = Merger.NOutputTracks(); + processorsShadow()->ioPtrs.mergedTracks = MergerShadow.MergedTracks(); + processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 4662b5464f710..5ca20a39d0462 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -33,7 +33,7 @@ int32_t GPUChainTracking::RunRefit() SetupGPUProcessor(&Refit, false); RefitShadow.SetPtrsFromGPUConstantMem(processorsShadow(), doGPU ? &processorsDevice()->param : nullptr); RefitShadow.SetPropagator(doGPU ? processorsShadow()->calibObjects.o2Propagator : GetO2Propagator()); - RefitShadow.mPTracks = (doGPU ? processorsShadow() : processors())->tpcMerger.OutputTracks(); + RefitShadow.mPTracks = (doGPU ? processorsShadow() : processors())->tpcMerger.MergedTracks(); WriteToConstantMemory(RecoStep::Refit, (char*)&processors()->trackingRefit - (char*)processors(), &RefitShadow, sizeof(RefitShadow), 0); // TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); if (param().rec.trackingRefitGPUModel) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index ef38d53173c2b..67ef402961a20 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -176,7 +176,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } if (GetProcessingSettings().debugLevel >= 6) { - *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; + if ((GetProcessingSettings().debugMask & 63)) { + *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; + } if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCSectorTrackingData) { if (doGPU) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 578fe1eeb4ca7..73b14ba1b2fdf 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -41,9 +41,6 @@ class GPUTPCGMMergedTrack { return mAlpha; } - GPUd() float LastX() const { return mLastX; } - GPUd() float LastY() const { return mLastY; } - GPUd() float LastZ() const { return mLastZ; } GPUd() bool OK() const { return mFlags & 0x01; } GPUd() bool Looper() const { return mFlags & 0x02; } GPUd() bool CSide() const { return mFlags & 0x04; } @@ -55,9 +52,6 @@ class GPUTPCGMMergedTrack GPUd() void SetFirstClusterRef(int32_t v) { mFirstClusterRef = v; } GPUd() void SetParam(const GPUTPCGMTrackParam& v) { mParam = v; } GPUd() void SetAlpha(float v) { mAlpha = v; } - GPUd() void SetLastX(float v) { mLastX = v; } - GPUd() void SetLastY(float v) { mLastY = v; } - GPUd() void SetLastZ(float v) { mLastZ = v; } GPUd() void SetOK(bool v) { if (v) { @@ -101,6 +95,7 @@ class GPUTPCGMMergedTrack GPUd() void SetFlags(uint8_t v) { mFlags = v; } GPUd() void SetLegs(uint8_t v) { mLegs = v; } GPUd() uint8_t Legs() const { return mLegs; } + GPUd() uint8_t Flags() const { return mFlags; } GPUd() const gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() const { return mOuterParam; } GPUd() gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() { return mOuterParam; } @@ -110,9 +105,6 @@ class GPUTPCGMMergedTrack gputpcgmmergertypes::GPUTPCOuterParam mOuterParam; //* outer param float mAlpha; //* alpha angle - float mLastX; //* outer X - float mLastY; //* outer Y - float mLastZ; //* outer Z uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays // TODO: Change to 8 bit uint32_t mNClusters; //* number of track clusters diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index d2aba503be6a6..f1a0816529c3a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -59,17 +59,13 @@ #include "SimulationDataFormat/MCCompLabel.h" #endif -namespace o2::gpu::internal -{ -} +static constexpr int32_t kMaxParts = 400; +static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; + using namespace o2::gpu; -using namespace o2::gpu::internal; using namespace o2::tpc; using namespace gputpcgmmergertypes; -static constexpr int32_t kMaxParts = 400; -static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; - namespace o2::gpu::internal { struct MergeLooperParam { @@ -78,8 +74,79 @@ struct MergeLooperParam { float y; uint32_t id; }; + +struct MergeBorderTracks_compMax { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); + } +}; +struct MergeBorderTracks_compMin { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); + } +}; + +struct GPUTPCGMMergerSortTracks_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + if (a.CCE() != b.CCE()) { + return a.CCE() > b.CCE(); + } + if (a.Legs() != b.Legs()) { + return a.Legs() > b.Legs(); + } + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE + return a.NClusters() > b.NClusters(); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerSortTracksQPt_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerMergeLoopers_comp { + GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b) + { + return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); + } +}; + } // namespace o2::gpu::internal +using namespace o2::gpu::internal; + #ifndef GPUCA_GPUCODE #include "GPUQA.h" @@ -298,11 +365,11 @@ void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) void* GPUTPCGMMerger::SetPointersOutput(void* mem) { - computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracks, mNMaxTracks); if (mRec->GetParam().dodEdxEnabled) { - computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracksdEdx, mNMaxTracks); if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) { - computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks); } } computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); @@ -415,7 +482,7 @@ int32_t GPUTPCGMMerger::CheckSectors() GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output) { - const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal(); for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) { mTrackLinks[i] = -1; } @@ -742,11 +809,11 @@ template <> GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread == 0) { + if (iThread == 0 && iBlock == 0) { if (cmpMax) { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMax()); } else { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMin()); } } #endif @@ -1251,7 +1318,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row); if (track->TransportToX(this, x0, Param().bzCLight, b, GPUCA_MAX_SIN_PHI_LOW)) { b.SetTrackID(itr); - b.SetNClusters(mOutputTracks[itr].NClusters()); + b.SetNClusters(mMergedTracks[itr].NClusters()); if (CAMath::Abs(b.Cov()[4]) >= 0.5f) { b.SetCov(4, 0.5f); // TODO: Is this needed and better than the cut in BorderTrack? } @@ -1271,12 +1338,12 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const ClusterNative* cls = Param().par.earlyTpcTransform ? nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear; - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { - if (mOutputTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { + if (mMergedTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) { continue; } - GPUTPCGMMergedTrack* trk[2] = {&mOutputTracks[i], &mOutputTracks[mTrackLinks[i]]}; + GPUTPCGMMergedTrack* trk[2] = {&mMergedTracks[i], &mMergedTracks[mTrackLinks[i]]}; if (!trk[1]->OK() || trk[1]->CCE()) { continue; @@ -1392,7 +1459,7 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i } } - // for (int32_t i = 0;i < mMemory->nOutputTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks + // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mMergedTracks[i].CCE() == false) {mMergedTracks[i].SetNClusters(0);mMergedTracks[i].SetOK(false);}} //Remove all non-CE tracks } namespace o2::gpu::internal @@ -1533,7 +1600,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = 0; for (int32_t ipart = 0; ipart < nParts; ipart++) { const GPUTPCGMSectorTrack* t = trackParts[ipart]; - CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nOutputTracks, ipart, t->QPt(), t->DzDs())); + CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nMergedTracks, ipart, t->QPt(), t->DzDs())); int32_t nTrackHits = t->NClusters(); trackCluster* c2 = trackClusters + nHits + nTrackHits - 1; for (int32_t i = 0; i < nTrackHits; i++, c2--) { @@ -1678,14 +1745,14 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread cl[i].leg = trackClusters[i].leg; } - uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nOutputTracks, 1u); + uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nMergedTracks, 1u); if (iOutputTrack >= mNMaxTracks) { raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks); - CAMath::AtomicExch(&mMemory->nOutputTracks, mNMaxTracks); + CAMath::AtomicExch(&mMemory->nMergedTracks, mNMaxTracks); continue; } - GPUTPCGMMergedTrack& mergedTrack = mOutputTracks[iOutputTrack]; + GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack]; mergedTrack.SetFlags(0); mergedTrack.SetOK(1); @@ -1718,9 +1785,9 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; } - // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nOutputTracks, p1.QPt(), nParts, nHits); + // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nMergedTracks, p1.QPt(), nParts, nHits); - /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nOutputTracks)) + /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nMergedTracks)) { mergedTrack.SetOK(0); mergedTrack.SetNClusters(0); @@ -1742,14 +1809,14 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackOrderProcess[i] = i; } } GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackSort[i] = i; } } @@ -1757,68 +1824,26 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mMergedTracks)); } - // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object - auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { - const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - }; - - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nOutputTracks, comp); #endif } GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mMergedTracks)); } - // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object - auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { - const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - }; - - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nOutputTracks, comp); #endif } GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackOrderAttach[mTrackSort[i]] = i; - const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; + const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; if (trk.OK()) { for (uint32_t j = 0; j < trk.NClusters(); j++) { mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = attachAttached | attachGood; @@ -1848,7 +1873,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackSort[mTrackOrderAttach[i]] = i; } for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { @@ -1858,8 +1883,8 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { - const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { + const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; if (!trk.OK() || trk.NClusters() == 0) { continue; } @@ -1893,8 +1918,8 @@ GPUd() void GPUTPCGMMerger::Finalize2(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold - for (uint32_t i = get_global_id(0); i < mMemory->nOutputTracks; i += get_global_size(0)) { - const auto& trk = mOutputTracks[i]; + for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) { + const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { @@ -1945,11 +1970,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } - auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); }; - GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, comp); #endif } @@ -1960,7 +1983,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, #if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE) std::vector paramLabels(mMemory->nLooperMatchCandidates); for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) { - paramLabels[i] = GetTrackLabel(mOutputTracks[params[i].id]); + paramLabels[i] = GetTrackLabel(mMergedTracks[params[i].id]); } /*std::vector dropped(mMemory->nLooperMatchCandidates); std::vector droppedMC(mMemory->nLooperMatchCandidates); @@ -1982,8 +2005,8 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, // bs |= 1; continue; } - const auto& trk1 = mOutputTracks[params[i].id]; - const auto& trk2 = mOutputTracks[params[j].id]; + const auto& trk1 = mMergedTracks[params[i].id]; + const auto& trk2 = mMergedTracks[params[j].id]; const auto& param1 = trk1.GetParam(); const auto& param2 = trk2.GetParam(); if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) { @@ -2022,7 +2045,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const int64_t label2 = paramLabels[j]; bool labelEQ = label1 != -1 && label1 == label2; if (1 || EQ || labelEQ) { - // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mOutputTracks[params[i].id].CSide(), (int32_t)mOutputTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, params[i].y, params[j].y); + // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mMergedTracks[params[i].id].CSide(), (int32_t)mMergedTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, params[i].y, params[j].y); static auto& tup = GPUROOTDump::get("mergeloopers", "labeleq:sides:d2xy:tgl1:tgl2:qpt1:qpt2:dz:dzcorr:dtgl:dqpt:dznorm:bs"); tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz), dzcorr, dtgl, dqpt, dznorm, bs); static auto tup2 = GPUROOTDump::getNew("mergeloopers2", "labeleq:refz1:refz2:tgl1:tgl2:qpt1:qpt2:snp1:snp2:a1:a2:dzn:phasecor:phasedir:dzcorr"); @@ -2040,9 +2063,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, }*/ #endif if (EQ) { - mOutputTracks[params[j].id].SetMergedLooper(true); + mMergedTracks[params[j].id].SetMergedLooper(true); if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) { - mOutputTracks[params[i].id].SetMergedLooper(true); + mMergedTracks[params[i].id].SetMergedLooper(true); } } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 6c6e0e02a2dc2..4487b6d937dc2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -69,7 +69,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nRetryRefit; GPUAtomic(uint32_t) nLoopData; GPUAtomic(uint32_t) nUnpackedTracks; - GPUAtomic(uint32_t) nOutputTracks; + GPUAtomic(uint32_t) nMergedTracks; GPUAtomic(uint32_t) nOutputTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; @@ -103,13 +103,13 @@ class GPUTPCGMMerger : public GPUProcessor void* SetPointersOutputState(void* mem); void* SetPointersMemory(void* mem); - GPUhdi() int32_t NOutputTracks() const { return mMemory->nOutputTracks; } - GPUhdi() const GPUTPCGMMergedTrack* OutputTracks() const { return mOutputTracks; } - GPUhdi() GPUTPCGMMergedTrack* OutputTracks() { return mOutputTracks; } - GPUhdi() const GPUdEdxInfo* OutputTracksdEdx() const { return mOutputTracksdEdx; } - GPUhdi() GPUdEdxInfo* OutputTracksdEdx() { return mOutputTracksdEdx; } - GPUhdi() const GPUdEdxInfo* OutputTracksdEdxAlt() const { return mOutputTracksdEdxAlt; } - GPUhdi() GPUdEdxInfo* OutputTracksdEdxAlt() { return mOutputTracksdEdxAlt; } + GPUhdi() int32_t NMergedTracks() const { return mMemory->nMergedTracks; } + GPUhdi() const GPUTPCGMMergedTrack* MergedTracks() const { return mMergedTracks; } + GPUhdi() GPUTPCGMMergedTrack* MergedTracks() { return mMergedTracks; } + GPUhdi() const GPUdEdxInfo* MergedTracksdEdx() const { return mMergedTracksdEdx; } + GPUhdi() GPUdEdxInfo* MergedTracksdEdx() { return mMergedTracksdEdx; } + GPUhdi() const GPUdEdxInfo* MergedTracksdEdxAlt() const { return mMergedTracksdEdxAlt; } + GPUhdi() GPUdEdxInfo* MergedTracksdEdxAlt() { return mMergedTracksdEdxAlt; } GPUhdi() uint32_t NClusters() const { return mNClusters; } GPUhdi() uint32_t NMaxClusters() const { return mNMaxClusters; } GPUhdi() uint32_t NMaxTracks() const { return mNMaxTracks; } @@ -201,6 +201,7 @@ class GPUTPCGMMerger : public GPUProcessor void DumpFitPrepare(std::ostream& out) const; void DumpRefit(std::ostream& out) const; void DumpFinal(std::ostream& out) const; + void DumpLoopers(std::ostream& out) const; template void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const; @@ -261,9 +262,9 @@ class GPUTPCGMMerger : public GPUProcessor uint16_t mMemoryResOutputO2Scratch = (uint16_t)-1; int32_t mNClusters = 0; // Total number of incoming clusters (from sector tracks) - GPUTPCGMMergedTrack* mOutputTracks = nullptr; //* array of output merged tracks - GPUdEdxInfo* mOutputTracksdEdx = nullptr; //* dEdx information - GPUdEdxInfo* mOutputTracksdEdxAlt = nullptr; //* dEdx alternative information + GPUTPCGMMergedTrack* mMergedTracks = nullptr; //* array of output merged tracks + GPUdEdxInfo* mMergedTracksdEdx = nullptr; //* dEdx information + GPUdEdxInfo* mMergedTracksdEdxAlt = nullptr; //* dEdx alternative information GPUTPCGMSectorTrack* mSectorTrackInfos = nullptr; //* additional information for sector tracks int32_t* mSectorTrackInfoIndex = nullptr; GPUTPCGMMergedTrackHit* mClusters = nullptr; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index ac55f423b1c42..9c924e74ec519 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -94,7 +94,7 @@ void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSector, in void GPUTPCGMMerger::DumpTrackLinks(std::ostream& out, bool output, const char* type) const { out << "\nTPC Merger Links " << type << "\n"; - const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal(); for (int32_t i = 0; i < n; i++) { if (mTrackLinks[i] != -1) { out << " " << i << ": " << mTrackLinks[i] << "\n"; @@ -138,8 +138,8 @@ void GPUTPCGMMerger::DumpCollected(std::ostream& out) const std::streamsize ss = out.precision(); out << std::setprecision(2); out << "\nTPC Merger Collected Tracks\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { - const auto& trk = mOutputTracks[i]; + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { + const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); out << " Track " << i << ": Loop " << trk.Looper() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << "\n"; } @@ -150,8 +150,8 @@ void GPUTPCGMMerger::DumpMergeCE(std::ostream& out) const { DumpTrackLinks(out, true, " for CE merging"); out << "\nTPC Merger Merge CE\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { - const auto& trk = mOutputTracks[i]; + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { + const auto& trk = mMergedTracks[i]; if (trk.CCE()) { out << " Track " << i << ": CCE\n"; } @@ -162,12 +162,12 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const { out << "\nTPC Merger Refit Prepare\n"; out << " Sort\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { out << " " << i << ": " << mTrackOrderAttach[i] << "\n"; } out << " Clusters\n"; - for (uint32_t j = 0; j < mMemory->nOutputTracks; j++) { - const auto& trk = mOutputTracks[j]; + for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { + const auto& trk = mMergedTracks[j]; out << " Track " << j << ": "; for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) { out << j << "/" << (i - trk.FirstClusterRef()) << ": " << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", "; @@ -195,25 +195,38 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const std::streamsize ss = out.precision(); out << std::setprecision(2); out << "\nTPC Merger Refit\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { - const auto& trk = mOutputTracks[i]; + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { + const auto& trk = mMergedTracks[i]; if (trk.NClusters() == 0) { continue; } const auto& p = trk.GetParam(); const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() - << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) - << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << "\n"; + << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxMaxTPC : -1.f) + << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] + << " NFitted " << trk.NClustersFitted() << " legs " << (int)trk.Legs() << " flags " << (int)trk.Flags() << "\n"; } out << std::setprecision(ss); } +void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const +{ + out << "\n TPC Merger Looper Afterburner\n"; + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { + if (i && i % 100 == 0) { + out << "\n"; + } + out << (int)mMergedTracks[i].MergedLooper() << " "; + } + out << "\n"; +} + void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; - for (uint32_t j = 0; j < mMemory->nOutputTracks; j++) { - const auto& trk = mOutputTracks[j]; + for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { + const auto& trk = mMergedTracks[j]; if (trk.NClusters() == 0) { continue; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index d72d59a6250e7..1631777d80482 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -21,10 +21,10 @@ using namespace o2::gpu; template <> GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode) { - const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NOutputTracks(); + const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NMergedTracks(); GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, iEnd, { const int32_t i = mode == -1 ? merger.RetryRefitIds()[ii] : mode ? merger.TrackOrderProcess()[ii] : ii; - GPUTPCGMTrackParam::RefitTrack(merger.OutputTracks()[i], i, &merger, mode == -1); + GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger, mode == -1); }); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9ead17ea5c7c0..eb22ca49e9242 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -34,13 +34,25 @@ using namespace o2::tpc::constants; GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit; } GPUdi() static uint32_t getFlagsRequired(const GPUSettingsRec& rec) { return rec.tpc.dropSecondaryLegsInOutput ? gputpcgmmergertypes::attachGoodLeg : gputpcgmmergertypes::attachZero; } +namespace o2::gpu::internal +{ + +struct GPUTPCGMO2OutputSort_comp { + GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) + { + return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); + } +}; + +} // namespace o2::gpu::internal + template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); - const uint32_t nTracks = merger.NOutputTracks(); + const GPUTPCGMMergedTrack* tracks = merger.MergedTracks(); + const uint32_t nTracks = merger.NMergedTracks(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); - const GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); + const GPUdEdxInfo* tracksdEdx = merger.MergedTracksdEdx(); constexpr uint8_t flagsReject = getFlagsReject(); const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); @@ -84,12 +96,10 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); + GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, internal::GPUTPCGMO2OutputSort_comp()); } - GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); - auto comp = [](const auto& a, const auto& b) { return (a.y > b.y); }; - GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, comp); #endif } @@ -97,9 +107,9 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { constexpr float MinDelta = 0.1f; - const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); - GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); - GPUdEdxInfo* tracksdEdxAlt = merger.OutputTracksdEdxAlt(); + const GPUTPCGMMergedTrack* tracks = merger.MergedTracks(); + GPUdEdxInfo* tracksdEdx = merger.MergedTracksdEdx(); + GPUdEdxInfo* tracksdEdxAlt = merger.MergedTracksdEdxAlt(); const int32_t nTracks = merger.NOutputTracksTPCO2(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); constexpr uint8_t flagsReject = getFlagsReject(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 0d8547263207b..4b616fce83f5f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -94,10 +94,10 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ storeOuter = 0; if (iWay == nWays - 1) { StoreOuter(outerParam, prop, 0); - if (merger->OutputTracks()[iTrk].Looper()) { + if (merger->MergedTracks()[iTrk].Looper()) { storeOuter = 1; } - } else if (iWay == nWays - 2 && merger->OutputTracks()[iTrk].Looper()) { + } else if (iWay == nWays - 2 && merger->MergedTracks()[iTrk].Looper()) { storeOuter = 2; } } @@ -435,9 +435,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // TODO: we have looping tracks here with 0 accepted clusters in the primary leg. In that case we should refit the track using only the primary leg. if (param.par.dodEdx && param.dodEdxEnabled) { - dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); + dEdx.computedEdx(merger->MergedTracksdEdx()[iTrk], param); if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { - dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + dEdxAlt.computedEdx(merger->MergedTracksdEdxAlt()[iTrk], param); } } Alpha = prop.GetAlpha(); @@ -596,7 +596,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return -1e6f; } - const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->OutputTracks()[iTrack].CSide() ^ (sector >= 18)) ? -mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); + const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->MergedTracks()[iTrack].CSide() ^ (sector >= 18)) ? -mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); const float y0 = row.Grid().YMin(); const float stepY = row.HstepY(); const float z0 = row.Grid().ZMin() - zOffset; // We can use our own ZOffset, since this is only used temporarily anyway @@ -1136,33 +1136,14 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr t.QPt() = 1.e-4f; } - CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->OutputTracks()[iTrk].Looper()); }); + CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->MergedTracks()[iTrk].Looper()); }); track.SetOK(ok); track.SetNClustersFitted(nTrackHits); track.Param() = t; track.Alpha() = Alpha; - if (track.OK()) { - int32_t ind = track.FirstClusterRef(); - const GPUParam& GPUrestrict() param = merger->Param(); - float alphaa = param.Alpha(merger->Clusters()[ind].sector); - float xx, yy, zz; - if (merger->Param().par.earlyTpcTransform) { - xx = merger->ClustersXYZ()[ind].x; - yy = merger->ClustersXYZ()[ind].y; - zz = merger->ClustersXYZ()[ind].z - track.Param().GetTZOffset(); - } else { - const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[merger->Clusters()[ind].num]; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(merger->Clusters()[ind].sector, merger->Clusters()[ind].row, cl.getPad(), cl.getTime(), xx, yy, zz, track.Param().GetTZOffset()); - } - float sinA, cosA; - CAMath::SinCos(alphaa - track.Alpha(), sinA, cosA); - track.SetLastX(xx * cosA - yy * sinA); - track.SetLastY(xx * sinA + yy * cosA); - track.SetLastZ(zz); - // merger->DebugRefitMergedTrack(track); - } + // if (track.OK()) merger->DebugRefitMergedTrack(track); } GPUd() void GPUTPCGMTrackParam::Rotate(float alpha) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx deleted file mode 100644 index 78eea63edecdd..0000000000000 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCGMTracksToTPCSeeds.cxx -/// \author David Rohr - -#include "GPUTPCGMTracksToTPCSeeds.h" -#include "GPUTPCGlobalMergerComponent.h" -#include "GPUTPCGMMergerTypes.h" -#include "GPUTPCGMMerger.h" -#include "GPULogging.h" -#include "AliTPCtracker.h" -#include "AliTPCtrack.h" -#include "AliTPCseed.h" -#include "AliTPCtrackerSector.h" -#include "TObjArray.h" -#include "AliTPCclusterMI.h" - -using namespace o2::gpu; - -void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPCtracker* tpctracker) -{ - const GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - seeds->Clear(); - int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - - AliTPCtrack tr; - tr.Set(track.GetParam().GetX(), track.GetAlpha(), track.GetParam().GetPar(), track.GetParam().GetCov()); - AliTPCseed* seed = new (tpctracker->NextFreeSeed()) AliTPCseed(tr); - for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - seed->SetClusterPointer(j, nullptr); - seed->SetClusterIndex(j, -1); - } - int32_t ncls = 0; - int32_t lastrow = -1; - int32_t lastleg = -1; - for (int32_t j = track.NClusters() - 1; j >= 0; j--) { - const GPUTPCGMMergedTrackHit& cls = merger->Clusters()[track.FirstClusterRef() + j]; - if (cls.state & GPUTPCGMMergedTrackHit::flagReject) { - continue; - } - if (lastrow != -1 && (cls.row < lastrow || cls.leg != lastleg)) { - break; - } - if (cls.row == lastrow) { - continue; - } - - AliTPCtrackerRow& row = tpctracker->GetRow(cls.sector % 18, cls.row); - uint32_t clIndexOffline = 0; - AliTPCclusterMI* clOffline = row.FindNearest2(cls.y, cls.z, 0.01f, 0.01f, clIndexOffline); - if (!clOffline) { - continue; - } - clIndexOffline = row.GetIndex(clIndexOffline); - - clOffline->Use(10); - seed->SetClusterPointer(cls.row, clOffline); - seed->SetClusterIndex2(cls.row, clIndexOffline); - - lastrow = cls.row; - lastleg = cls.leg; - ncls++; - } - - seed->SetRelativeSector(track.GetAlpha() / (M_PI / 9.f)); - seed->SetNumberOfClusters(ncls); - seed->SetNFoundable(ncls); - seed->SetChi2(track.GetParam().GetChi2()); - - float alpha = seed->GetAlpha(); - if (alpha >= 2.f * M_PI) { - alpha -= 2.f * M_PI; - } - if (alpha < 0) { - alpha += 2.f * M_PI; - } - seed->SetRelativeSector(track.GetAlpha() / (M_PI / 9.f)); - - seed->SetPoolID(tpctracker->GetLastSeedId()); - seed->SetIsSeeding(kTRUE); - seed->SetSeed1(GPUCA_ROW_COUNT - 1); - seed->SetSeed2(GPUCA_ROW_COUNT - 2); - seed->SetSeedType(0); - seed->SetFirstPoint(-1); - seed->SetLastPoint(-1); - seeds->AddLast(seed); // note, track is seed, don't free the seed - index++; - } -} - -void GPUTPCGMTracksToTPCSeeds::UpdateParamsOuter(TObjArray* seeds) -{ - const GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - if (index > seeds->GetEntriesFast()) { - GPUError("Invalid number of offline seeds"); - return; - } - AliTPCseed* seed = (AliTPCseed*)seeds->UncheckedAt(index++); - const gputpcgmmergertypes::GPUTPCOuterParam& param = track.OuterParam(); - seed->Set(param.X, param.alpha, param.P, param.C); - } -} - -void GPUTPCGMTracksToTPCSeeds::UpdateParamsInner(TObjArray* seeds) -{ - const GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - if (index > seeds->GetEntriesFast()) { - GPUError("Invalid number of offline seeds"); - return; - } - AliTPCseed* seed = (AliTPCseed*)seeds->UncheckedAt(index++); - seed->Set(track.GetParam().GetX(), track.GetAlpha(), track.GetParam().GetPar(), track.GetParam().GetCov()); - } -} diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h deleted file mode 100644 index 029cb108d4119..0000000000000 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCGMTracksToTPCSeeds.h -/// \author David Rohr - -#ifndef GPUTPCGMTRACKSTOTPCSEEDS_H -#define GPUTPCGMTRACKSTOTPCSEEDS_H - -class TObjArray; -class AliTPCtracker; - -class GPUTPCGMTracksToTPCSeeds -{ - public: - static void CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPCtracker* tpctracker); - static void UpdateParamsOuter(TObjArray* seeds); - static void UpdateParamsInner(TObjArray* seeds); -}; - -#endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index a21593b7ba9e9..5af3ebb51b9d6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -100,13 +100,13 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread= 0) { int32_t firstIdx = j; - auto firstItem = merger.OutputTracks()[firstIdx]; + auto firstItem = merger.MergedTracks()[firstIdx]; int32_t currIdx = firstIdx; int32_t sourceIdx = tmp[currIdx]; tmp2[sourceIdx] = currIdx; do { tmp[currIdx] = -1; - merger.OutputTracks()[currIdx] = merger.OutputTracks()[sourceIdx]; + merger.MergedTracks()[currIdx] = merger.MergedTracks()[sourceIdx]; currIdx = sourceIdx; sourceIdx = tmp[currIdx]; tmp2[sourceIdx] = currIdx; } while (sourceIdx != firstIdx); tmp[currIdx] = -1; - merger.OutputTracks()[currIdx] = firstItem; + merger.MergedTracks()[currIdx] = firstItem; } } }