From 72eef240d7e046f6b90750d400390f65c5169de0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 19:06:16 +0200 Subject: [PATCH 1/3] GPU: Fix allocator / deallocator mismatch --- GPU/GPUTracking/Base/GPUReconstruction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index e0c866fd9421b..06f1c27fb6c06 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -378,7 +378,7 @@ class GPUReconstruction std::vector res; }; struct alignedDeleter { - void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; + void operator()(void* ptr) { ::operator delete[](ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; }; std::unordered_map mMemoryReuse1to1; std::vector> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCound, tag From e555a20cad2e5aca451f701f54896d36859a5eee Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 19:16:30 +0200 Subject: [PATCH 2/3] GPU TPC Merger: Clarify more variable names --- .../DataCompression/GPUTPCCompression.cxx | 2 +- .../GPUChainTrackingDebugAndProfiling.cxx | 4 +- .../Global/GPUChainTrackingMerger.cxx | 10 ++--- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 40 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 8 ++-- GPU/GPUTracking/SectorTracker/GPUTPCTrack.h | 3 -- 6 files changed, 32 insertions(+), 35 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index ec1636dfe7f59..61f8a614fbe6f 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -124,7 +124,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusters = io.clustersNative->nClustersTotal; mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; - mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include + mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NMergedTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index e9721ec9d12bf..173d2fb916239 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -154,7 +154,7 @@ void GPUChainTracking::PrintMemoryStatistics() } addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits)); addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks()); - addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NOutputTrackClusters(), processors()->tpcMerger.NMaxOutputTrackClusters()); + addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NMergedTrackClusters(), processors()->tpcMerger.NMaxMergedTrackClusters()); if (mRec->GetProcessingSettings().createO2Output) { addToMap("TPC O2 Tracks", usageMap, processors()->tpcMerger.NOutputTracksTPCO2(), processors()->tpcMerger.NOutputTracksTPCO2()); @@ -182,7 +182,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits()); } GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks()); - GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); + GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTrackClusters()); } void GPUChainTracking::PrepareKernelDebugOutput() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 2b3d719a27dea..118f0bf73a845 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -261,9 +261,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (param().dodEdxEnabled) { GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0); } - GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { - GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NMergedTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0); } @@ -330,7 +330,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mIOPtrs.nMergedTracks = Merger.NMergedTracks(); mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); - mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); + mIOPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment(); mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt(); mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2(); @@ -344,7 +344,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); - processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); + processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment(); processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt(); processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2(); @@ -355,7 +355,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } if (GetProcessingSettings().debugLevel >= 2) { - GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters()); + GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NMergedTrackClusters(), Merger.NClusters()); } return 0; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 1d5a7a0b1df47..16182464c12fe 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -372,9 +372,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks); } } - computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); + computePointerWithAlignment(mem, mClusters, mNMaxMergedTrackClusters); if (mRec->GetParam().par.earlyTpcTransform) { - computePointerWithAlignment(mem, mClustersXYZ, mNMaxOutputTrackClusters); + computePointerWithAlignment(mem, mClustersXYZ, mNMaxMergedTrackClusters); } computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters); return mem; @@ -446,7 +446,7 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxSingleSectorTracks = ntrk; } } - mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); + mNMaxMergedTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field } else { @@ -1354,14 +1354,14 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i continue; } - uint32_t newRef = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); - if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxOutputTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxOutputTrackClusters); - for (uint32_t k = newRef; k < mNMaxOutputTrackClusters; k++) { + uint32_t newRef = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); + if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxMergedTrackClusters); + for (uint32_t k = newRef; k < mNMaxMergedTrackClusters; k++) { mClusters[k].num = 0; mClusters[k].state = 0; } - CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); return; } @@ -1711,20 +1711,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = nFilteredHits; } - const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); - if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters); - CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); + const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits); + if (iMergedTrackFirstCluster >= mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); continue; } - GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHit* const cl = mClusters + iMergedTrackFirstCluster; for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; - GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1759,13 +1759,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetLooper(leg > 0); mergedTrack.SetLegs(leg); mergedTrack.SetNClusters(nHits); - mergedTrack.SetFirstClusterRef(iOutTrackFirstCluster); + mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster); GPUTPCGMTrackParam& p1 = mergedTrack.Param(); const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; @@ -1796,13 +1796,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (Param().rec.tpc.mergeCE) { bool CEside; if (Param().par.earlyTpcTransform) { - const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; + const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); } else { auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iMergedTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); } } // itr } @@ -1855,7 +1855,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nBlocks * nThreads) { if (mSharedCount[mClusters[i].num] > 1) { mClusters[i].state |= GPUTPCGMMergedTrackHit::flagShared; } @@ -1876,7 +1876,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackSort[mTrackOrderAttach[i]] = i; } - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nThreads * nBlocks) { mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 4487b6d937dc2..54a541ebe0fd6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -70,7 +70,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nLoopData; GPUAtomic(uint32_t) nUnpackedTracks; GPUAtomic(uint32_t) nMergedTracks; - GPUAtomic(uint32_t) nOutputTrackClusters; + GPUAtomic(uint32_t) nMergedTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; const GPUTPCTrack* firstExtrapolatedTracks[NSECTORS]; @@ -113,8 +113,8 @@ class GPUTPCGMMerger : public GPUProcessor GPUhdi() uint32_t NClusters() const { return mNClusters; } GPUhdi() uint32_t NMaxClusters() const { return mNMaxClusters; } GPUhdi() uint32_t NMaxTracks() const { return mNMaxTracks; } - GPUhdi() uint32_t NMaxOutputTrackClusters() const { return mNMaxOutputTrackClusters; } - GPUhdi() uint32_t NOutputTrackClusters() const { return mMemory->nOutputTrackClusters; } + GPUhdi() uint32_t NMaxMergedTrackClusters() const { return mNMaxMergedTrackClusters; } + GPUhdi() uint32_t NMergedTrackClusters() const { return mMemory->nMergedTrackClusters; } GPUhdi() const GPUTPCGMMergedTrackHit* Clusters() const { return mClusters; } GPUhdi() GPUTPCGMMergedTrackHit* Clusters() { return (mClusters); } GPUhdi() const GPUTPCGMMergedTrackHitXYZ* ClustersXYZ() const { return mClustersXYZ; } @@ -249,7 +249,7 @@ class GPUTPCGMMerger : public GPUProcessor uint32_t mNTotalSectorTracks = 0; // maximum number of incoming sector tracks uint32_t mNMaxTracks = 0; // maximum number of output tracks uint32_t mNMaxSingleSectorTracks = 0; // max N tracks in one sector - uint32_t mNMaxOutputTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) + uint32_t mNMaxMergedTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) uint32_t mNMaxClusters = 0; // max total unique clusters (in event) uint32_t mNMaxLooperMatches = 0; // Maximum number of candidate pairs for looper matching diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h index 225f5f0e2c7ad..7306c84cf949c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h @@ -53,9 +53,6 @@ class GPUTPCTrack GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSectorOutCluster); } GPUhd() const GPUTPCTrack* GetNextTrack() const { return (const GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } GPUhd() GPUTPCTrack* NextTrack() { return (GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } - GPUhd() void SetOutTrackCluster(int32_t i, const GPUTPCSectorOutCluster& v) { ((GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)))[i] = v; } - GPUhd() const GPUTPCSectorOutCluster* OutTrackClusters() const { return (const GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)); } - GPUhd() const GPUTPCSectorOutCluster& OutTrackCluster(int32_t i) const { return OutTrackClusters()[i]; } private: int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array From 6c9233635ef7da898d0b7a794f57b5e7b6f8d8ab Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 19:43:59 +0200 Subject: [PATCH 3/3] GPU TPC Merger: Fix out of bounds check --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 16182464c12fe..533e697cc5852 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1513,7 +1513,6 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUTPCGMSectorTrack* trackParts[kMaxParts]; for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; if (track.PrevSegmentNeighbour() >= 0) { @@ -1712,7 +1711,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits); - if (iMergedTrackFirstCluster >= mNMaxMergedTrackClusters) { + if (iMergedTrackFirstCluster + nHits > mNMaxMergedTrackClusters) { raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); continue;