From 431ade5314d36ebbcc174abff00f58029fd0d445 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 26 Jul 2025 12:30:19 +0200 Subject: [PATCH 01/52] GPU QA: Add track t0 QA --- GPU/GPUTracking/qa/GPUQA.cxx | 72 +++++++++++++++++++++++++++++++++--- GPU/GPUTracking/qa/GPUQA.h | 5 +++ 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 3f57ecd8f88be..c3028facc67a5 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -531,10 +531,12 @@ int32_t GPUQA::InitQACreateHistograms() snprintf(name, 2048, i ? "nrows_with_cluster" : "nclusters"); createHist(mNCl[i], name, name, 160, 0, 159); } - snprintf(name, 2048, "tracks"); std::unique_ptr binsPt{CreateLogAxis(AXIS_BINS[4], PT_MIN_CLUST, PT_MAX)}; - createHist(mTracks, name, name, AXIS_BINS[4], binsPt.get()); - createHist(mClXY, "clXY", "clXY", 1000, -250, 250, 1000, -250, 250); + createHist(mTracks, "tracks_pt", "tracks_pt", AXIS_BINS[4], binsPt.get()); + const uint32_t maxTime = (mTracking && mTracking->GetParam().continuousMaxTimeBin > 0) ? 
mTracking->GetParam().continuousMaxTimeBin : TPC_MAX_TIME_BIN_TRIGGERED; + createHist(mT0[0], "tracks_t0", "tracks_t0", (maxTime + 1) / 10, 0, maxTime); + createHist(mT0[1], "tracks_t0_res", "tracks_t0_res", 1000, -100, 100); + createHist(mClXY, "clXY", "clXY", 1000, -250, 250, 1000, -250, 250); // TODO: Pass name only once } if ((mQATasks & taskClusterCounts) && mConfig.clusterRejectionHistograms) { @@ -1732,6 +1734,11 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } mNCl[1]->Fill(nClCorrected); + mT0[0]->Fill(track.GetParam().GetTZOffset()); + if (mTrackMCLabels.size() && !mTrackMCLabels[i].isFake() && !track.MergedLooper() && !track.CCE()) { + const auto& info = GetMCTrack(mTrackMCLabels[i]); + mT0[1]->Fill(track.GetParam().GetTZOffset() - info.t0); + } } if (mClNative && mTracking && mTracking->GetTPCTransformHelper()) { for (uint32_t i = 0; i < GPUChainTracking::NSECTORS; i++) { @@ -2130,7 +2137,7 @@ int32_t GPUQA::DrawQAHistograms(TObjArray* qcout) // Create Canvas for track statistic histos if (mQATasks & taskTrackStatistics) { - mCTracks = createGarbageCollected("ctracks", "Track Pt", 0, 0, 700, 700. * 2. / 3.); + mCTracks = createGarbageCollected("ctrackspt", "Track Pt", 0, 0, 700, 700. * 2. / 3.); mCTracks->cd(); mPTracks = createGarbageCollected("p0", "", 0.0, 0.0, 1.0, 1.0); mPTracks->Draw(); @@ -2138,7 +2145,15 @@ int32_t GPUQA::DrawQAHistograms(TObjArray* qcout) SetLegend(mLTracks); for (int32_t i = 0; i < 2; i++) { - snprintf(name, 2048, "cncl%d Pull", i); + snprintf(name, 2048, "ctrackst0%d", i); + mCT0[i] = createGarbageCollected(name, "Track T0", 0, 0, 700, 700. * 2. / 3.); + mCT0[i]->cd(); + mPT0[i] = createGarbageCollected("p0", "", 0.0, 0.0, 1.0, 1.0); + mPT0[i]->Draw(); + mLT0[i] = createGarbageCollected(0.9 - legendSpacingString * 1.45, 0.93 - (0.93 - 0.86) / 2. * (float)ConfigNumInputs, 0.98, 0.949); + SetLegend(mLT0[i]); + + snprintf(name, 2048, "cncl%d", i); mCNCl[i] = createGarbageCollected(name, i ? 
"Number of clusters (corrected for multiple per row)" : "Number of clusters per track", 0, 0, 700, 700. * 2. / 3.); mCNCl[i]->cd(); mPNCl[i] = createGarbageCollected("p0", "", 0.0, 0.0, 1.0, 1.0); @@ -2742,7 +2757,7 @@ int32_t GPUQA::DrawQAHistograms(TObjArray* qcout) if (mQATasks & taskTrackStatistics) { // Process track statistic histograms float tmpMax = 0.; - for (int32_t k = 0; k < ConfigNumInputs; k++) { + for (int32_t k = 0; k < ConfigNumInputs; k++) { // TODO: Simplify this drawing, avoid copy&paste TH1F* e = mTracks; if (GetHist(e, tin, k, nNewInput) == nullptr) { continue; @@ -2786,6 +2801,51 @@ int32_t GPUQA::DrawQAHistograms(TObjArray* qcout) } for (int32_t i = 0; i < 2; i++) { + tmpMax = 0.; + for (int32_t k = 0; k < ConfigNumInputs; k++) { + TH1F* e = mT0[i]; + if (GetHist(e, tin, k, nNewInput) == nullptr) { + continue; + } + e->SetMaximum(-1111); + if (e->GetMaximum() > tmpMax) { + tmpMax = e->GetMaximum(); + } + } + mPT0[i]->cd(); + for (int32_t k = 0; k < ConfigNumInputs; k++) { + TH1F* e = mT0[i]; + if (GetHist(e, tin, k, nNewInput) == nullptr) { + continue; + } + if (tout && !mConfig.inputHistogramsOnly && k == 0) { + e->Write(); + } + e->SetMaximum(tmpMax * 1.02); + e->SetMinimum(tmpMax * -0.02); + e->SetStats(kFALSE); + e->SetLineWidth(1); + e->GetYaxis()->SetTitle("a.u."); + e->GetXaxis()->SetTitle(i ? "t0 vs t0_{mc}" : "t0"); + if (qcout) { + qcout->Add(e); + } + e->SetMarkerColor(kBlack); + e->SetLineColor(colorNums[k % COLORCOUNT]); + e->Draw(k == 0 ? "" : "same"); + GetName(fname, k); + snprintf(name, 2048, "%sTrack T0 %s", fname, i ? "resolution" : ""); + mLT0[i]->AddEntry(e, name, "l"); + } + mLT0[i]->Draw(); + mCT0[i]->cd(); + snprintf(name, 2048, "plots/t0%s.pdf", i ? "_res" : ""); + mCT0[i]->Print(name); + if (mConfig.writeRootFiles) { + snprintf(name, 2048, "plots/t0%s.root", i ? 
"_res" : ""); + mCT0[i]->Print(name); + } + tmpMax = 0.; for (int32_t k = 0; k < ConfigNumInputs; k++) { TH1F* e = mNCl[i]; diff --git a/GPU/GPUTracking/qa/GPUQA.h b/GPU/GPUTracking/qa/GPUQA.h index e587b15f68d80..591eb1722bf9f 100644 --- a/GPU/GPUTracking/qa/GPUQA.h +++ b/GPU/GPUTracking/qa/GPUQA.h @@ -305,6 +305,11 @@ class GPUQA TPad* mPNCl[2]; TLegend* mLNCl[2]; + TH1F* mT0[2]; + TCanvas* mCT0[2]; + TPad* mPT0[2]; + TLegend* mLT0[2]; + TH2F* mClXY; TCanvas* mCClXY; TPad* mPClXY; From 13992539e584d906d6d31eb92b30ab52fdd0d9b0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 21 Jul 2025 15:21:28 +0200 Subject: [PATCH 02/52] GPU Display: Block until display started and print meaningful info messages to the console --- GPU/GPUTracking/Global/GPUChainTracking.cxx | 7 +++- .../Interface/GPUO2InterfaceDisplay.cxx | 6 ++- GPU/GPUTracking/display/GPUDisplay.cxx | 42 ++++++++++++++----- GPU/GPUTracking/display/GPUDisplay.h | 27 +++++++----- GPU/GPUTracking/display/GPUDisplayInterface.h | 3 +- .../display/frontend/GPUDisplayKeys.cxx | 2 +- .../display/render/GPUDisplayDraw.cxx | 2 +- 7 files changed, 61 insertions(+), 28 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 9d2578731a30c..79e9ce6cef766 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -824,6 +824,7 @@ int32_t GPUChainTracking::RunChainFinalize() if (GetProcessingSettings().eventDisplay) { if (!mDisplayRunning) { + GPUInfo("Starting Event Display..."); if (mEventDisplay->StartDisplay()) { return (1); } @@ -832,6 +833,8 @@ int32_t GPUChainTracking::RunChainFinalize() mEventDisplay->ShowNextEvent(); } + mEventDisplay->WaitTillEventShown(); + if (GetProcessingSettings().eventDisplay->EnableSendKey()) { while (kbhit()) { getch(); @@ -863,9 +866,9 @@ int32_t GPUChainTracking::RunChainFinalize() return (2); } GetProcessingSettings().eventDisplay->setDisplayControl(0); - 
GPUInfo("Loading next event"); + GPUInfo("Loading next event..."); - mEventDisplay->WaitForNextEvent(); + mEventDisplay->BlockTillNextEvent(); } return 0; diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx index 60d5eaf9ae162..ad740200a253a 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx @@ -46,7 +46,8 @@ int32_t GPUO2InterfaceDisplay::startDisplay() if (retVal) { return retVal; } - mDisplay->WaitForNextEvent(); + mDisplay->WaitTillEventShown(); + mDisplay->BlockTillNextEvent(); return 0; } @@ -59,6 +60,7 @@ int32_t GPUO2InterfaceDisplay::show(const GPUTrackingInOutPointers* ptrs) ptrs = tmpPtr.get(); } mDisplay->ShowNextEvent(ptrs); + mDisplay->WaitTillEventShown(); do { usleep(10000); } while (mFrontend->getDisplayControl() == 0); @@ -66,7 +68,7 @@ int32_t GPUO2InterfaceDisplay::show(const GPUTrackingInOutPointers* ptrs) return 1; } mFrontend->setDisplayControl(0); - mDisplay->WaitForNextEvent(); + mDisplay->BlockTillNextEvent(); return 0; } diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 136b1947f60ee..163e4c0981bc2 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -21,6 +21,8 @@ #include #include #include +#include +#include #ifndef _WIN32 #include "../utils/linux_helpers.h" @@ -143,7 +145,7 @@ void GPUDisplay::ResizeScene(int32_t width, int32_t height, bool init) mBackend->resizeScene(width, height); if (init) { - mResetScene = 1; + mResetScene = true; mViewMatrix = MY_HMM_IDENTITY; mModelMatrix = MY_HMM_IDENTITY; } @@ -220,6 +222,14 @@ int32_t GPUDisplay::DrawGLScene() GPUError("Runtime error %s during display", e.what()); retVal = 1; } + + if (mLoadAndShowEvent) { + { + std::lock_guard lock(mMutexLoadAndShowEvent); + mLoadAndShowEvent = false; + } + mCVLoadAndShowEvent.notify_one(); + } mSemLockDisplay.Unlock(); return 
retVal; @@ -266,9 +276,9 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla mCfgR.camLookOrigin = mCfgR.camYUp = false; mAngleRollOrigin = -1e9f; mCfgR.fov = 45.f; - mUpdateDrawCommands = 1; + mUpdateDrawCommands = true; - mResetScene = 0; + mResetScene = false; } else { float moveZ = scalefactor * ((float)mMouseWheelTmp / 150 + (float)(mFrontend->mKeys[(uint8_t)'W'] - mFrontend->mKeys[(uint8_t)'S']) * (!mFrontend->mKeys[mFrontend->KEY_SHIFT]) * 0.2f * mFPSScale); float moveY = scalefactor * ((float)(mFrontend->mKeys[mFrontend->KEY_PAGEDOWN] - mFrontend->mKeys[mFrontend->KEY_PAGEUP]) * 0.2f * mFPSScale); @@ -386,7 +396,7 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla } if (deltaLine) { SetInfo("%s line width: %f", deltaLine > 0 ? "Increasing" : "Decreasing", mCfgL.lineWidth); - mUpdateDrawCommands = 1; + mUpdateDrawCommands = true; } minSize *= 2; int32_t deltaPoint = mFrontend->mKeys[(uint8_t)'+'] * (!mFrontend->mKeysShift[(uint8_t)'+']) - mFrontend->mKeys[(uint8_t)'-'] * (!mFrontend->mKeysShift[(uint8_t)'-']); @@ -396,7 +406,7 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla } if (deltaPoint) { SetInfo("%s point size: %f", deltaPoint > 0 ? 
"Increasing" : "Decreasing", mCfgL.pointSize); - mUpdateDrawCommands = 1; + mUpdateDrawCommands = true; } } @@ -616,7 +626,7 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) disableUnsupportedOptions(); } if (mUpdateEventData || mUpdateVertexLists) { - mUpdateDrawCommands = 1; + mUpdateDrawCommands = true; } if (animateTime < 0 && (mUpdateEventData || mResetScene) && mIOPtrs) { @@ -625,8 +635,8 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) mTimerFPS.ResetStart(); mFramesDoneFPS = 0; mFPSScaleadjust = 0; - mUpdateVertexLists = 1; - mUpdateEventData = 0; + mUpdateVertexLists = true; + mUpdateEventData = false; } hmm_mat4 nextViewMatrix = MY_HMM_IDENTITY; @@ -658,7 +668,7 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) mBackend->drawField(); } - mUpdateDrawCommands = mUpdateRenderPipeline = 0; + mUpdateDrawCommands = mUpdateRenderPipeline = false; mBackend->finishDraw(doScreenshot, renderToMixBuffer, mixSlaveImage); if (animateTime < 0) { @@ -708,15 +718,25 @@ void GPUDisplay::ShowNextEvent(const GPUTrackingInOutPointers* ptrs) if (mMaxClusterZ <= 0) { mResetScene = true; } - mSemLockDisplay.Unlock(); mFrontend->mNeedUpdate = 1; mUpdateEventData = true; + mLoadAndShowEvent = true; + mSemLockDisplay.Unlock(); } -void GPUDisplay::WaitForNextEvent() { mSemLockDisplay.Lock(); } +void GPUDisplay::BlockTillNextEvent() { mSemLockDisplay.Lock(); } + +void GPUDisplay::WaitTillEventShown() +{ + std::unique_lock lock(mMutexLoadAndShowEvent); + while (mLoadAndShowEvent) { + mCVLoadAndShowEvent.wait(lock); + } +} int32_t GPUDisplay::StartDisplay() { + mLoadAndShowEvent = true; if (mFrontend->StartDisplay()) { return (1); } diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 06977c26e0b63..b59e3c52e9bd3 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -20,13 +20,15 @@ #include 
"GPUDisplayInterface.h" #include "GPUSettings.h" -#include "../utils/vecpod.h" -#include "../utils/qsem.h" - #include +#include +#include + #include "HandMadeMath.h" #include "utils/timer.h" +#include "utils/vecpod.h" +#include "utils/qsem.h" namespace o2::gpu { @@ -44,7 +46,8 @@ class GPUDisplay : public GPUDisplayInterface int32_t StartDisplay() override; void ShowNextEvent(const GPUTrackingInOutPointers* ptrs = nullptr) override; - void WaitForNextEvent() override; + void BlockTillNextEvent() override; + void WaitTillEventShown() override; void SetCollisionFirstCluster(uint32_t collision, int32_t sector, int32_t cluster) override; void UpdateCalib(const GPUCalibObjectsConst* calib) override { mCalib = calib; } void UpdateParam(const GPUParam* param) override { mParam = param; } @@ -221,7 +224,10 @@ class GPUDisplay : public GPUDisplayInterface GPUSettingsDisplayRenderer mCfgR; const GPUSettingsProcessing& mProcessingSettings; GPUQA* mQA; + qSem mSemLockDisplay; + std::mutex mMutexLoadAndShowEvent; + std::condition_variable mCVLoadAndShowEvent; bool mDrawTextInCompatMode = false; int32_t mDrawTextFontSize = 0; @@ -272,13 +278,14 @@ class GPUDisplay : public GPUDisplayInterface vecpod mTRDTrackIds; vecpod mITSStandaloneTracks; std::vector mTrackFilter; - bool mUpdateTrackFilter = false; - int32_t mUpdateVertexLists = 1; - int32_t mUpdateEventData = 0; - int32_t mUpdateDrawCommands = 1; - int32_t mUpdateRenderPipeline = 0; - volatile int32_t mResetScene = 0; + volatile bool mUpdateTrackFilter = false; + volatile bool mUpdateVertexLists = true; + volatile bool mUpdateEventData = false; + volatile bool mUpdateDrawCommands = true; + volatile bool mUpdateRenderPipeline = false; + volatile bool mResetScene = false; + volatile bool mLoadAndShowEvent = false; int32_t mAnimate = 0; HighResTimer mAnimationTimer; diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.h b/GPU/GPUTracking/display/GPUDisplayInterface.h index 574a8cffc71f0..7caceb1699da6 100644 --- 
a/GPU/GPUTracking/display/GPUDisplayInterface.h +++ b/GPU/GPUTracking/display/GPUDisplayInterface.h @@ -40,7 +40,8 @@ class GPUDisplayInterface virtual ~GPUDisplayInterface(); virtual int32_t StartDisplay() = 0; virtual void ShowNextEvent(const GPUTrackingInOutPointers* ptrs = nullptr) = 0; - virtual void WaitForNextEvent() = 0; + virtual void BlockTillNextEvent() = 0; + virtual void WaitTillEventShown() = 0; virtual void SetCollisionFirstCluster(uint32_t collision, int32_t sector, int32_t cluster) = 0; virtual void UpdateCalib(const GPUCalibObjectsConst* calib) = 0; virtual void UpdateParam(const GPUParam* param) = 0; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx index 54258857a244c..e1e6d9e54df0a 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx @@ -89,7 +89,7 @@ void GPUDisplay::HandleKey(uint8_t key) mFrontend->mDisplayControl = 2; SetInfo("Exiting", 1); } else if (key == 'r') { - mResetScene = 1; + mResetScene = true; SetInfo("View reset", 1); } else if (key == mFrontend->KEY_ALT && mFrontend->mKeysShift[mFrontend->KEY_ALT]) { mCfgR.camLookOrigin ^= 1; diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index cccf0f098bd8d..9410ee69fb5bd 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -944,7 +944,7 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() break; // TODO: Only sector 0 filled for now } - mUpdateVertexLists = 0; + mUpdateVertexLists = false; size_t totalVertizes = 0; for (int32_t i = 0; i < NSECTORS; i++) { totalVertizes += mVertexBuffer[i].size(); From 9f359eefa7a89ae88f0f04722ea8088d83eefdc0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Jul 2025 10:01:55 +0200 Subject: [PATCH 03/52] GPU: Use [[maybe_unused]] to silence compiler warnings --- 
.../Global/GPUChainTrackingClusterizer.cxx | 3 +-- .../Global/GPUChainTrackingDebugAndProfiling.cxx | 3 +-- GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx | 3 +-- .../SectorTracker/GPUTPCTrackletConstructor.cxx | 3 +-- GPU/GPUTracking/display/render/GPUDisplayDraw.cxx | 4 +--- GPU/GPUTracking/qa/GPUQA.cxx | 6 ++---- GPU/GPUTracking/qa/GPUQAHelper.h | 4 ---- GPU/GPUTracking/utils/qconfig.h | 15 +++++---------- 8 files changed, 12 insertions(+), 29 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index fd3699ae4d125..1fa2014fe47e7 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -591,7 +591,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) return ForwardTPCDigits(); } #ifdef GPUCA_TPC_GEOMETRY_O2 - int32_t tpcTimeBinCut = mUpdateNewCalibObjects && mNewCalibValues->newTPCTimeBinCut ? mNewCalibValues->tpcTimeBinCut : param().tpcCutTimeBin; + [[maybe_unused]] int32_t tpcTimeBinCut = mUpdateNewCalibObjects && mNewCalibValues->newTPCTimeBinCut ? mNewCalibValues->tpcTimeBinCut : param().tpcCutTimeBin; mRec->PushNonPersistentMemory(qStr2Tag("TPCCLUST")); const auto& threadContext = GetThreadContext(); const bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding; @@ -894,7 +894,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0); uint32_t nBlocks = doGPU ? 
clusterer.mPmemory->counters.nPagesSubsector : GPUTrackingInOutZS::NENDPOINTS; - (void)tpcTimeBinCut; // TODO: To be used in decoding kernels switch (mCFContext->zsVersion) { default: GPUFatal("Data with invalid TPC ZS mode (%d) received", mCFContext->zsVersion); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 173d2fb916239..7d790d8e3913f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -70,7 +70,7 @@ int32_t GPUChainTracking::DoProfile() fwrite(&bmpFH, 1, sizeof(bmpFH), fp2); fwrite(&bmpIH, 1, sizeof(bmpIH), fp2); - int32_t nEmptySync = 0; + [[maybe_unused]] int32_t nEmptySync = 0; for (uint32_t i = 0; i < bmpheight * ConstructorBlockCount() * ConstructorThreadCount(); i += ConstructorBlockCount() * ConstructorThreadCount()) { int32_t fEmpty = 1; for (uint32_t j = 0; j < ConstructorBlockCount() * ConstructorThreadCount(); j++) { @@ -103,7 +103,6 @@ int32_t GPUChainTracking::DoProfile() } else { nEmptySync = 0; } - (void)nEmptySync; // if (nEmptySync == GPUCA_SCHED_ROW_STEP + 2) break; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 5ca20a39d0462..eeabab6ed395f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -28,8 +28,7 @@ int32_t GPUChainTracking::RunRefit() GPUTrackingRefitProcessor& Refit = processors()->trackingRefit; GPUTrackingRefitProcessor& RefitShadow = doGPU ? processorsShadow()->trackingRefit : Refit; - const auto& threadContext = GetThreadContext(); - (void)threadContext; + [[maybe_unused]] const auto& threadContext = GetThreadContext(); SetupGPUProcessor(&Refit, false); RefitShadow.SetPtrsFromGPUConstantMem(processorsShadow(), doGPU ? &processorsDevice()->param : nullptr); RefitShadow.SetPropagator(doGPU ? 
processorsShadow()->calibObjects.o2Propagator : GetO2Propagator()); diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx index 2660f6d8cbf44..0b22bfa57c89e 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx @@ -274,7 +274,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } CADEBUG(printf("%14s: SEA PROP ROW %3d X %8.3f -", "", iRow, tParam.X()); for (int32_t i = 0; i < 5; i++) { printf(" %8.3f", tParam.Par()[i]); } printf(" -"); for (int32_t i = 0; i < 15; i++) { printf(" %8.3f", tParam.Cov()[i]); } printf("\n")); - bool found = false; + [[maybe_unused]] bool found = false; float yUncorrected = tParam.GetY(), zUncorrected = tParam.GetZ(); do { if (row.NHits() < 1) { @@ -373,7 +373,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, r.mFirstRow = iRow; } } while (false); - (void)found; if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) { uint32_t pad = CAMath::Float2UIntRn(GPUTPCGeometry::LinearY2Pad(tracker.ISector(), iRow, yUncorrected)); if (pad < GPUTPCGeometry::NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISector(), iRow, pad)) { diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 9410ee69fb5bd..577d6cd906297 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -129,7 +129,7 @@ GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSector, int32_t select, ui if (mOverlayTFClusters.size() > 0 || iCol == 0 || mNCollissions) { const int32_t firstCluster = (mOverlayTFClusters.size() > 1 && iCol > 0) ? 
mOverlayTFClusters[iCol - 1][iSector] : 0; const int32_t lastCluster = (mOverlayTFClusters.size() > 1 && iCol + 1 < mOverlayTFClusters.size()) ? mOverlayTFClusters[iCol][iSector] : (mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSector] : 0); - const bool checkClusterCollision = mQA && mNCollissions && mOverlayTFClusters.size() == 0 && mIOPtrs->clustersNative && mIOPtrs->clustersNative->clustersMCTruth; + [[maybe_unused]] const bool checkClusterCollision = mQA && mNCollissions && mOverlayTFClusters.size() == 0 && mIOPtrs->clustersNative && mIOPtrs->clustersNative->clustersMCTruth; for (int32_t cidInSector = firstCluster; cidInSector < lastCluster; cidInSector++) { const int32_t cid = GET_CID(iSector, cidInSector); #ifdef GPUCA_TPC_GEOMETRY_O2 @@ -139,8 +139,6 @@ GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSector, int32_t select, ui continue; } } -#else - (void)checkClusterCollision; #endif if (mCfgH.hideUnmatchedClusters && mQA && mQA->SuppressHit(cid)) { continue; diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index c3028facc67a5..0466fed0250e1 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -99,7 +99,7 @@ using namespace o2::gpu; bool unattached = attach == 0; \ float qpt = 0; \ bool lowPt = false; \ - bool mev200 = false; \ + [[maybe_unused]] bool mev200 = false; \ bool mergedLooper = false; \ int32_t id = attach & gputpcgmmergertypes::attachTrackMask; \ if (!unattached) { \ @@ -126,7 +126,6 @@ using namespace o2::gpu; #define CHECK_CLUSTER_STATE_NOCOUNT() \ CHECK_CLUSTER_STATE_INIT() \ - (void)mev200; /* silence unused variable warning*/ \ if (!lowPt && !mergedLooper) { \ GPUTPCClusterRejection::GetProtectionStatus(attach, physics, protect); \ } @@ -1981,8 +1980,7 @@ int32_t GPUQA::DrawQAHistograms(TObjArray* qcout) std::vector colorNums(COLORCOUNT); if (!qcout) { - static int32_t initColorsInitialized = 
initColors(); - (void)initColorsInitialized; + [[maybe_unused]] static int32_t initColorsInitialized = initColors(); } for (int32_t i = 0; i < COLORCOUNT; i++) { colorNums[i] = qcout ? defaultColorNums[i] : mColors[i]->GetNumber(); diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index a830562119467..e9d98f3e4e305 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -105,10 +105,6 @@ class GPUTPCTrkLbl *labelWeight = bestLabel.fWeight; *totalWeight = mTotalWeight; *maxCount = bestLabelCount; - } else { - (void)labelWeight; - (void)totalWeight; - (void)maxCount; } U retVal = bestLabel; if (bestLabelCount < (1.f - mTrackMCMaxFake) * mNCl) { diff --git a/GPU/GPUTracking/utils/qconfig.h b/GPU/GPUTracking/utils/qconfig.h index bc755e583c3b7..a809cc69be501 100644 --- a/GPU/GPUTracking/utils/qconfig.h +++ b/GPU/GPUTracking/utils/qconfig.h @@ -97,12 +97,9 @@ enum qConfigRetVal { qcrOK = 0, #define BeginSubConfig(name, instance, parent, preoptname, preoptnameshort, descr, ...) \ { \ - constexpr const char* preopt = preoptname; \ - (void)preopt; \ - constexpr const char preoptshort = preoptnameshort; \ - (void)preoptshort; \ - name& tmp = parent.instance; \ - (void)tmp; \ + [[maybe_unused]] constexpr const char* preopt = preoptname; \ + [[maybe_unused]] constexpr const char preoptshort = preoptnameshort; \ + [[maybe_unused]] name& tmp = parent.instance; \ bool tmpfound = true; \ if (found) { \ } @@ -174,10 +171,8 @@ enum qConfigRetVal { qcrOK = 0, const char* qon_mxcat(qConfig_subconfig_, name) = preoptnameshort == 0 ? (qon_mxstr(name) ": --" preoptname "\n\t\t" descr) : (qon_mxstr(name) ": -" qon_mxstr('a') " (--" preoptname ")\n\t\t" descr); \ (void)qon_mxcat(qConfig_subconfig_, name); \ if (subConfig == nullptr || strcmp(subConfig, followSub == 2 ? 
qon_mxstr(name) : preoptname) == 0) { \ - constexpr const char* preopt = preoptname; \ - (void)preopt; \ - constexpr const char preoptshort = preoptnameshort; \ - (void)preoptshort; \ + [[maybe_unused]] constexpr const char* preopt = preoptname; \ + [[maybe_unused]] constexpr const char preoptshort = preoptnameshort; \ char argBuffer[2] = {preoptnameshort, 0}; \ printf("\n %s: (--%s%s%s)\n", descr, preoptname, preoptnameshort == 0 ? "" : " or -", argBuffer); #define BeginHiddenConfig(name, instance) { From bd684b613ff6956db790a6c9fb317bfc73291cfa Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Jul 2025 10:02:15 +0200 Subject: [PATCH 04/52] GPU: Improve timing messages for GPU Display and GPU QA --- GPU/GPUTracking/display/GPUDisplay.cxx | 15 ++++-- GPU/GPUTracking/display/GPUDisplay.h | 2 +- .../display/render/GPUDisplayDraw.cxx | 53 ++++++++++++++++--- .../display/render/GPUDisplayImportEvent.cxx | 21 +++++++- GPU/GPUTracking/qa/GPUQA.cxx | 29 +++++----- GPU/GPUTracking/utils/timer.cxx | 7 +++ GPU/GPUTracking/utils/timer.h | 1 + 7 files changed, 97 insertions(+), 31 deletions(-) diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 163e4c0981bc2..e7c04a1bfb407 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -237,6 +237,7 @@ int32_t GPUDisplay::DrawGLScene() void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSlaveImage, hmm_mat4& nextViewMatrix) { + HighResTimer timer(mUpdateVertexLists && mChain && mChain->GetProcessingSettings().debugLevel >= 2); int32_t mMouseWheelTmp = mFrontend->mMouseWheel; mFrontend->mMouseWheel = 0; bool lookOrigin = mCfgR.camLookOrigin ^ mFrontend->mKeys[mFrontend->KEY_ALT]; @@ -420,6 +421,9 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla mFrontend->mMouseDnX = mFrontend->mMouseMvX; mFrontend->mMouseDnY = mFrontend->mMouseMvY; } + if (timer.IsRunning()) { + GPUInfo("Display Time: 
Camera:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } } void GPUDisplay::DrawGLScene_drawCommands() @@ -618,7 +622,6 @@ void GPUDisplay::DrawGLScene_drawCommands() void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) // negative time = no mixing { - bool showTimer = false; bool doScreenshot = (mRequestScreenshot || mAnimateScreenshot) && animateTime < 0; updateOptions(); @@ -629,8 +632,9 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) mUpdateDrawCommands = true; } + HighResTimer timerDraw; if (animateTime < 0 && (mUpdateEventData || mResetScene) && mIOPtrs) { - showTimer = true; + timerDraw.ResetStart(); DrawGLScene_updateEventData(); mTimerFPS.ResetStart(); mFramesDoneFPS = 0; @@ -646,8 +650,8 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) // Prepare Event if (mUpdateVertexLists && mIOPtrs) { size_t totalVertizes = DrawGLScene_updateVertexList(); - if (showTimer) { - printf("Event visualization time: %'d us (vertices %'ld / %'ld bytes)\n", (int32_t)(mTimerDraw.GetCurrentElapsedTime() * 1000000.), (int64_t)totalVertizes, (int64_t)(totalVertizes * sizeof(mVertexBuffer[0][0]))); + if (timerDraw.IsRunning()) { + printf("Event visualization time: %'d us (vertices %'ld / %'ld bytes)\n", (int32_t)(timerDraw.GetCurrentElapsedTime() * 1000000.), (int64_t)totalVertizes, (int64_t)(totalVertizes * sizeof(mVertexBuffer[0][0]))); } } @@ -668,7 +672,8 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) mBackend->drawField(); } - mUpdateDrawCommands = mUpdateRenderPipeline = false; + mUpdateDrawCommands = false; + mUpdateRenderPipeline = false; mBackend->finishDraw(doScreenshot, renderToMixBuffer, mixSlaveImage); if (animateTime < 0) { diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index b59e3c52e9bd3..c8deeb2378970 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ 
b/GPU/GPUTracking/display/GPUDisplay.h @@ -310,7 +310,7 @@ class GPUDisplay : public GPUDisplayInterface float mFPSScale = 1, mFPSScaleadjust = 0; int32_t mFramesDone = 0, mFramesDoneFPS = 0; - HighResTimer mTimerFPS, mTimerDisplay, mTimerDraw; + HighResTimer mTimerFPS; vboList mGlDLLines[NSECTORS][N_LINES_TYPE]; vecpod> mGlDLFinal[NSECTORS]; vboList mGlDLFinalITS; diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 577d6cd906297..06c572e516853 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -755,6 +755,7 @@ GPUDisplay::vboList GPUDisplay::DrawGridTRD(int32_t sector) size_t GPUDisplay::DrawGLScene_updateVertexList() { + HighResTimer timer(mChain && mChain->GetProcessingSettings().debugLevel >= 2); for (int32_t i = 0; i < NSECTORS; i++) { mVertexBuffer[i].clear(); mVertexBufferStart[i].clear(); @@ -776,6 +777,10 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mGlDLFinal[iSector].resize(mNCollissions); } } + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex Init:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } + int32_t numThreads = getNumThreads(); tbb::task_arena(numThreads).execute([&] { if (mChain && (mChain->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { @@ -785,6 +790,9 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mGlDLLines[iSector][tINITLINK] = DrawLinks(tracker, tINITLINK, true); tracker.SetPointersDataLinks(mChain->rec()->Res(tracker.MemoryResLinks()).Ptr()); // clang-format off }, tbb::simple_partitioner()); // clang-format on + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex Links:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { const GPUTPCTracker& tracker = sectorTracker(iSector); @@ -798,11 +806,17 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mGlDLGridTRD[iSector] = DrawGridTRD(iSector); } 
// clang-format off }, tbb::simple_partitioner()); // clang-format on + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex Seeds:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { const GPUTPCTracker& tracker = sectorTracker(iSector); mGlDLLines[iSector][tEXTRAPOLATEDTRACK] = DrawTracks(tracker, 1); // clang-format off }, tbb::simple_partitioner()); // clang-format on + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex Sector Tracks:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } } tbb::parallel_for(0, numThreads, [&](int32_t iThread) { mThreadTracks[iThread].resize(mNCollissions); @@ -875,6 +889,9 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][sector][1].emplace_back(i); }); } + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex Sort merged tracks:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } GPUTPCGMPropagator prop; prop.SetMaxSinPhi(.999); @@ -903,6 +920,9 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() } } // clang-format off }, tbb::simple_partitioner()); // clang-format on + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex Merged Tracks:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) { @@ -911,36 +931,49 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() } } // clang-format off }, tbb::simple_partitioner()); // clang-format on + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex Clusters:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } + }); // End omp parallel mGlDLFinalITS = DrawFinalITS(); - for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { - for (int32_t i = N_POINTS_TYPE_TPC; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD; i++) { + for (int32_t i = N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF; i < N_POINTS_TYPE_TPC + 
N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF + N_POINTS_TYPE_ITS; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSector][i][iCol] = DrawSpacePointsTRD(iSector, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawSpacePointsITS(iSector, i, iCol); } } + break; // TODO: Only sector 0 filled for now + } + + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex ITS:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { - for (int32_t i = N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF; i++) { + for (int32_t i = N_POINTS_TYPE_TPC; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSector][i][iCol] = DrawSpacePointsTOF(iSector, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawSpacePointsTRD(iSector, i, iCol); } } - break; // TODO: Only sector 0 filled for now + } + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex TRD:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { - for (int32_t i = N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF + N_POINTS_TYPE_ITS; i++) { + for (int32_t i = N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSector][i][iCol] = DrawSpacePointsITS(iSector, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawSpacePointsTOF(iSector, i, iCol); } } break; // TODO: Only sector 0 filled for now } + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex TOF:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } mUpdateVertexLists = false; size_t totalVertizes = 0; @@ -968,5 +1001,9 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() for (int32_t i = 0; i < (mUseMultiVBO ? 
GPUCA_NSECTORS : 1); i++) { mVertexBuffer[i].clear(); } + if (timer.IsRunning()) { + GPUInfo("Display Time: Vertex Final:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } + return totalVertizes; } diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index ab4c0abd7b60e..df3b385c14fe5 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -23,6 +23,7 @@ #include "GPUTPCConvertImpl.h" #include "GPUTRDGeometry.h" #include "GPUTRDTrackletWord.h" +#include "GPUChainTracking.h" #include "GPUParam.inc" #include "DataFormatsTOF/Cluster.h" @@ -40,7 +41,7 @@ using namespace o2::gpu; void GPUDisplay::DrawGLScene_updateEventData() { - mTimerDraw.ResetStart(); + HighResTimer timer(mChain->GetProcessingSettings().debugLevel >= 2); if (mIOPtrs->clustersNative) { mCurrentClusters = mIOPtrs->clustersNative->nClustersTotal; } else { @@ -110,6 +111,9 @@ void GPUDisplay::DrawGLScene_updateEventData() } } } + if (timer.IsRunning()) { + GPUInfo("Display Time: Init:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } if (mCfgH.trackFilter) { uint32_t nTracks = mConfig.showTPCTracksFromO2Format ? 
mIOPtrs->nOutputTracksTPCO2 : mIOPtrs->nMergedTracks; @@ -128,6 +132,9 @@ void GPUDisplay::DrawGLScene_updateEventData() } } mUpdateTrackFilter = false; + if (timer.IsRunning()) { + GPUInfo("Display Time: Track Filter:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, NSECTORS, 1), float(0.f), [&](const tbb::blocked_range& r, float maxClusterZ) { for (int32_t iSector = r.begin(); iSector < r.end(); iSector++) { @@ -174,6 +181,9 @@ void GPUDisplay::DrawGLScene_updateEventData() } return maxClusterZ; // clang-format off }, [](const float a, const float b) { return std::max(a, b); }, tbb::simple_partitioner()); // clang-format on + if (timer.IsRunning()) { + GPUInfo("Display Time: Load TPC:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, mCurrentSpacePointsTRD, 32), float(mMaxClusterZ), [&](const tbb::blocked_range& r, float maxClusterZ) { int32_t trdTriggerRecord = -1; @@ -209,6 +219,9 @@ void GPUDisplay::DrawGLScene_updateEventData() } return maxClusterZ; // clang-format off }, [](const float a, const float b) { return std::max(a, b); }, tbb::static_partitioner()); // clang-format on + if (timer.IsRunning()) { + GPUInfo("Display Time: Load TRD:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, mCurrentClustersTOF, 32), float(mMaxClusterZ), [&](const tbb::blocked_range& r, float maxClusterZ) { for (int32_t i = r.begin(); i < r.end(); i++) { @@ -230,6 +243,9 @@ void GPUDisplay::DrawGLScene_updateEventData() } return maxClusterZ; // clang-format off }, [](const float a, const float b) { return std::max(a, b); }); // clang-format on + if (timer.IsRunning()) { + GPUInfo("Display Time: Load TOF:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } if (mCurrentClustersITS) { float itsROFhalfLen = 0; @@ -270,4 +286,7 @@ void GPUDisplay::DrawGLScene_updateEventData() 
} } } + if (timer.IsRunning()) { + GPUInfo("Display Time: Load ITS:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); + } } diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 0466fed0250e1..d542e39c55b52 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -663,10 +663,9 @@ void GPUQA::InitO2MCData(GPUTrackingInOutPointers* updateIOPtr) { #ifdef GPUCA_O2_LIB if (!mO2MCDataLoaded) { - HighResTimer timer; + HighResTimer timer(mTracking && mTracking->GetProcessingSettings().debugLevel); if (mTracking && mTracking->GetProcessingSettings().debugLevel) { GPUInfo("Start reading O2 Track MC information"); - timer.Start(); } static constexpr float PRIM_MAX_T = 0.01f; @@ -776,7 +775,7 @@ void GPUQA::InitO2MCData(GPUTrackingInOutPointers* updateIOPtr) } } } - if (mTracking && mTracking->GetProcessingSettings().debugLevel) { + if (timer.IsRunning()) { GPUInfo("Finished reading O2 Track MC information (%f seconds)", timer.GetCurrentElapsedTime()); } mO2MCDataLoaded = true; @@ -922,7 +921,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx mClusterParam.resize(GetNMCLabels()); memset(mClusterParam.data(), 0, mClusterParam.size() * sizeof(mClusterParam[0])); } - HighResTimer timer; + HighResTimer timer(QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 2)); mNEvents++; if (mConfig.writeMCLabels) { @@ -934,9 +933,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx bool mcAvail = mcPresent() || tracksExtMC; - if (mcAvail) { - // Assign Track MC Labels - timer.Start(); + if (mcAvail) { // Assign Track MC Labels if (tracksExternal) { #ifdef GPUCA_O2_LIB for (uint32_t i = 0; i < tracksExternal->size(); i++) { @@ -987,7 +984,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } }); } - if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { + if (timer.IsRunning()) { GPUInfo("QA Time: Assign Track Labels:\t\t%6.0f us", 
timer.GetCurrentElapsedTime(true) * 1e6); } @@ -1127,7 +1124,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } } - if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { + if (timer.IsRunning()) { GPUInfo("QA Time: Cluster attach status:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } @@ -1154,7 +1151,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } } - if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { + if (timer.IsRunning()) { GPUInfo("QA Time: Compute cluster label weights:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } @@ -1178,7 +1175,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } // clang-format off }, tbb::simple_partitioner()); // clang-format on - if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { + if (timer.IsRunning()) { GPUInfo("QA Time: Compute track mc parameters:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } @@ -1268,7 +1265,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } } - if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { + if (timer.IsRunning()) { GPUInfo("QA Time: Fill efficiency histograms:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } } @@ -1444,7 +1441,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } } - if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { + if (timer.IsRunning()) { GPUInfo("QA Time: Fill resolution histograms:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } } @@ -1671,7 +1668,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } - if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { + if (timer.IsRunning()) { GPUInfo("QA Time: Fill cluster histograms:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } } @@ -1765,7 +1762,7 @@ void 
GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx clusterAttachCounts.clear(); } - if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { + if (timer.IsRunning()) { GPUInfo("QA Time: Fill track statistics:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } } @@ -1838,7 +1835,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx mClusterCounts = counts_t(); } - if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { + if (timer.IsRunning()) { GPUInfo("QA Time: Cluster Counts:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } diff --git a/GPU/GPUTracking/utils/timer.cxx b/GPU/GPUTracking/utils/timer.cxx index f3b108fc6f159..df3790ad9ccbf 100644 --- a/GPU/GPUTracking/utils/timer.cxx +++ b/GPU/GPUTracking/utils/timer.cxx @@ -23,6 +23,13 @@ #include #endif +HighResTimer::HighResTimer(bool start) +{ + if (start) { + ResetStart(); + } +} + inline double HighResTimer::GetTime() { #ifdef _WIN32 diff --git a/GPU/GPUTracking/utils/timer.h b/GPU/GPUTracking/utils/timer.h index 44a01b04747cb..35b1d707b97b0 100644 --- a/GPU/GPUTracking/utils/timer.h +++ b/GPU/GPUTracking/utils/timer.h @@ -21,6 +21,7 @@ class HighResTimer { public: HighResTimer() = default; + HighResTimer(bool start); ~HighResTimer() = default; void Start(); void Stop(); From 46778cc5fbf6b7663616e642d487710ac87adde5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Jul 2025 10:50:58 +0200 Subject: [PATCH 05/52] GPU Display: Speed up drawing clusters with many collisions --- GPU/GPUTracking/display/GPUDisplay.h | 3 +- .../display/render/GPUDisplayDraw.cxx | 135 ++++++++++-------- 2 files changed, 78 insertions(+), 60 deletions(-) diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index c8deeb2378970..1cdbf62da2202 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -193,7 +193,7 @@ class GPUDisplay : public GPUDisplayInterface void 
SetCollisionColor(int32_t col); void updateConfig(); void drawPointLinestrip(int32_t iSector, int32_t cid, int32_t id, int32_t id_limit = TRACK_TYPE_ID_LIMIT); - vboList DrawClusters(int32_t iSector, int32_t select, uint32_t iCol); + void DrawClusters(int32_t iSector); vboList DrawSpacePointsTRD(int32_t iSector, int32_t select, int32_t iCol); vboList DrawSpacePointsTOF(int32_t iSector, int32_t select, int32_t iCol); vboList DrawSpacePointsITS(int32_t iSector, int32_t select, int32_t iCol); @@ -256,6 +256,7 @@ class GPUDisplay : public GPUDisplayInterface vecpod mVertexBuffer[NSECTORS]; vecpod mVertexBufferStart[NSECTORS]; vecpod mVertexBufferCount[NSECTORS]; + std::vector> mClusterBufferSizeCache[NSECTORS]; std::unique_ptr mGlobalPosPtr; std::unique_ptr mGlobalPosPtrTRD; diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 06c572e516853..9ed16f10361f5 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -122,70 +122,91 @@ GPUDisplay::vboList GPUDisplay::DrawSpacePointsITS(int32_t iSector, int32_t sele return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } -GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSector, int32_t select, uint32_t iCol) +void GPUDisplay::DrawClusters(int32_t iSector) { - size_t startCount = mVertexBufferStart[iSector].size(); - size_t startCountInner = mVertexBuffer[iSector].size(); - if (mOverlayTFClusters.size() > 0 || iCol == 0 || mNCollissions) { - const int32_t firstCluster = (mOverlayTFClusters.size() > 1 && iCol > 0) ? mOverlayTFClusters[iCol - 1][iSector] : 0; - const int32_t lastCluster = (mOverlayTFClusters.size() > 1 && iCol + 1 < mOverlayTFClusters.size()) ? mOverlayTFClusters[iCol][iSector] : (mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : mIOPtrs->clustersNative ? 
mIOPtrs->clustersNative->nClustersSector[iSector] : 0); - [[maybe_unused]] const bool checkClusterCollision = mQA && mNCollissions && mOverlayTFClusters.size() == 0 && mIOPtrs->clustersNative && mIOPtrs->clustersNative->clustersMCTruth; - for (int32_t cidInSector = firstCluster; cidInSector < lastCluster; cidInSector++) { - const int32_t cid = GET_CID(iSector, cidInSector); + std::vector, N_POINTS_TYPE_TPC>> vertexCache(mNCollissions); + if (mClusterBufferSizeCache[iSector].size() < (uint32_t)mNCollissions) { + mClusterBufferSizeCache[iSector].resize(mNCollissions); + } + for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { + for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) { + vertexCache[iCol][i].reserve(mClusterBufferSizeCache[iSector][iCol][i]); + } + } + + uint32_t col = 0; + const int32_t nClustersInSector = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : (mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSector] : 0); + [[maybe_unused]] const bool checkClusterCollision = mQA && mNCollissions && mOverlayTFClusters.size() == 0 && mIOPtrs->clustersNative && mIOPtrs->clustersNative->clustersMCTruth; + for (int32_t cidInSector = 0; cidInSector < nClustersInSector; cidInSector++) { + const int32_t cid = GET_CID(iSector, cidInSector); #ifdef GPUCA_TPC_GEOMETRY_O2 - if (checkClusterCollision) { - const auto& labels = mIOPtrs->clustersNative->clustersMCTruth->getLabels(cid); - if (labels.size() ? (iCol != mQA->GetMCLabelCol(labels[0])) : (iCol != 0)) { - continue; - } - } + if (checkClusterCollision) { + const auto& labels = mIOPtrs->clustersNative->clustersMCTruth->getLabels(cid); + col = labels.size() ? 
mQA->GetMCLabelCol(labels[0]) : 0; + } else #endif - if (mCfgH.hideUnmatchedClusters && mQA && mQA->SuppressHit(cid)) { - continue; + if (mOverlayTFClusters.size()) { + while (col < mOverlayTFClusters.size() && cidInSector >= mOverlayTFClusters[col][iSector]) { + col++; } - bool draw = mGlobalPos[cid].w == select; - - if (mCfgH.markAdjacentClusters) { - const int32_t attach = mIOPtrs->mergedTrackHitAttachment[cid]; - if (attach) { - if (mCfgH.markAdjacentClusters >= 32) { - if (mQA && mQA->clusterRemovable(attach, mCfgH.markAdjacentClusters == 33)) { - draw = select == tMARKED; - } - } else if ((mCfgH.markAdjacentClusters & 2) && (attach & gputpcgmmergertypes::attachTube)) { - draw = select == tMARKED; - } else if ((mCfgH.markAdjacentClusters & 1) && (attach & (gputpcgmmergertypes::attachGood | gputpcgmmergertypes::attachTube)) == 0) { - draw = select == tMARKED; - } else if ((mCfgH.markAdjacentClusters & 4) && (attach & gputpcgmmergertypes::attachGoodLeg) == 0) { - draw = select == tMARKED; - } else if ((mCfgH.markAdjacentClusters & 16) && (attach & gputpcgmmergertypes::attachHighIncl)) { - draw = select == tMARKED; - } else if (mCfgH.markAdjacentClusters & 8) { - if (fabsf(mIOPtrs->mergedTracks[attach & gputpcgmmergertypes::attachTrackMask].GetParam().GetQPt()) > 20.f) { - draw = select == tMARKED; - } + } + if (mCfgH.hideUnmatchedClusters && mQA && mQA->SuppressHit(cid)) { + continue; + } + int32_t select = mGlobalPos[cid].w; + + if (mCfgH.markAdjacentClusters) { + const int32_t attach = mIOPtrs->mergedTrackHitAttachment[cid]; + if (attach) { + if (mCfgH.markAdjacentClusters >= 32) { + if (mQA && mQA->clusterRemovable(attach, mCfgH.markAdjacentClusters == 33)) { + select = tMARKED; + } + } else if ((mCfgH.markAdjacentClusters & 2) && (attach & gputpcgmmergertypes::attachTube)) { + select = tMARKED; + } else if ((mCfgH.markAdjacentClusters & 1) && (attach & (gputpcgmmergertypes::attachGood | gputpcgmmergertypes::attachTube)) == 0) { + select = tMARKED; + } else 
if ((mCfgH.markAdjacentClusters & 4) && (attach & gputpcgmmergertypes::attachGoodLeg) == 0) { + select = tMARKED; + } else if ((mCfgH.markAdjacentClusters & 16) && (attach & gputpcgmmergertypes::attachHighIncl)) { + select = tMARKED; + } else if (mCfgH.markAdjacentClusters & 8) { + if (fabsf(mIOPtrs->mergedTracks[attach & gputpcgmmergertypes::attachTrackMask].GetParam().GetQPt()) > 20.f) { + select = tMARKED; } } - } else if (mCfgH.markClusters) { - int16_t flags; - if (mParam->par.earlyTpcTransform) { - flags = mIOPtrs->clusterData[iSector][cidInSector].flags; - } else { - flags = mIOPtrs->clustersNative->clustersLinear[cid].getFlags(); - } - const bool match = flags & mCfgH.markClusters; - draw = (select == tMARKED) ? (match) : (draw && !match); - } else if (mCfgH.markFakeClusters) { - const bool fake = (mQA->HitAttachStatus(cid)); - draw = (select == tMARKED) ? (fake) : (draw && !fake); } - if (draw) { - mVertexBuffer[iSector].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPos[cid].z); + } else if (mCfgH.markClusters) { + int16_t flags; + if (mParam->par.earlyTpcTransform) { + flags = mIOPtrs->clusterData[iSector][cidInSector].flags; + } else { + flags = mIOPtrs->clustersNative->clustersLinear[cid].getFlags(); + } + if (flags & mCfgH.markClusters) { + select = tMARKED; + } + } else if (mCfgH.markFakeClusters) { + if (mQA->HitAttachStatus(cid)) { + select = tMARKED; } } + vertexCache[col][select].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPos[cid].z); + } + + size_t startCountInner = mVertexBuffer[iSector].size(); + mVertexBuffer[iSector].resize(mVertexBuffer[iSector].size() + nClustersInSector); + for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { + for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) { + uint32_t count = vertexCache[iCol][i].size(); + mClusterBufferSizeCache[iSector][iCol][i] = std::max(mClusterBufferSizeCache[iSector][iCol][i], count); + memcpy((void*)&mVertexBuffer[iSector][startCountInner], (const void*)vertexCache[iCol][i].data(), count * sizeof(vertexCache[iCol][i][0])); + size_t startCount = mVertexBufferStart[iSector].size(); + insertVertexList(iSector, startCountInner, startCountInner + count); + startCountInner += count; + mGlDLPoints[iSector][i][iCol] = vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector); + } } - insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); - return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawLinks(const GPUTPCTracker& tracker, int32_t id, bool dodown) @@ -925,11 +946,7 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() } tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { - for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) { - for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSector][i][iCol] = DrawClusters(iSector, i, iCol); - } - } // clang-format off + DrawClusters(iSector); // clang-format off }, tbb::simple_partitioner()); // clang-format on if (timer.IsRunning()) { GPUInfo("Display Time: Vertex Clusters:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); From a66fd68a95561e8e17cb0ae767c2d54a538459f9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Jul 2025 10:54:41 +0200 Subject: [PATCH 06/52] GPU Display: ResetScene should reset which collision to show --- GPU/GPUTracking/display/GPUDisplay.cxx | 1 + 1 file changed, 1 insertion(+) diff --git 
a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index e7c04a1bfb407..35ebb132398ab 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -273,6 +273,7 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla mCfgL.pointSize = 2.0f; mCfgL.lineWidth = 1.4f; mCfgL.drawSector = -1; + mCfgL.showCollision = -1; mCfgH.xAdd = mCfgH.zAdd = 0; mCfgR.camLookOrigin = mCfgR.camYUp = false; mAngleRollOrigin = -1e9f; From ab6f95f9a2fffba06c2a90ca35fd889c7987e0bf Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Jul 2025 11:11:27 +0200 Subject: [PATCH 07/52] GPU Display: Extrapolate tracks only on-demand when first requested --- GPU/GPUTracking/display/GPUDisplay.cxx | 2 +- GPU/GPUTracking/display/GPUDisplay.h | 1 + GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx | 3 +++ GPU/GPUTracking/display/render/GPUDisplayDraw.cxx | 4 ++++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 35ebb132398ab..7cad25916940a 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -633,7 +633,7 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) mUpdateDrawCommands = true; } - HighResTimer timerDraw; + HighResTimer timerDraw(mUpdateVertexLists); if (animateTime < 0 && (mUpdateEventData || mResetScene) && mIOPtrs) { timerDraw.ResetStart(); DrawGLScene_updateEventData(); diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 1cdbf62da2202..7279f2ee87fdb 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -287,6 +287,7 @@ class GPUDisplay : public GPUDisplayInterface volatile bool mUpdateRenderPipeline = false; volatile bool mResetScene = false; volatile bool mLoadAndShowEvent = false; + bool mTracksArePropagated = false; int32_t 
mAnimate = 0; HighResTimer mAnimationTimer; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx index e1e6d9e54df0a..6dc09545733fe 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx @@ -493,6 +493,9 @@ void GPUDisplay::HandleKey(uint8_t key) if (memcmp((void*)&oldCfgH, (void*)&mCfgH, sizeof(mCfgH)) != 0) { mUpdateEventData = true; } + if (mCfgL.propagateTracks != 0 && !mTracksArePropagated) { + mUpdateVertexLists = true; + } if (memcmp((void*)&oldCfgL, (void*)&mCfgL, sizeof(mCfgL)) != 0 || memcmp((void*)&oldCfgR, (void*)&mCfgR, sizeof(mCfgR)) != 0) { mUpdateDrawCommands = true; } diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 9ed16f10361f5..ebb1b41802ba0 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -494,6 +494,9 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp if (!mIOPtrs->clustersNative) { continue; } + if (mCfgL.propagateTracks == 0) { + continue; + } // Propagate track paramters / plot MC tracks for (int32_t iMC = 0; iMC < 2; iMC++) { @@ -992,6 +995,7 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() GPUInfo("Display Time: Vertex TOF:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } + mTracksArePropagated = mCfgL.propagateTracks != 0; mUpdateVertexLists = false; size_t totalVertizes = 0; for (int32_t i = 0; i < NSECTORS; i++) { From 478f166c696ea8dcfc6a86d209a5fcfc210f4354 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Jul 2025 11:34:16 +0200 Subject: [PATCH 08/52] TPC: Change some default settings --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 
6419d63bb7ada..4bdca9b62c462 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -132,11 +132,11 @@ AddOptionRTC(cfInnerThreshold, uint8_t, 0, "", 0, "Cluster Finder extends cluste AddOptionRTC(cfMinSplitNum, uint8_t, 1, "", 0, "Minimum number of split charges in a cluster for the cluster to be marked as split") AddOptionRTC(cfNoiseSuppressionEpsilon, uint8_t, 10, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression") AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression, relative as fraction of 255") -AddOptionRTC(cfEdgeTwoPads, uint8_t, 1, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge as edge cluster") +AddOptionRTC(cfEdgeTwoPads, uint8_t, 0, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge as edge cluster") AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger") AddOptionRTC(nWaysOuter, int8_t, 0, "", 0, "Store outer param") AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits") -AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 0, "", 0, "Reject clusters that get the IFC mask error during refit") +AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 1, "", 0, "Reject clusters that get the IFC mask error during refit") AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fraction of 128") AddOptionRTC(dEdxTruncHigh, uint8_t, 77, "", 0, "High truncation threshold, fraction of 128") AddOptionRTC(extrapolationTracking, int8_t, 1, "", 0, "Enable Extrapolation Tracking (prolong tracks to adjacent sectors to find short segments)") From 04e9c4006825b761778ed0c3a98cc32761011535 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 
Jul 2025 11:56:53 +0200 Subject: [PATCH 09/52] GPU TPC: Do looper cluster attachment always in separate kernel --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 3 - .../Definitions/GPUDefParametersDefaults.h | 10 ---- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 - .../Global/GPUChainTrackingMerger.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 57 +++---------------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 6 +- .../Standalone/Benchmark/standalone.cxx | 1 - GPU/GPUTracking/kernels.cmake | 1 - 8 files changed, 12 insertions(+), 71 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 09aae2aacf16d..ff4ce2c905507 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -271,9 +271,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() #endif mProcessingSettings->overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; param().rec.tpc.nWaysOuter = true; - if (param().rec.tpc.looperInterpolationInExtraPass == -1) { - param().rec.tpc.looperInterpolationInExtraPass = 0; - } if (GetProcessingSettings().createO2Output > 1) { mProcessingSettings->createO2Output = 1; } diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 48d00b274dc9c..b1f12034d9c2f 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -80,7 +80,6 @@ #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 #define GPUCA_PAR_SORT_BEFORE_FIT 1 - #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half @@ -143,7 +142,6 @@ #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 #define GPUCA_PAR_SORT_BEFORE_FIT 1 - 
#define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half @@ -206,7 +204,6 @@ #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 #define GPUCA_PAR_SORT_BEFORE_FIT 1 - #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half @@ -261,7 +258,6 @@ #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 #define GPUCA_PAR_SORT_BEFORE_FIT 1 - #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 #define GPUCA_PAR_COMP_GATHER_KERNEL 4 #define GPUCA_PAR_COMP_GATHER_MODE 3 @@ -529,9 +525,6 @@ #ifndef GPUCA_PAR_SORT_BEFORE_FIT #define GPUCA_PAR_SORT_BEFORE_FIT 0 #endif - #ifndef GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION - #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #endif #ifndef GPUCA_PAR_COMP_GATHER_KERNEL #define GPUCA_PAR_COMP_GATHER_KERNEL 0 #endif @@ -566,9 +559,6 @@ #ifndef GPUCA_PAR_SORT_BEFORE_FIT #define GPUCA_PAR_SORT_BEFORE_FIT 0 #endif - #ifndef GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION - #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #endif #ifndef GPUCA_PAR_COMP_GATHER_KERNEL #define GPUCA_PAR_COMP_GATHER_KERNEL 0 #endif diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 4bdca9b62c462..a22524713c5c0 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -154,7 +154,6 @@ AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation inst AddOptionRTC(mergerInterpolateRejectAlsoOnCurrentPosition, uint8_t, 1, "", 0, "When using mergerInterpolateErrors, reject based on chi2 twice computed with interpolated and current track position") 
AddOptionRTC(mergeCE, uint8_t, 1, "", 0, "Merge tracks accross the central electrode") AddOptionRTC(retryRefit, int8_t, 1, "", 0, "Retry refit with seeding errors and without cluster rejection when fit fails (=2 means retry in same kernel, =1 for separate kernel") -AddOptionRTC(looperInterpolationInExtraPass, int8_t, -1, "", 0, "Perform looper interpolation in an extra pass") AddOptionRTC(dropSecondaryLegsInOutput, int8_t, 1, "", 0, "Do not store secondary legs of looping track in TrackTPC") AddOptionRTC(enablePID, int8_t, 1, "", 0, "Enable PID response") AddOptionRTC(PID_useNsigma, int8_t, 1, "", 0, "Use nSigma instead of absolute distance in PID response") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 118f0bf73a845..5d3ac212c5b54 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -224,9 +224,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (param().rec.tpc.retryRefit == 1) { runKernel(GetGridAuto(0), -1); } - if (param().rec.tpc.looperInterpolationInExtraPass == -1 ? 
mRec->getGPUParameters(doGPU).par_MERGER_SPLIT_LOOP_INTERPOLATION : param().rec.tpc.looperInterpolationInExtraPass) { - runKernel(GetGridAuto(0)); - } + runKernel(GetGridAuto(0)); DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile); runKernel(GetGridAuto(0, deviceType)); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index f224e860839df..1c74bb4a9b2c5 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -189,32 +189,12 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("\tLeg %3d Sector %2d %4sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.leg, (int32_t)cluster.sector, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? 
" " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); // clang-format on if (allowModification && changeDirection && !noFollowCircle && !noFollowCircle2) { - bool tryFollow = lastRow != 255; - if (tryFollow) { - const GPUTPCGMTrackParam backup = *this; - const float backupAlpha = prop.GetAlpha(); - if (FollowCircle<0>(merger, prop, lastSector, lastRow, iTrk, clAlpha, xx, yy, cluster.sector, cluster.row, inFlyDirection)) { - CADEBUG(printf("Error during follow circle, resetting track!\n")); - *this = backup; - prop.SetTrack(this, backupAlpha); + if (lastRow != 255) { + if (!(merger->Param().rec.tpc.disableRefitAttachment & 4)) { + StoreAttachMirror(merger, lastSector, lastRow, iTrk, clAlpha, yy, xx, cluster.sector, cluster.row, inFlyDirection, prop.GetAlpha()); noFollowCircle = true; - tryFollow = false; } } - if (tryFollow) { - MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, false, cluster.sector); - lastUpdateX = mX; - lastLeg = cluster.leg; - lastSector = cluster.sector; - lastRow = 255; - N++; - resetT0 = initResetT0(); - // clang-format off - CADEBUG(printf("\n")); - CADEBUG(printf("\t%21sMirror Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); - // clang-format on - continue; - } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; @@ -269,8 +249,8 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* 
GPUrestrict() merger, int32_ CADEBUG(printf(" -- MirroredY: %f --> %f", mP[0], mirrordY)); if (CAMath::Abs(yy - mP[0]) > CAMath::Abs(yy - mirrordY)) { CADEBUG(printf(" - Mirroring!!!")); - if (allowModification) { - AttachClustersMirror<0>(merger, cluster.sector, cluster.row, iTrk, yy, prop); // TODO: Never true, will always call FollowCircle above, really??? + if (allowModification && !(merger->Param().rec.tpc.disableRefitAttachment & 8)) { + StoreAttachMirror(merger, cluster.sector, cluster.row, iTrk, 0, yy, 0, -1, 0, 0, prop.GetAlpha()); } MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, true, cluster.sector); noFollowCircle = false; @@ -751,24 +731,15 @@ GPUdii() void GPUTPCGMTrackParam::RefitLoop(const GPUTPCGMMerger* GPUrestrict() GPUTPCGMLoopData& data = Merger->LoopData()[loopIdx]; prop.SetTrack(&data.param, data.alpha); if (data.toSector == -1) { - data.param.AttachClustersMirror<1>(Merger, data.sector, data.row, data.track, data.toY, prop, true); + data.param.AttachClustersMirror(Merger, data.sector, data.row, data.track, data.toY, prop); } else { - data.param.FollowCircle<1>(Merger, prop, data.sector, data.row, data.track, data.toAlpha, data.toX, data.toY, data.toSector, data.toRow, data.inFlyDirection, true); + data.param.FollowCircle(Merger, prop, data.sector, data.row, data.track, data.toAlpha, data.toX, data.toY, data.toSector, data.toRow, data.inFlyDirection); } } -template -GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& GPUrestrict() prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection, bool phase2) +GPUdi() int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& GPUrestrict() prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection) { 
static constexpr float kSectAngle = 2 * M_PI / 18.f; - if (Merger->Param().rec.tpc.disableRefitAttachment & 4) { - return 1; - } - const bool inExtraPass = Merger->Param().rec.tpc.looperInterpolationInExtraPass == -1 ? GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION : Merger->Param().rec.tpc.looperInterpolationInExtraPass; - if (inExtraPass && phase2 == false) { - StoreAttachMirror(Merger, sector, iRow, iTrack, toAlpha, toY, toX, toSector, toRow, inFlyDirection, prop.GetAlpha()); - return 1; - } const GPUParam& GPUrestrict() param = Merger->Param(); bool right; float dAlpha = toAlpha - prop.GetAlpha(); @@ -862,19 +833,9 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr return (0); } -template -GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& GPUrestrict() prop, bool phase2) +GPUdi() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& GPUrestrict() prop) { static constexpr float kSectAngle = 2 * M_PI / 18.f; - - if (Merger->Param().rec.tpc.disableRefitAttachment & 8) { - return; - } - const bool inExtraPass = Merger->Param().rec.tpc.looperInterpolationInExtraPass == -1 ? GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION : Merger->Param().rec.tpc.looperInterpolationInExtraPass; - if (inExtraPass && phase2 == false) { - StoreAttachMirror(Merger, sector, iRow, iTrack, 0, toY, 0, -1, 0, 0, prop.GetAlpha()); - return; - } // Note that the coordinate system is rotated by 90 degree swapping X and Y! float X = mP[2] > 0 ? mP[0] : -mP[0]; float toX = mP[2] > 0 ? 
toY : -toY; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index e3a5b2f7c1d01..90ff3154a3fe9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -150,10 +150,8 @@ class GPUTPCGMTrackParam GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop); // Returns uncorrectedY for later use GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z); // We force to compile these twice, for RefitLoop and for Fit, for better optimization - template - GPUd() void AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& prop, bool phase2 = false); - template - GPUd() int32_t FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection, bool phase2 = false); + GPUd() void AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& prop); + GPUd() int32_t FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection); GPUd() void StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSector, int32_t toRow, bool inFlyDirection, float alpha); GPUd() void StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, const GPUTPCGMPropagator& prop, int32_t phase); GPUd() static void RefitLoop(const 
GPUTPCGMMerger* GPUrestrict() Merger, int32_t loopIdx); diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index fed4610b2f13a..5240b5ca47967 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -460,7 +460,6 @@ int32_t SetupReconstruction() procSet.tpcInputWithClusterRejection = 1; } recSet.tpc.disableRefitAttachment = 0xFF; - recSet.tpc.looperInterpolationInExtraPass = 0; recSet.maxTrackQPtB5 = CAMath::Min(recSet.maxTrackQPtB5, recSet.tpc.rejectQPtB5); recSet.useMatLUT = true; recAsync->SetSettings(&grp, &recSet, &procSet, &steps); diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 7ebe631d86e92..e1fef5795828b 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -142,7 +142,6 @@ o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP TRACKLET_SELECTOR_HITS_REG_SIZE ALTERNATE_BORDER_SORT SORT_BEFORE_FIT - MERGER_SPLIT_LOOP_INTERPOLATION NO_ATOMIC_PRECHECK COMP_GATHER_KERNEL COMP_GATHER_MODE From 6740bd05ba70da5c5380496a7fb7a1721c530d0d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 23 Jul 2025 13:09:28 +0200 Subject: [PATCH 10/52] GPU TPC: Keep merged track legs as individual track segments during refit --- .../DataTypes/GPUMemorySizeScalers.h | 2 +- .../Definitions/GPUDefConstantsAndSettings.h | 2 +- .../GPUChainTrackingDebugAndProfiling.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 22 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 558 ++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 4 +- .../display/render/GPUDisplayDraw.cxx | 14 +- GPU/GPUTracking/qa/GPUQA.cxx | 17 +- GPU/GPUTracking/qa/GPUQA.h | 3 +- 9 files changed, 287 insertions(+), 339 deletions(-) diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h index 164ecb32c26c7..ff8abdc1a491e 100644 --- 
a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h @@ -47,7 +47,7 @@ struct GPUMemorySizeScalers { double tpcSectorTracksPerHit = 0.02; double tpcSectorTrackHitsPerHit = 0.8; double tpcSectorTrackHitsPerHitWithRejection = 1.0; - double tpcMergedTrackPerSectorTrack = 0.9; + double tpcMergedTrackPerSectorTrack = 1.0; double tpcMergedTrackHitPerSectorHit = 1.1; size_t tpcCompressedUnattachedHitsBase1024[3] = {900, 900, 500}; // No ratio, but integer fraction of 1024 for exact computation diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index 46988208256fc..e5a2c8eb75bcb 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -32,7 +32,7 @@ #define GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(QPTB5) (CAMath::Abs(QPTB5) > 10 ? 10 : (CAMath::Abs(QPTB5) > 5 ? 15 : 29)) // Minimum hits should depend on Pt, low Pt tracks can have few hits. 
29 Hits default, 15 for < 200 mev, 10 for < 100 mev -#define GPUCA_MERGER_MAX_TRACK_CLUSTERS 1000 // Maximum number of clusters a track may have after merging +#define GPUCA_MERGER_MAX_TRACK_CLUSTERS 1024 // Maximum number of clusters a track may have after merging #define GPUCA_MAXN 40 // Maximum number of neighbor hits to consider in one row in neightbors finder #define GPUCA_MIN_TRACK_PTB5_DEFAULT 0.010f // Default setting for minimum track Pt at some places (at B=0.5T) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 7d790d8e3913f..fbd999f8feb56 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -216,7 +216,9 @@ void GPUChainTracking::PrintOutputStat() } else { for (uint32_t k = 0; k < mIOPtrs.nMergedTracks; k++) { if (mIOPtrs.mergedTracks[k].OK()) { - nTracks++; + if (!mIOPtrs.mergedTracks[k].MergedLooper()) { + nTracks++; + } nAttachedClusters += mIOPtrs.mergedTracks[k].NClusters(); nAttachedClustersFitted += mIOPtrs.mergedTracks[k].NClustersFitted(); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 73b14ba1b2fdf..483cbc15998bc 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -45,13 +45,18 @@ class GPUTPCGMMergedTrack GPUd() bool Looper() const { return mFlags & 0x02; } GPUd() bool CSide() const { return mFlags & 0x04; } GPUd() bool CCE() const { return mFlags & 0x08; } - GPUd() bool MergedLooper() const { return mFlags & 0x10; } + GPUd() bool MergedLooperUnconnected() const { return mFlags & 0x10; } + GPUd() bool MergedLooperConnected() const { return mFlags & 0x20; } + GPUd() bool MergedLooper() const { return mFlags & 0x30; } + GPUd() int32_t PrevSegment() const { return mPrevSegment; } + GPUd() uint8_t Flags() const { return mFlags; } GPUd() void 
SetNClusters(int32_t v) { mNClusters = v; } GPUd() void SetNClustersFitted(int32_t v) { mNClustersFitted = v; } GPUd() void SetFirstClusterRef(int32_t v) { mFirstClusterRef = v; } GPUd() void SetParam(const GPUTPCGMTrackParam& v) { mParam = v; } GPUd() void SetAlpha(float v) { mAlpha = v; } + GPUd() void SetPrevSegment(int32_t v) { mPrevSegment = v; } GPUd() void SetOK(bool v) { if (v) { @@ -84,7 +89,7 @@ class GPUTPCGMMergedTrack mFlags &= 0xF7; } } - GPUd() void SetMergedLooper(bool v) + GPUd() void SetMergedLooperUnconnected(bool v) { if (v) { mFlags |= 0x10; @@ -92,10 +97,15 @@ class GPUTPCGMMergedTrack mFlags &= 0xEF; } } + GPUd() void SetMergedLooperConnected(bool v) + { + if (v) { + mFlags |= 0x20; + } else { + mFlags &= 0xDF; + } + } GPUd() void SetFlags(uint8_t v) { mFlags = v; } - GPUd() void SetLegs(uint8_t v) { mLegs = v; } - GPUd() uint8_t Legs() const { return mLegs; } - GPUd() uint8_t Flags() const { return mFlags; } GPUd() const gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() const { return mOuterParam; } GPUd() gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() { return mOuterParam; } @@ -106,11 +116,11 @@ class GPUTPCGMMergedTrack float mAlpha; //* alpha angle uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays + int32_t mPrevSegment; //* next segment in case of looping track // TODO: Change to 8 bit uint32_t mNClusters; //* number of track clusters uint32_t mNClustersFitted; //* number of clusters used in fit uint8_t mFlags; - uint8_t mLegs; #if !defined(GPUCA_STANDALONE) ClassDefNV(GPUTPCGMMergedTrack, 0); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 338ecae4f9b95..3e2eae2e2ad6b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -59,9 +59,6 @@ #include "SimulationDataFormat/MCCompLabel.h" #endif -static constexpr int32_t kMaxParts = 400; -static constexpr int32_t kMaxClusters = 
GPUCA_MERGER_MAX_TRACK_CLUSTERS; - using namespace o2::gpu; using namespace o2::tpc; using namespace gputpcgmmergertypes; @@ -98,9 +95,6 @@ struct GPUTPCGMMergerSortTracks_comp { if (a.CCE() != b.CCE()) { return a.CCE() > b.CCE(); } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } GPUCA_DETERMINISTIC_CODE( // clang-format off if (a.NClusters() != b.NClusters()) { return a.NClusters() > b.NClusters(); @@ -1348,8 +1342,7 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i continue; } bool celooper = (trk[0]->GetParam().GetQPt() * Param().qptB5Scaler > 1 && trk[0]->GetParam().GetQPt() * trk[1]->GetParam().GetQPt() < 0); - bool looper = trk[0]->Looper() || trk[1]->Looper() || celooper; - if (!looper && trk[0]->GetParam().GetPar(3) * trk[1]->GetParam().GetPar(3) < 0) { + if (!celooper && trk[0]->GetParam().GetPar(3) * trk[1]->GetParam().GetPar(3) < 0) { continue; } @@ -1365,7 +1358,7 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i } bool needswap = false; - if (looper) { + if (celooper) { float z0max, z1max; if (Param().par.earlyTpcTransform) { z0max = CAMath::Max(CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef()].z), CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z)); @@ -1386,15 +1379,13 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i GPUCommonAlgorithm::swap(trk[0], trk[1]); } - bool reverse[2] = {false, false}; - if (looper) { - if (Param().par.earlyTpcTransform) { - reverse[0] = (mClustersXYZ[trk[0]->FirstClusterRef()].z > mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z) ^ (trk[0]->CSide() > 0); - reverse[1] = (mClustersXYZ[trk[1]->FirstClusterRef()].z < mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z) ^ (trk[1]->CSide() > 0); - } else { - reverse[0] = cls[mClusters[trk[0]->FirstClusterRef()].num].getTime() < cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 
1].num].getTime(); - reverse[1] = cls[mClusters[trk[1]->FirstClusterRef()].num].getTime() > cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(); - } + if (celooper) { + trk[0]->SetMergedLooperConnected(true); + trk[0]->SetCCE(true); + trk[0]->SetLooper(true); + trk[1]->SetCCE(true); + trk[1]->SetLooper(true); + continue; } if (Param().par.continuousTracking) { @@ -1415,31 +1406,14 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i trk[1]->Param().TZOffset() = offset; } } - int32_t pos = newRef; - int32_t leg = -1; - int32_t lastLeg = -1; #pragma unroll for (int32_t k = 1; k >= 0; k--) { - int32_t loopstart = reverse[k] ? (trk[k]->NClusters() - 1) : 0; - int32_t loopend = reverse[k] ? -1 : (int32_t)trk[k]->NClusters(); - int32_t loopinc = reverse[k] ? -1 : 1; - for (int32_t j = loopstart; j != loopend; j += loopinc) { + for (uint32_t j = 0; j != trk[k]->NClusters(); j++) { if (Param().par.earlyTpcTransform) { mClustersXYZ[pos] = mClustersXYZ[trk[k]->FirstClusterRef() + j]; } - mClusters[pos] = mClusters[trk[k]->FirstClusterRef() + j]; - if (looper) { - if (mClusters[trk[k]->FirstClusterRef() + j].leg != lastLeg) { - leg++; - lastLeg = mClusters[trk[k]->FirstClusterRef() + j].leg; - } - mClusters[pos].leg = leg; - } - pos++; - } - if (celooper) { - lastLeg = -1; + mClusters[pos++] = mClusters[trk[k]->FirstClusterRef() + j]; } } trk[1]->SetFirstClusterRef(newRef); @@ -1449,10 +1423,6 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i trk[1]->SetNClusters(GPUCA_MERGER_MAX_TRACK_CLUSTERS); } trk[1]->SetCCE(true); - if (looper) { - trk[1]->SetLooper(true); - trk[1]->SetLegs(leg + 1); - } trk[0]->SetNClusters(0); trk[0]->SetOK(false); } @@ -1465,32 +1435,6 @@ namespace o2::gpu::internal { namespace // anonymous { -struct GPUTPCGMMerger_CompareClusterIdsLooper { - struct clcomparestruct { - uint8_t leg; - }; - - const uint8_t leg; - const bool outwards; - const 
GPUTPCGMMerger::trackCluster* const cmp1; - const clcomparestruct* const cmp2; - GPUd() GPUTPCGMMerger_CompareClusterIdsLooper(uint8_t l, bool o, const GPUTPCGMMerger::trackCluster* c1, const clcomparestruct* c2) : leg(l), outwards(o), cmp1(c1), cmp2(c2) {} - GPUd() bool operator()(const int16_t aa, const int16_t bb) - { - const clcomparestruct& a = cmp2[aa]; - const clcomparestruct& b = cmp2[bb]; - const GPUTPCGMMerger::trackCluster& a1 = cmp1[aa]; - const GPUTPCGMMerger::trackCluster& b1 = cmp1[bb]; - if (a.leg != b.leg) { - return ((leg > 0) ^ (a.leg > b.leg)); - } - if (a1.row != b1.row) { - return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards); - } - return GPUCA_DETERMINISTIC_CODE((a1.id != b1.id) ? (a1.id > b1.id) : (aa > bb), a1.id > b1.id); - } -}; - struct GPUTPCGMMerger_CompareClusterIds { const GPUTPCGMMerger::trackCluster* const mCmp; GPUd() GPUTPCGMMerger_CompareClusterIds(const GPUTPCGMMerger::trackCluster* cmp) : mCmp(cmp) {} @@ -1509,296 +1453,269 @@ struct GPUTPCGMMerger_CompareClusterIds { GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - GPUTPCGMSectorTrack* trackParts[kMaxParts]; + static constexpr int32_t kMaxParts = 16; + static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; - for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; + GPUTPCGMSectorTrack* trackParts[kMaxParts]; - if (track.PrevSegmentNeighbour() >= 0) { - continue; - } - if (track.PrevNeighbour() >= 0) { - continue; + int32_t itr = iBlock * nThreads + iThread; + GPUTPCGMSectorTrack* trbase = nullptr; + int32_t leg = 0; + int32_t lastMergedSegment = -1; + while (true) { + if (trbase) { + int32_t jtr = trbase->NextNeighbour(); + if (jtr >= 0) { + trbase = &(mSectorTrackInfos[jtr]); + if (trbase->PrevSegmentNeighbour() >= 0) { + trbase = nullptr; + } else { + 
trbase->SetPrevSegmentNeighbour(1000000001); + leg++; + } + } else { + trbase = nullptr; + } } - int32_t nParts = 0; - int32_t nHits = 0; - int32_t leg = 0; - GPUTPCGMSectorTrack *trbase = &track, *tr = &track; - tr->SetPrevSegmentNeighbour(1000000000); - while (true) { - if (nParts >= kMaxParts) { + + if (trbase == nullptr) { + while (itr < SectorTrackInfoLocalTotal()) { + trbase = &mSectorTrackInfos[itr]; + if (trbase->PrevSegmentNeighbour() >= 0 || trbase->PrevNeighbour() >= 0) { + itr += nThreads * nBlocks; + continue; + } break; } - if (nHits + tr->NClusters() > kMaxClusters) { + if (itr >= SectorTrackInfoLocalTotal()) { break; } - nHits += tr->NClusters(); + itr += nThreads * nBlocks; + trbase->SetPrevSegmentNeighbour(1000000000); + leg = 0; + lastMergedSegment = -1; + } - tr->SetLeg(leg); - trackParts[nParts++] = tr; - for (int32_t i = 0; i < 2; i++) { - if (tr->ExtrapolatedTrackId(i) != -1) { - if (nParts >= kMaxParts) { - break; - } - if (nHits + mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) { - break; - } - trackParts[nParts] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)]; - trackParts[nParts++]->SetLeg(leg); - nHits += mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters(); + do { + int32_t nParts = 0; + int32_t nHits = 0; + + GPUTPCGMSectorTrack* tr = trbase; + while (true) { + if (nParts >= kMaxParts) { + break; } - } - int32_t jtr = tr->NextSegmentNeighbour(); - if (jtr >= 0) { - tr = &(mSectorTrackInfos[jtr]); - tr->SetPrevSegmentNeighbour(1000000002); - continue; - } - jtr = trbase->NextNeighbour(); - if (jtr >= 0) { - trbase = &(mSectorTrackInfos[jtr]); - tr = trbase; - if (tr->PrevSegmentNeighbour() >= 0) { + if (nHits + tr->NClusters() > kMaxClusters) { break; } - tr->SetPrevSegmentNeighbour(1000000001); - leg++; - continue; + nHits += tr->NClusters(); + + tr->SetLeg(leg); + trackParts[nParts++] = tr; + for (int32_t i = 0; i < 2; i++) { + if (tr->ExtrapolatedTrackId(i) != -1) { + if (nParts >= kMaxParts) { + 
break; + } + if (nHits + mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) { + break; + } + trackParts[nParts] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)]; + trackParts[nParts++]->SetLeg(leg); + nHits += mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters(); + } + } + int32_t jtr = tr->NextSegmentNeighbour(); + if (jtr >= 0) { + tr = &(mSectorTrackInfos[jtr]); + tr->SetPrevSegmentNeighbour(1000000002); + continue; + } + break; } - break; - } - // unpack and sort clusters - if (nParts > 1 && leg == 0) { - GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a->X() != b->X()) { + // unpack and sort clusters + if (nParts > 1 && leg == 0) { + GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a->X() != b->X()) { + return (a->X() > b->X()); + } + if (a->Y() != b->Y()) { + return (a->Y() > b->Y()); + } + if (a->Z() != b->Z()) { + return (a->Z() > b->Z()); + } + return a->QPt() > b->QPt(); + , // !GPUCA_DETERMINISTIC_CODE return (a->X() > b->X()); - } - if (a->Y() != b->Y()) { - return (a->Y() > b->Y()); - } - if (a->Z() != b->Z()) { - return (a->Z() > b->Z()); - } - return a->QPt() > b->QPt(); - , // !GPUCA_DETERMINISTIC_CODE - return (a->X() > b->X()); - ) // clang-format on - }); - } - - if (Param().rec.tpc.dropLoopers && leg > 0) { - nParts = 1; - leg = 0; - } - - trackCluster trackClusters[kMaxClusters]; - nHits = 0; - for (int32_t ipart = 0; ipart < nParts; ipart++) { - const GPUTPCGMSectorTrack* t = trackParts[ipart]; - CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nMergedTracks, ipart, t->QPt(), t->DzDs())); - int32_t nTrackHits = t->NClusters(); - trackCluster* c2 = trackClusters + nHits + nTrackHits - 1; - for (int32_t i = 0; i < nTrackHits; i++, c2--) { - const 
GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()]; - const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i]; - uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0]; - *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector(), t->Leg()}; - } - nHits += nTrackHits; - } - if (nHits < GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(track.QPt() * Param().qptB5Scaler)) { - continue; - } + ) // clang-format on + }); + } + + if (Param().rec.tpc.dropLoopers && leg > 0) { + nParts = 1; + leg = 0; + } + + trackCluster trackClusters[kMaxClusters]; + nHits = 0; + for (int32_t ipart = 0; ipart < nParts; ipart++) { + const GPUTPCGMSectorTrack* t = trackParts[ipart]; + CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nMergedTracks, ipart, t->QPt(), t->DzDs())); + int32_t nTrackHits = t->NClusters(); + trackCluster* c2 = trackClusters + nHits + nTrackHits - 1; + for (int32_t i = 0; i < nTrackHits; i++, c2--) { + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()]; + const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i]; + uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0]; + *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector(), t->Leg()}; + } + nHits += nTrackHits; + } + if (nHits < GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(trbase->QPt() * Param().qptB5Scaler)) { + break; + } - int32_t ordered = leg == 0; - if (ordered) { + bool ordered = true; for (int32_t i = 1; i < nHits; i++) { if (trackClusters[i].row > trackClusters[i - 1].row || trackClusters[i].id == trackClusters[i - 1].id) { - ordered = 0; + ordered = false; break; } } - } - int32_t firstTrackIndex = 0; - int32_t lastTrackIndex = nParts - 1; - if (ordered == 0) { - int32_t nTmpHits = 0; - trackCluster 
trackClustersUnsorted[kMaxClusters]; - int16_t clusterIndices[kMaxClusters]; - for (int32_t i = 0; i < nHits; i++) { - trackClustersUnsorted[i] = trackClusters[i]; - clusterIndices[i] = i; - } + int32_t firstTrackIndex = 0; + int32_t lastTrackIndex = nParts - 1; + if (ordered == 0) { + int32_t nTmpHits = 0; + trackCluster trackClustersUnsorted[kMaxClusters]; + int16_t clusterIndices[kMaxClusters]; + for (int32_t i = 0; i < nHits; i++) { + trackClustersUnsorted[i] = trackClusters[i]; + clusterIndices[i] = i; + } + + GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIds(trackClusters)); - if (leg > 0) { - // Find QPt and DzDs for the segment closest to the vertex, if low/mid Pt - float baseZT = 1e9; - uint8_t baseLeg = 0; + nTmpHits = 0; + firstTrackIndex = lastTrackIndex = -1; for (int32_t i = 0; i < nParts; i++) { - if (trackParts[i]->Leg() == 0 || trackParts[i]->Leg() == leg) { - float zt; - if (Param().par.earlyTpcTransform) { - zt = CAMath::Min(CAMath::Abs(trackParts[i]->ClusterZT0()), CAMath::Abs(trackParts[i]->ClusterZTN())); - } else { - zt = -trackParts[i]->MinClusterZT(); // Negative time ~ smallest z, to behave the same way // TODO: Check all these min / max ZT - } - if (zt < baseZT) { - baseZT = zt; - baseLeg = trackParts[i]->Leg(); - } + nTmpHits += trackParts[i]->NClusters(); + if (nTmpHits > clusterIndices[0] && firstTrackIndex == -1) { + firstTrackIndex = i; } - } - int32_t iLongest = 1e9; - int32_t length = 0; - for (int32_t i = (baseLeg ? (nParts - 1) : 0); baseLeg ? (i >= 0) : (i < nParts); baseLeg ? 
i-- : i++) { - if (trackParts[i]->Leg() != baseLeg) { - break; - } - if (trackParts[i]->OrigTrack()->NHits() > length) { - iLongest = i; - length = trackParts[i]->OrigTrack()->NHits(); + if (nTmpHits > clusterIndices[nHits - 1] && lastTrackIndex == -1) { + lastTrackIndex = i; } } - bool outwards; - if (Param().par.earlyTpcTransform) { - outwards = (trackParts[iLongest]->ClusterZT0() > trackParts[iLongest]->ClusterZTN()) ^ trackParts[iLongest]->CSide(); - } else { - outwards = trackParts[iLongest]->ClusterZT0() < trackParts[iLongest]->ClusterZTN(); - } - GPUTPCGMMerger_CompareClusterIdsLooper::clcomparestruct clusterSort[kMaxClusters]; - for (int32_t iPart = 0; iPart < nParts; iPart++) { - const GPUTPCGMSectorTrack* t = trackParts[iPart]; - int32_t nTrackHits = t->NClusters(); - for (int32_t j = 0; j < nTrackHits; j++) { - int32_t i = nTmpHits + j; - clusterSort[i].leg = t->Leg(); + + int32_t nFilteredHits = 0; + int32_t indPrev = -1; + for (int32_t i = 0; i < nHits; i++) { + int32_t ind = clusterIndices[i]; + if (indPrev >= 0 && trackClustersUnsorted[ind].id == trackClustersUnsorted[indPrev].id) { + continue; } - nTmpHits += nTrackHits; + indPrev = ind; + trackClusters[nFilteredHits] = trackClustersUnsorted[ind]; + nFilteredHits++; } - - GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIdsLooper(baseLeg, outwards, trackClusters, clusterSort)); - } else { - GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIds(trackClusters)); + nHits = nFilteredHits; } - nTmpHits = 0; - firstTrackIndex = lastTrackIndex = -1; - for (int32_t i = 0; i < nParts; i++) { - nTmpHits += trackParts[i]->NClusters(); - if (nTmpHits > clusterIndices[0] && firstTrackIndex == -1) { - firstTrackIndex = i; - } - if (nTmpHits > clusterIndices[nHits - 1] && lastTrackIndex == -1) { - lastTrackIndex = i; - } + + const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, 
(uint32_t)nHits); + if (iMergedTrackFirstCluster + nHits > mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); + break; } - int32_t nFilteredHits = 0; - int32_t indPrev = -1; + GPUTPCGMMergedTrackHit* const cl = mClusters + iMergedTrackFirstCluster; + for (int32_t i = 0; i < nHits; i++) { - int32_t ind = clusterIndices[i]; - if (indPrev >= 0 && trackClustersUnsorted[ind].id == trackClustersUnsorted[indPrev].id) { - continue; + uint8_t state; + if (Param().par.earlyTpcTransform) { + const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; + GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; + clXYZ[i].x = c.x; + clXYZ[i].y = c.y; + clXYZ[i].z = c.z; + clXYZ[i].amp = c.amp; + state = c.flags; + } else { + const ClusterNative& c = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[trackClusters[i].id]; + state = c.getFlags(); } - indPrev = ind; - trackClusters[nFilteredHits] = trackClustersUnsorted[ind]; - nFilteredHits++; + cl[i].state = state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags + cl[i].row = trackClusters[i].row; + cl[i].num = trackClusters[i].id; + cl[i].sector = trackClusters[i].sector; + cl[i].leg = trackClusters[i].leg; } - nHits = nFilteredHits; - } - - const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits); - if (iMergedTrackFirstCluster + nHits > mNMaxMergedTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); - CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); - continue; - } - GPUTPCGMMergedTrackHit* const cl = 
mClusters + iMergedTrackFirstCluster; + uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nMergedTracks, 1u); + if (iOutputTrack >= mNMaxTracks) { + raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks); + CAMath::AtomicExch(&mMemory->nMergedTracks, mNMaxTracks); + break; + } - for (int32_t i = 0; i < nHits; i++) { - uint8_t state; - if (Param().par.earlyTpcTransform) { - const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; - GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; - clXYZ[i].x = c.x; - clXYZ[i].y = c.y; - clXYZ[i].z = c.z; - clXYZ[i].amp = c.amp; - state = c.flags; + GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack]; + mergedTrack.SetFlags(0); + mergedTrack.SetOK(true); + mergedTrack.SetLooper(leg > 0 || trbase->NextNeighbour() >= 0); + mergedTrack.SetNClusters(nHits); + mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster); + GPUTPCGMTrackParam& p1 = mergedTrack.Param(); + const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; + mergedTrack.SetCSide(p2.CSide()); + mergedTrack.SetMergedLooperConnected(leg > 0); + mergedTrack.SetPrevSegment(lastMergedSegment); + lastMergedSegment = iOutputTrack; + + GPUTPCGMBorderTrack b; + const float toX = Param().par.earlyTpcTransform ? 
mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); + if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { + p1.X() = toX; + p1.Y() = b.Par()[0]; + p1.Z() = b.Par()[1]; + p1.SinPhi() = b.Par()[2]; } else { - const ClusterNative& c = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[trackClusters[i].id]; - state = c.getFlags(); - } - cl[i].state = state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags - cl[i].row = trackClusters[i].row; - cl[i].num = trackClusters[i].id; - cl[i].sector = trackClusters[i].sector; - cl[i].leg = trackClusters[i].leg; - } - - uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nMergedTracks, 1u); - if (iOutputTrack >= mNMaxTracks) { - raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks); - CAMath::AtomicExch(&mMemory->nMergedTracks, mNMaxTracks); - continue; - } - - GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack]; - - mergedTrack.SetFlags(0); - mergedTrack.SetOK(1); - mergedTrack.SetLooper(leg > 0); - mergedTrack.SetLegs(leg); - mergedTrack.SetNClusters(nHits); - mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster); - GPUTPCGMTrackParam& p1 = mergedTrack.Param(); - const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; - mergedTrack.SetCSide(p2.CSide()); - - GPUTPCGMBorderTrack b; - const float toX = Param().par.earlyTpcTransform ? 
mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); - if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { - p1.X() = toX; - p1.Y() = b.Par()[0]; - p1.Z() = b.Par()[1]; - p1.SinPhi() = b.Par()[2]; - } else { - p1.X() = p2.X(); - p1.Y() = p2.Y(); - p1.Z() = p2.Z(); - p1.SinPhi() = p2.SinPhi(); - } - p1.TZOffset() = p2.TZOffset(); - p1.DzDs() = p2.DzDs(); - p1.QPt() = p2.QPt(); - mergedTrack.SetAlpha(p2.Alpha()); - if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { - p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; - } + p1.X() = p2.X(); + p1.Y() = p2.Y(); + p1.Z() = p2.Z(); + p1.SinPhi() = p2.SinPhi(); + } + p1.TZOffset() = p2.TZOffset(); + p1.DzDs() = p2.DzDs(); + p1.QPt() = p2.QPt(); + mergedTrack.SetAlpha(p2.Alpha()); + if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { + p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; + } - // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nMergedTracks, p1.QPt(), nParts, nHits); + // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nMergedTracks, p1.QPt(), nParts, nHits); - /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nMergedTracks)) - { - mergedTrack.SetOK(0); - mergedTrack.SetNClusters(0); - } - if (mergedTrack.NClusters() && mergedTrack.OK()) */ - if (Param().rec.tpc.mergeCE) { - bool CEside; - if (Param().par.earlyTpcTransform) { - const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; - CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); - } else { - auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; - CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); + /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nMergedTracks)) 
+ { + mergedTrack.SetOK(0); + mergedTrack.SetNClusters(0); } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iMergedTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); - } - } // itr + if (mergedTrack.NClusters() && mergedTrack.OK()) */ + if (Param().rec.tpc.mergeCE) { + bool CEside; + if (Param().par.earlyTpcTransform) { + const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; + CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); + } else { + auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; + CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); + } + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iMergedTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); + } + } while (false); + } } GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) @@ -1911,6 +1828,7 @@ GPUd() void GPUTPCGMMerger::Finalize2(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { + return; // FIXME: !!!! 
const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) { const auto& trk = mMergedTracks[i]; @@ -2057,9 +1975,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, }*/ #endif if (EQ) { - mMergedTracks[params[j].id].SetMergedLooper(true); + mMergedTracks[params[j].id].SetMergedLooperUnconnected(true); if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) { - mMergedTracks[params[i].id].SetMergedLooper(true); + mMergedTracks[params[i].id].SetMergedLooperUnconnected(true); } } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 9c924e74ec519..90f2fce5cdd2e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -205,7 +205,7 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? 
mMergedTracksdEdx[i].dEdxMaxTPC : -1.f) << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] - << " NFitted " << trk.NClustersFitted() << " legs " << (int)trk.Legs() << " flags " << (int)trk.Flags() << "\n"; + << " NFitted " << trk.NClustersFitted() << " flags " << (int)trk.Flags() << "\n"; } out << std::setprecision(ss); } @@ -217,7 +217,7 @@ void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const if (i && i % 100 == 0) { out << "\n"; } - out << (int)mMergedTracks[i].MergedLooper() << " "; + out << (int)mMergedTracks[i].MergedLooperUnconnected() << " "; } out << "\n"; } diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index ebb1b41802ba0..43de5a1d5011a 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -464,6 +464,13 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp } else { if (!drawing) { startCountInner = mVertexBuffer[iSector].size(); + if constexpr (std::is_same_v) { + if (k == 0 && track->PrevSegment() >= 0) { + const auto& prevtrk = mIOPtrs->mergedTracks[track->PrevSegment()]; + int32_t prevcid = mIOPtrs->mergedTrackHits[prevtrk.FirstClusterRef() + prevtrk.NClusters() - 1].num; + drawPointLinestrip(iSector, prevcid, tFINALTRACK, separateExtrapolatedTracksLimit); + } + } if (lastCluster != -1 && (!mCfgH.splitCETracks || lastSide == (mGlobalPos[cid].z < 0))) { int32_t lastcid; if constexpr (std::is_same_v) { @@ -512,6 +519,11 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp if (lastCluster == -1) { continue; } + if constexpr (std::is_same_v) { + if (track->MergedLooperConnected()) { + continue; + } + } } size_t startCountInner = mVertexBuffer[iSector].size(); @@ -610,7 +622,7 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp if ((inFlyDirection == 0 && x < 0) || (inFlyDirection 
&& x * x + trkParam.Y() * trkParam.Y() > (iMC ? (450 * 450) : (300 * 300)))) { break; } - if (fabsf(trkParam.Z() + ZOffset) > mMaxClusterZ + (iMC ? 0 : 0)) { + if (fabsf(trkParam.Z() + ZOffset) > mMaxClusterZ) { break; } if (fabsf(trkParam.Z() - z0) > (iMC ? GPUTPCGeometry::TPCLength() : GPUTPCGeometry::TPCLength())) { diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index d542e39c55b52..dcc2a37992a05 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -100,13 +100,15 @@ using namespace o2::gpu; float qpt = 0; \ bool lowPt = false; \ [[maybe_unused]] bool mev200 = false; \ - bool mergedLooper = false; \ + bool mergedLooperUnconnected = false; \ + bool mergedLooperConnected = false; \ int32_t id = attach & gputpcgmmergertypes::attachTrackMask; \ if (!unattached) { \ qpt = fabsf(mTracking->mIOPtrs.mergedTracks[id].GetParam().GetQPt()); \ lowPt = qpt * mTracking->GetParam().qptB5Scaler > mTracking->GetParam().rec.tpc.rejectQPtB5; \ mev200 = qpt > 5; \ - mergedLooper = mTracking->mIOPtrs.mergedTracks[id].MergedLooper(); \ + mergedLooperUnconnected = mTracking->mIOPtrs.mergedTracks[id].MergedLooperUnconnected(); \ + mergedLooperConnected = mTracking->mIOPtrs.mergedTracks[id].MergedLooperConnected(); \ } \ bool physics = false, protect = false; \ CHECK_CLUSTER_STATE_INIT_LEG_BY_MC(); @@ -118,15 +120,17 @@ using namespace o2::gpu; } \ if (lowPt) { \ mClusterCounts.nLowPt++; \ - } else if (mergedLooper) { \ - mClusterCounts.nMergedLooper++; \ + } else if (mergedLooperUnconnected) { \ + mClusterCounts.nMergedLooperUnconnected++; \ + } else if (mergedLooperConnected) { \ + mClusterCounts.nMergedLooperConnected++; \ } else { \ GPUTPCClusterRejection::GetProtectionStatus(attach, physics, protect, &mClusterCounts, &mev200); \ } #define CHECK_CLUSTER_STATE_NOCOUNT() \ CHECK_CLUSTER_STATE_INIT() \ - if (!lowPt && !mergedLooper) { \ + if (!lowPt && !mergedLooperUnconnected && !mergedLooperConnected) { \ 
GPUTPCClusterRejection::GetProtectionStatus(attach, physics, protect); \ } @@ -2967,7 +2971,8 @@ int32_t GPUQA::DoClusterCounts(uint64_t* attachClusterCounts, int32_t mode) PrintClusterCount(mode, num, "Removed (Strategy B)", mClusterCounts.nTotal - mClusterCounts.nProt, mClusterCounts.nTotal); } - PrintClusterCount(mode, num, "Merged Loopers (Afterburner)", mClusterCounts.nMergedLooper, mClusterCounts.nTotal); + PrintClusterCount(mode, num, "Merged Loopers (Track Merging)", mClusterCounts.nMergedLooperConnected, mClusterCounts.nTotal); + PrintClusterCount(mode, num, "Merged Loopers (Afterburner)", mClusterCounts.nMergedLooperUnconnected, mClusterCounts.nTotal); PrintClusterCount(mode, num, "High Inclination Angle", mClusterCounts.nHighIncl, mClusterCounts.nTotal); PrintClusterCount(mode, num, "Rejected", mClusterCounts.nRejected, mClusterCounts.nTotal); PrintClusterCount(mode, num, "Tube (> 200 MeV)", mClusterCounts.nTube, mClusterCounts.nTotal); diff --git a/GPU/GPUTracking/qa/GPUQA.h b/GPU/GPUTracking/qa/GPUQA.h index 591eb1722bf9f..92e931892339a 100644 --- a/GPU/GPUTracking/qa/GPUQA.h +++ b/GPU/GPUTracking/qa/GPUQA.h @@ -291,7 +291,8 @@ class GPUQA TLegend* mLClust[N_CLS_TYPE]; struct counts_t { - int64_t nRejected = 0, nTube = 0, nTube200 = 0, nLoopers = 0, nLowPt = 0, n200MeV = 0, nPhysics = 0, nProt = 0, nUnattached = 0, nTotal = 0, nHighIncl = 0, nAbove400 = 0, nFakeRemove400 = 0, nFullFakeRemove400 = 0, nBelow40 = 0, nFakeProtect40 = 0, nMergedLooper = 0, nCorrectlyAttachedNormalized = 0, nCorrectlyAttachedNormalizedNonFake = 0; + int64_t nRejected = 0, nTube = 0, nTube200 = 0, nLoopers = 0, nLowPt = 0, n200MeV = 0, nPhysics = 0, nProt = 0, nUnattached = 0, nTotal = 0, nHighIncl = 0, nAbove400 = 0, nFakeRemove400 = 0, nFullFakeRemove400 = 0, nBelow40 = 0, nFakeProtect40 = 0; + int64_t nMergedLooperConnected = 0, nMergedLooperUnconnected = 0, nCorrectlyAttachedNormalized = 0, nCorrectlyAttachedNormalizedNonFake = 0; double nUnaccessible = 0; } 
mClusterCounts; From d59f715447f1d140f525fe5430d466065e01e668 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Jul 2025 11:47:33 +0200 Subject: [PATCH 11/52] GPU TPC: Order legs in descending way and store leg id per track not cluster --- .../GPUTPCCompressionKernels.cxx | 8 +-- .../DataTypes/GPUTPCGMMergedTrackHit.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 3 ++ GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 26 ++++------ GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 1 - GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 10 ++-- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 52 +++++-------------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 2 +- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 9 ---- GPU/GPUTracking/TRDTracking/GPUTRDTracker.h | 2 +- .../display/render/GPUDisplayDraw.cxx | 22 +++++--- GPU/GPUTracking/qa/GPUQA.cxx | 2 +- 12 files changed, 52 insertions(+), 87 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 73b195e8f4fe4..5503eeb30cdd6 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -32,7 +32,6 @@ GPUdii() void GPUTPCCompressionKernels::ThreadnStoredTracks, 1u); compressor.mAttachedClusterFirstIndex[myTrack] = trk.FirstClusterRef(); - lastLeg = hit.leg; c.qPtA[myTrack] = qpt; c.rowA[myTrack] = hit.row; c.sliceA[myTrack] = hit.sector; @@ -114,12 +109,11 @@ GPUdii() void GPUTPCCompressionKernels::Thread= 0) { continue; } - int32_t leg = 0; GPUTPCGMSectorTrack *trbase = &track, *tr = &track; while (true) { int32_t iTrk = tr - mSectorTrackInfos; @@ -200,7 +199,6 @@ void GPUTPCGMMerger::CheckMergedTracks() if (tr->PrevSegmentNeighbour() >= 0) { break; } - leg++; continue; } break; @@ -1463,7 +1461,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread int32_t leg = 0; int32_t lastMergedSegment = -1; while (true) { - 
if (trbase) { + if (trbase && !Param().rec.tpc.dropLoopers) { int32_t jtr = trbase->NextNeighbour(); if (jtr >= 0) { trbase = &(mSectorTrackInfos[jtr]); @@ -1471,7 +1469,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread trbase = nullptr; } else { trbase->SetPrevSegmentNeighbour(1000000001); - leg++; + leg--; } } else { trbase = nullptr; @@ -1492,7 +1490,12 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } itr += nThreads * nBlocks; trbase->SetPrevSegmentNeighbour(1000000000); + int32_t jtr = trbase->NextNeighbour(); leg = 0; + while (jtr >= 0) { + leg++; + jtr = mSectorTrackInfos[jtr].NextNeighbour(); + } lastMergedSegment = -1; } @@ -1535,7 +1538,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } // unpack and sort clusters - if (nParts > 1 && leg == 0) { + if (nParts > 1) { GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { GPUCA_DETERMINISTIC_CODE( // clang-format off if (a->X() != b->X()) { @@ -1554,11 +1557,6 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread }); } - if (Param().rec.tpc.dropLoopers && leg > 0) { - nParts = 1; - leg = 0; - } - trackCluster trackClusters[kMaxClusters]; nHits = 0; for (int32_t ipart = 0; ipart < nParts; ipart++) { @@ -1570,7 +1568,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()]; const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i]; uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0]; - *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector(), t->Leg()}; + *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector()}; } nHits += nTrackHits; } @@ -1651,7 +1649,6 @@ GPUd() void 
GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread cl[i].row = trackClusters[i].row; cl[i].num = trackClusters[i].id; cl[i].sector = trackClusters[i].sector; - cl[i].leg = trackClusters[i].leg; } uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nMergedTracks, 1u); @@ -1664,7 +1661,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack]; mergedTrack.SetFlags(0); mergedTrack.SetOK(true); - mergedTrack.SetLooper(leg > 0 || trbase->NextNeighbour() >= 0); + mergedTrack.SetLooper(leg > 0 || lastMergedSegment >= 0); mergedTrack.SetNClusters(nHits); mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster); GPUTPCGMTrackParam& p1 = mergedTrack.Param(); @@ -1799,7 +1796,6 @@ GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t if (!trk.OK() || trk.NClusters() == 0) { continue; } - uint8_t goodLeg = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].leg; for (uint32_t j = 0; j < trk.NClusters(); j++) { int32_t id = mClusters[trk.FirstClusterRef() + j].num; uint32_t weight = mTrackOrderAttach[i] | attachAttached; @@ -1809,7 +1805,7 @@ GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t } else if (clusterState & GPUTPCGMMergedTrackHit::flagHighIncl) { weight |= attachHighIncl; } - if (mClusters[trk.FirstClusterRef() + j].leg == goodLeg) { + if (trk.Leg() == 0) { weight |= attachGoodLeg; } CAMath::AtomicMax(&mClusterAttachment[id], weight); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 54a541ebe0fd6..76f3f3cdcba08 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -82,7 +82,6 @@ class GPUTPCGMMerger : public GPUProcessor uint32_t id; uint8_t row; uint8_t sector; - uint8_t leg; }; struct tmpSort { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 
74a8df388d163..b10b1d0510fd7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -65,14 +65,15 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlock if (!tracks[i].OK()) { continue; } + if (merger.Param().rec.tpc.dropSecondaryLegsInOutput && tracks[i].MergedLooper()) { + continue; + } + uint32_t nCl = 0; for (uint32_t j = 0; j < tracks[i].NClusters(); j++) { if ((trackClusters[tracks[i].FirstClusterRef() + j].state & flagsReject) || (merger.ClusterAttachment()[trackClusters[tracks[i].FirstClusterRef() + j].num] & flagsRequired) != flagsRequired) { continue; } - if (merger.Param().rec.tpc.dropSecondaryLegsInOutput && trackClusters[tracks[i].FirstClusterRef() + j].leg != trackClusters[tracks[i].FirstClusterRef() + tracks[i].NClusters() - 1].leg) { - continue; - } nCl++; } if (nCl == 0) { @@ -192,9 +193,6 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks if ((trackClusters[tracks[i].FirstClusterRef() + j].state & flagsReject) || (merger.ClusterAttachment()[trackClusters[tracks[i].FirstClusterRef() + j].num] & flagsRequired) != flagsRequired) { continue; } - if (merger.Param().rec.tpc.dropSecondaryLegsInOutput && trackClusters[tracks[i].FirstClusterRef() + j].leg != trackClusters[tracks[i].FirstClusterRef() + tracks[i].NClusters() - 1].leg) { - continue; - } int32_t clusterIdGlobal = trackClusters[tracks[i].FirstClusterRef() + j].num; int32_t sector = trackClusters[tracks[i].FirstClusterRef() + j].sector; int32_t globalRow = trackClusters[tracks[i].FirstClusterRef() + j].row; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 1c74bb4a9b2c5..c76d8f6ab4409 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -54,7 +54,7 @@ using namespace o2::gpu; using namespace o2::tpc; -GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* 
GPUrestrict() clusters, GPUTPCGMMergedTrackHitXYZ* GPUrestrict() clustersXYZ, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, int32_t attempt, float maxSinPhi, gputpcgmmergertypes::GPUTPCOuterParam* GPUrestrict() outerParam) +GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, GPUTPCGMMergedTrackHitXYZ* GPUrestrict() clustersXYZ, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, int32_t attempt, float maxSinPhi, gputpcgmmergertypes::GPUTPCOuterParam* GPUrestrict() outerParam, int8_t leg) { static constexpr float kDeg2Rad = M_PI / 180.f; CADEBUG(static constexpr float kSectAngle = 2 * M_PI / 18.f); @@ -83,22 +83,15 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float lastUpdateX = -1.f; uint8_t lastRow = 255; uint8_t lastSector = 255; - uint8_t storeOuter = 0; for (int32_t iWay = 0; iWay < nWays; iWay++) { int32_t nMissed = 0, nMissed2 = 0; float sumInvSqrtCharge = 0.f; int32_t nAvgCharge = 0; - if (iWay && storeOuter != 255 && param.rec.tpc.nWaysOuter && outerParam) { - storeOuter = 0; + if (iWay && param.rec.tpc.nWaysOuter && outerParam) { if (iWay == nWays - 1) { StoreOuter(outerParam, prop, 0); - if (merger->MergedTracks()[iTrk].Looper()) { - storeOuter = 1; - } - } else if (iWay == nWays - 2 && merger->MergedTracks()[iTrk].Looper()) { - storeOuter = 2; } } @@ -117,8 +110,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ N = 0; lastUpdateX = -1; - const bool inFlyDirection = iWay & 1; - uint8_t lastLeg = clusters[ihitStart].leg; + const bool inFlyDirection = (leg & 1); const int32_t wayDirection = (iWay & 1) ? 
-1 : 1; bool noFollowCircle = false, noFollowCircle2 = false; @@ -130,15 +122,6 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ noFollowCircle2 = true; } - if (storeOuter == 2 && clusters[ihit].leg == clusters[maxN - 1].leg - 1) { - if (lastLeg == clusters[maxN - 1].leg) { - StoreOuter(outerParam, prop, 1); - storeOuter = 255; - } else { - storeOuter = 0; - } - } - if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) { CADEBUG(printf("\tSkipping hit, %d hits rejected, flag %X\n", nMissed, (int32_t)clusters[ihit].state)); if (iWay + 2 >= nWays && !(clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject)) { @@ -183,12 +166,10 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ const auto& cluster = clusters[ihit]; - bool changeDirection = (cluster.leg - lastLeg) & 1; // clang-format off - CADEBUG(if (changeDirection) printf("\t\tChange direction\n")); - CADEBUG(printf("\tLeg %3d Sector %2d %4sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.leg, (int32_t)cluster.sector, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? " " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); + CADEBUG(printf("\tSector %2d %4sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.sector, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? 
" " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); // clang-format on - if (allowModification && changeDirection && !noFollowCircle && !noFollowCircle2) { + if (allowModification && false /*changeDirection*/ && !noFollowCircle && !noFollowCircle2) { if (lastRow != 255) { if (!(merger->Param().rec.tpc.disableRefitAttachment & 4)) { StoreAttachMirror(merger, lastSector, lastRow, iTrk, clAlpha, yy, xx, cluster.sector, cluster.row, inFlyDirection, prop.GetAlpha()); @@ -197,8 +178,8 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { - bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; - dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); + bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2; + dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, leg == 0, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection); if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { @@ -244,7 +225,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } - if (err == 0 && changeDirection) { + if (err == 0 && false /*changeDirection*/) { const float mirrordY = prop.GetMirroredYTrack(); CADEBUG(printf(" -- MirroredY: %f --> %f", mP[0], 
mirrordY)); if (CAMath::Abs(yy - mP[0]) > CAMath::Abs(yy - mirrordY)) { @@ -256,7 +237,6 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ noFollowCircle = false; lastUpdateX = mX; - lastLeg = cluster.leg; lastRow = 255; N++; resetT0 = initResetT0(); @@ -270,7 +250,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float uncorrectedY = -1e6f; if (allowModification) { - uncorrectedY = AttachClusters(merger, cluster.sector, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop); + uncorrectedY = AttachClusters(merger, cluster.sector, cluster.row, iTrk, leg == 0, prop); } const int32_t err2 = mNDF > 0 && CAMath::Abs(prop.GetSinPhi0()) >= maxSinForUpdate; @@ -334,10 +314,6 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ ConstrainSinPhi(); if (retVal == 0) // track is updated { - if (storeOuter == 1 && cluster.leg == clusters[maxN - 1].leg) { - StoreOuter(outerParam, prop, 2); - storeOuter = 255; - } noFollowCircle2 = false; lastUpdateX = mX; covYYUpd = mC[0]; @@ -352,7 +328,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ prop.SetTrack(this, prop.GetAlpha()); } if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { - if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters + if (param.dodEdxEnabled && iWay == nWays - 1) { // TODO: Costimize flag to remove, and option to remove double-clusters bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; if (acc || accAlt) { float qtot = 0, qmax = 0, pad = 0, relTime = 0; @@ -486,7 +462,7 @@ GPUd() void GPUTPCGMTrackParam::MirrorTo(GPUTPCGMPropagator& GPUrestrict() prop, GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, 
GPUTPCGMMergedTrackHit* GPUrestrict() clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* GPUrestrict() merger, GPUTPCGMPropagator& GPUrestrict() prop, float& GPUrestrict() xx, float& GPUrestrict() yy, float& GPUrestrict() zz, int32_t maxN, float clAlpha, uint8_t& GPUrestrict() clusterState, bool rejectChi2) { - if (ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].sector == clusters[ihit + wayDirection].sector && clusters[ihit].leg == clusters[ihit + wayDirection].leg) { + if (ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].sector == clusters[ihit + wayDirection].sector) { float maxDistY, maxDistZ; prop.GetErr2(maxDistY, maxDistZ, merger->Param(), zz, clusters[ihit].row, 0, clusters[ihit].sector, -1.f, 0.f, 0.f); // TODO: Use correct time, avgCharge maxDistY = (maxDistY + mC[0]) * 20.f; @@ -530,7 +506,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t clusterState |= clusters[ihit].state; count += clamp; } - if (!(ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].sector == clusters[ihit + wayDirection].sector && clusters[ihit].leg == clusters[ihit + wayDirection].leg)) { + if (!(ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].sector == clusters[ihit + wayDirection].sector)) { break; } ihit += wayDirection; @@ -1071,7 +1047,7 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() GPUTPCGMTrackParam t = track.Param(); float Alpha = track.Alpha(); CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt()); - bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->Param().par.earlyTpcTransform ? 
merger->ClustersXYZ() + track.FirstClusterRef() : nullptr, nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam()); + bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->Param().par.earlyTpcTransform ? merger->ClustersXYZ() + track.FirstClusterRef() : nullptr, nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam(), track.Leg()); CADEBUG(printf("Finished Fit Track %d\n", iTrk)); CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, ok %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits))); @@ -1085,7 +1061,7 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() NTolerated = 0; // Clusters not fit but tollerated for track length cut t = track.Param(); Alpha = track.Alpha(); - ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->ClustersXYZ() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, 1, GPUCA_MAX_SIN_PHI, &track.OuterParam()); + ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->ClustersXYZ() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, 1, GPUCA_MAX_SIN_PHI, &track.OuterParam(), track.Leg()); } else { uint32_t nRefit = CAMath::AtomicAdd(&merger->Memory()->nRetryRefit, 1u); merger->RetryRefitIds()[nRefit] = iTrk; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index 90ff3154a3fe9..3412388003ec6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -141,7 +141,7 @@ class GPUTPCGMTrackParam GPUd() bool CheckNumericalQuality(float overrideCovYY = -1.f) const; GPUd() bool CheckCov() const; - GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, int32_t& N, int32_t& 
NTolerated, float& Alpha, int32_t attempt = 0, float maxSinPhi = GPUCA_MAX_SIN_PHI, gputpcgmmergertypes::GPUTPCOuterParam* outerParam = nullptr); + GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, int32_t& N, int32_t& NTolerated, float& Alpha, int32_t attempt = 0, float maxSinPhi = GPUCA_MAX_SIN_PHI, gputpcgmmergertypes::GPUTPCOuterParam* outerParam = nullptr, int8_t leg = 0); GPUd() void MoveToReference(GPUTPCGMPropagator& prop, const GPUParam& param, float& alpha); GPUd() void MirrorTo(GPUTPCGMPropagator& prop, float toY, float toZ, bool inFlyDirection, const GPUParam& param, uint8_t row, uint8_t clusterState, bool mirrorParameters, int8_t sector); GPUd() int32_t MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* merger, GPUTPCGMPropagator& prop, float& xx, float& yy, float& zz, int32_t maxN, float clAlpha, uint8_t& clusterState, bool rejectChi2); diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 3f342c6111f04..29ccab2a765da 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -223,15 +223,6 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov float tOffset; if constexpr (std::is_same_v) { count = trkX.NClusters(); - if (trkX.Looper()) { - int32_t leg = mPtrackHits[trkX.FirstClusterRef() + trkX.NClusters() - 1].leg; - for (int32_t i = trkX.NClusters() - 2; i > 0; i--) { - if (mPtrackHits[trkX.FirstClusterRef() + i].leg != leg) { - begin = i + 1; - break; - } - } - } tOffset = trkX.GetParam().GetTZOffset(); } else if constexpr (std::is_same_v) { count = trkX.getNClusters(); diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 431fa357e8b89..f8fa0342ee62d 100644 --- 
a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -101,7 +101,7 @@ class GPUTRDTracker_t : public GPUProcessor { return true; } - GPUd() bool PreCheckTrackTRDCandidate(const GPUTPCGMMergedTrack& trk) const { return trk.OK() && !trk.Looper(); } + GPUd() bool PreCheckTrackTRDCandidate(const GPUTPCGMMergedTrack& trk) const { return trk.OK() && !trk.MergedLooper(); } GPUd() bool CheckTrackTRDCandidate(const TRDTRK& trk) const; GPUd() int32_t LoadTrack(const TRDTRK& trk, uint32_t tpcTrackId, bool checkTrack = true, HelperTrackAttributes* attribs = nullptr); diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 43de5a1d5011a..679d5a6cf88fd 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -434,6 +434,16 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp // Print TPC part of track int32_t separateExtrapolatedTracksLimit = (mCfgH.separateExtrapolatedTracks ? tEXTRAPOLATEDTRACK : TRACK_TYPE_ID_LIMIT); uint32_t lastSide = -1; + int32_t prevcid = -1; + int32_t leg = 0; + if constexpr (std::is_same_v) { + if (track->PrevSegment() >= 0) { + const auto& prevtrk = mIOPtrs->mergedTracks[track->PrevSegment()]; + prevcid = mIOPtrs->mergedTrackHits[prevtrk.FirstClusterRef() + ((track->Leg() & 1) ? 
(prevtrk.NClusters() - 1) : 0)].num; + leg = track->Leg(); + } + } + for (int32_t k = 0; k < nClusters; k++) { if constexpr (std::is_same_v) { if (mCfgH.hideRejectedClusters && (mIOPtrs->mergedTrackHits[track->FirstClusterRef() + k].state & GPUTPCGMMergedTrackHit::flagReject)) { @@ -464,13 +474,6 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp } else { if (!drawing) { startCountInner = mVertexBuffer[iSector].size(); - if constexpr (std::is_same_v) { - if (k == 0 && track->PrevSegment() >= 0) { - const auto& prevtrk = mIOPtrs->mergedTracks[track->PrevSegment()]; - int32_t prevcid = mIOPtrs->mergedTrackHits[prevtrk.FirstClusterRef() + prevtrk.NClusters() - 1].num; - drawPointLinestrip(iSector, prevcid, tFINALTRACK, separateExtrapolatedTracksLimit); - } - } if (lastCluster != -1 && (!mCfgH.splitCETracks || lastSide == (mGlobalPos[cid].z < 0))) { int32_t lastcid; if constexpr (std::is_same_v) { @@ -479,6 +482,8 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp lastcid = &track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative) - mIOPtrs->clustersNative->clustersLinear; } drawPointLinestrip(iSector, lastcid, tFINALTRACK, separateExtrapolatedTracksLimit); + } else if (prevcid != -1 && k == 0 && (leg & 1) == 0) { + drawPointLinestrip(iSector, prevcid, tFINALTRACK, separateExtrapolatedTracksLimit); } drawPointLinestrip(iSector, cid, tFINALTRACK, separateExtrapolatedTracksLimit); } @@ -487,6 +492,9 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp lastCluster = k; lastSide = mGlobalPos[cid].z < 0; } + if (prevcid != -1 && (leg & 1) && drawing) { + drawPointLinestrip(iSector, prevcid, tFINALTRACK, separateExtrapolatedTracksLimit); + } // Print ITS part of track if constexpr (std::is_same_v) { diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index dcc2a37992a05..aa4db98d0b71a 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ 
b/GPU/GPUTracking/qa/GPUQA.cxx @@ -1704,7 +1704,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } rowClCount += !(trackClusters[track.FirstClusterRef() + jNext].state & GPUTPCGMMergedTrackHit::flagReject); } - if (trackClusters[track.FirstClusterRef() + j].leg == trackClusters[track.FirstClusterRef() + track.NClusters() - 1].leg && rowClCount) { + if (!track.MergedLooper() && rowClCount) { nClCorrected++; } if (mcAvail && rowClCount) { From 5d7203ebf03f4925fde451e9268d3434851b56a1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Jul 2025 12:15:36 +0200 Subject: [PATCH 12/52] GPU TPC: 16 bits are enough for nclusters --- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 1ea6ab10918d5..60be206ed7e42 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -119,9 +119,8 @@ class GPUTPCGMMergedTrack float mAlpha; //* alpha angle uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays int32_t mPrevSegment; //* next segment in case of looping track - // TODO: Change to 8 bit - uint32_t mNClusters; //* number of track clusters - uint32_t mNClustersFitted; //* number of clusters used in fit + uint16_t mNClusters; //* number of track clusters + uint16_t mNClustersFitted; //* number of clusters used in fit uint8_t mFlags; uint8_t mLeg; From b6a34aa0adfee8e8ae78f110855fb495efe12550 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Jul 2025 13:12:21 +0200 Subject: [PATCH 13/52] GPU Display: Make 'none' frontend and backend work --- GPU/GPUTracking/Global/GPUChainTracking.cxx | 1 + .../Standalone/Benchmark/standalone.cxx | 4 +- GPU/GPUTracking/display/CMakeLists.txt | 2 + .../display/backend/GPUDisplayBackend.cxx | 4 +- .../display/backend/GPUDisplayBackend.h | 3 +- 
.../display/backend/GPUDisplayBackendNone.cxx | 30 ++++++++++++ .../display/backend/GPUDisplayBackendNone.h | 49 +++++++++++++++++++ .../display/frontend/GPUDisplayFrontend.cxx | 33 +++++++++++-- .../display/frontend/GPUDisplayFrontend.h | 5 +- .../frontend/GPUDisplayFrontendGlfw.cxx | 11 ----- .../display/frontend/GPUDisplayFrontendGlfw.h | 1 - .../frontend/GPUDisplayFrontendGlut.cxx | 10 ---- .../display/frontend/GPUDisplayFrontendGlut.h | 1 - .../frontend/GPUDisplayFrontendNone.cxx | 19 +++++++ .../display/frontend/GPUDisplayFrontendNone.h | 7 ++- .../frontend/GPUDisplayFrontendWayland.cxx | 10 ---- .../frontend/GPUDisplayFrontendWayland.h | 1 - .../frontend/GPUDisplayFrontendWindows.cxx | 10 ---- .../frontend/GPUDisplayFrontendWindows.h | 1 - .../frontend/GPUDisplayFrontendX11.cxx | 12 +---- .../display/frontend/GPUDisplayFrontendX11.h | 6 +-- 21 files changed, 150 insertions(+), 70 deletions(-) create mode 100644 GPU/GPUTracking/display/backend/GPUDisplayBackendNone.cxx create mode 100644 GPU/GPUTracking/display/backend/GPUDisplayBackendNone.h diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 79e9ce6cef766..b0ea052063f20 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -826,6 +826,7 @@ int32_t GPUChainTracking::RunChainFinalize() if (!mDisplayRunning) { GPUInfo("Starting Event Display..."); if (mEventDisplay->StartDisplay()) { + GPUError("Error starting Event Display"); return (1); } mDisplayRunning = true; diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 5240b5ca47967..1b1cb510af7be 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -703,10 +703,10 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU configStandalone.noprompt = 1; } if (tmpRetVal == 3 && 
configStandalone.proc.ignoreNonFatalGPUErrors) { - printf("Non-FATAL GPU error occured, ignoring\n"); + printf("GPU Standalone Benchmark: Non-FATAL GPU error occured, ignoring\n"); } else if (tmpRetVal && !configStandalone.continueOnError) { if (tmpRetVal != 2) { - printf("Error occured\n"); + printf("GPU Standalone Benchmark: Error occured\n"); } return 1; } diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index 25b028d573bcf..32d25ee08b729 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -55,7 +55,9 @@ set(SRCS ../utils/qsem.cxx helpers/GPUDisplayMagneticField.cxx frontend/GPUDisplayFrontend.cxx frontend/GPUDisplayFrontendGlfw.cxx + frontend/GPUDisplayFrontendNone.cxx backend/GPUDisplayBackend.cxx + backend/GPUDisplayBackendNone.cxx backend/GPUDisplayBackendOpenGL.cxx) set(SRCS_NO_H helpers/GPUDisplayLoader.cxx diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx index 98d2593c27950..3694ab93398cc 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx @@ -16,7 +16,7 @@ #include "helpers/GPUDisplayMagneticField.h" #include "GPUDisplayBackendOpenGL.h" - +#include "GPUDisplayBackendNone.h" #ifdef GPUCA_BUILD_EVENT_DISPLAY_VULKAN #include "GPUDisplayBackendVulkan.h" #endif @@ -51,6 +51,8 @@ GPUDisplayBackend* GPUDisplayBackend::getBackend(const char* type) #endif if (strcmp(type, "opengl") == 0 || strcmp(type, "auto") == 0) { return new GPUDisplayBackendOpenGL; + } else if (strcmp(type, "none") == 0) { + return new GPUDisplayBackendNone; } else { GPUError("Requested renderer not available"); } diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.h b/GPU/GPUTracking/display/backend/GPUDisplayBackend.h index dc56dedf587ed..546c53e1e63ff 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackend.h +++ 
b/GPU/GPUTracking/display/backend/GPUDisplayBackend.h @@ -57,7 +57,8 @@ class GPUDisplayBackend enum backendTypes { TYPE_INVALID = -1, TYPE_OPENGL = 0, - TYPE_VULKAN = 1 + TYPE_VULKAN = 1, + TYPE_NONE = 2 }; struct DrawArraysIndirectCommand { diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendNone.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendNone.cxx new file mode 100644 index 0000000000000..c0011265dbe52 --- /dev/null +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendNone.cxx @@ -0,0 +1,30 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDisplayBackendNone.cxx +/// \author David Rohr + +#include "GPUCommonDef.h" +#include "GPUDisplayBackendNone.h" + +using namespace o2::gpu; + +GPUDisplayBackendNone::GPUDisplayBackendNone() +{ + mBackendType = TYPE_NONE; + mBackendName = "NONE"; +} + +int32_t GPUDisplayBackendNone::InitBackendA() +{ + + return 0; +} diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendNone.h b/GPU/GPUTracking/display/backend/GPUDisplayBackendNone.h new file mode 100644 index 0000000000000..4af69692d79c1 --- /dev/null +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendNone.h @@ -0,0 +1,49 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDisplayBackendNone.h +/// \author David Rohr + +#ifndef GPUDISPLAYBACKENDNONE_H +#define GPUDISPLAYBACKENDNONE_H + +#include "GPUDisplayBackend.h" + +namespace o2::gpu +{ +class GPUDisplayBackendNone : public GPUDisplayBackend +{ + public: + GPUDisplayBackendNone(); + ~GPUDisplayBackendNone() override = default; + + protected: + uint32_t DepthBits() override { return 32; }; + uint32_t drawVertices(const vboList& v, const drawType t) override { return 0; } + void ActivateColor(std::array& color) override {} + void setDepthBuffer() override {} + int32_t InitBackendA() override; + void ExitBackendA() override {} + void loadDataToGPU(size_t totalVertizes) override {} + void prepareDraw(const hmm_mat4& proj, const hmm_mat4& view, bool requestScreenshot, bool toMixBuffer, float includeMixImage) override {} + void finishDraw(bool doScreenshot, bool toMixBuffer, float includeMixImage) override {} + void finishFrame(bool doScreenshot, bool toMixBuffer, float includeMixImage) override {} + void prepareText() override {} + void finishText() override {} + void pointSizeFactor(float factor) override {} + void lineWidthFactor(float factor) override {} + void OpenGLPrint(const char* s, float x, float y, float* color, float scale) override {} + void addFontSymbol(int32_t symbol, int32_t sizex, int32_t sizey, int32_t offsetx, int32_t offsety, int32_t advance, void* data) override {} + void initializeTextDrawing() override {} +}; +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx index 
22970c3228815..df5c45c6beaa8 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx @@ -17,7 +17,11 @@ #ifdef _WIN32 #include "GPUDisplayFrontendWindows.h" -#elif defined(GPUCA_BUILD_EVENT_DISPLAY_X11) +#else +#include +#endif + +#ifdef GPUCA_BUILD_EVENT_DISPLAY_X11 #include "GPUDisplayFrontendX11.h" #endif #ifdef GPUCA_BUILD_EVENT_DISPLAY_GLFW @@ -29,6 +33,7 @@ #ifdef GPUCA_BUILD_EVENT_DISPLAY_WAYLAND #include "GPUDisplayFrontendWayland.h" #endif +#include "GPUDisplayFrontendNone.h" #include "GPULogging.h" #include @@ -118,7 +123,7 @@ bool GPUDisplayFrontend::isGUIRunning() } GPUDisplayFrontend* GPUDisplayFrontend::getFrontend(const char* type) -{ +{ // clang-format off #if !defined(GPUCA_STANDALONE) && defined(GPUCA_BUILD_EVENT_DISPLAY_GLFW) if (strcmp(type, "glfw") == 0 || strcmp(type, "auto") == 0) { return new GPUDisplayFrontendGlfw; @@ -148,11 +153,13 @@ GPUDisplayFrontend* GPUDisplayFrontend::getFrontend(const char* type) return new GPUDisplayFrontendGlut; } else #endif - { + if (strcmp(type, "none") == 0) { + return new GPUDisplayFrontendNone; + } else { GPUError("Requested frontend not available"); } return nullptr; -} +} // clang-format on GPUDisplayBackend* GPUDisplayFrontend::backend() { @@ -163,3 +170,21 @@ int32_t& GPUDisplayFrontend::drawTextFontSize() { return mDisplay->drawTextFontSize(); } + +int32_t GPUDisplayFrontend::StartDisplay() +{ +#ifndef _WIN32 + static pthread_t hThread; + if (pthread_create(&hThread, nullptr, FrontendThreadWrapper, this)) { + GPUError("Coult not Create frontend Thread..."); + return (1); + } +#else + HANDLE hThread; + if ((hThread = CreateThread(nullptr, nullptr, &OpenGLWrapper, this, nullptr, nullptr)) == nullptr) { + GPUError("Coult not Create GL Thread..."); + return (1); + } +#endif + return (0); +} diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h index 
9087ec9a431f6..0abab8bb0a121 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h @@ -40,14 +40,15 @@ class GPUDisplayFrontend : public GPUDisplayFrontendInterface TYPE_X11 = 1, TYPE_GLUT = 2, TYPE_GLFW = 3, - TYPE_WAYLAND = 4 + TYPE_WAYLAND = 4, + TYPE_NONE = 5 }; // Compile time minimum version defined in GPUDisplay.h, keep in sync! static constexpr int32_t GL_MIN_VERSION_MAJOR = 4; static constexpr int32_t GL_MIN_VERSION_MINOR = 5; - virtual int32_t StartDisplay() = 0; // Start the display. This function returns, and should spawn a thread that runs the display, and calls InitDisplay + int32_t StartDisplay(); // Start the display. This function returns, and should spawn a thread that runs the display, and calls InitDisplay void DisplayExit() override = 0; // Stop the display. Display thread should call ExitDisplay and the function returns after the thread has terminated virtual void SwitchFullscreen(bool set) = 0; // Toggle full-screen mode virtual void ToggleMaximized(bool set) = 0; // Maximize window diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.cxx index 4d80917a26215..ba22f92660fd0 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.cxx @@ -33,7 +33,6 @@ extern "C" int32_t gl3wInit(); #include #include #include -#include #ifdef GPUCA_O2_LIB #if __has_include("../src/imgui.h") @@ -417,16 +416,6 @@ void GPUDisplayFrontendGlfw::ToggleMaximized(bool set) void GPUDisplayFrontendGlfw::SetVSync(bool enable) { glfwSwapInterval(enable); } -int32_t GPUDisplayFrontendGlfw::StartDisplay() -{ - static pthread_t hThread; - if (pthread_create(&hThread, nullptr, FrontendThreadWrapper, this)) { - GPUError("Coult not Create GL Thread..."); - return (1); - } - return (0); -} - bool GPUDisplayFrontendGlfw::EnableSendKey() { #ifdef 
GPUCA_O2_LIB diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.h index 5276652a370a1..43dd3d65531dd 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.h @@ -28,7 +28,6 @@ class GPUDisplayFrontendGlfw : public GPUDisplayFrontend GPUDisplayFrontendGlfw(); ~GPUDisplayFrontendGlfw() override = default; - int32_t StartDisplay() override; void DisplayExit() override; void SwitchFullscreen(bool set) override; void ToggleMaximized(bool set) override; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.cxx index 334a60446a4f3..1b2f2a21150c3 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.cxx @@ -309,13 +309,3 @@ void GPUDisplayFrontendGlut::SwitchFullscreen(bool set) void GPUDisplayFrontendGlut::ToggleMaximized(bool set) {} void GPUDisplayFrontendGlut::SetVSync(bool enable) {} - -int32_t GPUDisplayFrontendGlut::StartDisplay() -{ - static pthread_t hThread; - if (pthread_create(&hThread, nullptr, FrontendThreadWrapper, this)) { - GPUError("Coult not Create GL Thread..."); - return (1); - } - return (0); -} diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.h index 96f8f4af6cba5..9351349e2287d 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.h @@ -26,7 +26,6 @@ class GPUDisplayFrontendGlut : public GPUDisplayFrontend GPUDisplayFrontendGlut(); ~GPUDisplayFrontendGlut() override = default; - int32_t StartDisplay() override; void DisplayExit() override; void SwitchFullscreen(bool set) override; void ToggleMaximized(bool set) override; diff --git 
a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.cxx index c48000bd80685..8a7eab7e00526 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.cxx @@ -13,4 +13,23 @@ /// \author David Rohr #include "GPUDisplayFrontendNone.h" +#include "GPUDisplayGUIWrapper.h" using namespace o2::gpu; + +GPUDisplayFrontendNone::GPUDisplayFrontendNone() +{ + mFrontendType = TYPE_NONE; + mFrontendName = "NONE"; +} + +int32_t GPUDisplayFrontendNone::FrontendMain() +{ + if (InitDisplay()) { + return 1; + } + do { + DrawGLScene(); + HandleSendKey(); + } while (mDisplayControl != 2); + return 0; +} diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.h index defd759ac4df6..3c7b67c35a0ce 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.h @@ -21,15 +21,18 @@ namespace o2::gpu { class GPUDisplayFrontendNone : public GPUDisplayFrontend { - GPUDisplayFrontendNone() = default; + public: + GPUDisplayFrontendNone(); ~GPUDisplayFrontendNone() override = default; - int32_t StartDisplay() override { return 1; } void DisplayExit() override {} void SwitchFullscreen(bool set) override {} void ToggleMaximized(bool set) override {} void SetVSync(bool enable) override {} void OpenGLPrint(const char* s, float x, float y, float r, float g, float b, float a, bool fromBotton = true) override {} + + private: + int32_t FrontendMain() override; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx index 7a652297d89d7..5a42954c90fa7 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx @@ -475,16 +475,6 @@ 
void GPUDisplayFrontendWayland::SetVSync(bool enable) { } -int32_t GPUDisplayFrontendWayland::StartDisplay() -{ - static pthread_t hThread; - if (pthread_create(&hThread, nullptr, FrontendThreadWrapper, this)) { - GPUError("Coult not Create frontend Thread..."); - return (1); - } - return (0); -} - void GPUDisplayFrontendWayland::getSize(int32_t& width, int32_t& height) { width = mDisplayWidth; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.h index 6dfe0a361fbb6..55676c694cfef 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.h @@ -36,7 +36,6 @@ class GPUDisplayFrontendWayland : public GPUDisplayFrontend GPUDisplayFrontendWayland(); ~GPUDisplayFrontendWayland() override = default; - int32_t StartDisplay() override; void DisplayExit() override; void SwitchFullscreen(bool set) override; void ToggleMaximized(bool set) override; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx index e511718e258f7..30148e0cb00ee 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx @@ -375,13 +375,3 @@ void OpenGLPrint(const char* s, float x, float y, float r, float g, float b, flo void SwitchFullscreen(bool set) {} void ToggleMaximized(bool set) {} void SetVSync(bool enable) {} - -int32_t GPUDisplayFrontendWindows::StartDisplay() -{ - HANDLE hThread; - if ((hThread = CreateThread(nullptr, nullptr, &OpenGLWrapper, this, nullptr, nullptr)) == nullptr) { - GPUError("Coult not Create GL Thread..."); - return (1); - } - return (0); -} diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.h index a8534f3f0fc1f..cac5b62c4cc63 100644 --- 
a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.h @@ -25,7 +25,6 @@ class GPUDisplayFrontendWindows : public GPUDisplayFrontend GPUDisplayFrontendWindows(); ~GPUDisplayFrontendWindows() override = default; - int32_t StartDisplay() override; void DisplayExit() override; void SwitchFullscreen(bool set) override; void ToggleMaximized(bool set) override; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.cxx index 96011aa064bac..be56fc8a1e546 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.cxx @@ -23,6 +23,8 @@ #include #include +#include + #ifdef GPUCA_BUILD_EVENT_DISPLAY_VULKAN #include #include @@ -518,16 +520,6 @@ void GPUDisplayFrontendX11::SetVSync(bool enable) } } -int32_t GPUDisplayFrontendX11::StartDisplay() -{ - static pthread_t hThread; - if (pthread_create(&hThread, nullptr, FrontendThreadWrapper, this)) { - GPUError("Coult not Create frontend Thread..."); - return (1); - } - return (0); -} - void GPUDisplayFrontendX11::getSize(int32_t& width, int32_t& height) { Window root_return; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.h index f14d05b3080bd..7ea38271f2ee9 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.h @@ -16,9 +16,10 @@ #define GPUDISPLAYFRONTENDX11_H #include "GPUDisplayFrontend.h" -#include #include -#include +#include +#include +#include #include namespace o2::gpu @@ -29,7 +30,6 @@ class GPUDisplayFrontendX11 : public GPUDisplayFrontend GPUDisplayFrontendX11(); ~GPUDisplayFrontendX11() override = default; - int32_t StartDisplay() override; void DisplayExit() override; void SwitchFullscreen(bool set) override; void ToggleMaximized(bool set) 
override; From 0dd873ed2d405256667fb0bcead9b42a8f19d68b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Jul 2025 18:44:05 +0200 Subject: [PATCH 14/52] GPU Display: Skip rejected first/last clusters when drawing connected looper segments --- GPU/GPUTracking/display/render/GPUDisplayDraw.cxx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 679d5a6cf88fd..15b58504b3ef2 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -439,8 +439,14 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp if constexpr (std::is_same_v) { if (track->PrevSegment() >= 0) { const auto& prevtrk = mIOPtrs->mergedTracks[track->PrevSegment()]; - prevcid = mIOPtrs->mergedTrackHits[prevtrk.FirstClusterRef() + ((track->Leg() & 1) ? (prevtrk.NClusters() - 1) : 0)].num; leg = track->Leg(); + for (int32_t iChk = (leg & 1) ? (prevtrk.NClusters() - 1) : 0; iChk != ((leg & 1) ? -1 : (int32_t)prevtrk.NClusters()); iChk += (leg & 1) ? 
-1 : 1) { + const auto& hit = mIOPtrs->mergedTrackHits[prevtrk.FirstClusterRef() + iChk]; + if (!mCfgH.hideRejectedClusters || !(hit.state & GPUTPCGMMergedTrackHit::flagReject)) { + prevcid = hit.num; + break; + } + } } } From 8b0aa24c942b6c53721b05180c0cdb9416e86969 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Jul 2025 22:58:52 +0200 Subject: [PATCH 15/52] GPU TPC: Fix sorting of clusters in segments of looping tracks --- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 107 ++++++++++++++---- GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h | 3 - .../display/render/GPUDisplayDraw.cxx | 18 ++- 4 files changed, 94 insertions(+), 36 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 60be206ed7e42..358a808e120a9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -42,7 +42,7 @@ class GPUTPCGMMergedTrack return mAlpha; } GPUd() bool OK() const { return mFlags & 0x01; } - GPUd() bool Looper() const { return mFlags & 0x02; } + GPUd() bool Looper() const { return mFlags & 0x02; } // TODO: Get rid of the looper flag, or rename it if still needed. 
GPUd() bool CSide() const { return mFlags & 0x04; } GPUd() bool CCE() const { return mFlags & 0x08; } GPUd() bool MergedLooperUnconnected() const { return mFlags & 0x10; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 81e96af47152d..27aee906860d7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1435,13 +1435,14 @@ namespace // anonymous { struct GPUTPCGMMerger_CompareClusterIds { const GPUTPCGMMerger::trackCluster* const mCmp; - GPUd() GPUTPCGMMerger_CompareClusterIds(const GPUTPCGMMerger::trackCluster* cmp) : mCmp(cmp) {} + const bool revert; + GPUd() GPUTPCGMMerger_CompareClusterIds(const GPUTPCGMMerger::trackCluster* cmp, bool r) : mCmp(cmp), revert(r) {} GPUd() bool operator()(const int16_t aa, const int16_t bb) { const GPUTPCGMMerger::trackCluster& a = mCmp[aa]; const GPUTPCGMMerger::trackCluster& b = mCmp[bb]; if (a.row != b.row) { - return (a.row > b.row); + return (a.row > b.row) ^ revert; } return GPUCA_DETERMINISTIC_CODE((a.id != b.id) ? (a.id > b.id) : (aa > bb), a.id > b.id); } @@ -1460,6 +1461,8 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUTPCGMSectorTrack* trbase = nullptr; int32_t leg = 0; int32_t lastMergedSegment = -1; + bool revertSegments = false; + bool revertInSegment = false; while (true) { if (trbase && !Param().rec.tpc.dropLoopers) { int32_t jtr = trbase->NextNeighbour(); @@ -1469,7 +1472,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread trbase = nullptr; } else { trbase->SetPrevSegmentNeighbour(1000000001); - leg--; + leg += revertSegments ? 
1 : -1; } } else { trbase = nullptr; @@ -1488,15 +1491,68 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (itr >= SectorTrackInfoLocalTotal()) { break; } - itr += nThreads * nBlocks; + revertSegments = false; + revertInSegment = false; trbase->SetPrevSegmentNeighbour(1000000000); int32_t jtr = trbase->NextNeighbour(); leg = 0; - while (jtr >= 0) { - leg++; - jtr = mSectorTrackInfos[jtr].NextNeighbour(); + if (jtr >= 0) { + int32_t lasttr = itr; + while (jtr >= 0) { // --------------- count segments --------------- + if (&mSectorTrackInfos[jtr] == trbase) { + break; // Break cyclic graph + } + lasttr = jtr; + leg++; + jtr = mSectorTrackInfos[jtr].NextNeighbour(); + } + + float mainZT = 1e9; + revertSegments = true; + for (uint32_t k = 0; k < 2; k++) { // --------------- check if first or last segment is primary --------------- + int32_t ichk = k ? lasttr : itr; + const GPUTPCGMSectorTrack* trchk = &mSectorTrackInfos[ichk]; + while (true) { + float zt = Param().par.earlyTpcTransform ? CAMath::Min(CAMath::Abs(trchk->ClusterZT0()), CAMath::Abs(trchk->ClusterZTN())) : -trchk->MinClusterZT(); // Negative time ~ smallest z, behaves the same way + if (zt < mainZT) { + if (k) { + revertSegments = false; + break; + } + mainZT = zt; + } + int32_t next = trchk->NextSegmentNeighbour(); + if (next < 0 || next == ichk) { + break; // Breaks also cycles + } + trchk = &mSectorTrackInfos[next]; + } + } + if (revertSegments) { + leg = 0; + } + + { // --------------- find longest sector track of main segment --------------- + int32_t length = 0; + int32_t ichk = revertSegments ? 
itr : lasttr; + const GPUTPCGMSectorTrack* trchk = &mSectorTrackInfos[ichk]; + const GPUTPCGMSectorTrack* longest = trchk; + while (true) { + if (trchk->OrigTrack()->NHits() > length) { + longest = trchk; + length = trchk->OrigTrack()->NHits(); + } + int32_t next = trchk->NextSegmentNeighbour(); + if (next < 0 || next == ichk) { + break; // Breaks also cycles + } + trchk = &mSectorTrackInfos[next]; + } + revertInSegment = (longest->ClusterZT0() < longest->ClusterZTN()) ^ (Param().par.earlyTpcTransform ? !longest->CSide() : false); + } } lastMergedSegment = -1; + itr += nThreads * nBlocks; } do { @@ -1513,7 +1569,6 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } nHits += tr->NClusters(); - tr->SetLeg(leg); trackParts[nParts++] = tr; for (int32_t i = 0; i < 2; i++) { if (tr->ExtrapolatedTrackId(i) != -1) { @@ -1523,8 +1578,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (nHits + mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) { break; } - trackParts[nParts] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)]; - trackParts[nParts++]->SetLeg(leg); + trackParts[nParts++] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)]; nHits += mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters(); } } @@ -1538,7 +1592,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } // unpack and sort clusters - if (nParts > 1) { + if (nParts > 1 && (!revertInSegment ^ (leg & 1))) { GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { GPUCA_DETERMINISTIC_CODE( // clang-format off if (a->X() != b->X()) { @@ -1576,11 +1630,14 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread break; } - bool ordered = true; - for (int32_t i = 1; i < nHits; i++) { - if (trackClusters[i].row > trackClusters[i - 1].row || trackClusters[i].id == trackClusters[i - 1].id) { - 
ordered = false; - break; + const bool mustReverse = revertInSegment ^ (leg & 1); + bool ordered = !mustReverse; + if (ordered) { + for (int32_t i = 1; i < nHits; i++) { + if ((trackClusters[i].row > trackClusters[i - 1].row) ^ mustReverse || trackClusters[i].id == trackClusters[i - 1].id) { + ordered = false; + break; + } } } int32_t firstTrackIndex = 0; @@ -1594,7 +1651,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread clusterIndices[i] = i; } - GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIds(trackClusters)); + GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIds(trackClusters, mustReverse)); nTmpHits = 0; firstTrackIndex = lastTrackIndex = -1; @@ -1659,16 +1716,24 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack]; + GPUTPCGMTrackParam& p1 = mergedTrack.Param(); + const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; mergedTrack.SetFlags(0); mergedTrack.SetOK(true); - mergedTrack.SetLooper(leg > 0 || lastMergedSegment >= 0); + mergedTrack.SetLeg(leg); + mergedTrack.SetLooper(leg > 0); mergedTrack.SetNClusters(nHits); mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster); - GPUTPCGMTrackParam& p1 = mergedTrack.Param(); - const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; mergedTrack.SetCSide(p2.CSide()); mergedTrack.SetMergedLooperConnected(leg > 0); - mergedTrack.SetPrevSegment(lastMergedSegment); + if (revertSegments) { + mergedTrack.SetPrevSegment(-1); + if (lastMergedSegment >= 0) { + mMergedTracks[lastMergedSegment].SetPrevSegment(iOutputTrack); + } + } else { + mergedTrack.SetPrevSegment(lastMergedSegment); + } lastMergedSegment = iOutputTrack; GPUTPCGMBorderTrack b; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h index 27e4a89300ca4..1de3928aac409 100644 
--- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h @@ -55,7 +55,6 @@ class GPUTPCGMSectorTrack GPUd() float DzDs() const { return mParam.mDzDs; } GPUd() float QPt() const { return mParam.mQPt; } GPUd() float TZOffset() const { return mTZOffset; } - GPUd() uint8_t Leg() const { return mLeg; } GPUd() int32_t LocalTrackId() const { return mLocalTrackId; } GPUd() void SetLocalTrackId(int32_t v) { mLocalTrackId = v; } @@ -99,7 +98,6 @@ class GPUTPCGMSectorTrack GPUd() void SetNeighbor(int32_t v, int32_t i) { mNeighbour[i] = v; } GPUd() void SetPrevSegmentNeighbour(int32_t v) { mSegmentNeighbour[0] = v; } GPUd() void SetNextSegmentNeighbour(int32_t v) { mSegmentNeighbour[1] = v; } - GPUd() void SetLeg(uint8_t v) { mLeg = v; } GPUd() void CopyParamFrom(const GPUTPCGMSectorTrack& t) { @@ -136,7 +134,6 @@ class GPUTPCGMSectorTrack int32_t mLocalTrackId; // Corrected local track id in terms of GMSectorTracks array for extrapolated tracks, UNDEFINED for local tracks! 
int32_t mExtrapolatedTrackIds[2]; // IDs of associated extrapolated tracks uint8_t mSector; // sector of this track segment - uint8_t mLeg; // Leg of this track segment ClassDefNV(GPUTPCGMSectorTrack, 1); }; diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 15b58504b3ef2..3a56f874d2d12 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -376,6 +376,7 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp size_t startCountInner = mVertexBuffer[iSector].size(); bool drawing = false; + uint32_t lastSide = -1; if constexpr (std::is_same_v) { if (!mCfgH.drawTracksAndFilter && !(mCfgH.drawTPCTracks || (mCfgH.drawITSTracks && mIOPtrs->tpcLinkITS && mIOPtrs->tpcLinkITS[i] != -1) || (mCfgH.drawTRDTracks && mIOPtrs->tpcLinkTRD && mIOPtrs->tpcLinkTRD[i] != -1) || (mCfgH.drawTOFTracks && mIOPtrs->tpcLinkTOF && mIOPtrs->tpcLinkTOF[i] != -1))) { @@ -397,6 +398,7 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp drawing = true; mVertexBuffer[iSector].emplace_back(mGlobalPosTOF[cid].x, mGlobalPosTOF[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTOF[cid].z); mGlobalPosTOF[cid].w = tTOFATTACHED; + lastSide = mGlobalPosTOF[cid].z < 0; } } @@ -410,6 +412,7 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp drawing = true; mVertexBuffer[iSector].emplace_back(mGlobalPosTRD2[cid].x, mGlobalPosTRD2[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD2[cid].z); mVertexBuffer[iSector].emplace_back(mGlobalPosTRD[cid].x, mGlobalPosTRD[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosTRD[cid].z); + lastSide = mGlobalPosTRD[cid].z < 0; mGlobalPosTRD[cid].w = tTRDATTACHED; } }; @@ -433,17 +436,15 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp // Print TPC part of track int32_t separateExtrapolatedTracksLimit = (mCfgH.separateExtrapolatedTracks ? tEXTRAPOLATEDTRACK : TRACK_TYPE_ID_LIMIT); - uint32_t lastSide = -1; - int32_t prevcid = -1; - int32_t leg = 0; if constexpr (std::is_same_v) { if (track->PrevSegment() >= 0) { const auto& prevtrk = mIOPtrs->mergedTracks[track->PrevSegment()]; - leg = track->Leg(); - for (int32_t iChk = (leg & 1) ? (prevtrk.NClusters() - 1) : 0; iChk != ((leg & 1) ? -1 : (int32_t)prevtrk.NClusters()); iChk += (leg & 1) ? -1 : 1) { + for (int32_t iChk = prevtrk.NClusters() - 1; iChk >= 0; iChk--) { const auto& hit = mIOPtrs->mergedTrackHits[prevtrk.FirstClusterRef() + iChk]; if (!mCfgH.hideRejectedClusters || !(hit.state & GPUTPCGMMergedTrackHit::flagReject)) { - prevcid = hit.num; + drawPointLinestrip(iSector, hit.num, tFINALTRACK, separateExtrapolatedTracksLimit); + lastSide = mGlobalPos[hit.num].z < 0; + drawing = true; break; } } @@ -488,8 +489,6 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp lastcid = &track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative) - mIOPtrs->clustersNative->clustersLinear; } drawPointLinestrip(iSector, lastcid, tFINALTRACK, separateExtrapolatedTracksLimit); - } else if (prevcid != -1 && k == 0 && (leg & 1) == 0) { - drawPointLinestrip(iSector, prevcid, tFINALTRACK, separateExtrapolatedTracksLimit); } drawPointLinestrip(iSector, cid, tFINALTRACK, separateExtrapolatedTracksLimit); } @@ -498,9 +497,6 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp lastCluster = k; lastSide = mGlobalPos[cid].z < 0; } - if (prevcid != -1 && (leg & 1) && drawing) { - drawPointLinestrip(iSector, prevcid, tFINALTRACK, separateExtrapolatedTracksLimit); - } // Print 
ITS part of track if constexpr (std::is_same_v) { From 2fdfffbf959926dd834bd244c6e171ce736ebce7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 25 Jul 2025 12:54:14 +0200 Subject: [PATCH 16/52] GPU TPC: Use inner SectorRefit parameters as start parameters for odd leg numbers --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- .../Merger/GPUTPCGMSectorTrack.cxx | 59 ++++++++++++------- GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h | 1 + 3 files changed, 39 insertions(+), 23 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 27aee906860d7..9f9df0884901c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1738,7 +1738,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUTPCGMBorderTrack b; const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); - if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { + if (p2.TransportToX<2>(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; p1.Z() = b.Par()[1]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx index 11b153c7f0d8b..2b6d826baea56 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx @@ -274,13 +274,25 @@ GPUd() bool GPUTPCGMSectorTrack::FilterErrors(const GPUTPCGMMerger* merger, int3 return ok; } +template <> +GPUd() bool GPUTPCGMSectorTrack::TransportToX<2>(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov) const +{ + if (CAMath::Abs(x - mParam2.mX) < CAMath::Abs(x - mParam.mX) && mParam2.mX > 0) { + return TransportToX<1>(merger, x, Bz, b, maxSinPhi, doCov); + } else { + return TransportToX<0>(merger, x, Bz, b, maxSinPhi, doCov); + } +} + +template GPUd() 
bool GPUTPCGMSectorTrack::TransportToX(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov) const { + const auto& param = I ? mParam2 : mParam; Bz = -Bz; - float ex = mParam.mCosPhi; - float ey = mParam.mSinPhi; - float k = mParam.mQPt * Bz; - float dx = x - mParam.mX; + float ex = param.mCosPhi; + float ey = param.mSinPhi; + float k = param.mQPt * Bz; + float dx = x - param.mX; float ey1 = k * dx + ey; if (CAMath::Abs(ey1) > maxSinPhi) { @@ -308,13 +320,13 @@ GPUd() bool GPUTPCGMSectorTrack::TransportToX(GPUTPCGMMerger* merger, float x, f dS = dl + dl * a * (k2 + a * (k4)); //+ k6*a) ); } - float dz = dS * mParam.mDzDs; + float dz = dS * param.mDzDs; - b.SetPar(0, mParam.mY + dy); - b.SetPar(1, mParam.mZ + dz); + b.SetPar(0, param.mY + dy); + b.SetPar(1, param.mZ + dz); b.SetPar(2, ey1); - b.SetPar(3, mParam.mDzDs); - b.SetPar(4, mParam.mQPt); + b.SetPar(3, param.mDzDs); + b.SetPar(4, param.mQPt); if (merger->Param().par.earlyTpcTransform) { b.SetZOffsetLinear(mTZOffset); } else { @@ -327,33 +339,33 @@ GPUd() bool GPUTPCGMSectorTrack::TransportToX(GPUTPCGMMerger* merger, float x, f float ex1i = 1.f / ex1; float hh = dxcci * ex1i * norm2; - float h2 = hh * mParam.mSecPhi; + float h2 = hh * param.mSecPhi; float h4 = Bz * dxcci * hh; - float c20 = mParam.mC3; - float c22 = mParam.mC5; - float c31 = mParam.mC7; - float c33 = mParam.mC9; - float c40 = mParam.mC10; - float c42 = mParam.mC12; - float c44 = mParam.mC14; + float c20 = param.mC3; + float c22 = param.mC5; + float c31 = param.mC7; + float c33 = param.mC9; + float c40 = param.mC10; + float c42 = param.mC12; + float c44 = param.mC14; float c20ph4c42 = c20 + h4 * c42; float h2c22 = h2 * c22; float h4c44 = h4 * c44; float n7 = c31 + dS * c33; - if (CAMath::Abs(mParam.mQPt) > 6.66f) // Special treatment for low Pt + if (CAMath::Abs(param.mQPt) > 6.66f) // Special treatment for low Pt { - b.SetCov(0, CAMath::Max(mParam.mC0, mParam.mC0 + h2 * h2c22 + h4 * h4c44 + 2.f * (h2 
* c20ph4c42 + h4 * c40))); // Do not decrease Y cov for matching! + b.SetCov(0, CAMath::Max(param.mC0, param.mC0 + h2 * h2c22 + h4 * h4c44 + 2.f * (h2 * c20ph4c42 + h4 * c40))); // Do not decrease Y cov for matching! float C2tmp = dS * 2.f * c31; if (C2tmp < 0) { C2tmp = 0; } - b.SetCov(1, mParam.mC2 + C2tmp + dS * dS * c33); // Incorrect formula, correct would be "dS * (c31 + n7)", but we need to make sure cov(Z) increases regardless of the direction of the propagation + b.SetCov(1, param.mC2 + C2tmp + dS * dS * c33); // Incorrect formula, correct would be "dS * (c31 + n7)", but we need to make sure cov(Z) increases regardless of the direction of the propagation } else { - b.SetCov(0, mParam.mC0 + h2 * h2c22 + h4 * h4c44 + 2.f * (h2 * c20ph4c42 + h4 * c40)); - b.SetCov(1, mParam.mC2 + dS * (c31 + n7)); + b.SetCov(0, param.mC0 + h2 * h2c22 + h4 * h4c44 + 2.f * (h2 * c20ph4c42 + h4 * c40)); + b.SetCov(1, param.mC2 + dS * (c31 + n7)); } b.SetCov(2, c22 + dxBz * (c42 + c42 + dxBz * c44)); b.SetCov(3, c33); @@ -366,6 +378,9 @@ GPUd() bool GPUTPCGMSectorTrack::TransportToX(GPUTPCGMMerger* merger, float x, f return 1; } +template GPUdni() bool GPUTPCGMSectorTrack::TransportToX<0>(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov) const; +template GPUdni() bool GPUTPCGMSectorTrack::TransportToX<1>(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov) const; + GPUd() bool GPUTPCGMSectorTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float newX, float sinAlpha, float cosAlpha, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi) const { //* diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h index 1de3928aac409..81facce76cf10 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h @@ -113,6 +113,7 @@ class GPUTPCGMSectorTrack } GPUd() bool FilterErrors(const GPUTPCGMMerger* merger, 
int32_t iSector, float maxSinPhi = GPUCA_MAX_SIN_PHI, float sinPhiMargin = 0.f); + template GPUd() bool TransportToX(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov = true) const; GPUd() bool TransportToXAlpha(GPUTPCGMMerger* merger, float x, float sinAlpha, float cosAlpha, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi) const; GPUd() void CopyBaseTrackCov(); From 7617f32843aaa9ff697d0439cef548ea3e41a87f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 25 Jul 2025 19:27:21 +0200 Subject: [PATCH 17/52] GPU: Fix kernel file include order, must follow template specialization usage --- GPU/GPUTracking/kernels.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index e1fef5795828b..5bcda68e691b3 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -19,7 +19,7 @@ o2_gpu_kernel_file_list(TPCSECTORDATA TPCTRACKER GPUTPCTrackingData.cxx) o2_gpu_kernel_file_list(TPCOCCUPANCY GPUTPCClusterOccupancyMap.cxx) o2_gpu_kernel_file_list(TPCDEDX GPUdEdx.cxx) o2_gpu_kernel_file_list(MATLUT MatLayerCylSet.cxx MatLayerCyl.cxx Ray.cxx) -o2_gpu_kernel_file_list(TPCMERGER ERRORS GPUTPCGMMerger.cxx GPUTPCGMSectorTrack.cxx GPUTPCGMTrackParam.cxx GPUTPCGMPhysicalTrackModel.cxx GPUTPCGMPropagator.cxx) +o2_gpu_kernel_file_list(TPCMERGER ERRORS GPUTPCGMSectorTrack.cxx GPUTPCGMMerger.cxx GPUTPCGMTrackParam.cxx GPUTPCGMPhysicalTrackModel.cxx GPUTPCGMPropagator.cxx) o2_gpu_kernel_file_list(O2PROPAGATOR TrackParametrization.cxx TrackParametrizationWithError.cxx Propagator.cxx TrackLTIntegral.cxx) o2_gpu_kernel_file_list(TPCCOMPRESSION GPUTPCCompressionTrackModel.cxx) o2_gpu_kernel_file_list(TPCDECOMPRESSION GPUTPCCompressionTrackModel.cxx ERRORS) From 6b98f49d865d09f533f0b3ed2515298c25677778 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 25 Jul 2025 23:54:03 +0200 Subject: [PATCH 18/52] GPU TPC: Remove early transform option, we 
can just process triggered data as continuous data --- GPU/GPUTracking/Base/GPUConstantMem.h | 2 - GPU/GPUTracking/Base/GPUParam.cxx | 2 - GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 1 - GPU/GPUTracking/CMakeLists.txt | 2 - GPU/GPUTracking/DataTypes/GPUDataTypes.h | 2 - .../DataTypes/GPUTPCGMMergedTrackHit.h | 5 - .../Definitions/GPUDefParametersDefaults.h | 3 - GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 - GPU/GPUTracking/Global/GPUChainTracking.cxx | 17 +- GPU/GPUTracking/Global/GPUChainTracking.h | 1 - .../Global/GPUChainTrackingMerger.cxx | 5 - .../Global/GPUChainTrackingSectorTracker.cxx | 23 +-- .../Global/GPUChainTrackingTransformation.cxx | 34 +--- .../Interface/GPUO2InterfaceDisplay.cxx | 1 - GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 112 ++++--------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 5 +- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 1 - GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- .../Merger/GPUTPCGMSectorTrack.cxx | 18 +-- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 148 +++++------------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 6 +- .../GPUTPCSectorDebugSortKernels.cxx | 1 - .../SectorTracker/GPUTPCTracker.cxx | 1 - GPU/GPUTracking/SectorTracker/GPUTPCTracker.h | 4 - .../SectorTracker/GPUTPCTrackingData.cxx | 74 ++------- .../SectorTracker/GPUTPCTrackingData.h | 23 ++- .../Standalone/Benchmark/standalone.cxx | 2 +- GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx | 54 ------- GPU/GPUTracking/TPCConvert/GPUTPCConvert.h | 56 ------- .../TPCConvert/GPUTPCConvertKernel.cxx | 48 ------ .../TPCConvert/GPUTPCConvertKernel.h | 31 ---- .../display/helpers/GPUDisplayHelpers.cxx | 3 - .../display/render/GPUDisplayDraw.cxx | 38 ++--- .../display/render/GPUDisplayImportEvent.cxx | 32 ++-- GPU/GPUTracking/kernels.cmake | 1 - GPU/GPUTracking/qa/GPUQA.cxx | 12 +- 36 files changed, 135 insertions(+), 637 deletions(-) delete mode 100644 GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx delete mode 100644 
GPU/GPUTracking/TPCConvert/GPUTPCConvert.h delete mode 100644 GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx delete mode 100644 GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index ffb17997b9190..c496151c3dfd0 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -23,7 +23,6 @@ #include "GPUTPCGMMerger.h" #include "GPUTRDTracker.h" -#include "GPUTPCConvert.h" #include "GPUTPCCompression.h" #include "GPUTPCDecompression.h" #include "GPUTPCClusterFinder.h" @@ -42,7 +41,6 @@ namespace o2::gpu struct GPUConstantMem { GPUParam param; GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]; - GPUTPCConvert tpcConverter; GPUTPCCompression tpcCompressor; GPUTPCDecompression tpcDecompressor; GPUTPCGMMerger tpcMerger; diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index 3062e1c4d2064..649682939ab39 100644 --- a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -111,7 +111,6 @@ void GPUParam::SetDefaults(float solenoidBz, bool assumeConstantBz) par.continuousTracking = false; continuousMaxTimeBin = 0; tpcCutTimeBin = 0; - par.earlyTpcTransform = false; } void GPUParam::UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p, const GPURecoStepConfiguration* w, const GPUSettingsRecDynamic* d) @@ -122,7 +121,6 @@ void GPUParam::UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessi continuousMaxTimeBin = g->grpContinuousMaxTimeBin == -1 ? GPUSettings::TPC_MAX_TF_TIME_BIN : g->grpContinuousMaxTimeBin; tpcCutTimeBin = g->tpcCutTimeBin; } - par.earlyTpcTransform = rec.tpc.forceEarlyTransform == -1 ? (!par.continuousTracking) : rec.tpc.forceEarlyTransform; qptB5Scaler = CAMath::Abs(bzkG) > 0.1f ? 
CAMath::Abs(bzkG) / 5.006680f : 1.f; // Repeat here, since passing in g is optional if (p) { UpdateRun3ClusterErrors(p->param.tpcErrorParamY, p->param.tpcErrorParamZ); diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index f7b08f9dd0c48..641b0a2d095ca 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -17,7 +17,6 @@ #include "GPUReconstructionThreading.h" #include "GPUChain.h" #include "GPUDefParametersRuntime.h" -#include "GPUTPCClusterData.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTRDTrackletWord.h" diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index a7159549322a0..a976e5c93b7c6 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -174,8 +174,6 @@ set(SRCS_NO_CINT ${SRCS_NO_CINT} display/GPUDisplayInterface.cxx) set(SRCS_NO_CINT ${SRCS_NO_CINT} Global/GPUChainITS.cxx dEdx/GPUdEdx.cxx - TPCConvert/GPUTPCConvert.cxx - TPCConvert/GPUTPCConvertKernel.cxx DataCompression/GPUTPCCompression.cxx DataCompression/GPUTPCCompressionTrackModel.cxx DataCompression/GPUTPCCompressionKernels.cxx diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index 3e9623e23559b..801c60f6b02ba 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -101,7 +101,6 @@ class GPUTPCTrack; class GPUTPCHitId; class GPUTPCGMMergedTrack; struct GPUTPCGMMergedTrackHit; -struct GPUTPCGMMergedTrackHitXYZ; class GPUTRDTrackletWord; class GPUTRDSpacePoint; struct GPUTPCMCInfo; @@ -237,7 +236,6 @@ struct GPUTrackingInOutPointers { const GPUTPCGMMergedTrack* mergedTracks = nullptr; uint32_t nMergedTracks = 0; const GPUTPCGMMergedTrackHit* mergedTrackHits = nullptr; - const GPUTPCGMMergedTrackHitXYZ* mergedTrackHitsXYZ = nullptr; uint32_t nMergedTrackHits = 0; const uint32_t* 
mergedTrackHitAttachment = nullptr; const uint8_t* mergedTrackHitStates = nullptr; diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h index a965f306dac79..789c1f00262f8 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h @@ -38,11 +38,6 @@ struct GPUTPCGMMergedTrackHit { flagHighIncl = 0x80 }; }; -struct GPUTPCGMMergedTrackHitXYZ { - float x, y, z; - uint16_t amp; -}; - } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index b1f12034d9c2f..7b76860d4ca2b 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -305,9 +305,6 @@ #ifndef GPUCA_LB_GPUTRDTrackerKernels_o2Version #define GPUCA_LB_GPUTRDTrackerKernels_o2Version 512 #endif - #ifndef GPUCA_LB_GPUTPCConvertKernel - #define GPUCA_LB_GPUTPCConvertKernel 256 - #endif #ifndef GPUCA_LB_GPUTPCCompressionKernels_step0attached #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 256 #endif diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index a22524713c5c0..60ee12252b0a8 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -147,7 +147,6 @@ AddOptionRTC(compressionTypeMask, uint8_t, o2::gpu::GPUSettings::CompressionFull AddOptionRTC(compressionSortOrder, uint8_t, o2::gpu::GPUSettings::SortTime, "", 0, "Sort order of TPC compression (0 = time, 1 = pad, 2 = Z-time-pad, 3 = Z-pad-time, 4 = no sorting (use incoming order))") AddOptionRTC(sigBitsCharge, uint8_t, 4, "", 0, "Number of significant bits for TPC cluster charge in compression mode 1") AddOptionRTC(sigBitsWidth, uint8_t, 3, "", 0, "Number of significant bits for TPC cluster width in compression mode 1") 
-AddOptionRTC(forceEarlyTransform, int8_t, -1, "", 0, "Force early TPC transformation also for continuous data (-1 = auto)") AddOptionRTC(dropLoopers, uint8_t, 0, "", 0, "Drop looping tracks starting from second loop") AddOptionRTC(mergerCovSource, uint8_t, 2, "", 0, "Method to obtain covariance in track merger: 0 = simple filterErrors method, 1 = use cov from track following, 2 = refit (default)") AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation instead of extrapolation for chi2 based cluster rejection") @@ -661,7 +660,6 @@ EndConfig() BeginHiddenConfig(GPUSettingsParam, param) AddVariableRTC(continuousTracking, int8_t, 0) // Continuous tracking, estimate bz and errors for abs(z) = 125cm during seeding AddVariableRTC(dodEdx, int8_t, 0) // Do dEdx computation -AddVariableRTC(earlyTpcTransform, int8_t, 0) // do Early TPC transformation EndConfig() EndNamespace() // gpu diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index b0ea052063f20..f42b7cc34df73 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -21,11 +21,11 @@ #include "GPUChainTrackingGetters.inc" #include "GPUReconstructionIO.h" #include "GPUChainTrackingDefs.h" -#include "GPUTPCClusterData.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTPCTrack.h" #include "GPUTPCHitId.h" +#include "GPUTPCClusterData.h" #include "TPCZSLinkMapping.h" #include "GPUTRDTrackletWord.h" #include "AliHLTTPCClusterMCData.h" @@ -93,9 +93,6 @@ void GPUChainTracking::RegisterPermanentMemoryAndProcessors() if (GetRecoSteps() & RecoStep::TRDTracking) { mRec->RegisterGPUProcessor(&processors()->trdTrackerO2, GetRecoStepsGPU() & RecoStep::TRDTracking); } - if (GetRecoSteps() & RecoStep::TPCConversion) { - mRec->RegisterGPUProcessor(&processors()->tpcConverter, GetRecoStepsGPU() & RecoStep::TPCConversion); - } if (GetRecoSteps() & RecoStep::TPCCompression) { 
mRec->RegisterGPUProcessor(&processors()->tpcCompressor, GetRecoStepsGPU() & RecoStep::TPCCompression); } @@ -141,9 +138,6 @@ void GPUChainTracking::RegisterGPUProcessors() if (GetRecoStepsGPU() & RecoStep::TRDTracking) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->trdTrackerO2, &processors()->trdTrackerO2); } - if (GetRecoStepsGPU() & RecoStep::TPCConversion) { - mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcConverter, &processors()->tpcConverter); - } if (GetRecoStepsGPU() & RecoStep::TPCCompression) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcCompressor, &processors()->tpcCompressor); } @@ -182,11 +176,9 @@ bool GPUChainTracking::ValidateSteps() GPUError("Invalid GPU Reconstruction Step Setting: dEdx requires TPC Merger to be active"); return false; } - if (!param().par.earlyTpcTransform) { - if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion)) { - GPUError("Invalid Reconstruction Step Setting: Tracking without early transform requires TPC Conversion to be active"); - return false; - } + if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion)) { + GPUError("Invalid Reconstruction Step Setting: Tracking requires TPC Conversion to be active"); + return false; } if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCClusterFinding) && !(GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCRaw)) { GPUError("Invalid input, TPC Clusterizer needs TPC raw input"); @@ -566,7 +558,6 @@ void GPUChainTracking::AllocateIOMemory() AllocateIOMemoryHelper(mIOPtrs.nMCInfosTPCCol, mIOPtrs.mcInfosTPCCol, mIOMem.mcInfosTPCCol); AllocateIOMemoryHelper(mIOPtrs.nMergedTracks, mIOPtrs.mergedTracks, mIOMem.mergedTracks); AllocateIOMemoryHelper(mIOPtrs.nMergedTrackHits, mIOPtrs.mergedTrackHits, 
mIOMem.mergedTrackHits); - AllocateIOMemoryHelper(mIOPtrs.nMergedTrackHits, mIOPtrs.mergedTrackHitsXYZ, mIOMem.mergedTrackHitsXYZ); AllocateIOMemoryHelper(mIOPtrs.nTRDTracks, mIOPtrs.trdTracks, mIOMem.trdTracks); AllocateIOMemoryHelper(mIOPtrs.nTRDTracklets, mIOPtrs.trdTracklets, mIOMem.trdTracklets); AllocateIOMemoryHelper(mIOPtrs.nTRDTracklets, mIOPtrs.trdSpacePoints, mIOMem.trdSpacePoints); diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 7d4adcd70af7f..d7e821187e1fe 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -114,7 +114,6 @@ class GPUChainTracking : public GPUChain std::unique_ptr mcInfosTPCCol; std::unique_ptr mergedTracks; std::unique_ptr mergedTrackHits; - std::unique_ptr mergedTrackHitsXYZ; std::unique_ptr trdTracklets; std::unique_ptr trdSpacePoints; std::unique_ptr trdTriggerTimes; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 5d3ac212c5b54..36e4e9af83fbd 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -260,9 +260,6 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); - if (param().par.earlyTpcTransform) { - GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NMergedTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0); - } GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 
0); } if (GetProcessingSettings().outputSharedClusterMap) { @@ -327,7 +324,6 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mIOPtrs.mergedTracks = Merger.MergedTracks(); mIOPtrs.nMergedTracks = Merger.NMergedTracks(); mIOPtrs.mergedTrackHits = Merger.Clusters(); - mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); mIOPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment(); mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt(); @@ -341,7 +337,6 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) processorsShadow()->ioPtrs.mergedTracks = MergerShadow.MergedTracks(); processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); - processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment(); processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 708037239071e..d13e8d5544631 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -17,7 +17,6 @@ #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" -#include "GPUTPCClusterData.h" #include "GPUTrackingInputProvider.h" #include "GPUTPCClusterOccupancyMap.h" #include "GPUDefParametersRuntime.h" @@ -74,25 +73,13 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() GPUInfo("Running TPC Sector Tracker"); } bool doGPU = GetRecoStepsGPU() & RecoStep::TPCSectorTracking; - if (!param().par.earlyTpcTransform) { - for (uint32_t i = 0; i < NSECTORS; i++) { - 
processors()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); - if (doGPU) { - processorsShadow()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); // TODO: not needed I think, anyway copied in SetupGPUProcessor - } - } - mRec->MemoryScalers()->nTPCHits = mIOPtrs.clustersNative->nClustersTotal; - } else { - int32_t offset = 0; - for (uint32_t i = 0; i < NSECTORS; i++) { - processors()->tpcTrackers[i].Data().SetClusterData(mIOPtrs.clusterData[i], mIOPtrs.nClusterData[i], offset); - if (doGPU && GetRecoSteps().isSet(RecoStep::TPCConversion)) { - processorsShadow()->tpcTrackers[i].Data().SetClusterData(processorsShadow()->tpcConverter.mClusters + processors()->tpcTrackers[i].Data().ClusterIdOffset(), processors()->tpcTrackers[i].NHitsTotal(), processors()->tpcTrackers[i].Data().ClusterIdOffset()); - } - offset += mIOPtrs.nClusterData[i]; + for (uint32_t i = 0; i < NSECTORS; i++) { + processors()->tpcTrackers[i].Data().SetClusterData(mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); + if (doGPU) { + processorsShadow()->tpcTrackers[i].Data().SetClusterData(mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); // TODO: not needed I think, anyway copied in SetupGPUProcessor } - mRec->MemoryScalers()->nTPCHits = offset; } + mRec->MemoryScalers()->nTPCHits = mIOPtrs.clustersNative->nClustersTotal; GPUInfo("Event has %u TPC Clusters, %d TRD Tracklets", (uint32_t)mRec->MemoryScalers()->nTPCHits, mIOPtrs.nTRDTracklets); for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx index c9d4d269f070c..83ddc45830621 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx +++ 
b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx @@ -16,13 +16,11 @@ #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUTrackingInputProvider.h" -#include "GPUTPCClusterData.h" #include "GPUReconstructionConvert.h" #include "GPUMemorySizeScalers.h" -#include "GPUTPCConvert.h" #include "AliHLTTPCRawCluster.h" #include "GPUConstantMem.h" -#include "GPUTPCConvertKernel.h" +#include "GPUTPCClusterData.h" #include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/ZeroSuppression.h" @@ -41,9 +39,6 @@ int32_t GPUChainTracking::ConvertNativeToClusterData() { mRec->PushNonPersistentMemory(qStr2Tag("TPCTRANS")); const auto& threadContext = GetThreadContext(); - bool doGPU = GetRecoStepsGPU() & RecoStep::TPCConversion; - GPUTPCConvert& convert = processors()->tpcConverter; - GPUTPCConvert& convertShadow = doGPU ? processorsShadow()->tpcConverter : convert; bool transferClusters = false; if (mRec->IsGPU() && !(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding) && NeedTPCClustersOnGPU()) { @@ -58,31 +53,12 @@ int32_t GPUChainTracking::ConvertNativeToClusterData() TransferMemoryResourceLinkToGPU(RecoStep::TPCConversion, mInputsHost->mResourceClusterNativeAccess, 0); transferClusters = true; } - if (!param().par.earlyTpcTransform) { - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Early transform inactive, skipping TPC Early transformation kernel, transformed on the fly during sector data creation / refit"); - } - if (transferClusters) { - SynchronizeStream(0); // TODO: Synchronize implicitly with next step - } - return 0; + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Early transform inactive, skipping TPC Early transformation kernel, transformed on the fly during sector data creation / refit"); } - SetupGPUProcessor(&convert, true); - for (uint32_t i = 0; i < NSECTORS; i++) { - convert.mMemory->clusters[i] = convertShadow.mClusters + mIOPtrs.clustersNative->clusterOffset[i][0]; - } - - 
WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->tpcConverter - (char*)processors(), &convertShadow, sizeof(convertShadow), 0); - TransferMemoryResourcesToGPU(RecoStep::TPCConversion, &convert, 0); - runKernel(GetGridBlk(NSECTORS * GPUCA_ROW_COUNT, 0)); - TransferMemoryResourcesToHost(RecoStep::TPCConversion, &convert, 0); - SynchronizeStream(0); - - for (uint32_t i = 0; i < NSECTORS; i++) { - mIOPtrs.nClusterData[i] = (i == NSECTORS - 1 ? mIOPtrs.clustersNative->nClustersTotal : mIOPtrs.clustersNative->clusterOffset[i + 1][0]) - mIOPtrs.clustersNative->clusterOffset[i][0]; - mIOPtrs.clusterData[i] = convert.mClusters + mIOPtrs.clustersNative->clusterOffset[i][0]; + if (transferClusters) { + SynchronizeStream(0); // TODO: Synchronize implicitly with next step } - mRec->PopNonPersistentMemory(RecoStep::TPCConversion, qStr2Tag("TPCTRANS")); return 0; } diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx index ad740200a253a..8ef1b1980dc0c 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx @@ -30,7 +30,6 @@ GPUO2InterfaceDisplay::GPUO2InterfaceDisplay(const GPUO2InterfaceConfiguration* mConfig->configDisplay.showTPCTracksFromO2Format = true; mParam.reset(new GPUParam); mParam->SetDefaults(&config->configGRP, &config->configReconstruction, &config->configProcessing, nullptr); - mParam->par.earlyTpcTransform = 0; if (mConfig->configProcessing.runMC) { mQA.reset(new GPUQA(nullptr, &config->configQA, mParam.get())); mQA->InitO2MCData(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 9f9df0884901c..fb2fcdfd06776 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -30,7 +30,6 @@ #endif #include "GPUTPCTracker.h" -#include "GPUTPCClusterData.h" #include "GPUTPCTrackParam.h" #include "GPUTPCGMMerger.h" #include 
"GPUO2DataTypes.h" @@ -365,9 +364,6 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) } } computePointerWithAlignment(mem, mClusters, mNMaxMergedTrackClusters); - if (mRec->GetParam().par.earlyTpcTransform) { - computePointerWithAlignment(mem, mClustersXYZ, mNMaxMergedTrackClusters); - } computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters); return mem; } @@ -495,7 +491,7 @@ GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack trk.SinPhi() = inTrack->Param().GetSinPhi(); trk.DzDs() = inTrack->Param().GetDzDs(); trk.QPt() = inTrack->Param().GetQPt(); - trk.TZOffset() = Param().par.earlyTpcTransform ? inTrack->Param().GetZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin); + trk.TZOffset() = GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin); trk.ShiftZ(this, sector, sectorTrack.ClusterZT0(), sectorTrack.ClusterZTN(), inTrack->Param().GetX(), inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead sectorTrack.SetX2(0.f); for (int32_t way = 0; way < 2; way++) { @@ -517,13 +513,7 @@ GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack row = ic.RowIndex(); const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sector][0] + clusterIndex]; flags = cl.getFlags(); - if (Param().par.earlyTpcTransform) { - x = tracker.Data().ClusterData()[clusterIndex].x; - y = tracker.Data().ClusterData()[clusterIndex].y; - z = tracker.Data().ClusterData()[clusterIndex].z - trk.TZOffset(); - } else { - GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); - } + 
GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); if (prop.PropagateToXAlpha(x, alpha, true)) { return way == 0; } @@ -549,12 +539,8 @@ GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_ const GPUTPCHitId& ic2 = trk.TrackHits()[sectorTr->FirstHitID() + sectorTr->NHits() - 1]; int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex()); int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex()); - if (Param().par.earlyTpcTransform) { - track.SetClusterZT(trk.Data().ClusterData()[clusterIndex1].z, trk.Data().ClusterData()[clusterIndex2].z); - } else { - const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0]; - track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime()); - } + const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0]; + track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime()); } GPUd() void GPUTPCGMMerger::UnpackSaveNumber(int32_t id) @@ -1285,16 +1271,14 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSectors(GPUResolveSharedMemory& smem, in } } -GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr) +GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, int32_t itr) { if (Param().rec.tpc.mergerCERowLimit > 0 && CAMath::Abs(track->QPt()) * Param().qptB5Scaler < 0.3f && (cls.row < Param().rec.tpc.mergerCERowLimit || cls.row >= GPUCA_ROW_COUNT - Param().rec.tpc.mergerCERowLimit)) { return; } float z = 0; - if (Param().par.earlyTpcTransform) { - z = clsXYZ->z; - } else { + { float x, y; 
auto& cln = mConstantMem->ioPtrs.clustersNative->clustersLinear[cls.num]; GPUTPCConvertImpl::convert(*mConstantMem, cls.sector, cls.row, cln.getPad(), cln.getTime(), x, y, z); @@ -1328,7 +1312,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - const ClusterNative* cls = Param().par.earlyTpcTransform ? nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear; + const ClusterNative* cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { if (mMergedTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) { @@ -1357,14 +1341,8 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i bool needswap = false; if (celooper) { - float z0max, z1max; - if (Param().par.earlyTpcTransform) { - z0max = CAMath::Max(CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef()].z), CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z)); - z1max = CAMath::Max(CAMath::Abs(mClustersXYZ[trk[1]->FirstClusterRef()].z), CAMath::Abs(mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z)); - } else { - z0max = -CAMath::Min(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime()); - z1max = -CAMath::Min(cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime()); - } + const float z0max = -CAMath::Min(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime()); + const float z1max = -CAMath::Min(cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() 
- 1].num].getTime()); if (z1max < z0max) { needswap = true; } @@ -1387,30 +1365,19 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i } if (Param().par.continuousTracking) { - if (Param().par.earlyTpcTransform) { - const float z0 = trk[0]->CSide() ? CAMath::Max(mClustersXYZ[trk[0]->FirstClusterRef()].z, mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z) : CAMath::Min(mClustersXYZ[trk[0]->FirstClusterRef()].z, mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z); - const float z1 = trk[1]->CSide() ? CAMath::Max(mClustersXYZ[trk[1]->FirstClusterRef()].z, mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z) : CAMath::Min(mClustersXYZ[trk[1]->FirstClusterRef()].z, mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z); - const float offset = CAMath::Abs(z1) > CAMath::Abs(z0) ? -z0 : z1; - trk[1]->Param().Z() += trk[1]->Param().TZOffset() - offset; - trk[1]->Param().TZOffset() = offset; - } else { - GPUTPCGMMergedTrackHit* clsmax; - const float tmax = CAMath::MaxWithRef(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime(), - cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(), - &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1], - &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax); - const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f); - trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TZOffset() - offset); - trk[1]->Param().TZOffset() = offset; - } + 
GPUTPCGMMergedTrackHit* clsmax; + const float tmax = CAMath::MaxWithRef(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime(), + cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(), + &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1], + &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax); + const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f); + trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TZOffset() - offset); + trk[1]->Param().TZOffset() = offset; } int32_t pos = newRef; #pragma unroll for (int32_t k = 1; k >= 0; k--) { for (uint32_t j = 0; j != trk[k]->NClusters(); j++) { - if (Param().par.earlyTpcTransform) { - mClustersXYZ[pos] = mClustersXYZ[trk[k]->FirstClusterRef() + j]; - } mClusters[pos++] = mClusters[trk[k]->FirstClusterRef() + j]; } } @@ -1513,7 +1480,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread int32_t ichk = k ? lasttr : itr; const GPUTPCGMSectorTrack* trchk = &mSectorTrackInfos[ichk]; while (true) { - float zt = Param().par.earlyTpcTransform ? 
CAMath::Min(CAMath::Abs(trchk->ClusterZT0()), CAMath::Abs(trchk->ClusterZTN())) : -trchk->MinClusterZT(); // Negative time ~ smallest z, behaves the same way + float zt = -trchk->MinClusterZT(); if (zt < mainZT) { if (k) { revertSegments = false; @@ -1548,7 +1515,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } trchk = &mSectorTrackInfos[next]; } - revertInSegment = (longest->ClusterZT0() < longest->ClusterZTN()) ^ (Param().par.earlyTpcTransform ? !longest->CSide() : false); + revertInSegment = longest->ClusterZT0() < longest->ClusterZTN(); } } lastMergedSegment = -1; @@ -1690,18 +1657,8 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread for (int32_t i = 0; i < nHits; i++) { uint8_t state; - if (Param().par.earlyTpcTransform) { - const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; - GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; - clXYZ[i].x = c.x; - clXYZ[i].y = c.y; - clXYZ[i].z = c.z; - clXYZ[i].amp = c.amp; - state = c.flags; - } else { - const ClusterNative& c = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[trackClusters[i].id]; - state = c.getFlags(); - } + const ClusterNative& c = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[trackClusters[i].id]; + state = c.getFlags(); cl[i].state = state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags cl[i].row = trackClusters[i].row; cl[i].num = trackClusters[i].id; @@ -1737,7 +1694,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread lastMergedSegment = iOutputTrack; GPUTPCGMBorderTrack b; - const float toX = Param().par.earlyTpcTransform ? 
mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); + const float toX = GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX<2>(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; @@ -1766,15 +1723,9 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } if (mergedTrack.NClusters() && mergedTrack.OK()) */ if (Param().rec.tpc.mergeCE) { - bool CEside; - if (Param().par.earlyTpcTransform) { - const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; - CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); - } else { - auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; - CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); - } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iMergedTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); + auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; + bool CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], iOutputTrack); } } while (false); } @@ -1897,7 +1848,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, const float qptabs = CAMath::Abs(p.GetQPt()); if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector; - const float refz = p.GetZ() + (Param().par.earlyTpcTransform ? p.GetTZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTZOffset(), Param().continuousMaxTimeBin)) + (trk.CSide() ? 
-100 : 100); + const float refz = p.GetZ() + GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTZOffset(), Param().continuousMaxTimeBin) + (trk.CSide() ? -100 : 100); float sinA, cosA; CAMath::SinCos(trk.GetAlpha(), sinA, cosA); float gx = cosA * p.GetX() - sinA * p.GetY(); @@ -1920,15 +1871,8 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, /*printf("Track %u Sanity qpt %f snp %f bz %f\n", mMemory->nLooperMatchCandidates, p.GetQPt(), p.GetSinPhi(), bz); for (uint32_t k = 0;k < trk.NClusters();k++) { float xx, yy, zz; - if (Param().par.earlyTpcTransform) { - const float zOffset = (mClusters[trk.FirstClusterRef() + k].sector < 18) == (mClusters[trk.FirstClusterRef() + 0].sector < 18) ? p.GetTZOffset() : -p.GetTZOffset(); - xx = mClustersXYZ[trk.FirstClusterRef() + k].x; - yy = mClustersXYZ[trk.FirstClusterRef() + k].y; - zz = mClustersXYZ[trk.FirstClusterRef() + k].z - zOffset; - } else { - const ClusterNative& GPUrestrict() cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[mClusters[trk.FirstClusterRef() + k].num]; - GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTZOffset()); - } + const ClusterNative& GPUrestrict() cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[mClusters[trk.FirstClusterRef() + k].num]; + GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTZOffset()); float sa2, ca2; CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].sector), sa2, ca2); float cx = ca2 * xx - sa2 * yy; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 76f3f3cdcba08..7813ca4595271 100644 --- 
a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -116,8 +116,6 @@ class GPUTPCGMMerger : public GPUProcessor GPUhdi() uint32_t NMergedTrackClusters() const { return mMemory->nMergedTrackClusters; } GPUhdi() const GPUTPCGMMergedTrackHit* Clusters() const { return mClusters; } GPUhdi() GPUTPCGMMergedTrackHit* Clusters() { return (mClusters); } - GPUhdi() const GPUTPCGMMergedTrackHitXYZ* ClustersXYZ() const { return mClustersXYZ; } - GPUhdi() GPUTPCGMMergedTrackHitXYZ* ClustersXYZ() { return (mClustersXYZ); } GPUhdi() GPUAtomic(uint32_t) * ClusterAttachment() const { return mClusterAttachment; } GPUhdi() uint32_t* TrackOrderAttach() const { return mTrackOrderAttach; } GPUhdi() uint32_t* TrackOrderProcess() const { return mTrackOrderProcess; } @@ -225,7 +223,7 @@ class GPUTPCGMMerger : public GPUProcessor template GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode = 0); - GPUd() void MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr); + GPUd() void MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, int32_t itr); void CheckMergedTracks(); #ifndef GPUCA_GPUCODE @@ -267,7 +265,6 @@ class GPUTPCGMMerger : public GPUProcessor GPUTPCGMSectorTrack* mSectorTrackInfos = nullptr; //* additional information for sector tracks int32_t* mSectorTrackInfoIndex = nullptr; GPUTPCGMMergedTrackHit* mClusters = nullptr; - GPUTPCGMMergedTrackHitXYZ* mClustersXYZ = nullptr; GPUAtomic(uint32_t) * mClusterAttachment = nullptr; o2::tpc::TrackTPC* mOutputTracksTPCO2 = nullptr; uint32_t* mOutputClusRefsTPCO2 = nullptr; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 90f2fce5cdd2e..e1cec59d96b95 100644 --- 
a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -19,7 +19,6 @@ #include #include #include "GPUTPCTracker.h" -#include "GPUTPCClusterData.h" #include "GPUTPCTrackParam.h" #include "GPUTPCGMMerger.h" #include "GPUO2DataTypes.h" diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index b10b1d0510fd7..ab3eb02db393e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -90,7 +90,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlock } uint32_t myId = CAMath::AtomicAdd(&merger.Memory()->nO2Tracks, 1u); tmpData[i] = {nCl, CAMath::AtomicAdd(&merger.Memory()->nO2ClusRefs, nCl + (nCl + 1) / 2)}; - trackSort[myId] = {i, (merger.Param().par.earlyTpcTransform || tracks[i].CSide()) ? tracks[i].GetParam().GetTZOffset() : -tracks[i].GetParam().GetTZOffset()}; + trackSort[myId] = {i, tracks[i].CSide() ? tracks[i].GetParam().GetTZOffset() : -tracks[i].GetParam().GetTZOffset()}; } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx index 2b6d826baea56..a44837c897f46 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx @@ -37,11 +37,7 @@ GPUd() void GPUTPCGMSectorTrack::Set(const GPUTPCGMMerger* merger, const GPUTPCT mParam.mSecPhi = 1.f / mParam.mCosPhi; mAlpha = alpha; mSector = sector; - if (merger->Param().par.earlyTpcTransform) { - mTZOffset = t.GetZOffset(); - } else { - mTZOffset = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, t.GetZOffset(), merger->Param().continuousMaxTimeBin); - } + mTZOffset = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, t.GetZOffset(), merger->Param().continuousMaxTimeBin); mNClusters = sectorTr->NHits(); } @@ -327,11 +323,7 @@ GPUd() bool 
GPUTPCGMSectorTrack::TransportToX(GPUTPCGMMerger* merger, float x, f b.SetPar(2, ey1); b.SetPar(3, param.mDzDs); b.SetPar(4, param.mQPt); - if (merger->Param().par.earlyTpcTransform) { - b.SetZOffsetLinear(mTZOffset); - } else { - b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); - } + b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); if (!doCov) { return (1); @@ -486,11 +478,7 @@ GPUd() bool GPUTPCGMSectorTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float b.SetPar(2, ey1); b.SetPar(3, dzds); b.SetPar(4, qpt); - if (merger->Param().par.earlyTpcTransform) { - b.SetZOffsetLinear(mTZOffset); - } else { - b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); - } + b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); b.SetCov(0, c00 + h2 * h2c22 + h4 * h4c44 + 2.f * (h2 * c20ph4c42 + h4 * c40)); b.SetCov(1, c11 + dS * (c31 + n7)); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index c76d8f6ab4409..80d4809e4a466 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -29,7 +29,6 @@ #include "GPUTPCGMPolynomialField.h" #include "GPUTPCGMMerger.h" #include "GPUTPCTracker.h" -#include "GPUTPCClusterData.h" #include "GPUdEdx.h" #include "GPUParam.h" #include "GPUO2DataTypes.h" @@ -54,7 +53,7 @@ using namespace o2::gpu; using namespace o2::tpc; -GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* 
GPUrestrict() clusters, GPUTPCGMMergedTrackHitXYZ* GPUrestrict() clustersXYZ, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, int32_t attempt, float maxSinPhi, gputpcgmmergertypes::GPUTPCOuterParam* GPUrestrict() outerParam, int8_t leg) +GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, int32_t attempt, float maxSinPhi, gputpcgmmergertypes::GPUTPCOuterParam* GPUrestrict() outerParam, int8_t leg) { static constexpr float kDeg2Rad = M_PI / 180.f; CADEBUG(static constexpr float kSectAngle = 2 * M_PI / 18.f); @@ -68,7 +67,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ prop.SetPolynomialField(¶m.polynomialField); prop.SetMaxSinPhi(maxSinPhi); if ((clusters[0].sector < 18) == (clusters[N - 1].sector < 18)) { - ShiftZ2(clusters, clustersXYZ, merger, N); + ShiftZ2(clusters, merger, N); } if (param.rec.tpc.mergerInterpolateErrors) { for (int32_t i = 0; i < N; i++) { @@ -136,12 +135,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ uint8_t clusterState = clusters[ihit].state; const float clAlpha = param.Alpha(clusters[ihit].sector); float xx, yy, zz; - if (param.par.earlyTpcTransform) { - const float zOffset = (clusters[ihit].sector < 18) == (clusters[0].sector < 18) ? 
mTZOffset : -mTZOffset; - xx = clustersXYZ[ihit].x; - yy = clustersXYZ[ihit].y; - zz = clustersXYZ[ihit].z - zOffset; - } else { + { const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num]; merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), xx, yy, zz, mTZOffset); } @@ -150,7 +144,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // CADEBUG(if ((uint32_t)merger->GetTrackingChain()->mIOPtrs.nMCLabelsTPC > clusters[ihit].num)) // CADEBUG({printf(" MC:"); for (int32_t i = 0; i < 3; i++) {int32_t mcId = merger->GetTrackingChain()->mIOPtrs.mcLabelsTPC[clusters[ihit].num].fClusterID[i].fMCID; if (mcId >= 0) printf(" %d", mcId); } } printf("\n")); // clang-format on - if (MergeDoubleRowClusters(ihit, wayDirection, clusters, clustersXYZ, merger, prop, xx, yy, zz, maxN, clAlpha, clusterState, allowModification) == -1) { + if (MergeDoubleRowClusters(ihit, wayDirection, clusters, merger, prop, xx, yy, zz, maxN, clAlpha, clusterState, allowModification) == -1) { nMissed++; nMissed2++; continue; @@ -334,15 +328,11 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float qtot = 0, qmax = 0, pad = 0, relTime = 0; const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { - if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) { - qtot += clustersXYZ[ihit].amp; - } else { - const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; - qtot += cl.qTot; - qmax = CAMath::Max(qmax, cl.qMax); - pad += cl.getPad(); - relTime += cl.getTime(); - } + const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; + qtot += cl.qTot; + qmax = CAMath::Max(qmax, cl.qMax); + pad 
+= cl.getPad(); + relTime += cl.getTime(); } qtot /= clusterCount; // TODO: Weighted Average pad /= clusterCount; @@ -372,7 +362,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } if (((nWays - iWay) & 1) && (iWay != nWays - 1) && (clusters[0].sector < 18) == (clusters[maxN - 1].sector < 18)) { - ShiftZ2(clusters, clustersXYZ, merger, maxN); + ShiftZ2(clusters, merger, maxN); } } ConstrainSinPhi(); @@ -460,7 +450,7 @@ GPUd() void GPUTPCGMTrackParam::MirrorTo(GPUTPCGMPropagator& GPUrestrict() prop, mChi2 = 0; } -GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* GPUrestrict() merger, GPUTPCGMPropagator& GPUrestrict() prop, float& GPUrestrict() xx, float& GPUrestrict() yy, float& GPUrestrict() zz, int32_t maxN, float clAlpha, uint8_t& GPUrestrict() clusterState, bool rejectChi2) +GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, const GPUTPCGMMerger* GPUrestrict() merger, GPUTPCGMPropagator& GPUrestrict() prop, float& GPUrestrict() xx, float& GPUrestrict() yy, float& GPUrestrict() zz, int32_t maxN, float clAlpha, uint8_t& GPUrestrict() clusterState, bool rejectChi2) { if (ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].sector == clusters[ihit + wayDirection].sector) { float maxDistY, maxDistZ; @@ -479,18 +469,10 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t xx = yy = zz = 0.f; clusterState = 0; while (true) { - float clx, cly, clz, clamp; - if (merger->Param().par.earlyTpcTransform) { - const float zOffset = (clusters[ihit].sector < 18) == (clusters[0].sector < 18) ? 
mTZOffset : -mTZOffset; - clx = clustersXYZ[ihit].x; - cly = clustersXYZ[ihit].y; - clz = clustersXYZ[ihit].z - zOffset; - clamp = clustersXYZ[ihit].amp; - } else { - const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num]; - clamp = cl.qTot; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), clx, cly, clz, mTZOffset); - } + const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num]; + float clamp = cl.qTot; + float clx, cly, clz; + merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), clx, cly, clz, mTZOffset); float dy = cly - projY; float dz = clz - projZ; if (noReject == 0 && (dy * dy > maxDistY || dz * dz > maxDistZ)) { @@ -525,16 +507,11 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop) { float Y, Z; - if (Merger->Param().par.earlyTpcTransform) { + float X = 0; + Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(sector, iRow, mP[0], mP[1], X); + if (prop.GetPropagatedYZ(X, Y, Z)) { Y = mP[0]; Z = mP[1]; - } else { - float X = 0; - Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(sector, iRow, mP[0], mP[1], X); - if (prop.GetPropagatedYZ(X, Y, Z)) { - Y = mP[0]; - Z = mP[1]; - } } return AttachClusters(Merger, sector, iRow, iTrack, goodLeg, Y, Z); } @@ -544,7 +521,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric if (Merger->Param().rec.tpc.disableRefitAttachment & 1) { return -1e6f; } - const GPUTPCTracker& GPUrestrict() tracker = *(Merger -> 
GetConstantMem()->tpcTrackers + sector); + const GPUTPCTracker& GPUrestrict() tracker = *(Merger->GetConstantMem()->tpcTrackers + sector); const GPUTPCRow& GPUrestrict() row = tracker.Row(iRow); GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); @@ -552,7 +529,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return -1e6f; } - const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->MergedTracks()[iTrack].CSide() ^ (sector >= 18)) ? -mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); + const float zOffset = Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); const float y0 = row.Grid().YMin(); const float stepY = row.HstepY(); const float z0 = row.Grid().ZMin() - zOffset; // We can use our own ZOffset, since this is only used temporarily anyway @@ -568,12 +545,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric const float sy21 = 1.f / sy2; const float sz21 = 1.f / sz2; float uncorrectedY, uncorrectedZ; - if (Merger->Param().par.earlyTpcTransform) { - uncorrectedY = Y; - uncorrectedZ = Z; - } else { - Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(sector, iRow, Y, Z, uncorrectedY, uncorrectedZ); - } + Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(sector, iRow, Y, Z, uncorrectedY, uncorrectedZ); if (CAMath::Abs(uncorrectedY) > row.getTPCMaxY()) { return uncorrectedY; @@ -870,25 +842,18 @@ GPUdi() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUr } } -GPUd() void GPUTPCGMTrackParam::ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, 
const GPUTPCGMMerger* merger, int32_t N) +GPUd() void GPUTPCGMTrackParam::ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N) { float tzInner, tzOuter; float xInner, xOuter; if (N == 0) { N = 1; } - if (merger->Param().par.earlyTpcTransform) { - tzInner = clustersXYZ[N - 1].z; - tzOuter = clustersXYZ[0].z; - xInner = clustersXYZ[N - 1].x; - xOuter = clustersXYZ[0].x; - } else { - const auto& GPUrestrict() cls = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear; - tzInner = cls[clusters[N - 1].num].getTime(); - tzOuter = cls[clusters[0].num].getTime(); - xInner = GPUTPCGeometry::Row2X(clusters[N - 1].row); - xOuter = GPUTPCGeometry::Row2X(clusters[0].row); - } + const auto& GPUrestrict() cls = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear; + tzInner = cls[clusters[N - 1].num].getTime(); + tzOuter = cls[clusters[0].num].getTime(); + xInner = GPUTPCGeometry::Row2X(clusters[N - 1].row); + xOuter = GPUTPCGeometry::Row2X(clusters[0].row); ShiftZ(merger, clusters[0].sector, tzInner, tzOuter, xInner, xOuter); } @@ -925,53 +890,20 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge } if (!beamlineReached) { - if (merger->Param().par.earlyTpcTransform) { - float basez, basex; - if (CAMath::Abs(tz1) < CAMath::Abs(tz2)) { - basez = tz1; - basex = x1; - } else { - basez = tz2; - basex = x2; - } - float refZ = ((basez > 0) ? merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * basex; - deltaZ = basez - refZ - mTZOffset; + float baset, basex; + if (CAMath::Abs(tz1) > CAMath::Abs(tz2)) { + baset = tz1; + basex = x1; } else { - float baset, basex; - if (CAMath::Abs(tz1) > CAMath::Abs(tz2)) { - baset = tz1; - basex = x1; - } else { - baset = tz2; - basex = x2; - } - float refZ = ((sector < GPUCA_NSECTORS / 2) ? 
merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * basex; - float basez; - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(sector, baset, basez, mTZOffset); - deltaZ = basez - refZ; + baset = tz2; + basex = x2; } + float refZ = ((sector < GPUCA_NSECTORS / 2) ? merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * basex; + float basez; + merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(sector, baset, basez, mTZOffset); + deltaZ = basez - refZ; } - if (merger->Param().par.earlyTpcTransform) { - mTZOffset += deltaZ; - mP[1] -= deltaZ; - deltaZ = 0; - float zMax = CAMath::Max(tz1, tz2); - float zMin = CAMath::Min(tz1, tz2); - // printf("Z Check: Clusters %f %f, min %f max %f vtx %f\n", tz1, tz2, zMin, zMax, mTZOffset); - if (zMin < 0 && zMin - mTZOffset < -GPUTPCGeometry::TPCLength()) { - deltaZ = zMin - mTZOffset + GPUTPCGeometry::TPCLength(); - } else if (zMax > 0 && zMax - mTZOffset > GPUTPCGeometry::TPCLength()) { - deltaZ = zMax - mTZOffset - GPUTPCGeometry::TPCLength(); - } - if (zMin < 0 && zMax - (mTZOffset + deltaZ) > 0) { - deltaZ = zMax - mTZOffset; - } else if (zMax > 0 && zMin - (mTZOffset + deltaZ) < 0) { - deltaZ = zMin - mTZOffset; - } - // if (deltaZ != 0) printf("Moving clusters to TPC Range: Shift %f in Z: %f to %f --> %f to %f in Z\n", deltaZ, tz2 - mTZOffset, tz1 - mTZOffset, tz2 - mTZOffset - deltaZ, tz1 - mTZOffset - deltaZ); - mTZOffset += deltaZ; - mP[1] -= deltaZ; - } else { + { float deltaT = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaZtoDeltaTimeInTimeFrame(sector, deltaZ); mTZOffset += deltaT; mP[1] -= deltaZ; @@ -1047,7 +979,7 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() GPUTPCGMTrackParam t = track.Param(); float Alpha = track.Alpha(); CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = 
t.QPt()); - bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->Param().par.earlyTpcTransform ? merger->ClustersXYZ() + track.FirstClusterRef() : nullptr, nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam(), track.Leg()); + bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam(), track.Leg()); CADEBUG(printf("Finished Fit Track %d\n", iTrk)); CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, ok %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits))); @@ -1061,7 +993,7 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() NTolerated = 0; // Clusters not fit but tollerated for track length cut t = track.Param(); Alpha = track.Alpha(); - ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->ClustersXYZ() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, 1, GPUCA_MAX_SIN_PHI, &track.OuterParam(), track.Leg()); + ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, 1, GPUCA_MAX_SIN_PHI, &track.OuterParam(), track.Leg()); } else { uint32_t nRefit = CAMath::AtomicAdd(&merger->Memory()->nRetryRefit, 1u); merger->RetryRefitIds()[nRefit] = iTrk; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index 3412388003ec6..435f88bb93a16 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -141,10 +141,10 @@ class GPUTPCGMTrackParam GPUd() bool CheckNumericalQuality(float overrideCovYY = -1.f) const; GPUd() bool CheckCov() const; - GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, int32_t& N, 
int32_t& NTolerated, float& Alpha, int32_t attempt = 0, float maxSinPhi = GPUCA_MAX_SIN_PHI, gputpcgmmergertypes::GPUTPCOuterParam* outerParam = nullptr, int8_t leg = 0); + GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, int32_t& N, int32_t& NTolerated, float& Alpha, int32_t attempt = 0, float maxSinPhi = GPUCA_MAX_SIN_PHI, gputpcgmmergertypes::GPUTPCOuterParam* outerParam = nullptr, int8_t leg = 0); GPUd() void MoveToReference(GPUTPCGMPropagator& prop, const GPUParam& param, float& alpha); GPUd() void MirrorTo(GPUTPCGMPropagator& prop, float toY, float toZ, bool inFlyDirection, const GPUParam& param, uint8_t row, uint8_t clusterState, bool mirrorParameters, int8_t sector); - GPUd() int32_t MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* merger, GPUTPCGMPropagator& prop, float& xx, float& yy, float& zz, int32_t maxN, float clAlpha, uint8_t& clusterState, bool rejectChi2); + GPUd() int32_t MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, GPUTPCGMPropagator& prop, float& xx, float& yy, float& zz, int32_t maxN, float clAlpha, uint8_t& clusterState, bool rejectChi2); GPUd() bool AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop, bool inFlyDirection, float maxSinPhi = GPUCA_MAX_SIN_PHI, bool checkdEdx = false); GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop); // Returns uncorrectedY for later use @@ -184,7 +184,7 @@ class GPUTPCGMTrackParam GPUd() void Rotate(float alpha); GPUd() void ShiftZ(const GPUTPCGMMerger* merger, int32_t sector, float tzInner, float tzOuter, float x1, float x2); - GPUd() void ShiftZ2(const 
GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* merger, int32_t N); + GPUd() void ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N); GPUd() static float Reciprocal(float x) { return 1.f / x; } GPUdi() static void Assign(float& x, bool mask, float v) diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx index 7981ef5af26d8..afd90184b60ca 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx @@ -13,7 +13,6 @@ /// \author David Rohr #include "GPUParam.h" -#include "GPUTPCClusterData.h" #include "GPUTPCHit.h" #include "GPUTPCTrackingData.h" #include "GPUProcessor.h" diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index c19e96f1879a8..41530cb629ce8 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -17,7 +17,6 @@ #include "GPUTPCTrack.h" #include "GPUCommonMath.h" -#include "GPUTPCClusterData.h" #include "GPUO2DataTypes.h" #include "GPUTPCTrackParam.h" #include "GPUParam.inc" diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index 3bebdc4fa2b06..60cc12573be99 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -69,10 +69,6 @@ class GPUTPCTracker : public GPUProcessor int32_t nLocalTrackHits = 0; // see above }; - GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const - { - return mData.ClusterData(); - } GPUhdi() const GPUTPCRow& Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } GPUhdni() GPUglobalref() commonMemoryStruct* CommonMemory() const { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx 
b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx index 6c3ffbb5120bc..7ebe13e8bfb9e 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx @@ -13,7 +13,6 @@ /// \author Matthias Kretz, Sergey Gorbunov, David Rohr #include "GPUParam.h" -#include "GPUTPCClusterData.h" #include "GPUTPCHit.h" #include "GPUTPCTrackingData.h" #include "GPUProcessor.h" @@ -45,9 +44,8 @@ void GPUTPCTrackingData::InitializeRows(const GPUParam& p) } } -void GPUTPCTrackingData::SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset) +void GPUTPCTrackingData::SetClusterData(int32_t nClusters, int32_t clusterIdOffset) { - mClusterData = data; mNumberOfHits = nClusters; mClusterIdOffset = clusterIdOffset; } @@ -164,50 +162,10 @@ GPUdii() void GPUTPCTrackingData::SetRowGridEmpty(GPUTPCRow& GPUrestrict() row) GPUdii() int32_t GPUTPCTrackingData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* GPUrestrict() mem, int32_t iSector, float* tmpMinMax) { -#ifdef GPUCA_GPUCODE - constexpr bool EarlyTransformWithoutClusterNative = false; -#else - bool EarlyTransformWithoutClusterNative = mem->param.par.earlyTpcTransform && mem->ioPtrs.clustersNative == nullptr; -#endif - int32_t* tmpHitIndex = nullptr; - const uint32_t* NumberOfClustersInRow = nullptr; - const uint32_t* RowOffsets = nullptr; - #ifndef GPUCA_GPUCODE vecpod YZData(mNumberOfHits); vecpod binMemory(mNumberOfHits); - uint32_t RowOffsetsA[GPUCA_ROW_COUNT]; - uint32_t NumberOfClustersInRowA[GPUCA_ROW_COUNT]; - vecpod tmpHitIndexA; - if (EarlyTransformWithoutClusterNative) { // Implies mem->param.par.earlyTpcTransform but no ClusterNative present - NumberOfClustersInRow = NumberOfClustersInRowA; - RowOffsets = RowOffsetsA; - tmpHitIndexA.resize(mNumberOfHits); - tmpHitIndex = tmpHitIndexA.data(); - - memset(NumberOfClustersInRowA, 0, GPUCA_ROW_COUNT * 
sizeof(NumberOfClustersInRowA[0])); - for (int32_t i = 0; i < mNumberOfHits; i++) { - const int32_t tmpRow = mClusterData[i].row; - NumberOfClustersInRowA[tmpRow]++; - } - int32_t tmpOffset = 0; - for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { - RowOffsetsA[i] = tmpOffset; - tmpOffset += NumberOfClustersInRow[i]; - } - int32_t RowsFilled[GPUCA_ROW_COUNT]; - memset(RowsFilled, 0, GPUCA_ROW_COUNT * sizeof(int32_t)); - for (int32_t i = 0; i < mNumberOfHits; i++) { - float2 tmp; - tmp.x = mClusterData[i].y; - tmp.y = mClusterData[i].z; - int32_t tmpRow = mClusterData[i].row; - int32_t newIndex = RowOffsetsA[tmpRow] + (RowsFilled[tmpRow])++; - YZData[newIndex] = tmp; - tmpHitIndex[newIndex] = i; - } - } // Other cases below in loop over rows #else float2* YZData = (float2*)mLinkUpData; // TODO: we can do this as well on the CPU, just must make sure that CPU has the scratch memory calink* binMemory = (calink*)mHitWeights; @@ -221,8 +179,8 @@ GPUdii() int32_t GPUTPCTrackingData::InitFromClusterData(int32_t nBlocks, int32_ float zMin = 1.e6f; float zMax = -1.e6f; - const uint32_t NumberOfClusters = EarlyTransformWithoutClusterNative ? NumberOfClustersInRow[rowIndex] : mem->ioPtrs.clustersNative->nClusters[iSector][rowIndex]; - const uint32_t RowOffset = EarlyTransformWithoutClusterNative ? RowOffsets[rowIndex] : (mem->ioPtrs.clustersNative->clusterOffset[iSector][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSector][0]); + const uint32_t NumberOfClusters = mem->ioPtrs.clustersNative->nClusters[iSector][rowIndex]; + const uint32_t RowOffset = mem->ioPtrs.clustersNative->clusterOffset[iSector][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSector][0]; constexpr const uint32_t maxN = 1u << (sizeof(calink) < 3 ? 
(sizeof(calink) * 8) : 24); GPUTPCRow& row = mRows[rowIndex]; if (iThread == 0) { @@ -251,25 +209,11 @@ GPUdii() int32_t GPUTPCTrackingData::InitFromClusterData(int32_t nBlocks, int32_ continue; } - if (EarlyTransformWithoutClusterNative) { - for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) { - UpdateMinMaxYZ(yMin, yMax, zMin, zMax, YZData[RowOffset + i].x, YZData[RowOffset + i].y); - } - } else if (mem->param.par.earlyTpcTransform) { // Early transform case with ClusterNative present - for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) { - float2 tmp; - tmp.x = mClusterData[RowOffset + i].y; - tmp.y = mClusterData[RowOffset + i].z; - UpdateMinMaxYZ(yMin, yMax, zMin, zMax, tmp.x, tmp.y); - YZData[RowOffset + i] = tmp; - } - } else { - for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) { - float x, y, z; - GPUTPCConvertImpl::convert(*mem, iSector, rowIndex, mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getTime(), x, y, z); - UpdateMinMaxYZ(yMin, yMax, zMin, zMax, y, z); - YZData[RowOffset + i] = CAMath::MakeFloat2(y, z); - } + for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) { + float x, y, z; + GPUTPCConvertImpl::convert(*mem, iSector, rowIndex, mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getTime(), x, y, z); + UpdateMinMaxYZ(yMin, yMax, zMin, zMax, y, z); + YZData[RowOffset + i] = CAMath::MakeFloat2(y, z); } if (iThread == 0) { @@ -380,7 +324,7 @@ GPUdii() int32_t GPUTPCTrackingData::InitFromClusterData(int32_t nBlocks, int32_ const int32_t globalHitIndex = RowOffset + hitIndex; // allows to find the global hit index / coordinates from a global bin sorted hit index - mClusterDataIndex[globalBinsortedIndex] = EarlyTransformWithoutClusterNative ? 
tmpHitIndex[globalHitIndex] : (RowOffset + hitIndex); + mClusterDataIndex[globalBinsortedIndex] = RowOffset + hitIndex; const float xx = ((YZData[globalHitIndex].x - y0) * stepYi) + .5; const float yy = ((YZData[globalHitIndex].y - z0) * stepZi) + .5; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h index b08fbed4b319d..3db4b6c36e722 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h @@ -29,13 +29,13 @@ class GPUTPCHit; class GPUTPCTrackingData { public: - GPUTPCTrackingData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} + GPUTPCTrackingData() = default; #ifndef GPUCA_GPUCODE_DEVICE ~GPUTPCTrackingData() = default; void InitializeRows(const GPUParam& p); void SetMaxData(); - void SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset); + void SetClusterData(int32_t nClusters, int32_t clusterIdOffset); void* SetPointersScratch(void* mem, bool idsOnGPU); void* SetPointersLinks(void* mem); void* SetPointersWeights(void* mem); @@ -112,8 +112,6 @@ class GPUTPCTrackingData GPUhdi() GPUglobalref() GPUAtomic(uint32_t) * HitWeights() { return (mHitWeights); } - GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const { return mClusterData; } - private: #ifndef GPUCA_GPUCODE GPUTPCTrackingData& operator=(const GPUTPCTrackingData&) = delete; // ROOT 5 tries to use this if it is not private @@ -127,16 +125,16 @@ class GPUTPCTrackingData friend class GPUTPCNeighboursFinder; friend class GPUTPCStartHitsFinder; - int32_t mNumberOfHits; // the number of hits in this sector - int32_t mNumberOfHitsPlusAlign; - int32_t mClusterIdOffset; + int32_t mNumberOfHits = 0; // the number of hits in this sector + int32_t mNumberOfHitsPlusAlign = 0; + int32_t mClusterIdOffset = 0; - GPUglobalref() 
GPUTPCRow* mRows; // The row objects needed for most accessor functions + GPUglobalref() GPUTPCRow* mRows = nullptr; // The row objects needed for most accessor functions - GPUglobalref() calink* mLinkUpData; // hit index in the row above which is linked to the given (global) hit index - GPUglobalref() calink* mLinkDownData; // hit index in the row below which is linked to the given (global) hit index - GPUglobalref() cahit2* mHitData; // packed y,z coordinate of the given (global) hit index - GPUglobalref() int32_t* mClusterDataIndex; // see ClusterDataIndex() + GPUglobalref() calink* mLinkUpData = nullptr; // hit index in the row above which is linked to the given (global) hit index + GPUglobalref() calink* mLinkDownData = nullptr; // hit index in the row below which is linked to the given (global) hit index + GPUglobalref() cahit2* mHitData = nullptr; // packed y,z coordinate of the given (global) hit index + GPUglobalref() int32_t* mClusterDataIndex = nullptr; // see ClusterDataIndex() /* * The size of the array is row.Grid.N + row.Grid.Ny + 3. 
The row.Grid.Ny + 3 is an optimization @@ -144,7 +142,6 @@ class GPUTPCTrackingData */ GPUglobalref() calink* mFirstHitInBin; // see FirstHitInBin GPUglobalref() GPUAtomic(uint32_t) * mHitWeights; // the weight of the longest tracklet crossed the cluster - GPUglobalref() const GPUTPCClusterData* mClusterData; }; GPUdi() calink GPUTPCTrackingData::HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkUpData[row.mHitNumberOffset + hitIndex]; } diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 1b1cb510af7be..38af340d67d7a 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -585,7 +585,7 @@ int32_t LoadEvent(int32_t iEvent, int32_t x) } } - if (!rec->GetParam().par.earlyTpcTransform && !chainTracking->mIOPtrs.clustersNative && !chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS && !chainTracking->mIOPtrs.tpcCompressedClusters) { + if (!chainTracking->mIOPtrs.clustersNative && !chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS && !chainTracking->mIOPtrs.tpcCompressedClusters) { printf("Need cluster native data for on-the-fly TPC transform\n"); return 1; } diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx deleted file mode 100644 index 899149d320bda..0000000000000 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCConvert.cxx -/// \author David Rohr - -#include "GPUTPCConvert.h" -#include "TPCFastTransform.h" -#include "GPUTPCClusterData.h" -#include "GPUReconstruction.h" -#include "GPUO2DataTypes.h" -#include "GPUParam.h" - -using namespace o2::gpu; - -void GPUTPCConvert::InitializeProcessor() {} - -void* GPUTPCConvert::SetPointersOutput(void* mem) -{ - if (mRec->GetParam().par.earlyTpcTransform) { - computePointerWithAlignment(mem, mClusters, mNClustersTotal); - } - return mem; -} - -void* GPUTPCConvert::SetPointersMemory(void* mem) -{ - computePointerWithAlignment(mem, mMemory, 1); - return mem; -} - -void GPUTPCConvert::RegisterMemoryAllocation() -{ - AllocateAndInitializeLate(); - mMemoryResMemory = mRec->RegisterMemoryAllocation(this, &GPUTPCConvert::SetPointersMemory, GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_PERMANENT, "TPCConvertMemory"); - mMemoryResOutput = mRec->RegisterMemoryAllocation(this, &GPUTPCConvert::SetPointersOutput, GPUMemoryResource::MEMORY_OUTPUT, "TPCConvertOutput"); -} - -void GPUTPCConvert::SetMaxData(const GPUTrackingInOutPointers& io) -{ - if (io.clustersNative) { - mNClustersTotal = io.clustersNative->nClustersTotal; - } else { - mNClustersTotal = 0; - } -} diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h deleted file mode 100644 index 9bf40417192b6..0000000000000 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCConvert.h -/// \author David Rohr - -#ifndef GPUTPCCONVERT_H -#define GPUTPCCONVERT_H - -#include "GPUDef.h" -#include "GPUProcessor.h" - -namespace o2::gpu -{ -struct GPUTPCClusterData; - -class GPUTPCConvert : public GPUProcessor -{ - friend class GPUTPCConvertKernel; - friend class GPUChainTracking; - - public: -#ifndef GPUCA_GPUCODE - void InitializeProcessor(); - void RegisterMemoryAllocation(); - void SetMaxData(const GPUTrackingInOutPointers& io); - - void* SetPointersOutput(void* mem); - void* SetPointersMemory(void* mem); -#endif - - constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; - - struct Memory { - GPUTPCClusterData* clusters[NSECTORS]; - }; - - protected: - Memory* mMemory = nullptr; - GPUTPCClusterData* mClusters = nullptr; - uint32_t mNClustersTotal = 0; - - int16_t mMemoryResOutput = -1; - int16_t mMemoryResMemory = -1; -}; -} // namespace o2::gpu - -#endif diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx deleted file mode 100644 index 806a06dfbbe02..0000000000000 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCConvertKernel.cxx -/// \author David Rohr - -#include "GPUTPCConvertKernel.h" -#include "GPUConstantMem.h" -#include "TPCFastTransform.h" -#include "GPUTPCClusterData.h" -#include "GPUO2DataTypes.h" -#include "GPUTPCConvertImpl.h" - -using namespace o2::gpu; - -template <> -GPUdii() void GPUTPCConvertKernel::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors) -{ - const int32_t iSector = iBlock / GPUCA_ROW_COUNT; - const int32_t iRow = iBlock % GPUCA_ROW_COUNT; - GPUTPCConvert& GPUrestrict() convert = processors.tpcConverter; - const o2::tpc::ClusterNativeAccess* GPUrestrict() native = processors.ioPtrs.clustersNative; - GPUTPCClusterData* GPUrestrict() clusters = convert.mMemory -> clusters[iSector]; - const int32_t idOffset = native->clusterOffset[iSector][iRow]; - const int32_t indexOffset = native->clusterOffset[iSector][iRow] - native->clusterOffset[iSector][0]; - - for (uint32_t k = get_local_id(0); k < native->nClusters[iSector][iRow]; k += get_local_size(0)) { - const auto& GPUrestrict() clin = native -> clusters[iSector][iRow][k]; - float x, y, z; - GPUTPCConvertImpl::convert(processors, iSector, iRow, clin.getPad(), clin.getTime(), x, y, z); - auto& GPUrestrict() clout = clusters[indexOffset + k]; - clout.x = x; - clout.y = y; - clout.z = z; - clout.row = iRow; - clout.amp = clin.qTot; - clout.flags = clin.getFlags(); - clout.id = idOffset + k; - } -} diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h deleted file mode 100644 index d62e10e682a4b..0000000000000 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019-2020 CERN 
and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCConvertKernel.h -/// \author David Rohr - -#ifndef GPUTPCCONVERTKERNEL_H -#define GPUTPCCONVERTKERNEL_H - -#include "GPUGeneralKernels.h" - -namespace o2::gpu -{ -class GPUTPCConvertKernel : public GPUKernelTemplate -{ - public: - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCConversion; } - template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors); -}; -} // namespace o2::gpu - -#endif diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx index 9d188d03c7b69..8726563c0ec39 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx @@ -52,9 +52,6 @@ void GPUDisplay::disableUnsupportedOptions() if (!mChain) { mCfgL.excludeClusters = mCfgL.drawInitLinks = mCfgL.drawLinks = mCfgL.drawSeeds = mCfgL.drawTracklets = mCfgL.drawTracks = mCfgL.drawExtrapolatedTracks = 0; } - if (mConfig.showTPCTracksFromO2Format && mParam->par.earlyTpcTransform) { - throw std::runtime_error("Cannot run GPU display with early Transform when input is O2 tracks"); - } } void GPUDisplay::DoScreenshot(const char* filename, std::vector& pixels, float animateTime) diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx 
b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 3a56f874d2d12..608eeb056b6ad 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -22,7 +22,6 @@ #include "GPUTRDTracker.h" #include "GPUTRDTrackletWord.h" #include "GPUQA.h" -#include "GPUTPCClusterData.h" #include "GPUTPCConvertImpl.h" #include "GPUTPCGMPropagator.h" #include "GPUTPCMCInfo.h" @@ -41,7 +40,7 @@ using namespace o2::gpu; -#define GET_CID(sector, i) (mParam->par.earlyTpcTransform ? mIOPtrs->clusterData[sector][i].id : (mIOPtrs->clustersNative->clusterOffset[sector][0] + i)) +#define GET_CID(sector, i) (mIOPtrs->clustersNative->clusterOffset[sector][0] + i) const GPUTRDGeometry* GPUDisplay::trdGeometry() { return (GPUTRDGeometry*)mCalib->trdGeometry; } const GPUTPCTracker& GPUDisplay::sectorTracker(int32_t iSector) { return mChain->GetProcessors()->tpcTrackers[iSector]; } @@ -135,7 +134,7 @@ void GPUDisplay::DrawClusters(int32_t iSector) } uint32_t col = 0; - const int32_t nClustersInSector = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : (mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSector] : 0); + const int32_t nClustersInSector = mIOPtrs->clustersNative ? 
mIOPtrs->clustersNative->nClustersSector[iSector] : 0; [[maybe_unused]] const bool checkClusterCollision = mQA && mNCollissions && mOverlayTFClusters.size() == 0 && mIOPtrs->clustersNative && mIOPtrs->clustersNative->clustersMCTruth; for (int32_t cidInSector = 0; cidInSector < nClustersInSector; cidInSector++) { const int32_t cid = GET_CID(iSector, cidInSector); @@ -178,11 +177,7 @@ void GPUDisplay::DrawClusters(int32_t iSector) } } else if (mCfgH.markClusters) { int16_t flags; - if (mParam->par.earlyTpcTransform) { - flags = mIOPtrs->clusterData[iSector][cidInSector].flags; - } else { - flags = mIOPtrs->clustersNative->clustersLinear[cid].getFlags(); - } + flags = mIOPtrs->clustersNative->clustersLinear[cid].getFlags(); if (flags & mCfgH.markClusters) { select = tMARKED; } @@ -556,24 +551,17 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp trkParam.Set(t); } - if (mParam->par.earlyTpcTransform) { - if constexpr (std::is_same_v) { - x = mIOPtrs->mergedTrackHitsXYZ[track->FirstClusterRef() + lastCluster].x; - ZOffset = track->GetParam().GetTZOffset(); - } + float y, z; + if constexpr (std::is_same_v) { + auto cl = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + lastCluster]; + const auto& cln = mIOPtrs->clustersNative->clustersLinear[cl.num]; + GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, cl.sector, cl.row, cln.getPad(), cln.getTime(), x, y, z); + ZOffset = mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(iSector, track->GetParam().GetTZOffset(), mParam->continuousMaxTimeBin); } else { - float y, z; - if constexpr (std::is_same_v) { - auto cl = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + lastCluster]; - const auto& cln = mIOPtrs->clustersNative->clustersLinear[cl.num]; - GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, cl.sector, cl.row, cln.getPad(), cln.getTime(), x, y, z); - ZOffset = mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(iSector, 
track->GetParam().GetTZOffset(), mParam->continuousMaxTimeBin); - } else { - uint8_t sector, row; - auto cln = track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative, sector, row); - GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, sector, row, cln.getPad(), cln.getTime(), x, y, z); - ZOffset = mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, track->getTime0(), mParam->continuousMaxTimeBin); - } + uint8_t sector, row; + auto cln = track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative, sector, row); + GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, sector, row, cln.getPad(), cln.getTime(), x, y, z); + ZOffset = mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, track->getTime0(), mParam->continuousMaxTimeBin); } } else { const GPUTPCMCInfo& mc = mIOPtrs->mcInfosTPC[i]; diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index df3b385c14fe5..9c516ebb960d7 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -19,7 +19,6 @@ #include "GPUDisplay.h" #include "frontend/GPUDisplayInfo.inc" #include "GPUO2DataTypes.h" -#include "GPUTPCClusterData.h" #include "GPUTPCConvertImpl.h" #include "GPUTRDGeometry.h" #include "GPUTRDTrackletWord.h" @@ -139,35 +138,24 @@ void GPUDisplay::DrawGLScene_updateEventData() mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, NSECTORS, 1), float(0.f), [&](const tbb::blocked_range& r, float maxClusterZ) { for (int32_t iSector = r.begin(); iSector < r.end(); iSector++) { int32_t row = 0; - uint32_t nCls = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : (mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSector] : 0); + uint32_t nCls = mIOPtrs->clustersNative ? 
mIOPtrs->clustersNative->nClustersSector[iSector] : 0; for (uint32_t i = 0; i < nCls; i++) { int32_t cid; - if (mParam->par.earlyTpcTransform) { - const auto& cl = mIOPtrs->clusterData[iSector][i]; - cid = cl.id; - row = cl.row; - } else { - cid = mIOPtrs->clustersNative->clusterOffset[iSector][0] + i; - while (row < GPUCA_ROW_COUNT - 1 && mIOPtrs->clustersNative->clusterOffset[iSector][row + 1] <= (uint32_t)cid) { - row++; - } + cid = mIOPtrs->clustersNative->clusterOffset[iSector][0] + i; + while (row < GPUCA_ROW_COUNT - 1 && mIOPtrs->clustersNative->clusterOffset[iSector][row + 1] <= (uint32_t)cid) { + row++; } if (cid >= mNMaxClusters) { throw std::runtime_error("Cluster Buffer Size exceeded"); } float4* ptr = &mGlobalPos[cid]; - if (mParam->par.earlyTpcTransform) { - const auto& cl = mIOPtrs->clusterData[iSector][i]; - mParam->Sector2Global(iSector, (mCfgH.clustersOnNominalRow ? GPUTPCGeometry::Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); - } else { - float x, y, z; - const auto& cln = mIOPtrs->clustersNative->clusters[iSector][0][i]; - GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSector, row, cln.getPad(), cln.getTime(), x, y, z); - if (mCfgH.clustersOnNominalRow) { - x = GPUTPCGeometry::Row2X(row); - } - mParam->Sector2Global(iSector, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); + float x, y, z; + const auto& cln = mIOPtrs->clustersNative->clusters[iSector][0][i]; + GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSector, row, cln.getPad(), cln.getTime(), x, y, z); + if (mCfgH.clustersOnNominalRow) { + x = GPUTPCGeometry::Row2X(row); } + mParam->Sector2Global(iSector, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); if (fabsf(ptr->z) > maxClusterZ) { maxClusterZ = fabsf(ptr->z); diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 5bcda68e691b3..3ab84ca3f67cd 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -90,7 +90,6 @@ 
o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPC o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "= TPCMERGER") o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "= TRDTRACKER MATLUT TPCMERGER" LB GPUTRDTrackerGPU* externalInstance) o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "= TRDTRACKER MATLUT O2PROPAGATOR" LB GPUTRDTracker* externalInstance) -o2_gpu_add_kernel("GPUTPCConvertKernel" "=" LB) o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "= TPCCOMPRESSION" LB) o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "= ERRORS" LB) o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB) diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index aa4db98d0b71a..31bfaa47d7420 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -45,7 +45,6 @@ #include "GPUTPCGMPropagator.h" #include "AliHLTTPCClusterMCData.h" #include "GPUTPCMCInfo.h" -#include "GPUTPCClusterData.h" #include "GPUO2DataTypes.h" #include "GPUParam.inc" #include "GPUTPCClusterRejection.h" @@ -1048,8 +1047,6 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx bool comp; if (revLabel == -1) { comp = true; - } else if (mTracking->GetParam().par.earlyTpcTransform) { - comp = fabsf(trks[i].GetParam().GetZ() + trks[i].GetParam().GetTZOffset()) < fabsf(trks[revLabel].GetParam().GetZ() + trks[revLabel].GetParam().GetTZOffset()); } else { float shift1 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[i].CSide() * GPUChainTracking::NSECTORS / 2, trks[i].GetParam().GetTZOffset()); float shift2 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[revLabel].CSide() * GPUChainTracking::NSECTORS / 2, trks[revLabel].GetParam().GetTZOffset()); @@ -1387,13 +1384,8 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx if (!mParam->continuousMaxTimeBin) { return param.GetZ() - mc1.z; } -#ifdef GPUCA_TPC_GEOMETRY_O2 
- if (!mParam->par.earlyTpcTransform) { - float shift = side == 2 ? 0 : mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(side * GPUChainTracking::NSECTORS / 2, param.GetTZOffset() - mc1.t0); - return param.GetZ() + shift - mc1.z; - } -#endif - return param.Z() + param.TZOffset() - mc1.z; + float shift = side == 2 ? 0 : mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(side * GPUChainTracking::NSECTORS / 2, param.GetTZOffset() - mc1.t0); + return param.GetZ() + shift - mc1.z; }; prop.SetTrack(&param, alpha); From c60297666362f6b26f444414633fbc4e154602d8 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 9 Sep 2025 11:13:41 +0200 Subject: [PATCH 19/52] GPU: Fix direction for material correction in sector track refit --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index fb2fcdfd06776..71b93221445f5 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -514,7 +514,7 @@ GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sector][0] + clusterIndex]; flags = cl.getFlags(); GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); - if (prop.PropagateToXAlpha(x, alpha, true)) { + if (prop.PropagateToXAlpha(x, alpha, way == 0)) { return way == 0; } trk.ConstrainSinPhi(); From c41507ae2021e5d273a5945b2532a8f53daf6b5d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 9 Sep 2025 11:17:08 +0200 Subject: [PATCH 20/52] GPU TPC: Add Pt cut to treat < 100 MeV always as secondary --- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 43 ++++++++++---------
.../SectorTracker/GPUTPCTrackParam.cxx | 38 ++++++++-------- 2 files changed, 42 insertions(+), 39 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 80d4809e4a466..79e69603423b6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -862,30 +862,31 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge if (!merger->Param().par.continuousTracking) { return; } - const float r1 = CAMath::Max(0.0001f, CAMath::Abs(mP[4] * merger->Param().polynomialField.GetNominalBz())); - - const float dist2 = mX * mX + mP[0] * mP[0]; - const float dist1r2 = dist2 * r1 * r1; float deltaZ = 0.f; bool beamlineReached = false; - if (dist1r2 < 4) { - const float alpha = CAMath::ACos(1 - 0.5f * dist1r2); // Angle of a circle, such that |(cosa, sina) - (1,0)| == dist - const float beta = CAMath::ATan2(mP[0], mX); - const int32_t comp = mP[2] > CAMath::Sin(beta); - const float sinab = CAMath::Sin((comp ? 0.5f : -0.5f) * alpha + beta); // Angle of circle through origin and track position, to be compared to Snp - const float res = CAMath::Abs(sinab - mP[2]); - - if (res < 0.2) { - const float r = 1.f / r1; - const float dS = alpha * r; - float z0 = dS * mP[3]; - if (CAMath::Abs(z0) > GPUTPCGeometry::TPCLength()) { - z0 = z0 > 0 ? 
GPUTPCGeometry::TPCLength() : -GPUTPCGeometry::TPCLength(); - } - deltaZ = mP[1] - z0; - beamlineReached = true; + const float r1 = CAMath::Max(0.0001f, CAMath::Abs(mP[4] * merger->Param().polynomialField.GetNominalBz())); + if (r1 < 0.01501) { // 100 MeV @ 0.5T ~ 0.66m cutoff + const float dist2 = mX * mX + mP[0] * mP[0]; + const float dist1r2 = dist2 * r1 * r1; + if (dist1r2 < 4) { + const float alpha = CAMath::ACos(1 - 0.5f * dist1r2); // Angle of a circle, such that |(cosa, sina) - (1,0)| == dist + const float beta = CAMath::ATan2(mP[0], mX); + const int32_t comp = mP[2] > CAMath::Sin(beta); + const float sinab = CAMath::Sin((comp ? 0.5f : -0.5f) * alpha + beta); // Angle of circle through origin and track position, to be compared to Snp + const float res = CAMath::Abs(sinab - mP[2]); + + if (res < 0.2) { + const float r = 1.f / r1; + const float dS = alpha * r; + float z0 = dS * mP[3]; + if (CAMath::Abs(z0) > GPUTPCGeometry::TPCLength()) { + z0 = z0 > 0 ? GPUTPCGeometry::TPCLength() : -GPUTPCGeometry::TPCLength(); + } + deltaZ = mP[1] - z0; + beamlineReached = true; - // printf("X %9.3f Y %9.3f QPt %9.3f R %9.3f --> Alpha %9.3f Snp %9.3f Snab %9.3f Res %9.3f dS %9.3f z0 %9.3f\n", mX, mP[0], mP[4], r, alpha / 3.1415 * 180, mP[2], sinab, res, dS, z0); + // printf("X %9.3f Y %9.3f QPt %9.3f R %9.3f --> Alpha %9.3f Snp %9.3f Snab %9.3f Res %9.3f dS %9.3f z0 %9.3f\n", mX, mP[0], mP[4], r, alpha / 3.1415 * 180, mP[2], sinab, res, dS, z0); + } } } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx index af6f8e6cddc08..a4d9265cf800e 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx @@ -745,27 +745,29 @@ GPUd() void GPUTPCTrackParam::ConstrainZ(float& z, int32_t sector, float& z0, fl GPUd() void GPUTPCTrackParam::ShiftZ(float z1, float z2, float x1, float x2, float bz, float defaultZOffsetOverR) { const float r1 = CAMath::Max(0.0001f,
CAMath::Abs(mParam.mP[4] * bz)); - - const float dist2 = mParam.mX * mParam.mX + mParam.mP[0] * mParam.mP[0]; - const float dist1r2 = dist2 * r1 * r1; float deltaZ = 0.f; bool beamlineReached = false; - if (dist1r2 < 4) { - const float alpha = CAMath::ACos(1 - 0.5f * dist1r2); // Angle of a circle, such that |(cosa, sina) - (1,0)| == dist - const float beta = CAMath::ATan2(mParam.mP[0], mParam.mX); - const int32_t comp = mParam.mP[2] > CAMath::Sin(beta); - const float sinab = CAMath::Sin((comp ? 0.5f : -0.5f) * alpha + beta); // Angle of circle through origin and track position, to be compared to Snp - const float res = CAMath::Abs(sinab - mParam.mP[2]); - - if (res < 0.2f) { - const float r = 1.f / r1; - const float dS = alpha * r; - float z0 = dS * mParam.mP[3]; - if (CAMath::Abs(z0) > GPUTPCGeometry::TPCLength()) { - z0 = z0 > 0 ? GPUTPCGeometry::TPCLength() : -GPUTPCGeometry::TPCLength(); + + if (r1 < 0.01501) { // 100 MeV @ 0.5T ~ 0.66m cutoff + const float dist2 = mParam.mX * mParam.mX + mParam.mP[0] * mParam.mP[0]; + const float dist1r2 = dist2 * r1 * r1; + if (dist1r2 < 4) { + const float alpha = CAMath::ACos(1 - 0.5f * dist1r2); // Angle of a circle, such that |(cosa, sina) - (1,0)| == dist + const float beta = CAMath::ATan2(mParam.mP[0], mParam.mX); + const int32_t comp = mParam.mP[2] > CAMath::Sin(beta); + const float sinab = CAMath::Sin((comp ? 0.5f : -0.5f) * alpha + beta); // Angle of circle through origin and track position, to be compared to Snp + const float res = CAMath::Abs(sinab - mParam.mP[2]); + + if (res < 0.2f) { + const float r = 1.f / r1; + const float dS = alpha * r; + float z0 = dS * mParam.mP[3]; + if (CAMath::Abs(z0) > GPUTPCGeometry::TPCLength()) { + z0 = z0 > 0 ?
GPUTPCGeometry::TPCLength() : -GPUTPCGeometry::TPCLength(); + } + deltaZ = mParam.mP[1] - z0; + beamlineReached = true; } - deltaZ = mParam.mP[1] - z0; - beamlineReached = true; } } From 1374b87dc99a19741f1291c32ef399104d3e55b1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 9 Sep 2025 11:24:37 +0200 Subject: [PATCH 21/52] GPU TPC: Rename some kernels --- .../Definitions/GPUDefParametersDefaults.h | 36 ++++++------- .../Global/GPUChainTrackingMerger.cxx | 10 ++-- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 +-- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 6 +-- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 12 ++--- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 51 ++++++++++--------- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 4 +- .../Merger/GPUTPCGlobalDebugSortKernels.h | 4 +- GPU/GPUTracking/kernels.cmake | 10 ++-- 10 files changed, 72 insertions(+), 71 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 7b76860d4ca2b..29aa3808506dc 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -57,9 +57,9 @@ #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 #define GPUCA_LB_GPUTPCGMMergerCollect 512 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 @@ -119,9 +119,9 @@ #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 #define 
GPUCA_LB_GPUTPCGMMergerCollect 512 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 @@ -181,9 +181,9 @@ #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 @@ -243,9 +243,9 @@ #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 @@ -398,14 +398,14 @@ #ifndef GPUCA_LB_GPUTPCGMMergerSortTracksPrepare #define 
GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 #endif - #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #ifndef GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 #endif - #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #ifndef GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 #endif - #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #ifndef GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 + #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 #endif #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step0 #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256 diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 36e4e9af83fbd..4d9fcd4b1572a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -165,8 +165,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({{1, -WarpSize(), 0, deviceType}}, 1); - runKernel({{1, -WarpSize(), 0, deviceType}}, 1); + runKernel({{1, -WarpSize(), 0, deviceType}}, 1); + runKernel({{1, -WarpSize(), 0, deviceType}}, 1); } DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCollectedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); @@ -196,11 +196,11 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } runKernel({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount())); runKernel({{numBlocks, -ThreadCount(), 0, 
deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment())); - runKernel(GetGridAuto(0, deviceType)); + runKernel(GetGridAuto(0, deviceType)); CondWaitEvent(waitForTransfer, &mEvents->single); runKernel(GetGridAuto(0, deviceType)); - runKernel(GetGridAuto(0, deviceType)); - runKernel(GetGridAuto(0, deviceType)); + runKernel(GetGridAuto(0, deviceType)); + runKernel(GetGridAuto(0, deviceType)); DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingPrepareFit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 71b93221445f5..a36c98e25d205 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -325,7 +325,7 @@ void* GPUTPCGMMerger::SetPointersMerger(void* mem) computePointerWithAlignment(mem, mTrackIDs, GPUCA_NSECTORS * mNMaxSingleSectorTracks); // UnpackResetIds - RefitSectorTracks - UnpackSectorGlobal memMax = (void*)std::max((size_t)mem, (size_t)memMax); mem = memBase; - computePointerWithAlignment(mem, mTrackSort, mNMaxTracks); // PrepareClustersForFit0 - SortTracksQPt - PrepareClustersForFit1 - PrepareClustersForFit1 / Finalize0 - Finalize2 + computePointerWithAlignment(mem, mTrackSort, mNMaxTracks); // PrepareForFit0 - SortTracksQPt - PrepareForFit1 - PrepareForFit1 / Finalize0 - Finalize2 computePointerWithAlignment(mem, mSharedCount, mNMaxClusters); memMax = (void*)std::max((size_t)mem, (size_t)memMax); mem = memBase; @@ -1738,7 +1738,7 @@ GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, } } -GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) +GPUd() void GPUTPCGMMerger::PrepareForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { for (uint32_t i = iBlock * nThreads + iThread; i < 
mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackSort[i] = i; @@ -1763,7 +1763,7 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int #endif } -GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) +GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackOrderAttach[mTrackSort[i]] = i; @@ -1777,7 +1777,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThr } } -GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) +GPUd() void GPUTPCGMMerger::PrepareForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nBlocks * nThreads) { if (mSharedCount[mClusters[i].num] > 1) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 7813ca4595271..2576ed0720c16 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -170,9 +170,9 @@ class GPUTPCGMMerger : public GPUProcessor GPUd() void SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); - GPUd() void PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); - GPUd() void PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); - GPUd() void PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); + GPUd() void PrepareForFit0(int32_t nBlocks, int32_t 
nThreads, int32_t iBlock, int32_t iThread); + GPUd() void PrepareForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); + GPUd() void PrepareForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index 1631777d80482..6bee239e42848 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -163,21 +163,21 @@ GPUdii() void GPUTPCGMMergerSortTracksPrepare::Thread<0>(int32_t nBlocks, int32_ } template <> -GPUdii() void GPUTPCGMMergerPrepareClusters::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) +GPUdii() void GPUTPCGMMergerPrepareForFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - merger.PrepareClustersForFit0(nBlocks, nThreads, iBlock, iThread); + merger.PrepareForFit0(nBlocks, nThreads, iBlock, iThread); } template <> -GPUdii() void GPUTPCGMMergerPrepareClusters::Thread<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) +GPUdii() void GPUTPCGMMergerPrepareForFit::Thread<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - merger.PrepareClustersForFit1(nBlocks, nThreads, iBlock, iThread); + merger.PrepareForFit1(nBlocks, nThreads, iBlock, iThread); } template <> -GPUdii() 
void GPUTPCGMMergerPrepareClusters::Thread<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) +GPUdii() void GPUTPCGMMergerPrepareForFit::Thread<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - merger.PrepareClustersForFit2(nBlocks, nThreads, iBlock, iThread); + merger.PrepareForFit2(nBlocks, nThreads, iBlock, iThread); } template <> diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h index bda00822bac6a..dec72b1d431e6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h @@ -138,7 +138,7 @@ class GPUTPCGMMergerCollect : public GPUTPCGMMergerGeneral GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -class GPUTPCGMMergerPrepareClusters : public GPUTPCGMMergerGeneral +class GPUTPCGMMergerPrepareForFit : public GPUTPCGMMergerGeneral { public: template diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index ab3eb02db393e..6ea44e334db7a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -127,22 +127,23 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks for (int32_t iTmp = get_global_id(0); iTmp < nTracks; iTmp += get_global_size(0)) { TrackTPC oTrack; const int32_t i = trackSort[iTmp].x; - auto snpIn = tracks[i].GetParam().GetSinPhi(); + const auto& track = tracks[i]; + auto snpIn = track.GetParam().GetSinPhi(); if (snpIn > SNPThresh) { snpIn = SNPThresh; } else if (snpIn < -SNPThresh) { snpIn = -SNPThresh; } - oTrack.set(tracks[i].GetParam().GetX(), tracks[i].GetAlpha(), - {tracks[i].GetParam().GetY(), tracks[i].GetParam().GetZ(), snpIn, 
tracks[i].GetParam().GetDzDs(), tracks[i].GetParam().GetQPt()}, - {tracks[i].GetParam().GetCov(0), - tracks[i].GetParam().GetCov(1), tracks[i].GetParam().GetCov(2), - tracks[i].GetParam().GetCov(3), tracks[i].GetParam().GetCov(4), tracks[i].GetParam().GetCov(5), - tracks[i].GetParam().GetCov(6), tracks[i].GetParam().GetCov(7), tracks[i].GetParam().GetCov(8), tracks[i].GetParam().GetCov(9), - tracks[i].GetParam().GetCov(10), tracks[i].GetParam().GetCov(11), tracks[i].GetParam().GetCov(12), tracks[i].GetParam().GetCov(13), tracks[i].GetParam().GetCov(14)}); + oTrack.set(track.GetParam().GetX(), track.GetAlpha(), + {track.GetParam().GetY(), track.GetParam().GetZ(), snpIn, track.GetParam().GetDzDs(), track.GetParam().GetQPt()}, + {track.GetParam().GetCov(0), + track.GetParam().GetCov(1), track.GetParam().GetCov(2), + track.GetParam().GetCov(3), track.GetParam().GetCov(4), track.GetParam().GetCov(5), + track.GetParam().GetCov(6), track.GetParam().GetCov(7), track.GetParam().GetCov(8), track.GetParam().GetCov(9), + track.GetParam().GetCov(10), track.GetParam().GetCov(11), track.GetParam().GetCov(12), track.GetParam().GetCov(13), track.GetParam().GetCov(14)}); - oTrack.setChi2(tracks[i].GetParam().GetChi2()); - auto& outerPar = tracks[i].OuterParam(); + oTrack.setChi2(track.GetParam().GetChi2()); + auto& outerPar = track.OuterParam(); if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { if (param.dodEdxEnabled) { oTrack.setdEdx(tracksdEdx[i]); @@ -189,13 +190,13 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks float t1 = 0, t2 = 0; int32_t sector1 = 0, sector2 = 0; const o2::tpc::ClusterNativeAccess* GPUrestrict() clusters = merger.GetConstantMem()->ioPtrs.clustersNative; - for (uint32_t j = 0; j < tracks[i].NClusters(); j++) { - if ((trackClusters[tracks[i].FirstClusterRef() + j].state & flagsReject) || (merger.ClusterAttachment()[trackClusters[tracks[i].FirstClusterRef() + j].num] & flagsRequired) != flagsRequired) { + for (uint32_t j = 0; j < 
track.NClusters(); j++) { + if ((trackClusters[track.FirstClusterRef() + j].state & flagsReject) || (merger.ClusterAttachment()[trackClusters[track.FirstClusterRef() + j].num] & flagsRequired) != flagsRequired) { continue; } - int32_t clusterIdGlobal = trackClusters[tracks[i].FirstClusterRef() + j].num; - int32_t sector = trackClusters[tracks[i].FirstClusterRef() + j].sector; - int32_t globalRow = trackClusters[tracks[i].FirstClusterRef() + j].row; + int32_t clusterIdGlobal = trackClusters[track.FirstClusterRef() + j].num; + int32_t sector = trackClusters[track.FirstClusterRef() + j].sector; + int32_t globalRow = trackClusters[track.FirstClusterRef() + j].row; int32_t clusterIdInRow = clusterIdGlobal - clusters->clusterOffset[sector][globalRow]; clIndArr[nOutCl2] = clusterIdInRow; sectorIndexArr[nOutCl2] = sector; @@ -210,25 +211,25 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks } } - bool cce = tracks[i].CCE() && ((sector1 < MAXSECTOR / 2) ^ (sector2 < MAXSECTOR / 2)); + bool cce = track.CCE() && ((sector1 < MAXSECTOR / 2) ^ (sector2 < MAXSECTOR / 2)); float time0 = 0.f, tFwd = 0.f, tBwd = 0.f; if (merger.Param().par.continuousTracking) { - time0 = tracks[i].GetParam().GetTZOffset(); + time0 = track.GetParam().GetTZOffset(); if (cce) { - bool lastSide = trackClusters[tracks[i].FirstClusterRef()].sector < MAXSECTOR / 2; + bool lastSide = trackClusters[track.FirstClusterRef()].sector < MAXSECTOR / 2; float delta = 0.f; - for (uint32_t iCl = 1; iCl < tracks[i].NClusters(); iCl++) { - auto& cacl1 = trackClusters[tracks[i].FirstClusterRef() + iCl]; + for (uint32_t iCl = 1; iCl < track.NClusters(); iCl++) { + auto& cacl1 = trackClusters[track.FirstClusterRef() + iCl]; if (lastSide ^ (cacl1.sector < MAXSECTOR / 2)) { auto& cl1 = clusters->clustersLinear[cacl1.num]; - auto& cl2 = clusters->clustersLinear[trackClusters[tracks[i].FirstClusterRef() + iCl - 1].num]; + auto& cl2 = clusters->clustersLinear[trackClusters[track.FirstClusterRef() + iCl - 1].num]; delta 
= CAMath::Abs(cl1.getTime() - cl2.getTime()) * 0.5f; - if (delta < MinDelta) { - delta = MinDelta; - } break; } } + if (delta < MinDelta) { + delta = MinDelta; + } tFwd = tBwd = delta; } else { // estimate max/min time increments which still keep track in the physical limits of the TPC @@ -261,7 +262,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks if (cce) { oTrack.setHasCSideClusters(); oTrack.setHasASideClusters(); - } else if (tracks[i].CSide()) { + } else if (track.CSide()) { oTrack.setHasCSideClusters(); } else { oTrack.setHasASideClusters(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index 5af3ebb51b9d6..7dfa84bee4a10 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -94,7 +94,7 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread -GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t parameter) +GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t parameter) { if (iThread || iBlock) { return; @@ -112,7 +112,7 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread -GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t parameter) +GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t parameter) { if (iBlock) { return; diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h 
b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h index 7c3d4246ad303..726e8cee1f7a7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h @@ -29,8 +29,8 @@ class GPUTPCGlobalDebugSortKernels : public GPUKernelTemplate enum K { defaultKernel = 0, clearIds = 0, sectorTracks = 1, - extrapolatedTracks1 = 2, - extrapolatedTracks2 = 3, + mergedTracks1 = 2, + mergedTracks2 = 3, borderTracks = 4 }; GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCMerging; } typedef GPUTPCGMMerger processorType; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 3ab84ca3f67cd..151f0326e00ca 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -46,8 +46,8 @@ o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPC o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER") o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO int8_t parameter) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, mergedTracks1" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, mergedTracks2" "= TPCMERGER" NO int8_t parameter) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter) o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map) o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map uint32_t* output) @@ -75,9 +75,9 @@ o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTP 
o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER") o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER") o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareForFit, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareForFit, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareForFit, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) From 5e10d878c95aa9850c23ad84dcbf1ffda7865838 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 9 Sep 2025 14:28:47 +0200 Subject: [PATCH 22/52] GPU TPC Merger: Improve sanity check debug code --- GPU/GPUTracking/Definitions/GPUDef.h | 3 --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 24 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index 8ca361dd5003a..d684cd42f8262 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ b/GPU/GPUTracking/Definitions/GPUDef.h @@ -60,9 +60,6 @@ #ifdef CADEBUG #undef CADEBUG #endif - #ifdef GPUCA_CADEBUG_ENABLED - #undef GPUCA_CADEBUG_ENABLED - #endif #if GPUCA_CADEBUG == 1 && !defined(GPUCA_GPUCODE) #define CADEBUG(...) __VA_ARGS__ #define CADEBUG2(cmd, ...) 
{__VA_ARGS__; cmd;} diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index a36c98e25d205..93d1f45e846b8 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -14,6 +14,7 @@ #define GPUCA_CADEBUG 0 #define GPUCA_MERGE_LOOPER_MC 0 +// #define GPUCA_CADEBUG_ENABLED #include "GPUCommonDef.h" @@ -165,6 +166,7 @@ GPUTPCGMMerger::GPUTPCGMMerger() void GPUTPCGMMerger::CheckMergedTracks() { + uint32_t nErr = 0; std::vector trkUsed(SectorTrackInfoLocalTotal()); for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) { trkUsed[i] = false; @@ -172,6 +174,22 @@ void GPUTPCGMMerger::CheckMergedTracks() for (int32_t itr = 0; itr < SectorTrackInfoLocalTotal(); itr++) { GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; + if (track.PrevSegmentNeighbour() >= 0 && mSectorTrackInfos[track.PrevSegmentNeighbour()].NextSegmentNeighbour() != itr) { + GPUError("Invalid reciprocal segment link: %d PrevSegmentNeighbour %d NextSegmentNeighbour %d", itr, track.PrevSegmentNeighbour(), mSectorTrackInfos[track.PrevSegmentNeighbour()].NextSegmentNeighbour()); + nErr++; + } + if (track.NextSegmentNeighbour() >= 0 && mSectorTrackInfos[track.NextSegmentNeighbour()].PrevSegmentNeighbour() != itr) { + GPUError("Invalid reciprocal segment link: %d NextSegmentNeighbour %d PrevSegmentNeighbour %d", itr, track.NextSegmentNeighbour(), mSectorTrackInfos[track.NextSegmentNeighbour()].PrevSegmentNeighbour()); + nErr++; + } + if (track.PrevNeighbour() >= 0 && mSectorTrackInfos[track.PrevNeighbour()].NextNeighbour() != itr) { + GPUError("Invalid reciprocal link: %d PrevNeighbour %d NextNeighbour %d", itr, track.PrevNeighbour(), mSectorTrackInfos[track.PrevNeighbour()].NextNeighbour()); + nErr++; + } + if (track.NextNeighbour() >= 0 && mSectorTrackInfos[track.NextNeighbour()].PrevNeighbour() != itr) { + GPUError("Invalid reciprocal link: %d NextNeighbour %d PrevNeighbour %d", itr, track.NextNeighbour(), 
mSectorTrackInfos[track.NextNeighbour()].PrevNeighbour()); + nErr++; + } if (track.PrevSegmentNeighbour() >= 0) { continue; } @@ -183,6 +201,7 @@ void GPUTPCGMMerger::CheckMergedTracks() int32_t iTrk = tr - mSectorTrackInfos; if (trkUsed[iTrk]) { GPUError("FAILURE: double use"); + nErr++; } trkUsed[iTrk] = true; @@ -206,8 +225,12 @@ void GPUTPCGMMerger::CheckMergedTracks() for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) { if (trkUsed[i] == false) { GPUError("FAILURE: trk missed"); + nErr++; } } + if (nErr == 0) { + GPUInfo("Merged Track Graph OK"); + } } template @@ -1419,6 +1442,7 @@ struct GPUTPCGMMerger_CompareClusterIds { GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { + // if (iThread == 0 && iBlock == 0) { CheckMergedTracks(); } return; // (if GPUCA_CADEBUG_ENABLED) static constexpr int32_t kMaxParts = 16; static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; From 061d1ef532badbabde6103e87666497ce937d958 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 9 Sep 2025 21:03:44 +0200 Subject: [PATCH 23/52] GPU TPC: Fix deterministic mode with per-segment tracking --- GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index 7dfa84bee4a10..6f5e000ddcc7b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -141,6 +141,11 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread= 0) { + merger.MergedTracks()[j].SetPrevSegment(tmp2[merger.MergedTracks()[j].PrevSegment()]); + } + } } GPUbarrier(); for (int32_t i = 0; i < 2 * GPUCA_NSECTORS; i++) { From 9e23fb68029c080a212b51f4feb94eb25f7ebc7a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 25 Jul 2025 23:58:36 +0200 Subject: [PATCH 24/52] GPU TPC: Shift all segments of 
looping tracks once before track fit --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 29 +++++++++- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 14 +++++ GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 55 +++++++------------ GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 6 +- 4 files changed, 64 insertions(+), 40 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 93d1f45e846b8..58dba0b22230f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -515,7 +515,8 @@ GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack trk.DzDs() = inTrack->Param().GetDzDs(); trk.QPt() = inTrack->Param().GetQPt(); trk.TZOffset() = GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin); - trk.ShiftZ(this, sector, sectorTrack.ClusterZT0(), sectorTrack.ClusterZTN(), inTrack->Param().GetX(), inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead + const auto tmp = sectorTrack.ClusterZTN() > sectorTrack.ClusterZT0() ? 
std::array{sectorTrack.ClusterZTN(), sectorTrack.ClusterZT0()} : std::array{sectorTrack.ClusterZT0(), sectorTrack.ClusterZTN()}; + trk.ShiftZ(this, sector, tmp[0], tmp[1], inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead sectorTrack.SetX2(0.f); for (int32_t way = 0; way < 2; way++) { if (way) { @@ -1791,12 +1792,36 @@ GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, in { for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackOrderAttach[mTrackSort[i]] = i; - const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; + GPUTPCGMMergedTrack& trk = mMergedTracks[i]; if (trk.OK()) { for (uint32_t j = 0; j < trk.NClusters(); j++) { mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = attachAttached | attachGood; CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u); } + if (!trk.CCE() && !trk.MergedLooper()) { + GPUTPCGMMergedTrack* updTrk = &trk; + while (updTrk->PrevSegment() >= 0) { + auto next = &mMergedTracks[updTrk->PrevSegment()]; + if (next == &trk) { + break; + } + updTrk = next; + } + const auto &cl0 = mClusters[trk.FirstClusterRef()], &cln = mClusters[updTrk->FirstClusterRef() + updTrk->NClusters() - 1]; + const auto& GPUrestrict() cls = GetConstantMem()->ioPtrs.clustersNative->clustersLinear; + float z0 = cls[cl0.num].getTime(), zn = cls[cln.num].getTime(); + const auto tmp = zn > z0 ? 
std::array{zn, z0, GPUTPCGeometry::Row2X(cln.row)} : std::array{z0, zn, GPUTPCGeometry::Row2X(cl0.row)}; + trk.Param().ShiftZ(this, cl0.sector, tmp[0], tmp[1], tmp[2]); + updTrk = &trk; + while (updTrk->PrevSegment() >= 0) { + auto next = &mMergedTracks[updTrk->PrevSegment()]; + if (next == &trk) { + break; + } + updTrk = next; + updTrk->Param().TZOffset() = trk.Param().TZOffset(); + } + } } } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 6ea44e334db7a..35de1611e280c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -211,6 +211,20 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks } } + if (track.PrevSegment() >= 0) { + const GPUTPCGMMergedTrack* chkTrk = &tracks[track.PrevSegment()]; + while (chkTrk->PrevSegment() >= 0) { + auto next = &tracks[chkTrk->PrevSegment()]; + if (next == &track) { + break; + } + chkTrk = next; + } + const auto& firstPrevCluster = trackClusters[chkTrk->FirstClusterRef()]; + t1 = clusters->clustersLinear[firstPrevCluster.num].getTime(); + sector1 = firstPrevCluster.sector; + } + bool cce = track.CCE() && ((sector1 < MAXSECTOR / 2) ^ (sector2 < MAXSECTOR / 2)); float time0 = 0.f, tFwd = 0.f, tBwd = 0.f; if (merger.Param().par.continuousTracking) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 79e69603423b6..52b02c7502a6a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -53,7 +53,7 @@ using namespace o2::gpu; using namespace o2::tpc; -GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, int32_t attempt, float maxSinPhi, gputpcgmmergertypes::GPUTPCOuterParam* GPUrestrict() outerParam, int8_t leg) +GPUd() bool 
GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, int32_t attempt, float maxSinPhi, GPUTPCGMMergedTrack& GPUrestrict() track) { static constexpr float kDeg2Rad = M_PI / 180.f; CADEBUG(static constexpr float kSectAngle = 2 * M_PI / 18.f); @@ -66,9 +66,6 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ prop.SetMaterialTPC(); prop.SetPolynomialField(¶m.polynomialField); prop.SetMaxSinPhi(maxSinPhi); - if ((clusters[0].sector < 18) == (clusters[N - 1].sector < 18)) { - ShiftZ2(clusters, merger, N); - } if (param.rec.tpc.mergerInterpolateErrors) { for (int32_t i = 0; i < N; i++) { interpolation.hit[i].errorY = -1; @@ -88,9 +85,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float sumInvSqrtCharge = 0.f; int32_t nAvgCharge = 0; - if (iWay && param.rec.tpc.nWaysOuter && outerParam) { + if (iWay && param.rec.tpc.nWaysOuter) { if (iWay == nWays - 1) { - StoreOuter(outerParam, prop, 0); + StoreOuter(&track.OuterParam(), prop, 0); } } @@ -109,7 +106,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ N = 0; lastUpdateX = -1; - const bool inFlyDirection = (leg & 1); + const bool inFlyDirection = (track.Leg() & 1); const int32_t wayDirection = (iWay & 1) ? 
-1 : 1; bool noFollowCircle = false, noFollowCircle2 = false; @@ -173,7 +170,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2; - dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, leg == 0, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); + dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, track.Leg() == 0, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection); if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { @@ -244,7 +241,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float uncorrectedY = -1e6f; if (allowModification) { - uncorrectedY = AttachClusters(merger, cluster.sector, cluster.row, iTrk, leg == 0, prop); + uncorrectedY = AttachClusters(merger, cluster.sector, cluster.row, iTrk, track.Leg() == 0, prop); } const int32_t err2 = mNDF > 0 && CAMath::Abs(prop.GetSinPhi0()) >= maxSinForUpdate; @@ -361,14 +358,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ break; // bad chi2 for the whole track, stop the fit } } - if (((nWays - iWay) & 1) && (iWay != nWays - 1) && (clusters[0].sector < 18) == (clusters[maxN - 1].sector < 18)) { - ShiftZ2(clusters, merger, maxN); + if (((nWays - iWay) & 1) && (iWay != nWays - 1) && !track.CCE() && !track.Looper()) { + ShiftZ(clusters, merger, maxN); } } ConstrainSinPhi(); GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamUpdateTrack, iTrk)) 
{ - o2::utils::DebugStreamer::instance()->getStreamer("debug_accept_track", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("debug_accept_track").data() << "iTrk=" << iTrk << "outerParam=" << *outerParam << "track=" << this << "ihitStart=" << ihitStart << "\n"; + o2::utils::DebugStreamer::instance()->getStreamer("debug_accept_track", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("debug_accept_track").data() << "iTrk=" << iTrk << "outerParam=" << track.OuterParam() << "track=" << this << "ihitStart=" << ihitStart << "\n"; }) if (!(N + NTolerated >= GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(mP[4] * param.qptB5Scaler) && 2 * NTolerated <= CAMath::Max(10, N) && CheckNumericalQuality(covYYUpd))) { @@ -842,22 +839,18 @@ GPUdi() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUr } } -GPUd() void GPUTPCGMTrackParam::ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N) +GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N) { - float tzInner, tzOuter; - float xInner, xOuter; if (N == 0) { N = 1; } const auto& GPUrestrict() cls = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear; - tzInner = cls[clusters[N - 1].num].getTime(); - tzOuter = cls[clusters[0].num].getTime(); - xInner = GPUTPCGeometry::Row2X(clusters[N - 1].row); - xOuter = GPUTPCGeometry::Row2X(clusters[0].row); - ShiftZ(merger, clusters[0].sector, tzInner, tzOuter, xInner, xOuter); + float z0 = cls[clusters[0].num].getTime(), zn = cls[clusters[N - 1].num].getTime(); + const auto tmp = zn > z0 ? 
std::array{zn, z0, GPUTPCGeometry::Row2X(clusters[N - 1].row)} : std::array{z0, zn, GPUTPCGeometry::Row2X(clusters[0].row)}; + ShiftZ(merger, clusters[0].sector, tmp[0], tmp[1], tmp[2]); } -GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merger, int32_t sector, float tz1, float tz2, float x1, float x2) +GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merger, int32_t sector, float cltmax, float cltmin, float clx) { if (!merger->Param().par.continuousTracking) { return; @@ -891,25 +884,17 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge } if (!beamlineReached) { - float baset, basex; - if (CAMath::Abs(tz1) > CAMath::Abs(tz2)) { - baset = tz1; - basex = x1; - } else { - baset = tz2; - basex = x2; - } - float refZ = ((sector < GPUCA_NSECTORS / 2) ? merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * basex; + float refZ = ((sector < GPUCA_NSECTORS / 2) ? merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * clx; float basez; - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(sector, baset, basez, mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(sector, cltmax, basez, mTZOffset); deltaZ = basez - refZ; } { float deltaT = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaZtoDeltaTimeInTimeFrame(sector, deltaZ); mTZOffset += deltaT; mP[1] -= deltaZ; - const float maxT = CAMath::Min(tz1, tz2) - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getT0(); - const float minT = CAMath::Max(tz1, tz2) - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(sector); + const float maxT = cltmin - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getT0(); + const float minT = cltmax - 
merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(sector); // printf("T Check: Clusters %f %f, min %f max %f vtx %f\n", tz1, tz2, minT, maxT, mTZOffset); deltaT = 0.f; if (mTZOffset < minT) { @@ -980,7 +965,7 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() GPUTPCGMTrackParam t = track.Param(); float Alpha = track.Alpha(); CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt()); - bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam(), track.Leg()); + bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, track); CADEBUG(printf("Finished Fit Track %d\n", iTrk)); CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, ok %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits))); @@ -994,7 +979,7 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() NTolerated = 0; // Clusters not fit but tollerated for track length cut t = track.Param(); Alpha = track.Alpha(); - ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, 1, GPUCA_MAX_SIN_PHI, &track.OuterParam(), track.Leg()); + ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, 1, GPUCA_MAX_SIN_PHI, track); } else { uint32_t nRefit = CAMath::AtomicAdd(&merger->Memory()->nRetryRefit, 1u); merger->RetryRefitIds()[nRefit] = iTrk; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index 435f88bb93a16..0b65e5f155104 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -141,7 +141,7 @@ class 
GPUTPCGMTrackParam GPUd() bool CheckNumericalQuality(float overrideCovYY = -1.f) const; GPUd() bool CheckCov() const; - GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, int32_t& N, int32_t& NTolerated, float& Alpha, int32_t attempt = 0, float maxSinPhi = GPUCA_MAX_SIN_PHI, gputpcgmmergertypes::GPUTPCOuterParam* outerParam = nullptr, int8_t leg = 0); + GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, int32_t& N, int32_t& NTolerated, float& Alpha, int32_t attempt, float maxSinPhi, GPUTPCGMMergedTrack& track); GPUd() void MoveToReference(GPUTPCGMPropagator& prop, const GPUParam& param, float& alpha); GPUd() void MirrorTo(GPUTPCGMPropagator& prop, float toY, float toZ, bool inFlyDirection, const GPUParam& param, uint8_t row, uint8_t clusterState, bool mirrorParameters, int8_t sector); GPUd() int32_t MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, GPUTPCGMPropagator& prop, float& xx, float& yy, float& zz, int32_t maxN, float clAlpha, uint8_t& clusterState, bool rejectChi2); @@ -183,8 +183,8 @@ class GPUTPCGMTrackParam } GPUd() void Rotate(float alpha); - GPUd() void ShiftZ(const GPUTPCGMMerger* merger, int32_t sector, float tzInner, float tzOuter, float x1, float x2); - GPUd() void ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N); + GPUd() void ShiftZ(const GPUTPCGMMerger* merger, int32_t sector, float cltmax, float cltmin, float clx); + GPUd() void ShiftZ(const GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N); GPUd() static float Reciprocal(float x) { return 1.f / x; } GPUdi() static void Assign(float& x, bool mask, float v) From 6ee63190fb8ecf19dc09c2767f3f072bef23e688 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 10 Sep 2025 14:01:40 +0200 Subject: [PATCH 25/52] GPU TPC: Restrict CE-merging to primary legs of segmented tracks --- 
GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 32 +++++++++++++---------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 58dba0b22230f..4b7e8481152e6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1348,23 +1348,15 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i continue; } bool celooper = (trk[0]->GetParam().GetQPt() * Param().qptB5Scaler > 1 && trk[0]->GetParam().GetQPt() * trk[1]->GetParam().GetQPt() < 0); + celooper |= trk[0]->PrevSegment() != -1 && trk[1]->PrevSegment() != -1; if (!celooper && trk[0]->GetParam().GetPar(3) * trk[1]->GetParam().GetPar(3) < 0) { continue; } - uint32_t newRef = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); - if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxMergedTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxMergedTrackClusters); - for (uint32_t k = newRef; k < mNMaxMergedTrackClusters; k++) { - mClusters[k].num = 0; - mClusters[k].state = 0; - } - CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); - return; - } - bool needswap = false; - if (celooper) { + if (trk[0]->PrevSegment() == -1 && trk[1]->PrevSegment() >= 0) { + needswap = true; + } else if (celooper) { const float z0max = -CAMath::Min(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime()); const float z1max = -CAMath::Min(cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime()); if (z1max < z0max) { @@ -1379,15 +1371,27 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i GPUCommonAlgorithm::swap(trk[0], 
trk[1]); } - if (celooper) { + if (celooper) { // TODO: Need propper handling, avoid falsely flagging the primary leg as looper trk[0]->SetMergedLooperConnected(true); trk[0]->SetCCE(true); trk[0]->SetLooper(true); + trk[1]->SetMergedLooperConnected(true); trk[1]->SetCCE(true); trk[1]->SetLooper(true); continue; } + uint32_t newRef = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); + if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxMergedTrackClusters); + for (uint32_t k = newRef; k < mNMaxMergedTrackClusters; k++) { + mClusters[k].num = 0; + mClusters[k].state = 0; + } + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); + return; + } + if (Param().par.continuousTracking) { GPUTPCGMMergedTrackHit* clsmax; const float tmax = CAMath::MaxWithRef(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime(), @@ -1747,7 +1751,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetNClusters(0); } if (mergedTrack.NClusters() && mergedTrack.OK()) */ - if (Param().rec.tpc.mergeCE) { + if (leg == 0 && Param().rec.tpc.mergeCE) { auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; bool CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? 
(nHits - 1) : 0], iOutputTrack); From fa6e238f6704c944035cf4d85bb96eef36dbab20 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 26 Jul 2025 13:36:08 +0200 Subject: [PATCH 26/52] GPU: Remove obsolete mergerCovSource and dropSecondaryLegsInOutput, rename TZOffset to TOffset and similar --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 - .../Global/GPUChainTrackingTRD.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 68 ++++++++----------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 6 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 10 +-- .../Merger/GPUTPCGMSectorTrack.cxx | 10 +-- GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h | 20 +++--- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 24 +++---- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 8 +-- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- .../tools/GPUExtractPbPbCollision.h | 2 +- .../display/render/GPUDisplayDraw.cxx | 2 +- GPU/GPUTracking/qa/GPUQA.cxx | 14 ++-- 14 files changed, 80 insertions(+), 92 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 60ee12252b0a8..75d9230a364f0 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -148,12 +148,10 @@ AddOptionRTC(compressionSortOrder, uint8_t, o2::gpu::GPUSettings::SortTime, "", AddOptionRTC(sigBitsCharge, uint8_t, 4, "", 0, "Number of significant bits for TPC cluster charge in compression mode 1") AddOptionRTC(sigBitsWidth, uint8_t, 3, "", 0, "Number of significant bits for TPC cluster width in compression mode 1") AddOptionRTC(dropLoopers, uint8_t, 0, "", 0, "Drop looping tracks starting from second loop") -AddOptionRTC(mergerCovSource, uint8_t, 2, "", 0, "Method to obtain covariance in track merger: 0 = simple filterErrors method, 1 = use cov from track following, 2 = refit (default)") AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation 
instead of extrapolation for chi2 based cluster rejection") AddOptionRTC(mergerInterpolateRejectAlsoOnCurrentPosition, uint8_t, 1, "", 0, "When using mergerInterpolateErrors, reject based on chi2 twice computed with interpolated and current track position") AddOptionRTC(mergeCE, uint8_t, 1, "", 0, "Merge tracks accross the central electrode") AddOptionRTC(retryRefit, int8_t, 1, "", 0, "Retry refit with seeding errors and without cluster rejection when fit fails (=2 means retry in same kernel, =1 for separate kernel") -AddOptionRTC(dropSecondaryLegsInOutput, int8_t, 1, "", 0, "Do not store secondary legs of looping track in TrackTPC") AddOptionRTC(enablePID, int8_t, 1, "", 0, "Enable PID response") AddOptionRTC(PID_useNsigma, int8_t, 1, "", 0, "Use nSigma instead of absolute distance in PID response") AddOptionRTC(adddEdxSubThresholdClusters, int8_t, 1, "", 0, "Add sub threshold clusters in TPC dEdx computation") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx index f9011131803e3..9f72b7443c49f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx @@ -69,7 +69,7 @@ int32_t GPUChainTracking::RunTRDTrackingInternal() GPUTRDTrackerGPU::HelperTrackAttributes trkAttribs, *trkAttribsPtr{nullptr}; if (!isTriggeredEvent) { const float tpcTBinMUS = 0.199606f; - trkAttribs.mTime = trk.GetParam().GetTZOffset() * tpcTBinMUS; + trkAttribs.mTime = trk.GetParam().GetTOffset() * tpcTBinMUS; trkAttribs.mTimeAddMax = 50.f; // half of a TPC drift time in us trkAttribs.mTimeSubMax = 50.f; // half of a TPC drift time in us if (!trk.CCE()) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 4b7e8481152e6..b2d1f91a96d01 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -514,8 +514,8 @@ GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& 
sectorTrack trk.SinPhi() = inTrack->Param().GetSinPhi(); trk.DzDs() = inTrack->Param().GetDzDs(); trk.QPt() = inTrack->Param().GetQPt(); - trk.TZOffset() = GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin); - const auto tmp = sectorTrack.ClusterZTN() > sectorTrack.ClusterZT0() ? std::array{sectorTrack.ClusterZTN(), sectorTrack.ClusterZT0()} : std::array{sectorTrack.ClusterZT0(), sectorTrack.ClusterZTN()}; + trk.TOffset() = GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin); + const auto tmp = sectorTrack.ClusterTN() > sectorTrack.ClusterT0() ? std::array{sectorTrack.ClusterTN(), sectorTrack.ClusterT0()} : std::array{sectorTrack.ClusterT0(), sectorTrack.ClusterTN()}; trk.ShiftZ(this, sector, tmp[0], tmp[1], inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead sectorTrack.SetX2(0.f); for (int32_t way = 0; way < 2; way++) { @@ -537,7 +537,7 @@ GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack row = ic.RowIndex(); const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sector][0] + clusterIndex]; flags = cl.getFlags(); - GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); + GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TOffset()); if (prop.PropagateToXAlpha(x, alpha, way == 0)) { return way == 0; } @@ -556,7 +556,7 @@ GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack return 0; } -GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr) +GPUd() void 
GPUTPCGMMerger::SetTrackClusterT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr) { const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector]; const GPUTPCHitId& ic1 = trk.TrackHits()[sectorTr->FirstHitID()]; @@ -564,7 +564,7 @@ GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_ int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex()); int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex()); const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0]; - track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime()); + track.SetClusterT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime()); } GPUd() void GPUTPCGMMerger::UnpackSaveNumber(int32_t id) @@ -587,7 +587,7 @@ GPUd() void GPUTPCGMMerger::UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads } uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u); GPUTPCGMSectorTrack& track = mSectorTrackInfos[myTrack]; - SetTrackClusterZT(track, iSector, sectorTr); + SetTrackClusterT(track, iSector, sectorTr); track.Set(this, sectorTr, alpha, iSector); track.SetGlobalSectorTrackCov(); track.SetPrevNeighbour(-1); @@ -618,22 +618,12 @@ GPUd() void GPUTPCGMMerger::RefitSectorTracks(int32_t nBlocks, int32_t nThreads, for (uint32_t itr = iBlock * nThreads + iThread; itr < nLocalTracks; itr += nBlocks * nThreads) { sectorTr = &trk.Tracks()[itr]; GPUTPCGMSectorTrack track; - SetTrackClusterZT(track, iSector, sectorTr); - if (Param().rec.tpc.mergerCovSource == 0) { - track.Set(this, sectorTr, alpha, iSector); + SetTrackClusterT(track, iSector, sectorTr); + if (RefitSectorTrack(track, sectorTr, alpha, iSector)) { + track.Set(this, sectorTr, alpha, iSector); // TODO: Why does the refit fail, it shouldn't, this workaround should be removed if (!track.FilterErrors(this, 
iSector, GPUCA_MAX_SIN_PHI, 0.1f)) { continue; } - } else if (Param().rec.tpc.mergerCovSource == 1) { - track.Set(this, sectorTr, alpha, iSector); - track.CopyBaseTrackCov(); - } else if (Param().rec.tpc.mergerCovSource == 2) { - if (RefitSectorTrack(track, sectorTr, alpha, iSector)) { - track.Set(this, sectorTr, alpha, iSector); // TODO: Why does the refit fail, it shouldn't, this workaround should be removed - if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) { - continue; - } - } } CADEBUG(GPUInfo("INPUT Sector %d, Track %u, QPt %f DzDs %f", iSector, itr, track.QPt(), track.DzDs())); @@ -709,7 +699,7 @@ GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nT } trackTmp = *trackMin; track = &trackTmp; - if (Param().rec.tpc.mergerCovSource == 2 && trackTmp.X2() != 0.f) { + if (trackTmp.X2() != 0.f) { trackTmp.UseParam2(); } else { trackTmp.Set(this, trackMin->OrigTrack(), trackMin->Alpha(), trackMin->Sector()); @@ -1222,17 +1212,17 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSectors(GPUResolveSharedMemory& smem, in } float z1min, z1max, z2min, z2max; - z1min = track1->MinClusterZT(); - z1max = track1->MaxClusterZT(); - z2min = track2->MinClusterZT(); - z2max = track2->MaxClusterZT(); + z1min = track1->MinClusterT(); + z1max = track1->MaxClusterT(); + z2min = track2->MinClusterT(); + z2max = track2->MaxClusterT(); if (track1 != track1Base) { - z1min = CAMath::Min(z1min, track1Base->MinClusterZT()); - z1max = CAMath::Max(z1max, track1Base->MaxClusterZT()); + z1min = CAMath::Min(z1min, track1Base->MinClusterT()); + z1max = CAMath::Max(z1max, track1Base->MaxClusterT()); } if (track2 != track2Base) { - z2min = CAMath::Min(z2min, track2Base->MinClusterZT()); - z2max = CAMath::Max(z2max, track2Base->MaxClusterZT()); + z2min = CAMath::Min(z2min, track2Base->MinClusterT()); + z2max = CAMath::Max(z2max, track2Base->MaxClusterT()); } bool goUp = z2max - z1min > z1max - z2min; @@ -1399,8 +1389,8 @@ GPUd() void 
GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1], &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax); const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f); - trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TZOffset() - offset); - trk[1]->Param().TZOffset() = offset; + trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TOffset() - offset); + trk[1]->Param().TOffset() = offset; } int32_t pos = newRef; #pragma unroll @@ -1503,19 +1493,19 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread jtr = mSectorTrackInfos[jtr].NextNeighbour(); } - float mainZT = 1e9; + float mainT = 1e9; revertSegments = true; for (uint32_t k = 0; k < 2; k++) { // --------------- check if first or last segment is primary --------------- int32_t ichk = k ? 
lasttr : itr; const GPUTPCGMSectorTrack* trchk = &mSectorTrackInfos[ichk]; while (true) { - float zt = -trchk->MinClusterZT(); - if (zt < mainZT) { + float t = -trchk->MinClusterT(); + if (t < mainT) { if (k) { revertSegments = false; break; } - mainZT = zt; + mainT = t; } int32_t next = trchk->NextSegmentNeighbour(); if (next < 0 || next == ichk) { @@ -1544,7 +1534,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } trchk = &mSectorTrackInfos[next]; } - revertInSegment = longest->ClusterZT0() < longest->ClusterZTN(); + revertInSegment = longest->ClusterT0() < longest->ClusterTN(); } } lastMergedSegment = -1; @@ -1735,7 +1725,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread p1.Z() = p2.Z(); p1.SinPhi() = p2.SinPhi(); } - p1.TZOffset() = p2.TZOffset(); + p1.TOffset() = p2.TOffset(); p1.DzDs() = p2.DzDs(); p1.QPt() = p2.QPt(); mergedTrack.SetAlpha(p2.Alpha()); @@ -1823,7 +1813,7 @@ GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, in break; } updTrk = next; - updTrk->Param().TZOffset() = trk.Param().TZOffset(); + updTrk->Param().TOffset() = trk.Param().TOffset(); } } } @@ -1901,7 +1891,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, const float qptabs = CAMath::Abs(p.GetQPt()); if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector; - const float refz = p.GetZ() + GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTZOffset(), Param().continuousMaxTimeBin) + (trk.CSide() ? -100 : 100); + const float refz = p.GetZ() + GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTOffset(), Param().continuousMaxTimeBin) + (trk.CSide() ? 
-100 : 100); float sinA, cosA; CAMath::SinCos(trk.GetAlpha(), sinA, cosA); float gx = cosA * p.GetX() - sinA * p.GetY(); @@ -1925,7 +1915,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, for (uint32_t k = 0;k < trk.NClusters();k++) { float xx, yy, zz; const ClusterNative& GPUrestrict() cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[mClusters[trk.FirstClusterRef() + k].num]; - GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTZOffset()); + GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTOffset()); float sa2, ca2; CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].sector), sa2, ca2); float cx = ca2 * xx - sa2 * yy; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 2576ed0720c16..0159b795aa963 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -151,7 +151,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUd() uint16_t MemoryResOutputO2Scratch() const { return mMemoryResOutputO2Scratch; } GPUd() int32_t RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack, const GPUTPCTrack* inTrack, float alpha, int32_t sector); - GPUd() void SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr); + GPUd() void SetTrackClusterT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr); int32_t CheckSectors(); GPUd() void RefitSectorTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index e1cec59d96b95..863998079f2cd 100644 --- 
a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -51,7 +51,7 @@ void GPUTPCGMMerger::DumpSectorTracks(std::ostream& out) const out << " Track type " << iGlobal << "\n"; for (int32_t j = mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal]; j < mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal + 1]; j++) { const auto& trk = mSectorTrackInfos[j]; - out << " Track " << j << ": LocalId " << (iGlobal ? (trk.LocalTrackId() >> 24) : -1) << "/" << (iGlobal ? (trk.LocalTrackId() & 0xFFFFFF) : -1) << " X " << trk.X() << " offsetz " << trk.TZOffset() << " A " << trk.Alpha() << " Y " << trk.Y() << " Z " << trk.Z() << " SinPhi " << trk.SinPhi() << " CosPhi " << trk.CosPhi() << " SecPhi " << trk.SecPhi() << " Tgl " << trk.DzDs() << " QPt " << trk.QPt() << "\n"; + out << " Track " << j << ": LocalId " << (iGlobal ? (trk.LocalTrackId() >> 24) : -1) << "/" << (iGlobal ? (trk.LocalTrackId() & 0xFFFFFF) : -1) << " X " << trk.X() << " offsetz " << trk.TOffset() << " A " << trk.Alpha() << " Y " << trk.Y() << " Z " << trk.Z() << " SinPhi " << trk.SinPhi() << " CosPhi " << trk.CosPhi() << " SecPhi " << trk.SecPhi() << " Tgl " << trk.DzDs() << " QPt " << trk.QPt() << "\n"; } } } @@ -140,7 +140,7 @@ void GPUTPCGMMerger::DumpCollected(std::ostream& out) const for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); - out << " Track " << i << ": Loop " << trk.Looper() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << "\n"; + out << " Track " << i << ": Loop " << trk.Looper() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl 
" << trk.NClusters() << "\n"; } out << std::setprecision(ss); } @@ -201,7 +201,7 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const } const auto& p = trk.GetParam(); const auto& po = trk.OuterParam(); - out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() + out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxMaxTPC : -1.f) << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << " NFitted " << trk.NClustersFitted() << " flags " << (int)trk.Flags() << "\n"; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 35de1611e280c..9c789a8d95f82 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -33,7 +33,7 @@ using namespace o2::tpc; using namespace o2::tpc::constants; GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagHighIncl; } -GPUdi() static uint32_t getFlagsRequired(const GPUSettingsRec& rec) { return rec.tpc.dropSecondaryLegsInOutput ? 
gputpcgmmergertypes::attachGoodLeg : gputpcgmmergertypes::attachZero; } +GPUdi() static uint32_t getFlagsRequired(const GPUSettingsRec& rec) { return gputpcgmmergertypes::attachGoodLeg; } namespace o2::gpu::internal { @@ -65,7 +65,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlock if (!tracks[i].OK()) { continue; } - if (merger.Param().rec.tpc.dropSecondaryLegsInOutput && tracks[i].MergedLooper()) { + if (tracks[i].MergedLooper()) { continue; } @@ -79,7 +79,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlock if (nCl == 0) { continue; } - if (merger.Param().rec.tpc.dropSecondaryLegsInOutput && nCl + 2 < GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(tracks[i].GetParam().GetQPt() * merger.Param().qptB5Scaler)) { // Give 2 hits tolerance in the primary leg, compared to the full fit of the looper + if (nCl + 2 < GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(tracks[i].GetParam().GetQPt() * merger.Param().qptB5Scaler)) { // Give 2 hits tolerance in the primary leg, compared to the full fit of the looper continue; } if (merger.Param().rec.tpc.minNClustersFinalTrack != -1 && nCl < (uint32_t)merger.Param().rec.tpc.minNClustersFinalTrack) { @@ -90,7 +90,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlock } uint32_t myId = CAMath::AtomicAdd(&merger.Memory()->nO2Tracks, 1u); tmpData[i] = {nCl, CAMath::AtomicAdd(&merger.Memory()->nO2ClusRefs, nCl + (nCl + 1) / 2)}; - trackSort[myId] = {i, tracks[i].CSide() ? tracks[i].GetParam().GetTZOffset() : -tracks[i].GetParam().GetTZOffset()}; + trackSort[myId] = {i, tracks[i].CSide() ? 
tracks[i].GetParam().GetTOffset() : -tracks[i].GetParam().GetTOffset()}; } } @@ -228,7 +228,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks bool cce = track.CCE() && ((sector1 < MAXSECTOR / 2) ^ (sector2 < MAXSECTOR / 2)); float time0 = 0.f, tFwd = 0.f, tBwd = 0.f; if (merger.Param().par.continuousTracking) { - time0 = track.GetParam().GetTZOffset(); + time0 = track.GetParam().GetTOffset(); if (cce) { bool lastSide = trackClusters[track.FirstClusterRef()].sector < MAXSECTOR / 2; float delta = 0.f; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx index a44837c897f46..bce70ea79f322 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx @@ -37,7 +37,7 @@ GPUd() void GPUTPCGMSectorTrack::Set(const GPUTPCGMMerger* merger, const GPUTPCT mParam.mSecPhi = 1.f / mParam.mCosPhi; mAlpha = alpha; mSector = sector; - mTZOffset = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, t.GetZOffset(), merger->Param().continuousMaxTimeBin); + mTOffset = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, t.GetZOffset(), merger->Param().continuousMaxTimeBin); mNClusters = sectorTr->NHits(); } @@ -54,7 +54,7 @@ GPUd() void GPUTPCGMSectorTrack::Set(const GPUTPCGMTrackParam& trk, const GPUTPC mParam.mSecPhi = 1.f / mParam.mCosPhi; mAlpha = alpha; mSector = sector; - mTZOffset = trk.GetTZOffset(); + mTOffset = trk.GetTOffset(); mNClusters = sectorTr->NHits(); mParam.mC0 = trk.GetCov(0); mParam.mC2 = trk.GetCov(2); @@ -256,7 +256,7 @@ GPUd() bool GPUTPCGMSectorTrack::FilterErrors(const GPUTPCGMMerger* merger, int3 //* Check that the track parameters and covariance matrix are reasonable - bool ok = CAMath::Finite(mParam.mX) && CAMath::Finite(mParam.mY) && CAMath::Finite(mParam.mZ) && CAMath::Finite(mParam.mSinPhi) && CAMath::Finite(mParam.mDzDs) && 
CAMath::Finite(mParam.mQPt) && CAMath::Finite(mParam.mCosPhi) && CAMath::Finite(mParam.mSecPhi) && CAMath::Finite(mTZOffset) && CAMath::Finite(mParam.mC0) && CAMath::Finite(mParam.mC2) && + bool ok = CAMath::Finite(mParam.mX) && CAMath::Finite(mParam.mY) && CAMath::Finite(mParam.mZ) && CAMath::Finite(mParam.mSinPhi) && CAMath::Finite(mParam.mDzDs) && CAMath::Finite(mParam.mQPt) && CAMath::Finite(mParam.mCosPhi) && CAMath::Finite(mParam.mSecPhi) && CAMath::Finite(mTOffset) && CAMath::Finite(mParam.mC0) && CAMath::Finite(mParam.mC2) && CAMath::Finite(mParam.mC3) && CAMath::Finite(mParam.mC5) && CAMath::Finite(mParam.mC7) && CAMath::Finite(mParam.mC9) && CAMath::Finite(mParam.mC10) && CAMath::Finite(mParam.mC12) && CAMath::Finite(mParam.mC14); if (mParam.mC0 <= 0.f || mParam.mC2 <= 0.f || mParam.mC5 <= 0.f || mParam.mC9 <= 0.f || mParam.mC14 <= 0.f || mParam.mC0 > 5.f || mParam.mC2 > 5.f || mParam.mC5 > 2.f || mParam.mC9 > 2.f) { @@ -323,7 +323,7 @@ GPUd() bool GPUTPCGMSectorTrack::TransportToX(GPUTPCGMMerger* merger, float x, f b.SetPar(2, ey1); b.SetPar(3, param.mDzDs); b.SetPar(4, param.mQPt); - b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); + b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTOffset, merger->Param().continuousMaxTimeBin)); if (!doCov) { return (1); @@ -478,7 +478,7 @@ GPUd() bool GPUTPCGMSectorTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float b.SetPar(2, ey1); b.SetPar(3, dzds); b.SetPar(4, qpt); - b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); + b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTOffset, 
merger->Param().continuousMaxTimeBin)); b.SetCov(0, c00 + h2 * h2c22 + h4 * h4c44 + 2.f * (h2 * c20ph4c42 + h4 * c40)); b.SetCov(1, c11 + dS * (c31 + n7)); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h index 81facce76cf10..60febbb4428f6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h @@ -54,7 +54,7 @@ class GPUTPCGMSectorTrack GPUd() float SecPhi() const { return mParam.mSecPhi; } GPUd() float DzDs() const { return mParam.mDzDs; } GPUd() float QPt() const { return mParam.mQPt; } - GPUd() float TZOffset() const { return mTZOffset; } + GPUd() float TOffset() const { return mTOffset; } GPUd() int32_t LocalTrackId() const { return mLocalTrackId; } GPUd() void SetLocalTrackId(int32_t v) { mLocalTrackId = v; } @@ -62,14 +62,14 @@ class GPUTPCGMSectorTrack GPUd() void SetExtrapolatedTrackId(int32_t n, int32_t v) { mExtrapolatedTrackIds[n] = v; } GPUd() int32_t* ExtrapolatedTrackIds() { return mExtrapolatedTrackIds; } - GPUd() float MaxClusterZT() const { return CAMath::Max(mClusterZT[0], mClusterZT[1]); } - GPUd() float MinClusterZT() const { return CAMath::Min(mClusterZT[0], mClusterZT[1]); } - GPUd() float ClusterZT0() const { return mClusterZT[0]; } - GPUd() float ClusterZTN() const { return mClusterZT[1]; } - GPUd() void SetClusterZT(float v1, float v2) + GPUd() float MaxClusterT() const { return CAMath::Max(mClusterT[0], mClusterT[1]); } + GPUd() float MinClusterT() const { return CAMath::Min(mClusterT[0], mClusterT[1]); } + GPUd() float ClusterT0() const { return mClusterT[0]; } + GPUd() float ClusterTN() const { return mClusterT[1]; } + GPUd() void SetClusterT(float v1, float v2) { - mClusterZT[0] = v1; - mClusterZT[1] = v2; + mClusterT[0] = v1; + mClusterT[1] = v2; } GPUd() void Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sectorTr, float alpha, int32_t sector); @@ -126,9 +126,9 @@ class GPUTPCGMSectorTrack const GPUTPCTrack* mOrigTrack; 
// pointer to original sector track sectorTrackParam mParam; // Track parameters sectorTrackParam mParam2; // Parameters at other side - float mTZOffset; // Z offset with early transform, T offset otherwise + float mTOffset; // Z offset with early transform, T offset otherwise float mAlpha; // alpha angle - float mClusterZT[2]; // Minimum maximum cluster Z / T + float mClusterT[2]; // Minimum maximum cluster T int32_t mNClusters; // N clusters int32_t mNeighbour[2]; // int32_t mSegmentNeighbour[2]; // diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 52b02c7502a6a..30fcf7053089b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -134,7 +134,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float xx, yy, zz; { const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num]; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), xx, yy, zz, mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), xx, yy, zz, mTOffset); } // clang-format off CADEBUG(printf("\tHit %3d/%3d Row %3d: Cluster Alpha %8.3f %3d, X %8.3f - Y %8.3f, Z %8.3f (Missed %d)\n", ihit, maxN, (int32_t)clusters[ihit].row, clAlpha, (int32_t)clusters[ihit].sector, xx, yy, zz, nMissed)); @@ -469,7 +469,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num]; float clamp = cl.qTot; float clx, cly, clz; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), clx, cly, clz, 
mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), clx, cly, clz, mTOffset); float dy = cly - projY; float dz = clz - projZ; if (noReject == 0 && (dy * dy > maxDistY || dz * dz > maxDistZ)) { @@ -526,7 +526,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return -1e6f; } - const float zOffset = Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); + const float zOffset = Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTOffset, Merger->Param().continuousMaxTimeBin); const float y0 = row.Grid().YMin(); const float stepY = row.HstepY(); const float z0 = row.Grid().ZMin() - zOffset; // We can use our own ZOffset, since this is only used temporarily anyway @@ -886,27 +886,27 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge if (!beamlineReached) { float refZ = ((sector < GPUCA_NSECTORS / 2) ? 
merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * clx; float basez; - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(sector, cltmax, basez, mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(sector, cltmax, basez, mTOffset); deltaZ = basez - refZ; } { float deltaT = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaZtoDeltaTimeInTimeFrame(sector, deltaZ); - mTZOffset += deltaT; + mTOffset += deltaT; mP[1] -= deltaZ; const float maxT = cltmin - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getT0(); const float minT = cltmax - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(sector); - // printf("T Check: Clusters %f %f, min %f max %f vtx %f\n", tz1, tz2, minT, maxT, mTZOffset); + // printf("T Check: Clusters %f %f, min %f max %f vtx %f\n", tz1, tz2, minT, maxT, mTOffset); deltaT = 0.f; - if (mTZOffset < minT) { - deltaT = minT - mTZOffset; + if (mTOffset < minT) { + deltaT = minT - mTOffset; } - if (mTZOffset + deltaT > maxT) { - deltaT = maxT - mTZOffset; + if (mTOffset + deltaT > maxT) { + deltaT = maxT - mTOffset; } if (deltaT != 0.f) { deltaZ = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(sector, deltaT); - // printf("Moving clusters to TPC Range: QPt %f, New mTZOffset %f, t1 %f, t2 %f, Shift %f in Z: %f to %f --> %f to %f in T\n", mP[4], mTZOffset + deltaT, tz1, tz2, deltaZ, tz2 - mTZOffset, tz1 - mTZOffset, tz2 - mTZOffset - deltaT, tz1 - mTZOffset - deltaT); - mTZOffset += deltaT; + // printf("Moving clusters to TPC Range: QPt %f, New mTOffset %f, t1 %f, t2 %f, Shift %f in Z: %f to %f --> %f to %f in T\n", mP[4], mTOffset + deltaT, tz1, tz2, deltaZ, tz2 - mTOffset, tz1 - mTOffset, tz2 - mTOffset - deltaT, tz1 - mTOffset - deltaT); + mTOffset += deltaT; mP[1] -= 
deltaZ; } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index 0b65e5f155104..0cf65f84d1c44 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -71,9 +71,9 @@ class GPUTPCGMTrackParam { return mP[4]; } - GPUd() float& TZOffset() + GPUd() float& TOffset() { - return mTZOffset; + return mTOffset; } GPUhd() float GetX() const { return mX; } @@ -82,7 +82,7 @@ class GPUTPCGMTrackParam GPUd() float GetSinPhi() const { return mP[2]; } GPUd() float GetDzDs() const { return mP[3]; } GPUd() float GetQPt() const { return mP[4]; } - GPUd() float GetTZOffset() const { return mTZOffset; } + GPUd() float GetTOffset() const { return mTOffset; } GPUd() float GetKappa(float Bz) const { return -mP[4] * Bz; } @@ -217,7 +217,7 @@ class GPUTPCGMTrackParam GPUd() int32_t initResetT0(); float mX; // x position - float mTZOffset; // Z offset with early transform, T offset otherwise + float mTOffset; // Z offset with early transform, T offset otherwise float mP[5]; // 'active' track parameters: Y, Z, SinPhi, DzDs, q/Pt float mC[15]; // the covariance matrix for Y,Z,SinPhi,.. 
float mChi2; // the chi^2 value diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 29ccab2a765da..27426cf0ff6a7 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -223,7 +223,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov float tOffset; if constexpr (std::is_same_v) { count = trkX.NClusters(); - tOffset = trkX.GetParam().GetTZOffset(); + tOffset = trkX.GetParam().GetTOffset(); } else if constexpr (std::is_same_v) { count = trkX.getNClusters(); tOffset = trkX.getTime0(); diff --git a/GPU/GPUTracking/Standalone/tools/GPUExtractPbPbCollision.h b/GPU/GPUTracking/Standalone/tools/GPUExtractPbPbCollision.h index fd49c89ae9073..b83d2c40be81c 100644 --- a/GPU/GPUTracking/Standalone/tools/GPUExtractPbPbCollision.h +++ b/GPU/GPUTracking/Standalone/tools/GPUExtractPbPbCollision.h @@ -34,7 +34,7 @@ static void GPUExtractPbPbCollision(GPUParam& param, GPUTrackingInOutPointers& i if (ioPtrs.mergedTracks[i].NClusters() < 40) { continue; } - int32_t time = ioPtrs.mergedTracks[i].GetParam().GetTZOffset(); + int32_t time = ioPtrs.mergedTracks[i].GetParam().GetTOffset(); if (time < 0 || time > param.continuousMaxTimeBin) { continue; } diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 608eeb056b6ad..4953815a6fc19 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -556,7 +556,7 @@ void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMProp auto cl = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + lastCluster]; const auto& cln = mIOPtrs->clustersNative->clustersLinear[cl.num]; GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, cl.sector, cl.row, cln.getPad(), cln.getTime(), x, y, z); - ZOffset = 
mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(iSector, track->GetParam().GetTZOffset(), mParam->continuousMaxTimeBin); + ZOffset = mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(iSector, track->GetParam().GetTOffset(), mParam->continuousMaxTimeBin); } else { uint8_t sector, row; auto cln = track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative, sector, row); diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 31bfaa47d7420..079e7e7be4dc7 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -1048,8 +1048,8 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx if (revLabel == -1) { comp = true; } else { - float shift1 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[i].CSide() * GPUChainTracking::NSECTORS / 2, trks[i].GetParam().GetTZOffset()); - float shift2 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[revLabel].CSide() * GPUChainTracking::NSECTORS / 2, trks[revLabel].GetParam().GetTZOffset()); + float shift1 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[i].CSide() * GPUChainTracking::NSECTORS / 2, trks[i].GetParam().GetTOffset()); + float shift2 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[revLabel].CSide() * GPUChainTracking::NSECTORS / 2, trks[revLabel].GetParam().GetTOffset()); comp = fabsf(trks[i].GetParam().GetZ() + shift1) < fabsf(trks[revLabel].GetParam().GetZ() + shift2); } if (revLabel == -1 || !trks[revLabel].OK() || (trks[i].OK() && comp)) { @@ -1345,7 +1345,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx param.Cov()[k] = (*tracksExternal)[i].getCov()[k]; } param.X() = (*tracksExternal)[i].getX(); - param.TZOffset() = (*tracksExternal)[i].getTime0(); + param.TOffset() = (*tracksExternal)[i].getTime0(); alpha = 
(*tracksExternal)[i].getAlpha(); side = (*tracksExternal)[i].hasBothSidesClusters() ? 2 : ((*tracksExternal)[i].hasCSideClusters() ? 1 : 0); #endif @@ -1384,7 +1384,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx if (!mParam->continuousMaxTimeBin) { return param.GetZ() - mc1.z; } - float shift = side == 2 ? 0 : mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(side * GPUChainTracking::NSECTORS / 2, param.GetTZOffset() - mc1.t0); + float shift = side == 2 ? 0 : mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(side * GPUChainTracking::NSECTORS / 2, param.GetTOffset() - mc1.t0); return param.GetZ() + shift - mc1.z; }; @@ -1726,10 +1726,10 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } mNCl[1]->Fill(nClCorrected); - mT0[0]->Fill(track.GetParam().GetTZOffset()); + mT0[0]->Fill(track.GetParam().GetTOffset()); if (mTrackMCLabels.size() && !mTrackMCLabels[i].isFake() && !track.MergedLooper() && !track.CCE()) { const auto& info = GetMCTrack(mTrackMCLabels[i]); - mT0[1]->Fill(track.GetParam().GetTZOffset() - info.t0); + mT0[1]->Fill(track.GetParam().GetTOffset() - info.t0); } } if (mClNative && mTracking && mTracking->GetTPCTransformHelper()) { @@ -1849,7 +1849,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx if (attach & gputpcgmmergertypes::attachFlagMask) { uint32_t track = attach & gputpcgmmergertypes::attachTrackMask; const auto& trk = mTracking->mIOPtrs.mergedTracks[track]; - mTracking->GetTPCTransformHelper()->Transform(i, j, cl.getPad(), cl.getTime(), x, y, z, trk.GetParam().GetTZOffset()); + mTracking->GetTPCTransformHelper()->Transform(i, j, cl.getPad(), cl.getTime(), x, y, z, trk.GetParam().GetTOffset()); mTracking->GetParam().Sector2Global(i, x, y, z, &x, &y, &z); } uint32_t extState = mTracking->mIOPtrs.mergedTrackHitStates ? 
mTracking->mIOPtrs.mergedTrackHitStates[clid] : 0; From 6f4f0aee2002daa49800379c0070f3b1ad298430 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 26 Jul 2025 15:30:26 +0200 Subject: [PATCH 27/52] GPU TPC: Fix handling of ce-crossing looping tracks --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 25 ++++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index b2d1f91a96d01..acd8331e8bf8f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1361,6 +1361,21 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i GPUCommonAlgorithm::swap(trk[0], trk[1]); } + if (Param().par.continuousTracking) { + GPUTPCGMMergedTrackHit* clsmax; + const float tmax = CAMath::MaxWithRef(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime(), + cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(), + &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1], + &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax); + const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f); + trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TOffset() - offset); + trk[1]->Param().TOffset() = offset; + if (celooper) { + trk[0]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[0]->CSide() * NSECTORS / 2, trk[0]->Param().TOffset() - offset); + 
trk[0]->Param().TOffset() = offset; + } + } + if (celooper) { // TODO: Need propper handling, avoid falsely flagging the primary leg as looper trk[0]->SetMergedLooperConnected(true); trk[0]->SetCCE(true); @@ -1382,16 +1397,6 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i return; } - if (Param().par.continuousTracking) { - GPUTPCGMMergedTrackHit* clsmax; - const float tmax = CAMath::MaxWithRef(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime(), - cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(), - &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1], - &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax); - const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f); - trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TOffset() - offset); - trk[1]->Param().TOffset() = offset; - } int32_t pos = newRef; #pragma unroll for (int32_t k = 1; k >= 0; k--) { From 9dc25499a95c976f533b72d2917b3443c0246997 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 29 Jul 2025 15:17:56 +0200 Subject: [PATCH 28/52] GPU TPC: Always store outer param --- .../reconstruction/test/testGPUCATracking.cxx | 17 ++++++++--------- GPU/GPUTracking/Base/GPUReconstruction.cxx | 1 - GPU/GPUTracking/Definitions/GPUSettingsList.h | 3 +-- GPU/GPUTracking/Global/GPUChainTracking.cxx | 4 ++-- GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 12 ++++++------ GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 2 
+- .../Standalone/Benchmark/standalone.cxx | 4 ---- GPU/Workflow/src/GPUWorkflowSpec.cxx | 1 - 9 files changed, 19 insertions(+), 27 deletions(-) diff --git a/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx b/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx index bdf9b95e94450..3e196fa9bb7cc 100644 --- a/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx +++ b/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx @@ -50,24 +50,23 @@ BOOST_AUTO_TEST_CASE(CATracking_test1) { GPUO2Interface tracker; - float solenoidBz = -5.00668; //B-field - float refX = 1000.; //transport tracks to this x after tracking, >500 for disabling - bool continuous = false; //time frame data v.s. triggered events + float solenoidBz = -5.00668; // B-field + float refX = 1000.; // transport tracks to this x after tracking, >500 for disabling + bool continuous = false; // time frame data v.s. triggered events GPUO2InterfaceConfiguration config; config.configDeviceBackend.deviceType = GPUDataTypes::DeviceType::CPU; config.configDeviceBackend.forceDeviceType = true; - config.configProcessing.ompThreads = 4; //4 threads if we run on the CPU, 1 = default, 0 = auto-detect - config.configProcessing.runQA = false; //Run QA after tracking - config.configProcessing.eventDisplay = nullptr; //Ptr to event display backend, for running standalone OpenGL event display + config.configProcessing.ompThreads = 4; // 4 threads if we run on the CPU, 1 = default, 0 = auto-detect + config.configProcessing.runQA = false; // Run QA after tracking + config.configProcessing.eventDisplay = nullptr; // Ptr to event display backend, for running standalone OpenGL event display config.configGRP.solenoidBzNominalGPU = solenoidBz; config.configGRP.grpContinuousMaxTimeBin = continuous ? GPUSettings::TPC_MAX_TF_TIME_BIN : 0; // Number of timebins in timeframe if continuous, 0 otherwise - config.configReconstruction.tpc.nWays = 3; //Should always be 3! 
- config.configReconstruction.tpc.nWaysOuter = true; //Will create outer param for TRD - config.configReconstruction.tpc.searchWindowDZDR = 2.5f; //Should always be 2.5 for looper-finding and/or continuous tracking + config.configReconstruction.tpc.nWays = 3; // Should always be 3! + config.configReconstruction.tpc.searchWindowDZDR = 2.5f; // Should always be 2.5 for looper-finding and/or continuous tracking config.configReconstruction.tpc.trackReferenceX = refX; config.configWorkflow.steps.set(GPUDataTypes::RecoStep::TPCConversion, GPUDataTypes::RecoStep::TPCSectorTracking, diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index ff4ce2c905507..b2af986ebfac9 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -270,7 +270,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() GPUError("WARNING, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); #endif mProcessingSettings->overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; - param().rec.tpc.nWaysOuter = true; if (GetProcessingSettings().createO2Output > 1) { mProcessingSettings->createO2Output = 1; } diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 75d9230a364f0..b35bc04d7232f 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -134,7 +134,6 @@ AddOptionRTC(cfNoiseSuppressionEpsilon, uint8_t, 10, "", 0, "Cluster Finder: Dif AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression, relative as fraction of 255") AddOptionRTC(cfEdgeTwoPads, uint8_t, 0, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge 
as edge cluster") AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger") -AddOptionRTC(nWaysOuter, int8_t, 0, "", 0, "Store outer param") AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits") AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 1, "", 0, "Reject clusters that get the IFC mask error during refit") AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fraction of 128") @@ -595,7 +594,7 @@ AddOption(rundEdx, int32_t, -1, "", 0, "Enable dEdx processing") AddOption(runCompression, int32_t, 1, "", 0, "Enable TPC Compression") AddOption(runTransformation, int32_t, 1, "", 0, "Enable TPC Transformation") AddOption(runRefit, bool, false, "", 0, "Enable final track refit") -AddOption(setO2Settings, bool, false, "", 0, "Set O2 defaults for outerParam, output of shared cluster map, referenceX") +AddOption(setO2Settings, bool, false, "", 0, "Set O2 defaults for output of shared cluster map, referenceX") AddHelp("help", 'h') AddHelpAll("helpall", 'H') AddSubConfig(GPUSettingsRec, rec) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index f42b7cc34df73..b69d0941d9375 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -299,8 +299,8 @@ bool GPUChainTracking::ValidateSettings() return false; } if (GetRecoSteps() & RecoStep::TRDTracking) { - if (GetProcessingSettings().trdTrackModelO2 && (GetProcessingSettings().createO2Output == 0 || param().rec.tpc.nWaysOuter == 0 || (GetMatLUT() == nullptr && !GetProcessingSettings().willProvideO2PropagatorLate))) { - GPUError("TRD tracking can only run on O2 TPC tracks if createO2Output is enabled (%d), nWaysOuter is set (%d), and matBudLUT is available (0x%p)", (int32_t)GetProcessingSettings().createO2Output, (int32_t)param().rec.tpc.nWaysOuter, (void*)GetMatLUT()); + if 
(GetProcessingSettings().trdTrackModelO2 && (GetProcessingSettings().createO2Output == 0 || (GetMatLUT() == nullptr && !GetProcessingSettings().willProvideO2PropagatorLate))) { + GPUError("TRD tracking can only run on O2 TPC tracks if createO2Output is enabled (%d), and matBudLUT is available (0x%p)", (int32_t)GetProcessingSettings().createO2Output, (void*)GetMatLUT()); return false; } if ((GetRecoStepsGPU() & RecoStep::TRDTracking) && !GetProcessingSettings().trdTrackModelO2 && GetProcessingSettings().createO2Output > 1) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx index 9f72b7443c49f..ca47d65b32cd4 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx @@ -62,7 +62,7 @@ int32_t GPUChainTracking::RunTRDTrackingInternal() if (!Tracker.PreCheckTrackTRDCandidate(trk)) { continue; } - const GPUTRDTrackGPU& trktrd = param().rec.tpc.nWaysOuter ? (GPUTRDTrackGPU)trk.OuterParam() : (GPUTRDTrackGPU)trk; + const GPUTRDTrackGPU& trktrd = (GPUTRDTrackGPU)trk.OuterParam(); if (!Tracker.CheckTrackTRDCandidate(trktrd)) { continue; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 30fcf7053089b..3cebcdbfcdb7a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -85,9 +85,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float sumInvSqrtCharge = 0.f; int32_t nAvgCharge = 0; - if (iWay && param.rec.tpc.nWaysOuter) { - if (iWay == nWays - 1) { - StoreOuter(&track.OuterParam(), prop, 0); + if (iWay) { + if (iWay && ((nWays - iWay) & 1) == 1) { + StoreOuter(&track.OuterParam(), prop.GetAlpha()); } } @@ -626,9 +626,9 @@ GPUd() bool GPUTPCGMTrackParam::FollowCircleChk(float lrFactor, float toY, float (up ? 
(-mP[0] * lrFactor > toX || (right ^ (mP[2] > 0))) : (-mP[0] * lrFactor < toX || (right ^ (mP[2] < 0)))); // don't overshoot in X } -GPUdii() void GPUTPCGMTrackParam::StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, const GPUTPCGMPropagator& prop, int32_t phase) +GPUdii() void GPUTPCGMTrackParam::StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, float alpha) { - CADEBUG(printf("\t%21sStorO%d Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f\n", "", phase, prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]))); + CADEBUG(printf("\t%21sStorO Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]))); for (int32_t i = 0; i < 5; i++) { outerParam->P[i] = mP[i]; } @@ -636,7 +636,7 @@ GPUdii() void GPUTPCGMTrackParam::StoreOuter(gputpcgmmergertypes::GPUTPCOuterPar outerParam->C[i] = mC[i]; } outerParam->X = mX; - outerParam->alpha = prop.GetAlpha(); + outerParam->alpha = alpha; } GPUdic(0, 1) void GPUTPCGMTrackParam::StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSector, int32_t toRow, bool inFlyDirection, float alpha) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index 0cf65f84d1c44..a6258e3d2595d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -153,7 +153,7 @@ class GPUTPCGMTrackParam GPUd() void AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& prop); GPUd() 
int32_t FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection); GPUd() void StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSector, int32_t toRow, bool inFlyDirection, float alpha); - GPUd() void StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, const GPUTPCGMPropagator& prop, int32_t phase); + GPUd() void StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, float alpha); GPUd() static void RefitLoop(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t loopIdx); GPUd() void AddCovDiagErrors(const float* GPUrestrict() errors2); diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 38af340d67d7a..7dcabde4e94b6 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -220,7 +220,6 @@ int32_t ReadConfiguration(int argc, char** argv) if (configStandalone.runGPU) { configStandalone.proc.forceHostMemoryPoolSize = 1024 * 1024 * 1024; } - configStandalone.rec.tpc.nWaysOuter = 1; configStandalone.rec.tpc.trackReferenceX = 83; configStandalone.proc.outputSharedClusterMap = 1; configStandalone.proc.clearO2OutputFromGPU = 1; @@ -415,9 +414,6 @@ int32_t SetupReconstruction() steps.outputs.setBits(GPUDataTypes::InOutType::TPCClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCClusterFinding)); if (steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking)) { - if (recSet.tpc.nWays > 1) { - recSet.tpc.nWaysOuter = 1; - } if (procSet.createO2Output && !procSet.trdTrackModelO2) { procSet.createO2Output = 1; // Must not be 2, to make sure TPC GPU tracks are still available for TRD } diff --git a/GPU/Workflow/src/GPUWorkflowSpec.cxx 
b/GPU/Workflow/src/GPUWorkflowSpec.cxx index 2642ff3ee1ebc..68f7be8fb6330 100644 --- a/GPU/Workflow/src/GPUWorkflowSpec.cxx +++ b/GPU/Workflow/src/GPUWorkflowSpec.cxx @@ -227,7 +227,6 @@ void GPURecoWorkflowSpec::init(InitContext& ic) mConfig->configProcessing.runQA = -mQATaskMask; } } - mConfig->configReconstruction.tpc.nWaysOuter = true; mConfig->configInterface.outputToExternalBuffers = true; if (mConfParam->synchronousProcessing) { mConfig->configReconstruction.useMatLUT = false; From ef5ba412414bc3d6cd8478f3589b7027c97bc6eb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 31 Jul 2025 16:31:05 +0200 Subject: [PATCH 29/52] GPU TPC: Adjust tagging of adjacent looper clusters to segmented looping tracks --- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 148 +++++++----------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 15 +- 3 files changed, 64 insertions(+), 101 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index 6bee239e42848..3cb937d4f7abc 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -32,7 +32,7 @@ template <> GPUdii() void GPUTPCGMMergerFollowLoopers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), uint32_t, i, merger.Memory()->nLoopData, { - GPUTPCGMTrackParam::RefitLoop(&merger, i); + GPUTPCGMTrackParam::PropagateLooper(&merger, i); }); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 3cebcdbfcdb7a..915d62c576af9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -85,10 +85,8 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float sumInvSqrtCharge = 0.f; 
int32_t nAvgCharge = 0; - if (iWay) { - if (iWay && ((nWays - iWay) & 1) == 1) { - StoreOuter(&track.OuterParam(), prop.GetAlpha()); - } + if (iWay && ((nWays - iWay) & 1) == 1) { + StoreOuter(&track.OuterParam(), prop.GetAlpha()); } int32_t resetT0 = initResetT0(); @@ -106,16 +104,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ N = 0; lastUpdateX = -1; - const bool inFlyDirection = (track.Leg() & 1); + const bool inFlyDirection = !((iWay ^ nWays) & 1); const int32_t wayDirection = (iWay & 1) ? -1 : 1; - bool noFollowCircle = false, noFollowCircle2 = false; int32_t goodRows = 0; for (int32_t ihit = ihitStart; ihit >= 0 && ihit < maxN; ihit += wayDirection) { const bool crossCE = lastSector != 255 && ((lastSector < 18) ^ (clusters[ihit].sector < 18)); if (crossCE) { lastSector = clusters[ihit].sector; - noFollowCircle2 = true; } if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) { @@ -160,14 +156,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // clang-format off CADEBUG(printf("\tSector %2d %4sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.sector, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? 
" " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); // clang-format on - if (allowModification && false /*changeDirection*/ && !noFollowCircle && !noFollowCircle2) { - if (lastRow != 255) { - if (!(merger->Param().rec.tpc.disableRefitAttachment & 4)) { - StoreAttachMirror(merger, lastSector, lastRow, iTrk, clAlpha, yy, xx, cluster.sector, cluster.row, inFlyDirection, prop.GetAlpha()); - noFollowCircle = true; - } - } - } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { + if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, track.Leg() == 0, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); @@ -180,15 +169,15 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } - int32_t err = prop.PropagateToXAlpha(xx, clAlpha, inFlyDirection); + int32_t retValProp = prop.PropagateToXAlpha(xx, clAlpha, inFlyDirection); // clang-format off CADEBUG(if (!CheckCov()){printf("INVALID COV AFTER PROPAGATE!!!\n");}); // clang-format on - if (err == -2) // Rotation failed, try to bring to new x with old alpha first, rotate, and then propagate to x, alpha + if (retValProp == -2) // Rotation failed, try to bring to new x with old alpha first, rotate, and then propagate to x, alpha { CADEBUG(printf("REROTATE\n")); if (prop.PropagateToXAlpha(xx, prop.GetAlpha(), inFlyDirection) == 0) { - err = prop.PropagateToXAlpha(xx, clAlpha, inFlyDirection); + retValProp = prop.PropagateToXAlpha(xx, clAlpha, inFlyDirection); } } if (lastRow == 255 || CAMath::Abs((int32_t)lastRow - (int32_t)cluster.row) > 5 || 
lastSector != cluster.sector || (param.rec.tpc.trackFitRejectMode < 0 && -nMissed <= param.rec.tpc.trackFitRejectMode)) { @@ -196,7 +185,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } else { goodRows++; } - if (err == 0) { + if (retValProp == 0) { lastRow = cluster.row; lastSector = cluster.sector; } @@ -216,51 +205,28 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } - if (err == 0 && false /*changeDirection*/) { - const float mirrordY = prop.GetMirroredYTrack(); - CADEBUG(printf(" -- MirroredY: %f --> %f", mP[0], mirrordY)); - if (CAMath::Abs(yy - mP[0]) > CAMath::Abs(yy - mirrordY)) { - CADEBUG(printf(" - Mirroring!!!")); - if (allowModification && !(merger->Param().rec.tpc.disableRefitAttachment & 8)) { - StoreAttachMirror(merger, cluster.sector, cluster.row, iTrk, 0, yy, 0, -1, 0, 0, prop.GetAlpha()); - } - MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, true, cluster.sector); - noFollowCircle = false; - - lastUpdateX = mX; - lastRow = 255; - N++; - resetT0 = initResetT0(); - // clang-format off - CADEBUG(printf("\n")); - CADEBUG(printf("\t%21sMirror Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); - // clang-format on - continue; - } - } - float uncorrectedY = -1e6f; if (allowModification) { uncorrectedY = AttachClusters(merger, cluster.sector, cluster.row, iTrk, track.Leg() == 0, prop); } - const int32_t err2 = mNDF > 0 && CAMath::Abs(prop.GetSinPhi0()) >= maxSinForUpdate; - if (err || err2) { + const bool sinPhiErr = mNDF > 0 && CAMath::Abs(prop.GetSinPhi0()) >= maxSinForUpdate; + if (retValProp || sinPhiErr) { if (mC[0] > param.rec.tpc.trackFitCovLimit || mC[2] > param.rec.tpc.trackFitCovLimit) { break; 
} MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagHighIncl); nMissed2++; NTolerated++; - CADEBUG(printf(" --- break (%d, %d)\n", err, err2)); + CADEBUG(printf(" --- break (%d, %d)\n", retValProp, (int32_t)sinPhiErr)); continue; } CADEBUG(printf("\n")); - int32_t retVal; + int32_t retValUpd; float threshold = 3.f + (lastUpdateX >= 0 ? (CAMath::Abs(mX - lastUpdateX) / 2) : 0.f); if (mNDF > 5 && (CAMath::Abs(yy - mP[0]) > threshold || CAMath::Abs(zz - mP[1]) > threshold)) { - retVal = GPUTPCGMPropagator::updateErrorClusterRejectedDistance; + retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedDistance; } else { int8_t rejectChi2 = attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? (GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); #if EXTRACT_RESIDUALS == 1 @@ -284,28 +250,27 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ #endif GPUCA_DEBUG_STREAMER_CHECK(GPUTPCGMPropagator::DebugStreamerVals debugVals;); if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification - retVal = GPUTPCGMPropagator::updateErrorClusterRejectedEdge; + retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedEdge; } else { const float time = merger->GetConstantMem()->ioPtrs.clustersNative ? merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].getTime() : -1.f; const float invSqrtCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? CAMath::InvSqrt(merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; const float invCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? 
(1.f / merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; float invAvgCharge = (sumInvSqrtCharge += invSqrtCharge) / ++nAvgCharge; invAvgCharge *= invAvgCharge; - retVal = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, &interpolation.hit[ihit], refit, cluster.sector, time, invAvgCharge, invCharge GPUCA_DEBUG_STREAMER_CHECK(, &debugVals)); + retValUpd = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, &interpolation.hit[ihit], refit, cluster.sector, time, invAvgCharge, invCharge GPUCA_DEBUG_STREAMER_CHECK(, &debugVals)); } GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamUpdateTrack, iTrk)) { - merger->DebugStreamerUpdate(iTrk, ihit, xx, yy, zz, cluster, merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num], *this, prop, interpolation.hit[ihit], rejectChi2, refit, retVal, sumInvSqrtCharge / nAvgCharge * sumInvSqrtCharge / nAvgCharge, yy, zz, clusterState, debugVals.retVal, debugVals.err2Y, debugVals.err2Z); + merger->DebugStreamerUpdate(iTrk, ihit, xx, yy, zz, cluster, merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num], *this, prop, interpolation.hit[ihit], rejectChi2, refit, retValUpd, sumInvSqrtCharge / nAvgCharge * sumInvSqrtCharge / nAvgCharge, yy, zz, clusterState, debugVals.retVal, debugVals.err2Y, debugVals.err2Z); }); } // clang-format off CADEBUG(if (!CheckCov()) GPUError("INVALID COV AFTER UPDATE!!!")); - CADEBUG(printf("\t%21sFit Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f), DzDs %5.2f %16s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - Err %d\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[3], "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], retVal)); + CADEBUG(printf("\t%21sFit Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f), DzDs %5.2f 
%16s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - Err %d\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[3], "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], retValUpd)); // clang-format on ConstrainSinPhi(); - if (retVal == 0) // track is updated + if (retValUpd == 0) // track is updated { - noFollowCircle2 = false; lastUpdateX = mX; covYYUpd = mC[0]; nMissed = nMissed2 = 0; @@ -346,7 +311,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } } - } else if (retVal >= GPUTPCGMPropagator::updateErrorClusterRejected) { // cluster far away form the track + } else if (retValUpd >= GPUTPCGMPropagator::updateErrorClusterRejected) { // cluster far away form the track if (allowModification) { MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagRejectDistance); } else if (iWay == nWays - 1) { @@ -358,6 +323,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ break; // bad chi2 for the whole track, stop the fit } } + if (nWays - iWay <= 2 && !(merger->Param().rec.tpc.disableRefitAttachment & 4) && lastRow != 255 && lastSector != 255) { + StoreLoopPropagation(merger, lastSector, lastRow, iTrk, lastRow > clusters[0].row, prop.GetAlpha()); + } if (((nWays - iWay) & 1) && (iWay != nWays - 1) && !track.CCE() && !track.Looper()) { ShiftZ(clusters, merger, maxN); } @@ -639,8 +607,21 @@ GPUdii() void GPUTPCGMTrackParam::StoreOuter(gputpcgmmergertypes::GPUTPCOuterPar outerParam->alpha = alpha; } -GPUdic(0, 1) void GPUTPCGMTrackParam::StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSector, int32_t toRow, bool inFlyDirection, float alpha) +GPUdic(0, 1) void GPUTPCGMTrackParam::StoreLoopPropagation(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool 
outerParam, float alpha) { + if (iRow == 0 || iRow == GPUCA_ROW_COUNT - 1) { + return; + } + if (CAMath::Abs(mP[2]) >= GPUCA_MAX_SIN_PHI_LOW) { + return; + } + if (CAMath::Abs(mP[2]) < 0.75) { + return; + } + if ((mP[2] * mP[4] < 0) ^ outerParam) { + return; + } + uint32_t nLoopData = CAMath::AtomicAdd(&Merger->Memory()->nLoopData, 1u); if (nLoopData >= Merger->NMaxTracks()) { Merger->raiseError(GPUErrors::ERROR_MERGER_LOOPER_OVERFLOW, nLoopData, Merger->NMaxTracks()); @@ -649,20 +630,15 @@ GPUdic(0, 1) void GPUTPCGMTrackParam::StoreAttachMirror(const GPUTPCGMMerger* GP } GPUTPCGMLoopData data; data.param = *this; - data.alpha = alpha; data.track = iTrack; - data.toAlpha = toAlpha; - data.toY = toY; - data.toX = toX; + data.alpha = alpha; data.sector = sector; data.row = iRow; - data.toSector = toSector; - data.toRow = toRow; - data.inFlyDirection = inFlyDirection; + data.outerParam = outerParam; Merger->LoopData()[nLoopData] = data; } -GPUdii() void GPUTPCGMTrackParam::RefitLoop(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t loopIdx) +GPUdii() void GPUTPCGMTrackParam::PropagateLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t loopIdx) { GPUTPCGMPropagator prop; prop.SetMaterialTPC(); @@ -675,11 +651,8 @@ GPUdii() void GPUTPCGMTrackParam::RefitLoop(const GPUTPCGMMerger* GPUrestrict() GPUTPCGMLoopData& data = Merger->LoopData()[loopIdx]; prop.SetTrack(&data.param, data.alpha); - if (data.toSector == -1) { - data.param.AttachClustersMirror(Merger, data.sector, data.row, data.track, data.toY, prop); - } else { - data.param.FollowCircle(Merger, prop, data.sector, data.row, data.track, data.toAlpha, data.toX, data.toY, data.toSector, data.toRow, data.inFlyDirection); - } + data.param.AttachClustersLooper(Merger, data.sector, data.row, data.track, data.outerParam, prop); + // data.param.FollowCircle(Merger, prop, data.sector, data.row, data.track, data.toAlpha, data.toX, data.toY, data.toSector, data.toRow, data.inFlyDirection); } GPUdi() int32_t 
GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& GPUrestrict() prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection) @@ -778,38 +751,33 @@ GPUdi() int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUrestri return (0); } -GPUdi() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& GPUrestrict() prop) +GPUdi() void GPUTPCGMTrackParam::AttachClustersLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outer, GPUTPCGMPropagator& GPUrestrict() prop) { static constexpr float kSectAngle = 2 * M_PI / 18.f; // Note that the coordinate system is rotated by 90 degree swapping X and Y! float X = mP[2] > 0 ? mP[0] : -mP[0]; - float toX = mP[2] > 0 ? toY : -toY; float Y = mP[2] > 0 ? -mX : mX; float Z = mP[1]; - if (CAMath::Abs(mP[2]) >= GPUCA_MAX_SIN_PHI_LOW) { - return; - } float SinPhi = CAMath::Sqrt(1 - mP[2] * mP[2]) * (mP[2] > 0 ? -1 : 1); - if (CAMath::Abs(SinPhi) >= GPUCA_MAX_SIN_PHI_LOW) { - return; - } float b = prop.GetBz(prop.GetAlpha(), mX, mP[0], mP[1]); - int32_t count = CAMath::Float2IntRn(CAMath::Abs((toX - X) * 2.f)); - if (count == 0) { - return; - } - float dx = (toX - X) / count; + float dx = outer ? 
1.f : -1.f; const float myRowX = GPUTPCGeometry::Row2X(iRow); - // printf("AttachMirror\n"); - // printf("X %f Y %f Z %f SinPhi %f toY %f -->\n", mX, mP[0], mP[1], mP[2], toY); - // printf("X %f Y %f Z %f SinPhi %f, count %d dx %f (to: %f)\n", X, Y, Z, SinPhi, count, dx, X + count * dx); - while (count--) { + // printf("\nAttachMirror sector %d row %d outer %d\n", (int)sector, (int)iRow, (int)outer); + // printf("X %f Y %f Z %f SinPhi %f -->\n", mX, mP[0], mP[1], mP[2]); + // printf("X %f Y %f Z %f SinPhi %f, dx %f\n", X, Y, Z, SinPhi, dx); + uint32_t maxTries = 100; + while (maxTries--) { float ex = CAMath::Sqrt(1 - SinPhi * SinPhi); float exi = 1.f / ex; float dxBzQ = dx * -b * mP[4]; float newSinPhi = SinPhi + dxBzQ; if (CAMath::Abs(newSinPhi) > GPUCA_MAX_SIN_PHI_LOW) { + // printf("Abort, newSinPhi %f\n", newSinPhi); + return; + } + if (mP[2] > 0 ? (newSinPhi > 0.5) : (newSinPhi < -0.5)) { + // printf("Finished, newSinPhi %f\n", newSinPhi); return; } float dS = dx * exi; @@ -821,18 +789,18 @@ GPUdi() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUr Z += dS * mP[3]; SinPhi = newSinPhi; if (CAMath::Abs(X) > CAMath::Abs(Y) * CAMath::Tan(kSectAngle / 2.f)) { - continue; + // printf("Abort, sector edge\n"); + return; } - // printf("count %d: At X %f Y %f Z %f SinPhi %f\n", count, mP[2] > 0 ? -Y : Y, mP[2] > 0 ? X : -X, Z, SinPhi); - + // printf("count %d: At X %f Y %f Z %f SinPhi %f\n", maxTries, mP[2] > 0 ? -Y : Y, mP[2] > 0 ? X : -X, Z, SinPhi); float paramX = mP[2] > 0 ? -Y : Y; - int32_t step = paramX >= mX ? 1 : -1; + int32_t step = outer ? 1 : -1; int32_t found = 0; for (int32_t j = iRow; j >= 0 && j < GPUCA_ROW_COUNT && found < 3; j += step) { float rowX = mX + GPUTPCGeometry::Row2X(j) - myRowX; if (CAMath::Abs(rowX - paramX) < 1.5f) { - // printf("Attempt row %d\n", j); + // printf("Attempt row %d at y %f\n", j, X); AttachClusters(Merger, sector, j, iTrack, false, mP[2] > 0 ? 
X : -X, Z); } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index a6258e3d2595d..ee46b61e8d775 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -149,12 +149,12 @@ class GPUTPCGMTrackParam GPUd() bool AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop, bool inFlyDirection, float maxSinPhi = GPUCA_MAX_SIN_PHI, bool checkdEdx = false); GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop); // Returns uncorrectedY for later use GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z); - // We force to compile these twice, for RefitLoop and for Fit, for better optimization - GPUd() void AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& prop); + // We force to compile these twice, for PropagateLooper and for Fit, for better optimization + GPUd() void AttachClustersLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outer, GPUTPCGMPropagator& prop); GPUd() int32_t FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection); - GPUd() void StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSector, int32_t toRow, bool inFlyDirection, float alpha); + GPUd() void StoreLoopPropagation(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t 
iRow, int32_t iTrack, bool outerParam, float alpha); GPUd() void StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, float alpha); - GPUd() static void RefitLoop(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t loopIdx); + GPUd() static void PropagateLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t loopIdx); GPUd() void AddCovDiagErrors(const float* GPUrestrict() errors2); GPUd() void AddCovDiagErrorsWithCorrelations(const float* GPUrestrict() errors2); @@ -227,15 +227,10 @@ class GPUTPCGMTrackParam struct GPUTPCGMLoopData { GPUTPCGMTrackParam param; uint32_t track; - float toY; - float toX; float alpha; - float toAlpha; uint8_t sector; uint8_t row; - int8_t toSector; - uint8_t toRow; - uint8_t inFlyDirection; + uint8_t outerParam; }; GPUdi() int32_t GPUTPCGMTrackParam::initResetT0() From 06b06b995d7374c2b20dc619d0a5c60f91235aab Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 25 Aug 2025 13:33:05 +0200 Subject: [PATCH 30/52] GPU QA: Proper fix for fetching timebins of MC data --- GPU/GPUTracking/qa/GPUQA.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 079e7e7be4dc7..6d1e724e1be3b 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -1729,7 +1729,9 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx mT0[0]->Fill(track.GetParam().GetTOffset()); if (mTrackMCLabels.size() && !mTrackMCLabels[i].isFake() && !track.MergedLooper() && !track.CCE()) { const auto& info = GetMCTrack(mTrackMCLabels[i]); - mT0[1]->Fill(track.GetParam().GetTOffset() - info.t0); + if (info.t0 != -100.f) { + mT0[1]->Fill(track.GetParam().GetTOffset() - info.t0); + } } } if (mClNative && mTracking && mTracking->GetTPCTransformHelper()) { From a9d0c1e906a1eaf5436be29e9233de86854c6b72 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 10 Sep 2025 23:41:31 +0200 Subject: [PATCH 31/52] GPU TPC: Implement looper following with propagator for 
segmented tracks --- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 152 +++++++----------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 9 +- 2 files changed, 65 insertions(+), 96 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 915d62c576af9..71b1878804893 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -154,7 +154,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ const auto& cluster = clusters[ihit]; // clang-format off - CADEBUG(printf("\tSector %2d %4sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.sector, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? " " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); + CADEBUG(printf("\tSector %2d %11sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.sector, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? 
" " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); // clang-format on if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { @@ -190,7 +190,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ lastSector = cluster.sector; } // clang-format off - CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - Err %d", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[0] - yy, mP[1] - zz, sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], err)); + CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - Err %d", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[0] - yy, mP[1] - zz, sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], retValProp)); // clang-format on if (crossCE) { @@ -218,7 +218,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagHighIncl); nMissed2++; NTolerated++; - CADEBUG(printf(" --- break (%d, %d)\n", retValProp, (int32_t)sinPhiErr)); + CADEBUG(printf(", %d --- break\n", (int32_t)sinPhiErr)); continue; } CADEBUG(printf("\n")); @@ -324,7 +324,8 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } if (nWays - iWay <= 2 && !(merger->Param().rec.tpc.disableRefitAttachment & 4) && lastRow != 255 && lastSector != 255) { - StoreLoopPropagation(merger, lastSector, lastRow, iTrk, lastRow > clusters[0].row, 
prop.GetAlpha()); + StoreLoopPropagation(merger, lastSector, lastRow, iTrk, lastRow > clusters[(iWay & 1) ? (maxN - 1) : 0].row, prop.GetAlpha()); + CADEBUG(printf("\t\tSTORING %d lastRow %d row %d out %d\n", iTrk, (int)lastRow, (int)clusters[(iWay & 1) ? (maxN - 1) : 0].row, lastRow > clusters[(iWay & 1) ? (maxN - 1) : 0].row)); } if (((nWays - iWay) & 1) && (iWay != nWays - 1) && !track.CCE() && !track.Looper()) { ShiftZ(clusters, merger, maxN); @@ -587,16 +588,9 @@ GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GP return dodEdx; } -GPUd() bool GPUTPCGMTrackParam::FollowCircleChk(float lrFactor, float toY, float toX, bool up, bool right) -{ - return CAMath::Abs(mX * lrFactor - toY) > 1.f && // transport further in Y - CAMath::Abs(mP[2]) < 0.7f && // rotate back - (up ? (-mP[0] * lrFactor > toX || (right ^ (mP[2] > 0))) : (-mP[0] * lrFactor < toX || (right ^ (mP[2] < 0)))); // don't overshoot in X -} - GPUdii() void GPUTPCGMTrackParam::StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, float alpha) { - CADEBUG(printf("\t%21sStorO Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]))); + CADEBUG(printf("\t%21sStorO Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f, SP %5.2f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f\n", "", alpha, mX, mP[0], mP[1], mP[4], mP[2], sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]))); for (int32_t i = 0; i < 5; i++) { outerParam->P[i] = mP[i]; } @@ -607,18 +601,18 @@ GPUdii() void GPUTPCGMTrackParam::StoreOuter(gputpcgmmergertypes::GPUTPCOuterPar outerParam->alpha = alpha; } -GPUdic(0, 1) void GPUTPCGMTrackParam::StoreLoopPropagation(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outerParam, float alpha) +GPUdic(0, 1) 
void GPUTPCGMTrackParam::StoreLoopPropagation(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outwards, float alpha) { if (iRow == 0 || iRow == GPUCA_ROW_COUNT - 1) { return; } - if (CAMath::Abs(mP[2]) >= GPUCA_MAX_SIN_PHI_LOW) { + if (CAMath::Abs(mP[2]) >= GPUCA_MAX_SIN_PHI) { // TODO: How can we avoid this? return; } if (CAMath::Abs(mP[2]) < 0.75) { return; } - if ((mP[2] * mP[4] < 0) ^ outerParam) { + if ((mP[2] * mP[4] < 0) ^ outwards) { return; } @@ -634,7 +628,7 @@ GPUdic(0, 1) void GPUTPCGMTrackParam::StoreLoopPropagation(const GPUTPCGMMerger* data.alpha = alpha; data.sector = sector; data.row = iRow; - data.outerParam = outerParam; + data.outwards = outwards; Merger->LoopData()[nLoopData] = data; } @@ -651,107 +645,83 @@ GPUdii() void GPUTPCGMTrackParam::PropagateLooper(const GPUTPCGMMerger* GPUrestr GPUTPCGMLoopData& data = Merger->LoopData()[loopIdx]; prop.SetTrack(&data.param, data.alpha); - data.param.AttachClustersLooper(Merger, data.sector, data.row, data.track, data.outerParam, prop); - // data.param.FollowCircle(Merger, prop, data.sector, data.row, data.track, data.toAlpha, data.toX, data.toY, data.toSector, data.toRow, data.inFlyDirection); + if (false) { + data.param.AttachClustersLooper(Merger, data.sector, data.row, data.track, data.outwards, prop); + } else { + data.param.AttachClustersLooperFollow(Merger, prop, data.sector, data.row, data.track, data.outwards); + } } -GPUdi() int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& GPUrestrict() prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection) +GPUdi() void GPUTPCGMTrackParam::AttachClustersLooperFollow(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& GPUrestrict() prop, int32_t sector, int32_t iRow, int32_t iTrack, bool up) { + float toX = mX; + bool inFlyDirection = 
(Merger->MergedTracks()[iTrack].Leg() & 1) ^ up; + static constexpr float kSectAngle = 2 * M_PI / 18.f; const GPUParam& GPUrestrict() param = Merger->Param(); - bool right; - float dAlpha = toAlpha - prop.GetAlpha(); - int32_t sectorSide = sector >= (GPUCA_NSECTORS / 2) ? (GPUCA_NSECTORS / 2) : 0; - if (CAMath::Abs(dAlpha) > 0.001f) { - right = CAMath::Abs(dAlpha) < CAMath::Pi() ? (dAlpha > 0) : (dAlpha < 0); - } else { - right = toY > mP[0]; - } - bool up = (mP[2] < 0) ^ right; - int32_t targetRow = up ? (GPUCA_ROW_COUNT - 1) : 0; - float lrFactor = mP[2] < 0 ? -1.f : 1.f; // !(right ^ down) // TODO: shouldn't it be "right ? 1.f : -1.f", but that gives worse results... + bool right = (mP[2] < 0) ^ up; + const int32_t sectorSide = sector >= (GPUCA_NSECTORS / 2) ? (GPUCA_NSECTORS / 2) : 0; + float lrFactor = right ^ !up ? 1.f : -1.f; // clang-format off - CADEBUG(printf("CIRCLE Track %d: Sector %d Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f - Next hit: Sector %d Alpha %f X %f Y %f - Right %d Up %d dAlpha %f lrFactor %f\n", iTrack, sector, prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3], toSector, toAlpha, toX, toY, (int32_t)right, (int32_t)up, dAlpha, lrFactor)); + CADEBUG(printf("\nCIRCLE Track %d: Sector %d Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f QPt %f - Right %d Up %d lrFactor %f\n", iTrack, sector, prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3], mP[4], (int32_t)right, (int32_t)up, lrFactor)); // clang-format on - AttachClustersPropagate(Merger, sector, iRow, targetRow, iTrack, false, prop, inFlyDirection, 0.7f); if (prop.RotateToAlpha(prop.GetAlpha() + (CAMath::Pi() / 2.f) * lrFactor)) { - return 1; + return; } CADEBUG(printf("\tRotated: X %f Y %f Z %f SinPhi %f (Alpha %f / %f)\n", mP[0], mX, mP[1], mP[2], prop.GetAlpha(), prop.GetAlpha() + CAMath::Pi() / 2.f)); - while (sector != toSector || FollowCircleChk(lrFactor, toY, toX, up, right)) { - while ((sector != toSector) ? 
(CAMath::Abs(mX) <= CAMath::Abs(mP[0]) * CAMath::Tan(kSectAngle / 2.f)) : FollowCircleChk(lrFactor, toY, toX, up, right)) { - int32_t err = prop.PropagateToXAlpha(mX + 1.f, prop.GetAlpha(), inFlyDirection); + uint32_t maxTries = 100; + while (true) { + while (CAMath::Abs(mX) <= CAMath::Abs(mP[0]) * CAMath::Tan(kSectAngle / 2.f) + 0.1f) { + if (maxTries-- == 0) { + return; + } + if (CAMath::Abs(mP[2]) > 0.7f) { + return; + } + if (up ? (-mP[0] * lrFactor > GPUTPCGeometry::Row2X(GPUCA_ROW_COUNT - 1)) : (-mP[0] * lrFactor < GPUTPCGeometry::Row2X(0))) { + return; + } + if (!((up ? (-mP[0] * lrFactor >= toX) : (-mP[0] * lrFactor <= toX)) || (right ^ (mP[2] > 0)))) { + return; + } + int32_t err = prop.PropagateToXAlpha(mX + (up ? 1.f : -1.f), prop.GetAlpha(), inFlyDirection); if (err) { CADEBUG(printf("\t\tpropagation error (%d)\n", err)); - prop.RotateToAlpha(prop.GetAlpha() - (CAMath::Pi() / 2.f) * lrFactor); - return 1; + return; } CADEBUG(printf("\tPropagated to y = %f: X %f Z %f SinPhi %f\n", mX, mP[0], mP[1], mP[2])); - for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { + for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { // TODO: Avoid iterating over all rows float rowX = GPUTPCGeometry::Row2X(j); if (CAMath::Abs(rowX - (-mP[0] * lrFactor)) < 1.5f) { - CADEBUG(printf("\t\tAttempt row %d (Y %f Z %f)\n", j, mX * lrFactor, mP[1])); + CADEBUG(printf("\t\tAttempt row %d (X %f Y %f Z %f)\n", j, rowX, mX * lrFactor, mP[1])); AttachClusters(Merger, sector, j, iTrack, false, mX * lrFactor, mP[1]); } } } - if (sector != toSector) { - if (right) { - if (++sector >= sectorSide + 18) { - sector -= 18; - } - } else { - if (--sector < sectorSide) { - sector += 18; - } - } - CADEBUG(printf("\tRotating to sector %d\n", sector)); - if (prop.RotateToAlpha(param.Alpha(sector) + (CAMath::Pi() / 2.f) * lrFactor)) { - CADEBUG(printf("\t\trotation error\n")); - prop.RotateToAlpha(prop.GetAlpha() - (CAMath::Pi() / 2.f) * lrFactor); - return 1; - } - CADEBUG(printf("\tAfter Rotatin Alpha %f 
Position X %f Y %f Z %f SinPhi %f\n", prop.GetAlpha(), mP[0], mX, mP[1], mP[2])); - } - } - CADEBUG(printf("\tRotating back\n")); - for (int32_t i = 0; i < 2; i++) { - if (prop.RotateToAlpha(prop.GetAlpha() + (CAMath::Pi() / 2.f) * lrFactor) == 0) { - break; - } - if (i) { - CADEBUG(printf("Final rotation failed\n")); - return 1; + if (maxTries-- == 0) { + return; } - CADEBUG(printf("\tresetting physical model\n")); - prop.SetTrack(this, prop.GetAlpha()); - } - prop.Rotate180(); - CADEBUG(printf("\tMirrored position: Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f\n", prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3])); - iRow = toRow; - float dx = toX - GPUTPCGeometry::Row2X(toRow); - if (up ^ (toX > mX)) { - if (up) { - while (iRow < GPUCA_ROW_COUNT - 2 && GPUTPCGeometry::Row2X(iRow + 1) + dx <= mX) { - iRow++; + if (right) { + if (++sector >= sectorSide + 18) { + sector -= 18; } } else { - while (iRow > 1 && GPUTPCGeometry::Row2X(iRow - 1) + dx >= mX) { - iRow--; + if (--sector < sectorSide) { + sector += 18; } } - prop.PropagateToXAlpha(GPUTPCGeometry::Row2X(iRow) + dx, prop.GetAlpha(), inFlyDirection); - AttachClustersPropagate(Merger, sector, iRow, toRow, iTrack, false, prop, inFlyDirection); - } - if (prop.PropagateToXAlpha(toX, prop.GetAlpha(), inFlyDirection)) { - mX = toX; + CADEBUG(printf("\tRotating to sector %d: %f --> %f\n", sector, prop.GetAlpha(), param.Alpha(sector) + (CAMath::Pi() / 2.f) * lrFactor)); + int32_t err = prop.RotateToAlpha(param.Alpha(sector) + (CAMath::Pi() / 2.f) * lrFactor); + if (err) { + CADEBUG(printf("Rotation Error %d\n", err)); + return; + } + CADEBUG(printf("\tAfter Rotating Alpha %f Position X %f Y %f Z %f SinPhi %f\n", prop.GetAlpha(), mP[0], mX, mP[1], mP[2])); } - CADEBUG(printf("Final position: Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f\n", prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3])); - return (0); } -GPUdi() void GPUTPCGMTrackParam::AttachClustersLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t 
iRow, int32_t iTrack, bool outer, GPUTPCGMPropagator& GPUrestrict() prop) +GPUdi() void GPUTPCGMTrackParam::AttachClustersLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outwards, GPUTPCGMPropagator& GPUrestrict() prop) { static constexpr float kSectAngle = 2 * M_PI / 18.f; // Note that the coordinate system is rotated by 90 degree swapping X and Y! @@ -761,9 +731,9 @@ GPUdi() void GPUTPCGMTrackParam::AttachClustersLooper(const GPUTPCGMMerger* GPUr float SinPhi = CAMath::Sqrt(1 - mP[2] * mP[2]) * (mP[2] > 0 ? -1 : 1); float b = prop.GetBz(prop.GetAlpha(), mX, mP[0], mP[1]); - float dx = outer ? 1.f : -1.f; + float dx = outwards ? 1.f : -1.f; const float myRowX = GPUTPCGeometry::Row2X(iRow); - // printf("\nAttachMirror sector %d row %d outer %d\n", (int)sector, (int)iRow, (int)outer); + // printf("\nAttachMirror sector %d row %d outwards %d\n", (int)sector, (int)iRow, (int)outwards); // printf("X %f Y %f Z %f SinPhi %f -->\n", mX, mP[0], mP[1], mP[2]); // printf("X %f Y %f Z %f SinPhi %f, dx %f\n", X, Y, Z, SinPhi, dx); uint32_t maxTries = 100; @@ -795,7 +765,7 @@ GPUdi() void GPUTPCGMTrackParam::AttachClustersLooper(const GPUTPCGMMerger* GPUr // printf("count %d: At X %f Y %f Z %f SinPhi %f\n", maxTries, mP[2] > 0 ? -Y : Y, mP[2] > 0 ? X : -X, Z, SinPhi); float paramX = mP[2] > 0 ? -Y : Y; - int32_t step = outer ? 1 : -1; + int32_t step = outwards ? 
1 : -1; int32_t found = 0; for (int32_t j = iRow; j >= 0 && j < GPUCA_ROW_COUNT && found < 3; j += step) { float rowX = mX + GPUTPCGeometry::Row2X(j) - myRowX; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index ee46b61e8d775..1c084f15874fe 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -150,9 +150,9 @@ class GPUTPCGMTrackParam GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop); // Returns uncorrectedY for later use GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z); // We force to compile these twice, for PropagateLooper and for Fit, for better optimization - GPUd() void AttachClustersLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outer, GPUTPCGMPropagator& prop); - GPUd() int32_t FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection); - GPUd() void StoreLoopPropagation(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outerParam, float alpha); + GPUd() void AttachClustersLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outwards, GPUTPCGMPropagator& prop); + GPUd() void AttachClustersLooperFollow(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t sector, int32_t iRow, int32_t iTrack, bool outwards); + GPUd() void StoreLoopPropagation(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outwards, float alpha); GPUd() void 
StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, float alpha); GPUd() static void PropagateLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t loopIdx); @@ -213,7 +213,6 @@ class GPUTPCGMTrackParam } private: - GPUd() bool FollowCircleChk(float lrFactor, float toY, float toX, bool up, bool right); GPUd() int32_t initResetT0(); float mX; // x position @@ -230,7 +229,7 @@ struct GPUTPCGMLoopData { float alpha; uint8_t sector; uint8_t row; - uint8_t outerParam; + uint8_t outwards; }; GPUdi() int32_t GPUTPCGMTrackParam::initResetT0() From 870ea2e1643f0d13d37bfb7206d21f20be24e6e8 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 26 Aug 2025 16:30:03 +0200 Subject: [PATCH 32/52] GPU: Add comments for customizable kernel parameters --- GPU/GPUTracking/kernels.cmake | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 151f0326e00ca..c8ddcd2e9d81d 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -135,17 +135,17 @@ o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass2Regression" "= TPC o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishDeconvolutionFlags" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t withMC uint32_t batchStart) endif() -o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP - NEIGHBOURS_FINDER_UNROLL_GLOBAL - NEIGHBOURS_FINDER_UNROLL_SHARED - TRACKLET_SELECTOR_HITS_REG_SIZE - ALTERNATE_BORDER_SORT - SORT_BEFORE_FIT - NO_ATOMIC_PRECHECK - COMP_GATHER_KERNEL - COMP_GATHER_MODE - SORT_STARTHITS - CF_SCAN_WORKGROUP_SIZE) +o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP # Number of neighhbours finder hits to cache in shared memory + NEIGHBOURS_FINDER_UNROLL_GLOBAL # Unroll factor for neighbours finder iterating hits in local memory + NEIGHBOURS_FINDER_UNROLL_SHARED # Fully unroll iteration over neighbours finder hits in shared memory [0/1] + 
TRACKLET_SELECTOR_HITS_REG_SIZE # Number of hits to cache in shared memory in tracklet selector + ALTERNATE_BORDER_SORT # Use alternative border sort approach [0/1] + SORT_BEFORE_FIT # Sort tracks after length to reduce warp serialization [0/1] + NO_ATOMIC_PRECHECK # Skip atomic precheck to reduce posterior synchronization [0/1] + COMP_GATHER_KERNEL # Default kernel to use for Compression Gather Operation [0 - 4] + COMP_GATHER_MODE # TPC Compression Gather Mode [0 - 3] + SORT_STARTHITS # Sort start hits to improve cache locality during tracklet construction [0/1] + CF_SCAN_WORKGROUP_SIZE) # Work group size to use in clusterizer scan operation -o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE - MERGER_INTERPOLATION_ERROR_TYPE) +o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE # Data type to use for intermediate storage of dEdx truncated mean inputs + MERGER_INTERPOLATION_ERROR_TYPE) # Data type for storing intermediate track residuals for interpolation From ddcbe9529165a99a98af2af57726990186291889 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 26 Aug 2025 17:59:03 +0200 Subject: [PATCH 33/52] GPU TPC: Require minimum NDF for mergerInterpolateRejectAlsoOnCurrentPosition to avoid killing some tracks when some first clusters are bad --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index b35bc04d7232f..a3158a870e1fb 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -148,7 +148,7 @@ AddOptionRTC(sigBitsCharge, uint8_t, 4, "", 0, "Number of significant bits for T AddOptionRTC(sigBitsWidth, uint8_t, 3, "", 0, "Number of significant bits for TPC cluster width in compression mode 1") AddOptionRTC(dropLoopers, uint8_t, 0, "", 0, "Drop looping tracks starting from second loop") 
AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation instead of extrapolation for chi2 based cluster rejection") -AddOptionRTC(mergerInterpolateRejectAlsoOnCurrentPosition, uint8_t, 1, "", 0, "When using mergerInterpolateErrors, reject based on chi2 twice computed with interpolated and current track position") +AddOptionRTC(mergerInterpolateRejectAlsoOnCurrentPosition, uint8_t, 1, "", 0, "When using mergerInterpolateErrors, reject based on chi2 twice computed with interpolated and current track position starting from NDF > 5") AddOptionRTC(mergeCE, uint8_t, 1, "", 0, "Merge tracks accross the central electrode") AddOptionRTC(retryRefit, int8_t, 1, "", 0, "Retry refit with seeding errors and without cluster rejection when fit fails (=2 means retry in same kernel, =1 for separate kernel") AddOptionRTC(enablePID, int8_t, 1, "", 0, "Enable PID response") diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 2d612254ba001..0ea888bca8725 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -643,7 +643,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, return 0; } - return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || (param.rec.tpc.mergerInterpolateRejectAlsoOnCurrentPosition && rejectChi2 == rejectInterReject), err2Y, err2Z, &param); + return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || (param.rec.tpc.mergerInterpolateRejectAlsoOnCurrentPosition && rejectChi2 == rejectInterReject && mT->GetNDF() > 5 ), err2Y, err2Z, &param); } GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict() param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z) From b4b2a69f9d7a55c0f6fee90f4e039e9343ab4dfd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 26 Aug 2025
18:01:32 +0200 Subject: [PATCH 34/52] GPU TPC: Make mergerNonInterpolateRejectMinNDF configurable --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 3 ++- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index a3158a870e1fb..37998659e77ef 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -148,7 +148,8 @@ AddOptionRTC(sigBitsCharge, uint8_t, 4, "", 0, "Number of significant bits for T AddOptionRTC(sigBitsWidth, uint8_t, 3, "", 0, "Number of significant bits for TPC cluster width in compression mode 1") AddOptionRTC(dropLoopers, uint8_t, 0, "", 0, "Drop looping tracks starting from second loop") AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation instead of extrapolation for chi2 based cluster rejection") -AddOptionRTC(mergerInterpolateRejectAlsoOnCurrentPosition, uint8_t, 1, "", 0, "When using mergerInterpolateErrors, reject based on chi2 twice computed with interpolated and current track position starting from NDF > 5") +AddOptionRTC(mergerInterpolateRejectAlsoOnCurrentPosition, uint8_t, 1, "", 0, "When using mergerInterpolateErrors, reject based on chi2 twice computed with interpolated and current track position starting from NDF > mergerNonInterpolateRejectMinNDF") +AddOptionRTC(mergerNonInterpolateRejectMinNDF, uint8_t, 5, "", 0, "Minimum NDF of track for non-interpolated reject (both for chi2 and absolute distance)") AddOptionRTC(mergeCE, uint8_t, 1, "", 0, "Merge tracks accross the central electrode") AddOptionRTC(retryRefit, int8_t, 1, "", 0, "Retry refit with seeding errors and without cluster rejection when fit fails (=2 means retry in same kernel, =1 for separate kernel") AddOptionRTC(enablePID, int8_t, 1, "", 0, "Enable PID response") diff --git 
a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 0ea888bca8725..ef0dcef3b8c02 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -643,7 +643,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, return 0; } - return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || (param.rec.tpc.mergerInterpolateRejectAlsoOnCurrentPosition && rejectChi2 == rejectInterReject && mT->GetNDF() > 5 ), err2Y, err2Z, &param); + return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || (param.rec.tpc.mergerInterpolateRejectAlsoOnCurrentPosition && rejectChi2 == rejectInterReject && mT->GetNDF() > (int32_t)param.rec.tpc.mergerNonInterpolateRejectMinNDF), err2Y, err2Z, &param); } GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict() param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 71b1878804893..87d789ad34879 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -225,7 +225,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ int32_t retValUpd; float threshold = 3.f + (lastUpdateX >= 0 ? (CAMath::Abs(mX - lastUpdateX) / 2) : 0.f); - if (mNDF > 5 && (CAMath::Abs(yy - mP[0]) > threshold || CAMath::Abs(zz - mP[1]) > threshold)) { + if (mNDF > (int32_t)param.rec.tpc.mergerNonInterpolateRejectMinNDF && (CAMath::Abs(yy - mP[0]) > threshold || CAMath::Abs(zz - mP[1]) > threshold)) { retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedDistance; } else { int8_t rejectChi2 = attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? 
(GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); From 1aa6fa3a3df03b7b3c5964b7dc1623833df10b91 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 28 Aug 2025 18:04:44 +0200 Subject: [PATCH 35/52] GPU TPC: TrackletSelection: Count shared hits from outside, to allow more shared hits at inner rows --- .../SectorTracker/GPUTPCTrackletSelector.cxx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx index 0bf3448bed730..3049136c98f1d 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx @@ -53,10 +53,10 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread uint32_t nHits = 0; const uint32_t minHits = tracker.Param().rec.tpc.minNClustersTrackSeed == -1 ? GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(tracklet.Param().QPt() * tracker.Param().qptB5Scaler) : tracker.Param().rec.tpc.minNClustersTrackSeed; const uint32_t sharingMinNorm = minHits * tracker.Param().rec.tpc.trackletMinSharedNormFactor; - float maxShared = maxSharedFrac * sharingMinNorm; + const float maxSharedNorm = maxSharedFrac * sharingMinNorm; GPUCA_UNROLL(, U(1)) - for (irow = firstRow; irow <= lastRow && lastRow - irow + nHits >= minHits; irow++) { + for (irow = lastRow; irow >= firstRow && irow - firstRow + nHits >= minHits; irow--) { calink ih = tracker.TrackletRowHits()[tracklet.FirstHit() + (irow - firstRow)]; if (ih != CALINK_DEAD_CHANNEL) { gap++; @@ -64,7 +64,7 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread if (ih != CALINK_INVAL && ih != CALINK_DEAD_CHANNEL) { GPUglobalref() const GPUTPCRow& row = tracker.Row(irow); const bool own = (tracker.HitWeight(row, ih) <= w); - const bool sharedOK = nShared <= (nHits < sharingMinNorm ? 
maxShared : nHits * maxSharedFrac); + const bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxSharedNorm : nHits * maxSharedFrac); if (own || sharedOK) { // SG!!! gap = 0; #pragma GCC diagnostic push @@ -86,7 +86,7 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread } } - if (gap > tracker.Param().rec.tpc.trackFollowingMaxRowGap || irow == lastRow) { // store + if (gap > tracker.Param().rec.tpc.trackFollowingMaxRowGap || irow == firstRow) { // store if (nHits >= minHits) { uint32_t nFirstTrackHit = CAMath::AtomicAdd(tracker.NTrackHits(), (uint32_t)nHits); if (nFirstTrackHit + nHits > tracker.NMaxTrackHits()) { @@ -111,11 +111,11 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread #pragma GCC diagnostic pop if constexpr (GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE > 0) { if (inShared) { - tracker.TrackHits()[nFirstTrackHit + jh] = s.mHits[jh][iThread]; + tracker.TrackHits()[nFirstTrackHit + nHits - 1 - jh] = s.mHits[jh][iThread]; } } if (!inShared) { - tracker.TrackHits()[nFirstTrackHit + jh] = trackHits[jh - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; + tracker.TrackHits()[nFirstTrackHit + nHits - 1 - jh] = trackHits[jh - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; } } } From ef6e9a0241e5ead39b9aef3997f0a898bf7724e2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 29 Aug 2025 09:17:32 +0200 Subject: [PATCH 36/52] GPU QA: inputHistogramsOnly implies noEvents --- GPU/GPUTracking/Standalone/Benchmark/standalone.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 7dcabde4e94b6..2e89a4d72c63e 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -198,6 +198,7 @@ int32_t ReadConfiguration(int argc, char** argv) } if (configStandalone.QA.inputHistogramsOnly) { configStandalone.rundEdx = false; +
configStandalone.noEvents = true; } if (configStandalone.QA.dumpToROOT) { configStandalone.proc.outputSharedClusterMap = true; From 0cb5fc5c87871aea97b049e2884e817128c993c8 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 29 Aug 2025 09:20:52 +0200 Subject: [PATCH 37/52] GPU TPC: Better formula for cluster weights --- GPU/GPUTracking/SectorTracker/GPUTPCTracker.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index 60cc12573be99..2667da4a53977 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -160,13 +160,11 @@ class GPUTPCTracker : public GPUProcessor */ GPUdi() static int32_t CalculateHitWeight(int32_t NHits, float chi2) { - const float chi2_suppress = 6.f; - float weight = (((float)NHits * (chi2_suppress - chi2 / 500.f)) * (1e9f / chi2_suppress / 160.f)); + float weight = NHits * (NHits * 2 - 5) * 128 / chi2; // TODO: Add QPt to this formula if (weight < 0.f || weight > 2e9f) { return 0; } return ((int32_t)weight); - // return( (NHits << 16) + num); } GPUd() void MaximizeHitWeight(const GPUTPCRow& row, int32_t hitIndex, int32_t weight) { mData.MaximizeHitWeight(row, hitIndex, weight); } GPUd() void SetHitWeight(const GPUTPCRow& row, int32_t hitIndex, int32_t weight) { mData.SetHitWeight(row, hitIndex, weight); } From 8494996a3f03425438ff622493cdce788acd5b3b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 10 Sep 2025 19:03:11 +0200 Subject: [PATCH 38/52] GPU: Improve debug dumps --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 3 +++ GPU/GPUTracking/Global/GPUChainTrackingDebug.h | 4 ++-- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index b2af986ebfac9..17e2a2a27c747 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx 
+++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -269,6 +269,9 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() #ifndef GPUCA_DETERMINISTIC_MODE GPUError("WARNING, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); #endif + if (mProcessingSettings->debugLevel >= 6 && ((mProcessingSettings->debugMask + 1) & mProcessingSettings->debugMask)) { + GPUError("WARNING: debugMask %d - debug output might not be deterministic with intermediate steps missing", mProcessingSettings->debugMask); + } mProcessingSettings->overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; if (GetProcessingSettings().createO2Output > 1) { mProcessingSettings->createO2Output = 1; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h index 6c995f65f3dd3..a0be9d833d5a9 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h @@ -28,8 +28,8 @@ enum GPUChainTrackingDebugFlags : uint32_t { TPCLinks = 1 << 2, TPCStartHits = 1 << 3, TPCTracklets = 1 << 4, - TPCSectorTracks = 1 << 5, - TPCHitWeights = 1 << 6, + TPCHitWeights = 1 << 5, + TPCSectorTracks = 1 << 6, TPCMergingRanges = 1 << 7, TPCMergingSectorTracks = 1 << 8, TPCMergingMatching = 1 << 9, diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 863998079f2cd..0a83bf47f5725 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -51,7 +51,7 @@ void GPUTPCGMMerger::DumpSectorTracks(std::ostream& out) const out << " Track type " << iGlobal << "\n"; for (int32_t j = mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal]; j < mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal + 1]; j++) { const auto& trk = mSectorTrackInfos[j]; - out << " Track " << j << ": LocalId " << (iGlobal ? 
(trk.LocalTrackId() >> 24) : -1) << "/" << (iGlobal ? (trk.LocalTrackId() & 0xFFFFFF) : -1) << " X " << trk.X() << " offsetz " << trk.TOffset() << " A " << trk.Alpha() << " Y " << trk.Y() << " Z " << trk.Z() << " SinPhi " << trk.SinPhi() << " CosPhi " << trk.CosPhi() << " SecPhi " << trk.SecPhi() << " Tgl " << trk.DzDs() << " QPt " << trk.QPt() << "\n"; + out << " Track " << j << ": LocalId " << (iGlobal ? (trk.LocalTrackId() >> 24) : -1) << "/" << (iGlobal ? (trk.LocalTrackId() & 0xFFFFFF) : -1) << " NCl " << trk.NClusters() << " X " << trk.X() << " offsetz " << trk.TOffset() << " A " << trk.Alpha() << " Y " << trk.Y() << " Z " << trk.Z() << " SinPhi " << trk.SinPhi() << " CosPhi " << trk.CosPhi() << " SecPhi " << trk.SecPhi() << " Tgl " << trk.DzDs() << " QPt " << trk.QPt() << "\n"; } } } @@ -135,7 +135,7 @@ void GPUTPCGMMerger::DumpMergedBetweenSectors(std::ostream& out) const void GPUTPCGMMerger::DumpCollected(std::ostream& out) const { std::streamsize ss = out.precision(); - out << std::setprecision(2); + out << std::setprecision(6); out << "\nTPC Merger Collected Tracks\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mMergedTracks[i]; From cf38ef222d9d16c3f6d1ebf190162cb737c303f8 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 10 Sep 2025 19:03:58 +0200 Subject: [PATCH 39/52] GPU TPC: Deterministic (and faster since not relying on atomics) linking of extrapolated track segments --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 11 +++++------ .../SectorTracker/GPUTPCExtrapolationTracking.cxx | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index acd8331e8bf8f..d1991c8e99646 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -581,7 +581,7 @@ GPUd() void GPUTPCGMMerger::UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads uint32_t nTracks = *trk.NTracks(); for (uint32_t 
itr = nLocalTracks + iBlock * nThreads + iThread; itr < nTracks; itr += nBlocks * nThreads) { sectorTr = &trk.Tracks()[itr]; - int32_t localId = mTrackIDs[(sectorTr->LocalTrackId() >> 24) * mNMaxSingleSectorTracks + (sectorTr->LocalTrackId() & 0xFFFFFF)]; + int32_t localId = mTrackIDs[((sectorTr->LocalTrackId() >> 24) & 0x3F) * mNMaxSingleSectorTracks + (sectorTr->LocalTrackId() & 0xFFFFFF)]; if (localId == -1) { continue; } @@ -594,7 +594,7 @@ GPUd() void GPUTPCGMMerger::UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads track.SetNextNeighbour(-1); track.SetNextSegmentNeighbour(-1); track.SetPrevSegmentNeighbour(-1); - track.SetLocalTrackId(localId); + track.SetLocalTrackId(localId | (sectorTr->LocalTrackId() & 0x40000000)); } } @@ -643,10 +643,9 @@ GPUd() void GPUTPCGMMerger::LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThr { for (int32_t itr = SectorTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SectorTrackInfoGlobalLast(NSECTORS - 1); itr += nThreads * nBlocks) { GPUTPCGMSectorTrack& extrapolatedTrack = mSectorTrackInfos[itr]; - GPUTPCGMSectorTrack& localTrack = mSectorTrackInfos[extrapolatedTrack.LocalTrackId()]; - if (localTrack.ExtrapolatedTrackId(0) != -1 || !CAMath::AtomicCAS(&localTrack.ExtrapolatedTrackIds()[0], -1, itr)) { - localTrack.SetExtrapolatedTrackId(1, itr); - } + GPUTPCGMSectorTrack& localTrack = mSectorTrackInfos[extrapolatedTrack.LocalTrackId() & 0xFFFFFF]; + int up = (extrapolatedTrack.LocalTrackId() & 0x40000000) ? 
1 : 0; + localTrack.SetExtrapolatedTrackId(up, itr); } } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx index df998ca7cbb9a..eaaefcb278ffe 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx @@ -112,7 +112,7 @@ GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUT track.SetParam(tParam.GetParam()); track.SetNHits(nHits); track.SetFirstHitID(hitId); - track.SetLocalTrackId((sectorSource.ISector() << 24) | sectorSource.Tracks()[iTrack].LocalTrackId()); + track.SetLocalTrackId((direction == 1 ? 0x40000000 : 0) | (sectorSource.ISector() << 24) | sectorSource.Tracks()[iTrack].LocalTrackId()); } return (nHits >= tracker.Param().rec.tpc.extrapolationTrackingMinHits); From f094c9f864bed212e0868d82cca83587f8ef9c20 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 29 Aug 2025 13:14:33 +0200 Subject: [PATCH 40/52] GPU TPC: Do not interpolate with too few NDF --- GPU/Common/GPUCommonMath.h | 5 +++++ GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 13 ++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 372e067b14aff..0ff31899dec0c 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -74,6 +74,11 @@ class GPUCommonMath GPUhdni() constexpr static float Sqrt(float x); GPUd() static float InvSqrt(float x); template + GPUdi() constexpr static T Square(T x) + { + return x * x; + } + template GPUhd() constexpr static T Abs(T x); GPUd() constexpr static float ASin(float x); GPUd() constexpr static float ACos(float x); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index ef0dcef3b8c02..c81497367e8bd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ 
-653,11 +653,18 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict if (rejectChi2 == rejectInterFill) { inter->posY = mP[0]; inter->posZ = mP[1]; - inter->errorY = mC[0]; - inter->errorZ = mC[2]; + if (mT->NDF() <= 0) { + inter->errorY = inter->errorZ = 100.f; + } else { + inter->errorY = mC[0]; + inter->errorZ = mC[2]; + } } else if (rejectChi2 == rejectInterReject) { float chi2Y, chi2Z; - if (mFitInProjections || mT->NDF() <= 0) { + if (mT->NDF() <= 0) { + chi2Y = CAMath::Square((float)inter->posY - posY) / ((float)inter->errorY + err2Y); + chi2Z = CAMath::Square((float)inter->posZ - posZ) / ((float)inter->errorZ + err2Z); + } else if (mFitInProjections) { const float Iz0 = inter->posY - mP[0]; const float Iz1 = inter->posZ - mP[1]; const float Iw0 = 1.f / (mC[0] + (float)inter->errorY); From 2cab32d8dbcd2c0e4b1571ffb1ab556e52194264 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 29 Aug 2025 13:14:53 +0200 Subject: [PATCH 41/52] GPU TPC: Don't constrain SinPhi between inward/outward refits --- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 87d789ad34879..e0fb432c7dae6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -99,7 +99,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ prop.SetPropagateBzOnly(param.rec.fitPropagateBzOnly > iWay); prop.SetMatLUT((param.rec.useMatLUT && iWay == nWays - 1) ? merger->GetConstantMem()->calibObjects.matLUT : nullptr); prop.SetTrack(this, iWay ? prop.GetAlpha() : Alpha); - ConstrainSinPhi(prop.GetFitInProjections() ? 0.95f : GPUCA_MAX_SIN_PHI_LOW); + ConstrainSinPhi(iWay == 0 ? 
0.95f : GPUCA_MAX_SIN_PHI_LOW); CADEBUG(printf("Fitting track %d way %d (sector %d, alpha %f)\n", iTrk, iWay, CAMath::Float2IntRn(prop.GetAlpha() / kSectAngle) + (mP[1] < 0 ? 18 : 0), prop.GetAlpha())); N = 0; From 64491cd7c3eea983da1cd0150f74af0819c933c3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 29 Aug 2025 13:16:15 +0200 Subject: [PATCH 42/52] GPU: Improve some debug messages --- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index e0fb432c7dae6..891a4323b7ab1 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -100,7 +100,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ prop.SetMatLUT((param.rec.useMatLUT && iWay == nWays - 1) ? merger->GetConstantMem()->calibObjects.matLUT : nullptr); prop.SetTrack(this, iWay ? prop.GetAlpha() : Alpha); ConstrainSinPhi(iWay == 0 ? 0.95f : GPUCA_MAX_SIN_PHI_LOW); - CADEBUG(printf("Fitting track %d way %d (sector %d, alpha %f)\n", iTrk, iWay, CAMath::Float2IntRn(prop.GetAlpha() / kSectAngle) + (mP[1] < 0 ? 18 : 0), prop.GetAlpha())); + CADEBUG(printf("Fitting track %d way %d (sector %d, alpha %f) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", iTrk, iWay, CAMath::Float2IntRn(prop.GetAlpha() / kSectAngle) + (mP[1] < 0 ? 
18 : 0), prop.GetAlpha())); N = 0; lastUpdateX = -1; @@ -190,7 +190,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ lastSector = cluster.sector; } // clang-format off - CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - Err %d", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[0] - yy, mP[1] - zz, sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], retValProp)); + CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - PErr %d", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[0] - yy, mP[1] - zz, sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], retValProp)); // clang-format on if (crossCE) { @@ -265,10 +265,10 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } // clang-format off CADEBUG(if (!CheckCov()) GPUError("INVALID COV AFTER UPDATE!!!")); - CADEBUG(printf("\t%21sFit Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f), DzDs %5.2f %16s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - Err %d\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[3], "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], retValUpd)); + CADEBUG(printf("\t%21sFit Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f), DzDs %5.2f %16s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - FErr %d\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[3], "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], retValUpd)); // clang-format on - ConstrainSinPhi(); 
+ ConstrainSinPhi(); // TODO: Limit using ConstrainSinPhi everywhere! if (retValUpd == 0) // track is updated { lastUpdateX = mX; From 46d244730ee7e597bbf1d697177504fd1b14c58d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 29 Aug 2025 13:16:48 +0200 Subject: [PATCH 43/52] GPU: Remove obsolete code --- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 891a4323b7ab1..09b70582ed930 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -229,25 +229,6 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedDistance; } else { int8_t rejectChi2 = attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? (GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); -#if EXTRACT_RESIDUALS == 1 - if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { - const float Iz0 = interpolation.hit[ihit].posY - mP[0]; - const float Iz1 = interpolation.hit[ihit].posZ - mP[1]; - float Iw0 = mC[2] + (float)interpolation.hit[ihit].errorZ; - float Iw2 = mC[0] + (float)interpolation.hit[ihit].errorY; - float Idet1 = 1.f / CAMath::Max(1e-10f, Iw0 * Iw2 - mC[1] * mC[1]); - const float Ik00 = (mC[0] * Iw0 + mC[1] * mC[1]) * Idet1; - const float Ik01 = (mC[0] * mC[1] + mC[1] * Iw2) * Idet1; - const float Ik10 = (mC[1] * Iw0 + mC[2] * mC[1]) * Idet1; - const float Ik11 = (mC[1] * mC[1] + mC[2] * Iw2) * Idet1; - const float ImP0 = mP[0] + Ik00 * Iz0 + Ik01 * Iz1; - const float ImP1 = mP[1] + Ik10 * Iz0 + Ik11 * Iz1; - const float ImC0 = mC[0] - Ik00 * mC[0] + Ik01 * mC[1]; - const float ImC2 = mC[2] - Ik10 * mC[1] + Ik11 * mC[2]; - auto& 
tup = GPUROOTDump::get("clusterres", "row:clX:clY:clZ:angle:trkX:trkY:trkZ:trkSinPhi:trkDzDs:trkQPt:trkSigmaY2:trkSigmaZ2trkSigmaQPt2"); - tup.Fill((float)cluster.row, xx, yy, zz, clAlpha, mX, ImP0, ImP1, mP[2], mP[3], mP[4], ImC0, ImC2, mC[14]); - } -#endif GPUCA_DEBUG_STREAMER_CHECK(GPUTPCGMPropagator::DebugStreamerVals debugVals;); if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedEdge; From 0cc2a8fab2b45c19321c07eca8ebda2503a04f55 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 29 Aug 2025 13:31:32 +0200 Subject: [PATCH 44/52] GPU TPC: Fix setting of FitInProjection and PropagateBzOnly --- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 09b70582ed930..43f6ca569057d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -95,8 +95,8 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ ResetCovariance(); prop.SetSeedingErrors(!(refit && attempt == 0)); - prop.SetFitInProjections(param.rec.fitInProjections == -1 ? (iWay != 0) : param.rec.fitInProjections); - prop.SetPropagateBzOnly(param.rec.fitPropagateBzOnly > iWay); + prop.SetFitInProjections(param.rec.fitInProjections == -1 ? (iWay == 0) : param.rec.fitInProjections); + prop.SetPropagateBzOnly(iWay < param.rec.fitPropagateBzOnly); prop.SetMatLUT((param.rec.useMatLUT && iWay == nWays - 1) ? merger->GetConstantMem()->calibObjects.matLUT : nullptr); prop.SetTrack(this, iWay ? prop.GetAlpha() : Alpha); ConstrainSinPhi(iWay == 0 ? 
0.95f : GPUCA_MAX_SIN_PHI_LOW); From 518ddf412a6f3fee5f1e2bfbed17a55a33bb7595 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 29 Aug 2025 13:31:56 +0200 Subject: [PATCH 45/52] GPU TPC: Fix applying tpc.trackFitCovLimit --- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 43f6ca569057d..d687557b2a570 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -211,10 +211,10 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } const bool sinPhiErr = mNDF > 0 && CAMath::Abs(prop.GetSinPhi0()) >= maxSinForUpdate; + if (mNDF >= 0 && (mC[0] > param.rec.tpc.trackFitCovLimit || mC[2] > param.rec.tpc.trackFitCovLimit)) { + break; + } if (retValProp || sinPhiErr) { - if (mC[0] > param.rec.tpc.trackFitCovLimit || mC[2] > param.rec.tpc.trackFitCovLimit) { - break; - } MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagHighIncl); nMissed2++; NTolerated++; From 22ff2c518dfc59c0263d542fed69a2b298edd574 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 29 Aug 2025 14:55:22 +0200 Subject: [PATCH 46/52] GPU TPC: Do Interpolation rejection in TrackParam.cxx --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 18 +++------- GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 10 ++---- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 33 ++++++++++++++----- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- 5 files changed, 34 insertions(+), 31 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index d1991c8e99646..40932ec502a4b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -542,7 +542,7 @@ GPUd() int32_t 
GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack return way == 0; } trk.ConstrainSinPhi(); - if (prop.Update(y, z, row, Param(), flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, nullptr, false, sector, -1.f, 0.f, 0.f)) { // TODO: Use correct time / avgCharge + if (prop.Update(y, z, row, Param(), flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, false, sector, -1.f, 0.f, 0.f)) { // TODO: Use correct time / avgCharge return way == 0; } trk.ConstrainSinPhi(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index c81497367e8bd..a0cfd27c90571 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -608,24 +608,16 @@ GPUd() float GPUTPCGMPropagator::PredictChi2(float posY, float posZ, float err2Y } } -GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, const GPUParam& GPUrestrict() param, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, bool refit, int8_t sector, float time, float avgInvCharge, float invCharge GPUCA_DEBUG_STREAMER_CHECK(, DebugStreamerVals* debugVals)) +GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, const GPUParam& GPUrestrict() param, int16_t clusterState, int8_t rejectChi2, bool refit, int8_t sector, float time, float avgInvCharge, float invCharge) { float err2Y, err2Z; GetErr2(err2Y, err2Z, param, posZ, iRow, clusterState, sector, time, avgInvCharge, invCharge); - GPUCA_DEBUG_STREAMER_CHECK(if (debugVals) { debugVals->err2Y = err2Y; debugVals->err2Z = err2Z; }); - if (rejectChi2 >= rejectInterFill) { - if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { - rejectChi2 = rejectDirect; - } else { - int32_t retVal = InterpolateReject(param, posY, posZ, clusterState, rejectChi2, inter, err2Y, err2Z); - GPUCA_DEBUG_STREAMER_CHECK(if (debugVals) { 
debugVals->retVal = retVal; }); - if (retVal) { - return retVal; - } - } - } + return Update(posY, posZ, iRow, param, clusterState, rejectChi2, refit, err2Y, err2Z); +} +GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, const GPUParam& GPUrestrict() param, int16_t clusterState, int8_t rejectChi2, bool refit, float err2Y, float err2Z) +{ if (mT->NDF() == -5) { // first measurement: no need to filter, as the result is known in advance. just set it. mT->ResetCovariance(); float* mC = mT->Cov(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index a6e2cbc6deb3b..02ef8b293a4b7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -72,11 +72,6 @@ class GPUTPCGMPropagator float radLenInv, DLMax, EP2, sigmadE2, k22, k33, k43, k44; // precalculated values for MS and EnergyLoss correction }; - struct DebugStreamerVals { - int32_t retVal = -100; - float err2Y = -1e6f, err2Z = -1e6f; - }; - GPUd() void SetMaterial(float radLen, float rho); GPUd() void SetMaterialTPC() { SetMaterial(28811.7f, 1.025e-3f); } @@ -109,12 +104,13 @@ class GPUTPCGMPropagator GPUd() int32_t PropagateToXAlphaBz(float posX, float posAlpha, bool inFlyDirection); - GPUd() int32_t Update(float posY, float posZ, int32_t iRow, const GPUParam& param, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, bool refit, int8_t sideC, float time, float avgInvCharge, float invCharge GPUCA_DEBUG_STREAMER_CHECK(, DebugStreamerVals* debugVals = nullptr)); + GPUd() int32_t Update(float posY, float posZ, int32_t iRow, const GPUParam& param, int16_t clusterState, int8_t rejectChi2, bool refit, int8_t sector, float time, float avgInvCharge, float invCharge); + GPUd() int32_t Update(float posY, float posZ, int32_t iRow, const GPUParam& param, int16_t clusterState, int8_t rejectChi2, bool refit, float err2Y, float err2Z); GPUd() int32_t 
Update(float posY, float posZ, int16_t clusterState, bool rejectChi2, float err2Y, float err2Z, const GPUParam* param = nullptr); GPUd() int32_t InterpolateReject(const GPUParam& param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z); GPUd() float PredictChi2(float posY, float posZ, int32_t iRow, const GPUParam& param, int16_t clusterState, int8_t sideC, float time, float avgCharge, float charge) const; GPUd() float PredictChi2(float posY, float posZ, float err2Y, float err2Z) const; - GPUd() int32_t RejectCluster(float chiY, float chiZ, uint8_t clusterState) + GPUd() static int32_t RejectCluster(float chiY, float chiZ, uint8_t clusterState) { if (chiY > 9.f || chiZ > 9.f) { // TODO: Check how a track can have chi2/ncl > 18 return 2; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index d687557b2a570..b96e133f696b2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -229,19 +229,34 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedDistance; } else { int8_t rejectChi2 = attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? (GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); - GPUCA_DEBUG_STREAMER_CHECK(GPUTPCGMPropagator::DebugStreamerVals debugVals;); - if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification + + float err2Y, err2Z; + const float time = merger->GetConstantMem()->ioPtrs.clustersNative ? 
merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].getTime() : -1.f; + const float invSqrtCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? CAMath::InvSqrt(merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; + const float invCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? (1.f / merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; + float invAvgCharge = (sumInvSqrtCharge += invSqrtCharge) / ++nAvgCharge; + invAvgCharge *= invAvgCharge; + + prop.GetErr2(err2Y, err2Z, param, zz, cluster.row, clusterState, cluster.sector, time, invAvgCharge, invCharge); + + int retValInt = 0; + if (rejectChi2 >= GPUTPCGMPropagator::rejectInterFill) { + if (rejectChi2 == GPUTPCGMPropagator::rejectInterReject && interpolation.hit[ihit].errorY < (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { + rejectChi2 = GPUTPCGMPropagator::rejectDirect; + } else { + retValInt = prop.InterpolateReject(param, yy, zz, clusterState, rejectChi2, &interpolation.hit[ihit], err2Y, err2Z); + } + } + + if (retValInt) { + retValUpd = retValInt; + } else if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedEdge; } else { - const float time = merger->GetConstantMem()->ioPtrs.clustersNative ? merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].getTime() : -1.f; - const float invSqrtCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? CAMath::InvSqrt(merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; - const float invCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? 
(1.f / merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; - float invAvgCharge = (sumInvSqrtCharge += invSqrtCharge) / ++nAvgCharge; - invAvgCharge *= invAvgCharge; - retValUpd = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, &interpolation.hit[ihit], refit, cluster.sector, time, invAvgCharge, invCharge GPUCA_DEBUG_STREAMER_CHECK(, &debugVals)); + retValUpd = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, refit, err2Y, err2Z); } GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamUpdateTrack, iTrk)) { - merger->DebugStreamerUpdate(iTrk, ihit, xx, yy, zz, cluster, merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num], *this, prop, interpolation.hit[ihit], rejectChi2, refit, retValUpd, sumInvSqrtCharge / nAvgCharge * sumInvSqrtCharge / nAvgCharge, yy, zz, clusterState, debugVals.retVal, debugVals.err2Y, debugVals.err2Z); + merger->DebugStreamerUpdate(iTrk, ihit, xx, yy, zz, cluster, merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num], *this, prop, interpolation.hit[ihit], rejectChi2, refit, retValUpd, sumInvSqrtCharge / nAvgCharge * sumInvSqrtCharge / nAvgCharge, yy, zz, clusterState, retValInt, err2Y, err2Z); }); } // clang-format off diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 27426cf0ff6a7..4f9d848f2b703 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -346,7 +346,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov } CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", "", prop.GetAlpha(), x, trk.Par()[0], trk.Par()[1], trk.Par()[4], prop.GetQPt0(), trk.Par()[2], prop.GetSinPhi0(), trk.Par()[0] - y, 
trk.Par()[1] - z, sqrtf(trk.Cov()[0]), sqrtf(trk.Cov()[2]), sqrtf(trk.Cov()[5]), sqrtf(trk.Cov()[14]), trk.Cov()[10])); lastSector = sector; - if (prop.Update(y, z, row, *mPparam, clusterState, 0, nullptr, true, sector, time, invAvgCharge, invCharge)) { + if (prop.Update(y, z, row, *mPparam, clusterState, 0, true, sector, time, invAvgCharge, invCharge)) { IgnoreErrors(trk.GetSinPhi()); return -3; } From 3818b8d8f616a1aff940533aa5c7769966f99fa4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 30 Aug 2025 08:59:53 +0200 Subject: [PATCH 47/52] GPU: Temporarily disable with without projections since it gives worse results --- GPU/GPUTracking/Base/GPUParam.cxx | 3 -- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 42 ++++++++++--------- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index 649682939ab39..cc3c6a8bb9140 100644 --- a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -154,9 +154,6 @@ void GPUParam::SetDefaults(const GPUSettingsGRP* g, const GPUSettingsRec* r, con SetDefaults(g->solenoidBzNominalGPU, g->constBz); if (r) { rec = *r; - if (rec.fitPropagateBzOnly == -1) { - rec.fitPropagateBzOnly = rec.tpc.nWays - 1; - } } UpdateSettings(g, p, w); } diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 37998659e77ef..d98008461cfce 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -133,7 +133,7 @@ AddOptionRTC(cfMinSplitNum, uint8_t, 1, "", 0, "Minimum number of split charges AddOptionRTC(cfNoiseSuppressionEpsilon, uint8_t, 10, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression") AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Finder: Difference between peak and charge for the 
charge to count as a minima during noise suppression, relative as fraction of 255") AddOptionRTC(cfEdgeTwoPads, uint8_t, 0, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge as edge cluster") -AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger") +AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger (must be odd to end with inward fit)") AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits") AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 1, "", 0, "Reject clusters that get the IFC mask error during refit") AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fraction of 128") diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index b96e133f696b2..3cfa37e34c22d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -85,26 +85,28 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float sumInvSqrtCharge = 0.f; int32_t nAvgCharge = 0; - if (iWay && ((nWays - iWay) & 1) == 1) { + if (iWay && (iWay & 1) == 0) { StoreOuter(&track.OuterParam(), prop.GetAlpha()); } int32_t resetT0 = initResetT0(); const bool refit = (nWays == 1 || iWay >= 1); + const bool finalOutInFit = iWay + 2 >= nWays; + const bool finalFit = iWay == nWays - 1; const float maxSinForUpdate = CAMath::Sin(70.f * kDeg2Rad); ResetCovariance(); prop.SetSeedingErrors(!(refit && attempt == 0)); - prop.SetFitInProjections(param.rec.fitInProjections == -1 ? (iWay == 0) : param.rec.fitInProjections); - prop.SetPropagateBzOnly(iWay < param.rec.fitPropagateBzOnly); - prop.SetMatLUT((param.rec.useMatLUT && iWay == nWays - 1) ? merger->GetConstantMem()->calibObjects.matLUT : nullptr); + prop.SetFitInProjections(true); // param.rec.fitInProjections == -1 ? 
(iWay == 0) : param.rec.fitInProjections); // TODO: Reenable once fixed + prop.SetPropagateBzOnly(param.rec.fitPropagateBzOnly == -1 ? !finalFit : param.rec.fitPropagateBzOnly); + prop.SetMatLUT((param.rec.useMatLUT && finalFit) ? merger->GetConstantMem()->calibObjects.matLUT : nullptr); prop.SetTrack(this, iWay ? prop.GetAlpha() : Alpha); ConstrainSinPhi(iWay == 0 ? 0.95f : GPUCA_MAX_SIN_PHI_LOW); CADEBUG(printf("Fitting track %d way %d (sector %d, alpha %f) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", iTrk, iWay, CAMath::Float2IntRn(prop.GetAlpha() / kSectAngle) + (mP[1] < 0 ? 18 : 0), prop.GetAlpha())); N = 0; lastUpdateX = -1; - const bool inFlyDirection = !((iWay ^ nWays) & 1); + const bool inFlyDirection = iWay & 1; const int32_t wayDirection = (iWay & 1) ? -1 : 1; int32_t goodRows = 0; @@ -116,13 +118,13 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) { CADEBUG(printf("\tSkipping hit, %d hits rejected, flag %X\n", nMissed, (int32_t)clusters[ihit].state)); - if (iWay + 2 >= nWays && !(clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject)) { + if (finalOutInFit && !(clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject)) { clusters[ihit].state |= GPUTPCGMMergedTrackHit::flagRejectErr; } continue; } - const bool allowModification = refit && (iWay == 0 || (((nWays - iWay) & 1) ? (ihit >= CAMath::Min(maxN / 2, 30)) : (ihit <= CAMath::Max(maxN / 2, maxN - 30)))); + const bool allowChangeClusters = finalOutInFit && (nWays == 1 || ((iWay & 1) ? 
(ihit <= CAMath::Max(maxN / 2, maxN - 30)) : (ihit >= CAMath::Min(maxN / 2, 30)))); int32_t ihitMergeFirst = ihit; uint8_t clusterState = clusters[ihit].state; @@ -137,7 +139,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // CADEBUG(if ((uint32_t)merger->GetTrackingChain()->mIOPtrs.nMCLabelsTPC > clusters[ihit].num)) // CADEBUG({printf(" MC:"); for (int32_t i = 0; i < 3; i++) {int32_t mcId = merger->GetTrackingChain()->mIOPtrs.mcLabelsTPC[clusters[ihit].num].fClusterID[i].fMCID; if (mcId >= 0) printf(" %d", mcId); } } printf("\n")); // clang-format on - if (MergeDoubleRowClusters(ihit, wayDirection, clusters, merger, prop, xx, yy, zz, maxN, clAlpha, clusterState, allowModification) == -1) { + if (MergeDoubleRowClusters(ihit, wayDirection, clusters, merger, prop, xx, yy, zz, maxN, clAlpha, clusterState, allowChangeClusters) == -1) { nMissed++; nMissed2++; continue; @@ -156,9 +158,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // clang-format off CADEBUG(printf("\tSector %2d %11sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.sector, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? 
" " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); // clang-format on - if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { + if (allowChangeClusters && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { - bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2; + bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && finalFit && CAMath::Abs(cluster.row - lastRow) == 2; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, track.Leg() == 0, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection); @@ -206,7 +208,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } float uncorrectedY = -1e6f; - if (allowModification) { + if (allowChangeClusters) { uncorrectedY = AttachClusters(merger, cluster.sector, cluster.row, iTrk, track.Leg() == 0, prop); } @@ -228,7 +230,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (mNDF > (int32_t)param.rec.tpc.mergerNonInterpolateRejectMinNDF && (CAMath::Abs(yy - mP[0]) > threshold || CAMath::Abs(zz - mP[1]) > threshold)) { retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedDistance; } else { - int8_t rejectChi2 = attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? (GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); + int8_t rejectChi2 = attempt ? 0 // In second attempt, we do not reject + : (param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (finalOutInFit ? 
(GPUTPCGMPropagator::rejectInterFill + !(iWay & 1)) : 0) // reject via interpolation + : (allowChangeClusters && goodRows > 5); // normal rejection during the fit float err2Y, err2Z; const float time = merger->GetConstantMem()->ioPtrs.clustersNative ? merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].getTime() : -1.f; @@ -250,7 +254,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (retValInt) { retValUpd = retValInt; - } else if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification + } else if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowChangeClusters retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedEdge; } else { retValUpd = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, refit, err2Y, err2Z); @@ -280,7 +284,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ prop.SetTrack(this, prop.GetAlpha()); } if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { - if (param.dodEdxEnabled && iWay == nWays - 1) { // TODO: Costimize flag to remove, and option to remove double-clusters + if (param.dodEdxEnabled && finalFit) { // TODO: Costimize flag to remove, and option to remove double-clusters bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; if (acc || accAlt) { float qtot = 0, qmax = 0, pad = 0, relTime = 0; @@ -308,9 +312,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } } else if (retValUpd >= GPUTPCGMPropagator::updateErrorClusterRejected) { // cluster far away form the track - if (allowModification) { + if 
(allowChangeClusters) { MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagRejectDistance); - } else if (iWay == nWays - 1) { + } else if (finalFit) { MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagRejectErr); } nMissed++; @@ -319,11 +323,11 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ break; // bad chi2 for the whole track, stop the fit } } - if (nWays - iWay <= 2 && !(merger->Param().rec.tpc.disableRefitAttachment & 4) && lastRow != 255 && lastSector != 255) { + if (finalOutInFit && !(merger->Param().rec.tpc.disableRefitAttachment & 4) && lastRow != 255 && lastSector != 255) { StoreLoopPropagation(merger, lastSector, lastRow, iTrk, lastRow > clusters[(iWay & 1) ? (maxN - 1) : 0].row, prop.GetAlpha()); CADEBUG(printf("\t\tSTORING %d lastRow %d row %d out %d\n", iTrk, (int)lastRow, (int)clusters[(iWay & 1) ? (maxN - 1) : 0].row, lastRow > clusters[(iWay & 1) ? (maxN - 1) : 0].row)); } - if (((nWays - iWay) & 1) && (iWay != nWays - 1) && !track.CCE() && !track.Looper()) { + if (!(iWay & 1) && !finalFit && !track.CCE() && !track.Looper()) { ShiftZ(clusters, merger, maxN); } } @@ -340,8 +344,6 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ return false; } - // TODO: we have looping tracks here with 0 accepted clusters in the primary leg. In that case we should refit the track using only the primary leg. 
- if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->MergedTracksdEdx()[iTrk], param); if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { From 9e34c7ab35c4fd40cea890692801940f6af2eae8 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 30 Aug 2025 16:38:36 +0200 Subject: [PATCH 48/52] GPU: Improve some debug messages --- GPU/GPUTracking/Global/GPUChainTracking.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 11 +-- GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 72 ++++++++++--------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 4 +- 5 files changed, 51 insertions(+), 42 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index b69d0941d9375..430cad041ebe5 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -253,8 +253,8 @@ bool GPUChainTracking::ValidateSettings() GPUError("nWay setting musst be odd number!"); return false; } - if (param().rec.tpc.mergerInterpolateErrors && param().rec.tpc.nWays == 1) { - GPUError("Cannot do error interpolation with NWays = 1!"); + if (param().rec.tpc.mergerInterpolateErrors && param().rec.tpc.nWays < 3) { + GPUError("Cannot do error interpolation with NWays < 3!"); return false; } if (param().continuousMaxTimeBin > (int32_t)GPUSettings::TPC_MAX_TF_TIME_BIN) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index a0cfd27c90571..e91426b51e5c4 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -635,10 +635,10 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, return 0; } - return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || 
(param.rec.tpc.mergerInterpolateRejectAlsoOnCurrentPosition && rejectChi2 == rejectInterReject && mT->GetNDF() > (int32_t)param.rec.tpc.mergerNonInterpolateRejectMinNDF), err2Y, err2Z, ¶m); + return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect, err2Y, err2Z, ¶m); } -GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict() param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z) +GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict() param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z, float deltaZ) { float* GPUrestrict() mC = mT->Cov(); float* GPUrestrict() mP = mT->Par(); @@ -655,10 +655,10 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict float chi2Y, chi2Z; if (mT->NDF() <= 0) { chi2Y = CAMath::Square((float)inter->posY - posY) / ((float)inter->errorY + err2Y); - chi2Z = CAMath::Square((float)inter->posZ - posZ) / ((float)inter->errorZ + err2Z); + chi2Z = CAMath::Square((float)inter->posZ + deltaZ - posZ) / ((float)inter->errorZ + err2Z); } else if (mFitInProjections) { const float Iz0 = inter->posY - mP[0]; - const float Iz1 = inter->posZ - mP[1]; + const float Iz1 = inter->posZ + deltaZ - mP[1]; const float Iw0 = 1.f / (mC[0] + (float)inter->errorY); const float Iw2 = 1.f / (mC[2] + (float)inter->errorZ); const float Ik00 = mC[0] * Iw0; @@ -676,7 +676,7 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict chi2Z = Jw2 * Jz1 * Jz1; } else { const float Iz0 = inter->posY - mP[0]; - const float Iz1 = inter->posZ - mP[1]; + const float Iz1 = inter->posZ + deltaZ - mP[1]; float Iw0 = mC[2] + (float)inter->errorZ; float Iw2 = mC[0] + (float)inter->errorY; float Idet = CAMath::Max(1e-10f, Iw0 * Iw2 - mC[1] * mC[1]); @@ -706,6 +706,7 @@ GPUd() int32_t 
GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict chi2Z = CAMath::Abs((Jw1 * Jz0 + Jw2 * Jz1) * Jz1); } if (RejectCluster(chi2Y * param.rec.tpc.clusterRejectChi2TolleranceY, chi2Z * param.rec.tpc.clusterRejectChi2TolleranceZ, clusterState)) { // TODO: Relative Pt resolution decreases slightly, why? + // printf("Reject Cluster chiy2 %f chiz2 %f (Pos Y: %f - %f %f ; Pos Z: %f - %f %f)\n", chi2Y, chi2Z, posY, mP[0], (float)inter->posY, posZ, mP[1], (float)inter->posZ + deltaZ); return updateErrorClusterRejectedInInterpolation; } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index 02ef8b293a4b7..47e6c870dac25 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -107,7 +107,7 @@ class GPUTPCGMPropagator GPUd() int32_t Update(float posY, float posZ, int32_t iRow, const GPUParam& param, int16_t clusterState, int8_t rejectChi2, bool refit, int8_t sector, float time, float avgInvCharge, float invCharge); GPUd() int32_t Update(float posY, float posZ, int32_t iRow, const GPUParam& param, int16_t clusterState, int8_t rejectChi2, bool refit, float err2Y, float err2Z); GPUd() int32_t Update(float posY, float posZ, int16_t clusterState, bool rejectChi2, float err2Y, float err2Z, const GPUParam* param = nullptr); - GPUd() int32_t InterpolateReject(const GPUParam& param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z); + GPUd() int32_t InterpolateReject(const GPUParam& param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z, float deltaZ); GPUd() float PredictChi2(float posY, float posZ, int32_t iRow, const GPUParam& param, int16_t clusterState, int8_t sideC, float time, float avgCharge, float charge) const; GPUd() float PredictChi2(float posY, float posZ, float err2Y, 
float err2Z) const; GPUd() static int32_t RejectCluster(float chiY, float chiZ, uint8_t clusterState) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 3cfa37e34c22d..d865a3b6899b4 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -79,6 +79,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float lastUpdateX = -1.f; uint8_t lastRow = 255; uint8_t lastSector = 255; + float deltaZ = 0.f; for (int32_t iWay = 0; iWay < nWays; iWay++) { int32_t nMissed = 0, nMissed2 = 0; @@ -117,7 +118,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) { - CADEBUG(printf("\tSkipping hit, %d hits rejected, flag %X\n", nMissed, (int32_t)clusters[ihit].state)); + CADEBUG(printf("\tSkipping hit %d, %d hits rejected, flag %X\n", ihit, nMissed, (int32_t)clusters[ihit].state)); if (finalOutInFit && !(clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject)) { clusters[ihit].state |= GPUTPCGMMergedTrackHit::flagRejectErr; } @@ -225,14 +226,25 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } CADEBUG(printf("\n")); - int32_t retValUpd; + int32_t retValUpd = 0, retValInt = 0; float threshold = 3.f + (lastUpdateX >= 0 ? (CAMath::Abs(mX - lastUpdateX) / 2) : 0.f); if (mNDF > (int32_t)param.rec.tpc.mergerNonInterpolateRejectMinNDF && (CAMath::Abs(yy - mP[0]) > threshold || CAMath::Abs(zz - mP[1]) > threshold)) { retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedDistance; } else { - int8_t rejectChi2 = attempt ? 0 // In second attempt, we do not reject - : (param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (finalOutInFit ? 
(GPUTPCGMPropagator::rejectInterFill + !(iWay & 1)) : 0) // reject via interpolation - : (allowChangeClusters && goodRows > 5); // normal rejection during the fit + int8_t rejectChi2 = 0; + if (attempt == 0) { + if (param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) { + if (iWay == nWays - 3) { + rejectChi2 = GPUTPCGMPropagator::rejectInterFill; + } else if (iWay == nWays - 2) { + rejectChi2 = GPUTPCGMPropagator::rejectInterReject; + } else if (iWay == nWays - 1) { + rejectChi2 = (param.rec.tpc.mergerInterpolateRejectAlsoOnCurrentPosition && GetNDF() > (int32_t)param.rec.tpc.mergerNonInterpolateRejectMinNDF) ? GPUTPCGMPropagator::rejectDirect : 0; + } + } else { + rejectChi2 = allowChangeClusters && goodRows > 5; + } + } float err2Y, err2Z; const float time = merger->GetConstantMem()->ioPtrs.clustersNative ? merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].getTime() : -1.f; @@ -243,18 +255,15 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ prop.GetErr2(err2Y, err2Z, param, zz, cluster.row, clusterState, cluster.sector, time, invAvgCharge, invCharge); - int retValInt = 0; if (rejectChi2 >= GPUTPCGMPropagator::rejectInterFill) { if (rejectChi2 == GPUTPCGMPropagator::rejectInterReject && interpolation.hit[ihit].errorY < (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { rejectChi2 = GPUTPCGMPropagator::rejectDirect; } else { - retValInt = prop.InterpolateReject(param, yy, zz, clusterState, rejectChi2, &interpolation.hit[ihit], err2Y, err2Z); + retValInt = prop.InterpolateReject(param, yy, zz, clusterState, rejectChi2, &interpolation.hit[ihit], err2Y, err2Z, deltaZ); } } - if (retValInt) { - retValUpd = retValInt; - } else if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowChangeClusters + if (param.rec.tpc.rejectEdgeClustersInTrackFit && 
uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowChangeClusters retValUpd = GPUTPCGMPropagator::updateErrorClusterRejectedEdge; } else { retValUpd = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, refit, err2Y, err2Z); @@ -265,11 +274,11 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } // clang-format off CADEBUG(if (!CheckCov()) GPUError("INVALID COV AFTER UPDATE!!!")); - CADEBUG(printf("\t%21sFit Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f), DzDs %5.2f %16s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - FErr %d\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[3], "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], retValUpd)); + CADEBUG(printf("\t%21sFit Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f), DzDs %5.2f %16s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - FErr %d %d\n", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[3], "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], retValUpd, retValInt)); // clang-format on - ConstrainSinPhi(); // TODO: Limit using ConstrainSinPhi everywhere! - if (retValUpd == 0) // track is updated + ConstrainSinPhi(); // TODO: Limit using ConstrainSinPhi everywhere! 
+ if (!retValUpd && !retValInt) // track is updated { lastUpdateX = mX; covYYUpd = mC[0]; @@ -311,14 +320,16 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } } - } else if (retValUpd >= GPUTPCGMPropagator::updateErrorClusterRejected) { // cluster far away form the track - if (allowChangeClusters) { + } else if (retValInt || retValUpd >= GPUTPCGMPropagator::updateErrorClusterRejected) { // cluster far away form the track + if (retValInt || allowChangeClusters) { MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagRejectDistance); } else if (finalFit) { MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagRejectErr); } - nMissed++; - nMissed2++; + if (!retValInt) { + nMissed++; + nMissed2++; + } } else { break; // bad chi2 for the whole track, stop the fit } @@ -328,7 +339,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("\t\tSTORING %d lastRow %d row %d out %d\n", iTrk, (int)lastRow, (int)clusters[(iWay & 1) ? (maxN - 1) : 0].row, lastRow > clusters[(iWay & 1) ? 
(maxN - 1) : 0].row)); } if (!(iWay & 1) && !finalFit && !track.CCE() && !track.Looper()) { - ShiftZ(clusters, merger, maxN); + deltaZ = ShiftZ(clusters, merger, maxN); + } else { + deltaZ = 0.f; } } ConstrainSinPhi(); @@ -775,7 +788,7 @@ GPUdi() void GPUTPCGMTrackParam::AttachClustersLooper(const GPUTPCGMMerger* GPUr } } -GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N) +GPUd() float GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N) { if (N == 0) { N = 1; @@ -783,13 +796,13 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMergedTrackHit* clusters, c const auto& GPUrestrict() cls = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear; float z0 = cls[clusters[0].num].getTime(), zn = cls[clusters[N - 1].num].getTime(); const auto tmp = zn > z0 ? std::array{zn, z0, GPUTPCGeometry::Row2X(clusters[N - 1].row)} : std::array{z0, zn, GPUTPCGeometry::Row2X(clusters[0].row)}; - ShiftZ(merger, clusters[0].sector, tmp[0], tmp[1], tmp[2]); + return ShiftZ(merger, clusters[0].sector, tmp[0], tmp[1], tmp[2]); } -GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merger, int32_t sector, float cltmax, float cltmin, float clx) +GPUd() float GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merger, int32_t sector, float cltmax, float cltmin, float clx) { if (!merger->Param().par.continuousTracking) { - return; + return 0.f; } float deltaZ = 0.f; bool beamlineReached = false; @@ -828,7 +841,6 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge { float deltaT = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaZtoDeltaTimeInTimeFrame(sector, deltaZ); mTOffset += deltaT; - mP[1] -= deltaZ; const float maxT = cltmin - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getT0(); const float minT = cltmax - 
merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(sector); // printf("T Check: Clusters %f %f, min %f max %f vtx %f\n", tz1, tz2, minT, maxT, mTOffset); @@ -840,13 +852,14 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge deltaT = maxT - mTOffset; } if (deltaT != 0.f) { - deltaZ = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(sector, deltaT); + deltaZ += merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(sector, deltaT); // printf("Moving clusters to TPC Range: QPt %f, New mTOffset %f, t1 %f, t2 %f, Shift %f in Z: %f to %f --> %f to %f in T\n", mP[4], mTOffset + deltaT, tz1, tz2, deltaZ, tz2 - mTOffset, tz1 - mTOffset, tz2 - mTOffset - deltaT, tz1 - mTOffset - deltaT); mTOffset += deltaT; - mP[1] -= deltaZ; } + mP[1] -= deltaZ; } // printf("\n"); + return -deltaZ; } GPUd() bool GPUTPCGMTrackParam::CheckCov() const @@ -861,28 +874,23 @@ GPUd() bool GPUTPCGMTrackParam::CheckNumericalQuality(float overrideCovYY) const { //* Check that the track parameters and covariance matrix are reasonable bool ok = CAMath::Finite(mX) && CAMath::Finite(mChi2); - CADEBUG(printf("OK %d - %f - ", (int32_t)ok, mX); for (int32_t i = 0; i < 5; i++) { printf("%f ", mP[i]); } printf(" - "); for (int32_t i = 0; i < 15; i++) { printf("%f ", mC[i]); } printf("\n")); + // CADEBUG(printf("OK %d - %f - ", (int32_t)ok, mX); for (int32_t i = 0; i < 5; i++) { printf("%f ", mP[i]); } printf(" - "); for (int32_t i = 0; i < 15; i++) { printf("%f ", mC[i]); } printf("\n")); const float* c = mC; for (int32_t i = 0; i < 15; i++) { ok = ok && CAMath::Finite(c[i]); } - CADEBUG(printf("OK1 %d\n", (int32_t)ok)); for (int32_t i = 0; i < 5; i++) { ok = ok && CAMath::Finite(mP[i]); } - CADEBUG(printf("OK2 %d\n", (int32_t)ok)); if ((overrideCovYY > 0 ? 
overrideCovYY : c[0]) > 4.f * 4.f || c[2] > 4.f * 4.f || c[5] > 2.f * 2.f || c[9] > 2.f * 2.f) { ok = 0; } - CADEBUG(printf("OK3 %d\n", (int32_t)ok)); if (CAMath::Abs(mP[2]) > GPUCA_MAX_SIN_PHI) { ok = 0; } - CADEBUG(printf("OK4 %d\n", (int32_t)ok)); if (!CheckCov()) { ok = false; } - CADEBUG(printf("OK5 %d\n", (int32_t)ok)); return ok; } @@ -903,7 +911,7 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt()); bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, track); CADEBUG(printf("Finished Fit Track %d\n", iTrk)); - CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, ok %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits))); + CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, OK %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits))); if (!ok && attempt == 0 && merger->Param().rec.tpc.retryRefit) { for (uint32_t i = 0; i < track.NClusters(); i++) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index 1c084f15874fe..f2812be8e16a3 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -183,8 +183,8 @@ class GPUTPCGMTrackParam } GPUd() void Rotate(float alpha); - GPUd() void ShiftZ(const GPUTPCGMMerger* merger, int32_t sector, float cltmax, float cltmin, float clx); - GPUd() void ShiftZ(const GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N); + GPUd() float ShiftZ(const GPUTPCGMMerger* merger, int32_t sector, float cltmax, float cltmin, float clx); + GPUd() float ShiftZ(const 
GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, int32_t N); GPUd() static float Reciprocal(float x) { return 1.f / x; } GPUdi() static void Assign(float& x, bool mask, float v) From 37030d81f4536b9654061013adbe438a9eea4c85 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 4 Sep 2025 11:00:22 +0200 Subject: [PATCH 49/52] GPU TPC: Make Looper Merging Afterburner work with new Segmented Track Fit --- .../DataCompression/GPUTPCClusterRejection.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 52 +++++++++++++------ GPU/GPUTracking/qa/GPUQA.cxx | 2 +- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h index 5c25813e75d29..f39994f2d1045 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h @@ -23,7 +23,7 @@ struct GPUTPCClusterRejection { template static constexpr inline bool GetProtectionStatus(int32_t attach, bool& physics, bool& protect, T* counts = nullptr, S* mev200 = nullptr) { - (void)counts; // Avoid incorrect -Wunused-but-set-parameter warning + (void)counts; // FIXME: Avoid incorrect -Wunused-but-set-parameter warning (void)mev200; if (attach == 0) { return false; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 40932ec502a4b..464f315975920 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1887,13 +1887,12 @@ GPUd() void GPUTPCGMMerger::Finalize2(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - return; // FIXME: !!!! 
- const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold + const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the rejection threshold for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) { const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); - if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { + if (trk.OK() && trk.NClusters() && trk.Leg() == 0 && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector; const float refz = p.GetZ() + GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTOffset(), Param().continuousMaxTimeBin) + (trk.CSide() ? 
-100 : 100); float sinA, cosA; @@ -1942,12 +1941,12 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - const MergeLooperParam* params = mLooperCandidates; + const MergeLooperParam* candidates = mLooperCandidates; #if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE) std::vector paramLabels(mMemory->nLooperMatchCandidates); for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) { - paramLabels[i] = GetTrackLabel(mMergedTracks[params[i].id]); + paramLabels[i] = GetTrackLabel(mMergedTracks[candidates[i].id]); } /*std::vector dropped(mMemory->nLooperMatchCandidates); std::vector droppedMC(mMemory->nLooperMatchCandidates); @@ -1961,16 +1960,37 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, for (uint32_t i = get_global_id(0); i < mMemory->nLooperMatchCandidates; i += get_global_size(0)) { for (uint32_t j = i + 1; j < mMemory->nLooperMatchCandidates; j++) { // int32_t bs = 0; - if (CAMath::Abs(params[j].refz) > CAMath::Abs(params[i].refz) + 100.f) { + assert(CAMath::Abs(candidates[i].refz) <= CAMath::Abs(candidates[j].refz)); + if (CAMath::Abs(candidates[j].refz) > CAMath::Abs(candidates[i].refz) + 100.f) { break; } - const float d2xy = CAMath::Sum2(params[i].x - params[j].x, params[i].y - params[j].y); + const float d2xy = CAMath::Sum2(candidates[i].x - candidates[j].x, candidates[i].y - candidates[j].y); if (d2xy > 15.f) { // bs |= 1; continue; } - const auto& trk1 = mMergedTracks[params[i].id]; - const auto& trk2 = mMergedTracks[params[j].id]; + + const GPUTPCGMMergedTrack* trkI = &mMergedTracks[candidates[i].id]; + float refZI = candidates[i].refz; + { + const auto* tmp = trkI; + while (tmp->PrevSegment() >= 0) { + const auto* next = &mMergedTracks[tmp->PrevSegment()]; + if (next == trkI) { + break; + } + tmp = next; + } + if (tmp != trkI && tmp->CSide() == trkI->CSide() && 
CAMath::Abs(tmp->GetParam().GetZ()) > CAMath::Abs(trkI->GetParam().GetZ())) { + float tmpRefZ = refZI + tmp->GetParam().GetZ() - trkI->GetParam().GetZ(); + if (CAMath::Abs(tmpRefZ) < CAMath::Abs(candidates[j].refz) && CAMath::Abs(tmpRefZ) > CAMath::Abs(refZI)) { + trkI = tmp; + refZI = tmpRefZ; + } + } + }; + const auto& trk1 = *trkI; + const auto& trk2 = mMergedTracks[candidates[j].id]; const auto& param1 = trk1.GetParam(); const auto& param2 = trk2.GetParam(); if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) { @@ -1978,9 +1998,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, continue; } - const float dznormalized = (CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz)) / (CAMath::TwoPi() * 0.5f * (CAMath::Abs(param1.GetDzDs()) + CAMath::Abs(param2.GetDzDs())) * 1.f / (0.5f * (CAMath::Abs(param1.GetQPt()) + CAMath::Abs(param2.GetQPt())) * CAMath::Abs(Param().polynomialField.GetNominalBz()))); + const float dznormalized = (CAMath::Abs(candidates[j].refz) - CAMath::Abs(refZI)) / (CAMath::TwoPi() * 0.5f * (CAMath::Abs(param1.GetDzDs()) + CAMath::Abs(param2.GetDzDs())) * 1.f / (0.5f * (CAMath::Abs(param1.GetQPt()) + CAMath::Abs(param2.GetQPt())) * CAMath::Abs(Param().polynomialField.GetNominalBz()))); const float phasecorr = CAMath::Modf((CAMath::ASin(param1.GetSinPhi()) + trk1.GetAlpha() - CAMath::ASin(param2.GetSinPhi()) - trk2.GetAlpha()) / CAMath::TwoPi() + 5.5f, 1.f) - 0.5f; - const float phasecorrdirection = (params[j].refz * param1.GetQPt() * param1.GetDzDs()) > 0 ? 1 : -1; + const float phasecorrdirection = (candidates[j].refz * param1.GetQPt() * param1.GetDzDs()) > 0 ? 1 : -1; const float dzcorr = dznormalized + phasecorr * phasecorrdirection; const bool sameside = !(trk1.CSide() ^ trk2.CSide()); const float dzcorrlimit[4] = {sameside ? 0.018f : 0.012f, sameside ? 
0.12f : 0.025f, 0.14f, 0.15f}; @@ -2009,11 +2029,11 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const int64_t label2 = paramLabels[j]; bool labelEQ = label1 != -1 && label1 == label2; if (1 || EQ || labelEQ) { - // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mMergedTracks[params[i].id].CSide(), (int32_t)mMergedTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, params[i].y, params[j].y); + // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mMergedTracks[candidates[i].id].CSide(), (int32_t)mMergedTracks[candidates[j].id].CSide(), candidates[i].tgl, candidates[j].tgl, candidates[i].qpt, candidates[j].qpt, candidates[i].x, candidates[j].x, candidates[i].y, candidates[j].y); static auto& tup = GPUROOTDump::get("mergeloopers", "labeleq:sides:d2xy:tgl1:tgl2:qpt1:qpt2:dz:dzcorr:dtgl:dqpt:dznorm:bs"); - tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz), dzcorr, dtgl, dqpt, dznorm, bs); + tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 
2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(candidates[j].refz) - CAMath::Abs(refZI), dzcorr, dtgl, dqpt, dznorm, bs); static auto tup2 = GPUROOTDump::getNew("mergeloopers2", "labeleq:refz1:refz2:tgl1:tgl2:qpt1:qpt2:snp1:snp2:a1:a2:dzn:phasecor:phasedir:dzcorr"); - tup2.Fill((float)labelEQ, params[i].refz, params[j].refz, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), param1.GetSinPhi(), param2.GetSinPhi(), trk1.GetAlpha(), trk2.GetAlpha(), dznormalized, phasecorr, phasecorrdirection, dzcorr); + tup2.Fill((float)labelEQ, refZI, candidates[j].refz, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), param1.GetSinPhi(), param2.GetSinPhi(), trk1.GetAlpha(), trk2.GetAlpha(), dznormalized, phasecorr, phasecorrdirection, dzcorr); } /*if (EQ) { dropped[j] = true; @@ -2027,9 +2047,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, }*/ #endif if (EQ) { - mMergedTracks[params[j].id].SetMergedLooperUnconnected(true); + mMergedTracks[candidates[j].id].SetMergedLooperUnconnected(true); if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) { - mMergedTracks[params[i].id].SetMergedLooperUnconnected(true); + mMergedTracks[candidates[i].id].SetMergedLooperUnconnected(true); } } } diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 6d1e724e1be3b..4f2c13635befa 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -2967,11 +2967,11 @@ int32_t GPUQA::DoClusterCounts(uint64_t* attachClusterCounts, int32_t mode) PrintClusterCount(mode, num, "Merged Loopers (Track Merging)", mClusterCounts.nMergedLooperConnected, mClusterCounts.nTotal); PrintClusterCount(mode, num, "Merged Loopers (Afterburner)", mClusterCounts.nMergedLooperUnconnected, mClusterCounts.nTotal); + PrintClusterCount(mode, num, "Looping Legs (other)", mClusterCounts.nLoopers, mClusterCounts.nTotal); 
PrintClusterCount(mode, num, "High Inclination Angle", mClusterCounts.nHighIncl, mClusterCounts.nTotal); PrintClusterCount(mode, num, "Rejected", mClusterCounts.nRejected, mClusterCounts.nTotal); PrintClusterCount(mode, num, "Tube (> 200 MeV)", mClusterCounts.nTube, mClusterCounts.nTotal); PrintClusterCount(mode, num, "Tube (< 200 MeV)", mClusterCounts.nTube200, mClusterCounts.nTotal); - PrintClusterCount(mode, num, "Looping Legs", mClusterCounts.nLoopers, mClusterCounts.nTotal); PrintClusterCount(mode, num, "Low Pt < 50 MeV", mClusterCounts.nLowPt, mClusterCounts.nTotal); PrintClusterCount(mode, num, "Low Pt < 200 MeV", mClusterCounts.n200MeV, mClusterCounts.nTotal); From c426151b324bb0abbbf9004ff30e1bcc6a0d2736 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 4 Sep 2025 11:49:36 +0200 Subject: [PATCH 50/52] GPU TPC: Avoid some code duplication --- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 18 ++++++++++++++++++ GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 18 ++---------------- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 9 +-------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 358a808e120a9..46b017523a107 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -49,6 +49,24 @@ class GPUTPCGMMergedTrack GPUd() bool MergedLooperConnected() const { return mFlags & 0x20; } GPUd() bool MergedLooper() const { return mFlags & 0x30; } GPUd() int32_t PrevSegment() const { return mPrevSegment; } + template + GPUd() static T* GetFirstSegment_helper(T* me, T* base) + { + if (me->mPrevSegment < 0) { + return me; + } + T* cur = &base[me->mPrevSegment]; + while (cur->mPrevSegment >= 0) { + T* next = &base[cur->mPrevSegment]; + if (next == me) { + return cur; + } + cur = next; + } + return cur; + } + GPUd() GPUTPCGMMergedTrack* GetFirstSegment(GPUTPCGMMergedTrack* base) { return 
GetFirstSegment_helper(this, base); } + GPUd() const GPUTPCGMMergedTrack* GetFirstSegment(const GPUTPCGMMergedTrack* base) const { return GetFirstSegment_helper(this, base); } GPUd() uint8_t Leg() const { return mLeg; } GPUd() uint8_t Flags() const { return mFlags; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 464f315975920..4e0526e17dec5 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1797,14 +1797,7 @@ GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, in CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u); } if (!trk.CCE() && !trk.MergedLooper()) { - GPUTPCGMMergedTrack* updTrk = &trk; - while (updTrk->PrevSegment() >= 0) { - auto next = &mMergedTracks[updTrk->PrevSegment()]; - if (next == &trk) { - break; - } - updTrk = next; - } + GPUTPCGMMergedTrack* updTrk = trk.GetFirstSegment(mMergedTracks); const auto &cl0 = mClusters[trk.FirstClusterRef()], &cln = mClusters[updTrk->FirstClusterRef() + updTrk->NClusters() - 1]; const auto& GPUrestrict() cls = GetConstantMem()->ioPtrs.clustersNative->clustersLinear; float z0 = cls[cl0.num].getTime(), zn = cls[cln.num].getTime(); @@ -1973,14 +1966,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const GPUTPCGMMergedTrack* trkI = &mMergedTracks[candidates[i].id]; float refZI = candidates[i].refz; { - const auto* tmp = trkI; - while (tmp->PrevSegment() >= 0) { - const auto* next = &mMergedTracks[tmp->PrevSegment()]; - if (next == trkI) { - break; - } - tmp = next; - } + const auto* tmp = trkI->GetFirstSegment(mMergedTracks); if (tmp != trkI && tmp->CSide() == trkI->CSide() && CAMath::Abs(tmp->GetParam().GetZ()) > CAMath::Abs(trkI->GetParam().GetZ())) { float tmpRefZ = refZI + tmp->GetParam().GetZ() - trkI->GetParam().GetZ(); if (CAMath::Abs(tmpRefZ) < CAMath::Abs(candidates[j].refz) && CAMath::Abs(tmpRefZ) > 
CAMath::Abs(refZI)) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9c789a8d95f82..d63d764a2613c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -212,14 +212,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks } if (track.PrevSegment() >= 0) { - const GPUTPCGMMergedTrack* chkTrk = &tracks[track.PrevSegment()]; - while (chkTrk->PrevSegment() >= 0) { - auto next = &tracks[chkTrk->PrevSegment()]; - if (next == &track) { - break; - } - chkTrk = next; - } + const GPUTPCGMMergedTrack* chkTrk = track.GetFirstSegment(tracks); const auto& firstPrevCluster = trackClusters[chkTrk->FirstClusterRef()]; t1 = clusters->clustersLinear[firstPrevCluster.num].getTime(); sector1 = firstPrevCluster.sector; From 138c7947bc3eee1aef8a89e1ca9de54fa78437c6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 10 Sep 2025 23:48:18 +0200 Subject: [PATCH 51/52] GPU TPC: Make workarounds for cyclic merge graphs optional (to be checked, but should be removed eventually) --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 8 +++---- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 24 ++++++++++++------- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index d98008461cfce..de8ed938b7422 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -199,6 +199,7 @@ AddOptionRTC(fitInProjections, int8_t, -1, "", 0, "Fit in projection, -1 to enab AddOptionRTC(fitPropagateBzOnly, int8_t, -1, "", 0, "Propagate using Bz only for n passes") AddOptionRTC(useMatLUT, int8_t, 0, "", 0, "Use material lookup table for TPC refit") AddOptionRTC(trackingRefitGPUModel, int8_t, 1, "", 0, "Use GPU track model for the Global Track 
Refit") +AddOptionRTC(enableCyclicGraphWorkarounds, int8_t, 0, "", 0, "Apply workarounds to avoid cyclic merge graphs, should not be needed") AddCustomCPP(void SetMinTrackPtB5(float v) { maxTrackQPtB5 = v > 0.001f ? (1.f / v) : (1.f / 0.001f); }) AddSubConfig(GPUSettingsRecTPC, tpc) AddSubConfig(GPUSettingsRecTRD, trd) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 46b017523a107..b7d6b2aebfbb8 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -50,7 +50,7 @@ class GPUTPCGMMergedTrack GPUd() bool MergedLooper() const { return mFlags & 0x30; } GPUd() int32_t PrevSegment() const { return mPrevSegment; } template - GPUd() static T* GetFirstSegment_helper(T* me, T* base) + GPUd() static T* GetFirstSegment_helper(T* me, T* base, bool workaround) { if (me->mPrevSegment < 0) { return me; @@ -58,15 +58,15 @@ class GPUTPCGMMergedTrack T* cur = &base[me->mPrevSegment]; while (cur->mPrevSegment >= 0) { T* next = &base[cur->mPrevSegment]; - if (next == me) { + if (workaround && next == me) { return cur; } cur = next; } return cur; } - GPUd() GPUTPCGMMergedTrack* GetFirstSegment(GPUTPCGMMergedTrack* base) { return GetFirstSegment_helper(this, base); } - GPUd() const GPUTPCGMMergedTrack* GetFirstSegment(const GPUTPCGMMergedTrack* base) const { return GetFirstSegment_helper(this, base); } + GPUd() GPUTPCGMMergedTrack* GetFirstSegment(GPUTPCGMMergedTrack* base, bool workaround) { return GetFirstSegment_helper(this, base, workaround); } + GPUd() const GPUTPCGMMergedTrack* GetFirstSegment(const GPUTPCGMMergedTrack* base, bool workaround) const { return GetFirstSegment_helper(this, base, workaround); } GPUd() uint8_t Leg() const { return mLeg; } GPUd() uint8_t Flags() const { return mFlags; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 4e0526e17dec5..6121ce0aa89bb 100644 --- 
a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1461,7 +1461,9 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (trbase->PrevSegmentNeighbour() >= 0) { trbase = nullptr; } else { - trbase->SetPrevSegmentNeighbour(1000000001); + if (Param().rec.enableCyclicGraphWorkarounds) { + trbase->SetPrevSegmentNeighbour(1000000001); + } leg += revertSegments ? 1 : -1; } } else { @@ -1483,13 +1485,15 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } revertSegments = false; revertInSegment = false; - trbase->SetPrevSegmentNeighbour(1000000000); + if (Param().rec.enableCyclicGraphWorkarounds) { + trbase->SetPrevSegmentNeighbour(1000000000); + } int32_t jtr = trbase->NextNeighbour(); leg = 0; if (jtr >= 0) { int32_t lasttr = itr; while (jtr >= 0) { // --------------- count segments --------------- - if (&mSectorTrackInfos[jtr] == trbase) { + if (Param().rec.enableCyclicGraphWorkarounds && &mSectorTrackInfos[jtr] == trbase) { break; // Break cyclic graph } lasttr = jtr; @@ -1512,7 +1516,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mainT = t; } int32_t next = trchk->NextSegmentNeighbour(); - if (next < 0 || next == ichk) { + if (next < 0 || (Param().rec.enableCyclicGraphWorkarounds && next == ichk)) { break; // Breaks also cycles } trchk = &mSectorTrackInfos[next]; @@ -1533,7 +1537,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread length = trchk->OrigTrack()->NHits(); } int32_t next = trchk->NextSegmentNeighbour(); - if (next < 0 || next == ichk) { + if (next < 0 || (Param().rec.enableCyclicGraphWorkarounds && next == ichk)) { break; // Breaks also cycles } trchk = &mSectorTrackInfos[next]; @@ -1575,7 +1579,9 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread int32_t jtr = tr->NextSegmentNeighbour(); if (jtr >= 0) { tr = &(mSectorTrackInfos[jtr]); - 
tr->SetPrevSegmentNeighbour(1000000002); + if (Param().rec.enableCyclicGraphWorkarounds) { + tr->SetPrevSegmentNeighbour(1000000002); + } continue; } break; @@ -1797,7 +1803,7 @@ GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, in CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u); } if (!trk.CCE() && !trk.MergedLooper()) { - GPUTPCGMMergedTrack* updTrk = trk.GetFirstSegment(mMergedTracks); + GPUTPCGMMergedTrack* updTrk = trk.GetFirstSegment(mMergedTracks, Param().rec.enableCyclicGraphWorkarounds); const auto &cl0 = mClusters[trk.FirstClusterRef()], &cln = mClusters[updTrk->FirstClusterRef() + updTrk->NClusters() - 1]; const auto& GPUrestrict() cls = GetConstantMem()->ioPtrs.clustersNative->clustersLinear; float z0 = cls[cl0.num].getTime(), zn = cls[cln.num].getTime(); @@ -1806,7 +1812,7 @@ GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, in updTrk = &trk; while (updTrk->PrevSegment() >= 0) { auto next = &mMergedTracks[updTrk->PrevSegment()]; - if (next == &trk) { + if (Param().rec.enableCyclicGraphWorkarounds && next == &trk) { break; } updTrk = next; @@ -1966,7 +1972,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const GPUTPCGMMergedTrack* trkI = &mMergedTracks[candidates[i].id]; float refZI = candidates[i].refz; { - const auto* tmp = trkI->GetFirstSegment(mMergedTracks); + const auto* tmp = trkI->GetFirstSegment(mMergedTracks, Param().rec.enableCyclicGraphWorkarounds); if (tmp != trkI && tmp->CSide() == trkI->CSide() && CAMath::Abs(tmp->GetParam().GetZ()) > CAMath::Abs(trkI->GetParam().GetZ())) { float tmpRefZ = refZI + tmp->GetParam().GetZ() - trkI->GetParam().GetZ(); if (CAMath::Abs(tmpRefZ) < CAMath::Abs(candidates[j].refz) && CAMath::Abs(tmpRefZ) > CAMath::Abs(refZI)) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index d63d764a2613c..e911275da1e55 100644 --- 
a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -212,7 +212,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks } if (track.PrevSegment() >= 0) { - const GPUTPCGMMergedTrack* chkTrk = track.GetFirstSegment(tracks); + const GPUTPCGMMergedTrack* chkTrk = track.GetFirstSegment(tracks, merger.Param().rec.enableCyclicGraphWorkarounds); const auto& firstPrevCluster = trackClusters[chkTrk->FirstClusterRef()]; t1 = clusters->clustersLinear[firstPrevCluster.num].getTime(); sector1 = firstPrevCluster.sector; From ac854e3a1113d80c8e47b6823001c2605c82b955 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 11 Sep 2025 08:51:04 +0200 Subject: [PATCH 52/52] GPU: Add some more optional sanity checks --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 5 +- GPU/GPUTracking/Definitions/GPUSettingsList.h | 3 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 6 +- GPU/GPUTracking/Global/GPUChainTracking.h | 2 +- .../Global/GPUChainTrackingClusterizer.cxx | 2 +- .../GPUChainTrackingDebugAndProfiling.cxx | 2 +- .../Global/GPUChainTrackingMerger.cxx | 6 + GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 162 +++++++++++------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 1 - .../Standalone/Benchmark/standalone.cxx | 2 +- 11 files changed, 128 insertions(+), 67 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 17e2a2a27c747..6d64fb3daca6a 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -294,14 +294,15 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (!(mRecoSteps.stepsGPUMask & GPUDataTypes::RecoStep::TPCMerging)) { mProcessingSettings->mergerSortTracks = false; } - if (GetProcessingSettings().debugLevel > 3 || !IsGPU() || GetProcessingSettings().deterministicGPUReconstruction) { mProcessingSettings->delayedOutput = false; } - if 
(!GetProcessingSettings().rtc.enable) { mProcessingSettings->rtc.optConstexpr = false; } + if (GetProcessingSettings().allSanityChecks) { + mProcessingSettings->clusterizerZSSanityCheck = mProcessingSettings->mergerSanityCheck = mProcessingSettings->outputSanityCheck = true; + } mMemoryScalers->scalingFactor = GetProcessingSettings().memoryScalingFactor; mMemoryScalers->conservative = GetProcessingSettings().conservativeMemoryEstimate; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index de8ed938b7422..8b5f70f25a4d9 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -352,6 +352,9 @@ AddOption(fastTransformObjectsMinMemorySize, uint32_t, 400u * 1024 * 1024, "", 0 AddOption(lateO2MatLutProvisioningSize, uint32_t, 0u, "", 0, "Memory size to reserve for late provisioning of matlut table") AddOption(throttleAlarms, bool, false, "", 0, "Throttle rate at which alarms are sent to the InfoLogger in online runs") AddOption(outputSanityCheck, bool, false, "", 0, "Run some simple sanity checks finding errors in the output") +AddOption(mergerSanityCheck, bool, false, "", 0, "Run some simple sanity checks after / during track merging") +AddOption(clusterizerZSSanityCheck, bool, false, "", 0, "Run some simple sanity checks on ZS decoding during clusterization") +AddOption(allSanityChecks, bool, false, "", 0, "Enable all sanity checks") AddOption(tpcSingleSector, int32_t, -1, "", 0, "Restrict TPC processing to a single sector") AddOption(tpcDownscaledEdx, uint8_t, 0, "", 0, "If != 0, downscale dEdx processing (if enabled) to x %") AddOption(tpcMaxAttachedClustersPerSectorRow, uint32_t, 51000, "", 0, "Maximum number of TPC attached clusters which can be decoded per SectorRow") diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 430cad041ebe5..f47c6923a6be7 100644 --- 
a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -269,6 +269,10 @@ bool GPUChainTracking::ValidateSettings() GPUError("noGPUMemoryRegistration only possible with gather mode 3 (set to %d / %d)", mRec->GetProcessingSettings().tpcCompressionGatherMode, gatherMode); return false; } + if (mRec->IsGPU() && (GetProcessingSettings().clusterizerZSSanityCheck || GetProcessingSettings().mergerSanityCheck)) { + GPUError("Clusterizer and merger Sanity checks only supported when not running on GPU"); + return false; + } if (GetProcessingSettings().doublePipeline) { if (!GetRecoStepsOutputs().isOnlySet(GPUDataTypes::InOutType::TPCMergedTracks, GPUDataTypes::InOutType::TPCCompressedClusters, GPUDataTypes::InOutType::TPCClusters)) { GPUError("Invalid outputs for double pipeline mode 0x%x", (uint32_t)GetRecoStepsOutputs()); @@ -791,7 +795,7 @@ int32_t GPUChainTracking::RunChainFinalize() } if (GetProcessingSettings().outputSanityCheck) { - SanityCheck(); + OutputSanityCheck(); } const bool needQA = GPUQA::QAAvailable() && (GetProcessingSettings().runQA || (GetProcessingSettings().eventDisplay && mIOPtrs.nMCInfosTPC)); diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index d7e821187e1fe..5c85147494711 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -291,7 +291,7 @@ class GPUChainTracking : public GPUChain private: int32_t RunChainFinalize(); - void SanityCheck(); + void OutputSanityCheck(); int32_t RunTPCTrackingSectors_internal(); int32_t RunTPCClusterizer_prepare(bool restorePointers); #ifdef GPUCA_TPC_GEOMETRY_O2 diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 1fa2014fe47e7..99f1d93796752 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -107,7 +107,7 @@ 
std::pair GPUChainTracking::TPCClusterizerDecodeZSCountUpdat if (doGPU) { pages = o - processors()->tpcClusterer[iSector].mPzsOffsets; } - if (!doGPU && GetProcessingSettings().debugLevel >= 4 && mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) { + if (GetProcessingSettings().clusterizerZSSanityCheck && mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) { TPCClusterizerEnsureZSOffsets(iSector, fragment); } return {digits, pages}; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index fbd999f8feb56..15846246bca0a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -251,7 +251,7 @@ void GPUChainTracking::PrintOutputStat() GPUInfo("Output Tracks: %d (%d / %d / %d / %d clusters (fitted / attached / adjacent / total) - %s format)%s", nTracks, nAttachedClustersFitted, nAttachedClusters, nAdjacentClusters, nCls, GetProcessingSettings().createO2Output > 1 ? 
"O2" : "GPU", trdText); } -void GPUChainTracking::SanityCheck() +void GPUChainTracking::OutputSanityCheck() { size_t nErrors = 0; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 4d9fcd4b1572a..a9d4304d77c83 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -163,6 +163,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); runKernel(GetGridAuto(0, deviceType)); + if (GetProcessingSettings().mergerSanityCheck) { + Merger.CheckMergeGraph(); + } runKernel(GetGridAuto(0, deviceType)); if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({{1, -WarpSize(), 0, deviceType}}, 1); @@ -189,6 +192,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) CondWaitEvent(waitForTransfer, &mEvents->single); runKernel(GetGridAuto(0, deviceType)); } + if (GetProcessingSettings().mergerSanityCheck) { + Merger.CheckCollectedTracks(); + } uint32_t maxId = Merger.NMaxClusters(); if (maxId > Merger.NMaxClusters()) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 6121ce0aa89bb..9a4b129f751a4 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -164,7 +164,98 @@ GPUTPCGMMerger::GPUTPCGMMerger() #if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC) #include "GPUQAHelper.h" -void GPUTPCGMMerger::CheckMergedTracks() +template +inline const auto* resolveMCLabels(const o2::dataformats::ConstMCTruthContainerView* a, const AliHLTTPCClusterMCLabel* b) +{ + return a; +} +template <> +inline const auto* resolveMCLabels(const o2::dataformats::ConstMCTruthContainerView* a, 
const AliHLTTPCClusterMCLabel* b) +{ + return b; +} + +template +int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const +{ + GPUTPCGMSectorTrack* sectorTrack = nullptr; + int32_t nClusters = 0; + if constexpr (std::is_same_v) { + sectorTrack = &mSectorTrackInfos[trk.TrackID()]; + nClusters = sectorTrack->OrigTrack()->NHits(); + } else { + nClusters = trk.NClusters(); + } + auto acc = GPUTPCTrkLbl(resolveMCLabels(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f); + for (int32_t i = 0; i < nClusters; i++) { + int32_t id; + if constexpr (std::is_same_v) { + const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()]; + const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i]; + id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0]; + } else { + id = mClusters[trk.FirstClusterRef() + i].num; + } + acc.addLabel(id); + } + return acc.computeLabel().id; +} + +template +int64_t GPUTPCGMMerger::GetTrackLabel(const S& trk) const +{ +#ifdef GPUCA_TPC_GEOMETRY_O2 + if (GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth) { + return GetTrackLabelA, S>(trk); + } else +#endif + { + return GetTrackLabelA(trk); + } +} + +#endif +// END DEBUG CODE + +void GPUTPCGMMerger::CheckCollectedTracks() +{ + uint32_t nErr = 0; + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { + const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; + if (trk.OK()) { + if (trk.NClusters() == 0) { + GPUError("FAILURE: Track marked ok but has 0 clusters"); + nErr++; + } + if (!trk.CCE() && !trk.MergedLooper()) { + const GPUTPCGMMergedTrack* updTrk = &trk; + while (updTrk->PrevSegment() >= 0) { + auto next = &mMergedTracks[updTrk->PrevSegment()]; + if (!next->MergedLooper()) { + GPUError("FAILURE: prev segment not marked as 
merged looper\n"); + nErr++; + } + if (next == &trk) { + GPUError("FAILURE: segment cycle found\n"); + break; + } + updTrk = next; + } + if (updTrk->NClusters() == 0) { + printf("FAILURE: segment leg has 0 clusters"); + } + } + } + } + + if (nErr == 0) { + GPUInfo("Merged Tracks OK"); + } else { + throw std::runtime_error("Error during track merging"); + } +} + +void GPUTPCGMMerger::CheckMergeGraph() { uint32_t nErr = 0; std::vector trkUsed(SectorTrackInfoLocalTotal()); @@ -175,19 +266,19 @@ void GPUTPCGMMerger::CheckMergedTracks() for (int32_t itr = 0; itr < SectorTrackInfoLocalTotal(); itr++) { GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; if (track.PrevSegmentNeighbour() >= 0 && mSectorTrackInfos[track.PrevSegmentNeighbour()].NextSegmentNeighbour() != itr) { - GPUError("Invalid reciprocal segment link: %d PrevSegmentNeighbour %d NextSegmentNeighbour %d", itr, track.PrevSegmentNeighbour(), mSectorTrackInfos[track.PrevSegmentNeighbour()].NextSegmentNeighbour()); + GPUError("FAILURE: Invalid reciprocal segment link: %d PrevSegmentNeighbour %d NextSegmentNeighbour %d", itr, track.PrevSegmentNeighbour(), mSectorTrackInfos[track.PrevSegmentNeighbour()].NextSegmentNeighbour()); nErr++; } if (track.NextSegmentNeighbour() >= 0 && mSectorTrackInfos[track.NextSegmentNeighbour()].PrevSegmentNeighbour() != itr) { - GPUError("Invalid reciprocal segment link: %d NextSegmentNeighbour %d PrevSegmentNeighbour %d", itr, track.NextSegmentNeighbour(), mSectorTrackInfos[track.NextSegmentNeighbour()].PrevSegmentNeighbour()); + GPUError("FAILURE: Invalid reciprocal segment link: %d NextSegmentNeighbour %d PrevSegmentNeighbour %d", itr, track.NextSegmentNeighbour(), mSectorTrackInfos[track.NextSegmentNeighbour()].PrevSegmentNeighbour()); nErr++; } if (track.PrevNeighbour() >= 0 && mSectorTrackInfos[track.PrevNeighbour()].NextNeighbour() != itr) { - GPUError("Invalid reciprocal link: %d PrevNeighbour %d NextNeighbour %d", itr, track.PrevNeighbour(), 
mSectorTrackInfos[track.PrevNeighbour()].NextNeighbour()); + GPUError("FAILURE: Invalid reciprocal link: %d PrevNeighbour %d NextNeighbour %d", itr, track.PrevNeighbour(), mSectorTrackInfos[track.PrevNeighbour()].NextNeighbour()); nErr++; } if (track.NextNeighbour() >= 0 && mSectorTrackInfos[track.NextNeighbour()].PrevNeighbour() != itr) { - GPUError("Invalid reciprocal link: %d NextNeighbour %d PrevNeighbour %d", itr, track.NextNeighbour(), mSectorTrackInfos[track.NextNeighbour()].PrevNeighbour()); + GPUError("FAILURE: Invalid reciprocal link: %d NextNeighbour %d PrevNeighbour %d", itr, track.NextNeighbour(), mSectorTrackInfos[track.NextNeighbour()].PrevNeighbour()); nErr++; } if (track.PrevSegmentNeighbour() >= 0) { @@ -202,12 +293,17 @@ void GPUTPCGMMerger::CheckMergedTracks() if (trkUsed[iTrk]) { GPUError("FAILURE: double use"); nErr++; + break; } trkUsed[iTrk] = true; int32_t jtr = tr->NextSegmentNeighbour(); if (jtr >= 0) { tr = &(mSectorTrackInfos[jtr]); + if (tr->PrevNeighbour() >= 0) { + GPUError("FAILURE: Non-base segment has previous leg"); + nErr++; + } continue; } jtr = trbase->NextNeighbour(); @@ -215,6 +311,8 @@ void GPUTPCGMMerger::CheckMergedTracks() trbase = &(mSectorTrackInfos[jtr]); tr = trbase; if (tr->PrevSegmentNeighbour() >= 0) { + GPUError("FAILURE: Neibhbour leg has previous segment neightbout"); + nErr++; break; } continue; @@ -230,62 +328,11 @@ void GPUTPCGMMerger::CheckMergedTracks() } if (nErr == 0) { GPUInfo("Merged Track Graph OK"); - } -} - -template -inline const auto* resolveMCLabels(const o2::dataformats::ConstMCTruthContainerView* a, const AliHLTTPCClusterMCLabel* b) -{ - return a; -} -template <> -inline const auto* resolveMCLabels(const o2::dataformats::ConstMCTruthContainerView* a, const AliHLTTPCClusterMCLabel* b) -{ - return b; -} - -template -int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const -{ - GPUTPCGMSectorTrack* sectorTrack = nullptr; - int32_t nClusters = 0; - if constexpr (std::is_same_v) { - sectorTrack = 
&mSectorTrackInfos[trk.TrackID()]; - nClusters = sectorTrack->OrigTrack()->NHits(); } else { - nClusters = trk.NClusters(); - } - auto acc = GPUTPCTrkLbl(resolveMCLabels(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f); - for (int32_t i = 0; i < nClusters; i++) { - int32_t id; - if constexpr (std::is_same_v) { - const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()]; - const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i]; - id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0]; - } else { - id = mClusters[trk.FirstClusterRef() + i].num; - } - acc.addLabel(id); - } - return acc.computeLabel().id; -} - -template -int64_t GPUTPCGMMerger::GetTrackLabel(const S& trk) const -{ -#ifdef GPUCA_TPC_GEOMETRY_O2 - if (GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth) { - return GetTrackLabelA, S>(trk); - } else -#endif - { - return GetTrackLabelA(trk); + throw std::runtime_error("Invalid merge graph"); } } -#endif -// END DEBUG CODE - void GPUTPCGMMerger::PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const { const GPUTPCGMSectorTrack* orgTrack = trk; @@ -1441,7 +1488,6 @@ struct GPUTPCGMMerger_CompareClusterIds { GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - // if (iThread == 0 && iBlock == 0) { CheckMergedTracks(); } return; // (if GPUCA_CADEBUG_ENABLED) static constexpr int32_t kMaxParts = 16; static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 0159b795aa963..14974bdec2303 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ 
-218,6 +218,9 @@ class GPUTPCGMMerger : public GPUProcessor GPUdi() int32_t SectorTrackInfoLocalTotal() const { return mSectorTrackInfoIndex[NSECTORS]; } GPUdi() int32_t SectorTrackInfoTotal() const { return mSectorTrackInfoIndex[2 * NSECTORS]; } + void CheckMergeGraph(); + void CheckCollectedTracks(); + private: GPUd() void MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam = false); template @@ -225,7 +228,6 @@ class GPUTPCGMMerger : public GPUProcessor GPUd() void MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, int32_t itr); - void CheckMergedTracks(); #ifndef GPUCA_GPUCODE void PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const; template diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index f2812be8e16a3..f38ea4d320c14 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -149,7 +149,6 @@ class GPUTPCGMTrackParam GPUd() bool AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop, bool inFlyDirection, float maxSinPhi = GPUCA_MAX_SIN_PHI, bool checkdEdx = false); GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop); // Returns uncorrectedY for later use GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z); - // We force to compile these twice, for PropagateLooper and for Fit, for better optimization GPUd() void AttachClustersLooper(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outwards, GPUTPCGMPropagator& 
prop); GPUd() void AttachClustersLooperFollow(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t sector, int32_t iRow, int32_t iTrack, bool outwards); GPUd() void StoreLoopPropagation(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool outwards, float alpha); diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 2e89a4d72c63e..1fa41d55ebbec 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -843,7 +843,7 @@ int32_t main(int argc, char** argv) break; } if (configStandalone.runs2 > 1) { - printf("RUN2: %d\n", iRunOuter); + printf("\nRUN2: %d\n", iRunOuter); } int64_t nTracksTotal = 0; int64_t nClustersTotal = 0;