From 339b4a0225bf693c8687a9def3653d10a3e8bf3c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 00:18:02 +0200 Subject: [PATCH 1/2] GPU: Add debug dumps for compressed / uncompressed TPC clusters --- GPU/GPUTracking/CMakeLists.txt | 1 + .../DataCompression/GPUTPCCompression.cxx | 56 +++++++++++++ .../DataCompression/GPUTPCCompression.h | 4 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- GPU/GPUTracking/Global/GPUChain.h | 31 +++----- GPU/GPUTracking/Global/GPUChainTracking.h | 1 + .../Global/GPUChainTrackingClusterizer.cxx | 19 ++--- .../Global/GPUChainTrackingCompression.cxx | 3 + .../Global/GPUChainTrackingDebug.h | 79 +++++++++++++++++++ .../GPUChainTrackingDebugAndProfiling.cxx | 15 ++++ .../Global/GPUChainTrackingMerger.cxx | 19 ++--- .../Global/GPUChainTrackingSectorTracker.cxx | 15 ++-- .../TPCClusterFinder/GPUTPCClusterFinder.h | 1 + .../GPUTPCClusterFinderDump.cxx | 2 +- 14 files changed, 201 insertions(+), 47 deletions(-) create mode 100644 GPU/GPUTracking/Global/GPUChainTrackingDebug.h diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index b2852389398d0..2e26622d05291 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -138,6 +138,7 @@ set(HDRS_INSTALL Definitions/GPULogging.h Definitions/GPUSettingsList.h Global/GPUChainTrackingDefs.h + Global/GPUChainTrackingDebug.h Global/GPUChainTrackingGetters.inc Global/GPUErrorCodes.h Merger/GPUTPCGMBorderTrack.h diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 2a0c5b58d8a83..a107f749ddd77 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -130,3 +130,59 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusters += 16 - (mMaxClusters % 16); } } + +void GPUTPCCompression::DumpCompressedClusters(std::ostream& out) +{ + const o2::tpc::CompressedClusters 
O = *mOutputFlat; + out << "\n\nCompressed Clusters:\n"; + out << O.nTracks << " Tracks\n"; + out << "Slice Row Clusters:\n"; + for (uint32_t i = 0; i < NSECTORS; i++) { + out << "Sector " << i << ": "; + for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { + out << O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] << ", "; + } + out << "\n"; + } + out << "\nTrack Clusters:\n"; + for (uint32_t i = 0; i < O.nTracks; i++) { + if (i && i % 100 == 0) { + out << "\n"; + } + out << O.nTrackClusters[i] << ", "; + } + out << "\n\nUnattached Clusters\n"; + uint32_t offset = 0; + for (uint32_t i = 0; i < NSECTORS; i++) { + for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { + out << "Sector " << i << " Row " << j << ": "; + for (uint32_t k = 0; k < O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; k++) { + if (k && k % 10 == 0) { + out << "\n "; + } + const uint32_t l = k + offset; + out << "[" << (uint32_t)O.qTotU[l] << ", " << (uint32_t)O.qMaxU[l] << ", " << (uint32_t)O.flagsU[l] << ", " << (int32_t)O.padDiffU[l] << ", " << (int32_t)O.timeDiffU[l] << ", " << (uint32_t)O.sigmaPadU[l] << ", " << (uint32_t)O.sigmaTimeU[l] << "] "; + } + offset += O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; + out << "\n"; + } + } + out << "\n\nAttached Clusters\n"; + offset = 0; + for (uint32_t i = 0; i < O.nTracks; i++) { + out << "Track " << i << ": {" << (uint32_t)O.qPtA[i] << ", " << (uint32_t)O.rowA[i] << ", " << (uint32_t)O.sliceA[i] << ", " << (uint32_t)O.timeA[i] << ", " << (uint32_t)O.padA[i] << "} - "; + for (uint32_t k = 0; k < O.nTrackClusters[i]; k++) { + if (k && k % 10 == 0) { + out << "\n "; + } + const uint32_t l1 = k + offset, l2 = k + offset - i; + out << "["; + if (k) { + out << (int32_t)O.rowDiffA[l2] << ", " << (int32_t)O.sliceLegDiffA[l2] << ", " << (uint32_t)O.padResA[l2] << ", " << (uint32_t)O.timeResA[l2] << ", "; + } + out << (uint32_t)O.qTotA[l1] << ", " << (uint32_t)O.qMaxA[l1] << ", " << (uint32_t)O.flagsA[l1] << ", " << (uint32_t)O.sigmaPadA[l1] << ", " << 
(uint32_t)O.sigmaTimeA[l1] << "] "; + } + offset += O.nTrackClusters[i]; + out << "\n"; + } +} diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h index c1d9fe283fbea..52585b4c08b24 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h @@ -57,6 +57,10 @@ class GPUTPCCompression : public GPUProcessor GPUd() static void truncateSignificantBitsChargeMax(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QMAX); } GPUd() static void truncateSignificantBitsWidth(uint8_t& width, const GPUParam& param) { truncateSignificantBits(width, param.rec.tpc.sigBitsWidth, P_MAX_SIGMA); } +#ifndef GPUCA_GPUCODE + void DumpCompressedClusters(std::ostream& out); +#endif + protected: struct memory { uint32_t nStoredTracks = 0; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 4c32c3e46e3a7..638a3ed43d2aa 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -295,7 +295,7 @@ AddOption(trdNCandidates, int32_t, 3, "", 0, "Number of branching track candidat AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GPU track model for TRD tracking") AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))") AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") -AddOption(debugMask, int32_t, 262143, "", 0, "Mask for debug output dumps to file") +AddOption(debugMask, uint32_t, 262143, "", 0, "Mask for debug output dumps to file") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") AddOption(recoTaskTiming, bool, 0, "", 0, 
"Perform summary timing after whole reconstruction tasks") AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6", def(1)) diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 5df324fcba648..1f88d3dc3aba1 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -20,6 +20,7 @@ #include "GPUKernelClassesFwd.h" #include +#include namespace o2::gpu { @@ -226,12 +227,19 @@ class GPUChain virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; } template - bool DoDebugAndDump(RecoStep step, int32_t mask, T& processor, S T::*func, Args&&... args) + bool DoDebugAndDump(RecoStep step, uint32_t mask, T& processor, S T::* func, Args&&... args) { return DoDebugAndDump(step, mask, true, processor, func, args...); } template - bool DoDebugAndDump(RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args); + bool DoDebugAndDump(RecoStep step, uint32_t mask, bool transfer, T& processor, S T::* func, Args&&... args); + template + bool DoDebugDump(uint32_t mask, std::function func, Args&... args); + template + bool DoDebugDump(uint32_t mask, S* func, Args&&... args) + { + return DoDebugDump(mask, std::function([&func](Args&&... args_tmp) { (*func)(args_tmp...); }), args...); + } template int32_t runRecoStep(RecoStep step, S T::*func, Args... args); @@ -278,24 +286,7 @@ inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... } template -bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... 
args) -{ - if (GetProcessingSettings().keepAllMemory) { - if (transfer) { - TransferMemoryResourcesToHost(step, &processor, -1, true); - } - if (GetProcessingSettings().debugLevel >= 6 && (mask == 0 || (GetProcessingSettings().debugMask & mask))) { - if (func) { - (processor.*func)(args...); - } - return true; - } - } - return false; -} - -template -int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) +inline int32_t GPUChain::runRecoStep(RecoStep step, S T::* func, Args... args) { if (GetRecoSteps().isSet(step)) { auto* timer = GetProcessingSettings().recoTaskTiming ? &mRec->getRecoStepTimer(step) : nullptr; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 8664652b549e3..13773a97d4e3d 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -234,6 +234,7 @@ class GPUChainTracking : public GPUChain void PrepareDebugOutput(); void PrintDebugOutput(); void PrintOutputStat(); + static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters); bool ValidateSteps(); bool ValidateSettings(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index f188388e76a02..2cdd1bb76bf00 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -14,6 +14,7 @@ #include "GPUChainTracking.h" #include "GPUChainTrackingDefs.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" @@ -813,7 +814,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (fragment.index == 0) { runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); } - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, 
&GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererZeroedCharges, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); if (doGPU) { if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { @@ -900,7 +901,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (!mIOPtrs.tpcZS) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); } - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { + if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererDigits, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { clusterer.DumpChargeMap(*mDebugFile, "Charges"); } @@ -919,13 +920,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { + if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { clusterer.DumpPeakMap(*mDebugFile, "Peaks"); } RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane); TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off }); mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { uint32_t iSector = iSectorBase + lane; @@ -939,13 +940,13 @@ 
int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { + if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererSuppressedPeaks, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); } RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane); TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererSuppressedPeaks, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off }); mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { uint32_t iSector = iSectorBase + lane; @@ -979,7 +980,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } // float time_clusterizer = 0, time_fill = 0, time_networks = 0; @@ -1092,7 +1093,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) #endif } else { 
runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); } @@ -1111,7 +1112,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); laneHasData[lane] = true; // Include clusters in default debug mask, exclude other debug output by default - DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererClusters, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off }); mRec->SetNActiveThreadsOuterLoop(1); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 24c74a661f18e..534c02a4c0a84 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUTrackingInputProvider.h" @@ -202,6 +203,7 @@ int32_t GPUChainTracking::RunTPCCompression() ((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec); } mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR")); + DoDebugAndDump(RecoStep::TPCCompression, GPUChainTrackingDebugFlags::TPCCompressedClusters, Compressor, &GPUTPCCompression::DumpCompressedClusters, *mDebugFile); return 0; } @@ 
-425,5 +427,6 @@ int32_t GPUChainTracking::RunTPCDecompression() } mRec->PopNonPersistentMemory(RecoStep::TPCDecompression, qStr2Tag("TPCDCMPR")); } + DoDebugDump(GPUChainTrackingDebugFlags::TPCDecompressedClusters, &GPUChainTracking::DumpClusters, *mDebugFile, mIOPtrs.clustersNative); return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h new file mode 100644 index 0000000000000..952a6c088ae8b --- /dev/null +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h @@ -0,0 +1,79 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUChainTrackingDebug.h +/// \author David Rohr + +#ifndef GPUCHAINTRACKINGDEBUG_H +#define GPUCHAINTRACKINGDEBUG_H + +#include +#include +#include + +namespace o2::gpu +{ +// NOTE: Values below 262144 are activated by default with --debug 6 in GPUSettingsList.h::debugMask +enum GPUChainTrackingDebugFlags : uint32_t { + TPCSectorTrackingData = 1, + TPCPreLinks = 2, + TPCLinks = 4, + TPCStartHits = 8, + TPCTracklets = 16, + TPCSectorTracks = 32, + TPCHitWeights = 256, + TPCCompressedClusters = 512, + TPCDecompressedClusters = 1024, + TPCMergingRanges = 2048, + TPCMergingSectorTracks = 4096, + TPCMergingMergedTracks = 8192, + TPCMergingCollectedTracks = 16384, + TPCMergingCE = 32768, + TPCMergingRefit = 65536, + TPCClustererClusters = 131072, + TPCClusterer = 262144, + TPCClustererDigits = 262144 << 1, + TPCClustererPeaks = 262144 << 2, + TPCClustererSuppressedPeaks = 262144 << 3, + TPCClustererChargeMap = 262144 << 4, + TPCClustererZeroedCharges = 262144 << 5 +}; + +template +inline bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, uint32_t mask, bool transfer, T& processor, S T::* func, Args&&... args) +{ + if (GetProcessingSettings().keepAllMemory) { + if (transfer) { + TransferMemoryResourcesToHost(step, &processor, -1, true); + } + std::function lambda = [&processor, &func](Args&... args_tmp) { + if (func) { + (processor.*func)(args_tmp...); + } + }; + return DoDebugDump(mask, lambda, args...); + } + return false; +} + +template +inline bool GPUChain::DoDebugDump(uint32_t mask, std::function func, Args&... 
args) +{ + if (GetProcessingSettings().debugLevel >= 6 && (mask == 0 || (GetProcessingSettings().debugMask & mask))) { + func(args...); + return true; + } + return false; +} + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 53bdfbadd4b25..903505068ad2c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -333,3 +333,18 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster } } } + +void GPUChainTracking::DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters) +{ + out << "\nTPC Clusters:\n"; + for (uint32_t iSec = 0; iSec < GPUCA_NSECTORS; iSec++) { + out << "TPCClusters - Sector " << iSec << "\n"; + for (uint32_t i = 0; i < GPUCA_ROW_COUNT; i++) { + out << " Row: " << i << ": " << clusters->nClusters[iSec][i] << " clusters:\n"; + for (uint32_t j = 0; j < clusters->nClusters[iSec][i]; j++) { + const auto& cl = clusters->clusters[iSec][i][j]; + out << " " << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTot << "\n"; + } + } + } +} diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index a38148ccb375a..6e86be03e7950 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUDefParametersRuntime.h" #include "GPUO2DataTypes.h" @@ -72,7 +73,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto runKernel(GetGridAuto(0, deviceType), i, withinSector, mergeMode); } } - 
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRanges, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode); mRec->ReturnVolatileDeviceMemory(); } @@ -135,14 +136,14 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingSectorTracks, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -157,7 +158,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, 
&GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -167,14 +168,14 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel({{1, -WarpSize(), 0, deviceType}}, 1); runKernel({{1, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCollectedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); if (param().rec.tpc.mergeCE) { runKernel(GetGridAuto(0, deviceType), true); RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType); RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCE, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile); } int32_t waitForTransfer = 0; if (doGPU) { @@ -201,7 +202,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); if (doGPU) { CondWaitEvent(waitForTransfer, &mEvents->single); @@ -227,7 +228,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0)); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, 
Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile); runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); @@ -240,7 +241,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType)); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); if (doGPU) { RecordMarker(&mEvents->single, 0); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 635641c00ae14..ef38d53173c2b 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" @@ -176,7 +177,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().debugLevel >= 6) { *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; - if (GetProcessingSettings().debugMask & 1) { + if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCSectorTrackingData) { if (doGPU) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); } @@ -191,13 +192,13 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().keepDisplayMemory) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); 
memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); - if (GetProcessingSettings().debugMask & 2) { + if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCPreLinks) { trk.DumpLinks(*mDebugFile, 0); } } runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSector}}); - DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCLinks, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}}); if (mRec->getGPUParameters(doGPU).par_SORT_STARTHITS) { @@ -206,7 +207,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGrid(1, 1, useStream), {iSector}}); } - DoDebugAndDump(RecoStep::TPCSectorTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCStartHits, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { trk.UpdateMaxData(); @@ -215,8 +216,8 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } runKernel({GetGridAuto(useStream), {iSector}}); - DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCTracklets, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCHitWeights && GetProcessingSettings().deterministicGPUReconstruction < 2) { trk.DumpHitWeights(*mDebugFile); } @@ -230,7 +231,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() 
if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks()); } - DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCSectorTracks, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); }); mRec->SetNActiveThreadsOuterLoop(1); if (error) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index 37399f5e4863f..35e2a7297338f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -36,6 +36,7 @@ class ConstMCTruthContainerView; namespace tpc { struct ClusterNative; +struct ClusterNativeAccess; class Digit; } // namespace tpc diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index da30375149b7c..d676cf9cd3887 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -157,7 +157,7 @@ void GPUTPCClusterFinder::DumpClusters(std::ostream& out) { out << "\nClusterer - Clusters - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; - for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { + for (uint32_t i = 0; i < GPUCA_ROW_COUNT; i++) { size_t N = mPclusterInRow[i]; const tpc::ClusterNative* row = &mPclusterByRow[i * mNMaxClusterPerRow]; From 6127d41467765d2c26c40aff71d680fe57483d70 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Thu, 8 May 2025 13:13:24 +0000 Subject: [PATCH 2/2] Please consider the following formatting changes --- GPU/GPUTracking/Global/GPUChain.h | 6 +++--- GPU/GPUTracking/Global/GPUChainTrackingDebug.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h 
index 1f88d3dc3aba1..aca1bb2420fb6 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -227,12 +227,12 @@ class GPUChain virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; } template - bool DoDebugAndDump(RecoStep step, uint32_t mask, T& processor, S T::* func, Args&&... args) + bool DoDebugAndDump(RecoStep step, uint32_t mask, T& processor, S T::*func, Args&&... args) { return DoDebugAndDump(step, mask, true, processor, func, args...); } template - bool DoDebugAndDump(RecoStep step, uint32_t mask, bool transfer, T& processor, S T::* func, Args&&... args); + bool DoDebugAndDump(RecoStep step, uint32_t mask, bool transfer, T& processor, S T::*func, Args&&... args); template bool DoDebugDump(uint32_t mask, std::function func, Args&... args); template @@ -286,7 +286,7 @@ inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... } template -inline int32_t GPUChain::runRecoStep(RecoStep step, S T::* func, Args... args) +inline int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) { if (GetRecoSteps().isSet(step)) { auto* timer = GetProcessingSettings().recoTaskTiming ? &mRec->getRecoStepTimer(step) : nullptr; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h index 952a6c088ae8b..810f40a1d8654 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h @@ -48,7 +48,7 @@ enum GPUChainTrackingDebugFlags : uint32_t { }; template -inline bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, uint32_t mask, bool transfer, T& processor, S T::* func, Args&&... args) +inline bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, uint32_t mask, bool transfer, T& processor, S T::*func, Args&&... args) { if (GetProcessingSettings().keepAllMemory) { if (transfer) {