diff --git a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx
index cdd0e4879f949..92adcbd9c14d7 100644
--- a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx
+++ b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx
@@ -12,14 +12,47 @@
 /// \file GPUTPCClusterFilter.cxx
 /// \author David Rohr
 
+#include "GPUCommonLogger.h"
 #include "GPUTPCClusterFilter.h"
 #include "DataFormatsTPC/ClusterNative.h"
 
 using namespace o2::gpu;
 
-GPUTPCClusterFilter::GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters)
+GPUTPCClusterFilter::GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters, uint8_t filterType)
+  : mFilterType(filterType)
 {
-  // Could initialize private variables based on the clusters here
+  if (filterType == 1) {
+    // Custom filter settings go here
+
+  } else if (filterType == 2) {
+    // PbPb23 filter
+    mClusterStats = std::make_unique<std::vector<uint32_t>[]>(MaxStacks);
+    static bool called = false;
+    if (!called) {
+      LOGP(info, "GPUTPCClusterFilter called for PbPb 2023 settings");
+      called = true;
+    }
+
+    for (uint32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) {
+      for (uint32_t iRow = 0; iRow < GPUCA_ROW_COUNT; iRow++) {
+        const uint32_t globalStack = getGlobalStack(iSector, iRow);
+        mClusterStats[globalStack].resize(MaxTimeBin);
+
+        for (uint32_t k = 0; k < clusters.nClusters[iSector][iRow]; k++) {
+          const o2::tpc::ClusterNative& cl = clusters.clusters[iSector][iRow][k];
+          const int clTime = static_cast<int>(cl.getTime());
+          const float clQmax = cl.getQmax();
+
+          if (clQmax < 12) {
+            if (clTime >= static_cast<int>(mClusterStats[globalStack].size())) {
+              mClusterStats[globalStack].resize(mClusterStats[globalStack].size() + 445);
+            }
+            ++mClusterStats[globalStack][clTime];
+          }
+        }
+      }
+    }
+  }
 }
 
 bool GPUTPCClusterFilter::filter(uint32_t sector, uint32_t row, o2::tpc::ClusterNative& cl)
@@ -27,5 +60,14 @@ bool GPUTPCClusterFilter::filter(uint32_t sector, uint32_t row, o2::tpc::Cluster
   // Return true to keep the cluster, false to drop it.
   // May change cluster properties by modifying the cl reference.
   // Note that this function might be called multiple times for the same cluster, in which case the final modified cl reference goes into the output clusters.
+  if (mFilterType == 2) {
+    const uint32_t globalStack = getGlobalStack(sector, row);
+    const int clTime = static_cast<int>(cl.getTime());
+    const float clQmax = cl.getQmax();
+    if ((mClusterStats[globalStack][clTime] > 40 && clQmax < 12) || (mClusterStats[globalStack][clTime] > 200)) {
+      return false;
+    }
+  }
+
   return true;
 }
diff --git a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h
index 908f78fd23b9a..b39237757de53 100644
--- a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h
+++ b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h
@@ -15,7 +15,10 @@
 #ifndef GPUTPCCLUSTERFILTER_H
 #define GPUTPCCLUSTERFILTER_H
 
+#include <memory>
 #include <cstdint>
+#include <vector>
+#include "GPUDefConstantsAndSettings.h"
 
 namespace o2::tpc
 {
@@ -28,8 +31,29 @@ namespace o2::gpu
 class GPUTPCClusterFilter
 {
  public:
-  GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters);
+  GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters, uint8_t filterType);
   bool filter(uint32_t sector, uint32_t row, o2::tpc::ClusterNative& cl);
+
+ private:
+  static constexpr uint32_t MaxTimeBin = 14256;
+  static constexpr uint32_t MaxStacks = GPUCA_NSECTORS * 4;
+  uint8_t mFilterType = 0; //< 0: off, 1: custom, 2: PbPb23
+
+  std::unique_ptr<std::vector<uint32_t>[]> mClusterStats; //< Number of clusters per stack and time bin
+
+  uint32_t getGlobalStack(uint32_t sector, uint32_t row) const
+  {
+    int stack = 3;
+    if (row < 63) {
+      stack = 0;
+    } else if (row < 97) {
+      stack = 1;
+    } else if (row < 127) {
+      stack = 2;
+    }
+
+    return sector * 4 + stack;
+  };
 };
 } // namespace o2::gpu
diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h
index 25419f3483dd6..3c31a4fbb8409 100644
--- a/GPU/GPUTracking/Definitions/GPUSettingsList.h
+++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h
@@ -291,7 +291,7 @@ AddOption(tpcDownscaledEdx, uint8_t, 0, "", 0, "If != 0, downscale dEdx processi
 AddOption(tpcMaxAttachedClustersPerSectorRow, uint32_t, 51000, "", 0, "Maximum number of TPC attached clusters which can be decoded per SectorRow")
 AddOption(tpcUseOldCPUDecoding, bool, false, "", 0, "Enable old CPU-based TPC decoding")
 AddOption(tpcApplyCFCutsAtDecoding, bool, false, "", 0, "Apply cluster cuts from clusterization during decoding of compressed clusters")
-AddOption(tpcApplyDebugClusterFilter, bool, false, "", 0, "Apply custom cluster filter of GPUTPCClusterFilter class")
+AddOption(tpcApplyClusterFilterOnCPU, uint8_t, 0, "", 0, "Apply custom cluster filter of GPUTPCClusterFilter class, 0: off, 1: debug, 2: PbPb23")
 AddOption(RTCcacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored")
 AddOption(RTCprependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string")
 AddOption(RTCoverrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line")
diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx
index 9de8b2174a182..a63886b93ccf9 100644
--- a/GPU/GPUTracking/Global/GPUChainTracking.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx
@@ -294,16 +294,12 @@ bool GPUChainTracking::ValidateSettings()
       return false;
     }
   }
-  if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCDecompression) && GetProcessingSettings().tpcApplyCFCutsAtDecoding && !GetProcessingSettings().tpcUseOldCPUDecoding) {
-    GPUError("tpcApplyCFCutsAtDecoding currently requires tpcUseOldCPUDecoding");
-    return false;
-  }
   if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (GetProcessingSettings().tpcCompressionGatherMode == 1 || GetProcessingSettings().tpcCompressionGatherMode == 3)) {
     GPUError("Invalid tpcCompressionGatherMode for compression on CPU");
     return false;
   }
-  if (GetProcessingSettings().tpcApplyDebugClusterFilter == 1 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding || GetProcessingSettings().delayedOutput || GetProcessingSettings().runMC)) {
-    GPUError("tpcApplyDebugClusterFilter cannot be used with GPU clusterization or with delayedOutput for GPU or with MC labels");
+  if (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding || GetProcessingSettings().delayedOutput || GetProcessingSettings().runMC)) {
+    GPUError("tpcApplyClusterFilterOnCPU cannot be used with GPU clusterization or with delayedOutput for GPU or with MC labels");
     return false;
   }
   if (GetRecoSteps() & RecoStep::TRDTracking) {
@@ -815,7 +811,7 @@ int32_t GPUChainTracking::RunChainFinalize()
 
   PrintDebugOutput();
 
-  //PrintMemoryRelations();
+  // PrintMemoryRelations();
 
   if (GetProcessingSettings().eventDisplay) {
     if (!mDisplayRunning) {
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
index 121d60873324f..62a4a524d67df 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
@@ -635,7 +635,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
     if (mWaitForFinalInputs) {
       GPUFatal("Cannot use waitForFinalInput callback without delayed output");
     }
-    if (!GetProcessingSettings().tpcApplyDebugClusterFilter) {
+    if (!GetProcessingSettings().tpcApplyClusterFilterOnCPU) {
       AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
       tmpNativeClusters = mInputsHost->mPclusterNativeOutput;
     } else {
@@ -1021,7 +1021,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
     tmpNativeAccess->clustersMCTruth = mcLabelsConstView;
     tmpNativeAccess->setOffsetPtrs();
     mIOPtrs.clustersNative = tmpNativeAccess;
-    if (GetProcessingSettings().tpcApplyDebugClusterFilter) {
+    if (GetProcessingSettings().tpcApplyClusterFilterOnCPU) {
       auto allocator = [this, &tmpNativeClusters](size_t size) {
         this->mInputsHost->mNClusterNative = size;
         this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
index 94d39249d620c..1b08de21abd0f 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
@@ -201,6 +201,11 @@ int32_t GPUChainTracking::RunTPCCompression()
 
 int32_t GPUChainTracking::RunTPCDecompression()
 {
+  const bool runFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0) || (param().tpcCutTimeBin > 0);
+  if (runFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) {
+    GPUFatal("tpcApplyCFCutsAtDecoding, tpcApplyClusterFilterOnCPU and tpcCutTimeBin currently require tpcUseOldCPUDecoding");
+  }
+
   if (GetProcessingSettings().tpcUseOldCPUDecoding) {
     const auto& threadContext = GetThreadContext();
     TPCClusterDecompressor decomp;
@@ -214,7 +219,6 @@ int32_t GPUChainTracking::RunTPCDecompression()
       return ((tmpBuffer = std::make_unique<o2::tpc::ClusterNative[]>(size))).get();
     };
    auto& decompressTimer = getTimer<TPCClusterDecompressor>("TPCDecompression", 0);
-    bool runFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding;
    auto allocatorUse = runFiltering ? std::function<o2::tpc::ClusterNative*(size_t)>{allocatorTmp} : std::function<o2::tpc::ClusterNative*(size_t)>{allocatorFinal};
    decompressTimer.Start();
    if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) {
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx
index ec6b48a55d50d..38e1cd0036c16 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx
@@ -295,7 +295,8 @@ void GPUChainTracking::SanityCheck()
 
 void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function<o2::tpc::ClusterNative*(size_t)> allocator, bool applyClusterCuts)
 {
-  GPUTPCClusterFilter clusterFilter(*clusters);
+  const uint8_t filterType = GetProcessingSettings().tpcApplyClusterFilterOnCPU;
+  GPUTPCClusterFilter clusterFilter(*clusters, filterType);
   o2::tpc::ClusterNative* outputBuffer = nullptr;
   for (int32_t iPhase = 0; iPhase < 2; iPhase++) {
     uint32_t countTotal = 0;
@@ -312,7 +313,7 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster
         if (param().tpcCutTimeBin > 0) {
           keep = keep && cl.getTime() < param().tpcCutTimeBin;
         }
-        keep = keep && (!GetProcessingSettings().tpcApplyDebugClusterFilter || clusterFilter.filter(iSector, iRow, cl));
+        keep = keep && (!filterType || clusterFilter.filter(iSector, iRow, cl));
         if (iPhase && keep) {
           outputBuffer[countTotal] = cl;
         }