diff --git a/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx b/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx index 23afc85f94d64..8784f096e3202 100644 --- a/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx +++ b/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx @@ -1227,7 +1227,7 @@ void SimpleEventDisplayGUI::showClusters(int roc, int row) } if (fillSingleTB && std::abs(cl.getTime() - timeBin) < 2) { const auto ly = gpuGeom.LinearPad2Y(sector, irow, cl.getPad() + 0.5); - mClustersRowPad->SetNextPoint(gpuGeom.Row2X(irow), (sector >= GPUCA_NSLICES / 2) ? -ly : ly); + mClustersRowPad->SetNextPoint(gpuGeom.Row2X(irow), (sector >= GPUCA_NSECTORS / 2) ? -ly : ly); } } // fmt::print("\n"); diff --git a/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx b/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx index 6c0ea8b265585..bdf9b95e94450 100644 --- a/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx +++ b/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx @@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE(CATracking_test1) config.configReconstruction.tpc.searchWindowDZDR = 2.5f; //Should always be 2.5 for looper-finding and/or continuous tracking config.configReconstruction.tpc.trackReferenceX = refX; - config.configWorkflow.steps.set(GPUDataTypes::RecoStep::TPCConversion, GPUDataTypes::RecoStep::TPCSliceTracking, + config.configWorkflow.steps.set(GPUDataTypes::RecoStep::TPCConversion, GPUDataTypes::RecoStep::TPCSectorTracking, GPUDataTypes::RecoStep::TPCMerging, GPUDataTypes::RecoStep::TPCCompression, GPUDataTypes::RecoStep::TPCdEdx); config.configWorkflow.inputs.set(GPUDataTypes::InOutType::TPCClusters); config.configWorkflow.outputs.set(GPUDataTypes::InOutType::TPCMergedTracks); diff --git a/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx b/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx index 294a93709e863..cc964ade0d87c 100644 --- a/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx +++ b/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx 
@@ -159,7 +159,7 @@ void EntropyEncoderSpec::run(ProcessingContext& pc) const auto& tinfo = pc.services().get(); const auto firstIR = o2::InteractionRecord(0, tinfo.firstTForbit); - const float totalT = std::max(mFastTransform->getMaxDriftTime(0), mFastTransform->getMaxDriftTime(GPUCA_NSLICES / 2)); + const float totalT = std::max(mFastTransform->getMaxDriftTime(0), mFastTransform->getMaxDriftTime(GPUCA_NSECTORS / 2)); unsigned int offset = 0, lasti = 0; const unsigned int maxTime = (mParam->continuousMaxTimeBin + 1) * o2::tpc::ClusterNative::scaleTimePacked - 1; @@ -206,8 +206,8 @@ void EntropyEncoderSpec::run(ProcessingContext& pc) } } offset = 0; - unsigned int offsets[GPUCA_NSLICES][GPUCA_ROW_COUNT]; - for (unsigned int i = 0; i < GPUCA_NSLICES; i++) { + unsigned int offsets[GPUCA_NSECTORS][GPUCA_ROW_COUNT]; + for (unsigned int i = 0; i < GPUCA_NSECTORS; i++) { for (unsigned int j = 0; j < GPUCA_ROW_COUNT; j++) { if (i * GPUCA_ROW_COUNT + j >= clusters.nSliceRows) { break; @@ -218,7 +218,7 @@ void EntropyEncoderSpec::run(ProcessingContext& pc) } #ifdef WITH_OPENMP -#pragma omp parallel for num_threads(mNThreads) schedule(static, (GPUCA_NSLICES + mNThreads - 1) / mNThreads) // Static round-robin scheduling with one chunk per thread to ensure correct order of the final vector +#pragma omp parallel for num_threads(mNThreads) schedule(static, (GPUCA_NSECTORS + mNThreads - 1) / mNThreads) // Static round-robin scheduling with one chunk per thread to ensure correct order of the final vector #endif for (unsigned int ii = 0; ii < clusters.nSliceRows; ii++) { unsigned int i = ii / GPUCA_ROW_COUNT; diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 3396219634587..4f83fa48a64e0 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -34,21 +34,19 @@ #include "GPUKernelDebugOutput.h" #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUConstantMem { GPUParam param; 
GPUTPCTracker - tpcTrackers[GPUCA_NSLICES]; + tpcTrackers[GPUCA_NSECTORS]; GPUTPCConvert tpcConverter; GPUTPCCompression tpcCompressor; GPUTPCDecompression tpcDecompressor; GPUTPCGMMerger tpcMerger; GPUTRDTrackerGPU trdTrackerGPU; GPUTRDTracker trdTrackerO2; - GPUTPCClusterFinder tpcClusterer[GPUCA_NSLICES]; + GPUTPCClusterFinder tpcClusterer[GPUCA_NSECTORS]; GPUITSFitter itsFitter; GPUTrackingRefitProcessor trackingRefit; GPUTrackingInOutPointers ioPtrs; @@ -90,14 +88,11 @@ union GPUConstantMemCopyable { #if defined(GPUCA_GPUCODE) static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + sizeof(uint4) - 1); #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; #endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM -namespace o2 -{ -namespace gpu +namespace o2::gpu { // Must be placed here, to avoid circular header dependency @@ -120,7 +115,6 @@ GPUdi() void GPUProcessor::raiseError(uint32_t code, uint32_t param1, uint32_t p GetConstantMem()->errorCodes.raiseError(code, param1, param2, param3); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index e11f818297770..71980d38fdc9e 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -32,9 +32,7 @@ #define GPUCA_CUB cub #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUConstantMem; @@ -110,8 +108,7 @@ class GPUitoa : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #undef GPUCA_CUB diff --git 
a/GPU/GPUTracking/Base/GPUKernelDebugOutput.h b/GPU/GPUTracking/Base/GPUKernelDebugOutput.h index dc60014718677..9e58ae332dcd4 100644 --- a/GPU/GPUTracking/Base/GPUKernelDebugOutput.h +++ b/GPU/GPUTracking/Base/GPUKernelDebugOutput.h @@ -19,9 +19,7 @@ #include "GPUProcessor.h" #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUKernelDebugOutput : public GPUProcessor @@ -75,8 +73,7 @@ class GPUKernelDebugOutput : public GPUProcessor mutable int32_t* mDebugOutMemory; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif #endif diff --git a/GPU/GPUTracking/Base/GPUMemoryResource.h b/GPU/GPUTracking/Base/GPUMemoryResource.h index 5037e7800d4d9..3bb2c363db2a9 100644 --- a/GPU/GPUTracking/Base/GPUMemoryResource.h +++ b/GPU/GPUTracking/Base/GPUMemoryResource.h @@ -18,9 +18,7 @@ #include "GPUCommonDef.h" #include "GPUProcessor.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUMemoryReuse { @@ -103,7 +101,6 @@ class GPUMemoryResource int32_t mReuse; MemoryType mType; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index d5c1149b0ab29..192e46c36dc68 100644 --- a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -91,22 +91,22 @@ void GPUParam::SetDefaults(float solenoidBz) constexpr float plusZmax = 249.778; constexpr float minusZmin = -249.645; constexpr float minusZmax = -0.0799937; - for (int32_t i = 0; i < GPUCA_NSLICES; i++) { - const bool zPlus = (i < GPUCA_NSLICES / 2); - SliceParam[i].ZMin = zPlus ? plusZmin : minusZmin; - SliceParam[i].ZMax = zPlus ? plusZmax : minusZmax; + for (int32_t i = 0; i < GPUCA_NSECTORS; i++) { + const bool zPlus = (i < GPUCA_NSECTORS / 2); + SectorParam[i].ZMin = zPlus ? plusZmin : minusZmin; + SectorParam[i].ZMax = zPlus ? 
plusZmax : minusZmax; int32_t tmp = i; - if (tmp >= GPUCA_NSLICES / 2) { - tmp -= GPUCA_NSLICES / 2; + if (tmp >= GPUCA_NSECTORS / 2) { + tmp -= GPUCA_NSECTORS / 2; } - if (tmp >= GPUCA_NSLICES / 4) { - tmp -= GPUCA_NSLICES / 2; + if (tmp >= GPUCA_NSECTORS / 4) { + tmp -= GPUCA_NSECTORS / 2; } - SliceParam[i].Alpha = 0.174533f + par.dAlpha * tmp; - SliceParam[i].CosAlpha = CAMath::Cos(SliceParam[i].Alpha); - SliceParam[i].SinAlpha = CAMath::Sin(SliceParam[i].Alpha); - SliceParam[i].AngleMin = SliceParam[i].Alpha - par.dAlpha / 2.f; - SliceParam[i].AngleMax = SliceParam[i].Alpha + par.dAlpha / 2.f; + SectorParam[i].Alpha = 0.174533f + par.dAlpha * tmp; + SectorParam[i].CosAlpha = CAMath::Cos(SectorParam[i].Alpha); + SectorParam[i].SinAlpha = CAMath::Sin(SectorParam[i].Alpha); + SectorParam[i].AngleMin = SectorParam[i].Alpha - par.dAlpha / 2.f; + SectorParam[i].AngleMax = SectorParam[i].Alpha + par.dAlpha / 2.f; } par.assumeConstantBz = false; diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 78c13d19be81d..9bdf705dfeb59 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -31,19 +31,17 @@ using Propagator = PropagatorImpl; } // namespace o2::base #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUSettingsRec; struct GPUSettingsGTP; struct GPURecoStepConfiguration; -struct GPUParamSlice { - float Alpha; // slice angle - float CosAlpha, SinAlpha; // sign and cosine of the slice angle +struct GPUParamSector { + float Alpha; // sector angle + float CosAlpha, SinAlpha; // sign and cosine of the sector angle float AngleMin, AngleMax; // minimal and maximal angle - float ZMin, ZMax; // slice Z range + float ZMin, ZMax; // sector Z range }; namespace internal @@ -66,7 +64,7 @@ struct GPUParam_t { const uint32_t* occupancyMap; // Ptr to TPC occupancy map uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) - GPUParamSlice SliceParam[GPUCA_NSLICES]; + GPUParamSector 
SectorParam[GPUCA_NSECTORS]; protected: #ifdef GPUCA_TPC_GEOMETRY_O2 @@ -88,15 +86,15 @@ struct GPUParam : public internal::GPUParam_t void UpdateRun3ClusterErrors(const float* yErrorParam, const float* zErrorParam); #endif - GPUd() float Alpha(int32_t iSlice) const + GPUd() float Alpha(int32_t iSector) const { - if (iSlice >= GPUCA_NSLICES / 2) { - iSlice -= GPUCA_NSLICES / 2; + if (iSector >= GPUCA_NSECTORS / 2) { + iSector -= GPUCA_NSECTORS / 2; } - if (iSlice >= GPUCA_NSLICES / 4) { - iSlice -= GPUCA_NSLICES / 2; + if (iSector >= GPUCA_NSECTORS / 4) { + iSector -= GPUCA_NSECTORS / 2; } - return 0.174533f + par.dAlpha * iSlice; + return 0.174533f + par.dAlpha * iSector; } GPUd() float GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult) const; GPUd() void GetClusterErrorsSeeding2(uint8_t sector, int32_t row, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const; @@ -108,13 +106,12 @@ struct GPUParam : public internal::GPUParam_t GPUd() void UpdateClusterError2ByState(int16_t clusterState, float& ErrY2, float& ErrZ2) const; GPUd() float GetUnscaledMult(float time) const; - GPUd() void Slice2Global(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const; - GPUd() void Global2Slice(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const; + GPUd() void Sector2Global(int32_t iSector, float x, float y, float z, float* X, float* Y, float* Z) const; + GPUd() void Global2Sector(int32_t iSector, float x, float y, float z, float* X, float* Y, float* Z) const; GPUd() bool rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, float trackSigmaY) const; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index 87d7b2dc4c7cd..19dc1fc4a3578 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -19,24 +19,22 @@ #include 
"GPUTPCGMMergedTrackHit.h" #include "GPUTPCClusterOccupancyMap.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { -GPUdi() void GPUParam::Slice2Global(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const +GPUdi() void GPUParam::Sector2Global(int32_t iSector, float x, float y, float z, float* X, float* Y, float* Z) const { // conversion of coordinates sector->global - *X = x * SliceParam[iSlice].CosAlpha - y * SliceParam[iSlice].SinAlpha; - *Y = y * SliceParam[iSlice].CosAlpha + x * SliceParam[iSlice].SinAlpha; + *X = x * SectorParam[iSector].CosAlpha - y * SectorParam[iSector].SinAlpha; + *Y = y * SectorParam[iSector].CosAlpha + x * SectorParam[iSector].SinAlpha; *Z = z; } -GPUdi() void GPUParam::Global2Slice(int32_t iSlice, float X, float Y, float Z, float* x, float* y, float* z) const +GPUdi() void GPUParam::Global2Sector(int32_t iSector, float X, float Y, float Z, float* x, float* y, float* z) const { // conversion of coordinates global->sector - *x = X * SliceParam[iSlice].CosAlpha + Y * SliceParam[iSlice].SinAlpha; - *y = Y * SliceParam[iSlice].CosAlpha - X * SliceParam[iSlice].SinAlpha; + *x = X * SectorParam[iSector].CosAlpha + Y * SectorParam[iSector].SinAlpha; + *y = Y * SectorParam[iSector].CosAlpha - X * SectorParam[iSector].SinAlpha; *z = Z; } @@ -117,7 +115,7 @@ GPUdi() float GPUParam::GetSystematicClusterErrorC122(float x, float y, uint8_t return 0.f; } constexpr float dEdgeInv = 18.f / CAMath::Pi(); - const float dy = (sector == (GPUCA_NSLICES / 2 + 1) ? 0.5f : -0.5f) * (y / x) * dEdgeInv + 0.5f; + const float dy = (sector == (GPUCA_NSECTORS / 2 + 1) ? 
0.5f : -0.5f) * (y / x) * dEdgeInv + 0.5f; const float errC12 = rec.tpc.sysClusErrorC12Norm * occupancyTotal * dy; return errC12 * errC12; } @@ -223,7 +221,6 @@ GPUdi() bool GPUParam::rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, fl return CAMath::Abs(uncorrectedY) > (tpcGeometry.NPads(iRow) - 1) * 0.5f * tpcGeometry.PadWidth(iRow) + rec.tpc.rejectEdgeClustersMargin + trackSigmaY * rec.tpc.rejectEdgeClustersSigmaMargin; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUParamRTC.h b/GPU/GPUTracking/Base/GPUParamRTC.h index dd9cf73e38a1e..2377666ee7b07 100644 --- a/GPU/GPUTracking/Base/GPUParamRTC.h +++ b/GPU/GPUTracking/Base/GPUParamRTC.h @@ -18,9 +18,7 @@ #include "GPUParam.h" #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { namespace gpu_rtc { @@ -38,7 +36,6 @@ struct GPUParamRTC : public internal::GPUParam_t #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTrackingInOutPointers; class GPUReconstruction; @@ -157,7 +155,6 @@ class GPUProcessor friend class GPUTPCNeighboursFinder; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 481494f268494..6ce07f67c3645 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -26,6 +26,7 @@ #include "GPUReconstruction.h" #include "GPUReconstructionIncludes.h" #include "GPUReconstructionThreading.h" +#include "GPUReconstructionIO.h" #include "GPUROOTDumpCore.h" #include "GPUConfigDump.h" #include "GPUChainTracking.h" @@ -43,9 +44,9 @@ #include "GPUReconstructionIncludesITS.h" -namespace o2 +namespace o2::gpu { -namespace gpu +namespace // anonymous { struct GPUReconstructionPipelineQueue { uint32_t op = 0; // For now, 0 = process, 1 = terminate @@ -55,6 +56,7 @@ struct GPUReconstructionPipelineQueue { bool done = false; int32_t retVal = 0; }; +} // 
namespace struct GPUReconstructionPipelineContext { std::queue queue; @@ -62,8 +64,7 @@ struct GPUReconstructionPipelineContext { std::condition_variable cond; bool terminate = false; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu using namespace o2::gpu; @@ -89,9 +90,9 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos new (&mGRPSettings) GPUSettingsGRP; param().SetDefaults(&mGRPSettings); mMemoryScalers.reset(new GPUMemorySizeScalers); - for (uint32_t i = 0; i < NSLICES; i++) { - processors()->tpcTrackers[i].SetSlice(i); // TODO: Move to a better place - processors()->tpcClusterer[i].mISlice = i; + for (uint32_t i = 0; i < NSECTORS; i++) { + processors()->tpcTrackers[i].SetSector(i); // TODO: Move to a better place + processors()->tpcClusterer[i].mISector = i; } #ifndef GPUCA_NO_ROOT mROOTDump = GPUROOTDumpCore::getAndCreate(); @@ -118,15 +119,6 @@ void GPUReconstruction::GetITSTraits(std::unique_ptr* tr } } -void GPUReconstruction::SetNActiveThreads(int32_t n) -{ - mActiveHostKernelThreads = std::max(1, n < 0 ? 
mMaxHostThreads : std::min(n, mMaxHostThreads)); - mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); - if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("Set number of active parallel kernels threads on host to %d (%d requested)", mActiveHostKernelThreads, n); - } -} - int32_t GPUReconstruction::getHostThreadIndex() { return std::max(0, tbb::this_task_arena::current_thread_index()); @@ -257,6 +249,9 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (mProcessingSettings.debugLevel < 1) { mProcessingSettings.deviceTimers = false; } + if (mProcessingSettings.debugLevel > 0) { + mProcessingSettings.recoTaskTiming = true; + } if (mProcessingSettings.deterministicGPUReconstruction == -1) { mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6; } @@ -281,8 +276,8 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (mProcessingSettings.trackletSelectorInPipeline < 0) { mProcessingSettings.trackletSelectorInPipeline = 1; } - if (mProcessingSettings.trackletSelectorSlices < 0) { - mProcessingSettings.trackletSelectorSlices = 1; + if (mProcessingSettings.trackletSelectorSectors < 0) { + mProcessingSettings.trackletSelectorSectors = 1; } } if (mProcessingSettings.createO2Output > 1 && mProcessingSettings.runQA && mProcessingSettings.qcRunFraction == 100.f) { @@ -327,12 +322,12 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } else { mProcessingSettings.autoAdjustHostThreads = false; } - mMaxHostThreads = mActiveHostKernelThreads = mProcessingSettings.nHostThreads; + mMaxHostThreads = mProcessingSettings.nHostThreads; if (mMaster == nullptr) { mThreading = std::make_shared(); mThreading->control = std::make_unique(tbb::global_control::max_allowed_parallelism, mMaxHostThreads); mThreading->allThreads = std::make_unique(mMaxHostThreads); - mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); + mThreading->activeThreads = std::make_unique(mMaxHostThreads); } else { mThreading = 
mMaster->mThreading; } @@ -342,14 +337,14 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } if (mProcessingSettings.nTPCClustererLanes == -1) { - mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSLICES, mProcessingSettings.inKernelParallel ? (mMaxHostThreads >= 4 ? std::min(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? GPUCA_NSLICES : 4) : 1) : mMaxHostThreads)); + mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSECTORS, mProcessingSettings.inKernelParallel ? (mMaxHostThreads >= 4 ? std::min(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? GPUCA_NSECTORS : 4) : 1) : mMaxHostThreads)); } if (mProcessingSettings.overrideClusterizerFragmentLen == -1) { mProcessingSettings.overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mMaxHostThreads / mProcessingSettings.nTPCClustererLanes >= 3)) ? TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST; } - if (mProcessingSettings.nTPCClustererLanes > GPUCA_NSLICES) { + if (mProcessingSettings.nTPCClustererLanes > GPUCA_NSECTORS) { GPUError("Invalid value for nTPCClustererLanes: %d", mProcessingSettings.nTPCClustererLanes); - mProcessingSettings.nTPCClustererLanes = GPUCA_NSLICES; + mProcessingSettings.nTPCClustererLanes = GPUCA_NSECTORS; } if (mProcessingSettings.doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || mProcessingSettings.memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { @@ -962,8 +957,12 @@ int32_t GPUReconstruction::unregisterMemoryForGPU(const void* ptr) return 1; } +namespace o2::gpu::internal +{ +namespace // anonymous +{ template -static inline int32_t getStepNum(T step, bool validCheck, int32_t N, const char* err = "Invalid step num") +constexpr static inline int32_t getStepNum(T step, bool validCheck, int32_t N, const char* err = "Invalid step num") { 
static_assert(sizeof(step) == sizeof(uint32_t), "Invalid step enum size"); int32_t retVal = 8 * sizeof(uint32_t) - 1 - CAMath::Clz((uint32_t)step); @@ -975,9 +974,11 @@ static inline int32_t getStepNum(T step, bool validCheck, int32_t N, const char* } return retVal; } +} // anonymous namespace +} // namespace o2::gpu::internal -int32_t GPUReconstruction::getRecoStepNum(RecoStep step, bool validCheck) { return getStepNum(step, validCheck, GPUDataTypes::N_RECO_STEPS, "Invalid Reco Step"); } -int32_t GPUReconstruction::getGeneralStepNum(GeneralStep step, bool validCheck) { return getStepNum(step, validCheck, GPUDataTypes::N_GENERAL_STEPS, "Invalid General Step"); } +int32_t GPUReconstruction::getRecoStepNum(RecoStep step, bool validCheck) { return internal::getStepNum(step, validCheck, GPUDataTypes::N_RECO_STEPS, "Invalid Reco Step"); } +int32_t GPUReconstruction::getGeneralStepNum(GeneralStep step, bool validCheck) { return internal::getStepNum(step, validCheck, GPUDataTypes::N_GENERAL_STEPS, "Invalid General Step"); } void GPUReconstruction::RunPipelineWorker() { @@ -1181,8 +1182,3 @@ void GPUReconstruction::SetInputControl(void* ptr, size_t size) { mInputControl.set(ptr, size); } - -GPUReconstruction::GPUThreadContext::GPUThreadContext() = default; -GPUReconstruction::GPUThreadContext::~GPUThreadContext() = default; - -std::unique_ptr GPUReconstruction::GetThreadContext() { return std::unique_ptr(new GPUThreadContext); } diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 1fdfabb11211a..529cce2bd087f 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -31,22 +31,17 @@ #include "GPUOutputControl.h" #include "GPUMemoryResource.h" #include "GPUConstantMem.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPULogging.h" -namespace o2 -{ -namespace its +namespace o2::its { class TrackerTraits; class VertexerTraits; class TimeFrame; -} // 
namespace its -} // namespace o2 +} // namespace o2::its -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUChain; struct GPUMemorySizeScalers; @@ -57,12 +52,11 @@ class GPUROOTDumpCore; namespace gpu_reconstruction_kernels { struct deviceEvent; +class threadContext; } class GPUReconstruction { - friend class GPUChain; - protected: class LibraryLoader; // These must be the first members to ensure correct destructor order! std::shared_ptr mMyLib = nullptr; @@ -77,7 +71,7 @@ class GPUReconstruction GPUReconstruction& operator=(const GPUReconstruction&) = delete; // General definitions - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; using GeometryType = GPUDataTypes::GeometryType; using DeviceType = GPUDataTypes::DeviceType; @@ -95,8 +89,8 @@ class GPUReconstruction static DeviceType GetDeviceType(const char* type); enum InOutPointerType : uint32_t { CLUSTER_DATA = 0, - SLICE_OUT_TRACK = 1, - SLICE_OUT_CLUSTER = 2, + SECTOR_OUT_TRACK = 1, + SECTOR_OUT_CLUSTER = 2, MC_LABEL_TPC = 3, MC_INFO_TPC = 4, MERGED_TRACK = 5, @@ -114,10 +108,10 @@ class GPUReconstruction TRD_SPACEPOINT = 17, TRD_TRIGGERRECORDS = 18, TF_SETTINGS = 19 }; - static constexpr const char* const IOTYPENAMES[] = {"TPC HLT Clusters", "TPC Slice Tracks", "TPC Slice Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations", "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets", + static constexpr const char* const IOTYPENAMES[] = {"TPC HLT Clusters", "TPC Sector Tracks", "TPC Sector Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations", "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets", "TPC Raw Clusters", "TPC Native Clusters", "TRD Tracklet MC Labels", "TPC Compressed Clusters", "TPC Digit", "TPC ZS Page", "TPC Native Clusters MC Labels", "TPC Digit MC Labeels", "TRD Spacepoints", "TRD Triggerrecords", "TF Settings"}; - static uint32_t getNIOTypeMultiplicity(InOutPointerType 
type) { return (type == CLUSTER_DATA || type == SLICE_OUT_TRACK || type == SLICE_OUT_CLUSTER || type == RAW_CLUSTERS || type == TPC_DIGIT || type == TPC_DIGIT_MC) ? NSLICES : 1; } + static uint32_t getNIOTypeMultiplicity(InOutPointerType type) { return (type == CLUSTER_DATA || type == SECTOR_OUT_TRACK || type == SECTOR_OUT_CLUSTER || type == RAW_CLUSTERS || type == TPC_DIGIT || type == TPC_DIGIT_MC) ? NSECTORS : 1; } // Functionality to create an instance of GPUReconstruction for the desired device static GPUReconstruction* CreateInstance(const GPUSettingsDeviceBackend& cfg); @@ -207,8 +201,6 @@ class GPUReconstruction void SetOutputControl(void* ptr, size_t size); void SetInputControl(void* ptr, size_t size); GPUOutputControl& OutputControl() { return mOutputControl; } - int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; } - void SetNActiveThreads(int32_t n); int32_t NStreams() const { return mNStreams; } const void* DeviceMemoryBase() const { return mDeviceMemoryBase; } @@ -235,8 +227,10 @@ class GPUReconstruction double GetStatKernelTime() { return mStatKernelTime; } double GetStatWallTime() { return mStatWallTime; } + // Threading std::shared_ptr mThreading; static int32_t getHostThreadIndex(); + int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; } protected: void AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUOutputControl* control, GPUReconstruction* recPool); @@ -258,13 +252,7 @@ class GPUReconstruction virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; // Management for GPU thread contexts - class GPUThreadContext - { - public: - GPUThreadContext(); - virtual ~GPUThreadContext(); - }; - virtual std::unique_ptr GetThreadContext(); + virtual std::unique_ptr GetThreadContext() = 0; // Private helpers for library loading static std::shared_ptr* GetLibraryInstance(DeviceType type, bool verbose); @@ -344,15 +332,14 @@ class GPUReconstruction uint32_t mNEventsProcessed = 0; double mStatKernelTime = 0.; 
double mStatWallTime = 0.; + double mStatCPUTime = 0.; std::shared_ptr mROOTDump; std::vector>* mOutputErrorCodes = nullptr; - int32_t mMaxBackendThreads = 0; // Maximum number of threads that may be running, on CPU or GPU - int32_t mThreadId = -1; // Thread ID that is valid for the local CUDA context - int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events - int32_t mNStreams = 1; // Number of parallel GPU streams - int32_t mMaxHostThreads = 0; // Maximum number of OMP threads - int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels + int32_t mMaxBackendThreads = 0; // Maximum number of threads that may be running, on CPU or GPU + int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events + int32_t mNStreams = 1; // Number of parallel GPU streams + int32_t mMaxHostThreads = 0; // Maximum number of OMP threads // Management for GPUProcessors struct ProcessorData { @@ -492,164 +479,6 @@ inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate) } } -template -inline uint32_t GPUReconstruction::DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type) -{ - int32_t count = getNIOTypeMultiplicity(type); - uint32_t numTotal = 0; - for (int32_t i = 0; i < count; i++) { - numTotal += num[i]; - } - if (numTotal == 0) { - return 0; - } - fwrite(&type, sizeof(type), 1, fp); - for (int32_t i = 0; i < count; i++) { - fwrite(&num[i], sizeof(num[i]), 1, fp); - if (num[i]) { - fwrite(entries[i], sizeof(*entries[i]), num[i], fp); - } - } - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Dumped %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); - } - return numTotal; -} - -template -inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr* mem, InOutPointerType type, T** nonConstPtrs) -{ - if (feof(fp)) { - return 0; - } - InOutPointerType inType; - size_t r, pos = ftell(fp); - r = fread(&inType, sizeof(inType), 1, fp); - if (r 
!= 1 || inType != type) { - fseek(fp, pos, SEEK_SET); - return 0; - } - - int32_t count = getNIOTypeMultiplicity(type); - size_t numTotal = 0; - for (int32_t i = 0; i < count; i++) { - r = fread(&num[i], sizeof(num[i]), 1, fp); - T* m = AllocateIOMemoryHelper(num[i], entries[i], mem[i]); - if (nonConstPtrs) { - nonConstPtrs[i] = m; - } - if (num[i]) { - r = fread(m, sizeof(*entries[i]), num[i], fp); - } - numTotal += num[i]; - } - (void)r; - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Read %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); - } - return numTotal; -} - -template -inline void GPUReconstruction::DumpFlatObjectToFile(const T* obj, const char* file) -{ - FILE* fp = fopen(file, "w+b"); - if (fp == nullptr) { - return; - } - size_t size[2] = {sizeof(*obj), obj->getFlatBufferSize()}; - fwrite(size, sizeof(size[0]), 2, fp); - fwrite(obj, 1, size[0], fp); - fwrite(obj->getFlatBufferPtr(), 1, size[1], fp); - fclose(fp); -} - -template -inline std::unique_ptr GPUReconstruction::ReadFlatObjectFromFile(const char* file) -{ - FILE* fp = fopen(file, "rb"); - if (fp == nullptr) { - return nullptr; - } - size_t size[2] = {0}, r; - r = fread(size, sizeof(size[0]), 2, fp); - if (r == 0 || size[0] != sizeof(T)) { - fclose(fp); - GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (int64_t)size[0], (int64_t)sizeof(T)); - throw std::runtime_error("invalid size"); - } - std::unique_ptr retVal(new T); - retVal->destroy(); - char* buf = new char[size[1]]; // Not deleted as ownership is transferred to FlatObject - r = fread((void*)retVal.get(), 1, size[0], fp); - r = fread(buf, 1, size[1], fp); - fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Read %ld bytes from %s", (int64_t)r, file); - } - retVal->clearInternalBufferPtr(); - retVal->setActualBufferAddress(buf); - retVal->adoptInternalBuffer(buf); - return retVal; -} - -template -inline void GPUReconstruction::DumpStructToFile(const T* obj, const char* file) -{ - FILE* fp = 
fopen(file, "w+b"); - if (fp == nullptr) { - return; - } - size_t size = sizeof(*obj); - fwrite(&size, sizeof(size), 1, fp); - fwrite(obj, 1, size, fp); - fclose(fp); -} - -template -inline std::unique_ptr GPUReconstruction::ReadStructFromFile(const char* file) -{ - FILE* fp = fopen(file, "rb"); - if (fp == nullptr) { - return nullptr; - } - size_t size, r; - r = fread(&size, sizeof(size), 1, fp); - if (r == 0 || size != sizeof(T)) { - fclose(fp); - GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (int64_t)size, (int64_t)sizeof(T)); - throw std::runtime_error("invalid size"); - } - std::unique_ptr newObj(new T); - r = fread(newObj.get(), 1, size, fp); - fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Read %ld bytes from %s", (int64_t)r, file); - } - return newObj; -} - -template -inline int32_t GPUReconstruction::ReadStructFromFile(const char* file, T* obj) -{ - FILE* fp = fopen(file, "rb"); - if (fp == nullptr) { - return 1; - } - size_t size, r; - r = fread(&size, sizeof(size), 1, fp); - if (r == 0) { - fclose(fp); - return 1; - } - r = fread(obj, 1, size, fp); - fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Read %ld bytes from %s", (int64_t)r, file); - } - return 0; -} -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index b5f9d591fd9a6..d95a57c8f2063 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -18,8 +18,8 @@ #include "GPUChain.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSliceOutput.h" -#include "GPUTPCSliceOutCluster.h" +#include "GPUTPCSectorOutput.h" +#include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTRDTrackletWord.h" @@ -33,6 +33,7 @@ #include "GPUConstantMem.h" #include "GPUMemorySizeScalers.h" #include +#include #define 
GPUCA_LOGGING_PRINTF #include "GPULogging.h" @@ -54,23 +55,6 @@ GPUReconstructionCPU::~GPUReconstructionCPU() Exit(); // Needs to be identical to GPU backend bahavior in order to avoid calling abstract methods later in the destructor } -int32_t GPUReconstructionCPUBackend::getNKernelHostThreads(bool splitCores) -{ - int32_t nThreads = 0; - if (mProcessingSettings.inKernelParallel == 2 && mNActiveThreadsOuterLoop) { - if (splitCores) { - nThreads = mMaxHostThreads / mNActiveThreadsOuterLoop; - nThreads += (uint32_t)getHostThreadIndex() < mMaxHostThreads % mNActiveThreadsOuterLoop; - } else { - nThreads = mMaxHostThreads; - } - nThreads = std::max(1, nThreads); - } else { - nThreads = mProcessingSettings.inKernelParallel ? mMaxHostThreads : 1; - } - return nThreads; -} - template inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) { @@ -89,12 +73,14 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS if (mProcessingSettings.debugLevel >= 5) { printf("Running %d Threads\n", nThreads); } - mThreading->activeThreads->execute([&] { - tbb::parallel_for(tbb::blocked_range(0, x.nBlocks, 1), [&](const tbb::blocked_range& r) { - typename T::GPUSharedMemory smem; - for (uint32_t iB = r.begin(); iB < r.end(); iB++) { - T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); - } + tbb::this_task_arena::isolate([&] { + mThreading->activeThreads->execute([&] { + tbb::parallel_for(tbb::blocked_range(0, x.nBlocks, 1), [&](const tbb::blocked_range& r) { + typename T::GPUSharedMemory smem; + for (uint32_t iB = r.begin(); iB < r.end(); iB++) { + T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); + } + }); }); }); } else { @@ -198,6 +184,8 @@ int32_t GPUReconstructionCPU::GetThread() int32_t GPUReconstructionCPU::InitDevice() { + mActiveHostKernelThreads = mMaxHostThreads; + 
mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { if (mMaster == nullptr) { if (mDeviceMemorySize > mHostMemorySize) { @@ -211,7 +199,6 @@ int32_t GPUReconstructionCPU::InitDevice() if (mProcessingSettings.inKernelParallel) { mBlockCount = mMaxHostThreads; } - mThreadId = GetThread(); mProcShadow.mProcessorsProc = processors(); return 0; } @@ -234,19 +221,14 @@ int32_t GPUReconstructionCPU::RunChains() mStatNEvents++; mNEventsProcessed++; - timerTotal.Start(); + mTimerTotal.Start(); + const std::clock_t cpuTimerStart = std::clock(); if (mProcessingSettings.doublePipeline) { int32_t retVal = EnqueuePipeline(); if (retVal) { return retVal; } } else { - if (mThreadId != GetThread()) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Thread changed, migrating context, Previous Thread: %d, New Thread: %d", mThreadId, GetThread()); - } - mThreadId = GetThread(); - } if (mSlaves.size() || mMaster) { WriteConstantParams(); // Reinitialize // TODO: Get this in sync with GPUChainTracking::DoQueuedUpdates, and consider the doublePipeline } @@ -257,17 +239,18 @@ int32_t GPUReconstructionCPU::RunChains() } } } - timerTotal.Stop(); + mTimerTotal.Stop(); + mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC; - mStatWallTime = (timerTotal.GetElapsedTime() * 1000000. / mStatNEvents); + mStatWallTime = (mTimerTotal.GetElapsedTime() * 1000000. 
/ mStatNEvents); std::string nEventReport; if (GetProcessingSettings().debugLevel >= 0 && mStatNEvents > 1) { nEventReport += " (avergage of " + std::to_string(mStatNEvents) + " runs)"; } - if (GetProcessingSettings().debugLevel >= 1) { - double kernelTotal = 0; - std::vector kernelStepTimes(GPUDataTypes::N_RECO_STEPS); + double kernelTotal = 0; + std::vector kernelStepTimes(GPUDataTypes::N_RECO_STEPS, 0.); + if (GetProcessingSettings().debugLevel >= 1) { for (uint32_t i = 0; i < mTimers.size(); i++) { double time = 0; if (mTimers[i] == nullptr) { @@ -297,9 +280,12 @@ int32_t GPUReconstructionCPU::RunChains() mTimers[i]->memSize = 0; } } + } + if (GetProcessingSettings().recoTaskTiming) { for (int32_t i = 0; i < GPUDataTypes::N_RECO_STEPS; i++) { if (kernelStepTimes[i] != 0. || mTimersRecoSteps[i].timerTotal.GetElapsedTime() != 0.) { - printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us)\n", "Tasks", GPUDataTypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents); + printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks", + GPUDataTypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime()); } if (mTimersRecoSteps[i].bytesToGPU) { printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents, @@ -314,6 +300,7 @@ int32_t GPUReconstructionCPU::RunChains() mTimersRecoSteps[i].timerToGPU.Reset(); mTimersRecoSteps[i].timerToHost.Reset(); 
mTimersRecoSteps[i].timerTotal.Reset(); + mTimersRecoSteps[i].timerCPU = 0; mTimersRecoSteps[i].countToGPU = 0; mTimersRecoSteps[i].countToHost = 0; } @@ -323,15 +310,18 @@ int32_t GPUReconstructionCPU::RunChains() printf("Execution Time: General Step : %50s Time: %'10.0f us\n", GPUDataTypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents); } } - mStatKernelTime = kernelTotal * 1000000 / mStatNEvents; - printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str()); - printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Wall", mStatWallTime, nEventReport.c_str()); + if (GetProcessingSettings().debugLevel >= 1) { + mStatKernelTime = kernelTotal * 1000000 / mStatNEvents; + printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str()); + } + printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str()); } else if (GetProcessingSettings().debugLevel >= 0) { - GPUInfo("Total Wall Time: %lu us%s", (uint64_t)mStatWallTime, nEventReport.c_str()); + GPUInfo("Total Wall Time: %10.0f us%s", mStatWallTime, nEventReport.c_str()); } if (mProcessingSettings.resetTimers) { mStatNEvents = 0; - timerTotal.Reset(); + mStatCPUTime = 0; + mTimerTotal.Reset(); } return 0; @@ -346,60 +336,6 @@ void GPUReconstructionCPU::ResetDeviceProcessorTypes() } } -static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation - -GPUReconstructionCPU::timerMeta* GPUReconstructionCPU::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step) -{ - while (timerFlag.test_and_set()) { - } - if (mTimers.size() <= id) { - mTimers.resize(id + 1); - } - if 
(mTimers[id] == nullptr) { - if (J >= 0) { - name += std::to_string(J); - } - mTimers[id].reset(new timerMeta{std::unique_ptr{new HighResTimer[num]}, name, num, type, 1u, step, (size_t)0}); - } else { - mTimers[id]->count++; - } - timerMeta* retVal = mTimers[id].get(); - timerFlag.clear(); - return retVal; -} - -GPUReconstructionCPU::timerMeta* GPUReconstructionCPU::getTimerById(uint32_t id, bool increment) -{ - timerMeta* retVal = nullptr; - while (timerFlag.test_and_set()) { - } - if (mTimers.size() > id && mTimers[id]) { - retVal = mTimers[id].get(); - retVal->count += increment; - } - timerFlag.clear(); - return retVal; -} - -uint32_t GPUReconstructionCPU::getNextTimerId() -{ - static std::atomic id{0}; - return id.fetch_add(1); -} - -uint32_t GPUReconstructionCPU::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) -{ - if (condition && mProcessingSettings.inKernelParallel != 1) { - mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min(max, mMaxHostThreads) : mMaxHostThreads; - } else { - mNActiveThreadsOuterLoop = 1; - } - if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); - } - return mNActiveThreadsOuterLoop; -} - void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, int32_t stream) { param().occupancyMap = mapHost; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index f82f481df6a63..b6225999c68a0 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -15,36 +15,31 @@ #ifndef GPURECONSTRUCTIONICPU_H #define GPURECONSTRUCTIONICPU_H -#include "GPUReconstruction.h" +#include "GPUReconstructionProcessing.h" #include "GPUConstantMem.h" #include -#include "utils/timer.h" #include #include "GPUGeneralKernels.h" #include "GPUReconstructionKernelIncludes.h" #include 
"GPUReconstructionKernels.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { -class GPUReconstructionCPUBackend : public GPUReconstruction +class GPUReconstructionCPUBackend : public GPUReconstructionProcessing { public: ~GPUReconstructionCPUBackend() override = default; protected: - GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {} + GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing(cfg) {} template int32_t runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs& args); template int32_t runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args); template gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); - uint32_t mNActiveThreadsOuterLoop = 1; - int32_t getNKernelHostThreads(bool splitCores); }; class GPUReconstructionCPU : public GPUReconstructionKernels @@ -65,23 +60,12 @@ class GPUReconstructionCPU : public GPUReconstructionKernels()); } - template - constexpr static const char* GetKernelName(); - virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false); int32_t GPUStuck() { return mGPUStuck; } void ResetDeviceProcessorTypes(); - template - void AddGPUEvents(T*& events); int32_t RunChains() override; - HighResTimer& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)].timerTotal; } - HighResTimer& getGeneralStepTimer(GeneralStep step) { return mTimersGeneralSteps[getGeneralStepNum(step)]; } - - void SetNActiveThreadsOuterLoop(uint32_t f) { mNActiveThreadsOuterLoop = f; } - uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max); - void UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, int32_t stream = -1); protected: @@ -142,43 +126,8 @@ class GPUReconstructionCPU : public GPUReconstructionKernels timer; - std::string name; - int32_t num; // How many parallel instances 
to sum up (CPU threads / GPU streams) - int32_t type; // 0 = kernel, 1 = CPU step, 2 = DMA transfer - uint32_t count; // How often was the timer queried - RecoStep step; // Which RecoStep is this - size_t memSize; // Memory size for memory bandwidth computation - }; - - struct RecoStepTimerMeta { - HighResTimer timerToGPU; - HighResTimer timerToHost; - HighResTimer timerTotal; - size_t bytesToGPU = 0; - size_t bytesToHost = 0; - uint32_t countToGPU = 0; - uint32_t countToHost = 0; - }; - - HighResTimer mTimersGeneralSteps[GPUDataTypes::N_GENERAL_STEPS]; - - std::vector> mTimers; - RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]; - HighResTimer timerTotal; - template - HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true); - template - HighResTimer& getTimer(const char* name, int32_t num = -1); - - std::vector> mEvents; - private: size_t TransferMemoryResourcesHelper(GPUProcessor* proc, int32_t stream, bool all, bool toGPU); - uint32_t getNextTimerId(); - timerMeta* getTimerById(uint32_t id, bool increment = true); - timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step); }; template @@ -246,58 +195,6 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args return retVal; } -#define GPUCA_KRNL(x_class, ...) 
\ - template <> \ - constexpr const char* GPUReconstructionCPU::GetKernelName() \ - { \ - return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL - -template -inline void GPUReconstructionCPU::AddGPUEvents(T*& events) -{ - mEvents.emplace_back(std::vector(sizeof(T) / sizeof(deviceEvent))); - events = (T*)mEvents.back().data(); -} - -template -HighResTimer& GPUReconstructionCPU::getKernelTimer(RecoStep step, int32_t num, size_t addMemorySize, bool increment) -{ - static int32_t id = getNextTimerId(); - timerMeta* timer = getTimerById(id, increment); - if (timer == nullptr) { - timer = insertTimer(id, GetKernelName(), -1, NSLICES, 0, step); - } - if (addMemorySize) { - timer->memSize += addMemorySize; - } - if (num < 0 || num >= timer->num) { - throw std::runtime_error("Invalid timer requested"); - } - return timer->timer[num]; -} - -template -HighResTimer& GPUReconstructionCPU::getTimer(const char* name, int32_t num) -{ - static int32_t id = getNextTimerId(); - timerMeta* timer = getTimerById(id); - if (timer == nullptr) { - int32_t max = std::max({mMaxHostThreads, mProcessingSettings.nStreams}); - timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep); - } - if (num == -1) { - num = getHostThreadIndex(); - } - if (num < 0 || num >= timer->num) { - throw std::runtime_error("Invalid timer requested"); - } - return timer->timer[num]; -} - -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index 629d23075d9bc..8f5cab6807050 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -49,16 +49,16 @@ using namespace std::string_literals; void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr* clusters, uint32_t* nClusters, const TPCFastTransform* 
transform, int32_t continuousMaxTimeBin) { - memset(nClusters, 0, NSLICES * sizeof(nClusters[0])); + memset(nClusters, 0, NSECTORS * sizeof(nClusters[0])); uint32_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { - uint32_t nClSlice = 0; + for (uint32_t i = 0; i < NSECTORS; i++) { + uint32_t nClSector = 0; for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - nClSlice += native->nClusters[i][j]; + nClSector += native->nClusters[i][j]; } - nClusters[i] = nClSlice; - clusters[i].reset(new GPUTPCClusterData[nClSlice]); - nClSlice = 0; + nClusters[i] = nClSector; + clusters[i].reset(new GPUTPCClusterData[nClSector]); + nClSector = 0; for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { for (uint32_t k = 0; k < native->nClusters[i][j]; k++) { const auto& clin = native->clusters[i][j][k]; @@ -68,7 +68,7 @@ void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNative } else { transform->TransformInTimeFrame(i, j, clin.getPad(), clin.getTime(), x, y, z, continuousMaxTimeBin); } - auto& clout = clusters[i].get()[nClSlice]; + auto& clout = clusters[i].get()[nClSector]; clout.x = x; clout.y = y; clout.z = z; @@ -76,7 +76,7 @@ void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNative clout.amp = clin.qTot; clout.flags = clin.getFlags(); clout.id = offset + k; - nClSlice++; + nClSector++; } native->clusterOffset[i][j] = offset; offset += native->nClusters[i][j]; @@ -87,7 +87,7 @@ void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNative void GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, uint32_t* nRawClusters) { memset((void*)&native, 0, sizeof(native)); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < nRawClusters[i]; j++) { native.nClusters[i][rawClusters[i][j].GetPadRow()]++; } @@ -96,7 +96,7 @@ void 
GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAcce nativeBuffer.reset(new ClusterNative[native.nClustersTotal]); native.clustersLinear = nativeBuffer.get(); native.setOffsetPtrs(); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { native.nClusters[i][j] = 0; } @@ -117,7 +117,7 @@ void GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAcce int32_t GPUReconstructionConvert::GetMaxTimeBin(const ClusterNativeAccess& native) { float retVal = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { for (uint32_t k = 0; k < native.nClusters[i][j]; k++) { if (native.clusters[i][j][k].getTime() > retVal) { @@ -132,7 +132,7 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const ClusterNativeAccess& nativ int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutDigits& digits) { float retVal = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t k = 0; k < digits.nTPCDigits[i]; k++) { if (digits.tpcDigits[i][k].getTimeStamp() > retVal) { retVal = digits.tpcDigits[i][k].getTimeStamp(); @@ -145,12 +145,12 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutDigits& di int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutZS& zspages) { float retVal = 0; - for (uint32_t i = 0; i < NSLICES; i++) { - int32_t firstHBF = zspages.slice[i].count[0] ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)zspages.slice[i].zsPtr[0][0]) : 0; + for (uint32_t i = 0; i < NSECTORS; i++) { + int32_t firstHBF = zspages.sector[i].count[0] ? 
o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)zspages.sector[i].zsPtr[0][0]) : 0; for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < zspages.slice[i].count[j]; k++) { - const char* page = (const char*)zspages.slice[i].zsPtr[j][k]; - for (uint32_t l = 0; l < zspages.slice[i].nZSPtr[j][k]; l++) { + for (uint32_t k = 0; k < zspages.sector[i].count[j]; k++) { + const char* page = (const char*)zspages.sector[i].zsPtr[j][k]; + for (uint32_t l = 0; l < zspages.sector[i].nZSPtr[j][k]; l++) { o2::header::RAWDataHeader* rdh = (o2::header::RAWDataHeader*)(page + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); TPCZSHDR* hdr = (TPCZSHDR*)(page + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); int32_t nTimeBinSpan = hdr->nTimeBinSpan; @@ -174,6 +174,8 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutZS& zspage // ------------------------------------------------- TPC ZS ------------------------------------------------- #ifdef GPUCA_TPC_GEOMETRY_O2 +namespace o2::gpu +{ namespace // anonymous { @@ -1306,19 +1308,9 @@ size_t zsEncoderRun::compare(std::vector* buffer, std::vector void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr* outBuffer, uint32_t* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, int32_t version, bool verify, float threshold, bool padding, std::function&)> digitsFilter) { @@ -1328,8 +1320,15 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr buffer[NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - auto reduced = tbb::parallel_reduce(tbb::blocked_range(0, NSLICES), o2::gpu::internal::tmpReductionResult(), [&](const auto range, auto red) { + std::vector buffer[NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + struct tmpReductionResult { + uint32_t totalPages = 0; + size_t totalSize = 0; + size_t nErrors = 0; + size_t digitsInput = 0; + size_t digitsEncoded = 0; + }; + auto reduced = 
tbb::parallel_reduce(tbb::blocked_range(0, NSECTORS), tmpReductionResult(), [&](const auto range, auto red) { for (uint32_t i = range.begin(); i < range.end(); i++) { std::vector tmpBuffer; red.digitsInput += ZSEncoderGetNDigits(in, i); @@ -1392,7 +1391,7 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptrreset(new uint64_t[reduced.totalPages * TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(uint64_t)]); uint64_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { memcpy((char*)outBuffer->get() + offset, buffer[i][j].data(), buffer[i][j].size() * TPCZSHDR::TPC_ZS_PAGE_SIZE); offset += buffer[i][j].size() * TPCZSHDR::TPC_ZS_PAGE_SIZE; @@ -1417,20 +1416,20 @@ template void GPUReconstructionConvert::RunZSEncoder(const DigitArra void GPUReconstructionConvert::RunZSEncoderCreateMeta(const uint64_t* buffer, const uint32_t* sizes, void** ptrs, GPUTrackingInOutZS* out) { uint64_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { ptrs[i * GPUTrackingInOutZS::NENDPOINTS + j] = (char*)buffer + offset; offset += sizes[i * GPUTrackingInOutZS::NENDPOINTS + j] * TPCZSHDR::TPC_ZS_PAGE_SIZE; - out->slice[i].zsPtr[j] = &ptrs[i * GPUTrackingInOutZS::NENDPOINTS + j]; - out->slice[i].nZSPtr[j] = &sizes[i * GPUTrackingInOutZS::NENDPOINTS + j]; - out->slice[i].count[j] = 1; + out->sector[i].zsPtr[j] = &ptrs[i * GPUTrackingInOutZS::NENDPOINTS + j]; + out->sector[i].nZSPtr[j] = &sizes[i * GPUTrackingInOutZS::NENDPOINTS + j]; + out->sector[i].count[j] = 1; } } } void GPUReconstructionConvert::RunZSFilter(std::unique_ptr* buffers, const o2::tpc::Digit* const* ptrs, size_t* nsb, const size_t* ns, const GPUParam& param, bool zs12bit, float threshold) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { if (buffers[i].get() != 
ptrs[i] || nsb != ns) { throw std::runtime_error("Not owning digits"); } @@ -1455,6 +1454,8 @@ void GPUReconstructionConvert::RunZSFilter(std::unique_ptr* bu } #ifdef GPUCA_O2_LIB +namespace o2::gpu::internal +{ template static inline auto GetDecoder_internal(const GPUParam* param, int32_t version) { @@ -1480,15 +1481,16 @@ static inline auto GetDecoder_internal(const GPUParam* param, int32_t version) enc->decodePage(outBuffer, (const zsPage*)page, endpoint, firstTfOrbit, triggerBC); }; } +} // namespace o2::gpu::internal std::function&, const void*, uint32_t, uint32_t)> GPUReconstructionConvert::GetDecoder(int32_t version, const GPUParam* param) { if (version >= o2::tpc::ZSVersion::ZSVersionRowBased10BitADC && version <= o2::tpc::ZSVersion::ZSVersionRowBased12BitADC) { - return GetDecoder_internal(param, version); + return o2::gpu::internal::GetDecoder_internal(param, version); } else if (version == o2::tpc::ZSVersion::ZSVersionLinkBasedWithMeta) { - return GetDecoder_internal(param, version); + return o2::gpu::internal::GetDecoder_internal(param, version); } else if (version >= o2::tpc::ZSVersion::ZSVersionDenseLinkBased && version <= o2::tpc::ZSVersion::ZSVersionDenseLinkBasedV2) { - return GetDecoder_internal(param, version); + return o2::gpu::internal::GetDecoder_internal(param, version); } else { throw std::runtime_error("Invalid ZS version "s + std::to_string(version) + ", cannot create decoder"s); } diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.h b/GPU/GPUTracking/Base/GPUReconstructionConvert.h index b8aedbcde582b..a24eb52a3a47c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.h +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.h @@ -37,9 +37,7 @@ class RawFileWriter; struct AliHLTTPCRawCluster; -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUParam; struct GPUTPCClusterData; @@ -50,7 +48,7 @@ struct GPUTrackingInOutZS; class GPUReconstructionConvert { public: - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + 
constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; static void ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr* clusters, uint32_t* nClusters, const TPCFastTransform* transform, int32_t continuousMaxTimeBin = 0); static void ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, uint32_t* nRawClusters); template @@ -63,7 +61,6 @@ class GPUReconstructionConvert static std::function&, const void*, uint32_t, uint32_t)> GetDecoder(int32_t version, const GPUParam* param); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index 91715fab4f668..64d9351b447e2 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -16,7 +16,7 @@ #include "GPUReconstructionIncludes.h" #include "GPUTPCTracker.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" using namespace o2::gpu; @@ -102,7 +102,6 @@ int32_t GPUReconstructionDeviceBase::InitDevice() GPUError("Too many straems requested %d > %d\n", mProcessingSettings.nStreams, GPUCA_MAX_STREAMS); return (1); } - mThreadId = GetThread(); void* semLock = nullptr; if (mProcessingSettings.globalInitMutex && GetGlobalLock(semLock)) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 1381fd0f76981..6cd3813ff1431 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -20,9 +20,7 @@ #include "GPUChain.h" #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { #if !(defined(__CLING__) || defined(__ROOTCLING__) || defined(G__ROOT)) extern template class GPUReconstructionKernels; @@ -52,7 +50,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU 
int32_t unregisterMemoryForGPU_internal(const void* ptr) override; void unregisterRemainingRegisteredMemory(); - virtual const GPUTPCTracker* CPUTracker(int32_t iSlice) { return &processors()->tpcTrackers[iSlice]; } + virtual const GPUTPCTracker* CPUTracker(int32_t iSector) { return &processors()->tpcTrackers[iSector]; } int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override = 0; size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) override; @@ -87,7 +85,6 @@ inline size_t GPUReconstructionDeviceBase::GPUMemCpyAlways(bool onGpu, void* dst return GPUReconstructionCPU::GPUMemCpyAlways(false, dst, src, size, stream, toGPU, ev, evList, nEvents); } } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionIO.h b/GPU/GPUTracking/Base/GPUReconstructionIO.h new file mode 100644 index 0000000000000..2208c15846e09 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionIO.h @@ -0,0 +1,183 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUReconstructionIO.h +/// \author David Rohr + +#if !defined(GPURECONSTRUCTIONIO_H) +#define GPURECONSTRUCTIONIO_H + +#include "GPUReconstruction.h" + +namespace o2::gpu +{ + +template +inline uint32_t GPUReconstruction::DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type) +{ + int32_t count = getNIOTypeMultiplicity(type); + uint32_t numTotal = 0; + for (int32_t i = 0; i < count; i++) { + numTotal += num[i]; + } + if (numTotal == 0) { + return 0; + } + fwrite(&type, sizeof(type), 1, fp); + for (int32_t i = 0; i < count; i++) { + fwrite(&num[i], sizeof(num[i]), 1, fp); + if (num[i]) { + fwrite(entries[i], sizeof(*entries[i]), num[i], fp); + } + } + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Dumped %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); + } + return numTotal; +} + +template +inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr* mem, InOutPointerType type, T** nonConstPtrs) +{ + if (feof(fp)) { + return 0; + } + InOutPointerType inType; + size_t r, pos = ftell(fp); + r = fread(&inType, sizeof(inType), 1, fp); + if (r != 1 || inType != type) { + fseek(fp, pos, SEEK_SET); + return 0; + } + + int32_t count = getNIOTypeMultiplicity(type); + size_t numTotal = 0; + for (int32_t i = 0; i < count; i++) { + r = fread(&num[i], sizeof(num[i]), 1, fp); + T* m = AllocateIOMemoryHelper(num[i], entries[i], mem[i]); + if (nonConstPtrs) { + nonConstPtrs[i] = m; + } + if (num[i]) { + r = fread(m, sizeof(*entries[i]), num[i], fp); + } + numTotal += num[i]; + } + (void)r; + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Read %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); + } + return numTotal; +} + +template +inline void GPUReconstruction::DumpFlatObjectToFile(const T* obj, const char* file) +{ + FILE* fp = fopen(file, "w+b"); + if (fp == nullptr) { + return; + } + size_t size[2] = {sizeof(*obj), obj->getFlatBufferSize()}; + fwrite(size, sizeof(size[0]), 2, fp); + fwrite(obj, 
1, size[0], fp); + fwrite(obj->getFlatBufferPtr(), 1, size[1], fp); + fclose(fp); +} + +template +inline std::unique_ptr GPUReconstruction::ReadFlatObjectFromFile(const char* file) +{ + FILE* fp = fopen(file, "rb"); + if (fp == nullptr) { + return nullptr; + } + size_t size[2] = {0}, r; + r = fread(size, sizeof(size[0]), 2, fp); + if (r == 0 || size[0] != sizeof(T)) { + fclose(fp); + GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (int64_t)size[0], (int64_t)sizeof(T)); + throw std::runtime_error("invalid size"); + } + std::unique_ptr retVal(new T); + retVal->destroy(); + char* buf = new char[size[1]]; // Not deleted as ownership is transferred to FlatObject + r = fread((void*)retVal.get(), 1, size[0], fp); + r = fread(buf, 1, size[1], fp); + fclose(fp); + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Read %ld bytes from %s", (int64_t)r, file); + } + retVal->clearInternalBufferPtr(); + retVal->setActualBufferAddress(buf); + retVal->adoptInternalBuffer(buf); + return retVal; +} + +template +inline void GPUReconstruction::DumpStructToFile(const T* obj, const char* file) +{ + FILE* fp = fopen(file, "w+b"); + if (fp == nullptr) { + return; + } + size_t size = sizeof(*obj); + fwrite(&size, sizeof(size), 1, fp); + fwrite(obj, 1, size, fp); + fclose(fp); +} + +template +inline std::unique_ptr GPUReconstruction::ReadStructFromFile(const char* file) +{ + FILE* fp = fopen(file, "rb"); + if (fp == nullptr) { + return nullptr; + } + size_t size, r; + r = fread(&size, sizeof(size), 1, fp); + if (r == 0 || size != sizeof(T)) { + fclose(fp); + GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (int64_t)size, (int64_t)sizeof(T)); + throw std::runtime_error("invalid size"); + } + std::unique_ptr newObj(new T); + r = fread(newObj.get(), 1, size, fp); + fclose(fp); + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Read %ld bytes from %s", (int64_t)r, file); + } + return newObj; +} + +template +inline int32_t 
GPUReconstruction::ReadStructFromFile(const char* file, T* obj) +{ + FILE* fp = fopen(file, "rb"); + if (fp == nullptr) { + return 1; + } + size_t size, r; + r = fread(&size, sizeof(size), 1, fp); + if (r == 0) { + fclose(fp); + return 1; + } + r = fread(obj, 1, size, fp); + fclose(fp); + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Read %ld bytes from %s", (int64_t)r, file); + } + return 0; +} + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index d4502b978ef5b..b35613f3bec59 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -36,8 +36,8 @@ if (mProcessingSettings.trackletSelectorInPipeline < 0) { \ mProcessingSettings.trackletSelectorInPipeline = GPUCA_SELECTOR_IN_PIPELINE; \ } \ - if (mProcessingSettings.trackletSelectorSlices < 0) { \ - mProcessingSettings.trackletSelectorSlices = GPUCA_TRACKLET_SELECTOR_SLICE_COUNT; \ + if (mProcessingSettings.trackletSelectorSectors < 0) { \ + mProcessingSettings.trackletSelectorSectors = GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT; \ } \ if (mProcessingSettings.alternateBorderSort < 0) { \ mProcessingSettings.alternateBorderSort = GPUCA_ALTERNATE_BORDER_SORT; \ diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h b/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h index 02008ed0ff78f..38a9780376d16 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h @@ -17,12 +17,9 @@ #include "GPUDef.h" -namespace o2 +namespace o2::gpu { -namespace gpu -{ -} -} // namespace o2 +} // namespace o2::gpu using namespace o2::gpu; // clang-format off diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index 295e6e1a5d9b7..41abc8725c07b 100644 --- 
a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -53,31 +53,31 @@ #define GPUCA_ATTRRES(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) // GPU Kernel entry point for single sector #define GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, ...) \ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t iSlice_internal GPUCA_M_STRIP(x_arguments)) + GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t iSector_internal GPUCA_M_STRIP(x_arguments)) #ifdef GPUCA_KRNL_DEFONLY #define GPUCA_KRNLGPU_SINGLE(...) GPUCA_KRNLGPU_SINGLE_DEF(__VA_ARGS__); #else #define GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ { \ GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ - GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSlice_internal] GPUCA_M_STRIP(x_forward)); \ + GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSector_internal] GPUCA_M_STRIP(x_forward)); \ } #endif // GPU Kernel entry point for multiple sector #define GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, ...) 
\ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)(GPUCA_CONSMEM_PTR int32_t firstSlice, int32_t nSliceCount GPUCA_M_STRIP(x_arguments)) + GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)(GPUCA_CONSMEM_PTR int32_t firstSector, int32_t nSectorCount GPUCA_M_STRIP(x_arguments)) #ifdef GPUCA_KRNL_DEFONLY #define GPUCA_KRNLGPU_MULTI(...) GPUCA_KRNLGPU_MULTI_DEF(__VA_ARGS__); #else #define GPUCA_KRNLGPU_MULTI(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ { \ - const int32_t iSlice_internal = nSliceCount * (get_group_id(0) + (get_num_groups(0) % nSliceCount != 0 && nSliceCount * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0); \ - const int32_t nSliceBlockOffset = get_num_groups(0) * iSlice_internal / nSliceCount; \ - const int32_t sliceBlockId = get_group_id(0) - nSliceBlockOffset; \ - const int32_t sliceGridDim = get_num_groups(0) * (iSlice_internal + 1) / nSliceCount - get_num_groups(0) * (iSlice_internal) / nSliceCount; \ + const int32_t iSector_internal = nSectorCount * (get_group_id(0) + (get_num_groups(0) % nSectorCount != 0 && nSectorCount * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0); \ + const int32_t nSectorBlockOffset = get_num_groups(0) * iSector_internal / nSectorCount; \ + const int32_t sectorBlockId = get_group_id(0) - nSectorBlockOffset; \ + const int32_t sectorGridDim = get_num_groups(0) * (iSector_internal + 1) / nSectorCount - get_num_groups(0) * (iSector_internal) / nSectorCount; \ GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ - GPUCA_M_STRIP_FIRST(x_class)::template Thread(sliceGridDim, get_local_size(0), sliceBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSlice + iSlice_internal] 
GPUCA_M_STRIP(x_forward)); \ + GPUCA_M_STRIP_FIRST(x_class)::template Thread(sectorGridDim, get_local_size(0), sectorBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSector + iSector_internal] GPUCA_M_STRIP(x_forward)); \ } #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index d5d329d55ad4a..e95a59df6cfd5 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -17,27 +17,11 @@ #include "GPUReconstruction.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { namespace gpu_reconstruction_kernels { -struct deviceEvent { - constexpr deviceEvent() = default; - constexpr deviceEvent(std::nullptr_t p) : v(nullptr){}; - template - void set(T val) { v = reinterpret_cast(val); } - template - T& get() { return reinterpret_cast(v); } - template - T* getEventList() { return reinterpret_cast(this); } - bool isSet() const { return v; } - - private: - void* v = nullptr; // We use only pointers anyway, and since cl_event and cudaEvent_t and hipEvent_t are actually pointers, we can cast them to deviceEvent (void*) this way. -}; template struct classArgument { @@ -95,6 +79,7 @@ struct krnlSetupArgs : public gpu_reconstruction_kernels::classArgument { const krnlSetupTime s; std::tuple sizeof(void*)), const Args&, const Args>::type...> v; }; + } // namespace gpu_reconstruction_kernels template @@ -126,7 +111,6 @@ class GPUReconstructionKernels : public T #undef GPUCA_KRNL }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx new file mode 100644 index 0000000000000..18662870ed45e --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -0,0 +1,119 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionProcessing.cxx +/// \author David Rohr + +#include "GPUReconstructionProcessing.h" +#include "GPUReconstructionThreading.h" + +using namespace o2::gpu; + +int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores) +{ + int32_t nThreads = 0; + if (mProcessingSettings.inKernelParallel == 2 && mNActiveThreadsOuterLoop) { + if (splitCores) { + nThreads = mMaxHostThreads / mNActiveThreadsOuterLoop; + nThreads += (uint32_t)getHostThreadIndex() < mMaxHostThreads % mNActiveThreadsOuterLoop; + } else { + nThreads = mMaxHostThreads; + } + nThreads = std::max(1, nThreads); + } else { + nThreads = mProcessingSettings.inKernelParallel ? mMaxHostThreads : 1; + } + return nThreads; +} + +void GPUReconstructionProcessing::SetNActiveThreads(int32_t n) +{ + mActiveHostKernelThreads = std::max(1, n < 0 ? 
mMaxHostThreads : std::min(n, mMaxHostThreads)); + mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); + if (mProcessingSettings.debugLevel >= 3) { + GPUInfo("Set number of active parallel kernels threads on host to %d (%d requested)", mActiveHostKernelThreads, n); + } +} + +void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function lambda) +{ + tbb::task_arena(SetAndGetNActiveThreadsOuterLoop(!doGPU, nThreads)).execute([&] { + tbb::parallel_for(0, nThreads, lambda, tbb::simple_partitioner()); + }); +} + +namespace o2::gpu +{ +namespace // anonymous +{ +static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation +} // anonymous namespace +} // namespace o2::gpu + +GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step) +{ + while (timerFlag.test_and_set()) { + } + if (mTimers.size() <= id) { + mTimers.resize(id + 1); + } + if (mTimers[id] == nullptr) { + if (J >= 0) { + name += std::to_string(J); + } + mTimers[id].reset(new timerMeta{std::unique_ptr{new HighResTimer[num]}, name, num, type, 1u, step, (size_t)0}); + } else { + mTimers[id]->count++; + } + timerMeta* retVal = mTimers[id].get(); + timerFlag.clear(); + return retVal; +} + +GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::getTimerById(uint32_t id, bool increment) +{ + timerMeta* retVal = nullptr; + while (timerFlag.test_and_set()) { + } + if (mTimers.size() > id && mTimers[id]) { + retVal = mTimers[id].get(); + retVal->count += increment; + } + timerFlag.clear(); + return retVal; +} + +uint32_t GPUReconstructionProcessing::getNextTimerId() +{ + static std::atomic id{0}; + return id.fetch_add(1); +} + +uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) +{ + if (condition && 
mProcessingSettings.inKernelParallel != 1) { + mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min(max, mMaxHostThreads) : mMaxHostThreads; + } else { + mNActiveThreadsOuterLoop = 1; + } + if (mProcessingSettings.debugLevel >= 5) { + printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); + } + return mNActiveThreadsOuterLoop; +} + +std::unique_ptr GPUReconstructionProcessing::GetThreadContext() +{ + return std::make_unique(); +} + +gpu_reconstruction_kernels::threadContext::threadContext() = default; +gpu_reconstruction_kernels::threadContext::~threadContext() = default; diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h new file mode 100644 index 0000000000000..4ccfb9ff10311 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -0,0 +1,188 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUReconstructionProcessing.h +/// \author David Rohr + +#if !defined(GPURECONSTRUCTIONPROCESSING_H) +#define GPURECONSTRUCTIONPROCESSING_H + +#include "GPUReconstruction.h" +#include "GPUReconstructionKernelIncludes.h" + +#include "utils/timer.h" +#include + +namespace o2::gpu +{ + +namespace gpu_reconstruction_kernels +{ +struct deviceEvent { + constexpr deviceEvent() = default; + constexpr deviceEvent(std::nullptr_t p) : v(nullptr){}; + template + void set(T val) + { + v = reinterpret_cast(val); + } + template + T& get() + { + return reinterpret_cast(v); + } + template + T* getEventList() + { + return reinterpret_cast(this); + } + bool isSet() const { return v; } + + private: + void* v = nullptr; // We use only pointers anyway, and since cl_event and cudaEvent_t and hipEvent_t are actually pointers, we can cast them to deviceEvent (void*) this way. +}; + +class threadContext +{ + public: + threadContext(); + virtual ~threadContext(); +}; + +} // namespace gpu_reconstruction_kernels + +class GPUReconstructionProcessing : public GPUReconstruction +{ + public: + ~GPUReconstructionProcessing() override = default; + + // Threading + int32_t getNKernelHostThreads(bool splitCores); + uint32_t getNActiveThreadsOuterLoop() const { return mNActiveThreadsOuterLoop; } + void SetNActiveThreadsOuterLoop(uint32_t f) { mNActiveThreadsOuterLoop = f; } + uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max); + void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function lambda); + void SetNActiveThreads(int32_t n); + + // Interface to query name of a kernel + template + constexpr static const char* GetKernelName(); + + // Public queries for timers + auto& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)]; } + HighResTimer& getGeneralStepTimer(GeneralStep step) { return mTimersGeneralSteps[getGeneralStepNum(step)]; } + + template + void AddGPUEvents(T*& events); + + virtual std::unique_ptr GetThreadContext() 
override; + + struct RecoStepTimerMeta { + HighResTimer timerToGPU; + HighResTimer timerToHost; + HighResTimer timerTotal; + double timerCPU = 0.; + size_t bytesToGPU = 0; + size_t bytesToHost = 0; + uint32_t countToGPU = 0; + uint32_t countToHost = 0; + }; + + protected: + GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {} + using deviceEvent = gpu_reconstruction_kernels::deviceEvent; + + int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels + uint32_t mNActiveThreadsOuterLoop = 1; // Number of threads currently running an outer loop + + std::vector> mEvents; + + // Timer related stuff + struct timerMeta { + std::unique_ptr timer; + std::string name; + int32_t num; // How many parallel instances to sum up (CPU threads / GPU streams) + int32_t type; // 0 = kernel, 1 = CPU step, 2 = DMA transfer + uint32_t count; // How often was the timer queried + RecoStep step; // Which RecoStep is this + size_t memSize; // Memory size for memory bandwidth computation + }; + + HighResTimer mTimersGeneralSteps[GPUDataTypes::N_GENERAL_STEPS]; + + std::vector> mTimers; + RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]; + HighResTimer mTimerTotal; + template + HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true); + template + HighResTimer& getTimer(const char* name, int32_t num = -1); + + private: + uint32_t getNextTimerId(); + timerMeta* getTimerById(uint32_t id, bool increment = true); + timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step); +}; + +template +inline void GPUReconstructionProcessing::AddGPUEvents(T*& events) +{ + mEvents.emplace_back(std::vector(sizeof(T) / sizeof(deviceEvent))); + events = (T*)mEvents.back().data(); +} + +template +HighResTimer& GPUReconstructionProcessing::getKernelTimer(RecoStep step, int32_t num, size_t addMemorySize, bool increment) 
+{ + static int32_t id = getNextTimerId(); + timerMeta* timer = getTimerById(id, increment); + if (timer == nullptr) { + timer = insertTimer(id, GetKernelName(), -1, NSECTORS, 0, step); + } + if (addMemorySize) { + timer->memSize += addMemorySize; + } + if (num < 0 || num >= timer->num) { + throw std::runtime_error("Invalid timer requested"); + } + return timer->timer[num]; +} + +template +HighResTimer& GPUReconstructionProcessing::getTimer(const char* name, int32_t num) +{ + static int32_t id = getNextTimerId(); + timerMeta* timer = getTimerById(id); + if (timer == nullptr) { + int32_t max = std::max({mMaxHostThreads, mProcessingSettings.nStreams}); + timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep); + } + if (num == -1) { + num = getHostThreadIndex(); + } + if (num < 0 || num >= timer->num) { + throw std::runtime_error("Invalid timer requested"); + } + return timer->timer[num]; +} + +#define GPUCA_KRNL(x_class, ...) \ + template <> \ + constexpr const char* GPUReconstructionProcessing::GetKernelName() \ + { \ + return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx index fbca43e03781a..4693a1eff24f2 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx @@ -72,23 +72,23 @@ int32_t GPUReconstructionTimeframe::ReadEventShifted(int32_t iEvent, float shift mReadEvent(iEvent); if (config.overlayRaw) { float shiftTTotal = (((double)config.timeFrameLen - DRIFT_TIME) * ((double)TPCZ / (double)DRIFT_TIME) - shiftZ) / mChain->GetTPCTransformHelper()->getCorrMap()->getVDrift(); - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - for (uint32_t j = 0; j < mChain->mIOPtrs.nRawClusters[iSlice]; j++) { - auto& tmp = mChain->mIOMem.rawClusters[iSlice][j]; + for (uint32_t iSector = 
0; iSector < NSECTORS; iSector++) { + for (uint32_t j = 0; j < mChain->mIOPtrs.nRawClusters[iSector]; j++) { + auto& tmp = mChain->mIOMem.rawClusters[iSector][j]; tmp.fTime += shiftTTotal; } } } if (shiftZ != 0.f) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - for (uint32_t j = 0; j < mChain->mIOPtrs.nClusterData[iSlice]; j++) { - auto& tmp = mChain->mIOMem.clusterData[iSlice][j]; - tmp.z += iSlice < NSLICES / 2 ? shiftZ : -shiftZ; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + for (uint32_t j = 0; j < mChain->mIOPtrs.nClusterData[iSector]; j++) { + auto& tmp = mChain->mIOMem.clusterData[iSector][j]; + tmp.z += iSector < NSECTORS / 2 ? shiftZ : -shiftZ; } } for (uint32_t i = 0; i < mChain->mIOPtrs.nMCInfosTPC; i++) { auto& tmp = mChain->mIOMem.mcInfosTPC[i]; - tmp.z += i < NSLICES / 2 ? shiftZ : -shiftZ; + tmp.z += i < NSECTORS / 2 ? shiftZ : -shiftZ; } } @@ -97,40 +97,40 @@ int32_t GPUReconstructionTimeframe::ReadEventShifted(int32_t iEvent, float shift uint32_t removed = 0; if (minZ > -1e6 || maxZ > -1e6) { uint32_t currentClusterTotal = 0; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t currentClusterSlice = 0; - bool doRaw = config.overlayRaw && mChain->mIOPtrs.nClusterData[iSlice] == mChain->mIOPtrs.nRawClusters[iSlice]; - for (uint32_t i = 0; i < mChain->mIOPtrs.nClusterData[iSlice]; i++) { - float sign = iSlice < NSLICES / 2 ? 1 : -1; - if (sign * mChain->mIOMem.clusterData[iSlice][i].z >= minZ && sign * mChain->mIOMem.clusterData[iSlice][i].z <= maxZ) { - if (currentClusterSlice != i) { - mChain->mIOMem.clusterData[iSlice][currentClusterSlice] = mChain->mIOMem.clusterData[iSlice][i]; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t currentClusterSector = 0; + bool doRaw = config.overlayRaw && mChain->mIOPtrs.nClusterData[iSector] == mChain->mIOPtrs.nRawClusters[iSector]; + for (uint32_t i = 0; i < mChain->mIOPtrs.nClusterData[iSector]; i++) { + float sign = iSector < NSECTORS / 2 ? 
1 : -1; + if (sign * mChain->mIOMem.clusterData[iSector][i].z >= minZ && sign * mChain->mIOMem.clusterData[iSector][i].z <= maxZ) { + if (currentClusterSector != i) { + mChain->mIOMem.clusterData[iSector][currentClusterSector] = mChain->mIOMem.clusterData[iSector][i]; if (doRaw) { - mChain->mIOMem.rawClusters[iSlice][currentClusterSlice] = mChain->mIOMem.rawClusters[iSlice][i]; + mChain->mIOMem.rawClusters[iSector][currentClusterSector] = mChain->mIOMem.rawClusters[iSector][i]; } } if (mChain->mIOPtrs.nMCLabelsTPC > currentClusterTotal && nClusters != currentClusterTotal) { mChain->mIOMem.mcLabelsTPC[nClusters] = mChain->mIOMem.mcLabelsTPC[currentClusterTotal]; } - // GPUInfo("Keeping Cluster ID %d (ID in slice %d) Z=%f (sector %d) --> %d (slice %d)", currentClusterTotal, i, mChain->mIOMem.clusterData[iSlice][i].fZ, iSlice, nClusters, currentClusterSlice); - currentClusterSlice++; + // GPUInfo("Keeping Cluster ID %d (ID in sector %d) Z=%f (sector %d) --> %d (sector %d)", currentClusterTotal, i, mChain->mIOMem.clusterData[iSector][i].fZ, iSector, nClusters, currentClusterSector); + currentClusterSector++; nClusters++; } else { - // GPUInfo("Removing Cluster ID %d (ID in slice %d) Z=%f (sector %d)", currentClusterTotal, i, mChain->mIOMem.clusterData[iSlice][i].fZ, iSlice); + // GPUInfo("Removing Cluster ID %d (ID in sector %d) Z=%f (sector %d)", currentClusterTotal, i, mChain->mIOMem.clusterData[iSector][i].fZ, iSector); removed++; } currentClusterTotal++; } - mChain->mIOPtrs.nClusterData[iSlice] = currentClusterSlice; + mChain->mIOPtrs.nClusterData[iSector] = currentClusterSector; if (doRaw) { - mChain->mIOPtrs.nRawClusters[iSlice] = currentClusterSlice; + mChain->mIOPtrs.nRawClusters[iSector] = currentClusterSector; } } if (mChain->mIOPtrs.nMCLabelsTPC) { mChain->mIOPtrs.nMCLabelsTPC = nClusters; } } else { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { nClusters += mChain->mIOPtrs.nClusterData[i]; } } @@ -151,7 +151,7 @@ 
void GPUReconstructionTimeframe::MergeShiftedEvents() mChain->ClearIOPointers(); for (uint32_t i = 0; i < mShiftedEvents.size(); i++) { auto& ptr = std::get<0>(mShiftedEvents[i]); - for (uint32_t j = 0; j < NSLICES; j++) { + for (uint32_t j = 0; j < NSECTORS; j++) { mChain->mIOPtrs.nClusterData[j] += ptr.nClusterData[j]; if (config.overlayRaw) { mChain->mIOPtrs.nRawClusters[j] += ptr.nRawClusters[j]; @@ -164,9 +164,9 @@ void GPUReconstructionTimeframe::MergeShiftedEvents() } uint32_t nClustersTotal = 0; uint32_t nClustersTotalRaw = 0; - uint32_t nClustersSliceOffset[NSLICES] = {0}; - for (uint32_t i = 0; i < NSLICES; i++) { - nClustersSliceOffset[i] = nClustersTotal; + uint32_t nClustersSectorOffset[NSECTORS] = {0}; + for (uint32_t i = 0; i < NSECTORS; i++) { + nClustersSectorOffset[i] = nClustersTotal; nClustersTotal += mChain->mIOPtrs.nClusterData[i]; nClustersTotalRaw += mChain->mIOPtrs.nRawClusters[i]; } @@ -183,23 +183,23 @@ void GPUReconstructionTimeframe::MergeShiftedEvents() uint32_t nTrackOffset = 0; uint32_t nColOffset = 0; - uint32_t nClustersEventOffset[NSLICES] = {0}; + uint32_t nClustersEventOffset[NSECTORS] = {0}; for (uint32_t i = 0; i < mShiftedEvents.size(); i++) { auto& ptr = std::get<0>(mShiftedEvents[i]); uint32_t inEventOffset = 0; - for (uint32_t j = 0; j < NSLICES; j++) { + for (uint32_t j = 0; j < NSECTORS; j++) { memcpy((void*)&mChain->mIOMem.clusterData[j][nClustersEventOffset[j]], (void*)ptr.clusterData[j], ptr.nClusterData[j] * sizeof(ptr.clusterData[j][0])); if (nClustersTotalRaw) { memcpy((void*)&mChain->mIOMem.rawClusters[j][nClustersEventOffset[j]], (void*)ptr.rawClusters[j], ptr.nRawClusters[j] * sizeof(ptr.rawClusters[j][0])); } if (mChain->mIOPtrs.nMCLabelsTPC) { - memcpy((void*)&mChain->mIOMem.mcLabelsTPC[nClustersSliceOffset[j] + nClustersEventOffset[j]], (void*)&ptr.mcLabelsTPC[inEventOffset], ptr.nClusterData[j] * sizeof(ptr.mcLabelsTPC[0])); + memcpy((void*)&mChain->mIOMem.mcLabelsTPC[nClustersSectorOffset[j] + 
nClustersEventOffset[j]], (void*)&ptr.mcLabelsTPC[inEventOffset], ptr.nClusterData[j] * sizeof(ptr.mcLabelsTPC[0])); } for (uint32_t k = 0; k < ptr.nClusterData[j]; k++) { - mChain->mIOMem.clusterData[j][nClustersEventOffset[j] + k].id = nClustersSliceOffset[j] + nClustersEventOffset[j] + k; + mChain->mIOMem.clusterData[j][nClustersEventOffset[j] + k].id = nClustersSectorOffset[j] + nClustersEventOffset[j] + k; if (mChain->mIOPtrs.nMCLabelsTPC) { for (int32_t l = 0; l < 3; l++) { - auto& label = mChain->mIOMem.mcLabelsTPC[nClustersSliceOffset[j] + nClustersEventOffset[j] + k].fClusterID[l]; + auto& label = mChain->mIOMem.mcLabelsTPC[nClustersSectorOffset[j] + nClustersEventOffset[j] + k].fClusterID[l]; if (label.fMCID >= 0) { label.fMCID += nTrackOffset; } @@ -364,9 +364,9 @@ int32_t GPUReconstructionTimeframe::LoadMergedEvents(int32_t iEvent) void GPUReconstructionTimeframe::SetDisplayInformation(int32_t iCol) { if (mChain->GetEventDisplay()) { - for (uint32_t sl = 0; sl < NSLICES; sl++) { + for (uint32_t sl = 0; sl < NSECTORS; sl++) { mChain->GetEventDisplay()->SetCollisionFirstCluster(iCol, sl, mChain->mIOPtrs.nClusterData[sl]); } - mChain->GetEventDisplay()->SetCollisionFirstCluster(iCol, NSLICES, mChain->mIOPtrs.nMCInfosTPC); + mChain->GetEventDisplay()->SetCollisionFirstCluster(iCol, NSECTORS, mChain->mIOPtrs.nMCInfosTPC); } } diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h index a4f2e055da2c3..47cbfa0a1a5b6 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h @@ -27,9 +27,7 @@ namespace o2::tpc struct ClusterNative; } // namespace o2::tpc -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct ClusterNativeAccess; @@ -48,7 +46,7 @@ class GPUReconstructionTimeframe static constexpr int32_t TIME_ORBIT = 1000000000 / ORBIT_RATE; private: - constexpr static uint32_t NSLICES = GPUReconstruction::NSLICES; + constexpr static 
uint32_t NSECTORS = GPUReconstruction::NSECTORS; void SetDisplayInformation(int32_t iCol); @@ -73,7 +71,6 @@ class GPUReconstructionTimeframe std::vector mEventUsed; std::vector> mShiftedEvents; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h index 99399f505d552..fdc5c16d91f35 100644 --- a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h +++ b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h @@ -19,9 +19,7 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { class ThrustVolatileAsyncAllocator @@ -38,14 +36,11 @@ class ThrustVolatileAsyncAllocator GPUReconstruction* mRec; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #ifndef __HIPCC__ // Override synchronize call at end of thrust algorithm running on stream, just don't run cudaStreamSynchronize -namespace thrust -{ -namespace cuda_cub +namespace thrust::cuda_cub { typedef thrust::cuda_cub::execution_policy thrustStreamPolicy; @@ -60,8 +55,7 @@ __host__ __device__ inline cudaError_t synchronize(thrustStr #endif } -} // namespace cuda_cub -} // namespace thrust +} // namespace thrust::cuda_cub #endif // __HIPCC__ #endif // GPU_CUDATHRUSTHELPERS_H diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 20ce23b578d84..b195b375b4503 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -280,8 +280,8 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } #ifdef GPUCA_USE_TEXTURES - if (GPUCA_SLICE_DATA_MEMORY * NSLICES > (size_t)deviceProp.maxTexture1DLinear) { - GPUError("Invalid maximum texture size of device: %ld < %ld\n", (int64_t)deviceProp.maxTexture1DLinear, (int64_t)(GPUCA_SLICE_DATA_MEMORY * NSLICES)); + if (GPUCA_SECTOR_DATA_MEMORY * NSECTORS > (size_t)deviceProp.maxTexture1DLinear) { + GPUError("Invalid maximum 
texture size of device: %ld < %ld\n", (int64_t)deviceProp.maxTexture1DLinear, (int64_t)(GPUCA_SECTOR_DATA_MEMORY * NSECTORS)); return (1); } #endif @@ -548,10 +548,10 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s void GPUReconstructionCUDA::ReleaseEvent(deviceEvent ev) {} void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } -std::unique_ptr GPUReconstructionCUDA::GetThreadContext() +std::unique_ptr GPUReconstructionCUDA::GetThreadContext() { GPUFailedMsg(cudaSetDevice(mDeviceId)); - return std::unique_ptr(new GPUThreadContext); + return GPUReconstructionProcessing::GetThreadContext(); } void GPUReconstructionCUDA::SynchronizeGPU() { GPUFailedMsg(cudaDeviceSynchronize()); } @@ -671,9 +671,9 @@ int32_t GPUReconstructionCUDA::PrepareTextures() #ifdef GPUCA_USE_TEXTURES cudaChannelFormatDesc channelDescu2 = cudaCreateChannelDesc(); size_t offset; - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSLICES * GPUCA_SLICE_DATA_MEMORY)); + GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); cudaChannelFormatDesc channelDescu = cudaCreateChannelDesc(); - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSLICES * GPUCA_SLICE_DATA_MEMORY)); + GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); #endif return (0); } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 49142d409c5ae..f14696a92a5b0 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -25,9 +25,7 @@ 
extern "C" __declspec(dllexport) o2::gpu::GPUReconstruction* GPUReconstruction_C extern "C" o2::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const o2::gpu::GPUSettingsDeviceBackend& cfg); #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUReconstructionCUDAInternals; @@ -71,7 +69,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels GetThreadContext() override; + std::unique_ptr GetThreadContext() override; void SynchronizeGPU() override; int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override; void SynchronizeStream(int32_t stream) override; @@ -104,7 +102,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { + #define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) #define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) @@ -85,7 +84,6 @@ class GPUDebugTiming static_assert(std::is_convertible::value, "CUDA event type incompatible to deviceEvent"); -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 3e738fb6df5cb..b65674a68e6aa 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -30,25 +30,24 @@ endif() # General sources set(SRCS - SliceTracker/GPUTPCTrack.cxx - SliceTracker/GPUTPCTrackParam.cxx - SliceTracker/GPUTPCStartHitsFinder.cxx - SliceTracker/GPUTPCStartHitsSorter.cxx - SliceTracker/GPUTPCNeighboursCleaner.cxx - SliceTracker/GPUTPCTracker.cxx - SliceTracker/GPUTPCSliceData.cxx - SliceTracker/GPUTPCSliceOutput.cxx - SliceTracker/GPUTPCTrackletConstructor.cxx - SliceTracker/GPUTPCSectorDebugSortKernels.cxx - SliceTracker/GPUTPCCreateOccupancyMap.cxx - SliceTracker/GPUTPCNeighboursFinder.cxx - SliceTracker/GPUTPCGrid.cxx - SliceTracker/GPUTPCTrackletSelector.cxx - SliceTracker/GPUTPCRow.cxx - SliceTracker/GPUTPCExtrapolationTracking.cxx - 
SliceTracker/GPUTPCCreateSliceData.cxx + SectorTracker/GPUTPCTrack.cxx + SectorTracker/GPUTPCTrackParam.cxx + SectorTracker/GPUTPCStartHitsFinder.cxx + SectorTracker/GPUTPCStartHitsSorter.cxx + SectorTracker/GPUTPCNeighboursCleaner.cxx + SectorTracker/GPUTPCTracker.cxx + SectorTracker/GPUTPCTrackingData.cxx + SectorTracker/GPUTPCSectorOutput.cxx + SectorTracker/GPUTPCTrackletConstructor.cxx + SectorTracker/GPUTPCSectorDebugSortKernels.cxx + SectorTracker/GPUTPCCreateOccupancyMap.cxx + SectorTracker/GPUTPCNeighboursFinder.cxx + SectorTracker/GPUTPCGrid.cxx + SectorTracker/GPUTPCTrackletSelector.cxx + SectorTracker/GPUTPCExtrapolationTracking.cxx + SectorTracker/GPUTPCCreateTrackingData.cxx Merger/GPUTPCGMMerger.cxx - Merger/GPUTPCGMSliceTrack.cxx + Merger/GPUTPCGMSectorTrack.cxx Merger/GPUTPCGMTrackParam.cxx Merger/GPUTPCGMPropagator.cxx Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -62,7 +61,7 @@ set(SRCS set(SRCS_DATATYPES DataTypes/GPUDataTypes.cxx DataTypes/GPUConfigDump.cxx DataTypes/GPUTPCGMPolynomialField.cxx) -set(HDRS_CINT_O2 Merger/GPUTPCGMTrackParam.h Merger/GPUTPCGMMergedTrack.h Merger/GPUTPCGMSliceTrack.h Merger/GPUTPCGMBorderTrack.h TRDTracking/GPUTRDInterfaces.h) +set(HDRS_CINT_O2 Merger/GPUTPCGMTrackParam.h Merger/GPUTPCGMMergedTrack.h Merger/GPUTPCGMSectorTrack.h Merger/GPUTPCGMBorderTrack.h TRDTracking/GPUTRDInterfaces.h) set(HDRS_CINT_DATATYPES DataTypes/GPUTPCGMMergedTrackHit.h) set(HDRS_CINT_O2_ADDITIONAL DataTypes/GPUSettings.h Definitions/GPUSettingsList.h DataTypes/GPUDataTypes.h DataTypes/GPUTRDTrack.h DataTypes/CalibdEdxTrackTopologyPol.h DataTypes/CalibdEdxTrackTopologySpline.h) # Manual dependencies for ROOT dictionary generation @@ -71,6 +70,7 @@ set(SRCS_NO_CINT DataTypes/GPUNewCalibValues.cxx DataTypes/GPUTPCClusterOccupancyMap.cxx Base/GPUReconstruction.cxx + Base/GPUReconstructionProcessing.cxx Base/GPUReconstructionCPU.cxx Base/GPUProcessor.cxx Base/GPUMemoryResource.cxx @@ -87,14 +87,14 @@ set(SRCS_NO_CINT 
Debug/GPUTPCClusterFilter.cxx utils/timer.cxx) -set(SRCS_NO_H SliceTracker/GPUTPCTrackerDump.cxx +set(SRCS_NO_H SectorTracker/GPUTPCTrackerDump.cxx Merger/GPUTPCGMMergerDump.cxx Base/GPUReconstructionLibrary.cxx Global/GPUChainTrackingClusterizer.cxx Global/GPUChainTrackingTransformation.cxx Global/GPUChainTrackingTRD.cxx Global/GPUChainTrackingRefit.cxx - Global/GPUChainTrackingSliceTracker.cxx + Global/GPUChainTrackingSectorTracker.cxx Global/GPUChainTrackingMerger.cxx Global/GPUChainTrackingCompression.cxx Global/GPUChainTrackingDebugAndProfiling.cxx @@ -106,6 +106,7 @@ set(HDRS_INSTALL Base/GPUParamRTC.h Base/GPUReconstructionIncludes.h Base/GPUReconstructionThreading.h + Base/GPUReconstructionIO.h Base/GPUReconstructionIncludesITS.h Base/GPUReconstructionKernelMacros.h Base/GPUReconstructionKernels.h @@ -137,15 +138,15 @@ set(HDRS_INSTALL qa/GPUQAHelper.h qconfigoptions.h Refit/GPUTrackParamConvert.h - SliceTracker/GPUTPCBaseTrackParam.h - SliceTracker/GPUTPCClusterData.h - SliceTracker/GPUTPCDef.h - SliceTracker/GPUTPCHit.h - SliceTracker/GPUTPCHitId.h - SliceTracker/GPUTPCMCInfo.h - SliceTracker/GPUTPCSliceOutCluster.h - SliceTracker/GPUTPCTracklet.h - SliceTracker/GPUTPCTrackLinearisation.h + SectorTracker/GPUTPCBaseTrackParam.h + SectorTracker/GPUTPCClusterData.h + SectorTracker/GPUTPCDef.h + SectorTracker/GPUTPCHit.h + SectorTracker/GPUTPCHitId.h + SectorTracker/GPUTPCMCInfo.h + SectorTracker/GPUTPCSectorOutCluster.h + SectorTracker/GPUTPCTracklet.h + SectorTracker/GPUTPCTrackLinearisation.h TPCConvert/GPUTPCConvertImpl.h TRDTracking/GPUTRDGeometry.h TRDTracking/GPUTRDInterfaces.h @@ -296,7 +297,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") Definitions DataTypes Base - SliceTracker + SectorTracker TPCConvert dEdx ITS diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h index fcdfcfc9cc49a..5c25813e75d29 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h +++ 
b/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h @@ -17,9 +17,7 @@ #include "GPUTPCGMMergerTypes.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCClusterRejection { template @@ -67,7 +65,6 @@ struct GPUTPCClusterRejection { return GetProtectionStatus(attach, physics, protect); } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx index 794f4cb485f14..b11a3b13d7132 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx @@ -23,7 +23,9 @@ using namespace o2::gpu; // Small helper to compute Huffman probabilities -namespace +namespace o2::gpu +{ +namespace // anonymous { typedef std::vector HuffCode; typedef std::map HuffCodeMap; @@ -101,7 +103,8 @@ void GenerateCodes(const INode* node, const HuffCode& prefix, HuffCodeMap& outCo GenerateCodes(in->right, rightPrefix, outCodes); } } -} // namespace +} // anonymous namespace +} // namespace o2::gpu void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* clustersNative, const o2::tpc::CompressedClusters* clustersCompressed, const GPUParam& param) { @@ -113,10 +116,10 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* mDecoder.decompress(clustersCompressed, clustersNativeDecoded, allocator, param, true); std::vector tmpClusters; if (param.rec.tpc.rejectionStrategy == GPUSettings::RejectionNone) { // verification does not make sense if we reject clusters during compression - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { if (clustersNative->nClusters[i][j] != clustersNativeDecoded.nClusters[i][j]) { - GPUError("Number of clusters mismatch slice %u row %u: expected %d v.s. 
decoded %d", i, j, clustersNative->nClusters[i][j], clustersNativeDecoded.nClusters[i][j]); + GPUError("Number of clusters mismatch sector %u row %u: expected %d v.s. decoded %d", i, j, clustersNative->nClusters[i][j], clustersNativeDecoded.nClusters[i][j]); decodingErrors++; continue; } @@ -136,7 +139,7 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* const o2::tpc::ClusterNative& c2 = clustersNativeDecoded.clusters[i][j][k]; if (c1.timeFlagsPacked != c2.timeFlagsPacked || c1.padPacked != c2.padPacked || c1.sigmaTimePacked != c2.sigmaTimePacked || c1.sigmaPadPacked != c2.sigmaPadPacked || c1.qMax != c2.qMax || c1.qTot != c2.qTot) { if (decodingErrors++ < 100) { - GPUWarning("Cluster mismatch: slice %2u row %3u hit %5u: %6d %3d %4d %3d %3d %4d %4d", i, j, k, (int32_t)c1.getTimePacked(), (int32_t)c1.getFlags(), (int32_t)c1.padPacked, (int32_t)c1.sigmaTimePacked, (int32_t)c1.sigmaPadPacked, (int32_t)c1.qMax, (int32_t)c1.qTot); + GPUWarning("Cluster mismatch: sector %2u row %3u hit %5u: %6d %3d %4d %3d %3d %4d %4d", i, j, k, (int32_t)c1.getTimePacked(), (int32_t)c1.getFlags(), (int32_t)c1.padPacked, (int32_t)c1.sigmaTimePacked, (int32_t)c1.sigmaPadPacked, (int32_t)c1.qMax, (int32_t)c1.qTot); GPUWarning("%45s %6d %3d %4d %3d %3d %4d %4d", "", (int32_t)c2.getTimePacked(), (int32_t)c2.getFlags(), (int32_t)c2.padPacked, (int32_t)c2.sigmaTimePacked, (int32_t)c2.sigmaPadPacked, (int32_t)c2.qMax, (int32_t)c2.qTot); } } @@ -155,14 +158,14 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* FillStatistic(mPqMaxA, clustersCompressed->qMaxA, clustersCompressed->nAttachedClusters); FillStatistic(mPflagsA, clustersCompressed->flagsA, clustersCompressed->nAttachedClusters); FillStatistic(mProwDiffA, clustersCompressed->rowDiffA, clustersCompressed->nAttachedClustersReduced); - FillStatistic(mPsliceLegDiffA, clustersCompressed->sliceLegDiffA, clustersCompressed->nAttachedClustersReduced); + 
FillStatistic(mPsectorLegDiffA, clustersCompressed->sliceLegDiffA, clustersCompressed->nAttachedClustersReduced); FillStatistic(mPpadResA, clustersCompressed->padResA, clustersCompressed->nAttachedClustersReduced); FillStatistic(mPtimeResA, clustersCompressed->timeResA, clustersCompressed->nAttachedClustersReduced); FillStatistic(mPsigmaPadA, clustersCompressed->sigmaPadA, clustersCompressed->nAttachedClusters); FillStatistic(mPsigmaTimeA, clustersCompressed->sigmaTimeA, clustersCompressed->nAttachedClusters); FillStatistic(mPqPtA, clustersCompressed->qPtA, clustersCompressed->nTracks); FillStatistic(mProwA, clustersCompressed->rowA, clustersCompressed->nTracks); - FillStatistic(mPsliceA, clustersCompressed->sliceA, clustersCompressed->nTracks); + FillStatistic(mPsectorA, clustersCompressed->sliceA, clustersCompressed->nTracks); FillStatistic(mPtimeA, clustersCompressed->timeA, clustersCompressed->nTracks); FillStatistic(mPpadA, clustersCompressed->padA, clustersCompressed->nTracks); FillStatistic(mPqTotU, clustersCompressed->qTotU, clustersCompressed->nUnattachedClusters); @@ -173,12 +176,12 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* FillStatistic(mPsigmaPadU, clustersCompressed->sigmaPadU, clustersCompressed->nUnattachedClusters); FillStatistic(mPsigmaTimeU, clustersCompressed->sigmaTimeU, clustersCompressed->nUnattachedClusters); FillStatistic(mPnTrackClusters, clustersCompressed->nTrackClusters, clustersCompressed->nTracks); - FillStatistic(mPnSliceRowClusters, clustersCompressed->nSliceRowClusters, clustersCompressed->nSliceRows); + FillStatistic(mPnSectorRowClusters, clustersCompressed->nSliceRowClusters, clustersCompressed->nSliceRows); FillStatisticCombined(mPsigmaA, clustersCompressed->sigmaPadA, clustersCompressed->sigmaTimeA, clustersCompressed->nAttachedClusters, P_MAX_SIGMA); FillStatisticCombined(mPsigmaU, clustersCompressed->sigmaPadU, clustersCompressed->sigmaTimeU, clustersCompressed->nUnattachedClusters, 
P_MAX_SIGMA); FillStatisticCombined(mPQA, clustersCompressed->qMaxA, clustersCompressed->qTotA, clustersCompressed->nAttachedClusters, P_MAX_QMAX); FillStatisticCombined(mPQU, clustersCompressed->qMaxU, clustersCompressed->qTotU, clustersCompressed->nUnattachedClusters, P_MAX_QMAX); - FillStatisticCombined(mProwSliceA, clustersCompressed->rowDiffA, clustersCompressed->sliceLegDiffA, clustersCompressed->nAttachedClustersReduced, GPUCA_ROW_COUNT); + FillStatisticCombined(mProwSectorA, clustersCompressed->rowDiffA, clustersCompressed->sliceLegDiffA, clustersCompressed->nAttachedClustersReduced, GPUCA_ROW_COUNT); mNTotalClusters += clustersCompressed->nAttachedClusters + clustersCompressed->nUnattachedClusters; } @@ -195,15 +198,15 @@ void GPUTPCClusterStatistics::Finish() double eQ = Analyze(mPqTotA, "qTot Attached", false); eQ += Analyze(mPqMaxA, "qMax Attached", false); Analyze(mPflagsA, "flags Attached"); - double eRowSlice = Analyze(mProwDiffA, "rowDiff Attached", false); - eRowSlice += Analyze(mPsliceLegDiffA, "sliceDiff Attached", false); + double eRowSector = Analyze(mProwDiffA, "rowDiff Attached", false); + eRowSector += Analyze(mPsectorLegDiffA, "sectorDiff Attached", false); Analyze(mPpadResA, "padRes Attached"); Analyze(mPtimeResA, "timeRes Attached"); double eSigma = Analyze(mPsigmaPadA, "sigmaPad Attached", false); eSigma += Analyze(mPsigmaTimeA, "sigmaTime Attached", false); Analyze(mPqPtA, "qPt Attached"); Analyze(mProwA, "row Attached"); - Analyze(mPsliceA, "slice Attached"); + Analyze(mPsectorA, "sector Attached"); Analyze(mPtimeA, "time Attached"); Analyze(mPpadA, "pad Attached"); eQ += Analyze(mPqTotU, "qTot Unattached", false); @@ -214,14 +217,14 @@ void GPUTPCClusterStatistics::Finish() eSigma += Analyze(mPsigmaPadU, "sigmaPad Unattached", false); eSigma += Analyze(mPsigmaTimeU, "sigmaTime Unattached", false); Analyze(mPnTrackClusters, "nClusters in Track"); - Analyze(mPnSliceRowClusters, "nClusters in Row"); + Analyze(mPnSectorRowClusters, 
"nClusters in Row"); double eSigmaCombined = Analyze(mPsigmaA, "combined sigma Attached"); eSigmaCombined += Analyze(mPsigmaU, "combined sigma Unattached"); double eQCombined = Analyze(mPQA, "combined Q Attached"); eQCombined += Analyze(mPQU, "combined Q Unattached"); - double eRowSliceCombined = Analyze(mProwSliceA, "combined row/slice Attached"); + double eRowSectorCombined = Analyze(mProwSectorA, "combined row/sector Attached"); - GPUInfo("Combined Row/Slice: %6.4f --> %6.4f (%6.4f%%)", eRowSlice, eRowSliceCombined, eRowSlice > 1e-1 ? (100. * (eRowSlice - eRowSliceCombined) / eRowSlice) : 0.f); + GPUInfo("Combined Row/Sector: %6.4f --> %6.4f (%6.4f%%)", eRowSector, eRowSectorCombined, eRowSector > 1e-1 ? (100. * (eRowSector - eRowSectorCombined) / eRowSector) : 0.f); GPUInfo("Combined Sigma: %6.4f --> %6.4f (%6.4f%%)", eSigma, eSigmaCombined, eSigma > 1e-3 ? (100. * (eSigma - eSigmaCombined) / eSigma) : 0.f); GPUInfo("Combined Q: %6.4f --> %6.4f (%6.4f%%)", eQ, eQCombined, eQ > 1e-3 ? (100. 
* (eQ - eQCombined) / eQ) : 0.f); diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h index 7c873fa67f522..1dfb958750bef 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h @@ -29,7 +29,7 @@ namespace o2::gpu class GPUTPCClusterStatistics { public: - static constexpr uint32_t NSLICES = GPUCA_NSLICES; + static constexpr uint32_t NSECTORS = GPUCA_NSECTORS; void RunStatistics(const o2::tpc::ClusterNativeAccess* clustersNative, const o2::tpc::CompressedClusters* clustersCompressed, const GPUParam& param); void Finish(); @@ -55,14 +55,14 @@ class GPUTPCClusterStatistics std::vector mPqMaxA = std::vector(P_MAX_QMAX, 0); std::vector mPflagsA = std::vector(P_MAX_FLAGS, 0); std::vector mProwDiffA = std::vector(GPUCA_ROW_COUNT, 0); - std::vector mPsliceLegDiffA = std::vector(GPUCA_NSLICES * 2, 0); + std::vector mPsectorLegDiffA = std::vector(GPUCA_NSECTORS * 2, 0); std::vector mPpadResA = std::vector(P_MAX_PAD, 0); std::vector mPtimeResA = std::vector(P_MAX_TIME, 0); std::vector mPsigmaPadA = std::vector(P_MAX_SIGMA, 0); std::vector mPsigmaTimeA = std::vector(P_MAX_SIGMA, 0); std::vector mPqPtA = std::vector(P_MAX_QPT, 0); std::vector mProwA = std::vector(GPUCA_ROW_COUNT, 0); - std::vector mPsliceA = std::vector(GPUCA_NSLICES, 0); + std::vector mPsectorA = std::vector(GPUCA_NSECTORS, 0); std::vector mPtimeA = std::vector(P_MAX_TIME, 0); std::vector mPpadA = std::vector(P_MAX_PAD, 0); std::vector mPqTotU = std::vector(P_MAX_QTOT, 0); @@ -73,12 +73,12 @@ class GPUTPCClusterStatistics std::vector mPsigmaPadU = std::vector(P_MAX_SIGMA, 0); std::vector mPsigmaTimeU = std::vector(P_MAX_SIGMA, 0); std::vector mPnTrackClusters; - std::vector mPnSliceRowClusters; + std::vector mPnSectorRowClusters; std::vector mPsigmaU = std::vector(P_MAX_SIGMA * P_MAX_SIGMA, 0); std::vector mPsigmaA = std::vector(P_MAX_SIGMA * P_MAX_SIGMA, 
0); std::vector mPQU = std::vector(P_MAX_QMAX * P_MAX_QTOT, 0); std::vector mPQA = std::vector(P_MAX_QMAX * P_MAX_QTOT, 0); - std::vector mProwSliceA = std::vector(GPUCA_ROW_COUNT * GPUCA_NSLICES * 2, 0); + std::vector mProwSectorA = std::vector(GPUCA_ROW_COUNT * GPUCA_NSECTORS * 2, 0); double mEntropy = 0; double mHuffman = 0; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 8f8137a6307b0..335b201d11d07 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -65,7 +65,7 @@ void GPUTPCCompression::SetPointersCompressedClusters(void*& mem, T& c, uint32_t computePointerWithAlignment(mem, c.timeDiffU, nClU); computePointerWithAlignment(mem, c.sigmaPadU, nClU); computePointerWithAlignment(mem, c.sigmaTimeU, nClU); - computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSLICES); + computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSECTORS); uint32_t nClAreduced = reducedClA ? 
nClA - nTr : nClA; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h index 9a5d6436f06af..c1d9fe283fbea 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h @@ -64,7 +64,7 @@ class GPUTPCCompression : public GPUProcessor uint32_t nStoredUnattachedClusters = 0; }; - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; o2::tpc::CompressedClustersPtrs mPtrs; o2::tpc::CompressedClusters* mOutput = nullptr; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 0f5936095fdc3..966bffa963c7e 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -44,7 +44,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread processors.param.rec.tpc.rejectQPtB5 || trk.MergedLooper(); uint32_t nClustersStored = 0; CompressedClustersPtrs& GPUrestrict() c = compressor.mPtrs; - uint8_t lastRow = 0, lastSlice = 0; + uint8_t lastRow = 0, lastSector = 0; GPUTPCCompressionTrackModel track; float zOffset = 0; for (int32_t k = trk.NClusters() - 1; k >= 0; k--) { @@ -67,18 +67,18 @@ GPUdii() void GPUTPCCompressionKernels::Threadclusters[hit.slice][hit.row][hit.num - clusters->clusterOffset[hit.slice][hit.row]]; + const ClusterNative& GPUrestrict() orgCl = clusters -> clusters[hit.sector][hit.row][hit.num - clusters->clusterOffset[hit.sector][hit.row]]; float x = param.tpcGeometry.Row2X(hit.row); - float y = track.LinearPad2Y(hit.slice, orgCl.getPad(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)); - float z = param.tpcGeometry.LinearTime2Z(hit.slice, orgCl.getTime()); + float y = track.LinearPad2Y(hit.sector, orgCl.getPad(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)); + float z = 
param.tpcGeometry.LinearTime2Z(hit.sector, orgCl.getTime()); if (nClustersStored) { - if ((hit.slice < GPUCA_NSLICES) ^ (lastSlice < GPUCA_NSLICES)) { + if ((hit.sector < GPUCA_NSECTORS) ^ (lastSector < GPUCA_NSECTORS)) { break; } if (lastLeg != hit.leg && track.Mirror()) { break; } - if (track.Propagate(param.tpcGeometry.Row2X(hit.row), param.SliceParam[hit.slice].Alpha)) { + if (track.Propagate(param.tpcGeometry.Row2X(hit.row), param.SectorParam[hit.sector].Alpha)) { break; } } @@ -89,35 +89,35 @@ GPUdii() void GPUTPCCompressionKernels::Thread 0 ? 254 : 0); zOffset = z; - track.Init(x, y, z - zOffset, param.SliceParam[hit.slice].Alpha, qpt, param); + track.Init(x, y, z - zOffset, param.SectorParam[hit.sector].Alpha, qpt, param); myTrack = CAMath::AtomicAdd(&compressor.mMemory->nStoredTracks, 1u); compressor.mAttachedClusterFirstIndex[myTrack] = trk.FirstClusterRef(); lastLeg = hit.leg; c.qPtA[myTrack] = qpt; c.rowA[myTrack] = hit.row; - c.sliceA[myTrack] = hit.slice; + c.sliceA[myTrack] = hit.sector; c.timeA[myTrack] = orgCl.getTimePacked(); c.padA[myTrack] = orgCl.padPacked; } else { uint32_t row = hit.row; - uint32_t slice = hit.slice; + uint32_t sector = hit.sector; if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionDifferences) { if (lastRow > row) { row += GPUCA_ROW_COUNT; } row -= lastRow; - if (lastSlice > slice) { - slice += compressor.NSLICES; + if (lastSector > sector) { + sector += compressor.NSECTORS; } - slice -= lastSlice; + sector -= lastSector; } c.rowDiffA[cidx] = row; - c.sliceLegDiffA[cidx] = (hit.leg == lastLeg ? 0 : compressor.NSLICES) + slice; - float pad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), track.LinearY2Pad(hit.slice, track.Y(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)))); + c.sliceLegDiffA[cidx] = (hit.leg == lastLeg ? 
0 : compressor.NSECTORS) + sector; + float pad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), track.LinearY2Pad(hit.sector, track.Y(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)))); c.padResA[cidx] = orgCl.padPacked - orgCl.packPad(pad); - float time = CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(hit.slice, track.Z() + zOffset)); + float time = CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(hit.sector, track.Z() + zOffset)); c.timeResA[cidx] = (orgCl.getTimePacked() - orgCl.packTime(time)) & 0xFFFFFF; lastLeg = hit.leg; } @@ -138,7 +138,7 @@ GPUdii() void GPUTPCCompressionKernels::ThreadnStoredAttachedClusters, nClustersStored); @@ -185,12 +185,12 @@ GPUdii() void GPUTPCCompressionKernels::ThreadclusterOffset[iSlice][iRow]; - const uint32_t idOffsetOut = clusters->clusterOffset[iSlice][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; - const uint32_t idOffsetOutMax = ((const uint32_t*)clusters->clusterOffset[iSlice])[iRow + 1] * compressor.mMaxClusterFactorBase1024 / 1024; // Array out of bounds access is ok, since it goes to the correct nClustersTotal + for (int32_t iSectorRow = iBlock; iSectorRow < GPUCA_NSECTORS * GPUCA_ROW_COUNT; iSectorRow += nBlocks) { + const uint32_t iSector = iSectorRow / GPUCA_ROW_COUNT; + const uint32_t iRow = iSectorRow % GPUCA_ROW_COUNT; + const uint32_t idOffset = clusters->clusterOffset[iSector][iRow]; + const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; + const uint32_t idOffsetOutMax = ((const uint32_t*)clusters->clusterOffset[iSector])[iRow + 1] * compressor.mMaxClusterFactorBase1024 / 1024; // Array out of bounds access is ok, since it goes to the correct nClustersTotal if (iThread == nThreads - 1) { smem.nCount = 0; } @@ -199,12 +199,12 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->nClusters[iSlice][iRow]); + const uint32_t nn = 
GPUCommonMath::nextMultipleOf(clusters->nClusters[iSector][iRow]); for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) { const int32_t idx = idOffset + i; int32_t cidx = 0; do { - if (i >= clusters->nClusters[iSlice][iRow]) { + if (i >= clusters->nClusters[iSector][iRow]) { break; } if (compressor.mClusterStatus[idx]) { @@ -253,29 +253,29 @@ GPUdii() void GPUTPCCompressionKernels::Thread idOffsetOutMax) { if (iThread == nThreads - 1) { - compressor.raiseError(GPUErrors::ERROR_COMPRESSION_ROW_HIT_OVERFLOW, iSlice * 1000 + iRow, idOffsetOut + totalCount + count, idOffsetOutMax); + compressor.raiseError(GPUErrors::ERROR_COMPRESSION_ROW_HIT_OVERFLOW, iSector * 1000 + iRow, idOffsetOut + totalCount + count, idOffsetOutMax); } break; } if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionDifferences) { if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSlice][iRow])); + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSlice][iRow])); + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSlice][iRow])); + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSlice][iRow])); + 
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); } GPUbarrier(); } for (uint32_t j = get_local_id(0); j < count; j += get_local_size(0)) { int32_t outidx = idOffsetOut + totalCount + j; - const ClusterNative& GPUrestrict() orgCl = clusters->clusters[iSlice][iRow][sortBuffer[j]]; + const ClusterNative& GPUrestrict() orgCl = clusters->clusters[iSector][iRow][sortBuffer[j]]; int32_t preId = j != 0 ? (int32_t)sortBuffer[j - 1] : (totalCount != 0 ? (int32_t)smem.lastIndex : -1); - GPUTPCCompression_EncodeUnattached(param.rec.tpc.compressionTypeMask, orgCl, c.timeDiffU[outidx], c.padDiffU[outidx], preId == -1 ? nullptr : &clusters->clusters[iSlice][iRow][preId]); + GPUTPCCompression_EncodeUnattached(param.rec.tpc.compressionTypeMask, orgCl, c.timeDiffU[outidx], c.padDiffU[outidx], preId == -1 ? nullptr : &clusters->clusters[iSector][iRow][preId]); uint16_t qtot = orgCl.qTot, qmax = orgCl.qMax; uint8_t sigmapad = orgCl.sigmaPadPacked, sigmatime = orgCl.sigmaTimePacked; @@ -304,7 +304,7 @@ GPUdii() void GPUTPCCompressionKernels::ThreadnStoredUnattachedClusters, totalCount); } GPUbarrier(); @@ -530,7 +530,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::ThreadnSliceRowClusters, compressor.mPtrs.nSliceRowClusters, compressor.NSLICES * GPUCA_ROW_COUNT, nThreads, iThread); + compressorMemcpy(compressor.mOutput->nSliceRowClusters, compressor.mPtrs.nSliceRowClusters, compressor.NSECTORS * GPUCA_ROW_COUNT, nThreads, iThread); compressorMemcpy(compressor.mOutput->nTrackClusters, compressor.mPtrs.nTrackClusters, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpy(compressor.mOutput->qPtA, compressor.mPtrs.qPtA, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpy(compressor.mOutput->rowA, compressor.mPtrs.rowA, compressor.mMemory->nStoredTracks, nThreads, iThread); @@ -549,14 +549,14 @@ GPUdii() void GPUTPCCompressionGatherKernels::ThreadtimeA, compressor.mPtrs.timeA,
compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpy(compressor.mOutput->padA, compressor.mPtrs.padA, compressor.mMemory->nStoredTracks, nThreads, iThread); - uint32_t sliceStart = rowStart / GPUCA_ROW_COUNT; - uint32_t sliceEnd = rowEnd / GPUCA_ROW_COUNT; + uint32_t sectorStart = rowStart / GPUCA_ROW_COUNT; + uint32_t sectorEnd = rowEnd / GPUCA_ROW_COUNT; - uint32_t sliceRowStart = rowStart % GPUCA_ROW_COUNT; - uint32_t sliceRowEnd = rowEnd % GPUCA_ROW_COUNT; + uint32_t sectorRowStart = rowStart % GPUCA_ROW_COUNT; + uint32_t sectorRowEnd = rowEnd % GPUCA_ROW_COUNT; - for (uint32_t i = sliceStart; i <= sliceEnd && i < compressor.NSLICES; i++) { - for (uint32_t j = ((i == sliceStart) ? sliceRowStart : 0); j < ((i == sliceEnd) ? sliceRowEnd : GPUCA_ROW_COUNT); j++) { + for (uint32_t i = sectorStart; i <= sectorEnd && i < compressor.NSECTORS; i++) { + for (uint32_t j = ((i == sectorStart) ? sectorRowStart : 0); j < ((i == sectorEnd) ? sectorRowEnd : GPUCA_ROW_COUNT); j++) { uint32_t nClusters = compressor.mPtrs.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; uint32_t clusterOffsetInCache = clusters->clusterOffset[i][j] * compressor.mMaxClusterFactorBase1024 / 1024; compressorMemcpy(compressor.mOutput->qTotU + rowsOffset, compressor.mPtrs.qTotU + clusterOffsetInCache, nClusters, nLanes, iLane); @@ -636,7 +636,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherBuffered(int32_t nBlocks, in auto& input = compressor.mPtrs; auto* output = compressor.mOutput; - uint32_t nRows = compressor.NSLICES * GPUCA_ROW_COUNT; + uint32_t nRows = compressor.NSECTORS * GPUCA_ROW_COUNT; uint32_t rowsPerWarp = (nRows + nGlobalWarps - 1) / nGlobalWarps; uint32_t rowStart = rowsPerWarp * iGlobalWarp; uint32_t rowEnd = CAMath::Min(nRows, rowStart + rowsPerWarp); @@ -661,7 +661,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherBuffered(int32_t nBlocks, in uint32_t tracksOffset = calculateWarpOffsets(smem, input.nTrackClusters, trackStart, trackEnd, nWarps, iWarp, 
nLanes, iLane); if (iBlock == 0) { - compressorMemcpyBasic(output->nSliceRowClusters, input.nSliceRowClusters, compressor.NSLICES * GPUCA_ROW_COUNT, nThreads, iThread); + compressorMemcpyBasic(output->nSliceRowClusters, input.nSliceRowClusters, compressor.NSECTORS * GPUCA_ROW_COUNT, nThreads, iThread); compressorMemcpyBasic(output->nTrackClusters, input.nTrackClusters, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpyBasic(output->qPtA, input.qPtA, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpyBasic(output->rowA, input.rowA, compressor.mMemory->nStoredTracks, nThreads, iThread); @@ -671,17 +671,17 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherBuffered(int32_t nBlocks, in } const uint32_t* clusterOffsets = &clusters->clusterOffset[0][0] + rowStart; - const uint32_t* nSliceRowClusters = input.nSliceRowClusters + rowStart; + const uint32_t* nSectorRowClusters = input.nSliceRowClusters + rowStart; auto* buf = smem.getBuffer(iWarp); - compressorMemcpyBuffered(buf, output->qTotU + rowsOffset, input.qTotU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->qMaxU + rowsOffset, input.qMaxU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->flagsU + rowsOffset, input.flagsU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->padDiffU + rowsOffset, input.padDiffU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->timeDiffU + rowsOffset, input.timeDiffU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->sigmaPadU + rowsOffset, input.sigmaPadU, 
nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->sigmaTimeU + rowsOffset, input.sigmaTimeU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->qTotU + rowsOffset, input.qTotU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->qMaxU + rowsOffset, input.qMaxU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->flagsU + rowsOffset, input.flagsU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->padDiffU + rowsOffset, input.padDiffU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->timeDiffU + rowsOffset, input.timeDiffU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->sigmaPadU + rowsOffset, input.sigmaPadU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->sigmaTimeU + rowsOffset, input.sigmaTimeU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); const uint16_t* nTrackClustersPtr = input.nTrackClusters + trackStart; const uint32_t* aClsFstIdx = compressor.mAttachedClusterFirstIndex + trackStart; @@ -714,7 +714,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherMulti(int32_t nBlocks, int32 auto* buf = smem.getBuffer(iWarp); if (iBlock == 0) { - compressorMemcpyBasic(output->nSliceRowClusters, input.nSliceRowClusters, compressor.NSLICES * 
GPUCA_ROW_COUNT, nThreads, iThread); + compressorMemcpyBasic(output->nSliceRowClusters, input.nSliceRowClusters, compressor.NSECTORS * GPUCA_ROW_COUNT, nThreads, iThread); compressorMemcpyBasic(output->nTrackClusters, input.nTrackClusters, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpyBasic(output->qPtA, input.qPtA, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpyBasic(output->rowA, input.rowA, compressor.mMemory->nStoredTracks, nThreads, iThread); @@ -725,7 +725,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherMulti(int32_t nBlocks, int32 const uint32_t nGlobalWarps = nWarps * (nBlocks - 1) / 2; const uint32_t iGlobalWarp = nWarps * (iBlock - 1) / 2 + iWarp; - const uint32_t nRows = compressor.NSLICES * GPUCA_ROW_COUNT; + const uint32_t nRows = compressor.NSECTORS * GPUCA_ROW_COUNT; uint32_t rowsPerWarp = (nRows + nGlobalWarps - 1) / nGlobalWarps; uint32_t rowStart = rowsPerWarp * iGlobalWarp; uint32_t rowEnd = CAMath::Min(nRows, rowStart + rowsPerWarp); @@ -737,15 +737,15 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherMulti(int32_t nBlocks, int32 const uint32_t rowsOffset = calculateWarpOffsets(smem, input.nSliceRowClusters, rowStart, rowEnd, nWarps, iWarp, nLanes, iLane); const uint32_t* clusterOffsets = &clusters->clusterOffset[0][0] + rowStart; - const uint32_t* nSliceRowClusters = input.nSliceRowClusters + rowStart; - - compressorMemcpyBuffered(buf, output->qTotU + rowsOffset, input.qTotU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->qMaxU + rowsOffset, input.qMaxU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->flagsU + rowsOffset, input.flagsU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->padDiffU + 
rowsOffset, input.padDiffU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->timeDiffU + rowsOffset, input.timeDiffU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->sigmaPadU + rowsOffset, input.sigmaPadU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->sigmaTimeU + rowsOffset, input.sigmaTimeU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + const uint32_t* nSectorRowClusters = input.nSliceRowClusters + rowStart; + + compressorMemcpyBuffered(buf, output->qTotU + rowsOffset, input.qTotU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->qMaxU + rowsOffset, input.qMaxU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->flagsU + rowsOffset, input.flagsU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->padDiffU + rowsOffset, input.padDiffU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->timeDiffU + rowsOffset, input.timeDiffU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->sigmaPadU + rowsOffset, input.sigmaPadU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->sigmaTimeU + rowsOffset, input.sigmaTimeU, nSectorRowClusters, clusterOffsets, rowsPerWarp, 
nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); } else { const uint32_t nGlobalWarps = nWarps * (nBlocks - 1) / 2; const uint32_t iGlobalWarp = nWarps * (iBlock / 2 - 1) + iWarp; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx index 2d8b69a4be516..1f84aa4599a27 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx @@ -66,7 +66,7 @@ GPUd() int32_t GPUTPCCompressionTrackModel::Mirror() return 0; } -#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER) +#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER) #include "GPUTPCTrackLinearisation.h" #include "GPUTPCTracker.h" diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h index b67f544f513bf..b3b4da27e625b 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h @@ -17,7 +17,7 @@ // For debugging purposes, we provide means to use other track models // #define GPUCA_COMPRESSION_TRACK_MODEL_MERGER -// #define GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER +// #define GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER #include "GPUDef.h" @@ -25,7 +25,7 @@ #include "GPUTPCGMPropagator.h" #include "GPUTPCGMTrackParam.h" -#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER) +#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER) #include "GPUTPCTrackParam.h" #else // Default internal track model for compression @@ -49,7 +49,7 @@ class GPUTPCCompressionTrackModel GPUd() int32_t Filter(float y, float z, int32_t iRow); GPUd() int32_t Mirror(); -#if defined(GPUCA_COMPRESSION_TRACK_MODEL_MERGER) || defined(GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER) +#if defined(GPUCA_COMPRESSION_TRACK_MODEL_MERGER) || defined(GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER) 
GPUd() float X() const { return mTrk.GetX(); @@ -100,15 +100,15 @@ class GPUTPCCompressionTrackModel GPUd() void getClusterErrors2(int32_t iRow, float z, float sinPhi, float DzDs, float& ErrY2, float& ErrZ2) const; GPUd() void resetCovariance(); - GPUd() float LinearPad2Y(int32_t slice, float pad, float padWidth, uint8_t npads) const + GPUd() float LinearPad2Y(int32_t sector, float pad, float padWidth, uint8_t npads) const { const float u = (pad - 0.5f * npads) * padWidth; - return (slice >= GPUCA_NSLICES / 2) ? -u : u; + return (sector >= GPUCA_NSECTORS / 2) ? -u : u; } - GPUd() float LinearY2Pad(int32_t slice, float y, float padWidth, uint8_t npads) const + GPUd() float LinearY2Pad(int32_t sector, float y, float padWidth, uint8_t npads) const { - const float u = (slice >= GPUCA_NSLICES / 2) ? -y : y; + const float u = (sector >= GPUCA_NSECTORS / 2) ? -y : y; return u / padWidth + 0.5f * npads; } @@ -120,7 +120,7 @@ class GPUTPCCompressionTrackModel GPUTPCGMTrackParam mTrk; const GPUParam* mParam; -#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER) +#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER) GPUTPCTrackParam mTrk; float mAlpha; const GPUParam* mParam; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx index 16c6cb4b8d61c..fd0c929dd2ba7 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx @@ -39,7 +39,7 @@ void GPUTPCDecompression::SetPointersCompressedClusters(void*& mem, T& c, uint32 computePointerWithAlignment(mem, c.timeDiffU, nClU); computePointerWithAlignment(mem, c.sigmaPadU, nClU); computePointerWithAlignment(mem, c.sigmaTimeU, nClU); - computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSLICES); + computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSECTORS); uint32_t nClAreduced = reducedClA ? 
nClA - nTr : nClA; @@ -67,19 +67,19 @@ void GPUTPCDecompression::SetPointersCompressedClusters(void*& mem, T& c, uint32 void* GPUTPCDecompression::SetPointersTmpNativeBuffersGPU(void* mem) { - computePointerWithAlignment(mem, mTmpNativeClusters, NSLICES * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer); + computePointerWithAlignment(mem, mTmpNativeClusters, NSECTORS * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer); return mem; } void* GPUTPCDecompression::SetPointersTmpNativeBuffersOutput(void* mem) { - computePointerWithAlignment(mem, mNativeClustersIndex, NSLICES * GPUCA_ROW_COUNT); + computePointerWithAlignment(mem, mNativeClustersIndex, NSECTORS * GPUCA_ROW_COUNT); return mem; } void* GPUTPCDecompression::SetPointersTmpNativeBuffersInput(void* mem) { - computePointerWithAlignment(mem, mUnattachedClustersOffsets, NSLICES * GPUCA_ROW_COUNT); + computePointerWithAlignment(mem, mUnattachedClustersOffsets, NSECTORS * GPUCA_ROW_COUNT); computePointerWithAlignment(mem, mAttachedClustersOffsets, mInputGPU.nTracks); return mem; } @@ -98,7 +98,7 @@ void* GPUTPCDecompression::SetPointersInputClusterNativeAccess(void* mem) void* GPUTPCDecompression::SetPointersNClusterPerSectorRow(void* mem) { - computePointerWithAlignment(mem, mNClusterPerSectorRow, NSLICES * GPUCA_ROW_COUNT); + computePointerWithAlignment(mem, mNClusterPerSectorRow, NSECTORS * GPUCA_ROW_COUNT); return mem; } diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h index d39eba6a08e2d..e6f8377a246e2 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h @@ -49,7 +49,7 @@ class GPUTPCDecompression : public GPUProcessor #endif protected: - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; o2::tpc::CompressedClusters mInputGPU; uint32_t mMaxNativeClustersPerBuffer; diff --git 
a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx index 2ba80bf4d3b21..ee1a9c97cc30b 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx @@ -38,19 +38,19 @@ GPUdii() void GPUTPCDecompressionKernels::Thread -GPUdii() void GPUTPCDecompressionKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, int32_t sliceStart, int32_t nSlices) +GPUdii() void GPUTPCDecompressionKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, int32_t sectorStart, int32_t nSectors) { GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor; CompressedClusters& GPUrestrict() cmprClusters = decompressor.mInputGPU; ClusterNative* GPUrestrict() clusterBuffer = decompressor.mNativeClustersBuffer; const ClusterNativeAccess* outputAccess = decompressor.mClusterNativeAccess; uint32_t* offsets = decompressor.mUnattachedClustersOffsets; - for (int32_t i = get_global_id(0); i < GPUCA_ROW_COUNT * nSlices; i += get_global_size(0)) { + for (int32_t i = get_global_id(0); i < GPUCA_ROW_COUNT * nSectors; i += get_global_size(0)) { uint32_t iRow = i % GPUCA_ROW_COUNT; - uint32_t iSlice = sliceStart + (i / GPUCA_ROW_COUNT); - const uint32_t linearIndex = iSlice * GPUCA_ROW_COUNT + iRow; - uint32_t tmpBufferIndex = computeLinearTmpBufferIndex(iSlice, iRow, decompressor.mMaxNativeClustersPerBuffer); - ClusterNative* buffer = clusterBuffer + outputAccess->clusterOffset[iSlice][iRow]; + uint32_t iSector = sectorStart + (i / GPUCA_ROW_COUNT); + const uint32_t linearIndex = iSector * GPUCA_ROW_COUNT + iRow; + uint32_t tmpBufferIndex = computeLinearTmpBufferIndex(iSector, iRow, decompressor.mMaxNativeClustersPerBuffer); + ClusterNative* buffer 
= clusterBuffer + outputAccess->clusterOffset[iSector][iRow]; if (decompressor.mNativeClustersIndex[linearIndex] != 0) { decompressorMemcpyBasic(buffer, decompressor.mTmpNativeClusters + tmpBufferIndex, decompressor.mNativeClustersIndex[linearIndex]); } @@ -58,7 +58,7 @@ GPUdii() void GPUTPCDecompressionKernels::Thread= decompressor.mInputGPU.nSliceRows) ? 0 : decompressor.mInputGPU.nSliceRowClusters[linearIndex]); TPCClusterDecompressionCore::decompressHits(cmprClusters, offsets[linearIndex], end, clout); if (processors.param.rec.tpc.clustersShiftTimebins != 0.f) { - for (uint32_t k = 0; k < outputAccess->nClusters[iSlice][iRow]; k++) { + for (uint32_t k = 0; k < outputAccess->nClusters[iSector][iRow]; k++) { auto& cl = buffer[k]; float t = cl.getTime() + processors.param.rec.tpc.clustersShiftTimebins; if (t < 0) { @@ -92,11 +92,11 @@ GPUdii() void GPUTPCDecompressionUtilKernels::ThreadnClusters[slice][row]; k++) { - ClusterNative cl = clusterAccess->clusters[slice][row][k]; + for (uint32_t k = 0; k < clusterAccess->nClusters[sector][row]; k++) { + ClusterNative cl = clusterAccess->clusters[sector][row][k]; if (isClusterKept(cl, param)) { decompressor.mNClusterPerSectorRow[i]++; } @@ -112,14 +112,14 @@ GPUdii() void GPUTPCDecompressionUtilKernels::ThreadnClusters[slice][row]; k++) { - const ClusterNative cl = clusterAccess->clusters[slice][row][k]; + for (uint32_t k = 0; k < clusterAccess->nClusters[sector][row]; k++) { + const ClusterNative cl = clusterAccess->clusters[sector][row][k]; if (isClusterKept(cl, param)) { - clusterBuffer[outputAccess->clusterOffset[slice][row] + count] = cl; + clusterBuffer[outputAccess->clusterOffset[sector][row] + count] = cl; count++; } } @@ -131,10 +131,10 @@ GPUdii() void GPUTPCDecompressionUtilKernels::ThreadclusterOffset[slice][row]; - GPUCommonAlgorithm::sort(buffer, buffer + outputAccess->nClusters[slice][row]); + ClusterNative* buffer = clusterBuffer + outputAccess->clusterOffset[sector][row]; + 
GPUCommonAlgorithm::sort(buffer, buffer + outputAccess->nClusters[sector][row]); } } diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h index 0bd69653fdbd4..1ea93e4acb9d0 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h @@ -37,9 +37,9 @@ class GPUTPCDecompressionKernels : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, Args... args); - GPUd() static uint32_t computeLinearTmpBufferIndex(uint32_t slice, uint32_t row, uint32_t maxClustersPerBuffer) + GPUd() static uint32_t computeLinearTmpBufferIndex(uint32_t sector, uint32_t row, uint32_t maxClustersPerBuffer) { - return slice * (GPUCA_ROW_COUNT * maxClustersPerBuffer) + row * maxClustersPerBuffer; + return sector * (GPUCA_ROW_COUNT * maxClustersPerBuffer) + row * maxClustersPerBuffer; } template diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc index 43ed260f461a4..6c4f70d7c6884 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc @@ -47,7 +47,7 @@ class TPCClusterDecompressionCore return clusterVector.back(); } - GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT]) + GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector 
(&clusters)[GPUCA_NSECTORS][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSECTORS][GPUCA_ROW_COUNT]) { std::vector& clusterVector = clusters[slice][row]; auto& lock = locks[slice][row]; @@ -85,14 +85,14 @@ class TPCClusterDecompressionCore uint32_t pad = 0, time = 0; if (clusterIndex != 0) { uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1]; - bool changeLeg = (tmpSlice >= GPUCA_NSLICES); + bool changeLeg = (tmpSlice >= GPUCA_NSECTORS); if (changeLeg) { - tmpSlice -= GPUCA_NSLICES; + tmpSlice -= GPUCA_NSECTORS; } if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) { slice += tmpSlice; - if (slice >= GPUCA_NSLICES) { - slice -= GPUCA_NSLICES; + if (slice >= GPUCA_NSECTORS) { + slice -= GPUCA_NSECTORS; } row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1]; if (row >= GPUCA_ROW_COUNT) { @@ -105,7 +105,7 @@ class TPCClusterDecompressionCore if (changeLeg && track.Mirror()) { break; } - if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) { + if (track.Propagate(param.tpcGeometry.Row2X(row), param.SectorParam[slice].Alpha)) { break; } uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1]; @@ -140,7 +140,7 @@ class TPCClusterDecompressionCore float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime()); if (clusterIndex == 0) { zOffset = z; - track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); + track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SectorParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); } if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) { break; diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx index e3b8965c3e27b..296a203cf070b 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx +++ 
b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx @@ -48,9 +48,9 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom if (clustersCompressed->nTracks && clustersCompressed->maxTimeBin != -1e6 && clustersCompressed->maxTimeBin != param.continuousMaxTimeBin) { throw std::runtime_error("Configured max time bin does not match value used for track model encoding"); } - std::vector clusters[NSLICES][GPUCA_ROW_COUNT]; - std::atomic_flag locks[NSLICES][GPUCA_ROW_COUNT]; - for (uint32_t i = 0; i < NSLICES * GPUCA_ROW_COUNT; i++) { + std::vector clusters[NSECTORS][GPUCA_ROW_COUNT]; + std::atomic_flag locks[NSECTORS][GPUCA_ROW_COUNT]; + for (uint32_t i = 0; i < NSECTORS * GPUCA_ROW_COUNT; i++) { (&locks[0][0])[i].clear(); } const uint32_t maxTime = param.continuousMaxTimeBin > 0 ? ((param.continuousMaxTimeBin + 1) * ClusterNative::scaleTimePacked - 1) : TPC_MAX_TIME_BIN_TRIGGERED; @@ -69,10 +69,10 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom }); size_t nTotalClusters = clustersCompressed->nAttachedClusters + clustersCompressed->nUnattachedClusters; ClusterNative* clusterBuffer = allocator(nTotalClusters); - uint32_t offsets[NSLICES][GPUCA_ROW_COUNT]; + uint32_t offsets[NSECTORS][GPUCA_ROW_COUNT]; uint32_t offset = 0; uint32_t decodedAttachedClusters = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { clustersNative.nClusters[i][j] = clusters[i][j].size() + ((i * GPUCA_ROW_COUNT + j >= clustersCompressed->nSliceRows) ? 
0 : clustersCompressed->nSliceRowClusters[i * GPUCA_ROW_COUNT + j]); offsets[i][j] = offset; @@ -85,7 +85,7 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom } clustersNative.clustersLinear = clusterBuffer; clustersNative.setOffsetPtrs(); - tbb::parallel_for(0, NSLICES, [&](auto i) { + tbb::parallel_for(0, NSECTORS, [&](auto i) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { ClusterNative* buffer = &clusterBuffer[clustersNative.clusterOffset[i][j]]; if (clusters[i][j].size()) { diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h index 4a40b20e8d4f5..0c54f34c0237a 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h @@ -32,7 +32,7 @@ struct GPUParam; class TPCClusterDecompressor { public: - static constexpr uint32_t NSLICES = GPUCA_NSLICES; + static constexpr uint32_t NSECTORS = GPUCA_NSECTORS; static int32_t decompress(const o2::tpc::CompressedClustersFlat* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, const GPUParam& param, bool deterministicRec); static int32_t decompress(const o2::tpc::CompressedClusters* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, const GPUParam& param, bool deterministicRec); }; diff --git a/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx b/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx index 9cb49bf4c7ef5..b23d19c3c9cd4 100644 --- a/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx +++ b/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx @@ -23,7 +23,7 @@ #include const int32_t sort_method = 1; // 0 No sorting, 1 sort after pad, 2 sort after time, 3/4 mixed methods favoring pad / time -const int32_t slice_diff = 1; +const int32_t sector_diff = 1; const 
int32_t row_diff = 1; const int32_t pad_diff = 1; const int32_t time_diff = 1; @@ -40,7 +40,7 @@ const int32_t track_separate_q = track_based && 1; const int32_t track_diffsigma = track_based && 0; const int32_t track_separate_sigma = track_based && 1; const int32_t truncate_bits = 1; -const int32_t separate_slices = 0; +const int32_t separate_sectors = 0; const int32_t separate_patches = 0; const int32_t separate_sides = 0; const int32_t full_row_numbers = 1; @@ -59,7 +59,7 @@ const int32_t sort_pad_mixed_bins = 100; const int32_t sort_time_mixed_bins = 400; #define EVENT 0 -#define SLICE 1 +#define SECTOR 1 #define PATCH 2 #define ROW 3 #define PAD 4 @@ -95,14 +95,14 @@ const int32_t rr = optimized_negative_values && 0 ? 13 : 14; // We can make them const uint32_t field_bits[] = {0, 6, 0, 8, 14, 15, 8, 8, 10, 16, 2, 0, 14, 15, 16, 10, 26, 16, 8, 8, 16, 26, 8, 8, rr, rr, rr, rr, rr, 14}; const uint32_t significant_bits[] = {0, 6, 0, 8, 14, 15, truncate_sigma, truncate_sigma, truncate_charge, truncate_charge, 2, 0, 14, 15, truncate_charge, truncate_charge, 26, 16, truncate_sigma, truncate_sigma, 16, 26, 8, 8, rr, rr, rr, rr, rr, 14}; const int32_t nFields = sizeof(field_bits) / sizeof(field_bits[0]); -const char* field_names[] = {"event", "slice", "patch", "row", "pad", "time", "sigmaPad", "sigmaTime", "qmax", "qtot", "flagPadTime", "trackID", "resTrackPad", +const char* field_names[] = {"event", "sector", "patch", "row", "pad", "time", "sigmaPad", "sigmaTime", "qmax", "qtot", "flagPadTime", "trackID", "resTrackPad", "resTrackTime", "trackQTot", "trackQMax", "qmaxtot", "sigmapadtime", "diffsigmapad", "diffsigmatime", "diffsigmapadtime", "tracktotmax", "trackfirstrow", "trackrow", "pad_80", "pad_92", "pad_104", "pad_116", "pad_128", "pad_140"}; union cluster_struct { struct { - uint32_t event, slice, patch, row, pad, time, sigmaPad, sigmaTime, qmax, qtot, splitPadTime; + uint32_t event, sector, patch, row, pad, time, sigmaPad, sigmaTime, qmax, qtot, splitPadTime; 
int32_t trackID; uint32_t resPad, resTime, avgtot, avgmax; }; @@ -209,7 +209,7 @@ bool clustercompare_padtime_mixed(cluster_struct a, cluster_struct b) { return ( bool clustercompare_timepad_mixed(cluster_struct a, cluster_struct b) { return (a.time / sort_time_mixed_bins < b.time / sort_time_mixed_bins || (a.time / sort_time_mixed_bins == b.time / sort_time_mixed_bins && a.pad < b.pad)); } -bool clustercompare_inevent(cluster_struct a, cluster_struct b) { return (a.slice < b.slice || (a.slice == b.slice && a.patch < b.patch) || (a.slice == b.slice && a.patch == b.patch && a.row < b.row)); } +bool clustercompare_inevent(cluster_struct a, cluster_struct b) { return (a.sector < b.sector || (a.sector == b.sector && a.patch < b.patch) || (a.sector == b.sector && a.patch == b.patch && a.row < b.row)); } void do_diff(uint32_t& val, int32_t& last, uint32_t bits, uint32_t maxval = 0) { @@ -327,7 +327,7 @@ int32_t main(int argc, char** argv) double* probabilities[nFields]; int64_t counts[nFields]; int32_t used[nFields]; - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID) { continue; } @@ -337,18 +337,18 @@ int32_t main(int argc, char** argv) double rawtotalbytes = 0; double entrototalbytes = 0; - for (int32_t islice = 0; islice < 36; islice++) { + for (int32_t isector = 0; isector < 36; isector++) { for (int32_t ipatch = 0; ipatch < 6; ipatch++) { - if (separate_slices) { - printf("SLICE %d ", islice); + if (separate_sectors) { + printf("SECTOR %d ", isector); } if (separate_patches) { printf("PATCH %d", ipatch); } - if (separate_slices || separate_patches) { + if (separate_sectors || separate_patches) { printf("\n"); } - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID || i == PATCH) { continue; } @@ -359,7 +359,7 @@ int32_t main(int argc, char** argv) size_t nClustersUsed = 0; - int32_t lastRow = 0, lastPad = 0, lastTime = 0, lastSlice = 0, 
lastResPad = 0, lastResTime = 0, lastQTot = 0, lastQMax = 0, lastSigmaPad = 0, lastSigmaTime = 0, lastTrack = -1, lastEvent = 0; + int32_t lastRow = 0, lastPad = 0, lastTime = 0, lastSector = 0, lastResPad = 0, lastResTime = 0, lastQTot = 0, lastQMax = 0, lastSigmaPad = 0, lastSigmaTime = 0, lastTrack = -1, lastEvent = 0; for (size_t i = 0; i < nClusters; i++) { const cluster_struct& cluster_org = clusters[i]; @@ -368,10 +368,10 @@ int32_t main(int argc, char** argv) printf("%d\n", cluster.pad); } - if ((separate_slices && cluster.slice != islice) || (separate_patches && cluster.patch != ipatch)) { + if ((separate_sectors && cluster.sector != isector) || (separate_patches && cluster.patch != ipatch)) { continue; } - if (separate_sides && !(cluster.slice < 18 ^ islice < 18)) { + if (separate_sides && !(cluster.sector < 18 ^ isector < 18)) { continue; } @@ -379,7 +379,7 @@ int32_t main(int argc, char** argv) uint32_t dSigmaPad, dSigmaTime; if (cluster.event != lastEvent) { - lastRow = lastPad = lastTime = lastSlice = 0; + lastRow = lastPad = lastTime = lastSector = 0; lastTrack = -1; } @@ -387,13 +387,13 @@ int32_t main(int argc, char** argv) cluster.row += fgRows[cluster.patch][0]; } - if ((slice_diff || res_diff || track_diffqtot || track_diffqmax) && cluster.trackID != -1 && track_based) { + if ((sector_diff || res_diff || track_diffqtot || track_diffqmax) && cluster.trackID != -1 && track_based) { if (lastTrack != cluster.trackID) { - lastSlice = lastResPad = lastResTime = lastQTot = lastQMax = lastSigmaPad = lastSigmaTime = 0; + lastSector = lastResPad = lastResTime = lastQTot = lastQMax = lastSigmaPad = lastSigmaTime = 0; } - if (slice_diff) { - do_diff(cluster.slice, lastSlice, field_bits[SLICE]); + if (sector_diff) { + do_diff(cluster.sector, lastSector, field_bits[SECTOR]); } if (res_diff) { @@ -483,17 +483,17 @@ int32_t main(int argc, char** argv) lastTrack = cluster.trackID; if (print_clusters > 0 || (print_clusters < 0 && i < -print_clusters)) { - 
printf("Event %u Track %d Slice %u Patch %u Row %u Pad %u Time %u sigmaPad %u sigmaTime %u qTot %u qMax %u Flag %u resPad %u resTime %u avgTot %u avgMax %u\n", cluster.event, cluster.trackID, cluster.slice, cluster.patch, cluster.row, cluster.pad, cluster.time, cluster.sigmaPad, + printf("Event %u Track %d Sector %u Patch %u Row %u Pad %u Time %u sigmaPad %u sigmaTime %u qTot %u qMax %u Flag %u resPad %u resTime %u avgTot %u avgMax %u\n", cluster.event, cluster.trackID, cluster.sector, cluster.patch, cluster.row, cluster.pad, cluster.time, cluster.sigmaPad, cluster.sigmaTime, cluster.qtot, cluster.qmax, cluster.splitPadTime, cluster.resPad, cluster.resTime, cluster.avgtot, cluster.avgmax); } - for (int32_t j = SLICE; j < nFields; j++) { + for (int32_t j = SECTOR; j < nFields; j++) { bool forceStore = false; if (j == CLUSTER_ID || j == PATCH) { continue; } - if (j == SLICE && (track_based == 0 || cluster.trackID == -1)) { + if (j == SECTOR && (track_based == 0 || cluster.trackID == -1)) { continue; } @@ -594,7 +594,7 @@ int32_t main(int argc, char** argv) double log2 = log(2.); double entropies[nFields]; double huffmanSizes[nFields]; - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID || i == PATCH) { continue; } @@ -631,7 +631,7 @@ int32_t main(int argc, char** argv) int32_t rawBits = 0; double entroTotal = 0., huffmanTotal = 0.; - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID || i == PATCH) { continue; } @@ -663,7 +663,7 @@ int32_t main(int argc, char** argv) used[i] = 1; } } - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (field_bits[i] == 0) { continue; } @@ -672,7 +672,7 @@ int32_t main(int argc, char** argv) } printf("Field %2d/%16s (count %10ld / used %1d) rawBits %2d huffman %9.6f entropy %9.6f\n", i, field_names[i], counts[i], used[i], field_bits[i], huffmanSizes[i], entropies[i]); } - 
rawBits = 79; // Override incorrect calculation: Row is only 6 bit in raw format, and slice is not needed! + rawBits = 79; // Override incorrect calculation: Row is only 6 bit in raw format, and sector is not needed! printf("Raw Bits: %d - Total Size %f MB Clusters %d\n", rawBits, (double)rawBits * (double)nClustersUsed / 8. / 1.e6, nClustersUsed); printf("Huffman Bits: %f - Total Size %f MB\n", huffmanTotal / (double)nClustersUsed, huffmanTotal / 8. / 1.e6); printf("Entropy Bits: %f - Total Size %f MB\n", entroTotal / (double)nClustersUsed, entroTotal / 8. / 1.e6); @@ -680,10 +680,10 @@ int32_t main(int argc, char** argv) entrototalbytes += entroTotal; rawtotalbytes += (double)rawBits * (double)nClustersUsed; - if (separate_sides && !separate_slices && islice == 0) { - islice = 17; - } else if (!separate_slices) { - islice = 9999999; + if (separate_sides && !separate_sectors && isector == 0) { + isector = 17; + } else if (!separate_sectors) { + isector = 9999999; } if (!separate_patches) { @@ -692,12 +692,12 @@ int32_t main(int argc, char** argv) } } - if (separate_slices || separate_patches || separate_sides) { + if (separate_sectors || separate_patches || separate_sides) { printf("Total Compression: %f\n", rawtotalbytes / entrototalbytes); } printf("Exiting\n"); - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID || i == PATCH) { continue; } diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index 4c275d6de1bf1..51b5c0b101537 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -27,9 +27,7 @@ struct AliHLTTPCClusterMCLabel; struct AliHLTTPCRawCluster; -namespace o2 -{ -namespace tpc +namespace o2::tpc { struct ClusterNativeAccess; struct CompressedClustersFlat; @@ -38,8 +36,7 @@ class TrackTPC; namespace constants { } // namespace constants -} // namespace tpc -} // namespace o2 +} // namespace o2::tpc 
namespace o2 { @@ -91,21 +88,13 @@ class CalibdEdxContainer; } // namespace tpc } // namespace o2 -namespace o2 -{ -namespace gpu +namespace o2::gpu { class CorrectionMapsHelper; class TPCFastTransform; struct TPCPadGainCalib; struct TPCZSLinkMapping; -} // namespace gpu -} // namespace o2 -namespace o2 -{ -namespace gpu -{ #include "utils/bitfield.h" #define ENUM_CLASS class #define ENUM_UINT : uint32_t @@ -138,7 +127,7 @@ class GPUDataTypes QA = 2 }; enum ENUM_CLASS RecoStep { TPCConversion = 1, - TPCSliceTracking = 2, + TPCSectorTracking = 2, TPCMerging = 4, TPCCompression = 8, TRDTracking = 16, @@ -167,7 +156,7 @@ class GPUDataTypes #endif typedef bitfield RecoStepField; typedef bitfield InOutTypeField; - static constexpr uint32_t NSLICES = 36; + static constexpr uint32_t NSECTORS = 36; static DeviceType GetDeviceType(const char* type); }; @@ -205,27 +194,27 @@ typedef GPUCalibObjectsTemplate GPUCalibObjects; // NOTE: These 2 mu typedef GPUCalibObjectsTemplate GPUCalibObjectsConst; struct GPUTrackingInOutZS { - static constexpr uint32_t NSLICES = GPUDataTypes::NSLICES; + static constexpr uint32_t NSECTORS = GPUDataTypes::NSECTORS; static constexpr uint32_t NENDPOINTS = 20; - struct GPUTrackingInOutZSSlice { + struct GPUTrackingInOutZSSector { const void* const* zsPtr[NENDPOINTS]; const uint32_t* nZSPtr[NENDPOINTS]; uint32_t count[NENDPOINTS]; }; struct GPUTrackingInOutZSCounts { - uint32_t count[NSLICES][NENDPOINTS] = {}; + uint32_t count[NSECTORS][NENDPOINTS] = {}; }; struct GPUTrackingInOutZSMeta { - void* ptr[NSLICES][NENDPOINTS]; - uint32_t n[NSLICES][NENDPOINTS]; + void* ptr[NSECTORS][NENDPOINTS]; + uint32_t n[NSECTORS][NENDPOINTS]; }; - GPUTrackingInOutZSSlice slice[NSLICES]; + GPUTrackingInOutZSSector sector[NSECTORS]; }; struct GPUTrackingInOutDigits { - static constexpr uint32_t NSLICES = GPUDataTypes::NSLICES; - const o2::tpc::Digit* tpcDigits[NSLICES] = {nullptr}; - size_t nTPCDigits[NSLICES] = {0}; + static constexpr uint32_t NSECTORS = 
GPUDataTypes::NSECTORS; + const o2::tpc::Digit* tpcDigits[NSECTORS] = {nullptr}; + size_t nTPCDigits[NSECTORS] = {0}; const GPUTPCDigitsMCInput* tpcDigitsMC = nullptr; }; @@ -233,18 +222,18 @@ struct GPUTrackingInOutPointers { GPUTrackingInOutPointers() = default; // TPC - static constexpr uint32_t NSLICES = GPUDataTypes::NSLICES; + static constexpr uint32_t NSECTORS = GPUDataTypes::NSECTORS; const GPUTrackingInOutZS* tpcZS = nullptr; const GPUTrackingInOutDigits* tpcPackedDigits = nullptr; - const GPUTPCClusterData* clusterData[NSLICES] = {nullptr}; - uint32_t nClusterData[NSLICES] = {0}; - const AliHLTTPCRawCluster* rawClusters[NSLICES] = {nullptr}; - uint32_t nRawClusters[NSLICES] = {0}; + const GPUTPCClusterData* clusterData[NSECTORS] = {nullptr}; + uint32_t nClusterData[NSECTORS] = {0}; + const AliHLTTPCRawCluster* rawClusters[NSECTORS] = {nullptr}; + uint32_t nRawClusters[NSECTORS] = {0}; const o2::tpc::ClusterNativeAccess* clustersNative = nullptr; - const GPUTPCTrack* sliceTracks[NSLICES] = {nullptr}; - uint32_t nSliceTracks[NSLICES] = {0}; - const GPUTPCHitId* sliceClusters[NSLICES] = {nullptr}; - uint32_t nSliceClusters[NSLICES] = {0}; + const GPUTPCTrack* sectorTracks[NSECTORS] = {nullptr}; + uint32_t nSectorTracks[NSECTORS] = {0}; + const GPUTPCHitId* sectorClusters[NSECTORS] = {nullptr}; + uint32_t nSectorClusters[NSECTORS] = {0}; const AliHLTTPCClusterMCLabel* mcLabelsTPC = nullptr; uint32_t nMCLabelsTPC = 0; const GPUTPCMCInfo* mcInfosTPC = nullptr; @@ -325,7 +314,6 @@ struct GPUTrackingInOutPointers { #undef ENUM_CLASS #undef ENUM_UINT -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h b/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h index 0788b445416b4..fe6d05cef202b 100644 --- a/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h @@ -32,9 +32,7 @@ #include "SimulationDataFormat/ConstMCTruthContainer.h" #include 
"SimulationDataFormat/MCCompLabel.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCDigitsMCInput { @@ -55,7 +53,6 @@ struct GPUTPCLinearLabels { std::vector data; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h index 2cec1775dd239..e5012d86742f8 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h @@ -45,8 +45,8 @@ struct GPUMemorySizeScalers { double tpcSectorTracksPerHit = 0.02; double tpcSectorTrackHitsPerHit = 0.8; double tpcSectorTrackHitsPerHitWithRejection = 1.0; - double tpcMergedTrackPerSliceTrack = 0.9; - double tpcMergedTrackHitPerSliceHit = 1.1; + double tpcMergedTrackPerSectorTrack = 0.9; + double tpcMergedTrackHitPerSectorHit = 1.1; size_t tpcCompressedUnattachedHitsBase1024[3] = {900, 900, 500}; // No ratio, but integer fraction of 1024 for exact computation // Upper limits @@ -71,7 +71,7 @@ struct GPUMemorySizeScalers { return returnMaxVal ? maxVal : (std::min(maxVal, offset + val) * factor * temporaryFactor); } - inline size_t NTPCPeaks(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxPeaks : (GPUCA_NSLICES * tpcMaxPeaks), hitOffset + tpcDigits * tpcPeaksPerDigit); } + inline size_t NTPCPeaks(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxPeaks : (GPUCA_NSECTORS * tpcMaxPeaks), hitOffset + tpcDigits * tpcPeaksPerDigit); } inline size_t NTPCClusters(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxSectorClusters : tpcMaxClusters, (conservative ? 
1.0 : tpcClustersPerPeak) * NTPCPeaks(tpcDigits, perSector)); } inline size_t NTPCStartHits(size_t tpcHits) { return getValue(tpcMaxStartHits, tpcHits * tpcStartHitsPerHit); } inline size_t NTPCRowStartHits(size_t tpcHits) { return getValue(tpcMaxRowStartHits, std::max(NTPCStartHits(tpcHits) * (tpcHits < 30000000 ? 20 : 12) / GPUCA_ROW_COUNT, tpcMinRowStartHits)); } @@ -79,8 +79,8 @@ struct GPUMemorySizeScalers { inline size_t NTPCTrackletHits(size_t tpcHits) { return getValue(tpcMaxTrackletHits, hitOffset + tpcHits * tpcTrackletHitsPerHit); } inline size_t NTPCSectorTracks(size_t tpcHits) { return getValue(tpcMaxSectorTracks, tpcHits * tpcSectorTracksPerHit); } inline size_t NTPCSectorTrackHits(size_t tpcHits, uint8_t withRejection = 0) { return getValue(tpcMaxSectorTrackHits, tpcHits * (withRejection ? tpcSectorTrackHitsPerHitWithRejection : tpcSectorTrackHitsPerHit)); } - inline size_t NTPCMergedTracks(size_t tpcSliceTracks) { return getValue(tpcMaxMergedTracks, tpcSliceTracks * (conservative ? 1.0 : tpcMergedTrackPerSliceTrack)); } - inline size_t NTPCMergedTrackHits(size_t tpcSliceTrackHitss) { return getValue(tpcMaxMergedTrackHits, tpcSliceTrackHitss * tpcMergedTrackHitPerSliceHit); } + inline size_t NTPCMergedTracks(size_t tpcSectorTracks) { return getValue(tpcMaxMergedTracks, tpcSectorTracks * (conservative ? 1.0 : tpcMergedTrackPerSectorTrack)); } + inline size_t NTPCMergedTrackHits(size_t tpcSectorTrackHitss) { return getValue(tpcMaxMergedTrackHits, tpcSectorTrackHitss * tpcMergedTrackHitPerSectorHit); } inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 
1024 : std::min(1024, tpcCompressedUnattachedHitsBase1024[type] * factor * temporaryFactor); } }; diff --git a/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h b/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h index e16fde9614911..b6e176f468995 100644 --- a/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h +++ b/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUNewCalibValues { @@ -33,7 +31,6 @@ struct GPUNewCalibValues { void updateFrom(const GPUNewCalibValues* from); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUOutputControl.h b/GPU/GPUTracking/DataTypes/GPUOutputControl.h index cad554f355c8e..799fd25330ab4 100644 --- a/GPU/GPUTracking/DataTypes/GPUOutputControl.h +++ b/GPU/GPUTracking/DataTypes/GPUOutputControl.h @@ -20,9 +20,7 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { // This defines an output region. ptrBase points to a memory buffer, which should have a proper alignment. 
@@ -83,7 +81,6 @@ struct GPUTrackingOutputs { static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::*v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index afde8d4128bab..05888770ef9e5 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -23,9 +23,7 @@ #include #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUDisplayFrontendInterface; class GPUReconstruction; @@ -80,8 +78,7 @@ struct GPUSettingsDeviceBackend { GPUReconstruction* master = nullptr; // GPUReconstruction master object }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #ifdef GPUCA_GPUCODE_DEVICE #define QCONFIG_GPU diff --git a/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h b/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h index 746fb1cf7d19f..a1dd54bbba02b 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h @@ -22,7 +22,7 @@ namespace o2::gpu { struct GPUParam; struct GPUTPCClusterOccupancyMapBin { - uint16_t bin[GPUCA_NSLICES][GPUCA_ROW_COUNT]; + uint16_t bin[GPUCA_NSECTORS][GPUCA_ROW_COUNT]; GPUd() static uint32_t getNBins(const GPUParam& param); GPUd() static uint32_t getTotalSize(const GPUParam& param); diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h index fb1a12da994da..3c86dbfcd8d18 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h @@ -17,13 +17,11 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCGMMergedTrackHit { uint32_t num; - uint8_t slice, row, leg, state; + uint8_t sector, row, leg, state; // NOTE: the lower states must match 
those from ClusterNative! // TODO: take them directly from clusterNative header. @@ -49,7 +47,6 @@ struct GPUTPCGMMergedTrackHitXYZ { #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h index 13940cecc86de..6417e47352339 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCGMPolynomialField @@ -289,7 +287,6 @@ GPUdi() float GPUTPCGMPolynomialField::GetFieldItsBz(float x, float y, float z) return bz; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index da9a66fa57301..461ac9366ca23 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -17,18 +17,16 @@ #include "GPUCommonDef.h" -#if !defined(GPUCA_NSLICES) && !defined(GPUCA_ROW_COUNT) +#if !defined(GPUCA_NSECTORS) && !defined(GPUCA_ROW_COUNT) #include "DataFormatsTPC/Constants.h" -#define GPUCA_NSLICES o2::tpc::constants::MAXSECTOR +#define GPUCA_NSECTORS o2::tpc::constants::MAXSECTOR #define GPUCA_ROW_COUNT o2::tpc::constants::MAXGLOBALPADROW #ifndef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_TPC_GEOMETRY_O2 #endif #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { // Copy of TPC constants from AliRoot:TPCGeometry / O2:TPC/Base/Mapper // Should be unified, but cannot take the contants from the official headers for now, since we want it to be constexpr @@ -112,25 +110,25 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() float PadWidth(int32_t row) const { return (mPadWidth[GetRegion(row)]); } GPUd() uint8_t NPads(int32_t row) const { return mNPads[row]; } - GPUd() float LinearPad2Y(int32_t slice, 
int32_t row, float pad) const + GPUd() float LinearPad2Y(int32_t sector, int32_t row, float pad) const { #ifdef GPUCA_TPC_GEOMETRY_O2 const float u = (pad - 0.5f * (mNPads[row] - 1)) * PadWidth(row); #else const float u = (pad - 0.5f * mNPads[row]) * PadWidth(row); #endif - return (slice >= GPUCA_NSLICES / 2) ? -u : u; + return (sector >= GPUCA_NSECTORS / 2) ? -u : u; } - GPUd() static float LinearTime2Z(int32_t slice, float time) + GPUd() static float LinearTime2Z(int32_t sector, float time) { const float v = 250.f - time * FACTOR_T2Z; // Used in compression, must remain constant at 250cm! - return (slice >= GPUCA_NSLICES / 2) ? -v : v; + return (sector >= GPUCA_NSECTORS / 2) ? -v : v; } - GPUd() float LinearY2Pad(int32_t slice, int32_t row, float y) const + GPUd() float LinearY2Pad(int32_t sector, int32_t row, float y) const { - const float u = (slice >= GPUCA_NSLICES / 2) ? -y : y; + const float u = (sector >= GPUCA_NSECTORS / 2) ? -y : y; #ifdef GPUCA_TPC_GEOMETRY_O2 return u / PadWidth(row) + 0.5f * (mNPads[row] - 1); #else @@ -138,12 +136,11 @@ class GPUTPCGeometry // TODO: Make values constexpr #endif } - GPUd() static float LinearZ2Time(int32_t slice, float z) + GPUd() static float LinearZ2Time(int32_t sector, float z) { - const float v = (slice >= GPUCA_NSLICES / 2) ? -z : z; + const float v = (sector >= GPUCA_NSECTORS / 2) ? 
-z : z; return (250.f - v) * FACTOR_Z2T; // Used in compression, must remain constant at 250cm } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTRDDef.h b/GPU/GPUTracking/DataTypes/GPUTRDDef.h index 4340e854cd67d..bb0cf3652a7e7 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDDef.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDDef.h @@ -17,23 +17,19 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace track +namespace o2::track { template class TrackParametrizationWithError; -} // namespace track -namespace base +} // namespace o2::track + +namespace o2::base { template class PropagatorImpl; -} // namespace base -} // namespace o2 +} // namespace o2::base -namespace o2 -{ -namespace gpu +namespace o2::gpu { typedef o2::track::TrackParametrizationWithError TRDBaseTrack; @@ -62,7 +58,6 @@ class GPUTRDTracker_t; typedef GPUTRDTracker_t GPUTRDTracker; typedef GPUTRDTracker_t GPUTRDTrackerGPU; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDDEF_H diff --git a/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h b/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h index 6b37afbde12d7..0a269c05869e4 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h @@ -17,9 +17,7 @@ // This is the interface for the GPUTRDTrack based on the O2 track type #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template class trackInterface; @@ -28,8 +26,7 @@ namespace gputpcgmmergertypes { struct GPUTPCOuterParam; } // namespace gputpcgmmergertypes -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #include "ReconstructionDataFormats/Track.h" #include "ReconstructionDataFormats/TrackTPCITS.h" @@ -38,9 +35,7 @@ struct GPUTPCOuterParam; #include "ReconstructionDataFormats/TrackLTIntegral.h" #include "CommonConstants/LHCConstants.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template <> @@ 
-94,7 +89,6 @@ class trackInterface : public o2::track::TrackParCov ClassDefNV(trackInterface, 1); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx b/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx index 54f28ec21d30e..369d3838b646f 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx @@ -19,14 +19,11 @@ using namespace o2::gpu; #include "GPUTRDTrack.inc" #if !defined(GPUCA_GPUCODE) -namespace o2 -{ -namespace gpu +namespace o2::gpu { #if !defined(GPUCA_O2_LIB) // Instantiate O2 track version, for O2 this happens in GPUTRDTrackO2.cxx template class GPUTRDTrack_t>; #endif template class GPUTRDTrack_t>; // Always instatiate GM track version -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h index 66cf31fc8e8d0..18f7c61e01fc3 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h @@ -22,25 +22,20 @@ struct GPUTRDTrackDataRecord; class AliHLTExternalTrackParam; -namespace o2 -{ -namespace tpc +namespace o2::tpc { class TrackTPC; -} // namespace tpc -namespace dataformats +} // namespace o2::tpc +namespace o2::dataformats { class TrackTPCITS; class GlobalTrackID; -} // namespace dataformats -} // namespace o2 +} // namespace o2::dataformats //_____________________________________________________________________________ #include "GPUTRDInterfaceO2Track.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template @@ -127,7 +122,6 @@ class GPUTRDTrack_t : public T #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDTRACK_H diff --git a/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h b/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h index 01b61928be922..41ed5d0859efe 100644 --- a/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h +++ 
b/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h @@ -21,9 +21,7 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTriggerOutputs { @@ -52,7 +50,6 @@ struct GPUTriggerOutputs { static_assert(sizeof(o2::tpc::TriggerInfoDLBZS) % sizeof(uint32_t) == 0); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h b/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h index f3a7f4b2a0724..cbb8e2f161fa7 100644 --- a/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h +++ b/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h @@ -17,12 +17,9 @@ #include "DataFormatsTPC/dEdxInfo.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { using GPUdEdxInfo = o2::tpc::dEdxInfo; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/TPCPadBitMap.h b/GPU/GPUTracking/DataTypes/TPCPadBitMap.h index 5a4beda82889e..6ddfac8c268ee 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadBitMap.h +++ b/GPU/GPUTracking/DataTypes/TPCPadBitMap.h @@ -102,7 +102,7 @@ struct TPCPadBitMap { }; uint16_t mPadOffsetPerRow[GPUCA_ROW_COUNT]; - SectorBitMap mBitMap[GPUCA_NSLICES]; + SectorBitMap mBitMap[GPUCA_NSECTORS]; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h index 35aa353a384e6..263956c8b5602 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h +++ b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h @@ -81,14 +81,14 @@ struct TPCPadGainCalib { GPUdi() void setMinCorrectionFactor(const float minCorrectionFactor) { - for (int32_t sector = 0; sector < GPUCA_NSLICES; sector++) { + for (int32_t sector = 0; sector < GPUCA_NSECTORS; sector++) { mGainCorrection[sector].mMinCorrectionFactor = minCorrectionFactor; } } GPUdi() void setMaxCorrectionFactor(const float maxCorrectionFactor) { - for (int32_t sector = 0; sector < GPUCA_NSLICES; sector++) { + for (int32_t sector = 0; sector < GPUCA_NSECTORS; sector++) { 
mGainCorrection[sector].mMaxCorrectionFactor = maxCorrectionFactor; } } @@ -154,7 +154,7 @@ struct TPCPadGainCalib { }; uint16_t mPadOffsetPerRow[GPUCA_ROW_COUNT]; - SectorPadGainCorrection mGainCorrection[GPUCA_NSLICES]; + SectorPadGainCorrection mGainCorrection[GPUCA_NSECTORS]; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Debug/GPUROOTDump.h b/GPU/GPUTracking/Debug/GPUROOTDump.h index f8f8950a06ae0..d4f034fd7c60f 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDump.h +++ b/GPU/GPUTracking/Debug/GPUROOTDump.h @@ -28,9 +28,7 @@ class TNtuple; #include #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { #if !defined(GPUCA_NO_ROOT) && !defined(GPUCA_GPUCODE) namespace @@ -172,7 +170,6 @@ class GPUROOTDump } }; #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Debug/GPUROOTDumpCore.h b/GPU/GPUTracking/Debug/GPUROOTDumpCore.h index f3f7890ebfa5b..08e88eddb377e 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDumpCore.h +++ b/GPU/GPUTracking/Debug/GPUROOTDumpCore.h @@ -21,9 +21,7 @@ class TFile; -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUROOTDumpCore; @@ -61,7 +59,6 @@ class GPUROOTDumpCore std::vector mBranches; #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index 75b0169a660a5..f18390629f2bc 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -48,17 +48,17 @@ #define TPC_MAX_TIME_BIN_TRIGGERED 600 -#if defined(GPUCA_NSLICES) || defined(GPUCA_ROW_COUNT) - #error GPUCA_NSLICES or GPUCA_ROW_COUNT already defined, do not include GPUTPCGeometry.h before! +#if defined(GPUCA_NSECTORS) || defined(GPUCA_ROW_COUNT) + #error GPUCA_NSECTORS or GPUCA_ROW_COUNT already defined, do not include GPUTPCGeometry.h before! 
#endif #if defined(GPUCA_TPC_GEOMETRY_O2) && !(defined(ROOT_VERSION_CODE) && ROOT_VERSION_CODE < 393216) //Use definitions from the O2 headers if available for nicer code and type safety #include "DataFormatsTPC/Constants.h" - #define GPUCA_NSLICES o2::tpc::constants::MAXSECTOR + #define GPUCA_NSECTORS o2::tpc::constants::MAXSECTOR #define GPUCA_ROW_COUNT o2::tpc::constants::MAXGLOBALPADROW #else //Define it manually, if O2 headers not available, ROOT5, and OpenCL 1.2, which do not know C++11. - #define GPUCA_NSLICES 36 + #define GPUCA_NSECTORS 36 #ifdef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_ROW_COUNT 152 #else diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index cebc74fcc4a5b..c40659ecd2632 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -32,7 +32,7 @@ #if defined(GPUCA_GPUTYPE_MI2xx) #define GPUCA_WARP_SIZE 64 #define GPUCA_THREAD_COUNT 256 - #define GPUCA_LB_GPUTPCCreateSliceData 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 1024 #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2 @@ -46,7 +46,7 @@ #define GPUCA_LB_GPUTPCCFGather 1024, 1 #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 256 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 @@ -56,7 +56,7 @@ #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 #define 
GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 @@ -89,7 +89,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -98,7 +98,7 @@ #elif defined(GPUCA_GPUTYPE_VEGA) #define GPUCA_WARP_SIZE 64 #define GPUCA_THREAD_COUNT 256 - #define GPUCA_LB_GPUTPCCreateSliceData 128 + #define GPUCA_LB_GPUTPCCreateTrackingData 128 #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 #define GPUCA_LB_GPUTPCStartHitsFinder 1024 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 @@ -112,7 +112,7 @@ #define GPUCA_LB_GPUTPCCFGather 1024, 1 #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200 - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 256 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 @@ -122,7 +122,7 @@ #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 @@ -155,7 +155,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ 
-164,7 +164,7 @@ #elif defined(GPUCA_GPUTYPE_AMPERE) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT 512 - #define GPUCA_LB_GPUTPCCreateSliceData 384 + #define GPUCA_LB_GPUTPCCreateTrackingData 384 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 512 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4 @@ -178,7 +178,7 @@ #define GPUCA_LB_GPUTPCCFGather 1024, 1 #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 32, 6 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 @@ -188,7 +188,7 @@ #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 @@ -221,7 +221,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -230,7 +230,7 @@ #elif defined(GPUCA_GPUTYPE_TURING) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT 512 - #define GPUCA_LB_GPUTPCCreateSliceData 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 512 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 @@ -244,7 +244,7 @@ #define GPUCA_LB_GPUTPCCFGather 1024, 1 #define 
GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4 - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 64, 5 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 @@ -254,7 +254,7 @@ #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 @@ -279,7 +279,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_COMP_GATHER_KERNEL 4 #define GPUCA_COMP_GATHER_MODE 3 @@ -297,8 +297,8 @@ #ifndef GPUCA_THREAD_COUNT #define GPUCA_THREAD_COUNT 256 #endif - #ifndef GPUCA_LB_GPUTPCCreateSliceData - #define GPUCA_LB_GPUTPCCreateSliceData 256 + #ifndef GPUCA_LB_GPUTPCCreateTrackingData + #define GPUCA_LB_GPUTPCCreateTrackingData 256 #endif #ifndef GPUCA_LB_GPUTPCTrackletConstructor #define GPUCA_LB_GPUTPCTrackletConstructor 256 @@ -372,8 +372,8 @@ #ifndef GPUCA_LB_GPUTPCGMMergerFollowLoopers #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256 #endif - #ifndef GPUCA_LB_GPUTPCGMMergerSliceRefit - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 256 + #ifndef GPUCA_LB_GPUTPCGMMergerSectorRefit + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 #endif #ifndef GPUCA_LB_GPUTPCGMMergerUnpackResetIds #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 @@ -402,8 +402,8 @@ #ifndef GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare #define 
GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare - #define GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256 + #ifndef GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 #endif #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 @@ -512,8 +512,8 @@ #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN -#define GPUCA_LB_GPUTPCTrackletConstructor_singleSlice GPUCA_LB_GPUTPCTrackletConstructor -#define GPUCA_LB_GPUTPCTrackletConstructor_allSlices GPUCA_LB_GPUTPCTrackletConstructor +#define GPUCA_LB_GPUTPCTrackletConstructor_singleSector GPUCA_LB_GPUTPCTrackletConstructor +#define GPUCA_LB_GPUTPCTrackletConstructor_allSectors GPUCA_LB_GPUTPCTrackletConstructor #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER @@ -549,8 +549,8 @@ #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 #endif - #ifndef GPUCA_TRACKLET_SELECTOR_SLICE_COUNT - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 8 // Currently must be smaller than avaiable MultiProcessors on GPU or will result in wrong results + #ifndef GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 8 // Currently must be smaller than avaiable MultiProcessors on GPU or will result in wrong results #endif #ifndef GPUCA_COMP_GATHER_KERNEL #define GPUCA_COMP_GATHER_KERNEL 0 @@ -566,7 +566,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 0 #define GPUCA_SORT_BEFORE_FIT 0 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #define 
GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_THREAD_COUNT_FINDER 1 #define GPUCA_COMP_GATHER_KERNEL 0 #define GPUCA_COMP_GATHER_MODE 0 @@ -612,7 +612,7 @@ #define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread #define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread -#define GPUCA_MAX_SLICE_NTRACK (2 << 24) // Maximum number of tracks per slice (limited by track id format) +#define GPUCA_MAX_SECTOR_NTRACK (2 << 24) // Maximum number of tracks per sector (limited by track id format) // #define GPUCA_KERNEL_DEBUGGER_OUTPUT diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 905622de26ba9..98e0be1bdb1e5 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -90,8 +90,8 @@ AddOptionRTC(extraClusterErrorFactorSplitPadSharedSingleY2, float, 3.0f, "", 0, AddOptionRTC(extraClusterErrorSplitTimeSharedSingleZ2, float, 0.03f, "", 0, "Additive extra cluster error for Z2 if splittime, shared, or single set") AddOptionRTC(extraClusterErrorFactorSplitTimeSharedSingleZ2, float, 3.0f, "", 0, "Multiplicative extra cluster error for Z2 if splittime, shared, or single set") AddOptionArray(errorsCECrossing, float, 5, (0.f, 0.f, 0.f, 0.f, 0.f), "", 0, "Extra errors to add to track when crossing CE, depending on addErrorsCECrossing") // BUG: CUDA cannot yet handle AddOptionArrayRTC -AddOptionRTC(extrapolationTrackingYRangeUpper, float, 0.85f, "", 0, "Inner portion of y-range in slice that is not used in searching for extrapolated track candidates") -AddOptionRTC(extrapolationTrackingYRangeLower, float, 0.85f, "", 0, "Inner portion of y-range in slice that is not used in searching for extrapolated track candidates") +AddOptionRTC(extrapolationTrackingYRangeUpper, float, 0.85f, "", 0, "Inner portion of y-range in sector that is not used in searching for extrapolated 
track candidates") +AddOptionRTC(extrapolationTrackingYRangeLower, float, 0.85f, "", 0, "Inner portion of y-range in sector that is not used in searching for extrapolated track candidates") AddOptionRTC(trackFollowingYFactor, float, 4.f, "", 0, "Weight of y residual vs z residual in tracklet constructor") AddOptionRTC(trackMergerFactor2YS, float, 1.5f * 1.5f, "", 0, "factor2YS for track merging") AddOptionRTC(trackMergerFactor2ZT, float, 1.5f * 1.5f, "", 0, "factor2ZT for track merging") @@ -112,7 +112,7 @@ AddOptionRTC(trackMergerMinPartHits, uint8_t, 10, "", 0, "Minimum hits of track AddOptionRTC(trackMergerMinTotalHits, uint8_t, 20, "", 0, "Minimum total of track part during track merging") AddOptionRTC(mergerCERowLimit, uint8_t, 5, "", 0, "Distance from first / last row in order to attempt merging accross CE") AddOptionRTC(mergerLooperQPtB5Limit, uint8_t, 4, "", 0, "Min Q/Pt (@B=0.5T) to run special looper merging procedure") -AddOptionRTC(mergerLooperSecondHorizontalQPtB5Limit, uint8_t, 2, "", 0, "Min Q/Pt (@B=0.5T) to attempt second horizontal merge between slices after a vertical merge was found") +AddOptionRTC(mergerLooperSecondHorizontalQPtB5Limit, uint8_t, 2, "", 0, "Min Q/Pt (@B=0.5T) to attempt second horizontal merge between sectors after a vertical merge was found") AddOptionRTC(trackFollowingMaxRowGap, uint8_t, 4, "", 0, "Maximum number of consecutive rows without hit in track following") AddOptionRTC(trackFollowingMaxRowGapSeed, uint8_t, 2, "", 0, "Maximum number of consecutive rows without hit in track following during fit of seed") AddOptionRTC(trackFitMaxRowMissedHard, uint8_t, 10, "", 0, "Hard limit for number of missed rows in fit / propagation") @@ -234,6 +234,7 @@ AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") AddOption(debugMask, int32_t, 262143, "", 0, "Mask for debug 
output dumps to file") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") +AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6") AddOption(showOutputStat, bool, false, "", 0, "Print some track output statistics") AddOption(runCompressionStatistics, bool, false, "compressionStat", 0, "Run statistics and verification for cluster compression") @@ -257,7 +258,7 @@ AddOption(autoAdjustHostThreads, bool, true, "", 0, "Auto-adjust number of OMP t AddOption(nStreams, int8_t, 8, "", 0, "Number of GPU streams / command queues") AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that can run in parallel (-1 = autoset)") AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)") -AddOption(trackletSelectorSlices, int8_t, -1, "", 0, "Number of slices to processes in parallel at max") +AddOption(trackletSelectorSectors, int8_t, -1, "", 0, "Number of sectors to processes in parallel at max") AddOption(trackletConstructorInPipeline, int8_t, -1, "", 0, "Run tracklet constructor in the pipeline") AddOption(trackletSelectorInPipeline, int8_t, -1, "", 0, "Run tracklet selector in the pipeline") AddOption(delayedOutput, bool, true, "", 0, "Delay output to be parallel to track fit") @@ -320,8 +321,8 @@ AddOption(drawTracks, bool, false, "", 0, "Highlight sector tracks") AddOption(drawExtrapolatedTracks, bool, false, "", 0, "Highlight global sector tracks prolonged into adjacent sector") AddOption(drawFinal, bool, false, "", 0, "Highlight final tracks") AddOption(excludeClusters, int32_t, 0, "", 0, "Exclude clusters from selected draw objects from display, (2 = exclude clusters but still 
show tracks)") -AddOption(drawSlice, int32_t, -1, "", 0, "Show individual slice") -AddOption(drawRelatedSlices, int32_t, 0, "", 0, "Show related slices (if drawSlice != -1)") +AddOption(drawSector, int32_t, -1, "", 0, "Show individual sector") +AddOption(drawRelatedSectors, int32_t, 0, "", 0, "Show related sectors (if drawSector != -1)") AddOption(drawGrid, int32_t, 0, "", 0, "Highlight grid") AddOption(propagateTracks, int32_t, 0, "", 0, "Propagate final tracks further (inward / outward / show MC tracks)") AddOption(showCollision, int32_t, -1, "", 0, "Show only individual collision") diff --git a/GPU/GPUTracking/GPUTrackingLinkDef_O2.h b/GPU/GPUTracking/GPUTrackingLinkDef_O2.h index d5a81797bf7fc..8e99514a817c5 100644 --- a/GPU/GPUTracking/GPUTrackingLinkDef_O2.h +++ b/GPU/GPUTracking/GPUTrackingLinkDef_O2.h @@ -19,12 +19,12 @@ #pragma link off all functions; #pragma link C++ class o2::gpu::GPUTPCGMMergedTrack + ; -#pragma link C++ class o2::gpu::GPUTPCGMSliceTrack + ; +#pragma link C++ class o2::gpu::GPUTPCGMSectorTrack + ; #pragma link C++ class o2::gpu::GPUTPCGMBorderTrack + ; #pragma link C++ class o2::gpu::GPUTPCGMTrackParam + ; #pragma link C++ class o2::gpu::GPUTPCTrack + ; #pragma link C++ struct o2::gpu::GPUTPCBaseTrackParam + ; -#pragma link C++ struct o2::gpu::GPUTPCGMSliceTrack::sliceTrackParam + ; +#pragma link C++ struct o2::gpu::GPUTPCGMSectorTrack::sectorTrackParam + ; #pragma link C++ class o2::gpu::trackInterface < o2::gpu::GPUTPCGMTrackParam> + ; #pragma link C++ class o2::gpu::GPUTRDTrack_t < o2::gpu::trackInterface < o2::gpu::GPUTPCGMTrackParam>> + ; #pragma link C++ class o2::gpu::gputpcgmmergertypes::GPUTPCOuterParam + ; diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 0981fea43810a..a7c582b79d964 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -17,9 +17,9 @@ #include "GPUReconstructionCPU.h" -namespace o2 -{ -namespace gpu +#include + +namespace o2::gpu { 
class GPUChain { @@ -51,7 +51,7 @@ class GPUChain virtual bool SupportsDoublePipeline() { return false; } virtual int32_t FinalizePipelinedProcessing() { return 0; } - constexpr static int32_t NSLICES = GPUReconstruction::NSLICES; + constexpr static int32_t NSECTORS = GPUReconstruction::NSECTORS; virtual void DumpSettings(const char* dir = "") {} virtual void ReadSettings(const char* dir = "") {} @@ -101,7 +101,7 @@ class GPUChain } inline bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return mRec->IsEventDone(evList, nEvents); } inline void RecordMarker(deviceEvent* ev, int32_t stream) { mRec->RecordMarker(ev, stream); } - virtual inline std::unique_ptr GetThreadContext() { return mRec->GetThreadContext(); } + virtual inline std::unique_ptr GetThreadContext() { return mRec->GetThreadContext(); } inline void SynchronizeGPU() { mRec->SynchronizeGPU(); } inline void ReleaseEvent(deviceEvent ev, bool doGPU = true) { @@ -282,19 +282,22 @@ template int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) { if (GetRecoSteps().isSet(step)) { - if (GetProcessingSettings().debugLevel >= 1) { - mRec->getRecoStepTimer(step).Start(); + auto* timer = GetProcessingSettings().recoTaskTiming ? 
&mRec->getRecoStepTimer(step) : nullptr; + std::clock_t c; + if (timer) { + timer->timerTotal.Start(); + c = std::clock(); } int32_t retVal = (reinterpret_cast(this)->*func)(args...); - if (GetProcessingSettings().debugLevel >= 1) { - mRec->getRecoStepTimer(step).Stop(); + if (timer) { + timer->timerTotal.Stop(); + timer->timerCPU += (double)(std::clock() - c) / CLOCKS_PER_SEC; } return retVal; } - return false; + return 0; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 66f37e1122832..9de8b2174a182 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -20,8 +20,8 @@ #include "GPUChainTracking.h" #include "GPUChainTrackingDefs.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSliceOutput.h" -#include "GPUTPCSliceOutCluster.h" +#include "GPUTPCSectorOutput.h" +#include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTPCTrack.h" @@ -78,9 +78,9 @@ void GPUChainTracking::RegisterPermanentMemoryAndProcessors() } mRec->RegisterGPUProcessor(mInputsHost.get(), mRec->IsGPU()); - if (GetRecoSteps() & RecoStep::TPCSliceTracking) { - for (uint32_t i = 0; i < NSLICES; i++) { - mRec->RegisterGPUProcessor(&processors()->tpcTrackers[i], GetRecoStepsGPU() & RecoStep::TPCSliceTracking); + if (GetRecoSteps() & RecoStep::TPCSectorTracking) { + for (uint32_t i = 0; i < NSECTORS; i++) { + mRec->RegisterGPUProcessor(&processors()->tpcTrackers[i], GetRecoStepsGPU() & RecoStep::TPCSectorTracking); } } if (GetRecoSteps() & RecoStep::TPCMerging) { @@ -102,7 +102,7 @@ void GPUChainTracking::RegisterPermanentMemoryAndProcessors() mRec->RegisterGPUProcessor(&processors()->tpcDecompressor, GetRecoStepsGPU() & RecoStep::TPCDecompression); } if (GetRecoSteps() & RecoStep::TPCClusterFinding) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t 
i = 0; i < NSECTORS; i++) { mRec->RegisterGPUProcessor(&processors()->tpcClusterer[i], GetRecoStepsGPU() & RecoStep::TPCClusterFinding); } } @@ -121,8 +121,8 @@ void GPUChainTracking::RegisterGPUProcessors() mRec->RegisterGPUDeviceProcessor(mInputsShadow.get(), mInputsHost.get()); } memcpy((void*)&processorsShadow()->trdTrackerGPU, (const void*)&processors()->trdTrackerGPU, sizeof(processors()->trdTrackerGPU)); - if (GetRecoStepsGPU() & RecoStep::TPCSliceTracking) { - for (uint32_t i = 0; i < NSLICES; i++) { + if (GetRecoStepsGPU() & RecoStep::TPCSectorTracking) { + for (uint32_t i = 0; i < NSECTORS; i++) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcTrackers[i], &processors()->tpcTrackers[i]); } } @@ -147,7 +147,7 @@ void GPUChainTracking::RegisterGPUProcessors() mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcDecompressor, &processors()->tpcDecompressor); } if (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcClusterer[i], &processors()->tpcClusterer[i]); } } @@ -176,7 +176,7 @@ bool GPUChainTracking::ValidateSteps() return false; } if (!param().par.earlyTpcTransform) { - if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion)) { + if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion)) { GPUError("Invalid Reconstruction Step Setting: Tracking without early transform requires TPC Conversion to be active"); return false; } @@ -200,11 +200,11 @@ bool GPUChainTracking::ValidateSteps() return false; } #endif - if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking) || 
(GetRecoSteps() & GPUDataTypes::RecoStep::TPCCompression) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCdEdx)) && !tpcClustersAvail) { + if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCCompression) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCdEdx)) && !tpcClustersAvail) { GPUError("Missing input for TPC Cluster conversion / sector tracking / compression / dEdx: TPC Clusters required"); return false; } - if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking))) { + if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking))) { GPUError("Input for TPC merger missing"); return false; } @@ -220,8 +220,8 @@ bool GPUChainTracking::ValidateSteps() GPUError("TPC Raw / TPC Clusters / TRD Tracklets cannot be output"); return false; } - if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking)) { - GPUError("No TPC Slice Tracker Output available"); + if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { + GPUError("No TPC Sector Tracker Output available"); return false; } if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCMergedTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) { @@ -550,11 +550,11 @@ void GPUChainTracking::ClearIOPointers() void GPUChainTracking::AllocateIOMemory() { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { AllocateIOMemoryHelper(mIOPtrs.nClusterData[i], mIOPtrs.clusterData[i], mIOMem.clusterData[i]); 
AllocateIOMemoryHelper(mIOPtrs.nRawClusters[i], mIOPtrs.rawClusters[i], mIOMem.rawClusters[i]); - AllocateIOMemoryHelper(mIOPtrs.nSliceTracks[i], mIOPtrs.sliceTracks[i], mIOMem.sliceTracks[i]); - AllocateIOMemoryHelper(mIOPtrs.nSliceClusters[i], mIOPtrs.sliceClusters[i], mIOMem.sliceClusters[i]); + AllocateIOMemoryHelper(mIOPtrs.nSectorTracks[i], mIOPtrs.sectorTracks[i], mIOMem.sectorTracks[i]); + AllocateIOMemoryHelper(mIOPtrs.nSectorClusters[i], mIOPtrs.sectorClusters[i], mIOMem.sectorClusters[i]); } mIOMem.clusterNativeAccess.reset(new ClusterNativeAccess); std::memset(mIOMem.clusterNativeAccess.get(), 0, sizeof(ClusterNativeAccess)); // ClusterNativeAccess has no its own constructor @@ -718,25 +718,25 @@ int32_t GPUChainTracking::RunChain() } if (GetProcessingSettings().autoAdjustHostThreads && !mRec->IsGPU() && mIOPtrs.clustersNative) { - mRec->SetNActiveThreads(mIOPtrs.clustersNative->nClustersTotal / 5000); + mRec->SetNActiveThreads(mIOPtrs.clustersNative->nClustersTotal / 1500); } if (mIOPtrs.clustersNative && runRecoStep(RecoStep::TPCConversion, &GPUChainTracking::ConvertNativeToClusterData)) { return 1; } - mRec->PushNonPersistentMemory(qStr2Tag("TPCSLCD1")); // 1st stack level for TPC tracking slice data - mTPCSliceScratchOnStack = true; - if (runRecoStep(RecoStep::TPCSliceTracking, &GPUChainTracking::RunTPCTrackingSlices)) { + mRec->PushNonPersistentMemory(qStr2Tag("TPCSLCD1")); // 1st stack level for TPC tracking sector data + mTPCSectorScratchOnStack = true; + if (runRecoStep(RecoStep::TPCSectorTracking, &GPUChainTracking::RunTPCTrackingSectors)) { return 1; } if (runRecoStep(RecoStep::TPCMerging, &GPUChainTracking::RunTPCTrackingMerger, false)) { return 1; } - if (mTPCSliceScratchOnStack) { - mRec->PopNonPersistentMemory(RecoStep::TPCSliceTracking, qStr2Tag("TPCSLCD1")); // Release 1st stack level, TPC slice data not needed after merger - mTPCSliceScratchOnStack = false; + if (mTPCSectorScratchOnStack) { + 
mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLCD1")); // Release 1st stack level, TPC sector data not needed after merger + mTPCSectorScratchOnStack = false; } if (mIOPtrs.clustersNative) { diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 6e5e0b3048140..4a2778851e517 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -24,35 +24,24 @@ #include #include -namespace o2 -{ -namespace trd +namespace o2::trd { class GeometryFlat; -} -} // namespace o2 +} // namespace o2::trd -namespace o2 -{ -namespace tpc +namespace o2::tpc { struct ClusterNativeAccess; struct ClusterNative; class CalibdEdxContainer; -} // namespace tpc -} // namespace o2 +} // namespace o2::tpc -namespace o2 -{ -namespace base +namespace o2::base { class MatLayerCylSet; -} -} // namespace o2 +} // namespace o2::base -namespace o2 -{ -namespace gpu +namespace o2::gpu { //class GPUTRDTrackerGPU; class GPUTPCGPUTracker; @@ -101,14 +90,14 @@ class GPUChainTracking : public GPUChain std::unique_ptr tpcCompressedClusters; // TODO: Fix alignment std::unique_ptr tpcZSmeta; std::unique_ptr tpcZSmeta2; - std::unique_ptr tpcDigits[NSLICES]; + std::unique_ptr tpcDigits[NSECTORS]; std::unique_ptr digitMap; - std::unique_ptr clusterData[NSLICES]; - std::unique_ptr rawClusters[NSLICES]; + std::unique_ptr clusterData[NSECTORS]; + std::unique_ptr rawClusters[NSECTORS]; std::unique_ptr clustersNative; std::unique_ptr clusterNativeAccess; - std::unique_ptr sliceTracks[NSLICES]; - std::unique_ptr sliceClusters[NSLICES]; + std::unique_ptr sectorTracks[NSECTORS]; + std::unique_ptr sectorClusters[NSECTORS]; std::unique_ptr mcLabelsTPC; std::unique_ptr mcInfosTPC; std::unique_ptr mcInfosTPCCol; @@ -123,7 +112,7 @@ class GPUChainTracking : public GPUChain std::unique_ptr trdTracks; std::unique_ptr clusterNativeMC; std::unique_ptr> clusterNativeMCView; - std::unique_ptr tpcDigitsMC[NSLICES]; + 
std::unique_ptr tpcDigitsMC[NSECTORS]; std::unique_ptr[]> tpcDigitMCView; std::unique_ptr tpcDigitMCMap; std::unique_ptr> clusterNativeMCBuffer; @@ -149,8 +138,8 @@ class GPUChainTracking : public GPUChain // Getters for external usage of tracker classes GPUTRDTrackerGPU* GetTRDTrackerGPU() { return &processors()->trdTrackerGPU; } - GPUTPCTracker* GetTPCSliceTrackers() { return processors()->tpcTrackers; } - const GPUTPCTracker* GetTPCSliceTrackers() const { return processors()->tpcTrackers; } + GPUTPCTracker* GetTPCSectorTrackers() { return processors()->tpcTrackers; } + const GPUTPCTracker* GetTPCSectorTrackers() const { return processors()->tpcTrackers; } const GPUTPCGMMerger& GetTPCMerger() const { return processors()->tpcMerger; } GPUTPCGMMerger& GetTPCMerger() { return processors()->tpcMerger; } GPUDisplayInterface* GetEventDisplay() { return mEventDisplay.get(); } @@ -164,7 +153,7 @@ class GPUChainTracking : public GPUChain // Processing functions int32_t RunTPCClusterizer(bool synchronizeOutput = true); int32_t ForwardTPCDigits(); - int32_t RunTPCTrackingSlices(); + int32_t RunTPCTrackingSectors(); int32_t RunTPCTrackingMerger(bool synchronizeOutput = true); template int32_t RunTRDTracking(); @@ -216,7 +205,7 @@ class GPUChainTracking : public GPUChain struct eventStruct // Must consist only of void* ptr that will hold the GPU event ptrs! 
{ - deviceEvent slice[NSLICES]; + deviceEvent sector[NSECTORS]; deviceEvent stream[GPUCA_MAX_STREAMS]; deviceEvent init; deviceEvent single; @@ -231,9 +220,9 @@ class GPUChainTracking : public GPUChain GPUChainTracking(GPUReconstruction* rec, uint32_t maxTPCHits = GPUCA_MAX_CLUSTERS, uint32_t maxTRDTracklets = GPUCA_MAX_TRD_TRACKLETS); - int32_t ReadEvent(uint32_t iSlice, int32_t threadId); - void WriteOutput(int32_t iSlice, int32_t threadId); - int32_t ExtrapolationTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput = true); + int32_t ReadEvent(uint32_t iSector, int32_t threadId); + void WriteOutput(int32_t iSector, int32_t threadId); + int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput = true); int32_t PrepareProfile(); int32_t DoProfile(); @@ -277,7 +266,7 @@ class GPUChainTracking : public GPUChain // (Ptrs to) configuration objects std::unique_ptr mCFContext; - bool mTPCSliceScratchOnStack = false; + bool mTPCSectorScratchOnStack = false; std::unique_ptr mNewCalibObjects; bool mUpdateNewCalibObjects = false; std::unique_ptr mNewCalibValues; @@ -291,24 +280,24 @@ class GPUChainTracking : public GPUChain // Synchronization and Locks eventStruct* mEvents = nullptr; - volatile int32_t mSliceSelectorReady = 0; - std::array mWriteOutputDone; + volatile int32_t mSectorSelectorReady = 0; + std::array mWriteOutputDone; std::vector mOutputQueue; private: int32_t RunChainFinalize(); void SanityCheck(); - int32_t RunTPCTrackingSlices_internal(); + int32_t RunTPCTrackingSectors_internal(); int32_t RunTPCClusterizer_prepare(bool restorePointers); #ifdef GPUCA_TPC_GEOMETRY_O2 - std::pair RunTPCClusterizer_transferZS(int32_t iSlice, const CfFragment& fragment, int32_t lane); + std::pair RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane); void RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int32_t stage, bool doGPU, int32_t lane); - 
std::pair TPCClusterizerDecodeZSCount(uint32_t iSlice, const CfFragment& fragment); - std::pair TPCClusterizerDecodeZSCountUpdate(uint32_t iSlice, const CfFragment& fragment); - void TPCClusterizerEnsureZSOffsets(uint32_t iSlice, const CfFragment& fragment); + std::pair TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment); + std::pair TPCClusterizerDecodeZSCountUpdate(uint32_t iSector, const CfFragment& fragment); + void TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfFragment& fragment); #endif - void RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType); + void RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType); void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType); void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts); bool NeedTPCClustersOnGPU(); @@ -320,7 +309,6 @@ class GPUChainTracking : public GPUChain int32_t OutputStream() const { return mRec->NStreams() - 2; } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 6ca645808c5bd..5cb9a7e46ac91 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -35,8 +35,6 @@ #include "utils/strtag.h" -#include - #ifndef GPUCA_NO_VC #include #endif @@ -47,23 +45,23 @@ using namespace o2::tpc::constants; using namespace o2::dataformats; #ifdef GPUCA_TPC_GEOMETRY_O2 -std::pair GPUChainTracking::TPCClusterizerDecodeZSCountUpdate(uint32_t iSlice, const CfFragment& fragment) +std::pair GPUChainTracking::TPCClusterizerDecodeZSCountUpdate(uint32_t iSector, const CfFragment& fragment) { bool doGPU = 
mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder::ZSOffset* o = processors()->tpcClusterer[iSlice].mPzsOffsets; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder::ZSOffset* o = processors()->tpcClusterer[iSector].mPzsOffsets; uint32_t digits = 0; uint32_t pages = 0; for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - clusterer.mMinMaxCN[j] = mCFContext->fragmentData[fragment.index].minMaxCN[iSlice][j]; + clusterer.mMinMaxCN[j] = mCFContext->fragmentData[fragment.index].minMaxCN[iSector][j]; if (doGPU) { uint16_t posInEndpoint = 0; uint16_t pagesEndpoint = 0; for (uint32_t k = clusterer.mMinMaxCN[j].zsPtrFirst; k < clusterer.mMinMaxCN[j].zsPtrLast; k++) { const uint32_t pageFirst = (k == clusterer.mMinMaxCN[j].zsPtrFirst) ? clusterer.mMinMaxCN[j].zsPageFirst : 0; - const uint32_t pageLast = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; + const uint32_t pageLast = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? 
clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; for (uint32_t l = pageFirst; l < pageLast; l++) { - uint16_t pageDigits = mCFContext->fragmentData[fragment.index].pageDigits[iSlice][j][posInEndpoint++]; + uint16_t pageDigits = mCFContext->fragmentData[fragment.index].pageDigits[iSector][j][posInEndpoint++]; if (pageDigits) { *(o++) = GPUTPCClusterFinder::ZSOffset{digits, j, pagesEndpoint}; digits += pageDigits; @@ -71,44 +69,44 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCountUpdat pagesEndpoint++; } } - if (pagesEndpoint != mCFContext->fragmentData[fragment.index].pageDigits[iSlice][j].size()) { + if (pagesEndpoint != mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size()) { if (GetProcessingSettings().ignoreNonFatalGPUErrors) { - GPUError("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSlice][j].size()); + GPUError("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size()); return {0, 0}; } else { - GPUFatal("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSlice][j].size()); + GPUFatal("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size()); } } } else { clusterer.mPzsOffsets[j] = GPUTPCClusterFinder::ZSOffset{digits, j, 0}; - digits += mCFContext->fragmentData[fragment.index].nDigits[iSlice][j]; - pages += mCFContext->fragmentData[fragment.index].nPages[iSlice][j]; + digits += mCFContext->fragmentData[fragment.index].nDigits[iSector][j]; + pages += mCFContext->fragmentData[fragment.index].nPages[iSector][j]; } } if (doGPU) { - pages 
= o - processors()->tpcClusterer[iSlice].mPzsOffsets; + pages = o - processors()->tpcClusterer[iSector].mPzsOffsets; } if (!doGPU && GetProcessingSettings().debugLevel >= 4 && mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) { - TPCClusterizerEnsureZSOffsets(iSlice, fragment); + TPCClusterizerEnsureZSOffsets(iSector, fragment); } return {digits, pages}; } -void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSlice, const CfFragment& fragment) +void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfFragment& fragment) { - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; uint32_t nAdcs = 0; for (uint16_t endpoint = 0; endpoint < GPUTrackingInOutZS::NENDPOINTS; endpoint++) { const auto& data = mCFContext->fragmentData[fragment.index]; uint32_t pagesEndpoint = 0; - const uint32_t nAdcsExpected = data.nDigits[iSlice][endpoint]; - const uint32_t nPagesExpected = data.nPages[iSlice][endpoint]; + const uint32_t nAdcsExpected = data.nDigits[iSector][endpoint]; + const uint32_t nPagesExpected = data.nPages[iSector][endpoint]; uint32_t nAdcDecoded = 0; - const auto& zs = mIOPtrs.tpcZS->slice[iSlice]; - for (uint32_t i = data.minMaxCN[iSlice][endpoint].zsPtrFirst; i < data.minMaxCN[iSlice][endpoint].zsPtrLast; i++) { - const uint32_t pageFirst = (i == data.minMaxCN[iSlice][endpoint].zsPtrFirst) ? data.minMaxCN[iSlice][endpoint].zsPageFirst : 0; - const uint32_t pageLast = (i + 1 == data.minMaxCN[iSlice][endpoint].zsPtrLast) ? data.minMaxCN[iSlice][endpoint].zsPageLast : zs.nZSPtr[endpoint][i]; + const auto& zs = mIOPtrs.tpcZS->sector[iSector]; + for (uint32_t i = data.minMaxCN[iSector][endpoint].zsPtrFirst; i < data.minMaxCN[iSector][endpoint].zsPtrLast; i++) { + const uint32_t pageFirst = (i == data.minMaxCN[iSector][endpoint].zsPtrFirst) ? 
data.minMaxCN[iSector][endpoint].zsPageFirst : 0; + const uint32_t pageLast = (i + 1 == data.minMaxCN[iSector][endpoint].zsPtrLast) ? data.minMaxCN[iSector][endpoint].zsPageLast : zs.nZSPtr[endpoint][i]; for (uint32_t j = pageFirst; j < pageLast; j++) { const uint8_t* page = static_cast(zs.zsPtr[endpoint][i]) + j * TPCZSHDR::TPC_ZS_PAGE_SIZE; const header::RAWDataHeader* rawDataHeader = reinterpret_cast(page); @@ -121,15 +119,15 @@ void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSlice, const CfFr } if (pagesEndpoint != nPagesExpected) { - GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %lu", iSlice, endpoint, fragment.index, pagesEndpoint, nPagesExpected); + GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %lu", iSector, endpoint, fragment.index, pagesEndpoint, nPagesExpected); } if (nAdcDecoded != nAdcsExpected) { - GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC count mismatch: expected %u, buffered %u", iSlice, endpoint, fragment.index, nAdcsExpected, nAdcDecoded); + GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC count mismatch: expected %u, buffered %u", iSector, endpoint, fragment.index, nAdcsExpected, nAdcDecoded); } if (nAdcs != clusterer.mPzsOffsets[endpoint].offset) { - GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC offset mismatch: expected %u, buffered %u", iSlice, endpoint, fragment.index, nAdcs, clusterer.mPzsOffsets[endpoint].offset); + GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC offset mismatch: expected %u, buffered %u", iSector, endpoint, fragment.index, nAdcs, clusterer.mPzsOffsets[endpoint].offset); } nAdcs += nAdcsExpected; @@ -143,7 +141,7 @@ struct TPCCFDecodeScanTmp { }; } // namespace -std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint32_t iSlice, const CfFragment& fragment) +std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment) { 
mRec->getGeneralStepTimer(GeneralStep::Prepare).Start(); uint32_t nDigits = 0; @@ -151,15 +149,16 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t endpointAdcSamples[GPUTrackingInOutZS::NENDPOINTS]; memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples)); bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->slice[iSlice].count[0] && mIOPtrs.tpcZS->slice[iSlice].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]) : 0; + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) + : 0; for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { #ifndef GPUCA_NO_VC if (GetProcessingSettings().prefetchTPCpageScan >= 3 && j < GPUTrackingInOutZS::NENDPOINTS - 1) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[iSlice].count[j + 1]; k++) { - for (uint32_t l = 0; l < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j + 1][k]; l++) { - Vc::Common::prefetchMid(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); - Vc::Common::prefetchMid(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[iSector].count[j + 1]; k++) { + for (uint32_t l = 0; l < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j + 1][k]; l++) { + Vc::Common::prefetchMid(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); + Vc::Common::prefetchMid(((const 
uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); } } } @@ -176,19 +175,19 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t firstPossibleFragment = 0; uint32_t pageCounter = 0; uint32_t emptyPages = 0; - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[iSlice].count[j]; k++) { - if (GetProcessingSettings().tpcSingleSector != -1 && GetProcessingSettings().tpcSingleSector != (int32_t)iSlice) { + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[iSector].count[j]; k++) { + if (GetProcessingSettings().tpcSingleSector != -1 && GetProcessingSettings().tpcSingleSector != (int32_t)iSector) { break; } - nPages += mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; - for (uint32_t l = 0; l < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; l++) { + nPages += mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; + for (uint32_t l = 0; l < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; l++) { #ifndef GPUCA_NO_VC - if (GetProcessingSettings().prefetchTPCpageScan >= 2 && l + 1 < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]) { - Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE); - Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); + if (GetProcessingSettings().prefetchTPCpageScan >= 2 && l + 1 < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]) { + Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE); + Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); } #endif - const uint8_t* const page = ((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE; + const uint8_t* const page = ((const 
uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE; const o2::header::RAWDataHeader* rdh = (const o2::header::RAWDataHeader*)page; if (o2::raw::RDHUtils::getMemorySize(*rdh) == sizeof(o2::header::RAWDataHeader)) { emptyPages++; @@ -253,7 +252,7 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint } bool extendsInNextPage = false; if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) { - if (l + 1 < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k] && o2::raw::RDHUtils::getMemorySize(*rdh) == TPCZSHDR::TPC_ZS_PAGE_SIZE) { + if (l + 1 < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k] && o2::raw::RDHUtils::getMemorySize(*rdh) == TPCZSHDR::TPC_ZS_PAGE_SIZE) { const o2::header::RAWDataHeader* nextrdh = (const o2::header::RAWDataHeader*)(page + TPCZSHDR::TPC_ZS_PAGE_SIZE); extendsInNextPage = o2::raw::RDHUtils::getHeartBeatOrbit(*nextrdh) == o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) && o2::raw::RDHUtils::getMemorySize(*nextrdh) > sizeof(o2::header::RAWDataHeader); } @@ -267,8 +266,8 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint // Only add extended page on GPU. On CPU the pages are in consecutive memory anyway. // Not adding the page prevents an issue where a page is decoded twice on CPU, when only the extend should be decoded. 
fragments[ff].second.zsPageLast++; - mCFContext->fragmentData[ff].nPages[iSlice][j]++; - mCFContext->fragmentData[ff].pageDigits[iSlice][j].emplace_back(0); + mCFContext->fragmentData[ff].nPages[iSector][j]++; + mCFContext->fragmentData[ff].pageDigits[iSector][j].emplace_back(0); } fragmentExtends[ff] = false; } @@ -286,27 +285,27 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint fragments[f].second.zsPageFirst = l; } else { if (pageCounter > (uint32_t)fragments[f].second.pageCounter + 1) { - mCFContext->fragmentData[f].nPages[iSlice][j] += emptyPages + pageCounter - fragments[f].second.pageCounter - 1; + mCFContext->fragmentData[f].nPages[iSector][j] += emptyPages + pageCounter - fragments[f].second.pageCounter - 1; for (uint32_t k2 = fragments[f].second.zsPtrLast - 1; k2 <= k; k2++) { - for (uint32_t l2 = ((int32_t)k2 == fragments[f].second.zsPtrLast - 1) ? fragments[f].second.zsPageLast : 0; l2 < (k2 < k ? mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k2] : l); l2++) { + for (uint32_t l2 = ((int32_t)k2 == fragments[f].second.zsPtrLast - 1) ? fragments[f].second.zsPageLast : 0; l2 < (k2 < k ? mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k2] : l); l2++) { if (doGPU) { - mCFContext->fragmentData[f].pageDigits[iSlice][j].emplace_back(0); + mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(0); } else { // CPU cannot skip unneeded pages, so we must keep space to store the invalid dummy clusters - const uint8_t* const pageTmp = ((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k2]) + l2 * TPCZSHDR::TPC_ZS_PAGE_SIZE; + const uint8_t* const pageTmp = ((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k2]) + l2 * TPCZSHDR::TPC_ZS_PAGE_SIZE; const o2::header::RAWDataHeader* rdhTmp = (const o2::header::RAWDataHeader*)pageTmp; if (o2::raw::RDHUtils::getMemorySize(*rdhTmp) != sizeof(o2::header::RAWDataHeader)) { const TPCZSHDR* const hdrTmp = (const TPCZSHDR*)(rdh_utils::getLink(o2::raw::RDHUtils::getFEEID(*rdhTmp)) == rdh_utils::DLBZSLinkID ? 
(pageTmp + o2::raw::RDHUtils::getMemorySize(*rdhTmp) - sizeof(TPCZSHDRV2)) : (pageTmp + sizeof(o2::header::RAWDataHeader))); - mCFContext->fragmentData[f].nDigits[iSlice][j] += hdrTmp->nADCsamples; + mCFContext->fragmentData[f].nDigits[iSector][j] += hdrTmp->nADCsamples; } } } } } else if (emptyPages) { - mCFContext->fragmentData[f].nPages[iSlice][j] += emptyPages; + mCFContext->fragmentData[f].nPages[iSector][j] += emptyPages; if (doGPU) { for (uint32_t m = 0; m < emptyPages; m++) { - mCFContext->fragmentData[f].pageDigits[iSlice][j].emplace_back(0); + mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(0); } } } @@ -314,10 +313,10 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint fragments[f].second.zsPtrLast = k + 1; fragments[f].second.zsPageLast = l + 1; fragments[f].second.pageCounter = pageCounter; - mCFContext->fragmentData[f].nPages[iSlice][j]++; - mCFContext->fragmentData[f].nDigits[iSlice][j] += hdr->nADCsamples; + mCFContext->fragmentData[f].nPages[iSector][j]++; + mCFContext->fragmentData[f].nDigits[iSector][j] += hdr->nADCsamples; if (doGPU) { - mCFContext->fragmentData[f].pageDigits[iSlice][j].emplace_back(hdr->nADCsamples); + mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(hdr->nADCsamples); } fragmentExtends[f] = extendsInNextPage; } else { @@ -338,19 +337,19 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint } } for (uint32_t f = 0; f < mCFContext->nFragments; f++) { - mCFContext->fragmentData[f].minMaxCN[iSlice][j].zsPtrLast = fragments[f].second.zsPtrLast; - mCFContext->fragmentData[f].minMaxCN[iSlice][j].zsPtrFirst = fragments[f].second.zsPtrFirst; - mCFContext->fragmentData[f].minMaxCN[iSlice][j].zsPageLast = fragments[f].second.zsPageLast; - mCFContext->fragmentData[f].minMaxCN[iSlice][j].zsPageFirst = fragments[f].second.zsPageFirst; + mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPtrLast = fragments[f].second.zsPtrLast; + mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPtrFirst 
= fragments[f].second.zsPtrFirst; + mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPageLast = fragments[f].second.zsPageLast; + mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPageFirst = fragments[f].second.zsPageFirst; } } mCFContext->nPagesTotal += nPages; - mCFContext->nPagesSector[iSlice] = nPages; + mCFContext->nPagesSector[iSector] = nPages; - mCFContext->nDigitsEndpointMax[iSlice] = 0; + mCFContext->nDigitsEndpointMax[iSector] = 0; for (uint32_t i = 0; i < GPUTrackingInOutZS::NENDPOINTS; i++) { - if (endpointAdcSamples[i] > mCFContext->nDigitsEndpointMax[iSlice]) { - mCFContext->nDigitsEndpointMax[iSlice] = endpointAdcSamples[i]; + if (endpointAdcSamples[i] > mCFContext->nDigitsEndpointMax[iSector]) { + mCFContext->nDigitsEndpointMax[iSector] = endpointAdcSamples[i]; } } uint32_t nDigitsFragmentMax = 0; @@ -358,8 +357,8 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t pagesInFragment = 0; uint32_t digitsInFragment = 0; for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - pagesInFragment += mCFContext->fragmentData[i].nPages[iSlice][j]; - digitsInFragment += mCFContext->fragmentData[i].nDigits[iSlice][j]; + pagesInFragment += mCFContext->fragmentData[i].nPages[iSector][j]; + digitsInFragment += mCFContext->fragmentData[i].nDigits[iSector][j]; } mCFContext->nPagesFragmentMax = std::max(mCFContext->nPagesFragmentMax, pagesInFragment); nDigitsFragmentMax = std::max(nDigitsFragmentMax, digitsInFragment); @@ -373,7 +372,7 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust auto& in = stage ? clustererShadow.mPpeakPositions : clustererShadow.mPpositions; auto& out = stage ? clustererShadow.mPfilteredPeakPositions : clustererShadow.mPpeakPositions; if (doGPU) { - const uint32_t iSlice = clusterer.mISlice; + const uint32_t iSector = clusterer.mISector; auto& count = stage ? 
clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions; std::vector counts; @@ -389,22 +388,22 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust for (uint32_t i = 1; i < nSteps; i++) { counts.push_back(tmpCount); if (i == 1) { - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSlice}}, i, stage); + runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, stage); } else { - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSlice}}, i, tmpCount); + runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, tmpCount); } tmpCount = (tmpCount + clusterer.mScanWorkGroupSize - 1) / clusterer.mScanWorkGroupSize; } - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSlice}}, nSteps, tmpCount); + runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, nSteps, tmpCount); for (uint32_t i = nSteps - 1; i > 1; i--) { tmpCount = counts[i - 1]; - runKernel({GetGrid(tmpCount - clusterer.mScanWorkGroupSize, clusterer.mScanWorkGroupSize, lane), {iSlice}}, i, clusterer.mScanWorkGroupSize, tmpCount); + runKernel({GetGrid(tmpCount - clusterer.mScanWorkGroupSize, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, clusterer.mScanWorkGroupSize, tmpCount); } } - runKernel({GetGrid(count, clusterer.mScanWorkGroupSize, lane), {iSlice}}, 1, stage, in, out); + runKernel({GetGrid(count, clusterer.mScanWorkGroupSize, lane), {iSector}}, 1, stage, in, out); } else { auto& nOut = stage ? clusterer.mPmemory->counters.nClusters : clusterer.mPmemory->counters.nPeaks; auto& nIn = stage ? 
clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions; @@ -418,35 +417,35 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust } } -std::pair GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSlice, const CfFragment& fragment, int32_t lane) +std::pair GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane) { bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding; if (mCFContext->abandonTimeframe) { return {0, 0}; } - const auto& retVal = TPCClusterizerDecodeZSCountUpdate(iSlice, fragment); + const auto& retVal = TPCClusterizerDecodeZSCountUpdate(iSector, fragment); if (doGPU) { - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; uint32_t nPagesSector = 0; for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { uint32_t nPages = 0; - mInputsHost->mPzsMeta->slice[iSlice].zsPtr[j] = &mInputsShadow->mPzsPtrs[iSlice * GPUTrackingInOutZS::NENDPOINTS + j]; - mInputsHost->mPzsPtrs[iSlice * GPUTrackingInOutZS::NENDPOINTS + j] = clustererShadow.mPzs + (nPagesSector + nPages) * TPCZSHDR::TPC_ZS_PAGE_SIZE; + mInputsHost->mPzsMeta->sector[iSector].zsPtr[j] = &mInputsShadow->mPzsPtrs[iSector * GPUTrackingInOutZS::NENDPOINTS + j]; + mInputsHost->mPzsPtrs[iSector * GPUTrackingInOutZS::NENDPOINTS + j] = clustererShadow.mPzs + (nPagesSector + nPages) * TPCZSHDR::TPC_ZS_PAGE_SIZE; for (uint32_t k = clusterer.mMinMaxCN[j].zsPtrFirst; k < clusterer.mMinMaxCN[j].zsPtrLast; k++) { const uint32_t min = (k == clusterer.mMinMaxCN[j].zsPtrFirst) ? clusterer.mMinMaxCN[j].zsPageFirst : 0; - const uint32_t max = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? 
clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; + const uint32_t max = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; if (max > min) { - char* src = (char*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k] + min * TPCZSHDR::TPC_ZS_PAGE_SIZE; - char* ptrLast = (char*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k] + (max - 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE; + char* src = (char*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k] + min * TPCZSHDR::TPC_ZS_PAGE_SIZE; + char* ptrLast = (char*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k] + (max - 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE; size_t size = (ptrLast - src) + o2::raw::RDHUtils::getMemorySize(*(const o2::header::RAWDataHeader*)ptrLast); GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPzs + (nPagesSector + nPages) * TPCZSHDR::TPC_ZS_PAGE_SIZE, src, size, lane, true); } nPages += max - min; } - mInputsHost->mPzsMeta->slice[iSlice].nZSPtr[j] = &mInputsShadow->mPzsSizes[iSlice * GPUTrackingInOutZS::NENDPOINTS + j]; - mInputsHost->mPzsSizes[iSlice * GPUTrackingInOutZS::NENDPOINTS + j] = nPages; - mInputsHost->mPzsMeta->slice[iSlice].count[j] = 1; + mInputsHost->mPzsMeta->sector[iSector].nZSPtr[j] = &mInputsShadow->mPzsSizes[iSector * GPUTrackingInOutZS::NENDPOINTS + j]; + mInputsHost->mPzsSizes[iSector * GPUTrackingInOutZS::NENDPOINTS + j] = nPages; + mInputsHost->mPzsMeta->sector[iSector].count[j] = 1; nPagesSector += nPages; } GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPzsOffsets, clusterer.mPzsOffsets, clusterer.mNMaxPages * sizeof(*clusterer.mPzsOffsets), lane, true); @@ -458,10 +457,10 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) { bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; if (restorePointers) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - processors()->tpcClusterer[iSlice].mPzsOffsets = mCFContext->ptrSave[iSlice].zsOffsetHost; - 
processorsShadow()->tpcClusterer[iSlice].mPzsOffsets = mCFContext->ptrSave[iSlice].zsOffsetDevice; - processorsShadow()->tpcClusterer[iSlice].mPzs = mCFContext->ptrSave[iSlice].zsDevice; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + processors()->tpcClusterer[iSector].mPzsOffsets = mCFContext->ptrSave[iSector].zsOffsetHost; + processorsShadow()->tpcClusterer[iSector].mPzsOffsets = mCFContext->ptrSave[iSector].zsOffsetDevice; + processorsShadow()->tpcClusterer[iSector].mPzs = mCFContext->ptrSave[iSector].zsDevice; } processorsShadow()->ioPtrs.clustersNative = mCFContext->ptrClusterNativeSave; return 0; @@ -480,54 +479,54 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) mTriggerBuffer->triggers.clear(); } if (mIOPtrs.tpcZS) { - uint32_t nDigitsFragmentMax[NSLICES]; + uint32_t nDigitsFragmentMax[NSECTORS]; mCFContext->zsVersion = -1; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - if (mIOPtrs.tpcZS->slice[iSlice].count[0]) { - const void* rdh = mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + if (mIOPtrs.tpcZS->sector[iSector].count[0]) { + const void* rdh = mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]; if (rdh && o2::raw::RDHUtils::getVersion() > o2::raw::RDHUtils::getVersion(rdh)) { GPUError("Data has invalid RDH version %d, %d required\n", o2::raw::RDHUtils::getVersion(rdh), o2::raw::RDHUtils::getVersion()); return 1; } } #ifndef GPUCA_NO_VC - if (GetProcessingSettings().prefetchTPCpageScan >= 1 && iSlice < NSLICES - 1) { + if (GetProcessingSettings().prefetchTPCpageScan >= 1 && iSector < NSECTORS - 1) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[iSlice].count[j]; k++) { - for (uint32_t l = 0; l < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; l++) { - Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); - 
Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[iSector].count[j]; k++) { + for (uint32_t l = 0; l < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; l++) { + Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); + Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); } } } } #endif - const auto& x = TPCClusterizerDecodeZSCount(iSlice, fragmentMax); - nDigitsFragmentMax[iSlice] = x.first; - processors()->tpcClusterer[iSlice].mPmemory->counters.nDigits = x.first; + const auto& x = TPCClusterizerDecodeZSCount(iSector, fragmentMax); + nDigitsFragmentMax[iSector] = x.first; + processors()->tpcClusterer[iSector].mPmemory->counters.nDigits = x.first; mRec->MemoryScalers()->nTPCdigits += x.first; } - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t nDigitsBase = nDigitsFragmentMax[iSlice]; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t nDigitsBase = nDigitsFragmentMax[iSector]; uint32_t threshold = 40000000; uint32_t nDigitsScaled = nDigitsBase > threshold ? 
nDigitsBase : std::min((threshold + nDigitsBase) / 2, 2 * nDigitsBase); - processors()->tpcClusterer[iSlice].SetNMaxDigits(processors()->tpcClusterer[iSlice].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSlice]); + processors()->tpcClusterer[iSector].SetNMaxDigits(processors()->tpcClusterer[iSector].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSector]); if (doGPU) { - processorsShadow()->tpcClusterer[iSlice].SetNMaxDigits(processors()->tpcClusterer[iSlice].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSlice]); + processorsShadow()->tpcClusterer[iSector].SetNMaxDigits(processors()->tpcClusterer[iSector].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSector]); } if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) { - mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSlice].mZSOffsetId, mRec); - mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSlice].mZSId, mRec); + mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSector].mZSOffsetId, mRec); + mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSector].mZSId, mRec); } else { - AllocateRegisteredMemory(processors()->tpcClusterer[iSlice].mZSOffsetId); - AllocateRegisteredMemory(processors()->tpcClusterer[iSlice].mZSId); + AllocateRegisteredMemory(processors()->tpcClusterer[iSector].mZSOffsetId); + AllocateRegisteredMemory(processors()->tpcClusterer[iSector].mZSId); } } } else { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t nDigits = mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice]; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t nDigits = 
mIOPtrs.tpcPackedDigits->nTPCDigits[iSector]; mRec->MemoryScalers()->nTPCdigits += nDigits; - processors()->tpcClusterer[iSlice].SetNMaxDigits(nDigits, mCFContext->nPagesFragmentMax, nDigits, 0); + processors()->tpcClusterer[iSector].SetNMaxDigits(nDigits, mCFContext->nPagesFragmentMax, nDigits, 0); } } @@ -548,17 +547,17 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) } mCFContext->fragmentFirst = CfFragment{std::max(mCFContext->tpcMaxTimeBin + 1, maxFragmentLen), maxFragmentLen}; - for (int32_t iSlice = 0; iSlice < GetProcessingSettings().nTPCClustererLanes && iSlice < NSLICES; iSlice++) { - if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { - mCFContext->nextPos[iSlice] = RunTPCClusterizer_transferZS(iSlice, mCFContext->fragmentFirst, GetProcessingSettings().nTPCClustererLanes + iSlice); + for (int32_t iSector = 0; iSector < GetProcessingSettings().nTPCClustererLanes && iSector < NSECTORS; iSector++) { + if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { + mCFContext->nextPos[iSector] = RunTPCClusterizer_transferZS(iSector, mCFContext->fragmentFirst, GetProcessingSettings().nTPCClustererLanes + iSector); } } if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - mCFContext->ptrSave[iSlice].zsOffsetHost = processors()->tpcClusterer[iSlice].mPzsOffsets; - mCFContext->ptrSave[iSlice].zsOffsetDevice = processorsShadow()->tpcClusterer[iSlice].mPzsOffsets; - mCFContext->ptrSave[iSlice].zsDevice = processorsShadow()->tpcClusterer[iSlice].mPzs; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + mCFContext->ptrSave[iSector].zsOffsetHost = processors()->tpcClusterer[iSector].mPzsOffsets; + mCFContext->ptrSave[iSector].zsOffsetDevice = processorsShadow()->tpcClusterer[iSector].mPzsOffsets; + mCFContext->ptrSave[iSector].zsDevice = 
processorsShadow()->tpcClusterer[iSector].mPzs; } } return 0; @@ -579,7 +578,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) return 1; } if (GetProcessingSettings().autoAdjustHostThreads && !doGPU) { - mRec->SetNActiveThreads(mRec->MemoryScalers()->nTPCdigits / 20000); + mRec->SetNActiveThreads(mRec->MemoryScalers()->nTPCdigits / 6000); } mRec->MemoryScalers()->nTPCHits = mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits); @@ -597,12 +596,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) tpcHitLowOccupancyScalingFactor = std::min(3.5f, (float)threshold / nHitsBase); } } - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - processors()->tpcClusterer[iSlice].SetMaxData(mIOPtrs); // First iteration to set data sizes + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + processors()->tpcClusterer[iSector].SetMaxData(mIOPtrs); // First iteration to set data sizes } mRec->ComputeReuseMax(nullptr); // Resolve maximums for shared buffers - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - SetupGPUProcessor(&processors()->tpcClusterer[iSlice], true); // Now we allocate + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + SetupGPUProcessor(&processors()->tpcClusterer[iSector], true); // Now we allocate } if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) { RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline) @@ -613,7 +612,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1); } if (doGPU) { - WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), 
processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSLICES, mRec->NStreams() - 1, &mEvents->init); + WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); } size_t nClsTotal = 0; @@ -653,7 +652,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) mcLinearLabels.data.reserve(mRec->MemoryScalers()->nTPCHits); } - int8_t transferRunning[NSLICES] = {0}; + int8_t transferRunning[NSECTORS] = {0}; uint32_t outputQueueStart = mOutputQueue.size(); auto notifyForeignChainFinished = [this]() { @@ -668,238 +667,230 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) }; bool synchronizeCalibUpdate = false; - for (uint32_t iSliceBase = 0; iSliceBase < NSLICES; iSliceBase += GetProcessingSettings().nTPCClustererLanes) { + for (uint32_t iSectorBase = 0; iSectorBase < NSECTORS; iSectorBase += GetProcessingSettings().nTPCClustererLanes) { std::vector laneHasData(GetProcessingSettings().nTPCClustererLanes, false); - static_assert(NSLICES <= GPUCA_MAX_STREAMS, "Stream events must be able to hold all slices"); - const int32_t maxLane = std::min(GetProcessingSettings().nTPCClustererLanes, NSLICES - iSliceBase); + static_assert(NSECTORS <= GPUCA_MAX_STREAMS, "Stream events must be able to hold all sectors"); + const int32_t maxLane = std::min(GetProcessingSettings().nTPCClustererLanes, NSECTORS - iSectorBase); for (CfFragment fragment = mCFContext->fragmentFirst; !fragment.isEnd(); fragment = fragment.next()) { if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Processing time bins [%d, %d) for sectors %d to %d", fragment.start, fragment.last(), iSliceBase, iSliceBase + GetProcessingSettings().nTPCClustererLanes - 1); + GPUInfo("Processing time bins [%d, %d) for sectors %d to %d", fragment.start, fragment.last(), iSectorBase, iSectorBase + 
GetProcessingSettings().nTPCClustererLanes - 1); } - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { - tbb::parallel_for(0, maxLane, [&](auto lane) { - if (doGPU && fragment.index != 0) { - SynchronizeStream(lane); // Don't overwrite charge map from previous iteration until cluster computation is finished - } - - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; - clusterer.mPmemory->counters.nPeaks = clusterer.mPmemory->counters.nClusters = 0; - clusterer.mPmemory->fragment = fragment; - - if (mIOPtrs.tpcPackedDigits) { - bool setDigitsOnGPU = doGPU && not mIOPtrs.tpcZS; - bool setDigitsOnHost = (not doGPU && not mIOPtrs.tpcZS) || propagateMCLabels; - auto* inDigits = mIOPtrs.tpcPackedDigits; - size_t numDigits = inDigits->nTPCDigits[iSlice]; - if (setDigitsOnGPU) { - GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSlice], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true); - } - if (setDigitsOnHost) { - clusterer.mPdigits = const_cast(inDigits->tpcDigits[iSlice]); // TODO: Needs fixing (invalid const cast) - } - clusterer.mPmemory->counters.nDigits = numDigits; - } + mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { + if (doGPU && fragment.index != 0) { + SynchronizeStream(lane); // Don't overwrite charge map from previous iteration until cluster computation is finished + } - if (mIOPtrs.tpcZS) { - if (mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { - clusterer.mPmemory->counters.nPositions = mCFContext->nextPos[iSlice].first; - clusterer.mPmemory->counters.nPagesSubslice = mCFContext->nextPos[iSlice].second; - } else { - clusterer.mPmemory->counters.nPositions = clusterer.mPmemory->counters.nPagesSubslice = 0; - } + uint32_t iSector = iSectorBase + lane; + GPUTPCClusterFinder& clusterer 
= processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; + clusterer.mPmemory->counters.nPeaks = clusterer.mPmemory->counters.nClusters = 0; + clusterer.mPmemory->fragment = fragment; + + if (mIOPtrs.tpcPackedDigits) { + bool setDigitsOnGPU = doGPU && not mIOPtrs.tpcZS; + bool setDigitsOnHost = (not doGPU && not mIOPtrs.tpcZS) || propagateMCLabels; + auto* inDigits = mIOPtrs.tpcPackedDigits; + size_t numDigits = inDigits->nTPCDigits[iSector]; + if (setDigitsOnGPU) { + GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSector], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true); } - TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - - using ChargeMapType = decltype(*clustererShadow.mPchargeMap); - using PeakMapType = decltype(*clustererShadow.mPpeakMap); - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!! 
- runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType)); - if (fragment.index == 0) { - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); + if (setDigitsOnHost) { + clusterer.mPdigits = const_cast(inDigits->tpcDigits[iSector]); // TODO: Needs fixing (invalid const cast) } - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); + clusterer.mPmemory->counters.nDigits = numDigits; + } - if (doGPU) { - if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { - TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane); - SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane); - } - SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory + if (mIOPtrs.tpcZS) { + if (mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { + clusterer.mPmemory->counters.nPositions = mCFContext->nextPos[iSector].first; + clusterer.mPmemory->counters.nPagesSubsector = mCFContext->nextPos[iSector].second; + } else { + clusterer.mPmemory->counters.nPositions = clusterer.mPmemory->counters.nPagesSubsector = 0; } + } + TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + + using ChargeMapType = decltype(*clustererShadow.mPchargeMap); + using PeakMapType = decltype(*clustererShadow.mPpeakMap); + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!! 
+ runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType)); + if (fragment.index == 0) { + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); + } + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); - if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSlice] || mCFContext->zsVersion == -1)) { - clusterer.mPmemory->counters.nPositions = 0; - return; - } - if (!mIOPtrs.tpcZS && mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice] == 0) { - clusterer.mPmemory->counters.nPositions = 0; - return; + if (doGPU) { + if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { + TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane); + SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane); } + SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory + } - if (propagateMCLabels && fragment.index == 0) { - clusterer.PrepareMC(); - clusterer.mPinputLabels = digitsMC->v[iSlice]; - if (clusterer.mPinputLabels == nullptr) { - GPUFatal("MC label container missing, sector %d", iSlice); - } - if (clusterer.mPinputLabels->getIndexedSize() != mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice]) { - GPUFatal("MC label container has incorrect number of entries: %d expected, has %d\n", (int32_t)mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice], (int32_t)clusterer.mPinputLabels->getIndexedSize()); - } - } + if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSector] || mCFContext->zsVersion == -1)) { + clusterer.mPmemory->counters.nPositions = 0; + return; + } + if (!mIOPtrs.tpcZS && mIOPtrs.tpcPackedDigits->nTPCDigits[iSector] == 0) 
{ + clusterer.mPmemory->counters.nPositions = 0; + return; + } - if (GetProcessingSettings().tpcSingleSector == -1 || GetProcessingSettings().tpcSingleSector == (int32_t)iSlice) { - if (not mIOPtrs.tpcZS) { - runKernel({GetGrid(1, lane), {iSlice}}, mIOPtrs.tpcZS == nullptr); - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - } else if (propagateMCLabels) { - runKernel({GetGrid(1, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, mIOPtrs.tpcZS == nullptr); - TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - } + if (propagateMCLabels && fragment.index == 0) { + clusterer.PrepareMC(); + clusterer.mPinputLabels = digitsMC->v[iSector]; + if (clusterer.mPinputLabels == nullptr) { + GPUFatal("MC label container missing, sector %d", iSector); } + if (clusterer.mPinputLabels->getIndexedSize() != mIOPtrs.tpcPackedDigits->nTPCDigits[iSector]) { + GPUFatal("MC label container has incorrect number of entries: %d expected, has %d\n", (int32_t)mIOPtrs.tpcPackedDigits->nTPCDigits[iSector], (int32_t)clusterer.mPinputLabels->getIndexedSize()); + } + } - if (mIOPtrs.tpcZS) { - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->slice[iSlice].count[0] && mIOPtrs.tpcZS->slice[iSlice].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]) : 0); - uint32_t nBlocks = doGPU ? 
clusterer.mPmemory->counters.nPagesSubslice : GPUTrackingInOutZS::NENDPOINTS; - - (void)tpcTimeBinCut; // TODO: To be used in decoding kernels - switch (mCFContext->zsVersion) { - default: - GPUFatal("Data with invalid TPC ZS mode (%d) received", mCFContext->zsVersion); - break; - case ZSVersionRowBased10BitADC: - case ZSVersionRowBased12BitADC: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); - break; - case ZSVersionLinkBasedWithMeta: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); - break; - case ZSVersionDenseLinkBased: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); - break; - } + if (GetProcessingSettings().tpcSingleSector == -1 || GetProcessingSettings().tpcSingleSector == (int32_t)iSector) { + if (not mIOPtrs.tpcZS) { + runKernel({GetGrid(1, lane), {iSector}}, mIOPtrs.tpcZS == nullptr); TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - } // clang-format off - }, tbb::simple_partitioner()); // clang-format on - }); - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { - tbb::parallel_for(0, maxLane, [&](auto lane) { - uint32_t iSlice = iSliceBase + lane; - if (doGPU) { - SynchronizeStream(lane); - } - if (mIOPtrs.tpcZS) { - CfFragment f = fragment.next(); - int32_t nextSlice = iSlice; - if (f.isEnd()) { - nextSlice += GetProcessingSettings().nTPCClustererLanes; - f = mCFContext->fragmentFirst; - } - if (nextSlice < NSLICES && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSlice] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) { - mCFContext->nextPos[nextSlice] = RunTPCClusterizer_transferZS(nextSlice, f, GetProcessingSettings().nTPCClustererLanes + lane); - } + } else if (propagateMCLabels) { + runKernel({GetGrid(1, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, mIOPtrs.tpcZS == nullptr); + TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); } - GPUTPCClusterFinder& clusterer = 
processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; - if (clusterer.mPmemory->counters.nPositions == 0) { - return; + } + + if (mIOPtrs.tpcZS) { + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0); + uint32_t nBlocks = doGPU ? clusterer.mPmemory->counters.nPagesSubsector : GPUTrackingInOutZS::NENDPOINTS; + + (void)tpcTimeBinCut; // TODO: To be used in decoding kernels + switch (mCFContext->zsVersion) { + default: + GPUFatal("Data with invalid TPC ZS mode (%d) received", mCFContext->zsVersion); + break; + case ZSVersionRowBased10BitADC: + case ZSVersionRowBased12BitADC: + runKernel({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF); + break; + case ZSVersionLinkBasedWithMeta: + runKernel({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF); + break; + case ZSVersionDenseLinkBased: + runKernel({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF); + break; } - if (!mIOPtrs.tpcZS) { - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + } // clang-format off + }); + mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { + uint32_t iSector = iSectorBase + lane; + if (doGPU) { + SynchronizeStream(lane); + } + if (mIOPtrs.tpcZS) { + CfFragment f = fragment.next(); + int32_t nextSector = iSector; + if (f.isEnd()) { + nextSector += GetProcessingSettings().nTPCClustererLanes; + f = mCFContext->fragmentFirst; } - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { - clusterer.DumpChargeMap(*mDebugFile, "Charges"); + if 
(nextSector < NSECTORS && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSector] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) { + mCFContext->nextPos[nextSector] = RunTPCClusterizer_transferZS(nextSector, f, GetProcessingSettings().nTPCClustererLanes + lane); } + } + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; + if (clusterer.mPmemory->counters.nPositions == 0) { + return; + } + if (!mIOPtrs.tpcZS) { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); + } + if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { + clusterer.DumpChargeMap(*mDebugFile, "Charges"); + } - if (propagateMCLabels) { - runKernel({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}); - } + if (propagateMCLabels) { + runKernel({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}); + } - bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0); - checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? fragment.index == 0 : true); - checkForNoisyPads &= !GetProcessingSettings().disableTPCNoisyPadFilter; + bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0); + checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? 
fragment.index == 0 : true); + checkForNoisyPads &= !GetProcessingSettings().disableTPCNoisyPadFilter; - if (checkForNoisyPads) { - int32_t nBlocks = TPC_PADS_IN_SECTOR / GPUTPCCFCheckPadBaseline::PadsPerCacheline; + if (checkForNoisyPads) { + int32_t nBlocks = TPC_PADS_IN_SECTOR / GPUTPCCFCheckPadBaseline::PadsPerCacheline; - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}); - } + runKernel({GetGridBlk(nBlocks, lane), {iSector}}); + } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { - clusterer.DumpPeakMap(*mDebugFile, "Peaks"); - } + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); + if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { + clusterer.DumpPeakMap(*mDebugFile, "Peaks"); + } - RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane); - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off - }, tbb::simple_partitioner()); // clang-format on + RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane); + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off }); - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { - tbb::parallel_for(0, maxLane, [&](auto lane) { - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSlice] : clusterer; - if (doGPU) { - SynchronizeStream(lane); - } - if (clusterer.mPmemory->counters.nPeaks == 0) { - return; - } - runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); - runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { - clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); - } + mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { + uint32_t iSector = iSectorBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; + if (doGPU) { + SynchronizeStream(lane); + } + if (clusterer.mPmemory->counters.nPeaks == 0) { + return; + } + runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); + runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); + if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { + clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); + } - RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane); - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off - }, tbb::simple_partitioner()); // clang-format on + RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane); + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off }); - 
tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { - tbb::parallel_for(0, maxLane, [&](auto lane) { - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; - if (doGPU) { - SynchronizeStream(lane); - } + mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { + uint32_t iSector = iSectorBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; + if (doGPU) { + SynchronizeStream(lane); + } - if (fragment.index == 0) { - deviceEvent* waitEvent = nullptr; - if (transferRunning[lane] == 1) { - waitEvent = &mEvents->stream[lane]; - transferRunning[lane] = 2; - } - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUCA_ROW_COUNT * sizeof(*clustererShadow.mPclusterInRow)); + if (fragment.index == 0) { + deviceEvent* waitEvent = nullptr; + if (transferRunning[lane] == 1) { + waitEvent = &mEvents->stream[lane]; + transferRunning[lane] = 2; } + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUCA_ROW_COUNT * sizeof(*clustererShadow.mPclusterInRow)); + } - if (clusterer.mPmemory->counters.nClusters == 0) { - return; - } + if (clusterer.mPmemory->counters.nClusters == 0) { + return; + } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, 
&GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSlice}}, 0); - if (doGPU && propagateMCLabels) { - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); - if (doGPU) { - SynchronizeStream(lane); - } - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, 1); - } - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSlice, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); + if (doGPU && propagateMCLabels) { + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); + if (doGPU) { + SynchronizeStream(lane); } + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); + } + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSector, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); + } - TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); - laneHasData[lane] = true; - // Include clusters in default debug mask, exclude other debug output by default - DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off - }, tbb::simple_partitioner()); // clang-format on + TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); + laneHasData[lane] = true; + // Include clusters in 
default debug mask, exclude other debug output by default + DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off }); mRec->SetNActiveThreadsOuterLoop(1); } @@ -907,22 +898,22 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) size_t nClsFirst = nClsTotal; bool anyLaneHasData = false; for (int32_t lane = 0; lane < maxLane; lane++) { - uint32_t iSlice = iSliceBase + lane; - std::fill(&tmpNativeAccess->nClusters[iSlice][0], &tmpNativeAccess->nClusters[iSlice][0] + MAXGLOBALPADROW, 0); + uint32_t iSector = iSectorBase + lane; + std::fill(&tmpNativeAccess->nClusters[iSector][0], &tmpNativeAccess->nClusters[iSector][0] + MAXGLOBALPADROW, 0); if (doGPU) { SynchronizeStream(lane); } - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSector] : clusterer; if (laneHasData[lane]) { anyLaneHasData = true; if (buildNativeGPU && GetProcessingSettings().tpccfGatherKernel) { - runKernel({GetGridBlk(GPUCA_ROW_COUNT, mRec->NStreams() - 1), {iSlice}}, &mInputsShadow->mPclusterNativeBuffer[nClsTotal]); + runKernel({GetGridBlk(GPUCA_ROW_COUNT, mRec->NStreams() - 1), {iSector}}, &mInputsShadow->mPclusterNativeBuffer[nClsTotal]); } for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { if (nClsTotal + clusterer.mPclusterInRow[j] > mInputsHost->mNClusterNative) { - clusterer.raiseError(GPUErrors::ERROR_CF_GLOBAL_CLUSTER_OVERFLOW, iSlice * 1000 + j, nClsTotal + clusterer.mPclusterInRow[j], mInputsHost->mNClusterNative); + clusterer.raiseError(GPUErrors::ERROR_CF_GLOBAL_CLUSTER_OVERFLOW, iSector * 1000 + j, nClsTotal + clusterer.mPclusterInRow[j], mInputsHost->mNClusterNative); continue; } if (buildNativeGPU) { @@ -932,7 +923,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } else if (buildNativeHost) { GPUMemCpyAlways(RecoStep::TPCClusterFinding, (void*)&tmpNativeClusters[nClsTotal], (const void*)&clustererShadow.mPclusterByRow[j * clusterer.mNMaxClusterPerRow], sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * clusterer.mPclusterInRow[j], mRec->NStreams() - 1, false); } - tmpNativeAccess->nClusters[iSlice][j] += clusterer.mPclusterInRow[j]; + tmpNativeAccess->nClusters[iSector][j] += clusterer.mPclusterInRow[j]; nClsTotal += clusterer.mPclusterInRow[j]; } if (transferRunning[lane]) { @@ -947,15 +938,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) continue; } - runKernel({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}); + runKernel({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}); GPUTPCCFMCLabelFlattener::setGlobalOffsetsAndAllocate(clusterer, mcLinearLabels); - runKernel({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, 
&mcLinearLabels); + runKernel({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, &mcLinearLabels); clusterer.clearMCMemory(); assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true); } if (propagateMCLabels) { for (int32_t lane = 0; lane < maxLane; lane++) { - processors()->tpcClusterer[iSliceBase + lane].clearMCMemory(); + processors()->tpcClusterer[iSectorBase + lane].clearMCMemory(); } } if (buildNativeHost && buildNativeGPU && anyLaneHasData) { @@ -966,10 +957,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } - if (mWaitForFinalInputs && iSliceBase >= 21 && (int32_t)iSliceBase < 21 + GetProcessingSettings().nTPCClustererLanes) { + if (mWaitForFinalInputs && iSectorBase >= 21 && (int32_t)iSectorBase < 21 + GetProcessingSettings().nTPCClustererLanes) { notifyForeignChainFinished(); } - if (mWaitForFinalInputs && iSliceBase >= 30 && (int32_t)iSliceBase < 30 + GetProcessingSettings().nTPCClustererLanes) { + if (mWaitForFinalInputs && iSectorBase >= 30 && (int32_t)iSectorBase < 30 + GetProcessingSettings().nTPCClustererLanes) { mWaitForFinalInputs(); synchronizeCalibUpdate = DoQueuedUpdates(0, false); } @@ -1061,7 +1052,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) SynchronizeStream(0); } if (buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4)) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { std::sort(&tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j]], &tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j] + tmpNativeAccess->nClusters[i][j]]); } @@ -1077,6 +1068,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) mPipelineNotifyCtx = nullptr; } + if (GetProcessingSettings().autoAdjustHostThreads && !doGPU) { + mRec->SetNActiveThreads(-1); + } + #endif return 0; } diff --git 
a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 4ea7094416d5e..94d39249d620c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -62,7 +62,7 @@ int32_t GPUChainTracking::RunTPCCompression() O->nAttachedClusters = Compressor.mMemory->nStoredAttachedClusters; O->nUnattachedClusters = Compressor.mMemory->nStoredUnattachedClusters; O->nAttachedClustersReduced = O->nAttachedClusters - O->nTracks; - O->nSliceRows = NSLICES * GPUCA_ROW_COUNT; + O->nSliceRows = NSECTORS * GPUCA_ROW_COUNT; O->nComppressionModes = param().rec.tpc.compressionTypeMask; O->solenoidBz = param().bzkG; O->maxTimeBin = param().continuousMaxTimeBin; @@ -143,11 +143,11 @@ int32_t GPUChainTracking::RunTPCCompression() gatherTimer = &getTimer("GPUTPCCompression_GatherOnCPU", 0); gatherTimer->Start(); } - GPUMemCpyAlways(myStep, O->nSliceRowClusters, P->nSliceRowClusters, NSLICES * GPUCA_ROW_COUNT * sizeof(O->nSliceRowClusters[0]), outputStream, direction); + GPUMemCpyAlways(myStep, O->nSliceRowClusters, P->nSliceRowClusters, NSECTORS * GPUCA_ROW_COUNT * sizeof(O->nSliceRowClusters[0]), outputStream, direction); GPUMemCpyAlways(myStep, O->nTrackClusters, P->nTrackClusters, O->nTracks * sizeof(O->nTrackClusters[0]), outputStream, direction); SynchronizeStream(outputStream); uint32_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { uint32_t srcOffset = mIOPtrs.clustersNative->clusterOffset[i][j] * Compressor.mMaxClusterFactorBase1024 / 1024; GPUMemCpyAlways(myStep, O->qTotU + offset, P->qTotU + srcOffset, O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j] * sizeof(O->qTotU[0]), outputStream, direction); @@ -264,7 +264,7 @@ int32_t GPUChainTracking::RunTPCDecompression() inputGPU = cmprClsHost; bool toGPU = true; - runKernel({GetGridAutoStep(inputStream, 
RecoStep::TPCDecompression), krnlRunRangeNone, &mEvents->init}, DecompressorShadow.mNativeClustersIndex, NSLICES * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNativeClustersIndex[0])); + runKernel({GetGridAutoStep(inputStream, RecoStep::TPCDecompression), krnlRunRangeNone, &mEvents->init}, DecompressorShadow.mNativeClustersIndex, NSECTORS * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNativeClustersIndex[0])); int32_t nStreams = doGPU ? mRec->NStreams() - 1 : 1; if (cmprClsHost.nAttachedClusters != 0) { std::exclusive_scan(cmprClsHost.nTrackClusters, cmprClsHost.nTrackClusters + cmprClsHost.nTracks, Decompressor.mAttachedClustersOffsets, 0u); // computing clusters offsets for first kernel @@ -294,7 +294,7 @@ int32_t GPUChainTracking::RunTPCDecompression() runKernel({GetGridAuto(iStream), krnlRunRangeNone, {&mEvents->stream[iStream], &mEvents->init}}, startTrack, endTrack); } } - GPUMemCpy(myStep, inputGPUShadow.nSliceRowClusters, cmprClsHost.nSliceRowClusters, NSLICES * GPUCA_ROW_COUNT * sizeof(cmprClsHost.nSliceRowClusters[0]), unattachedStream, toGPU); + GPUMemCpy(myStep, inputGPUShadow.nSliceRowClusters, cmprClsHost.nSliceRowClusters, NSECTORS * GPUCA_ROW_COUNT * sizeof(cmprClsHost.nSliceRowClusters[0]), unattachedStream, toGPU); GPUMemCpy(myStep, inputGPUShadow.qTotU, cmprClsHost.qTotU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.qTotU[0]), unattachedStream, toGPU); GPUMemCpy(myStep, inputGPUShadow.qMaxU, cmprClsHost.qMaxU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.qMaxU[0]), unattachedStream, toGPU); GPUMemCpy(myStep, inputGPUShadow.flagsU, cmprClsHost.flagsU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.flagsU[0]), unattachedStream, toGPU); @@ -307,7 +307,7 @@ int32_t GPUChainTracking::RunTPCDecompression() SynchronizeStream(inputStream); uint32_t offset = 0; uint32_t decodedAttachedClusters = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { 
uint32_t linearIndex = i * GPUCA_ROW_COUNT + j; uint32_t unattachedOffset = (linearIndex >= cmprClsHost.nSliceRows) ? 0 : cmprClsHost.nSliceRowClusters[linearIndex]; @@ -353,13 +353,13 @@ int32_t GPUChainTracking::RunTPCDecompression() *mInputsHost->mPclusterNativeAccess = *mClusterNativeAccess; } - uint32_t batchSize = doGPU ? 6 : NSLICES; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice = iSlice + batchSize) { - int32_t iStream = (iSlice / batchSize) % mRec->NStreams(); - runKernel({GetGridAuto(iStream), krnlRunRangeNone, {nullptr, &mEvents->single}}, iSlice, batchSize); - uint32_t copySize = std::accumulate(mClusterNativeAccess->nClustersSector + iSlice, mClusterNativeAccess->nClustersSector + iSlice + batchSize, 0u); + uint32_t batchSize = doGPU ? 6 : NSECTORS; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector = iSector + batchSize) { + int32_t iStream = (iSector / batchSize) % mRec->NStreams(); + runKernel({GetGridAuto(iStream), krnlRunRangeNone, {nullptr, &mEvents->single}}, iSector, batchSize); + uint32_t copySize = std::accumulate(mClusterNativeAccess->nClustersSector + iSector, mClusterNativeAccess->nClustersSector + iSector + batchSize, 0u); if (!runFiltering) { - GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput + mClusterNativeAccess->clusterOffset[iSlice][0], DecompressorShadow.mNativeClustersBuffer + mClusterNativeAccess->clusterOffset[iSlice][0], sizeof(Decompressor.mNativeClustersBuffer[0]) * copySize, iStream, false); + GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput + mClusterNativeAccess->clusterOffset[iSector][0], DecompressorShadow.mNativeClustersBuffer + mClusterNativeAccess->clusterOffset[iSector][0], sizeof(Decompressor.mNativeClustersBuffer[0]) * copySize, iStream, false); } } SynchronizeGPU(); @@ -367,7 +367,7 @@ int32_t GPUChainTracking::RunTPCDecompression() if (runFiltering) { // If filtering is applied, count how many clusters will remain after filtering and allocate final 
buffers accordingly AllocateRegisteredMemory(Decompressor.mResourceNClusterPerSectorRow); WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream); - runKernel({GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression), krnlRunRangeNone}, DecompressorShadow.mNClusterPerSectorRow, NSLICES * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNClusterPerSectorRow[0])); + runKernel({GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression), krnlRunRangeNone}, DecompressorShadow.mNClusterPerSectorRow, NSECTORS * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNClusterPerSectorRow[0])); runKernel(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression)); TransferMemoryResourceLinkToHost(RecoStep::TPCDecompression, Decompressor.mResourceNClusterPerSectorRow, unattachedStream); SynchronizeStream(unattachedStream); @@ -378,7 +378,7 @@ int32_t GPUChainTracking::RunTPCDecompression() DecompressorShadow.mNativeClustersBuffer = mInputsShadow->mPclusterNativeBuffer; Decompressor.mNativeClustersBuffer = mInputsHost->mPclusterNativeOutput; WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { mClusterNativeAccess->nClusters[i][j] = Decompressor.mNClusterPerSectorRow[i * GPUCA_ROW_COUNT + j]; } @@ -402,7 +402,7 @@ int32_t GPUChainTracking::RunTPCDecompression() runKernel(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression)); const ClusterNativeAccess* decoded = mIOPtrs.clustersNative; if (doGPU) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { ClusterNative* begin = mInputsHost->mPclusterNativeOutput + decoded->clusterOffset[i][j]; ClusterNative* 
end = begin + decoded->nClusters[i][j]; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 96bc8a3083067..ec6b48a55d50d 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -140,7 +140,7 @@ void addToMap(std::string name, std::map& void GPUChainTracking::PrintMemoryStatistics() { std::map usageMap; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { #ifdef GPUCA_TPC_GEOMETRY_O2 addToMap("TPC Clusterer Sector Peaks", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nPeaks, processors()->tpcClusterer[i].mNMaxPeaks); addToMap("TPC Clusterer Sector Clusters", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nClusters, processors()->tpcClusterer[i].mNMaxClusters); @@ -173,7 +173,7 @@ void GPUChainTracking::PrintMemoryStatistics() void GPUChainTracking::PrintMemoryRelations() { - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { GPUInfo("MEMREL StartHits NCl %d NTrkl %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NStartHits()); GPUInfo("MEMREL Tracklets NCl %d NTrkl %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTracklets()); GPUInfo("MEMREL Tracklets NCl %d NTrkl %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NRowHits()); @@ -193,7 +193,7 @@ void GPUChainTracking::PrepareDebugOutput() WriteToConstantMemory(RecoStep::NoRecoStep, (char*)&processors()->debugOutput - (char*)processors(), &processorsShadow()->debugOutput, sizeof(processors()->debugOutput), -1); memset(processors()->debugOutput.memory(), 0, processors()->debugOutput.memorySize() * sizeof(processors()->debugOutput.memory()[0])); } - runKernel({{BlockCount(), ThreadCount(), 0, RecoStep::TPCSliceTracking}}, (mRec->IsGPU() ? 
processorsShadow() : processors())->debugOutput.memory(), processorsShadow()->debugOutput.memorySize() * sizeof(processors()->debugOutput.memory()[0])); + runKernel({{BlockCount(), ThreadCount(), 0, RecoStep::TPCSectorTracking}}, (mRec->IsGPU() ? processorsShadow() : processors())->debugOutput.memory(), processorsShadow()->debugOutput.memorySize() * sizeof(processors()->debugOutput.memory()[0])); #endif } @@ -272,7 +272,7 @@ void GPUChainTracking::SanityCheck() uint8_t sector, row; uint32_t cl; trk.getClusterReference(mIOPtrs.outputClusRefsTPCO2, j, sector, row, cl); - if (sector >= GPUCA_NSLICES || row >= GPUCA_ROW_COUNT) { + if (sector >= GPUCA_NSECTORS || row >= GPUCA_ROW_COUNT) { if (nErrors++ < 1000) { GPUError("Invalid sector / row %d / %d", (int32_t)sector, (int32_t)row); continue; @@ -299,7 +299,7 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster o2::tpc::ClusterNative* outputBuffer = nullptr; for (int32_t iPhase = 0; iPhase < 2; iPhase++) { uint32_t countTotal = 0; - for (uint32_t iSector = 0; iSector < GPUCA_NSLICES; iSector++) { + for (uint32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { for (uint32_t iRow = 0; iRow < GPUCA_ROW_COUNT; iRow++) { uint32_t count = 0; for (uint32_t k = 0; k < clusters->nClusters[iSector][iRow]; k++) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h index 31ef86bcd6f70..dc1a665e6052c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h @@ -18,9 +18,7 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUChainTrackingFinalContext { GPUReconstruction* rec = nullptr; @@ -28,7 +26,6 @@ struct GPUChainTrackingFinalContext { std::condition_variable cond; bool ready = false; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx 
b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 229469af801f6..c4dddd4b8b88f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -13,9 +13,10 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUReconstructionIO.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSliceOutput.h" -#include "GPUTPCSliceOutCluster.h" +#include "GPUTPCSectorOutput.h" +#include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTPCTrack.h" @@ -76,7 +77,7 @@ void GPUChainTracking::DumpData(const char* filename) DumpData(fp, mIOPtrs.rawClusters, mIOPtrs.nRawClusters, InOutPointerType::RAW_CLUSTERS); if (mIOPtrs.clustersNative) { if (DumpData(fp, &mIOPtrs.clustersNative->clustersLinear, &mIOPtrs.clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE)) { - fwrite(&mIOPtrs.clustersNative->nClusters[0][0], sizeof(mIOPtrs.clustersNative->nClusters[0][0]), NSLICES * GPUCA_ROW_COUNT, fp); + fwrite(&mIOPtrs.clustersNative->nClusters[0][0], sizeof(mIOPtrs.clustersNative->nClusters[0][0]), NSECTORS * GPUCA_ROW_COUNT, fp); if (mIOPtrs.clustersNative->clustersMCTruth) { const auto& buffer = mIOPtrs.clustersNative->clustersMCTruth->getBuffer(); std::pair tmp = {buffer.data(), buffer.size()}; @@ -86,9 +87,9 @@ void GPUChainTracking::DumpData(const char* filename) } if (mIOPtrs.tpcPackedDigits) { if (DumpData(fp, mIOPtrs.tpcPackedDigits->tpcDigits, mIOPtrs.tpcPackedDigits->nTPCDigits, InOutPointerType::TPC_DIGIT) && mIOPtrs.tpcPackedDigits->tpcDigitsMC) { - const char* ptrs[NSLICES]; - size_t sizes[NSLICES]; - for (uint32_t i = 0; i < NSLICES; i++) { + const char* ptrs[NSECTORS]; + size_t sizes[NSECTORS]; + for (uint32_t i = 0; i < NSECTORS; i++) { if (mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]) { const auto& buffer = mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]->getBuffer(); ptrs[i] = buffer.data(); @@ -103,10 +104,10 @@ void 
GPUChainTracking::DumpData(const char* filename) } if (mIOPtrs.tpcZS) { size_t total = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[i].count[j]; k++) { - total += mIOPtrs.tpcZS->slice[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { + total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -114,12 +115,12 @@ void GPUChainTracking::DumpData(const char* filename) char* ptr = pages[0].data(); GPUTrackingInOutZS::GPUTrackingInOutZSCounts counts; total = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[i].count[j]; k++) { - memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], mIOPtrs.tpcZS->slice[i].zsPtr[j][k], mIOPtrs.tpcZS->slice[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); - counts.count[i][j] += mIOPtrs.tpcZS->slice[i].nZSPtr[j][k]; - total += mIOPtrs.tpcZS->slice[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { + memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], mIOPtrs.tpcZS->sector[i].zsPtr[j][k], mIOPtrs.tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); + counts.count[i][j] += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; + total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -140,8 +141,8 @@ void GPUChainTracking::DumpData(const char* filename) uint32_t n = 1; DumpData(fp, &mIOPtrs.settingsTF, &n, InOutPointerType::TF_SETTINGS); } - DumpData(fp, mIOPtrs.sliceTracks, mIOPtrs.nSliceTracks, InOutPointerType::SLICE_OUT_TRACK); - DumpData(fp, mIOPtrs.sliceClusters, mIOPtrs.nSliceClusters, InOutPointerType::SLICE_OUT_CLUSTER); + DumpData(fp, mIOPtrs.sectorTracks, mIOPtrs.nSectorTracks, InOutPointerType::SECTOR_OUT_TRACK); + DumpData(fp, mIOPtrs.sectorClusters, mIOPtrs.nSectorClusters, 
InOutPointerType::SECTOR_OUT_CLUSTER); DumpData(fp, &mIOPtrs.mcLabelsTPC, &mIOPtrs.nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC); DumpData(fp, &mIOPtrs.mcInfosTPC, &mIOPtrs.nMCInfosTPC, InOutPointerType::MC_INFO_TPC); DumpData(fp, &mIOPtrs.mcInfosTPCCol, &mIOPtrs.nMCInfosTPCCol, InOutPointerType::MC_INFO_TPC); @@ -180,14 +181,14 @@ int32_t GPUChainTracking::ReadData(const char* filename) fclose(fp); return 1; } - GPUTPCClusterData* ptrClusterData[NSLICES]; + GPUTPCClusterData* ptrClusterData[NSECTORS]; ReadData(fp, mIOPtrs.clusterData, mIOPtrs.nClusterData, mIOMem.clusterData, InOutPointerType::CLUSTER_DATA, ptrClusterData); - AliHLTTPCRawCluster* ptrRawClusters[NSLICES]; + AliHLTTPCRawCluster* ptrRawClusters[NSECTORS]; ReadData(fp, mIOPtrs.rawClusters, mIOPtrs.nRawClusters, mIOMem.rawClusters, InOutPointerType::RAW_CLUSTERS, ptrRawClusters); int32_t nClustersTotal = 0; mIOMem.clusterNativeAccess.reset(new ClusterNativeAccess); if (ReadData(fp, &mIOMem.clusterNativeAccess->clustersLinear, &mIOMem.clusterNativeAccess->nClustersTotal, &mIOMem.clustersNative, InOutPointerType::CLUSTERS_NATIVE)) { - r = fread(&mIOMem.clusterNativeAccess->nClusters[0][0], sizeof(mIOMem.clusterNativeAccess->nClusters[0][0]), NSLICES * GPUCA_ROW_COUNT, fp); + r = fread(&mIOMem.clusterNativeAccess->nClusters[0][0], sizeof(mIOMem.clusterNativeAccess->nClusters[0][0]), NSECTORS * GPUCA_ROW_COUNT, fp); mIOMem.clusterNativeAccess->setOffsetPtrs(); mIOPtrs.clustersNative = mIOMem.clusterNativeAccess.get(); std::pair tmp = {nullptr, 0}; @@ -199,12 +200,12 @@ int32_t GPUChainTracking::ReadData(const char* filename) mIOMem.digitMap.reset(new GPUTrackingInOutDigits); if (ReadData(fp, mIOMem.digitMap->tpcDigits, mIOMem.digitMap->nTPCDigits, mIOMem.tpcDigits, InOutPointerType::TPC_DIGIT)) { mIOPtrs.tpcPackedDigits = mIOMem.digitMap.get(); - const char* ptrs[NSLICES]; - size_t sizes[NSLICES]; + const char* ptrs[NSECTORS]; + size_t sizes[NSECTORS]; if (ReadData(fp, ptrs, sizes, mIOMem.tpcDigitsMC, 
InOutPointerType::TPC_DIGIT_MC)) { mIOMem.tpcDigitMCMap = std::make_unique(); - mIOMem.tpcDigitMCView.reset(new ConstMCLabelContainerView[NSLICES]); - for (uint32_t i = 0; i < NSLICES; i++) { + mIOMem.tpcDigitMCView.reset(new ConstMCLabelContainerView[NSECTORS]); + for (uint32_t i = 0; i < NSECTORS; i++) { if (sizes[i]) { mIOMem.tpcDigitMCView.get()[i] = gsl::span(ptrs[i], ptrs[i] + sizes[i]); mIOMem.tpcDigitMCMap->v[i] = mIOMem.tpcDigitMCView.get() + i; @@ -224,13 +225,13 @@ int32_t GPUChainTracking::ReadData(const char* filename) mIOMem.tpcZSmeta.reset(new GPUTrackingInOutZS); mIOMem.tpcZSmeta2.reset(new GPUTrackingInOutZS::GPUTrackingInOutZSMeta); total = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { mIOMem.tpcZSmeta2->ptr[i][j] = &ptrZSPages[total * TPCZSHDR::TPC_ZS_PAGE_SIZE]; - mIOMem.tpcZSmeta->slice[i].zsPtr[j] = &mIOMem.tpcZSmeta2->ptr[i][j]; + mIOMem.tpcZSmeta->sector[i].zsPtr[j] = &mIOMem.tpcZSmeta2->ptr[i][j]; mIOMem.tpcZSmeta2->n[i][j] = counts.count[i][j]; - mIOMem.tpcZSmeta->slice[i].nZSPtr[j] = &mIOMem.tpcZSmeta2->n[i][j]; - mIOMem.tpcZSmeta->slice[i].count[j] = 1; + mIOMem.tpcZSmeta->sector[i].nZSPtr[j] = &mIOMem.tpcZSmeta2->n[i][j]; + mIOMem.tpcZSmeta->sector[i].count[j] = 1; total += counts.count[i][j]; } } @@ -241,8 +242,8 @@ int32_t GPUChainTracking::ReadData(const char* filename) } uint32_t n; ReadData(fp, &mIOPtrs.settingsTF, &n, &mIOMem.settingsTF, InOutPointerType::TF_SETTINGS); - ReadData(fp, mIOPtrs.sliceTracks, mIOPtrs.nSliceTracks, mIOMem.sliceTracks, InOutPointerType::SLICE_OUT_TRACK); - ReadData(fp, mIOPtrs.sliceClusters, mIOPtrs.nSliceClusters, mIOMem.sliceClusters, InOutPointerType::SLICE_OUT_CLUSTER); + ReadData(fp, mIOPtrs.sectorTracks, mIOPtrs.nSectorTracks, mIOMem.sectorTracks, InOutPointerType::SECTOR_OUT_TRACK); + ReadData(fp, mIOPtrs.sectorClusters, mIOPtrs.nSectorClusters, mIOMem.sectorClusters, 
InOutPointerType::SECTOR_OUT_CLUSTER); ReadData(fp, &mIOPtrs.mcLabelsTPC, &mIOPtrs.nMCLabelsTPC, &mIOMem.mcLabelsTPC, InOutPointerType::MC_LABEL_TPC); ReadData(fp, &mIOPtrs.mcInfosTPC, &mIOPtrs.nMCInfosTPC, &mIOMem.mcInfosTPC, InOutPointerType::MC_INFO_TPC); ReadData(fp, &mIOPtrs.mcInfosTPCCol, &mIOPtrs.nMCInfosTPCCol, &mIOMem.mcInfosTPCCol, InOutPointerType::MC_INFO_TPC); @@ -265,7 +266,7 @@ int32_t GPUChainTracking::ReadData(const char* filename) return 1; } (void)r; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < mIOPtrs.nClusterData[i]; j++) { ptrClusterData[i][j].id = nClustersTotal++; if ((uint32_t)ptrClusterData[i][j].amp >= 25 * 1024) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index f28b99c0d8dd0..6c79d87e50465 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -21,22 +21,22 @@ using namespace o2::gpu; -void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType) +void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType) { GPUTPCGMMerger& Merger = processors()->tpcMerger; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging; GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger; if (GetProcessingSettings().deterministicGPUReconstruction) { - uint32_t nBorderTracks = withinSlice == 1 ? NSLICES : (2 * NSLICES); + uint32_t nBorderTracks = withinSector == 1 ? NSECTORS : (2 * NSECTORS); runKernel({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0); } - uint32_t n = withinSlice == -1 ? NSLICES / 2 : NSLICES; + uint32_t n = withinSector == -1 ? 
NSECTORS / 2 : NSECTORS; if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) { TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); RecordMarker(&mEvents->single, 0); for (uint32_t i = 0; i < n; i++) { int32_t stream = i % mRec->NStreams(); - runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSlice, mergeMode); + runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSector, mergeMode); } ReleaseEvent(mEvents->single); SynchronizeEventAndRelease(mEvents->init); @@ -44,10 +44,10 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice int32_t stream = i % mRec->NStreams(); int32_t n1, n2; GPUTPCGMBorderTrack *b1, *b2; - int32_t jSlice; - Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSlice, i, withinSlice, mergeMode); + int32_t jSector; + Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, withinSector, mergeMode); gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = MergerShadow.BorderRange(i); - gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSlice) + *processors()->tpcTrackers[jSlice].NTracks(); + gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks(); runKernel({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0); runKernel({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1); deviceEvent* e = nullptr; @@ -55,24 +55,24 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice if (i == n - 1) { // Synchronize all execution on stream 0 with the last kernel ne = std::min(n, mRec->NStreams()); for (int32_t j = 1; j < ne; j++) { - RecordMarker(&mEvents->slice[j], j); + RecordMarker(&mEvents->sector[j], j); } - e = 
&mEvents->slice[1]; + e = &mEvents->sector[1]; ne--; stream = 0; } - runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, e, ne}}, i, withinSlice, mergeMode); + runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, e, ne}}, i, withinSector, mergeMode); } } else { for (uint32_t i = 0; i < n; i++) { - runKernel(GetGridAuto(0, deviceType), i, withinSlice, mergeMode); + runKernel(GetGridAuto(0, deviceType), i, withinSector, mergeMode); } - runKernel({{2 * n, -WarpSize(), 0, deviceType}}, 0, withinSlice, mergeMode); + runKernel({{2 * n, -WarpSize(), 0, deviceType}}, 0, withinSector, mergeMode); for (uint32_t i = 0; i < n; i++) { - runKernel(GetGridAuto(0, deviceType), i, withinSlice, mergeMode); + runKernel(GetGridAuto(0, deviceType), i, withinSector, mergeMode); } } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSlice, mergeMode); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode); mRec->ReturnVolatileDeviceMemory(); } @@ -100,12 +100,12 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } const auto& threadContext = GetThreadContext(); - SynchronizeGPU(); // Need to know the full number of slice tracks + SynchronizeGPU(); // Need to know the full number of sector tracks SetupGPUProcessor(&Merger, true); AllocateRegisteredMemory(Merger.MemoryResOutput(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracks)]); AllocateRegisteredMemory(Merger.MemoryResOutputState(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::sharedClusterMap)]); - if (Merger.CheckSlices()) { + if (Merger.CheckSectors()) { return 1; } @@ -118,48 +118,48 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel(GetGridAuto(0, deviceType), 1); } - for (uint32_t i = 
0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { runKernel({{1, -WarpSize(), 0, deviceType}}, i); runKernel(GetGridAuto(0, deviceType), i); - runKernel(GetGridAuto(0, deviceType), i); + runKernel(GetGridAuto(0, deviceType), i); } if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({{1, -WarpSize(), 0, deviceType}}, NSLICES); - runKernel({{GPUCA_NSLICES, -WarpSize(), 0, deviceType}}, 0); + runKernel({{1, -WarpSize(), 0, deviceType}}, NSECTORS); + runKernel({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 0); } - for (uint32_t i = 0; i < NSLICES; i++) { - runKernel({{1, -WarpSize(), 0, deviceType}}, NSLICES + i); + for (uint32_t i = 0; i < NSECTORS; i++) { + runKernel({{1, -WarpSize(), 0, deviceType}}, NSECTORS + i); runKernel(GetGridAuto(0, deviceType), i); } - runKernel({{1, -WarpSize(), 0, deviceType}}, 2 * NSLICES); + runKernel({{1, -WarpSize(), 0, deviceType}}, 2 * NSECTORS); if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({{GPUCA_NSLICES, -WarpSize(), 0, deviceType}}, 1); + runKernel({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSliceTracks, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSLICES * sizeof(*MergerShadowAll.TmpCounter())); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSlices, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, 
Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); - runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); + runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0); RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); - runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); + runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0); RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); - runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); + runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSlices, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, 
&GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); @@ -281,9 +281,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) #ifdef GPUCA_TPC_GEOMETRY_O2 if (GetProcessingSettings().createO2Output) { - if (mTPCSliceScratchOnStack) { - mRec->PopNonPersistentMemory(RecoStep::TPCSliceTracking, qStr2Tag("TPCSLCD1")); // Return the slice data memory early - mTPCSliceScratchOnStack = false; + if (mTPCSectorScratchOnStack) { + mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLCD1")); // Return the sector data memory early + mTPCSectorScratchOnStack = false; } mRec->PushNonPersistentMemory(qStr2Tag("TPCMERG2")); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx new file mode 100644 index 0000000000000..df7c513fc1120 --- /dev/null +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -0,0 +1,486 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUChainTrackingSectorTracker.cxx +/// \author David Rohr + +#include "GPUChainTracking.h" +#include "GPULogging.h" +#include "GPUO2DataTypes.h" +#include "GPUMemorySizeScalers.h" +#include "GPUTPCClusterData.h" +#include "GPUTrackingInputProvider.h" +#include "GPUTPCClusterOccupancyMap.h" +#include "utils/strtag.h" +#include + +using namespace o2::gpu; + +int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput) +{ + runKernel({GetGridBlk(256, iSector % mRec->NStreams()), {iSector}}); + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), iSector % mRec->NStreams()); + if (synchronizeOutput) { + SynchronizeStream(iSector % mRec->NStreams()); + } + return (0); +} + +int32_t GPUChainTracking::RunTPCTrackingSectors() +{ + if (mRec->GPUStuck()) { + GPUWarning("This GPU is stuck, processing of tracking for this event is skipped!"); + return (1); + } + + const auto& threadContext = GetThreadContext(); + + int32_t retVal = RunTPCTrackingSectors_internal(); + if (retVal) { + SynchronizeGPU(); + } + return (retVal != 0); +} + +int32_t GPUChainTracking::RunTPCTrackingSectors_internal() +{ + if (GetProcessingSettings().debugLevel >= 2) { + GPUInfo("Running TPC Sector Tracker"); + } + bool doGPU = GetRecoStepsGPU() & RecoStep::TPCSectorTracking; + if (!param().par.earlyTpcTransform) { + for (uint32_t i = 0; i < NSECTORS; i++) { + processors()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); + if (doGPU) { + processorsShadow()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); // TODO: not needed I think, anyway copied in SetupGPUProcessor + } + } + mRec->MemoryScalers()->nTPCHits = mIOPtrs.clustersNative->nClustersTotal; + } else { + int32_t offset = 0; + for (uint32_t i = 0; i < 
NSECTORS; i++) { + processors()->tpcTrackers[i].Data().SetClusterData(mIOPtrs.clusterData[i], mIOPtrs.nClusterData[i], offset); + if (doGPU && GetRecoSteps().isSet(RecoStep::TPCConversion)) { + processorsShadow()->tpcTrackers[i].Data().SetClusterData(processorsShadow()->tpcConverter.mClusters + processors()->tpcTrackers[i].Data().ClusterIdOffset(), processors()->tpcTrackers[i].NHitsTotal(), processors()->tpcTrackers[i].Data().ClusterIdOffset()); + } + offset += mIOPtrs.nClusterData[i]; + } + mRec->MemoryScalers()->nTPCHits = offset; + } + GPUInfo("Event has %u TPC Clusters, %d TRD Tracklets", (uint32_t)mRec->MemoryScalers()->nTPCHits, mIOPtrs.nTRDTracklets); + + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + processors()->tpcTrackers[iSector].SetMaxData(mIOPtrs); // First iteration to set data sizes + } + mRec->ComputeReuseMax(nullptr); // Resolve maximums for shared buffers + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + SetupGPUProcessor(&processors()->tpcTrackers[iSector], false); // Prepare custom allocation for 1st stack level + mRec->AllocateRegisteredMemory(processors()->tpcTrackers[iSector].MemoryResSectorScratch()); + } + mRec->PushNonPersistentMemory(qStr2Tag("TPCSLTRK")); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + SetupGPUProcessor(&processors()->tpcTrackers[iSector], true); // Now we allocate + mRec->ResetRegisteredMemoryPointers(&processors()->tpcTrackers[iSector]); // TODO: The above call breaks the GPU ptrs to already allocated memory. This fixes them. Should actually be cleaned up at the source. 
+ processors()->tpcTrackers[iSector].SetupCommonMemory(); + } + + bool streamInit[GPUCA_MAX_STREAMS] = {false}; + if (doGPU) { + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); + // Initialize Startup Constants + processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties().nThreads; + processorsShadow()->tpcTrackers[iSector].SetGPUTextureBase(mRec->DeviceMemoryBase()); + } + + if (PrepareTextures()) { + return (2); + } + + // Copy Tracker Object to GPU Memory + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Copying Tracker objects to GPU"); + } + if (PrepareProfile()) { + return 2; + } + + WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); + + for (int32_t i = 0; i < mRec->NStreams() - 1; i++) { + streamInit[i] = false; + } + streamInit[mRec->NStreams() - 1] = true; + } + if (GPUDebug("Initialization (1)", 0)) { + return (2); + } + + int32_t streamOccMap = mRec->NStreams() - 1; + if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { + AllocateRegisteredMemory(mInputsHost->mResourceOccupancyMap, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcOccupancyMap)]); + } + if (param().rec.tpc.occupancyMapTimeBins) { + if (doGPU) { + ReleaseEvent(mEvents->init); + } + uint32_t* ptr = doGPU ? 
mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap; + auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU); + runKernel(GetGridAutoStep(streamOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); + runKernel(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamOccMap), ptrTmp); + runKernel(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamOccMap), ptrTmp, ptr + 2); + mRec->ReturnVolatileMemory(); + mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage; + if (doGPU) { + GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamOccMap, false, &mEvents->init); + } else { + TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init); + } + } + if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { + uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap; + occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128)); + mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamOccMap); + } + + int32_t streamMap[NSECTORS]; + + bool error = false; + mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { + GPUTPCTracker& trk = processors()->tpcTrackers[iSector]; + GPUTPCTracker& trkShadow = doGPU ? 
processorsShadow()->tpcTrackers[iSector] : trk; + int32_t useStream = (iSector % mRec->NStreams()); + + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Creating Sector Data (Sector %d)", iSector); + } + if (doGPU) { + TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); + streamInit[useStream] = true; + } else { + if (ReadEvent(iSector, 0)) { + GPUError("Error reading event"); + error = 1; + return; + } + } + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}}); + } + if (!doGPU && trk.CheckEmptySector() && GetProcessingSettings().debugLevel == 0) { + return; + } + + if (GetProcessingSettings().debugLevel >= 6) { + *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; + if (GetProcessingSettings().debugMask & 1) { + if (doGPU) { + TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); + } + trk.DumpTrackingData(*mDebugFile); + } + } + + // Initialize temporary memory where needed + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Copying Sector Data to GPU and initializing temporary memory"); + } + runKernel(GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); + + if (!doGPU) { + TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); // Copy Data to GPU Global Memory + } + if (GPUDebug("Initialization (3)", useStream)) { + throw std::runtime_error("memcpy failure"); + } + + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); + streamInit[useStream] = true; + + if (GetProcessingSettings().keepDisplayMemory) { + TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); + memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); + if (GetProcessingSettings().debugMask & 2) { + trk.DumpLinks(*mDebugFile, 0); + } + } + + runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSector}}); + DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); + + runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}}); +#ifdef GPUCA_SORT_STARTHITS_GPU + if (doGPU) { + runKernel({GetGridAuto(useStream), {iSector}}); + } +#endif + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGrid(1, 1, useStream), {iSector}}); + } + DoDebugAndDump(RecoStep::TPCSectorTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); + + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + trk.UpdateMaxData(); + AllocateRegisteredMemory(trk.MemoryResTracklets()); + AllocateRegisteredMemory(trk.MemoryResOutput()); + } + + if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { + runKernel({GetGridAuto(useStream), {iSector}}); + DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { + trk.DumpHitWeights(*mDebugFile); + } + } + + if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { + runKernel({GetGridAuto(useStream), {iSector}}); + runKernel({{1, -ThreadCount(), useStream}, {iSector}}, 1); + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGrid(1, 1, useStream), {iSector}}); + } + 
TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, trk.MemoryResCommon(), useStream, &mEvents->sector[iSector]); + streamMap[iSector] = useStream; + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks()); + } + DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); + } + }); + mRec->SetNActiveThreadsOuterLoop(1); + if (error) { + return (3); + } + + if (doGPU || GetProcessingSettings().debugLevel >= 1) { + if (doGPU) { + ReleaseEvent(mEvents->init); + } + + if (!GetProcessingSettings().trackletSelectorInPipeline) { + if (GetProcessingSettings().trackletConstructorInPipeline) { + SynchronizeGPU(); + } else { + for (int32_t i = 0; i < mRec->NStreams(); i++) { + RecordMarker(&mEvents->stream[i], i); + } + runKernel({GetGridAuto(0), krnlRunRangeNone, {&mEvents->single, mEvents->stream, mRec->NStreams()}}); + for (int32_t i = 0; i < mRec->NStreams(); i++) { + ReleaseEvent(mEvents->stream[i]); + } + SynchronizeEventAndRelease(mEvents->single); + } + + if (GetProcessingSettings().debugLevel >= 4) { + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + DoDebugAndDump(RecoStep::TPCSectorTracking, 128, processors()->tpcTrackers[iSector], &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + } + } + + int32_t runSectors = 0; + int32_t useStream = 0; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector += runSectors) { + if (runSectors < GetProcessingSettings().trackletSelectorSectors) { + runSectors++; + } + runSectors = CAMath::Min(runSectors, NSECTORS - iSector); + if (getKernelProperties().minBlocks * BlockCount() < (uint32_t)runSectors) { + runSectors = getKernelProperties().minBlocks * BlockCount(); + } + + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Running TPC Tracklet selector (Stream %d, Sector %d to %d)", useStream, iSector, iSector + runSectors); + } + runKernel({GetGridAuto(useStream), {iSector, runSectors}}); + 
runKernel({{1, -ThreadCount(), useStream}, {iSector}}, runSectors); + for (uint32_t k = iSector; k < iSector + runSectors; k++) { + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGrid(1, 1, useStream), {k}}); + } + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[k].MemoryResCommon(), useStream, &mEvents->sector[k]); + streamMap[k] = useStream; + } + useStream++; + if (useStream >= mRec->NStreams()) { + useStream = 0; + } + } + } + + mSectorSelectorReady = 0; + + std::array transferRunning; + transferRunning.fill(true); + if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { + if (param().rec.tpc.extrapolationTracking) { + mWriteOutputDone.fill(0); + } + + uint32_t tmpSector = 0; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Transfering Tracks from GPU to Host"); + } + + if (tmpSector == iSector) { + SynchronizeEvents(&mEvents->sector[iSector]); + } + while (tmpSector < NSECTORS && (tmpSector == iSector || IsEventDone(&mEvents->sector[tmpSector]))) { + ReleaseEvent(mEvents->sector[tmpSector]); + if (*processors()->tpcTrackers[tmpSector].NTracks() > 0) { + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[tmpSector].MemoryResOutput(), streamMap[tmpSector], &mEvents->sector[tmpSector]); + } else { + transferRunning[tmpSector] = false; + } + tmpSector++; + } + + if (GetProcessingSettings().keepAllMemory) { + TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[iSector], -1, true); + if (!GetProcessingSettings().trackletConstructorInPipeline) { + if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { + processors()->tpcTrackers[iSector].DumpHitWeights(*mDebugFile); + } + } + if 
(!GetProcessingSettings().trackletSelectorInPipeline) { + if (GetProcessingSettings().debugMask & 512) { + processors()->tpcTrackers[iSector].DumpTrackHits(*mDebugFile); + } + } + } + + if (transferRunning[iSector]) { + SynchronizeEvents(&mEvents->sector[iSector]); + } + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Tracks Transfered: %d / %d", *processors()->tpcTrackers[iSector].NTracks(), *processors()->tpcTrackers[iSector].NTrackHits()); + } + + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Data ready for sector %d", iSector); + } + mSectorSelectorReady = iSector; + + if (param().rec.tpc.extrapolationTracking) { + for (uint32_t tmpSector2a = 0; tmpSector2a <= iSector; tmpSector2a++) { + uint32_t tmpSector2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(tmpSector2a); + uint32_t sectorLeft, sectorRight; + GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight); + + if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mWriteOutputDone[tmpSector2] == 0) { + ExtrapolationTracking(tmpSector2, 0); + WriteOutput(tmpSector2, 0); + mWriteOutputDone[tmpSector2] = 1; + } + } + } else { + WriteOutput(iSector, 0); + } + } + } + if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.extrapolationTracking) { + std::vector blocking(NSECTORS * mRec->NStreams()); + for (int32_t i = 0; i < NSECTORS; i++) { + for (int32_t j = 0; j < mRec->NStreams(); j++) { + blocking[i * mRec->NStreams() + j] = i % mRec->NStreams() == j; + } + } + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector); + if (!((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)))) { + uint32_t sectorLeft, sectorRight; + GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, 
sectorLeft, sectorRight); + if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()]) { + StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorLeft]); + blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()] = true; + } + if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()]) { + StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorRight]); + blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()] = true; + } + } + ExtrapolationTracking(tmpSector, 0, false); + } + } + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + if (doGPU && transferRunning[iSector]) { + ReleaseEvent(mEvents->sector[iSector]); + } + } + } else { + mSectorSelectorReady = NSECTORS; + mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { + if (param().rec.tpc.extrapolationTracking) { + ExtrapolationTracking(iSector, 0); + } + if (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) { + WriteOutput(iSector, 0); + } + }); + mRec->SetNActiveThreadsOuterLoop(1); + } + + if (param().rec.tpc.extrapolationTracking && GetProcessingSettings().debugLevel >= 3) { + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + GPUInfo("Sector %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSector, + processors()->tpcTrackers[iSector].CommonMemory()->nLocalTracks, processors()->tpcTrackers[iSector].CommonMemory()->nTracks, processors()->tpcTrackers[iSector].CommonMemory()->nLocalTrackHits, processors()->tpcTrackers[iSector].CommonMemory()->nTrackHits); + } + } + + if (GetProcessingSettings().debugMask & 1024 && !GetProcessingSettings().deterministicGPUReconstruction) { + for (uint32_t i = 0; i < NSECTORS; i++) { + processors()->tpcTrackers[i].DumpOutput(*mDebugFile); + } + } + + if (DoProfile()) { + return (1); + } + for (uint32_t i = 0; i < NSECTORS; i++) { + mIOPtrs.nSectorTracks[i] = 
*processors()->tpcTrackers[i].NTracks(); + mIOPtrs.sectorTracks[i] = processors()->tpcTrackers[i].Tracks(); + mIOPtrs.nSectorClusters[i] = *processors()->tpcTrackers[i].NTrackHits(); + mIOPtrs.sectorClusters[i] = processors()->tpcTrackers[i].TrackHits(); + if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) { + TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[i], -1, true); + } + } + if (GetProcessingSettings().debugLevel >= 2) { + GPUInfo("TPC Sector Tracker finished"); + } + mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLTRK")); + return 0; +} + +int32_t GPUChainTracking::ReadEvent(uint32_t iSector, int32_t threadId) +{ + if (GetProcessingSettings().debugLevel >= 5) { + GPUInfo("Running ReadEvent for sector %d on thread %d\n", iSector, threadId); + } + runKernel({{GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU)}, {iSector}}); + if (GetProcessingSettings().debugLevel >= 5) { + GPUInfo("Finished ReadEvent for sector %d on thread %d\n", iSector, threadId); + } + return (0); +} + +void GPUChainTracking::WriteOutput(int32_t iSector, int32_t threadId) +{ + if (GetProcessingSettings().debugLevel >= 5) { + GPUInfo("Running WriteOutput for sector %d on thread %d\n", iSector, threadId); + } + processors()->tpcTrackers[iSector].WriteOutputPrepare(); + processors()->tpcTrackers[iSector].WriteOutput(); + if (GetProcessingSettings().debugLevel >= 5) { + GPUInfo("Finished WriteOutput for sector %d on thread %d\n", iSector, threadId); + } +} diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx deleted file mode 100644 index cab025b03e8b6..0000000000000 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ /dev/null @@ -1,490 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
-// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUChainTrackingSliceTracker.cxx -/// \author David Rohr - -#include "GPUChainTracking.h" -#include "GPULogging.h" -#include "GPUO2DataTypes.h" -#include "GPUMemorySizeScalers.h" -#include "GPUTPCClusterData.h" -#include "GPUTrackingInputProvider.h" -#include "GPUTPCClusterOccupancyMap.h" -#include "utils/strtag.h" -#include - -#include - -using namespace o2::gpu; - -int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput) -{ - runKernel({GetGridBlk(256, iSlice % mRec->NStreams()), {iSlice}}); - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[iSlice].MemoryResCommon(), iSlice % mRec->NStreams()); - if (synchronizeOutput) { - SynchronizeStream(iSlice % mRec->NStreams()); - } - return (0); -} - -int32_t GPUChainTracking::RunTPCTrackingSlices() -{ - if (mRec->GPUStuck()) { - GPUWarning("This GPU is stuck, processing of tracking for this event is skipped!"); - return (1); - } - - const auto& threadContext = GetThreadContext(); - - int32_t retVal = RunTPCTrackingSlices_internal(); - if (retVal) { - SynchronizeGPU(); - } - return (retVal != 0); -} - -int32_t GPUChainTracking::RunTPCTrackingSlices_internal() -{ - if (GetProcessingSettings().debugLevel >= 2) { - GPUInfo("Running TPC Slice Tracker"); - } - bool doGPU = GetRecoStepsGPU() & RecoStep::TPCSliceTracking; - if (!param().par.earlyTpcTransform) { - for (uint32_t i = 0; i < NSLICES; i++) { - processors()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], 
mIOPtrs.clustersNative->clusterOffset[i][0]); - if (doGPU) { - processorsShadow()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); // TODO: not needed I think, anyway copied in SetupGPUProcessor - } - } - mRec->MemoryScalers()->nTPCHits = mIOPtrs.clustersNative->nClustersTotal; - } else { - int32_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { - processors()->tpcTrackers[i].Data().SetClusterData(mIOPtrs.clusterData[i], mIOPtrs.nClusterData[i], offset); - if (doGPU && GetRecoSteps().isSet(RecoStep::TPCConversion)) { - processorsShadow()->tpcTrackers[i].Data().SetClusterData(processorsShadow()->tpcConverter.mClusters + processors()->tpcTrackers[i].Data().ClusterIdOffset(), processors()->tpcTrackers[i].NHitsTotal(), processors()->tpcTrackers[i].Data().ClusterIdOffset()); - } - offset += mIOPtrs.nClusterData[i]; - } - mRec->MemoryScalers()->nTPCHits = offset; - } - GPUInfo("Event has %u TPC Clusters, %d TRD Tracklets", (uint32_t)mRec->MemoryScalers()->nTPCHits, mIOPtrs.nTRDTracklets); - - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - processors()->tpcTrackers[iSlice].SetMaxData(mIOPtrs); // First iteration to set data sizes - } - mRec->ComputeReuseMax(nullptr); // Resolve maximums for shared buffers - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - SetupGPUProcessor(&processors()->tpcTrackers[iSlice], false); // Prepare custom allocation for 1st stack level - mRec->AllocateRegisteredMemory(processors()->tpcTrackers[iSlice].MemoryResSliceScratch()); - } - mRec->PushNonPersistentMemory(qStr2Tag("TPCSLTRK")); - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - SetupGPUProcessor(&processors()->tpcTrackers[iSlice], true); // Now we allocate - mRec->ResetRegisteredMemoryPointers(&processors()->tpcTrackers[iSlice]); // TODO: The above call breaks the GPU ptrs to already allocated memory. This fixes them. Should actually be cleaned up at the source. 
- processors()->tpcTrackers[iSlice].SetupCommonMemory(); - } - - bool streamInit[GPUCA_MAX_STREAMS] = {false}; - if (doGPU) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - processorsShadow()->tpcTrackers[iSlice].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); - // Initialize Startup Constants - processors()->tpcTrackers[iSlice].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSLICES - 1 - iSlice) / NSLICES) * getKernelProperties().nThreads; - processorsShadow()->tpcTrackers[iSlice].SetGPUTextureBase(mRec->DeviceMemoryBase()); - } - - if (PrepareTextures()) { - return (2); - } - - // Copy Tracker Object to GPU Memory - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Copying Tracker objects to GPU"); - } - if (PrepareProfile()) { - return 2; - } - - WriteToConstantMemory(RecoStep::TPCSliceTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSLICES, mRec->NStreams() - 1, &mEvents->init); - - for (int32_t i = 0; i < mRec->NStreams() - 1; i++) { - streamInit[i] = false; - } - streamInit[mRec->NStreams() - 1] = true; - } - if (GPUDebug("Initialization (1)", 0)) { - return (2); - } - - int32_t streamOccMap = mRec->NStreams() - 1; - if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { - AllocateRegisteredMemory(mInputsHost->mResourceOccupancyMap, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcOccupancyMap)]); - } - if (param().rec.tpc.occupancyMapTimeBins) { - if (doGPU) { - ReleaseEvent(mEvents->init); - } - uint32_t* ptr = doGPU ? 
mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap; - auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU); - runKernel(GetGridAutoStep(streamOccMap, RecoStep::TPCSliceTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); - runKernel(GetGridBlk(GPUCA_NSLICES * GPUCA_ROW_COUNT, streamOccMap), ptrTmp); - runKernel(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamOccMap), ptrTmp, ptr + 2); - mRec->ReturnVolatileMemory(); - mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage; - if (doGPU) { - GPUMemCpy(RecoStep::TPCSliceTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamOccMap, false, &mEvents->init); - } else { - TransferMemoryResourceLinkToGPU(RecoStep::TPCSliceTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init); - } - } - if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { - uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap; - occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128)); - mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamOccMap); - } - - int32_t streamMap[NSLICES]; - - bool error = false; - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, NSLICES)).execute([&] { - tbb::parallel_for(0, NSLICES, [&](auto iSlice) { - GPUTPCTracker& trk = processors()->tpcTrackers[iSlice]; - GPUTPCTracker& trkShadow = doGPU ? 
processorsShadow()->tpcTrackers[iSlice] : trk; - int32_t useStream = (iSlice % mRec->NStreams()); - - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Creating Slice Data (Slice %d)", iSlice); - } - if (doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); - streamInit[useStream] = true; - } else { - if (ReadEvent(iSlice, 0)) { - GPUError("Error reading event"); - error = 1; - return; - } - } - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}}); - } - if (!doGPU && trk.CheckEmptySlice() && GetProcessingSettings().debugLevel == 0) { - return; - } - - if (GetProcessingSettings().debugLevel >= 6) { - *mDebugFile << "\n\nReconstruction: Slice " << iSlice << "/" << NSLICES << std::endl; - if (GetProcessingSettings().debugMask & 1) { - if (doGPU) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); - } - trk.DumpSliceData(*mDebugFile); - } - } - - // Initialize temporary memory where needed - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Copying Slice Data to GPU and initializing temporary memory"); - } - runKernel(GetGridAutoStep(useStream, RecoStep::TPCSliceTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); - - if (!doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); // Copy Data to GPU Global Memory - } - if (GPUDebug("Initialization (3)", useStream)) { - throw std::runtime_error("memcpy failure"); - } - - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); - streamInit[useStream] = true; - - if (GetProcessingSettings().keepDisplayMemory) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); - memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); - if (GetProcessingSettings().debugMask & 2) { - trk.DumpLinks(*mDebugFile, 0); - } - } - - runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSlice}}); - DoDebugAndDump(RecoStep::TPCSliceTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); - - runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSlice}}); -#ifdef GPUCA_SORT_STARTHITS_GPU - if (doGPU) { - runKernel({GetGridAuto(useStream), {iSlice}}); - } -#endif - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSlice}}); - } - DoDebugAndDump(RecoStep::TPCSliceTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); - - if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - trk.UpdateMaxData(); - AllocateRegisteredMemory(trk.MemoryResTracklets()); - AllocateRegisteredMemory(trk.MemoryResOutput()); - } - - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { - runKernel({GetGridAuto(useStream), {iSlice}}); - DoDebugAndDump(RecoStep::TPCSliceTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { - trk.DumpHitWeights(*mDebugFile); - } - } - - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { - runKernel({GetGridAuto(useStream), {iSlice}}); - runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, 1); - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSlice}}); - } - 
TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, trk.MemoryResCommon(), useStream, &mEvents->slice[iSlice]); - streamMap[iSlice] = useStream; - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Slice %u, Number of tracks: %d", iSlice, *trk.NTracks()); - } - DoDebugAndDump(RecoStep::TPCSliceTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); - } }, tbb::simple_partitioner()); - }); - mRec->SetNActiveThreadsOuterLoop(1); - if (error) { - return (3); - } - - if (doGPU || GetProcessingSettings().debugLevel >= 1) { - if (doGPU) { - ReleaseEvent(mEvents->init); - } - - if (!GetProcessingSettings().trackletSelectorInPipeline) { - if (GetProcessingSettings().trackletConstructorInPipeline) { - SynchronizeGPU(); - } else { - for (int32_t i = 0; i < mRec->NStreams(); i++) { - RecordMarker(&mEvents->stream[i], i); - } - runKernel({GetGridAuto(0), krnlRunRangeNone, {&mEvents->single, mEvents->stream, mRec->NStreams()}}); - for (int32_t i = 0; i < mRec->NStreams(); i++) { - ReleaseEvent(mEvents->stream[i]); - } - SynchronizeEventAndRelease(mEvents->single); - } - - if (GetProcessingSettings().debugLevel >= 4) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - DoDebugAndDump(RecoStep::TPCSliceTracking, 128, processors()->tpcTrackers[iSlice], &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - } - } - - int32_t runSlices = 0; - int32_t useStream = 0; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice += runSlices) { - if (runSlices < GetProcessingSettings().trackletSelectorSlices) { - runSlices++; - } - runSlices = CAMath::Min(runSlices, NSLICES - iSlice); - if (getKernelProperties().minBlocks * BlockCount() < (uint32_t)runSlices) { - runSlices = getKernelProperties().minBlocks * BlockCount(); - } - - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Running TPC Tracklet selector (Stream %d, Slice %d to %d)", useStream, iSlice, iSlice + runSlices); - } - runKernel({GetGridAuto(useStream), {iSlice, runSlices}}); - runKernel({{1, 
-ThreadCount(), useStream}, {iSlice}}, runSlices); - for (uint32_t k = iSlice; k < iSlice + runSlices; k++) { - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {k}}); - } - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[k].MemoryResCommon(), useStream, &mEvents->slice[k]); - streamMap[k] = useStream; - } - useStream++; - if (useStream >= mRec->NStreams()) { - useStream = 0; - } - } - } - - mSliceSelectorReady = 0; - - std::array transferRunning; - transferRunning.fill(true); - if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { - if (param().rec.tpc.extrapolationTracking) { - mWriteOutputDone.fill(0); - } - - uint32_t tmpSlice = 0; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Transfering Tracks from GPU to Host"); - } - - if (tmpSlice == iSlice) { - SynchronizeEvents(&mEvents->slice[iSlice]); - } - while (tmpSlice < NSLICES && (tmpSlice == iSlice || IsEventDone(&mEvents->slice[tmpSlice]))) { - ReleaseEvent(mEvents->slice[tmpSlice]); - if (*processors()->tpcTrackers[tmpSlice].NTracks() > 0) { - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[tmpSlice].MemoryResOutput(), streamMap[tmpSlice], &mEvents->slice[tmpSlice]); - } else { - transferRunning[tmpSlice] = false; - } - tmpSlice++; - } - - if (GetProcessingSettings().keepAllMemory) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &processors()->tpcTrackers[iSlice], -1, true); - if (!GetProcessingSettings().trackletConstructorInPipeline) { - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { - processors()->tpcTrackers[iSlice].DumpHitWeights(*mDebugFile); - } - } - if (!GetProcessingSettings().trackletSelectorInPipeline) { - if (GetProcessingSettings().debugMask & 
512) { - processors()->tpcTrackers[iSlice].DumpTrackHits(*mDebugFile); - } - } - } - - if (transferRunning[iSlice]) { - SynchronizeEvents(&mEvents->slice[iSlice]); - } - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Tracks Transfered: %d / %d", *processors()->tpcTrackers[iSlice].NTracks(), *processors()->tpcTrackers[iSlice].NTrackHits()); - } - - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Data ready for slice %d", iSlice); - } - mSliceSelectorReady = iSlice; - - if (param().rec.tpc.extrapolationTracking) { - for (uint32_t tmpSlice2a = 0; tmpSlice2a <= iSlice; tmpSlice2a++) { - uint32_t tmpSlice2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceOrder(tmpSlice2a); - uint32_t sliceLeft, sliceRight; - GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceLeftRight(tmpSlice2, sliceLeft, sliceRight); - - if (tmpSlice2 <= iSlice && sliceLeft <= iSlice && sliceRight <= iSlice && mWriteOutputDone[tmpSlice2] == 0) { - ExtrapolationTracking(tmpSlice2, 0); - WriteOutput(tmpSlice2, 0); - mWriteOutputDone[tmpSlice2] = 1; - } - } - } else { - WriteOutput(iSlice, 0); - } - } - } - if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.extrapolationTracking) { - std::vector blocking(NSLICES * mRec->NStreams()); - for (int32_t i = 0; i < NSLICES; i++) { - for (int32_t j = 0; j < mRec->NStreams(); j++) { - blocking[i * mRec->NStreams() + j] = i % mRec->NStreams() == j; - } - } - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t tmpSlice = GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceOrder(iSlice); - if (!((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)))) { - uint32_t sliceLeft, sliceRight; - GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceLeftRight(tmpSlice, sliceLeft, sliceRight); - if (doGPU && !blocking[tmpSlice * mRec->NStreams() + sliceLeft % mRec->NStreams()]) { - StreamWaitForEvents(tmpSlice % 
mRec->NStreams(), &mEvents->slice[sliceLeft]); - blocking[tmpSlice * mRec->NStreams() + sliceLeft % mRec->NStreams()] = true; - } - if (doGPU && !blocking[tmpSlice * mRec->NStreams() + sliceRight % mRec->NStreams()]) { - StreamWaitForEvents(tmpSlice % mRec->NStreams(), &mEvents->slice[sliceRight]); - blocking[tmpSlice * mRec->NStreams() + sliceRight % mRec->NStreams()] = true; - } - } - ExtrapolationTracking(tmpSlice, 0, false); - } - } - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - if (doGPU && transferRunning[iSlice]) { - ReleaseEvent(mEvents->slice[iSlice]); - } - } - } else { - mSliceSelectorReady = NSLICES; - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, NSLICES)).execute([&] { - tbb::parallel_for(0, NSLICES, [&](auto iSlice) { - if (param().rec.tpc.extrapolationTracking) { - ExtrapolationTracking(iSlice, 0); - } - if (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) { - WriteOutput(iSlice, 0); - } }, tbb::simple_partitioner()); - }); - mRec->SetNActiveThreadsOuterLoop(1); - } - - if (param().rec.tpc.extrapolationTracking && GetProcessingSettings().debugLevel >= 3) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - GPUInfo("Slice %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSlice, - processors()->tpcTrackers[iSlice].CommonMemory()->nLocalTracks, processors()->tpcTrackers[iSlice].CommonMemory()->nTracks, processors()->tpcTrackers[iSlice].CommonMemory()->nLocalTrackHits, processors()->tpcTrackers[iSlice].CommonMemory()->nTrackHits); - } - } - - if (GetProcessingSettings().debugMask & 1024 && !GetProcessingSettings().deterministicGPUReconstruction) { - for (uint32_t i = 0; i < NSLICES; i++) { - processors()->tpcTrackers[i].DumpOutput(*mDebugFile); - } - } - - if (DoProfile()) { - return (1); - } - for (uint32_t i = 0; i < NSLICES; i++) { - mIOPtrs.nSliceTracks[i] = *processors()->tpcTrackers[i].NTracks(); - mIOPtrs.sliceTracks[i] = processors()->tpcTrackers[i].Tracks(); - 
mIOPtrs.nSliceClusters[i] = *processors()->tpcTrackers[i].NTrackHits(); - mIOPtrs.sliceClusters[i] = processors()->tpcTrackers[i].TrackHits(); - if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &processors()->tpcTrackers[i], -1, true); - } - } - if (GetProcessingSettings().debugLevel >= 2) { - GPUInfo("TPC Slice Tracker finished"); - } - mRec->PopNonPersistentMemory(RecoStep::TPCSliceTracking, qStr2Tag("TPCSLTRK")); - return 0; -} - -int32_t GPUChainTracking::ReadEvent(uint32_t iSlice, int32_t threadId) -{ - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Running ReadEvent for slice %d on thread %d\n", iSlice, threadId); - } - runKernel({{GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU)}, {iSlice}}); - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Finished ReadEvent for slice %d on thread %d\n", iSlice, threadId); - } - return (0); -} - -void GPUChainTracking::WriteOutput(int32_t iSlice, int32_t threadId) -{ - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Running WriteOutput for slice %d on thread %d\n", iSlice, threadId); - } - processors()->tpcTrackers[iSlice].WriteOutputPrepare(); - processors()->tpcTrackers[iSlice].WriteOutput(); - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Finished WriteOutput for slice %d on thread %d\n", iSlice, threadId); - } -} diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx index d91fed4046de0..db5e5ae3aeb75 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx @@ -30,7 +30,7 @@ using namespace o2::tpc; bool GPUChainTracking::NeedTPCClustersOnGPU() { - return (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) || (mRec->GetRecoStepsGPU() & 
GPUDataTypes::RecoStep::TPCMerging) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression); + return (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression); } int32_t GPUChainTracking::ConvertNativeToClusterData() @@ -56,7 +56,7 @@ int32_t GPUChainTracking::ConvertNativeToClusterData() } if (!param().par.earlyTpcTransform) { if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Early transform inactive, skipping TPC Early transformation kernel, transformed on the fly during slice data creation / refit"); + GPUInfo("Early transform inactive, skipping TPC Early transformation kernel, transformed on the fly during sector data creation / refit"); } if (transferClusters) { SynchronizeStream(0); // TODO: Synchronize implicitly with next step @@ -64,18 +64,18 @@ int32_t GPUChainTracking::ConvertNativeToClusterData() return 0; } SetupGPUProcessor(&convert, true); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { convert.mMemory->clusters[i] = convertShadow.mClusters + mIOPtrs.clustersNative->clusterOffset[i][0]; } WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->tpcConverter - (char*)processors(), &convertShadow, sizeof(convertShadow), 0); TransferMemoryResourcesToGPU(RecoStep::TPCConversion, &convert, 0); - runKernel(GetGridBlk(NSLICES * GPUCA_ROW_COUNT, 0)); + runKernel(GetGridBlk(NSECTORS * GPUCA_ROW_COUNT, 0)); TransferMemoryResourcesToHost(RecoStep::TPCConversion, &convert, 0); SynchronizeStream(0); - for (uint32_t i = 0; i < NSLICES; i++) { - mIOPtrs.nClusterData[i] = (i == NSLICES - 1 ? 
mIOPtrs.clustersNative->nClustersTotal : mIOPtrs.clustersNative->clusterOffset[i + 1][0]) - mIOPtrs.clustersNative->clusterOffset[i][0]; + for (uint32_t i = 0; i < NSECTORS; i++) { + mIOPtrs.nClusterData[i] = (i == NSECTORS - 1 ? mIOPtrs.clustersNative->nClustersTotal : mIOPtrs.clustersNative->clusterOffset[i + 1][0]) - mIOPtrs.clustersNative->clusterOffset[i][0]; mIOPtrs.clusterData[i] = convert.mClusters + mIOPtrs.clustersNative->clusterOffset[i][0]; } mRec->PopNonPersistentMemory(RecoStep::TPCConversion, qStr2Tag("TPCTRANS")); @@ -89,7 +89,7 @@ void GPUChainTracking::ConvertNativeToClusterDataLegacy() *tmp = *mIOPtrs.clustersNative; } GPUReconstructionConvert::ConvertNativeToClusterData(mIOMem.clusterNativeAccess.get(), mIOMem.clusterData, mIOPtrs.nClusterData, processors()->calibObjects.fastTransform, param().continuousMaxTimeBin); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { mIOPtrs.clusterData[i] = mIOMem.clusterData[i].get(); if (GetProcessingSettings().registerStandaloneInputMemory) { if (mRec->registerMemoryForGPU(mIOMem.clusterData[i].get(), mIOPtrs.nClusterData[i] * sizeof(*mIOPtrs.clusterData[i]))) { @@ -104,7 +104,7 @@ void GPUChainTracking::ConvertNativeToClusterDataLegacy() void GPUChainTracking::ConvertRun2RawToNative() { GPUReconstructionConvert::ConvertRun2RawToNative(*mIOMem.clusterNativeAccess, mIOMem.clustersNative, mIOPtrs.rawClusters, mIOPtrs.nRawClusters); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { mIOPtrs.rawClusters[i] = nullptr; mIOPtrs.nRawClusters[i] = 0; mIOMem.rawClusters[i].reset(nullptr); @@ -129,10 +129,10 @@ void GPUChainTracking::ConvertZSEncoder(int32_t version) GPUReconstructionConvert::RunZSEncoderCreateMeta(mIOMem.tpcZSpages.get(), &mIOMem.tpcZSmeta2->n[0][0], &mIOMem.tpcZSmeta2->ptr[0][0], mIOMem.tpcZSmeta.get()); mIOPtrs.tpcZS = mIOMem.tpcZSmeta.get(); if (GetProcessingSettings().registerStandaloneInputMemory) { - for (uint32_t i = 0; i < 
NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[i].count[j]; k++) { - if (mRec->registerMemoryForGPU(mIOPtrs.tpcZS->slice[i].zsPtr[j][k], mIOPtrs.tpcZS->slice[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE)) { + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { + if (mRec->registerMemoryForGPU(mIOPtrs.tpcZS->sector[i].zsPtr[j][k], mIOPtrs.tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE)) { throw std::runtime_error("Error registering memory for GPU"); } } @@ -151,10 +151,10 @@ int32_t GPUChainTracking::ForwardTPCDigits() if (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) { throw std::runtime_error("Cannot forward TPC digits with Clusterizer on GPU"); } - std::vector tmp[NSLICES][GPUCA_ROW_COUNT]; + std::vector tmp[NSECTORS][GPUCA_ROW_COUNT]; uint32_t nTotal = 0; const float zsThreshold = param().rec.tpc.zsThreshold; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < mIOPtrs.tpcPackedDigits->nTPCDigits[i]; j++) { const auto& d = mIOPtrs.tpcPackedDigits->tpcDigits[i][j]; if (d.getChargeFloat() >= zsThreshold) { @@ -172,7 +172,7 @@ int32_t GPUChainTracking::ForwardTPCDigits() mIOMem.clustersNative.reset(new ClusterNative[nTotal]); nTotal = 0; mClusterNativeAccess->clustersLinear = mIOMem.clustersNative.get(); - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { mClusterNativeAccess->nClusters[i][j] = tmp[i][j].size(); memcpy(&mIOMem.clustersNative[nTotal], tmp[i][j].data(), tmp[i][j].size() * sizeof(*mClusterNativeAccess->clustersLinear)); diff --git a/GPU/GPUTracking/Global/GPUErrorCodes.h b/GPU/GPUTracking/Global/GPUErrorCodes.h index 6f3ba4d2b47bf..f35f5fc81a382 100644 --- a/GPU/GPUTracking/Global/GPUErrorCodes.h +++ b/GPU/GPUTracking/Global/GPUErrorCodes.h @@ -28,10 +28,10 @@ 
GPUCA_ERROR_CODE(8, ERROR_GLOBAL_TRACKING_TRACK_HIT_OVERFLOW, Sector, Value, Max GPUCA_ERROR_CODE(9, ERROR_LOOPER_OVERFLOW) GPUCA_ERROR_CODE(10, ERROR_MERGER_CE_HIT_OVERFLOW, Value, Max) GPUCA_ERROR_CODE(11, ERROR_MERGER_LOOPER_OVERFLOW, Value, Max) -GPUCA_ERROR_CODE(12, ERROR_SLICEDATA_FIRSTHITINBIN_OVERFLOW, Sector, Value, Max) -GPUCA_ERROR_CODE(13, ERROR_SLICEDATA_HITINROW_OVERFLOW, SectorRow, Value, Max) -GPUCA_ERROR_CODE(14, ERROR_SLICEDATA_BIN_OVERFLOW, SectorRow, Value, Max) -GPUCA_ERROR_CODE(15, ERROR_SLICEDATA_Z_OVERFLOW, Sector, Value) +GPUCA_ERROR_CODE(12, ERROR_SECTORDATA_FIRSTHITINBIN_OVERFLOW, Sector, Value, Max) +GPUCA_ERROR_CODE(13, ERROR_SECTORDATA_HITINROW_OVERFLOW, SectorRow, Value, Max) +GPUCA_ERROR_CODE(14, ERROR_SECTORDATA_BIN_OVERFLOW, SectorRow, Value, Max) +GPUCA_ERROR_CODE(15, ERROR_SECTORDATA_Z_OVERFLOW, Sector, Value) GPUCA_ERROR_CODE(16, ERROR_MERGER_HIT_OVERFLOW, Value, Max) GPUCA_ERROR_CODE(17, ERROR_MERGER_TRACK_OVERFLOW, Value, Max) GPUCA_ERROR_CODE(18, ERROR_COMPRESSION_ROW_HIT_OVERFLOW, SectorRow, Value, Max) diff --git a/GPU/GPUTracking/Global/GPUErrors.h b/GPU/GPUTracking/Global/GPUErrors.h index 377736a5dacb9..cd86390bc1b01 100644 --- a/GPU/GPUTracking/Global/GPUErrors.h +++ b/GPU/GPUTracking/Global/GPUErrors.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUErrors @@ -44,7 +42,6 @@ class GPUErrors GPUglobalref() uint32_t* mErrors; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx index 445bb1a9c56fd..a5457bf3f2f23 100644 --- a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx +++ b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx @@ -27,8 +27,8 @@ void* GPUTrackingInputProvider::SetPointersInputZS(void* mem) { if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding) { computePointerWithAlignment(mem, 
mPzsMeta); - computePointerWithAlignment(mem, mPzsSizes, GPUTrackingInOutZS::NSLICES * GPUTrackingInOutZS::NENDPOINTS); - computePointerWithAlignment(mem, mPzsPtrs, GPUTrackingInOutZS::NSLICES * GPUTrackingInOutZS::NENDPOINTS); + computePointerWithAlignment(mem, mPzsSizes, GPUTrackingInOutZS::NSECTORS * GPUTrackingInOutZS::NENDPOINTS); + computePointerWithAlignment(mem, mPzsPtrs, GPUTrackingInOutZS::NSECTORS * GPUTrackingInOutZS::NENDPOINTS); } return mem; } diff --git a/GPU/GPUTracking/Global/GPUTrackingInputProvider.h b/GPU/GPUTracking/Global/GPUTrackingInputProvider.h index 751c9f0229f3d..910e87fd02126 100644 --- a/GPU/GPUTracking/Global/GPUTrackingInputProvider.h +++ b/GPU/GPUTracking/Global/GPUTrackingInputProvider.h @@ -18,18 +18,13 @@ #include "GPUDef.h" #include "GPUProcessor.h" -namespace o2 -{ -namespace tpc +namespace o2::tpc { struct ClusterNative; struct ClusterNativeAccess; -} // namespace tpc -} // namespace o2 +} // namespace o2::tpc -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTrackingInOutZS; @@ -90,7 +85,6 @@ class GPUTrackingInputProvider : public GPUProcessor uint32_t* mErrorCodes = nullptr; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h index 70d9676c4fe26..d59b8fea28f08 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h @@ -18,14 +18,12 @@ #include "GPUCommonDef.h" #include "GPUCommonMath.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCGMBorderTrack * - * The class describes TPC slice tracks at sector borders. + * The class describes TPC sector tracks at sector borders. 
* Used in GPUTPCGMMerger * */ @@ -127,7 +125,6 @@ class GPUTPCGMBorderTrack ClassDefNV(GPUTPCGMBorderTrack, 1); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 00d4b1822bdc1..2d73279cf1fe7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -18,9 +18,7 @@ #include "GPUTPCGMTrackParam.h" #include "GPUTPCGMMergedTrackHit.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCGMMergedTrack @@ -125,7 +123,6 @@ class GPUTPCGMMergedTrack ClassDefNV(GPUTPCGMMergedTrack, 0); #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index a0b2c7b12246a..f6a50565bac52 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -42,13 +42,13 @@ #include "GPUCommonConstants.h" #include "GPUTPCTrackParam.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUTPCGMMergedTrack.h" #include "GPUParam.h" #include "GPUTPCTrackLinearisation.h" #include "GPUTPCGMTrackParam.h" -#include "GPUTPCGMSliceTrack.h" +#include "GPUTPCGMSectorTrack.h" #include "GPUTPCGMBorderTrack.h" #include "DataFormatsTPC/ClusterNative.h" @@ -58,14 +58,18 @@ #include "SimulationDataFormat/MCCompLabel.h" #endif +namespace o2::gpu::internal +{ +} using namespace o2::gpu; +using namespace o2::gpu::internal; using namespace o2::tpc; using namespace gputpcgmmergertypes; static constexpr int32_t kMaxParts = 400; static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; -namespace o2::gpu +namespace o2::gpu::internal { struct MergeLooperParam { float refz; @@ -73,7 +77,7 @@ struct MergeLooperParam { float y; uint32_t id; }; -} // namespace o2::gpu +} // namespace o2::gpu::internal #ifndef 
GPUCA_GPUCODE @@ -81,20 +85,17 @@ struct MergeLooperParam { #include "GPUMemorySizeScalers.h" GPUTPCGMMerger::GPUTPCGMMerger() - : mTrackLinks(nullptr), mNTotalSliceTracks(0), mNMaxTracks(0), mNMaxSingleSliceTracks(0), mNMaxOutputTrackClusters(0), mNMaxClusters(0), mMemoryResMemory(-1), mNClusters(0), mOutputTracks(nullptr), mSliceTrackInfos(nullptr), mSliceTrackInfoIndex(nullptr), mClusters(nullptr), mClustersXYZ(nullptr), mClusterAttachment(nullptr), mOutputTracksTPCO2(nullptr), mOutputClusRefsTPCO2(nullptr), mOutputTracksTPCO2MC(nullptr), mTrackOrderAttach(nullptr), mTrackOrderProcess(nullptr), mBorderMemory(nullptr), mBorderRangeMemory(nullptr), mMemory(nullptr), mRetryRefitIds(nullptr), mLoopData(nullptr) { - //* constructor - - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - mNextSliceInd[iSlice] = iSlice + 1; - mPrevSliceInd[iSlice] = iSlice - 1; - } - int32_t mid = NSLICES / 2 - 1; - int32_t last = NSLICES - 1; - mNextSliceInd[mid] = 0; - mPrevSliceInd[0] = mid; - mNextSliceInd[last] = NSLICES / 2; - mPrevSliceInd[NSLICES / 2] = last; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + mNextSectorInd[iSector] = iSector + 1; + mPrevSectorInd[iSector] = iSector - 1; + } + int32_t mid = NSECTORS / 2 - 1; + int32_t last = NSECTORS - 1; + mNextSectorInd[mid] = 0; + mPrevSectorInd[0] = mid; + mNextSectorInd[last] = NSECTORS / 2; + mPrevSectorInd[NSECTORS / 2] = last; } // DEBUG CODE @@ -103,13 +104,13 @@ GPUTPCGMMerger::GPUTPCGMMerger() void GPUTPCGMMerger::CheckMergedTracks() { - std::vector trkUsed(SliceTrackInfoLocalTotal()); - for (int32_t i = 0; i < SliceTrackInfoLocalTotal(); i++) { + std::vector trkUsed(SectorTrackInfoLocalTotal()); + for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) { trkUsed[i] = false; } - for (int32_t itr = 0; itr < SliceTrackInfoLocalTotal(); itr++) { - GPUTPCGMSliceTrack& track = mSliceTrackInfos[itr]; + for (int32_t itr = 0; itr < SectorTrackInfoLocalTotal(); itr++) { + GPUTPCGMSectorTrack& track = 
mSectorTrackInfos[itr]; if (track.PrevSegmentNeighbour() >= 0) { continue; } @@ -117,9 +118,9 @@ void GPUTPCGMMerger::CheckMergedTracks() continue; } int32_t leg = 0; - GPUTPCGMSliceTrack *trbase = &track, *tr = &track; + GPUTPCGMSectorTrack *trbase = &track, *tr = &track; while (true) { - int32_t iTrk = tr - mSliceTrackInfos; + int32_t iTrk = tr - mSectorTrackInfos; if (trkUsed[iTrk]) { GPUError("FAILURE: double use"); } @@ -127,12 +128,12 @@ void GPUTPCGMMerger::CheckMergedTracks() int32_t jtr = tr->NextSegmentNeighbour(); if (jtr >= 0) { - tr = &(mSliceTrackInfos[jtr]); + tr = &(mSectorTrackInfos[jtr]); continue; } jtr = trbase->NextNeighbour(); if (jtr >= 0) { - trbase = &(mSliceTrackInfos[jtr]); + trbase = &(mSectorTrackInfos[jtr]); tr = trbase; if (tr->PrevSegmentNeighbour() >= 0) { break; @@ -143,7 +144,7 @@ void GPUTPCGMMerger::CheckMergedTracks() break; } } - for (int32_t i = 0; i < SliceTrackInfoLocalTotal(); i++) { + for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) { if (trkUsed[i] == false) { GPUError("FAILURE: trk missed"); } @@ -164,11 +165,11 @@ inline const auto* resolveMCLabels(const o2::dataformat template int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const { - GPUTPCGMSliceTrack* sliceTrack = nullptr; + GPUTPCGMSectorTrack* sectorTrack = nullptr; int32_t nClusters = 0; if constexpr (std::is_same::value) { - sliceTrack = &mSliceTrackInfos[trk.TrackID()]; - nClusters = sliceTrack->OrigTrack()->NHits(); + sectorTrack = &mSectorTrackInfos[trk.TrackID()]; + nClusters = sectorTrack->OrigTrack()->NHits(); } else { nClusters = trk.NClusters(); } @@ -176,9 +177,9 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const for (int32_t i = 0; i < nClusters; i++) { int32_t id; if constexpr (std::is_same::value) { - const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sliceTrack->Slice()]; - const GPUTPCHitId& ic = tracker.TrackHits()[sliceTrack->OrigTrack()->FirstHitID() + i]; - id = 
tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sliceTrack->Slice()][0]; + const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()]; + const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i]; + id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0]; } else { id = mClusters[trk.FirstClusterRef() + i].num; } @@ -203,27 +204,27 @@ int64_t GPUTPCGMMerger::GetTrackLabel(const S& trk) const #endif // END DEBUG CODE -void GPUTPCGMMerger::PrintMergeGraph(const GPUTPCGMSliceTrack* trk, std::ostream& out) const +void GPUTPCGMMerger::PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const { - const GPUTPCGMSliceTrack* orgTrack = trk; + const GPUTPCGMSectorTrack* orgTrack = trk; while (trk->PrevSegmentNeighbour() >= 0) { - trk = &mSliceTrackInfos[trk->PrevSegmentNeighbour()]; + trk = &mSectorTrackInfos[trk->PrevSegmentNeighbour()]; } - const GPUTPCGMSliceTrack* orgTower = trk; + const GPUTPCGMSectorTrack* orgTower = trk; while (trk->PrevNeighbour() >= 0) { - trk = &mSliceTrackInfos[trk->PrevNeighbour()]; + trk = &mSectorTrackInfos[trk->PrevNeighbour()]; } - int32_t nextId = trk - mSliceTrackInfos; - out << "Graph of track " << (orgTrack - mSliceTrackInfos) << "\n"; + int32_t nextId = trk - mSectorTrackInfos; + out << "Graph of track " << (orgTrack - mSectorTrackInfos) << "\n"; while (nextId >= 0) { - trk = &mSliceTrackInfos[nextId]; + trk = &mSectorTrackInfos[nextId]; if (trk->PrevSegmentNeighbour() >= 0) { out << "TRACK TREE INVALID!!! " << trk->PrevSegmentNeighbour() << " --> " << nextId << "\n"; } out << (trk == orgTower ? 
"--" : " "); while (nextId >= 0) { - GPUTPCGMSliceTrack* trk2 = &mSliceTrackInfos[nextId]; + GPUTPCGMSectorTrack* trk2 = &mSectorTrackInfos[nextId]; if (trk != trk2 && (trk2->PrevNeighbour() >= 0 || trk2->NextNeighbour() >= 0)) { out << " (TRACK TREE INVALID!!! " << trk2->PrevNeighbour() << " <-- " << nextId << " --> " << trk2->NextNeighbour() << ") "; } @@ -241,28 +242,28 @@ void GPUTPCGMMerger::InitializeProcessor() {} void* GPUTPCGMMerger::SetPointersMerger(void* mem) { - computePointerWithAlignment(mem, mSliceTrackInfos, mNTotalSliceTracks); - computePointerWithAlignment(mem, mSliceTrackInfoIndex, NSLICES * 2 + 1); + computePointerWithAlignment(mem, mSectorTrackInfos, mNTotalSectorTracks); + computePointerWithAlignment(mem, mSectorTrackInfoIndex, NSECTORS * 2 + 1); if (mRec->GetProcessingSettings().deterministicGPUReconstruction) { - computePointerWithAlignment(mem, mTmpSortMemory, std::max(mNTotalSliceTracks, mNMaxTracks * 2)); + computePointerWithAlignment(mem, mTmpSortMemory, std::max(mNTotalSectorTracks, mNMaxTracks * 2)); } void* memBase = mem; - computePointerWithAlignment(mem, mBorderMemory, 2 * mNTotalSliceTracks); // MergeBorders & Resolve - computePointerWithAlignment(mem, mBorderRangeMemory, 2 * mNTotalSliceTracks); + computePointerWithAlignment(mem, mBorderMemory, 2 * mNTotalSectorTracks); // MergeBorders & Resolve + computePointerWithAlignment(mem, mBorderRangeMemory, 2 * mNTotalSectorTracks); int32_t nTracks = 0; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - const int32_t n = *mRec->GetConstantMem().tpcTrackers[iSlice].NTracks(); - mBorder[iSlice] = mBorderMemory + 2 * nTracks; - mBorder[NSLICES + iSlice] = mBorderMemory + 2 * nTracks + n; - mBorderRange[iSlice] = mBorderRangeMemory + 2 * nTracks; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + const int32_t n = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks(); + mBorder[iSector] = mBorderMemory + 2 * nTracks; + mBorder[NSECTORS + iSector] = mBorderMemory + 2 * 
nTracks + n; + mBorderRange[iSector] = mBorderRangeMemory + 2 * nTracks; nTracks += n; } - computePointerWithAlignment(mem, mTrackLinks, mNTotalSliceTracks); - computePointerWithAlignment(mem, mTrackCCRoots, mNTotalSliceTracks); + computePointerWithAlignment(mem, mTrackLinks, mNTotalSectorTracks); + computePointerWithAlignment(mem, mTrackCCRoots, mNTotalSectorTracks); void* memMax = mem; mem = memBase; - computePointerWithAlignment(mem, mTrackIDs, GPUCA_NSLICES * mNMaxSingleSliceTracks); // UnpackResetIds - RefitSliceTracks - UnpackSliceGlobal + computePointerWithAlignment(mem, mTrackIDs, GPUCA_NSECTORS * mNMaxSingleSectorTracks); // UnpackResetIds - RefitSectorTracks - UnpackSectorGlobal memMax = (void*)std::max((size_t)mem, (size_t)memMax); mem = memBase; computePointerWithAlignment(mem, mTrackSort, mNMaxTracks); // PrepareClustersForFit0 - SortTracksQPt - PrepareClustersForFit1 - PrepareClustersForFit1 / Finalize0 - Finalize2 @@ -362,28 +363,28 @@ void GPUTPCGMMerger::RegisterMemoryAllocation() void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) { - mNTotalSliceTracks = 0; + mNTotalSectorTracks = 0; mNClusters = 0; - mNMaxSingleSliceTracks = 0; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t ntrk = *mRec->GetConstantMem().tpcTrackers[iSlice].NTracks(); - mNTotalSliceTracks += ntrk; - mNClusters += *mRec->GetConstantMem().tpcTrackers[iSlice].NTrackHits(); - if (mNMaxSingleSliceTracks < ntrk) { - mNMaxSingleSliceTracks = ntrk; + mNMaxSingleSectorTracks = 0; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t ntrk = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks(); + mNTotalSectorTracks += ntrk; + mNClusters += *mRec->GetConstantMem().tpcTrackers[iSector].NTrackHits(); + if (mNMaxSingleSectorTracks < ntrk) { + mNMaxSingleSectorTracks = ntrk; } } mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * 
gpu_common_constants::kCLight)) { - mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSliceTracks, mNTotalSliceTracks); + mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); } else { - mNMaxTracks = mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSliceTracks); + mNMaxTracks = mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSectorTracks); } if (io.clustersNative) { mNMaxClusters = io.clustersNative->nClustersTotal; - } else if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking) { + } else if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) { mNMaxClusters = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { mNMaxClusters += mRec->GetConstantMem().tpcTrackers[i].NHitsTotal(); } } else { @@ -392,15 +393,15 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxLooperMatches = mNMaxClusters / 4; // We have that much scratch memory anyway } -int32_t GPUTPCGMMerger::CheckSlices() +int32_t GPUTPCGMMerger::CheckSectors() { - for (int32_t i = 0; i < NSLICES; i++) { - if (mRec->GetConstantMem().tpcTrackers[i].CommonMemory()->nLocalTracks > (int32_t)mNMaxSingleSliceTracks) { - throw std::runtime_error("mNMaxSingleSliceTracks too small"); + for (int32_t i = 0; i < NSECTORS; i++) { + if (mRec->GetConstantMem().tpcTrackers[i].CommonMemory()->nLocalTracks > (int32_t)mNMaxSingleSectorTracks) { + throw std::runtime_error("mNMaxSingleSectorTracks too small"); } } - if (!(mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking)) { - throw std::runtime_error("Must run also slice tracking"); + if (!(mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { + throw std::runtime_error("Must run also sector tracking"); } return 0; } @@ -409,13 +410,13 @@ int32_t GPUTPCGMMerger::CheckSlices() GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output) { - const int32_t n = output ? 
mMemory->nOutputTracks : SliceTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) { mTrackLinks[i] = -1; } } -GPUd() int32_t GPUTPCGMMerger::RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, const GPUTPCTrack* inTrack, float alpha, int32_t slice) +GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack, const GPUTPCTrack* inTrack, float alpha, int32_t sector) { GPUTPCGMPropagator prop; prop.SetMaterialTPC(); @@ -431,9 +432,9 @@ GPUd() int32_t GPUTPCGMMerger::RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, c trk.SinPhi() = inTrack->Param().GetSinPhi(); trk.DzDs() = inTrack->Param().GetDzDs(); trk.QPt() = inTrack->Param().GetQPt(); - trk.TZOffset() = Param().par.earlyTpcTransform ? inTrack->Param().GetZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(slice, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin); - trk.ShiftZ(this, slice, sliceTrack.ClusterZT0(), sliceTrack.ClusterZTN(), inTrack->Param().GetX(), inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead - sliceTrack.SetX2(0.f); + trk.TZOffset() = Param().par.earlyTpcTransform ? 
inTrack->Param().GetZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin); + trk.ShiftZ(this, sector, sectorTrack.ClusterZT0(), sectorTrack.ClusterZTN(), inTrack->Param().GetX(), inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead + sectorTrack.SetX2(0.f); for (int32_t way = 0; way < 2; way++) { if (way) { prop.SetFitInProjections(true); @@ -447,74 +448,74 @@ GPUd() int32_t GPUTPCGMMerger::RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, c for (int32_t i = start; i != end; i += incr) { float x, y, z; int32_t row, flags; - const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[slice]; + const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sector]; const GPUTPCHitId& ic = tracker.TrackHits()[inTrack->FirstHitID() + i]; int32_t clusterIndex = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()); row = ic.RowIndex(); - const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[slice][0] + clusterIndex]; + const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sector][0] + clusterIndex]; flags = cl.getFlags(); if (Param().par.earlyTpcTransform) { x = tracker.Data().ClusterData()[clusterIndex].x; y = tracker.Data().ClusterData()[clusterIndex].y; z = tracker.Data().ClusterData()[clusterIndex].z - trk.TZOffset(); } else { - GetConstantMem()->calibObjects.fastTransformHelper->Transform(slice, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); + GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); } if (prop.PropagateToXAlpha(x, alpha, true)) { return way == 0; } trk.ConstrainSinPhi(); - if (prop.Update(y, z, row, Param(), 
flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, nullptr, false, slice, -1.f, 0.f, 0.f)) { // TODO: Use correct time / avgCharge + if (prop.Update(y, z, row, Param(), flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, nullptr, false, sector, -1.f, 0.f, 0.f)) { // TODO: Use correct time / avgCharge return way == 0; } trk.ConstrainSinPhi(); } if (way) { - sliceTrack.SetParam2(trk); + sectorTrack.SetParam2(trk); } else { - sliceTrack.Set(trk, inTrack, alpha, slice); + sectorTrack.Set(trk, inTrack, alpha, sector); } } return 0; } -GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSliceTrack& track, int32_t iSlice, const GPUTPCTrack* sliceTr) +GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; - const GPUTPCHitId& ic1 = trk.TrackHits()[sliceTr->FirstHitID()]; - const GPUTPCHitId& ic2 = trk.TrackHits()[sliceTr->FirstHitID() + sliceTr->NHits() - 1]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector]; + const GPUTPCHitId& ic1 = trk.TrackHits()[sectorTr->FirstHitID()]; + const GPUTPCHitId& ic2 = trk.TrackHits()[sectorTr->FirstHitID() + sectorTr->NHits() - 1]; int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex()); int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex()); if (Param().par.earlyTpcTransform) { track.SetClusterZT(trk.Data().ClusterData()[clusterIndex1].z, trk.Data().ClusterData()[clusterIndex2].z); } else { - const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSlice][0]; + const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0]; track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime()); } } GPUd() 
void GPUTPCGMMerger::UnpackSaveNumber(int32_t id) { - mSliceTrackInfoIndex[id] = mMemory->nUnpackedTracks; + mSectorTrackInfoIndex[id] = mMemory->nUnpackedTracks; } -GPUd() void GPUTPCGMMerger::UnpackSliceGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice) +GPUd() void GPUTPCGMMerger::UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; - float alpha = Param().Alpha(iSlice); - const GPUTPCTrack* sliceTr = mMemory->firstExtrapolatedTracks[iSlice]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector]; + float alpha = Param().Alpha(iSector); + const GPUTPCTrack* sectorTr = mMemory->firstExtrapolatedTracks[iSector]; uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; uint32_t nTracks = *trk.NTracks(); for (uint32_t itr = nLocalTracks + iBlock * nThreads + iThread; itr < nTracks; itr += nBlocks * nThreads) { - sliceTr = &trk.Tracks()[itr]; - int32_t localId = mTrackIDs[(sliceTr->LocalTrackId() >> 24) * mNMaxSingleSliceTracks + (sliceTr->LocalTrackId() & 0xFFFFFF)]; + sectorTr = &trk.Tracks()[itr]; + int32_t localId = mTrackIDs[(sectorTr->LocalTrackId() >> 24) * mNMaxSingleSectorTracks + (sectorTr->LocalTrackId() & 0xFFFFFF)]; if (localId == -1) { continue; } uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u); - GPUTPCGMSliceTrack& track = mSliceTrackInfos[myTrack]; - SetTrackClusterZT(track, iSlice, sliceTr); - track.Set(this, sliceTr, alpha, iSlice); + GPUTPCGMSectorTrack& track = mSectorTrackInfos[myTrack]; + SetTrackClusterZT(track, iSector, sectorTr); + track.Set(this, sectorTr, alpha, iSector); track.SetGlobalSectorTrackCov(); track.SetPrevNeighbour(-1); track.SetNextNeighbour(-1); @@ -524,45 +525,45 @@ GPUd() void GPUTPCGMMerger::UnpackSliceGlobal(int32_t nBlocks, int32_t nThreads, } } -GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, 
int32_t iBlock, int32_t iThread, int32_t iSlice) +GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector]; uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; for (uint32_t i = iBlock * nThreads + iThread; i < nLocalTracks; i += nBlocks * nThreads) { - mTrackIDs[iSlice * mNMaxSingleSliceTracks + i] = -1; + mTrackIDs[iSector * mNMaxSingleSectorTracks + i] = -1; } } -GPUd() void GPUTPCGMMerger::RefitSliceTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice) +GPUd() void GPUTPCGMMerger::RefitSectorTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector]; uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; - float alpha = Param().Alpha(iSlice); - const GPUTPCTrack* sliceTr = nullptr; + float alpha = Param().Alpha(iSector); + const GPUTPCTrack* sectorTr = nullptr; for (uint32_t itr = iBlock * nThreads + iThread; itr < nLocalTracks; itr += nBlocks * nThreads) { - sliceTr = &trk.Tracks()[itr]; - GPUTPCGMSliceTrack track; - SetTrackClusterZT(track, iSlice, sliceTr); + sectorTr = &trk.Tracks()[itr]; + GPUTPCGMSectorTrack track; + SetTrackClusterZT(track, iSector, sectorTr); if (Param().rec.tpc.mergerCovSource == 0) { - track.Set(this, sliceTr, alpha, iSlice); - if (!track.FilterErrors(this, iSlice, GPUCA_MAX_SIN_PHI, 0.1f)) { + track.Set(this, sectorTr, alpha, iSector); + if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) { continue; } } else if (Param().rec.tpc.mergerCovSource == 1) { - track.Set(this, sliceTr, alpha, iSlice); + track.Set(this, sectorTr, alpha, iSector); track.CopyBaseTrackCov(); } else if 
(Param().rec.tpc.mergerCovSource == 2) { - if (RefitSliceTrack(track, sliceTr, alpha, iSlice)) { - track.Set(this, sliceTr, alpha, iSlice); // TODO: Why does the refit fail, it shouldn't, this workaround should be removed - if (!track.FilterErrors(this, iSlice, GPUCA_MAX_SIN_PHI, 0.1f)) { + if (RefitSectorTrack(track, sectorTr, alpha, iSector)) { + track.Set(this, sectorTr, alpha, iSector); // TODO: Why does the refit fail, it shouldn't, this workaround should be removed + if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) { continue; } } } - CADEBUG(GPUInfo("INPUT Slice %d, Track %u, QPt %f DzDs %f", iSlice, itr, track.QPt(), track.DzDs())); + CADEBUG(GPUInfo("INPUT Sector %d, Track %u, QPt %f DzDs %f", iSector, itr, track.QPt(), track.DzDs())); track.SetPrevNeighbour(-1); track.SetNextNeighbour(-1); track.SetNextSegmentNeighbour(-1); @@ -570,25 +571,25 @@ GPUd() void GPUTPCGMMerger::RefitSliceTracks(int32_t nBlocks, int32_t nThreads, track.SetExtrapolatedTrackId(0, -1); track.SetExtrapolatedTrackId(1, -1); uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u); - mTrackIDs[iSlice * mNMaxSingleSliceTracks + sliceTr->LocalTrackId()] = myTrack; - mSliceTrackInfos[myTrack] = track; + mTrackIDs[iSector * mNMaxSingleSectorTracks + sectorTr->LocalTrackId()] = myTrack; + mSectorTrackInfos[myTrack] = track; } } GPUd() void GPUTPCGMMerger::LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (int32_t itr = SliceTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SliceTrackInfoGlobalLast(NSLICES - 1); itr += nThreads * nBlocks) { - GPUTPCGMSliceTrack& extrapolatedTrack = mSliceTrackInfos[itr]; - GPUTPCGMSliceTrack& localTrack = mSliceTrackInfos[extrapolatedTrack.LocalTrackId()]; + for (int32_t itr = SectorTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SectorTrackInfoGlobalLast(NSECTORS - 1); itr += nThreads * nBlocks) { + GPUTPCGMSectorTrack& extrapolatedTrack = 
mSectorTrackInfos[itr]; + GPUTPCGMSectorTrack& localTrack = mSectorTrackInfos[extrapolatedTrack.LocalTrackId()]; if (localTrack.ExtrapolatedTrackId(0) != -1 || !CAMath::AtomicCAS(&localTrack.ExtrapolatedTrackIds()[0], -1, itr)) { localTrack.SetExtrapolatedTrackId(1, itr); } } } -GPUd() void GPUTPCGMMerger::MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam) +GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam) { - //* prepare slice tracks for merging with next/previous/same sector + //* prepare sector tracks for merging with next/previous/same sector //* each track transported to the border line float fieldBz = Param().bzCLight; @@ -614,21 +615,21 @@ GPUd() void GPUTPCGMMerger::MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nTh float cosAlpha = CAMath::Cos(dAlpha); float sinAlpha = CAMath::Sin(dAlpha); - GPUTPCGMSliceTrack trackTmp; - for (int32_t itr = iBlock * nThreads + iThread; itr < SliceTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - const GPUTPCGMSliceTrack* track = &mSliceTrackInfos[itr]; - int32_t iSlice = track->Slice(); + GPUTPCGMSectorTrack trackTmp; + for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { + const GPUTPCGMSectorTrack* track = &mSectorTrackInfos[itr]; + int32_t iSector = track->Sector(); - if (track->PrevSegmentNeighbour() >= 0 && track->Slice() == mSliceTrackInfos[track->PrevSegmentNeighbour()].Slice()) { + if (track->PrevSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->PrevSegmentNeighbour()].Sector()) { continue; } - if (useOrigTrackParam) { // TODO: Check how far this makes sense with slice track refit + if (useOrigTrackParam) { // TODO: Check how far this makes 
sense with sector track refit if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperQPtB5Limit) { continue; } - const GPUTPCGMSliceTrack* trackMin = track; - while (track->NextSegmentNeighbour() >= 0 && track->Slice() == mSliceTrackInfos[track->NextSegmentNeighbour()].Slice()) { - track = &mSliceTrackInfos[track->NextSegmentNeighbour()]; + const GPUTPCGMSectorTrack* trackMin = track; + while (track->NextSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->NextSegmentNeighbour()].Sector()) { + track = &mSectorTrackInfos[track->NextSegmentNeighbour()]; if (track->OrigTrack()->Param().X() < trackMin->OrigTrack()->Param().X()) { trackMin = track; } @@ -638,7 +639,7 @@ GPUd() void GPUTPCGMMerger::MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nTh if (Param().rec.tpc.mergerCovSource == 2 && trackTmp.X2() != 0.f) { trackTmp.UseParam2(); } else { - trackTmp.Set(this, trackMin->OrigTrack(), trackMin->Alpha(), trackMin->Slice()); + trackTmp.Set(this, trackMin->OrigTrack(), trackMin->Alpha(), trackMin->Sector()); } } else { if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperSecondHorizontalQPtB5Limit) { @@ -663,19 +664,19 @@ GPUd() void GPUTPCGMMerger::MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nTh if (CAMath::Abs(b.Cov()[4]) >= 0.5f) { b.SetCov(4, 0.5f); } - uint32_t myTrack = CAMath::AtomicAdd(&nB[iSlice], 1u); - B[iSlice][myTrack] = b; + uint32_t myTrack = CAMath::AtomicAdd(&nB[iSector], 1u); + B[iSector][myTrack] = b; } } } template <> -GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSlice2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, 
int32_t N2, int32_t mergeMode) { - CADEBUG(GPUInfo("\nMERGING Slices %d %d NTracks %d %d CROSS %d", iSlice1, iSlice2, N1, N2, mergeMode)); - GPUTPCGMBorderRange* range1 = mBorderRange[iSlice1]; - GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + *GetConstantMem()->tpcTrackers[iSlice2].NTracks(); - bool sameSlice = (iSlice1 == iSlice2); + CADEBUG(GPUInfo("\nMERGING Sectors %d %d NTracks %d %d CROSS %d", iSector1, iSector2, N1, N2, mergeMode)); + GPUTPCGMBorderRange* range1 = mBorderRange[iSector1]; + GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks(); + bool sameSector = (iSector1 == iSector2); for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) { GPUTPCGMBorderTrack& b = B1[itr]; float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1])); @@ -684,17 +685,17 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea } else if (d > 3) { d = 3; } - CADEBUG(printf(" Input Slice 1 %d Track %d: ", iSlice1, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d)); + CADEBUG(printf(" Input Sector 1 %d Track %d: ", iSector1, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d)); GPUTPCGMBorderRange range; range.fId = itr; range.fMin = b.Par()[1] + b.ZOffsetLinear() - d; range.fMax = b.Par()[1] + b.ZOffsetLinear() + d; range1[itr] = range; - if (sameSlice) { + if (sameSector) { range2[itr] = range; } } - if (!sameSlice) { + if (!sameSector) { for (int32_t itr = iBlock * nThreads + iThread; itr < N2; itr += nThreads * nBlocks) { GPUTPCGMBorderTrack& b = B2[itr]; float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1])); @@ -703,7 +704,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t 
nThrea } else if (d > 3) { d = 3; } - CADEBUG(printf(" Input Slice 2 %d Track %d: ", iSlice2, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d)); + CADEBUG(printf(" Input Sector 2 %d Track %d: ", iSector2, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d)); GPUTPCGMBorderRange range; range.fId = itr; range.fMin = b.Par()[1] + b.ZOffsetLinear() - d; @@ -714,11 +715,11 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea } template <> -GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSlice2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) { #if !defined(GPUCA_GPUCODE_COMPILEKERNELS) - GPUTPCGMBorderRange* range1 = mBorderRange[iSlice1]; - GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + *GetConstantMem()->tpcTrackers[iSlice2].NTracks(); + GPUTPCGMBorderRange* range1 = mBorderRange[iSector1]; + GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks(); if (iThread == 0) { if (iBlock == 0) { @@ -741,6 +742,10 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea } #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize MergeBorderTracks<3> +namespace o2::gpu::internal +{ +namespace // anonymous +{ struct MergeBorderTracks_compMax { GPUd() bool operator()(const GPUTPCGMBorderRange& a, 
const GPUTPCGMBorderRange& b) { @@ -761,6 +766,8 @@ struct MergeBorderTracks_compMin { #endif } }; +} // anonymous namespace +} // namespace o2::gpu::internal template <> inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) @@ -790,7 +797,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThrea } template <> -GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSlice2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) { // int32_t statAll = 0, statMerged = 0; float factor2ys = Param().rec.tpc.trackMergerFactor2YS; @@ -805,10 +812,10 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea int32_t minNPartHits = Param().rec.tpc.trackMergerMinPartHits; int32_t minNTotalHits = Param().rec.tpc.trackMergerMinTotalHits; - bool sameSlice = (iSlice1 == iSlice2); + bool sameSector = (iSector1 == iSector2); - GPUTPCGMBorderRange* range1 = mBorderRange[iSlice1]; - GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + *GetConstantMem()->tpcTrackers[iSlice2].NTracks(); + GPUTPCGMBorderRange* range1 = mBorderRange[iSector1]; + GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks(); int32_t i2 = 0; for (int32_t i1 = iBlock * nThreads + iThread; i1 < N1; i1 += nThreads * nBlocks) { @@ -829,7 +836,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea if (r2.fMin > r1.fMax) { break; } - if (sameSlice && (r1.fId >= r2.fId)) { + if (sameSector && (r1.fId >= 
r2.fId)) { continue; } // do check @@ -857,7 +864,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea } } - GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksAll, b1.TrackID(), weight)) { MergedTrackStreamer(b1, b2, "merge_all_tracks", iSlice1, iSlice2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksAll)); }); + GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksAll, b1.TrackID(), weight)) { MergedTrackStreamer(b1, b2, "merge_all_tracks", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksAll)); }); if (!b1.CheckChi2Y(b2, factor2ys)) { CADEBUG2(continue, printf("!Y\n")); @@ -891,7 +898,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea if (iBest2 < 0) { continue; } - GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksBest, b1.TrackID(), weight)) { MergedTrackStreamer(b1, MergedTrackStreamerFindBorderTrack(B2, N2, iBest2), "merge_best_track", iSlice1, iSlice2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksBest)); }); + GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksBest, b1.TrackID(), weight)) { MergedTrackStreamer(b1, MergedTrackStreamerFindBorderTrack(B2, N2, iBest2), "merge_best_track", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksBest)); }); // statMerged++; @@ -906,77 
+913,77 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea #endif } } - // GPUInfo("STAT: slices %d, %d: all %d merged %d", iSlice1, iSlice2, statAll, statMerged); + // GPUInfo("STAT: sectors %d, %d: all %d merged %d", iSector1, iSector2, statAll, statMerged); -GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSlice, int32_t iSlice, int8_t withinSlice, int8_t mergeMode) const -{ - if (withinSlice == 1) { // Merge tracks within the same slice - jSlice = iSlice; - n1 = n2 = mMemory->tmpCounter[iSlice]; - b1 = b2 = mBorder[iSlice]; - } else if (withinSlice == -1) { // Merge tracks accross the central electrode - jSlice = (iSlice + NSLICES / 2); - const int32_t offset = mergeMode == 2 ? NSLICES : 0; - n1 = mMemory->tmpCounter[iSlice + offset]; - n2 = mMemory->tmpCounter[jSlice + offset]; - b1 = mBorder[iSlice + offset]; - b2 = mBorder[jSlice + offset]; - } else { // Merge tracks of adjacent slices - jSlice = mNextSliceInd[iSlice]; - n1 = mMemory->tmpCounter[iSlice]; - n2 = mMemory->tmpCounter[NSLICES + jSlice]; - b1 = mBorder[iSlice]; - b2 = mBorder[NSLICES + jSlice]; +GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSector, int32_t iSector, int8_t withinSector, int8_t mergeMode) const +{ + if (withinSector == 1) { // Merge tracks within the same sector + jSector = iSector; + n1 = n2 = mMemory->tmpCounter[iSector]; + b1 = b2 = mBorder[iSector]; + } else if (withinSector == -1) { // Merge tracks across the central electrode + jSector = (iSector + NSECTORS / 2); + const int32_t offset = mergeMode == 2 ?
NSECTORS : 0; + n1 = mMemory->tmpCounter[iSector + offset]; + n2 = mMemory->tmpCounter[jSector + offset]; + b1 = mBorder[iSector + offset]; + b2 = mBorder[jSector + offset]; + } else { // Merge tracks of adjacent sectors + jSector = mNextSectorInd[iSector]; + n1 = mMemory->tmpCounter[iSector]; + n2 = mMemory->tmpCounter[NSECTORS + jSector]; + b1 = mBorder[iSector]; + b2 = mBorder[NSECTORS + jSector]; } } template -GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode) { int32_t n1, n2; GPUTPCGMBorderTrack *b1, *b2; - int32_t jSlice; - MergeBorderTracksSetup(n1, n2, b1, b2, jSlice, iSlice, withinSlice, mergeMode); - MergeBorderTracks(nBlocks, nThreads, iBlock, iThread, iSlice, b1, n1, jSlice, b2, n2, mergeMode); + int32_t jSector; + MergeBorderTracksSetup(n1, n2, b1, b2, jSector, iSector, withinSector, mergeMode); + MergeBorderTracks(nBlocks, nThreads, iBlock, iThread, iSector, b1, n1, jSector, b2, n2, mergeMode); } #if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code. 
-template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); -template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); -template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); +template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode); +template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode); +template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode); #endif -GPUd() void GPUTPCGMMerger::MergeWithinSlicesPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) +GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { float x0 = Param().tpcGeometry.Row2X(63); const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi()); - for (int32_t itr = iBlock * nThreads + iThread; itr < SliceTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSliceTrack& track = mSliceTrackInfos[itr]; - int32_t iSlice = track.Slice(); + for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { + GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; + int32_t iSector = track.Sector(); GPUTPCGMBorderTrack b; if (track.TransportToX(this, x0, Param().bzCLight, b, maxSin)) { b.SetTrackID(itr); - 
CADEBUG(printf("WITHIN SLICE %d Track %d - ", iSlice, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf("\n")); + CADEBUG(printf("WITHIN SECTOR %d Track %d - ", iSector, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf("\n")); b.SetNClusters(track.NClusters()); - uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[iSlice], 1u); - mBorder[iSlice][myTrack] = b; + uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[iSector], 1u); + mBorder[iSector][myTrack] = b; } } } -GPUd() void GPUTPCGMMerger::MergeSlicesPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam) +GPUd() void GPUTPCGMMerger::MergeSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam) { bool part2 = iBlock & 1; int32_t border = part2 ? 
border1 : border0; GPUAtomic(uint32_t)* n = mMemory->tmpCounter; GPUTPCGMBorderTrack** b = mBorder; if (part2) { - n += NSLICES; - b += NSLICES; + n += NSECTORS; + b += NSECTORS; } - MergeSlicesPrepareStep2((nBlocks + !part2) >> 1, nThreads, iBlock >> 1, iThread, border, b, n, useOrigTrackParam); + MergeSectorsPrepareStep2((nBlocks + !part2) >> 1, nThreads, iBlock >> 1, iThread, border, b, n, useOrigTrackParam); } GPUdi() void GPUTPCGMMerger::setBlockRange(int32_t elems, int32_t nBlocks, int32_t iBlock, int32_t& start, int32_t& end) @@ -1013,7 +1020,7 @@ GPUd() void GPUTPCGMMerger::hookEdge(int32_t u, int32_t v) GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsSetup(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks, iBlock, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end); for (int32_t i = start + iThread; i < end; i += nThreads) { mTrackCCRoots[i] = i; } @@ -1024,7 +1031,7 @@ GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookLinks(int32_t nBlo // Compute connected components in parallel, step 1. 
// Source: Adaptive Work-Efficient Connected Components on the GPU, Sutton et al, 2016 (https://arxiv.org/pdf/1612.01178.pdf) int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks, iBlock, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end); for (int32_t itr = start + iThread; itr < end; itr += nThreads) { hookEdge(itr, mTrackLinks[itr]); } @@ -1039,12 +1046,12 @@ GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookNeighbors(int32_t } int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks / 4, iBlock / 4, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks / 4, iBlock / 4, start, end); int32_t myNeighbor = iBlock % 4; for (int32_t itr = start + iThread; itr < end; itr += nThreads) { - int32_t v = mSliceTrackInfos[itr].AnyNeighbour(myNeighbor); + int32_t v = mSectorTrackInfos[itr].AnyNeighbour(myNeighbor); hookEdge(itr, v); } } @@ -1053,7 +1060,7 @@ GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsMultiJump(int32_t nBlo { // Compute connected components in parallel, step 2. int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks, iBlock, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end); for (int32_t itr = start + iThread; itr < end; itr += nThreads) { int32_t root = itr; int32_t next = mTrackCCRoots[root]; @@ -1068,7 +1075,7 @@ GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsMultiJump(int32_t nBlo } } -GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll) +GPUd() void GPUTPCGMMerger::ResolveMergeSectors(GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll) { if (!mergeAll) { /*int32_t neighborType = useOrigTrackParam ? 
1 : 0; @@ -1079,7 +1086,7 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int if (neighborType) old1 = newTrack2.PrevNeighbour(1); if ( old1 >= 0 ) { - GPUTPCGMSliceTrack &oldTrack1 = mSliceTrackInfos[old1]; + GPUTPCGMSectorTrack &oldTrack1 = mSectorTrackInfos[old1]; if ( oldTrack1.NClusters() < newTrack1.NClusters() ) { newTrack2.SetPrevNeighbour( -1, neighborType ); oldTrack1.SetNextNeighbour( -1, neighborType ); @@ -1090,7 +1097,7 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int if (neighborType) old2 = newTrack1.NextNeighbour(1); if ( old2 >= 0 ) { - GPUTPCGMSliceTrack &oldTrack2 = mSliceTrackInfos[old2]; + GPUTPCGMSectorTrack &oldTrack2 = mSectorTrackInfos[old2]; if ( oldTrack2.NClusters() < newTrack2.NClusters() ) { oldTrack2.SetPrevNeighbour( -1, neighborType ); @@ -1101,26 +1108,26 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int } int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks, iBlock, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end); - for (int32_t baseIdx = 0; baseIdx < SliceTrackInfoLocalTotal(); baseIdx += nThreads) { + for (int32_t baseIdx = 0; baseIdx < SectorTrackInfoLocalTotal(); baseIdx += nThreads) { int32_t itr = baseIdx + iThread; - bool inRange = itr < SliceTrackInfoLocalTotal(); + bool inRange = itr < SectorTrackInfoLocalTotal(); int32_t itr2 = -1; if (inRange) { itr2 = mTrackLinks[itr]; } - bool resolveSlice = (itr2 > -1); - if (resolveSlice) { + bool resolveSector = (itr2 > -1); + if (resolveSector) { int32_t root = mTrackCCRoots[itr]; - resolveSlice &= (start <= root) && (root < end); + resolveSector &= (start <= root) && (root < end); } - int16_t smemIdx = work_group_scan_inclusive_add(int16_t(resolveSlice)); + int16_t smemIdx = work_group_scan_inclusive_add(int16_t(resolveSector)); - if (resolveSlice) { + if (resolveSector) { smem.iTrack1[smemIdx - 1] = itr; 
smem.iTrack2[smemIdx - 1] = itr2; } @@ -1130,16 +1137,16 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int continue; } - const int32_t nSlices = smemIdx; + const int32_t nSectors = smemIdx; - for (int32_t i = 0; i < nSlices; i++) { + for (int32_t i = 0; i < nSectors; i++) { itr = smem.iTrack1[i]; itr2 = smem.iTrack2[i]; - GPUTPCGMSliceTrack* track1 = &mSliceTrackInfos[itr]; - GPUTPCGMSliceTrack* track2 = &mSliceTrackInfos[itr2]; - GPUTPCGMSliceTrack* track1Base = track1; - GPUTPCGMSliceTrack* track2Base = track2; + GPUTPCGMSectorTrack* track1 = &mSectorTrackInfos[itr]; + GPUTPCGMSectorTrack* track2 = &mSectorTrackInfos[itr2]; + GPUTPCGMSectorTrack* track1Base = track1; + GPUTPCGMSectorTrack* track2Base = track2; bool sameSegment = CAMath::Abs(track1->NClusters() > track2->NClusters() ? track1->QPt() : track2->QPt()) * Param().qptB5Scaler < 2 || track1->QPt() * track2->QPt() > 0; // GPUInfo("\nMerge %d with %d - same segment %d", itr, itr2, (int32_t) sameSegment); @@ -1147,23 +1154,23 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int // PrintMergeGraph(track2, std::cout); while (track2->PrevSegmentNeighbour() >= 0) { - track2 = &mSliceTrackInfos[track2->PrevSegmentNeighbour()]; + track2 = &mSectorTrackInfos[track2->PrevSegmentNeighbour()]; } if (sameSegment) { if (track1 == track2) { continue; } while (track1->PrevSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->PrevSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()]; if (track1 == track2) { goto NextTrack; } } GPUCommonAlgorithm::swap(track1, track1Base); for (int32_t k = 0; k < 2; k++) { - GPUTPCGMSliceTrack* tmp = track1Base; + GPUTPCGMSectorTrack* tmp = track1Base; while (tmp->Neighbour(k) >= 0) { - tmp = &mSliceTrackInfos[tmp->Neighbour(k)]; + tmp = &mSectorTrackInfos[tmp->Neighbour(k)]; if (tmp == track2) { goto NextTrack; } @@ -1171,23 +1178,23 @@ GPUd() void 
GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int } while (track1->NextSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->NextSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()]; if (track1 == track2) { goto NextTrack; } } } else { while (track1->PrevSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->PrevSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()]; } if (track1 == track2) { continue; } for (int32_t k = 0; k < 2; k++) { - GPUTPCGMSliceTrack* tmp = track1; + GPUTPCGMSectorTrack* tmp = track1; while (tmp->Neighbour(k) >= 0) { - tmp = &mSliceTrackInfos[tmp->Neighbour(k)]; + tmp = &mSectorTrackInfos[tmp->Neighbour(k)]; if (tmp == track2) { goto NextTrack; } @@ -1210,18 +1217,18 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int bool goUp = z2max - z1min > z1max - z2min; if (track1->Neighbour(goUp) < 0 && track2->Neighbour(!goUp) < 0) { - track1->SetNeighbor(track2 - mSliceTrackInfos, goUp); - track2->SetNeighbor(track1 - mSliceTrackInfos, !goUp); + track1->SetNeighbor(track2 - mSectorTrackInfos, goUp); + track2->SetNeighbor(track1 - mSectorTrackInfos, !goUp); // GPUInfo("Result (simple neighbor)"); // PrintMergeGraph(track1, std::cout); continue; } else if (track1->Neighbour(goUp) < 0) { - track2 = &mSliceTrackInfos[track2->Neighbour(!goUp)]; + track2 = &mSectorTrackInfos[track2->Neighbour(!goUp)]; GPUCommonAlgorithm::swap(track1, track2); } else if (track2->Neighbour(!goUp) < 0) { - track1 = &mSliceTrackInfos[track1->Neighbour(goUp)]; + track1 = &mSectorTrackInfos[track1->Neighbour(goUp)]; } else { // Both would work, but we use the simpler one - track1 = &mSliceTrackInfos[track1->Neighbour(goUp)]; + track1 = &mSectorTrackInfos[track1->Neighbour(goUp)]; } track1Base = track1; } @@ -1229,11 +1236,11 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int track2Base = track2; if (!sameSegment) { 
while (track1->NextSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->NextSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()]; } } - track1->SetNextSegmentNeighbour(track2 - mSliceTrackInfos); - track2->SetPrevSegmentNeighbour(track1 - mSliceTrackInfos); + track1->SetNextSegmentNeighbour(track2 - mSectorTrackInfos); + track2->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos); // k = 0: Merge right side // k = 1: Merge left side for (int32_t k = 0; k < 2; k++) { @@ -1241,23 +1248,23 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int track2 = track2Base; while (track2->Neighbour(k) >= 0) { if (track1->Neighbour(k) >= 0) { - GPUTPCGMSliceTrack* track1new = &mSliceTrackInfos[track1->Neighbour(k)]; - GPUTPCGMSliceTrack* track2new = &mSliceTrackInfos[track2->Neighbour(k)]; + GPUTPCGMSectorTrack* track1new = &mSectorTrackInfos[track1->Neighbour(k)]; + GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)]; track2->SetNeighbor(-1, k); track2new->SetNeighbor(-1, k ^ 1); track1 = track1new; while (track1->NextSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->NextSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()]; } - track1->SetNextSegmentNeighbour(track2new - mSliceTrackInfos); - track2new->SetPrevSegmentNeighbour(track1 - mSliceTrackInfos); + track1->SetNextSegmentNeighbour(track2new - mSectorTrackInfos); + track2new->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos); track1 = track1new; track2 = track2new; } else { - GPUTPCGMSliceTrack* track2new = &mSliceTrackInfos[track2->Neighbour(k)]; + GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)]; track1->SetNeighbor(track2->Neighbour(k), k); track2->SetNeighbor(-1, k); - track2new->SetNeighbor(track1 - mSliceTrackInfos, k ^ 1); + track2new->SetNeighbor(track1 - mSectorTrackInfos, k ^ 1); } } } @@ -1268,7 +1275,7 @@ GPUd() void 
GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int } } -GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSliceTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr) +GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr) { if (Param().rec.tpc.mergerCERowLimit > 0 && CAMath::Abs(track->QPt()) * Param().qptB5Scaler < 0.3f && (cls.row < Param().rec.tpc.mergerCERowLimit || cls.row >= GPUCA_ROW_COUNT - Param().rec.tpc.mergerCERowLimit)) { return; @@ -1280,13 +1287,13 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSliceTrack* track, const G } else { float x, y; auto& cln = mConstantMem->ioPtrs.clustersNative->clustersLinear[cls.num]; - GPUTPCConvertImpl::convert(*mConstantMem, cls.slice, cls.row, cln.getPad(), cln.getTime(), x, y, z); + GPUTPCConvertImpl::convert(*mConstantMem, cls.sector, cls.row, cln.getPad(), cln.getTime(), x, y, z); } if (!Param().par.continuousTracking && CAMath::Abs(z) > 10) { return; } - int32_t slice = track->Slice(); + int32_t sector = track->Sector(); for (int32_t attempt = 0; attempt < 2; attempt++) { GPUTPCGMBorderTrack b; const float x0 = Param().tpcGeometry.Row2X(attempt == 0 ? 
63 : cls.row); @@ -1301,7 +1308,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSliceTrack* track, const G b.SetZOffsetLinear(-b.ZOffsetLinear()); } b.SetRow(cls.row); - uint32_t id = slice + attempt * NSLICES; + uint32_t id = sector + attempt * NSECTORS; uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[id], 1u); mBorder[id][myTrack] = b; break; @@ -1385,8 +1392,8 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(), &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1], &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax); - const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->slice, clsmax->row, cls[clsmax->num].getPad()), 0.f); - trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSLICES / 2, trk[1]->Param().TZOffset() - offset); + const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f); + trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TZOffset() - offset); trk[1]->Param().TZOffset() = offset; } } @@ -1436,6 +1443,10 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i // for (int32_t i = 0;i < mMemory->nOutputTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks } +namespace o2::gpu::internal +{ +namespace // anonymous +{ struct 
GPUTPCGMMerger_CompareClusterIdsLooper { struct clcomparestruct { uint8_t leg; @@ -1489,14 +1500,16 @@ struct GPUTPCGMMerger_CompareClusterIds { #endif } }; +} // anonymous namespace +} // namespace o2::gpu::internal GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - GPUTPCGMSliceTrack* trackParts[kMaxParts]; + GPUTPCGMSectorTrack* trackParts[kMaxParts]; - for (int32_t itr = iBlock * nThreads + iThread; itr < SliceTrackInfoLocalTotal(); itr += nThreads * nBlocks) { + for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSliceTrack& track = mSliceTrackInfos[itr]; + GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; if (track.PrevSegmentNeighbour() >= 0) { continue; @@ -1507,7 +1520,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread int32_t nParts = 0; int32_t nHits = 0; int32_t leg = 0; - GPUTPCGMSliceTrack *trbase = &track, *tr = &track; + GPUTPCGMSectorTrack *trbase = &track, *tr = &track; tr->SetPrevSegmentNeighbour(1000000000); while (true) { if (nParts >= kMaxParts) { @@ -1525,23 +1538,23 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (nParts >= kMaxParts) { break; } - if (nHits + mSliceTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) { + if (nHits + mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) { break; } - trackParts[nParts] = &mSliceTrackInfos[tr->ExtrapolatedTrackId(i)]; + trackParts[nParts] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)]; trackParts[nParts++]->SetLeg(leg); - nHits += mSliceTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters(); + nHits += mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters(); } } int32_t jtr = tr->NextSegmentNeighbour(); if (jtr >= 0) { - tr = &(mSliceTrackInfos[jtr]); + tr = &(mSectorTrackInfos[jtr]); tr->SetPrevSegmentNeighbour(1000000002); continue; } jtr = 
trbase->NextNeighbour(); if (jtr >= 0) { - trbase = &(mSliceTrackInfos[jtr]); + trbase = &(mSectorTrackInfos[jtr]); tr = trbase; if (tr->PrevSegmentNeighbour() >= 0) { break; @@ -1555,7 +1568,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread // unpack and sort clusters if (nParts > 1 && leg == 0) { - GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSliceTrack* a, const GPUTPCGMSliceTrack* b) { + GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { #ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith if (a->X() != b->X()) { return (a->X() > b->X()); @@ -1581,15 +1594,15 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread trackCluster trackClusters[kMaxClusters]; nHits = 0; for (int32_t ipart = 0; ipart < nParts; ipart++) { - const GPUTPCGMSliceTrack* t = trackParts[ipart]; + const GPUTPCGMSectorTrack* t = trackParts[ipart]; CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nOutputTracks, ipart, t->QPt(), t->DzDs())); int32_t nTrackHits = t->NClusters(); trackCluster* c2 = trackClusters + nHits + nTrackHits - 1; for (int32_t i = 0; i < nTrackHits; i++, c2--) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Slice()]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()]; const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i]; - uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Slice()][0]; - *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Slice(), t->Leg()}; + uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0]; + *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector(), t->Leg()}; } nHits += nTrackHits; } @@ -1654,7 
+1667,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } GPUTPCGMMerger_CompareClusterIdsLooper::clcomparestruct clusterSort[kMaxClusters]; for (int32_t iPart = 0; iPart < nParts; iPart++) { - const GPUTPCGMSliceTrack* t = trackParts[iPart]; + const GPUTPCGMSectorTrack* t = trackParts[iPart]; int32_t nTrackHits = t->NClusters(); for (int32_t j = 0; j < nTrackHits; j++) { int32_t i = nTmpHits + j; @@ -1706,7 +1719,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { - const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].slice].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].slice].Data().ClusterIdOffset()]; + const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1723,7 +1736,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread cl[i].state = state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags cl[i].row = trackClusters[i].row; cl[i].num = trackClusters[i].id; - cl[i].slice = trackClusters[i].slice; + cl[i].sector = trackClusters[i].sector; cl[i].leg = trackClusters[i].leg; } @@ -1743,7 +1756,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetNClusters(nHits); mergedTrack.SetFirstClusterRef(iOutTrackFirstCluster); GPUTPCGMTrackParam& p1 = mergedTrack.Param(); - const GPUTPCGMSliceTrack& p2 = *trackParts[firstTrackIndex]; + const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; @@ -1803,6 +1816,10 @@ GPUd() void 
GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr } #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt +namespace o2::gpu::internal +{ +namespace // anonymous +{ struct GPUTPCGMMergerSortTracks_comp { const GPUTPCGMMergedTrack* const mCmp; GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} @@ -1833,14 +1850,6 @@ struct GPUTPCGMMergerSortTracks_comp { } }; -template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); - ThrustVolatileAsyncAllocator alloc(this); - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); -} - struct GPUTPCGMMergerSortTracksQPt_comp { const GPUTPCGMMergedTrack* const mCmp; GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} @@ -1861,6 +1870,16 @@ struct GPUTPCGMMergerSortTracksQPt_comp { #endif } }; +} // anonymous namespace +} // namespace o2::gpu::internal + +template <> +inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); + ThrustVolatileAsyncAllocator alloc(this); + thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); +} template <> inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) @@ -2018,8 +2037,8 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, const auto& p = 
trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { - const int32_t slice = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].slice; - const float refz = p.GetZ() + (Param().par.earlyTpcTransform ? p.GetTZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(slice, p.GetTZOffset(), Param().continuousMaxTimeBin)) + (trk.CSide() ? -100 : 100); + const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector; + const float refz = p.GetZ() + (Param().par.earlyTpcTransform ? p.GetTZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTZOffset(), Param().continuousMaxTimeBin)) + (trk.CSide() ? -100 : 100); float sinA, cosA; CAMath::SinCos(trk.GetAlpha(), sinA, cosA); float gx = cosA * p.GetX() - sinA * p.GetY(); @@ -2043,20 +2062,20 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, for (uint32_t k = 0;k < trk.NClusters();k++) { float xx, yy, zz; if (Param().par.earlyTpcTransform) { - const float zOffset = (mClusters[trk.FirstClusterRef() + k].slice < 18) == (mClusters[trk.FirstClusterRef() + 0].slice < 18) ? p.GetTZOffset() : -p.GetTZOffset(); + const float zOffset = (mClusters[trk.FirstClusterRef() + k].sector < 18) == (mClusters[trk.FirstClusterRef() + 0].sector < 18) ? 
p.GetTZOffset() : -p.GetTZOffset(); xx = mClustersXYZ[trk.FirstClusterRef() + k].x; yy = mClustersXYZ[trk.FirstClusterRef() + k].y; zz = mClustersXYZ[trk.FirstClusterRef() + k].z - zOffset; } else { const ClusterNative& GPUrestrict() cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[mClusters[trk.FirstClusterRef() + k].num]; - GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].slice, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTZOffset()); + GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTZOffset()); } float sa2, ca2; - CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].slice), sa2, ca2); + CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].sector), sa2, ca2); float cx = ca2 * xx - sa2 * yy; float cy = ca2 * yy + sa2 * xx; float dist = CAMath::Sqrt((cx - gmx) * (cx - gmx) + (cy - gmy) * (cy - gmy)); - printf("Hit %3d/%3d slice %d xy %f %f R %f\n", k, trk.NClusters(), (int32_t)mClusters[trk.FirstClusterRef() + k].slice, cx, cy, dist); + printf("Hit %3d/%3d sector %d xy %f %f R %f\n", k, trk.NClusters(), (int32_t)mClusters[trk.FirstClusterRef() + k].sector, cx, cy, dist); }*/ } } @@ -2074,12 +2093,18 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, } #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt +namespace o2::gpu::internal +{ +namespace // anonymous +{ struct GPUTPCGMMergerMergeLoopers_comp { GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b) { return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); } }; +} // anonymous namespace +} // namespace o2::gpu::internal template <> inline void 
GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 7e309dcb79a9c..ae6a2582d833a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -19,7 +19,7 @@ #include "GPUTPCDef.h" #include "GPUTPCGMBorderTrack.h" #include "GPUTPCGMMergedTrack.h" -#include "GPUTPCGMSliceTrack.h" +#include "GPUTPCGMSectorTrack.h" #include "GPUCommonDef.h" #include "GPUProcessor.h" #include "GPUTPCGMMergerTypes.h" @@ -30,30 +30,28 @@ #include #endif // GPUCA_GPUCODE -namespace o2 -{ -namespace base +namespace o2::base { class MatLayerCylSet; } -namespace tpc +namespace o2::tpc { struct ClusterNative; } -} // namespace o2 -namespace o2 -{ -namespace gpu +namespace o2::gpu { -class GPUTPCSliceTrack; -class GPUTPCSliceOutput; +class GPUTPCSectorTrack; +class GPUTPCSectorOutput; class GPUTPCGMTrackParam; class GPUTPCTracker; class GPUChainTracking; class GPUTPCGMPolynomialField; struct GPUTPCGMLoopData; +namespace internal +{ struct MergeLooperParam; +} // namespace internal /** * @class GPUTPCGMMerger @@ -66,7 +64,7 @@ class GPUTPCGMMerger : public GPUProcessor ~GPUTPCGMMerger() = default; GPUTPCGMMerger(const GPUTPCGMMerger&) = delete; const GPUTPCGMMerger& operator=(const GPUTPCGMMerger&) const = delete; - static constexpr const int32_t NSLICES = GPUCA_NSLICES; //* N slices + static constexpr const int32_t NSECTORS = GPUCA_NSECTORS; //* N sectors struct memory { GPUAtomic(uint32_t) nRetryRefit; @@ -76,15 +74,15 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nOutputTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; - const GPUTPCTrack* firstExtrapolatedTracks[NSLICES]; - GPUAtomic(uint32_t) tmpCounter[2 * NSLICES]; + const GPUTPCTrack* firstExtrapolatedTracks[NSECTORS]; + GPUAtomic(uint32_t) tmpCounter[2 * NSECTORS]; GPUAtomic(uint32_t) nLooperMatchCandidates; }; struct 
trackCluster { uint32_t id; uint8_t row; - uint8_t slice; + uint8_t sector; uint8_t leg; }; @@ -131,7 +129,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUhdi() uint2* ClusRefTmp() { return mClusRefTmp; } GPUhdi() uint32_t* TrackSort() { return mTrackSort; } GPUhdi() tmpSort* TrackSortO2() { return mTrackSortO2; } - GPUhdi() MergeLooperParam* LooperCandidates() { return mLooperCandidates; } + GPUhdi() internal::MergeLooperParam* LooperCandidates() { return mLooperCandidates; } GPUhdi() GPUAtomic(uint32_t) * SharedCount() { return mSharedCount; } GPUhdi() gputpcgmmergertypes::GPUTPCGMBorderRange* BorderRange(int32_t i) { return mBorderRange[i]; } GPUhdi() const gputpcgmmergertypes::GPUTPCGMBorderRange* BorderRange(int32_t i) const { return mBorderRange[i]; } @@ -141,8 +139,8 @@ class GPUTPCGMMerger : public GPUProcessor GPUhdi() o2::MCCompLabel* OutputTracksTPCO2MC() { return mOutputTracksTPCO2MC; } GPUhdi() uint32_t NOutputTracksTPCO2() const { return mMemory->nO2Tracks; } GPUhdi() uint32_t NOutputClusRefsTPCO2() const { return mMemory->nO2ClusRefs; } - GPUhdi() GPUTPCGMSliceTrack* SliceTrackInfos() { return mSliceTrackInfos; } - GPUhdi() int32_t NMaxSingleSliceTracks() const { return mNMaxSingleSliceTracks; } + GPUhdi() GPUTPCGMSectorTrack* SectorTrackInfos() { return mSectorTrackInfos; } + GPUhdi() int32_t NMaxSingleSectorTracks() const { return mNMaxSingleSectorTracks; } GPUhdi() int32_t* TrackIDs() { return mTrackIDs; } GPUhdi() int32_t* TmpSortMemory() { return mTmpSortMemory; } @@ -154,21 +152,21 @@ class GPUTPCGMMerger : public GPUProcessor GPUd() uint16_t MemoryResOutputO2MC() const { return mMemoryResOutputO2MC; } GPUd() uint16_t MemoryResOutputO2Scratch() const { return mMemoryResOutputO2Scratch; } - GPUd() int32_t RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, const GPUTPCTrack* inTrack, float alpha, int32_t slice); - GPUd() void SetTrackClusterZT(GPUTPCGMSliceTrack& track, int32_t iSlice, const GPUTPCTrack* sliceTr); + GPUd() int32_t 
RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack, const GPUTPCTrack* inTrack, float alpha, int32_t sector); + GPUd() void SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr); - int32_t CheckSlices(); - GPUd() void RefitSliceTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice); - GPUd() void UnpackSliceGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice); + int32_t CheckSectors(); + GPUd() void RefitSectorTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector); + GPUd() void UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector); GPUd() void UnpackSaveNumber(int32_t id); - GPUd() void UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice); + GPUd() void UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector); GPUd() void MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output); - GPUd() void MergeWithinSlicesPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); - GPUd() void MergeSlicesPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam); + GPUd() void MergeWithinSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); + GPUd() void MergeSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam); template - GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); - GPUd() void MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& 
b1, GPUTPCGMBorderTrack*& b2, int32_t& jSlice, int32_t iSlice, int8_t withinSlice, int8_t mergeMode) const; + GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode); + GPUd() void MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSector, int32_t iSector, int8_t withinSector, int8_t mergeMode) const; template GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, gputpcgmmergertypes::GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax); GPUd() void SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); @@ -186,17 +184,17 @@ class GPUTPCGMMerger : public GPUProcessor GPUd() void ResolveFindConnectedComponentsHookNeighbors(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void ResolveFindConnectedComponentsHookLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void ResolveFindConnectedComponentsMultiJump(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); - GPUd() void ResolveMergeSlices(gputpcgmmergertypes::GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll); + GPUd() void ResolveMergeSectors(gputpcgmmergertypes::GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll); GPUd() void MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); #ifndef GPUCA_GPUCODE - void DumpSliceTracks(std::ostream& out) const; - void DumpMergeRanges(std::ostream& out, int32_t withinSlice, int32_t 
mergeMode) const; + void DumpSectorTracks(std::ostream& out) const; + void DumpMergeRanges(std::ostream& out, int32_t withinSector, int32_t mergeMode) const; void DumpTrackLinks(std::ostream& out, bool output, const char* type) const; - void DumpMergedWithinSlices(std::ostream& out) const; - void DumpMergedBetweenSlices(std::ostream& out) const; + void DumpMergedWithinSectors(std::ostream& out) const; + void DumpMergedBetweenSectors(std::ostream& out) const; void DumpCollected(std::ostream& out) const; void DumpMergeCE(std::ostream& out) const; void DumpFitPrepare(std::ostream& out) const; @@ -204,33 +202,33 @@ class GPUTPCGMMerger : public GPUProcessor void DumpFinal(std::ostream& out) const; template - void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t slice1, int32_t slice2, int32_t mergeMode, float weight, float frac) const; - void MergedTrackStreamer(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t slice1, int32_t slice2, int32_t mergeMode, float weight, float frac) const; + void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const; + void MergedTrackStreamer(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const; const GPUTPCGMBorderTrack& MergedTrackStreamerFindBorderTrack(const GPUTPCGMBorderTrack* tracks, int32_t N, int32_t trackId) const; void DebugRefitMergedTrack(const GPUTPCGMMergedTrack& track) const; - std::vector StreamerOccupancyBin(int32_t iSlice, int32_t iRow, float time) const; - std::vector StreamerUncorrectedZY(int32_t iSlice, int32_t iRow, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop) const; + std::vector StreamerOccupancyBin(int32_t iSector, int32_t iRow, float time) 
const; + std::vector StreamerUncorrectedZY(int32_t iSector, int32_t iRow, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop) const; void DebugStreamerUpdate(int32_t iTrk, int32_t ihit, float xx, float yy, float zz, const GPUTPCGMMergedTrackHit& cluster, const o2::tpc::ClusterNative& clusterNative, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop, const gputpcgmmergertypes::InterpolationErrorHit& interpolation, int8_t rejectChi2, bool refit, int32_t retVal, float avgInvCharge, float posY, float posZ, int16_t clusterState, int32_t retValReject, float err2Y, float err2Z) const; #endif - GPUdi() int32_t SliceTrackInfoFirst(int32_t iSlice) const { return mSliceTrackInfoIndex[iSlice]; } - GPUdi() int32_t SliceTrackInfoLast(int32_t iSlice) const { return mSliceTrackInfoIndex[iSlice + 1]; } - GPUdi() int32_t SliceTrackInfoGlobalFirst(int32_t iSlice) const { return mSliceTrackInfoIndex[NSLICES + iSlice]; } - GPUdi() int32_t SliceTrackInfoGlobalLast(int32_t iSlice) const { return mSliceTrackInfoIndex[NSLICES + iSlice + 1]; } - GPUdi() int32_t SliceTrackInfoLocalTotal() const { return mSliceTrackInfoIndex[NSLICES]; } - GPUdi() int32_t SliceTrackInfoTotal() const { return mSliceTrackInfoIndex[2 * NSLICES]; } + GPUdi() int32_t SectorTrackInfoFirst(int32_t iSector) const { return mSectorTrackInfoIndex[iSector]; } + GPUdi() int32_t SectorTrackInfoLast(int32_t iSector) const { return mSectorTrackInfoIndex[iSector + 1]; } + GPUdi() int32_t SectorTrackInfoGlobalFirst(int32_t iSector) const { return mSectorTrackInfoIndex[NSECTORS + iSector]; } + GPUdi() int32_t SectorTrackInfoGlobalLast(int32_t iSector) const { return mSectorTrackInfoIndex[NSECTORS + iSector + 1]; } + GPUdi() int32_t SectorTrackInfoLocalTotal() const { return mSectorTrackInfoIndex[NSECTORS]; } + GPUdi() int32_t SectorTrackInfoTotal() const { return mSectorTrackInfoIndex[2 * NSECTORS]; } private: - GPUd() void MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, 
int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam = false); + GPUd() void MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam = false); template - GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSlice2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode = 0); + GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode = 0); - GPUd() void MergeCEFill(const GPUTPCGMSliceTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr); + GPUd() void MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr); void CheckMergedTracks(); #ifndef GPUCA_GPUCODE - void PrintMergeGraph(const GPUTPCGMSliceTrack* trk, std::ostream& out) const; + void PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const; template int64_t GetTrackLabelA(const S& trk) const; template @@ -240,58 +238,57 @@ class GPUTPCGMMerger : public GPUProcessor GPUdi() void setBlockRange(int32_t elems, int32_t nBlocks, int32_t iBlock, int32_t& start, int32_t& end); GPUdi() void hookEdge(int32_t u, int32_t v); - int32_t mNextSliceInd[NSLICES]; - int32_t mPrevSliceInd[NSLICES]; + int32_t mNextSectorInd[NSECTORS]; + int32_t mPrevSectorInd[NSECTORS]; - int32_t* mTrackLinks; + int32_t* mTrackLinks = nullptr; int32_t* mTrackCCRoots; // root of the connected component of this track - uint32_t mNTotalSliceTracks; // maximum number of incoming slice tracks - uint32_t mNMaxTracks; // maximum number of output tracks - 
uint32_t mNMaxSingleSliceTracks; // max N tracks in one slice - uint32_t mNMaxOutputTrackClusters; // max number of clusters in output tracks (double-counting shared clusters) - uint32_t mNMaxClusters; // max total unique clusters (in event) - uint32_t mNMaxLooperMatches; // Maximum number of candidate pairs for looper matching + uint32_t mNTotalSectorTracks = 0; // maximum number of incoming sector tracks + uint32_t mNMaxTracks = 0; // maximum number of output tracks + uint32_t mNMaxSingleSectorTracks = 0; // max N tracks in one sector + uint32_t mNMaxOutputTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) + uint32_t mNMaxClusters = 0; // max total unique clusters (in event) + uint32_t mNMaxLooperMatches = 0; // Maximum number of candidate pairs for looper matching - uint16_t mMemoryResMemory; - uint16_t mMemoryResOutput; - uint16_t mMemoryResOutputState; - uint16_t mMemoryResOutputO2; - uint16_t mMemoryResOutputO2Clus; - uint16_t mMemoryResOutputO2MC; - uint16_t mMemoryResOutputO2Scratch; + uint16_t mMemoryResMemory = (uint16_t)-1; + uint16_t mMemoryResOutput = (uint16_t)-1; + uint16_t mMemoryResOutputState = (uint16_t)-1; + uint16_t mMemoryResOutputO2 = (uint16_t)-1; + uint16_t mMemoryResOutputO2Clus = (uint16_t)-1; + uint16_t mMemoryResOutputO2MC = (uint16_t)-1; + uint16_t mMemoryResOutputO2Scratch = (uint16_t)-1; - int32_t mNClusters; // Total number of incoming clusters (from slice tracks) - GPUTPCGMMergedTrack* mOutputTracks; //* array of output merged tracks - GPUdEdxInfo* mOutputTracksdEdx; //* dEdx information - GPUTPCGMSliceTrack* mSliceTrackInfos; //* additional information for slice tracks - int32_t* mSliceTrackInfoIndex; - GPUTPCGMMergedTrackHit* mClusters; - GPUTPCGMMergedTrackHitXYZ* mClustersXYZ; - GPUAtomic(uint32_t) * mClusterAttachment; - o2::tpc::TrackTPC* mOutputTracksTPCO2; - uint32_t* mOutputClusRefsTPCO2; - o2::MCCompLabel* mOutputTracksTPCO2MC; - MergeLooperParam* mLooperCandidates; + int32_t 
mNClusters = 0; // Total number of incoming clusters (from sector tracks) + GPUTPCGMMergedTrack* mOutputTracks = nullptr; //* array of output merged tracks + GPUdEdxInfo* mOutputTracksdEdx = nullptr; //* dEdx information + GPUTPCGMSectorTrack* mSectorTrackInfos = nullptr; //* additional information for sector tracks + int32_t* mSectorTrackInfoIndex = nullptr; + GPUTPCGMMergedTrackHit* mClusters = nullptr; + GPUTPCGMMergedTrackHitXYZ* mClustersXYZ = nullptr; + GPUAtomic(uint32_t) * mClusterAttachment = nullptr; + o2::tpc::TrackTPC* mOutputTracksTPCO2 = nullptr; + uint32_t* mOutputClusRefsTPCO2 = nullptr; + o2::MCCompLabel* mOutputTracksTPCO2MC = nullptr; + internal::MergeLooperParam* mLooperCandidates = nullptr; - uint32_t* mTrackOrderAttach; - uint32_t* mTrackOrderProcess; - uint8_t* mClusterStateExt; - uint2* mClusRefTmp; - int32_t* mTrackIDs; - int32_t* mTmpSortMemory; - uint32_t* mTrackSort; - tmpSort* mTrackSortO2; - GPUAtomic(uint32_t) * mSharedCount; // Must be uint32_t unfortunately for atomic support - GPUTPCGMBorderTrack* mBorderMemory; // memory for border tracks - GPUTPCGMBorderTrack* mBorder[2 * NSLICES]; - gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRangeMemory; // memory for border tracks - gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRange[NSLICES]; // memory for border tracks - memory* mMemory; - uint32_t* mRetryRefitIds; - GPUTPCGMLoopData* mLoopData; + uint32_t* mTrackOrderAttach = nullptr; + uint32_t* mTrackOrderProcess = nullptr; + uint8_t* mClusterStateExt = nullptr; + uint2* mClusRefTmp = nullptr; + int32_t* mTrackIDs = nullptr; + int32_t* mTmpSortMemory = nullptr; + uint32_t* mTrackSort = nullptr; + tmpSort* mTrackSortO2 = nullptr; + GPUAtomic(uint32_t) * mSharedCount = nullptr; // Must be uint32_t unfortunately for atomic support + GPUTPCGMBorderTrack* mBorderMemory = nullptr; // memory for border tracks + GPUTPCGMBorderTrack* mBorder[2 * NSECTORS]; + gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRangeMemory = nullptr; // memory 
for border tracks + gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRange[NSECTORS]; // memory for border tracks + memory* mMemory = nullptr; + uint32_t* mRetryRefitIds = nullptr; + GPUTPCGMLoopData* mLoopData = nullptr; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCGMMERGER_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 0463966c582a5..d6dfcc8424e65 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -25,12 +25,12 @@ #include "GPUO2DataTypes.h" #include "GPUCommonMath.h" #include "GPUTPCTrackParam.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUTPCGMMergedTrack.h" #include "GPUParam.h" #include "GPUParam.inc" #include "GPUTPCGMTrackParam.h" -#include "GPUTPCGMSliceTrack.h" +#include "GPUTPCGMSectorTrack.h" #include "GPUTPCGMBorderTrack.h" #include "GPUReconstruction.h" #include "GPUDebugStreamer.h" @@ -41,17 +41,17 @@ using namespace o2::gpu; using namespace gputpcgmmergertypes; -void GPUTPCGMMerger::DumpSliceTracks(std::ostream& out) const +void GPUTPCGMMerger::DumpSectorTracks(std::ostream& out) const { std::streamsize ss = out.precision(); out << std::setprecision(2); - out << "\nTPC Merger Slice Tracks\n"; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - out << "Slice Track Info Index " << (mSliceTrackInfoIndex[iSlice + 1] - mSliceTrackInfoIndex[iSlice]) << " / " << (mSliceTrackInfoIndex[NSLICES + iSlice + 1] - mSliceTrackInfoIndex[NSLICES + iSlice]) << "\n"; + out << "\nTPC Merger Sector Tracks\n"; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + out << "Sector Track Info Index " << (mSectorTrackInfoIndex[iSector + 1] - mSectorTrackInfoIndex[iSector]) << " / " << (mSectorTrackInfoIndex[NSECTORS + iSector + 1] - mSectorTrackInfoIndex[NSECTORS + iSector]) << "\n"; for (int32_t iGlobal = 0; iGlobal < 2; iGlobal++) { out << " Track type " << 
iGlobal << "\n"; - for (int32_t j = mSliceTrackInfoIndex[iSlice + NSLICES * iGlobal]; j < mSliceTrackInfoIndex[iSlice + NSLICES * iGlobal + 1]; j++) { - const auto& trk = mSliceTrackInfos[j]; + for (int32_t j = mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal]; j < mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal + 1]; j++) { + const auto& trk = mSectorTrackInfos[j]; out << " Track " << j << ": LocalId " << (iGlobal ? (trk.LocalTrackId() >> 24) : -1) << "/" << (iGlobal ? (trk.LocalTrackId() & 0xFFFFFF) : -1) << " X " << trk.X() << " offsetz " << trk.TZOffset() << " A " << trk.Alpha() << " Y " << trk.Y() << " Z " << trk.Z() << " SinPhi " << trk.SinPhi() << " CosPhi " << trk.CosPhi() << " SecPhi " << trk.SecPhi() << " Tgl " << trk.DzDs() << " QPt " << trk.QPt() << "\n"; } } @@ -59,29 +59,29 @@ void GPUTPCGMMerger::DumpSliceTracks(std::ostream& out) const out << std::setprecision(ss); } -void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSlice, int32_t mergeMode) const +void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSector, int32_t mergeMode) const { - int32_t n = withinSlice == -1 ? NSLICES / 2 : NSLICES; + int32_t n = withinSector == -1 ? 
NSECTORS / 2 : NSECTORS; for (int32_t i = 0; i < n; i++) { int32_t n1, n2; GPUTPCGMBorderTrack *b1, *b2; - int32_t jSlice; - MergeBorderTracksSetup(n1, n2, b1, b2, jSlice, i, withinSlice, mergeMode); - const int32_t nTrk = *mRec->GetConstantMem().tpcTrackers[jSlice].NTracks(); + int32_t jSector; + MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, withinSector, mergeMode); + const int32_t nTrk = *mRec->GetConstantMem().tpcTrackers[jSector].NTracks(); const gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = BorderRange(i); - const gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = BorderRange(jSlice) + nTrk; - out << "\nBorder Tracks : i " << i << " withinSlice " << withinSlice << " mergeMode " << mergeMode << "\n"; + const gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = BorderRange(jSector) + nTrk; + out << "\nBorder Tracks : i " << i << " withinSector " << withinSector << " mergeMode " << mergeMode << "\n"; for (int32_t k = 0; k < n1; k++) { out << " " << k << ": t " << b1[k].TrackID() << " ncl " << b1[k].NClusters() << " row " << (mergeMode > 0 ? b1[k].Row() : -1) << " par " << b1[k].Par()[0] << " " << b1[k].Par()[1] << " " << b1[k].Par()[2] << " " << b1[k].Par()[3] << " " << b1[k].Par()[4] << " offset " << b1[k].ZOffsetLinear() << " cov " << b1[k].Cov()[0] << " " << b1[k].Cov()[1] << " " << b1[k].Cov()[2] << " " << b1[k].Cov()[3] << " " << b1[k].Cov()[4] << " covd " << b1[k].CovD()[0] << " " << b1[k].CovD()[1] << "\n"; } - if (i != jSlice) { + if (i != jSector) { for (int32_t k = 0; k < n2; k++) { out << " " << k << ": t " << b2[k].TrackID() << " ncl " << b2[k].NClusters() << " row " << (mergeMode > 0 ? 
b2[k].Row() : -1) << " par " << b2[k].Par()[0] << " " << b2[k].Par()[1] << " " << b2[k].Par()[2] << " " << b2[k].Par()[3] << " " << b2[k].Par()[4] << " offset " << b2[k].ZOffsetLinear() << " cov " << b2[k].Cov()[0] << " " << b2[k].Cov()[1] << " " << b2[k].Cov()[2] << " " << b2[k].Cov()[3] << " " << b2[k].Cov()[4] << " covd " << b2[k].CovD()[0] << " " << b2[k].CovD()[1] << "\n"; } } - out << "\nBorder Range : i " << i << " withinSlice " << withinSlice << " mergeMode " << mergeMode << "\n"; + out << "\nBorder Range : i " << i << " withinSector " << withinSector << " mergeMode " << mergeMode << "\n"; for (int32_t k = 0; k < n1; k++) { out << " " << k << ": " << range1[k].fId << " " << range1[k].fMin << " " << range1[k].fMax << "\n"; } @@ -94,7 +94,7 @@ void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSlice, int void GPUTPCGMMerger::DumpTrackLinks(std::ostream& out, bool output, const char* type) const { out << "\nTPC Merger Links " << type << "\n"; - const int32_t n = output ? mMemory->nOutputTracks : SliceTrackInfoLocalTotal(); + const int32_t n = output ? 
mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); for (int32_t i = 0; i < n; i++) { if (mTrackLinks[i] != -1) { out << " " << i << ": " << mTrackLinks[i] << "\n"; @@ -102,13 +102,13 @@ void GPUTPCGMMerger::DumpTrackLinks(std::ostream& out, bool output, const char* } } -void GPUTPCGMMerger::DumpMergedWithinSlices(std::ostream& out) const +void GPUTPCGMMerger::DumpMergedWithinSectors(std::ostream& out) const { - DumpTrackLinks(out, false, "within Slices"); - out << "\nTPC Merger Merge Within Slices\n"; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - for (int32_t j = mSliceTrackInfoIndex[iSlice]; j < mSliceTrackInfoIndex[iSlice + 1]; j++) { - const auto& trk = mSliceTrackInfos[j]; + DumpTrackLinks(out, false, "within Sectors"); + out << "\nTPC Merger Merge Within Sectors\n"; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + for (int32_t j = mSectorTrackInfoIndex[iSector]; j < mSectorTrackInfoIndex[iSector + 1]; j++) { + const auto& trk = mSectorTrackInfos[j]; if (trk.NextSegmentNeighbour() >= 0 || trk.PrevSegmentNeighbour() >= 0) { out << " Track " << j << ": Neighbour " << trk.PrevSegmentNeighbour() << " / " << trk.NextSegmentNeighbour() << "\n"; } @@ -116,13 +116,13 @@ void GPUTPCGMMerger::DumpMergedWithinSlices(std::ostream& out) const } } -void GPUTPCGMMerger::DumpMergedBetweenSlices(std::ostream& out) const +void GPUTPCGMMerger::DumpMergedBetweenSectors(std::ostream& out) const { - DumpTrackLinks(out, false, "between Slices"); - out << "\nTPC Merger Merge Between Slices\n"; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - for (int32_t j = mSliceTrackInfoIndex[iSlice]; j < mSliceTrackInfoIndex[iSlice + 1]; j++) { - const auto& trk = mSliceTrackInfos[j]; + DumpTrackLinks(out, false, "between Sectors"); + out << "\nTPC Merger Merge Between Sectors\n"; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + for (int32_t j = mSectorTrackInfoIndex[iSector]; j < mSectorTrackInfoIndex[iSector + 1]; j++) { + const auto& trk = 
mSectorTrackInfos[j]; if (trk.NextNeighbour() >= 0 || trk.PrevNeighbour() >= 0) { out << " Track " << j << ": Neighbour " << trk.PrevNeighbour() << " / " << trk.NextNeighbour() << "\n"; } @@ -242,24 +242,24 @@ void GPUTPCGMMerger::DumpFinal(std::ostream& out) const } template -inline void GPUTPCGMMerger::MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t slice1, int32_t slice2, int32_t mergeMode, float weight, float frac) const +inline void GPUTPCGMMerger::MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const { #ifdef DEBUG_STREAMER std::vector hits1(152), hits2(152); for (int32_t i = 0; i < 152; i++) { hits1[i] = hits2[i] = -1; } - const GPUTPCTracker& tracker1 = GetConstantMem()->tpcTrackers[slice1]; - const GPUTPCGMSliceTrack& sliceTrack1 = mSliceTrackInfos[b1.TrackID()]; - const GPUTPCTrack& inTrack1 = *sliceTrack1.OrigTrack(); + const GPUTPCTracker& tracker1 = GetConstantMem()->tpcTrackers[sector1]; + const GPUTPCGMSectorTrack& sectorTrack1 = mSectorTrackInfos[b1.TrackID()]; + const GPUTPCTrack& inTrack1 = *sectorTrack1.OrigTrack(); for (int32_t i = 0; i < inTrack1.NHits(); i++) { const GPUTPCHitId& ic1 = tracker1.TrackHits()[inTrack1.FirstHitID() + i]; int32_t clusterIndex = tracker1.Data().ClusterDataIndex(tracker1.Data().Row(ic1.RowIndex()), ic1.HitIndex()); hits1[ic1.RowIndex()] = clusterIndex; } - const GPUTPCTracker& tracker2 = GetConstantMem()->tpcTrackers[slice2]; - const GPUTPCGMSliceTrack& sliceTrack2 = mSliceTrackInfos[b2.TrackID()]; - const GPUTPCTrack& inTrack2 = *sliceTrack2.OrigTrack(); + const GPUTPCTracker& tracker2 = GetConstantMem()->tpcTrackers[sector2]; + const GPUTPCGMSectorTrack& sectorTrack2 = mSectorTrackInfos[b2.TrackID()]; + const GPUTPCTrack& inTrack2 = *sectorTrack2.OrigTrack(); for (int32_t i = 0; i < inTrack2.NHits(); i++) { 
const GPUTPCHitId& ic2 = tracker2.TrackHits()[inTrack2.FirstHitID() + i]; int32_t clusterIndex = tracker2.Data().ClusterDataIndex(tracker2.Data().Row(ic2.RowIndex()), ic2.HitIndex()); @@ -268,17 +268,17 @@ inline void GPUTPCGMMerger::MergedTrackStreamerInternal(const GPUTPCGMBorderTrac std::string debugname = std::string("debug_") + name; std::string treename = std::string("tree_") + name; - o2::utils::DebugStreamer::instance()->getStreamer(debugname.c_str(), "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName(treename.c_str()).data() << "slice1=" << slice1 << "slice2=" << slice2 << "b1=" << b1 << "b2=" << b2 << "clusters1=" << hits1 << "clusters2=" << hits2 << "sliceTrack1=" << sliceTrack1 << "sliceTrack2=" << sliceTrack2 << "mergeMode=" << mergeMode << "weight=" << weight << "fraction=" << frac << "\n"; + o2::utils::DebugStreamer::instance()->getStreamer(debugname.c_str(), "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName(treename.c_str()).data() << "sector1=" << sector1 << "sector2=" << sector2 << "b1=" << b1 << "b2=" << b2 << "clusters1=" << hits1 << "clusters2=" << hits2 << "sectorTrack1=" << sectorTrack1 << "sectorTrack2=" << sectorTrack2 << "mergeMode=" << mergeMode << "weight=" << weight << "fraction=" << frac << "\n"; #endif } -void GPUTPCGMMerger::MergedTrackStreamer(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t slice1, int32_t slice2, int32_t mergeMode, float weight, float frac) const +void GPUTPCGMMerger::MergedTrackStreamer(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const { #ifdef DEBUG_STREAMER if (mergeMode == 0) { - MergedTrackStreamerInternal<0>(b1, b2, name, slice1, slice2, mergeMode, weight, frac); + MergedTrackStreamerInternal<0>(b1, b2, name, sector1, sector2, mergeMode, weight, frac); } else if (mergeMode >= 1 && mergeMode <= 0) { - // 
MergedTrackStreamerInternal<1>(b1, b2, name, slice1, slice2, mergeMode, weight, frac); Not yet working + // MergedTrackStreamerInternal<1>(b1, b2, name, sector1, sector2, mergeMode, weight, frac); Not yet working } #endif } @@ -323,7 +323,7 @@ void GPUTPCGMMerger::DebugRefitMergedTrack(const GPUTPCGMMergedTrack& track) con } } -std::vector GPUTPCGMMerger::StreamerOccupancyBin(int32_t iSlice, int32_t iRow, float time) const +std::vector GPUTPCGMMerger::StreamerOccupancyBin(int32_t iSector, int32_t iRow, float time) const { static int32_t size = getenv("O2_DEBUG_STREAMER_OCCUPANCY_NBINS") ? atoi(getenv("O2_DEBUG_STREAMER_OCCUPANCY_NBINS")) : Param().rec.tpc.occupancyMapTimeBinsAverage; std::vector retVal(1 + 2 * size); @@ -337,11 +337,11 @@ std::vector GPUTPCGMMerger::StreamerOccupancyBin(int32_t iSlice, int32 return retVal; } -std::vector GPUTPCGMMerger::StreamerUncorrectedZY(int32_t iSlice, int32_t iRow, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop) const +std::vector GPUTPCGMMerger::StreamerUncorrectedZY(int32_t iSector, int32_t iRow, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop) const { std::vector retVal(2); #ifdef DEBUG_STREAMER - GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(iSlice, iRow, track.GetY(), track.GetZ(), retVal[0], retVal[1]); + GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(iSector, iRow, track.GetY(), track.GetZ(), retVal[0], retVal[1]); #endif return retVal; } @@ -350,12 +350,12 @@ void GPUTPCGMMerger::DebugStreamerUpdate(int32_t iTrk, int32_t ihit, float xx, f { #ifdef DEBUG_STREAMER float time = clusterNative.getTime(); - auto occupancyBins = StreamerOccupancyBin(cluster.slice, cluster.row, time); - auto uncorrectedYZ = StreamerUncorrectedZY(cluster.slice, cluster.row, track, prop); + auto occupancyBins = StreamerOccupancyBin(cluster.sector, cluster.row, time); + auto uncorrectedYZ = StreamerUncorrectedZY(cluster.sector, cluster.row, 
track, prop); float invCharge = 1.f / clusterNative.qMax; int32_t iRow = cluster.row; float unscaledMult = (time >= 0.f ? Param().GetUnscaledMult(time) / Param().tpcGeometry.Row2X(iRow) : 0.f); - const float clAlpha = Param().Alpha(cluster.slice); + const float clAlpha = Param().Alpha(cluster.sector); uint32_t occupancyTotal = Param().occupancyTotal; o2::utils::DebugStreamer::instance()->getStreamer("debug_update_track", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_update_track").data() << "iTrk=" << iTrk diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index 4f654c0fa7beb..d72d59a6250e7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -37,21 +37,21 @@ GPUdii() void GPUTPCGMMergerFollowLoopers::Thread<0>(int32_t nBlocks, int32_t nT } template <> -GPUdii() void GPUTPCGMMergerUnpackResetIds::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice) +GPUdii() void GPUTPCGMMergerUnpackResetIds::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector) { - merger.UnpackResetIds(nBlocks, nThreads, iBlock, iThread, iSlice); + merger.UnpackResetIds(nBlocks, nThreads, iBlock, iThread, iSector); } template <> -GPUdii() void GPUTPCGMMergerSliceRefit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice) +GPUdii() void GPUTPCGMMergerSectorRefit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector) { - merger.RefitSliceTracks(nBlocks, nThreads, iBlock, iThread, iSlice); + 
merger.RefitSectorTracks(nBlocks, nThreads, iBlock, iThread, iSector); } template <> -GPUdii() void GPUTPCGMMergerUnpackGlobal::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice) +GPUdii() void GPUTPCGMMergerUnpackGlobal::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector) { - merger.UnpackSliceGlobal(nBlocks, nThreads, iBlock, iThread, iSlice); + merger.UnpackSectorGlobal(nBlocks, nThreads, iBlock, iThread, iSector); } template <> @@ -89,7 +89,7 @@ GPUdii() void GPUTPCGMMergerResolve::Thread<3>(int32_t nBlocks, int32_t nThreads template <> GPUdii() void GPUTPCGMMergerResolve::Thread<4>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t useOrigTrackParam, int8_t mergeAll) { - merger.ResolveMergeSlices(smem, nBlocks, nThreads, iBlock, iThread, useOrigTrackParam, mergeAll); + merger.ResolveMergeSectors(smem, nBlocks, nThreads, iBlock, iThread, useOrigTrackParam, mergeAll); } template <> @@ -101,13 +101,13 @@ GPUdii() void GPUTPCGMMergerClearLinks::Thread<0>(int32_t nBlocks, int32_t nThre template <> GPUdii() void GPUTPCGMMergerMergeWithinPrepare::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - merger.MergeWithinSlicesPrepare(nBlocks, nThreads, iBlock, iThread); + merger.MergeWithinSectorsPrepare(nBlocks, nThreads, iBlock, iThread); } template <> -GPUdii() void GPUTPCGMMergerMergeSlicesPrepare::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t border0, int32_t border1, int8_t useOrigTrackParam) +GPUdii() void 
GPUTPCGMMergerMergeSectorsPrepare::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t border0, int32_t border1, int8_t useOrigTrackParam) { - merger.MergeSlicesPrepare(nBlocks, nThreads, iBlock, iThread, border0, border1, useOrigTrackParam); + merger.MergeSectorsPrepare(nBlocks, nThreads, iBlock, iThread, border0, border1, useOrigTrackParam); } template @@ -116,14 +116,14 @@ GPUdii() void GPUTPCGMMergerMergeBorders::Thread(int32_t nBlocks, int32_t nThrea merger.MergeBorderTracks(nBlocks, nThreads, iBlock, iThread, args...); } #if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code. -template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); -template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); +template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector, int8_t withinSector, int8_t mergeMode); +template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector, int8_t withinSector, int8_t mergeMode); template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, 
processorType& GPUrestrict() merger, gputpcgmmergertypes::GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax); #endif template <> -GPUdii() void GPUTPCGMMergerMergeBorders::Thread<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice, int8_t withinSlice, int8_t mergeMode) +GPUdii() void GPUTPCGMMergerMergeBorders::Thread<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector, int8_t withinSector, int8_t mergeMode) { - merger.MergeBorderTracks<1>(2, nThreads, iBlock & 1, iThread, iBlock / 2, withinSlice, mergeMode); + merger.MergeBorderTracks<1>(2, nThreads, iBlock & 1, iThread, iBlock / 2, withinSector, mergeMode); } template <> diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h index dec9befa25ce2..bda00822bac6a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h @@ -19,9 +19,7 @@ #include "GPUConstantMem.h" #include "GPUTPCGMMergerTypes.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMMergerGeneral : public GPUKernelTemplate { @@ -48,18 +46,18 @@ class GPUTPCGMMergerFollowLoopers : public GPUTPCGMMergerGeneral GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -class GPUTPCGMMergerSliceRefit : public GPUTPCGMMergerGeneral +class GPUTPCGMMergerSectorRefit : public GPUTPCGMMergerGeneral { public: template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t iSlice); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& 
merger, int32_t iSector); }; class GPUTPCGMMergerUnpackGlobal : public GPUTPCGMMergerGeneral { public: template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t iSlice); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t iSector); }; class GPUTPCGMMergerUnpackSaveNumber : public GPUTPCGMMergerGeneral @@ -100,7 +98,7 @@ class GPUTPCGMMergerMergeWithinPrepare : public GPUTPCGMMergerGeneral GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -class GPUTPCGMMergerMergeSlicesPrepare : public GPUTPCGMMergerGeneral +class GPUTPCGMMergerMergeSectorsPrepare : public GPUTPCGMMergerGeneral { public: template @@ -182,7 +180,6 @@ class GPUTPCGMMergerMergeLoopers : public GPUTPCGMMergerGeneral GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h index 3c8f21420a14f..4e225a61661c2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h @@ -18,11 +18,7 @@ #include "GPUTPCDef.h" #include "GPUGeneralKernels.h" -namespace o2 -{ -namespace gpu -{ -namespace gputpcgmmergertypes +namespace o2::gpu::gputpcgmmergertypes { enum attachTypes { attachAttached = 0x40000000, @@ -59,8 +55,6 @@ struct GPUTPCOuterParam { float C[15]; }; -} // namespace gputpcgmmergertypes -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu::gputpcgmmergertypes #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx 
b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 203968e091014..45293bae9820b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -193,7 +193,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks continue; } int32_t clusterIdGlobal = trackClusters[tracks[i].FirstClusterRef() + j].num; - int32_t sector = trackClusters[tracks[i].FirstClusterRef() + j].slice; + int32_t sector = trackClusters[tracks[i].FirstClusterRef() + j].sector; int32_t globalRow = trackClusters[tracks[i].FirstClusterRef() + j].row; int32_t clusterIdInRow = clusterIdGlobal - clusters->clusterOffset[sector][globalRow]; clIndArr[nOutCl2] = clusterIdInRow; @@ -214,11 +214,11 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks if (merger.Param().par.continuousTracking) { time0 = tracks[i].GetParam().GetTZOffset(); if (cce) { - bool lastSide = trackClusters[tracks[i].FirstClusterRef()].slice < MAXSECTOR / 2; + bool lastSide = trackClusters[tracks[i].FirstClusterRef()].sector < MAXSECTOR / 2; float delta = 0.f; for (uint32_t iCl = 1; iCl < tracks[i].NClusters(); iCl++) { auto& cacl1 = trackClusters[tracks[i].FirstClusterRef() + iCl]; - if (lastSide ^ (cacl1.slice < MAXSECTOR / 2)) { + if (lastSide ^ (cacl1.sector < MAXSECTOR / 2)) { auto& cl1 = clusters->clustersLinear[cacl1.num]; auto& cl2 = clusters->clustersLinear[trackClusters[tracks[i].FirstClusterRef() + iCl - 1].num]; delta = CAMath::Abs(cl1.getTime() - cl2.getTime()) * 0.5f; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.h b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.h index a5a9869c2061a..8f7a91ad69269 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.h @@ -18,9 +18,7 @@ #include "GPUTPCDef.h" #include "GPUTPCGMMergerGPU.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMO2Output : public GPUTPCGMMergerGeneral @@ -34,7 +32,6 @@ class GPUTPCGMO2Output : public GPUTPCGMMergerGeneral GPUd() static void 
Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h index d77cb861affa3..eac86a5598644 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h @@ -27,9 +27,7 @@ * */ -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMPhysicalTrackModel { @@ -272,7 +270,6 @@ GPUdi() void GPUTPCGMPhysicalTrackModel::Rotate(float alpha) RotateLight(alpha); UpdateValues(); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h index 15f2bd880e351..88f0882a79f03 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h @@ -18,13 +18,10 @@ #include "GPUCommonDef.h" class AliMagF; -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMPolynomialField; -} -} // namespace o2 +} // namespace o2::gpu /** * @class GPUTPCGMPolynomialFieldManager diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 9f344a04739fd..e15d6fe8b17bd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -614,8 +614,8 @@ GPUd() void GPUTPCGMPropagator::GetErr2(float& GPUrestrict() err2Y, float& GPUre param.GetClusterErrors2(sector, iRow, posZ, snp, tgl, time, avgCharge, charge, err2Y, err2Z); } param.UpdateClusterError2ByState(clusterState, err2Y, err2Z); - float statErr2 = param.GetSystematicClusterErrorIFC2(trackX, trackY, posZ, sector >= (GPUCA_NSLICES / 2)); - if (sector >= GPUCA_NSLICES / 2 + 1 && sector <= 
GPUCA_NSLICES / 2 + 2) { + float statErr2 = param.GetSystematicClusterErrorIFC2(trackX, trackY, posZ, sector >= (GPUCA_NSECTORS / 2)); + if (sector >= GPUCA_NSECTORS / 2 + 1 && sector <= GPUCA_NSECTORS / 2 + 2) { statErr2 += param.GetSystematicClusterErrorC122(trackX, trackY, sector); } err2Y += statErr2; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index eaff9be4f5e46..a2369bafc9751 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -20,18 +20,13 @@ #include "GPUTPCGMPolynomialField.h" #include "GPUCommonMath.h" -namespace o2 -{ -namespace base +namespace o2::base { struct MatBudget; class MatLayerCylSet; -} // namespace base -} // namespace o2 +} // namespace o2::base -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMTrackParam; struct GPUParam; @@ -275,7 +270,6 @@ GPUdi() float GPUTPCGMPropagator::getGlobalY(float X, float Y) const return getGlobalY(mCosAlpha, mSinAlpha, X, Y); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx similarity index 85% rename from GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx rename to GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx index 6c8641517b80d..a439e6e653039 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx @@ -9,12 +9,12 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUTPCGMSliceTrack.cxx +/// \file GPUTPCGMSectorTrack.cxx /// \author Sergey Gorbunov, David Rohr #include "GPUParam.h" #include "GPUTPCGMBorderTrack.h" -#include "GPUTPCGMSliceTrack.h" +#include "GPUTPCGMSectorTrack.h" #include "GPUO2DataTypes.h" #include "GPUTPCGMMerger.h" #include "GPUTPCConvertImpl.h" @@ -23,10 +23,10 @@ using namespace o2::gpu; using namespace o2::tpc; -GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sliceTr, float alpha, int32_t slice) +GPUd() void GPUTPCGMSectorTrack::Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sectorTr, float alpha, int32_t sector) { - const GPUTPCBaseTrackParam& t = sliceTr->Param(); - mOrigTrack = sliceTr; + const GPUTPCBaseTrackParam& t = sectorTr->Param(); + mOrigTrack = sectorTr; mParam.mX = t.GetX(); mParam.mY = t.GetY(); mParam.mZ = t.GetZ(); @@ -36,18 +36,18 @@ GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMMerger* merger, const GPUTPCTr mParam.mCosPhi = CAMath::Sqrt(1.f - mParam.mSinPhi * mParam.mSinPhi); mParam.mSecPhi = 1.f / mParam.mCosPhi; mAlpha = alpha; - mSlice = slice; + mSector = sector; if (merger->Param().par.earlyTpcTransform) { mTZOffset = t.GetZOffset(); } else { - mTZOffset = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(slice, t.GetZOffset(), merger->Param().continuousMaxTimeBin); + mTZOffset = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, t.GetZOffset(), merger->Param().continuousMaxTimeBin); } - mNClusters = sliceTr->NHits(); + mNClusters = sectorTr->NHits(); } -GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sliceTr, float alpha, int32_t slice) +GPUd() void GPUTPCGMSectorTrack::Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sectorTr, float alpha, int32_t sector) { - mOrigTrack = sliceTr; + mOrigTrack = sectorTr; mParam.mX = trk.GetX(); mParam.mY = trk.GetY(); mParam.mZ = trk.GetZ(); 
@@ -57,9 +57,9 @@ GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMTrackParam& trk, const GPUTPCT mParam.mCosPhi = CAMath::Sqrt(1.f - mParam.mSinPhi * mParam.mSinPhi); mParam.mSecPhi = 1.f / mParam.mCosPhi; mAlpha = alpha; - mSlice = slice; + mSector = sector; mTZOffset = trk.GetTZOffset(); - mNClusters = sliceTr->NHits(); + mNClusters = sectorTr->NHits(); mParam.mC0 = trk.GetCov(0); mParam.mC2 = trk.GetCov(2); mParam.mC3 = trk.GetCov(3); @@ -71,7 +71,7 @@ GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMTrackParam& trk, const GPUTPCT mParam.mC14 = trk.GetCov(14); } -GPUd() void GPUTPCGMSliceTrack::SetParam2(const GPUTPCGMTrackParam& trk) +GPUd() void GPUTPCGMSectorTrack::SetParam2(const GPUTPCGMTrackParam& trk) { mParam2.mX = trk.GetX(); mParam2.mY = trk.GetY(); @@ -92,18 +92,18 @@ GPUd() void GPUTPCGMSliceTrack::SetParam2(const GPUTPCGMTrackParam& trk) mParam2.mC14 = trk.GetCov(14); } -GPUd() bool GPUTPCGMSliceTrack::FilterErrors(const GPUTPCGMMerger* merger, int32_t iSlice, float maxSinPhi, float sinPhiMargin) +GPUd() bool GPUTPCGMSectorTrack::FilterErrors(const GPUTPCGMMerger* merger, int32_t iSector, float maxSinPhi, float sinPhiMargin) { float lastX; // float lastX = merger->Param().tpcGeometry.Row2X(mOrigTrack->Cluster(mOrigTrack->NClusters() - 1).GetRow()); // TODO: Why is this needed to be set below, Row2X should work, but looses some tracks float y, z; int32_t row, index; - const GPUTPCTracker& trk = merger->GetConstantMem()->tpcTrackers[iSlice]; + const GPUTPCTracker& trk = merger->GetConstantMem()->tpcTrackers[iSector]; const GPUTPCHitId& ic = trk.TrackHits()[mOrigTrack->FirstHitID() + mOrigTrack->NHits() - 1]; - index = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + merger->GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSlice][0]; + index = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + merger->GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0]; row = ic.RowIndex(); 
const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[index]; - GPUTPCConvertImpl::convert(*merger->GetConstantMem(), iSlice, row, cl.getPad(), cl.getTime(), lastX, y, z); + GPUTPCConvertImpl::convert(*merger->GetConstantMem(), iSector, row, cl.getPad(), cl.getTime(), lastX, y, z); const int32_t N = 3; @@ -116,10 +116,10 @@ GPUd() bool GPUTPCGMSliceTrack::FilterErrors(const GPUTPCGMMerger* merger, int32 float kdx205 = 2.f + kdx * kdx * 0.5f; { - merger->Param().GetClusterErrors2(iSlice, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, 0.f, 0.f, mParam.mC0, mParam.mC2); // TODO: provide correct time and row + merger->Param().GetClusterErrors2(iSector, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, 0.f, 0.f, mParam.mC0, mParam.mC2); // TODO: provide correct time and row #ifndef GPUCA_TPC_GEOMETRY_O2 float C0a, C2a; - merger->Param().GetClusterErrorsSeeding2(iSlice, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, C0a, C2a); + merger->Param().GetClusterErrorsSeeding2(iSector, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, C0a, C2a); if (C0a > mParam.mC0) { mParam.mC0 = C0a; } @@ -176,10 +176,10 @@ GPUd() bool GPUTPCGMSliceTrack::FilterErrors(const GPUTPCGMMerger* merger, int32 float dz = dS * mParam.mDzDs; float ex1i = 1.f / ex1; { - merger->Param().GetClusterErrors2(iSlice, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, 0.f, 0.f, err2Y, err2Z); // TODO: Provide correct time / row + merger->Param().GetClusterErrors2(iSector, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, 0.f, 0.f, err2Y, err2Z); // TODO: Provide correct time / row #ifndef GPUCA_TPC_GEOMETRY_O2 float C0a, C2a; - merger->Param().GetClusterErrorsSeeding2(iSlice, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, C0a, C2a); + merger->Param().GetClusterErrorsSeeding2(iSector, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, C0a, C2a); if (C0a > err2Y) { err2Y = C0a; } @@ -274,7 +274,7 @@ GPUd() bool GPUTPCGMSliceTrack::FilterErrors(const GPUTPCGMMerger* 
merger, int32 return ok; } -GPUd() bool GPUTPCGMSliceTrack::TransportToX(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov) const +GPUd() bool GPUTPCGMSectorTrack::TransportToX(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov) const { Bz = -Bz; float ex = mParam.mCosPhi; @@ -318,7 +318,7 @@ GPUd() bool GPUTPCGMSliceTrack::TransportToX(GPUTPCGMMerger* merger, float x, fl if (merger->Param().par.earlyTpcTransform) { b.SetZOffsetLinear(mTZOffset); } else { - b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSlice, mTZOffset, merger->Param().continuousMaxTimeBin)); + b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); } if (!doCov) { @@ -366,7 +366,7 @@ GPUd() bool GPUTPCGMSliceTrack::TransportToX(GPUTPCGMMerger* merger, float x, fl return 1; } -GPUd() bool GPUTPCGMSliceTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float newX, float sinAlpha, float cosAlpha, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi) const +GPUd() bool GPUTPCGMSectorTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float newX, float sinAlpha, float cosAlpha, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi) const { //* @@ -474,7 +474,7 @@ GPUd() bool GPUTPCGMSliceTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float if (merger->Param().par.earlyTpcTransform) { b.SetZOffsetLinear(mTZOffset); } else { - b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSlice, mTZOffset, merger->Param().continuousMaxTimeBin)); + b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); } b.SetCov(0, c00 + h2 * h2c22 + h4 * h4c44 + 2.f * (h2 * 
c20ph4c42 + h4 * c40)); @@ -490,9 +490,9 @@ GPUd() bool GPUTPCGMSliceTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float return 1; } -GPUd() void GPUTPCGMSliceTrack::CopyBaseTrackCov() +GPUd() void GPUTPCGMSectorTrack::CopyBaseTrackCov() { - const float* GPUrestrict() cov = mOrigTrack->Param().mC; + const float* GPUrestrict() cov = mOrigTrack->Param().mC; mParam.mC0 = cov[0]; mParam.mC2 = cov[2]; mParam.mC3 = cov[3]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h similarity index 84% rename from GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h rename to GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h index 47841a616a13e..27e4a89300ca4 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h @@ -9,33 +9,31 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCGMSliceTrack.h +/// \file GPUTPCGMSectorTrack.h /// \author Sergey Gorbunov, David Rohr -#ifndef GPUTPCGMSLICETRACK_H -#define GPUTPCGMSLICETRACK_H +#ifndef GPUTPCGMSECTORTRACK_H +#define GPUTPCGMSECTORTRACK_H #include "GPUTPCTrack.h" #include "GPUTPCGMTrackParam.h" #include "GPUCommonMath.h" #include "GPUO2DataTypes.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** - * @class GPUTPCGMSliceTrack + * @class GPUTPCGMSectorTrack * - * The class describes TPC slice tracks used in GPUTPCGMMerger + * The class describes TPC sector tracks used in GPUTPCGMMerger */ class GPUTPCGMMerger; -class GPUTPCGMSliceTrack +class GPUTPCGMSectorTrack { public: GPUd() float Alpha() const { return mAlpha; } - GPUd() uint8_t Slice() const { return mSlice; } - GPUd() bool CSide() const { return mSlice >= 18; } + GPUd() uint8_t Sector() const { return mSector; } + GPUd() bool CSide() const { return mSector >= 18; } GPUd() int32_t NClusters() const { return mNClusters; } GPUd() int32_t PrevNeighbour() const { return mNeighbour[0]; } GPUd()
int32_t NextNeighbour() const { return mNeighbour[1]; } @@ -75,9 +73,9 @@ class GPUTPCGMSliceTrack mClusterZT[1] = v2; } - GPUd() void Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sliceTr, float alpha, int32_t slice); + GPUd() void Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sectorTr, float alpha, int32_t sector); GPUd() void SetParam2(const GPUTPCGMTrackParam& trk); - GPUd() void Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sliceTr, float alpha, int32_t slice); + GPUd() void Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sectorTr, float alpha, int32_t sector); GPUd() void UseParam2() { mParam = mParam2; } GPUd() void SetX2(float v) { mParam2.mX = v; } GPUd() float X2() const { return mParam2.mX; } @@ -103,7 +101,7 @@ class GPUTPCGMSliceTrack GPUd() void SetNextSegmentNeighbour(int32_t v) { mSegmentNeighbour[1] = v; } GPUd() void SetLeg(uint8_t v) { mLeg = v; } - GPUd() void CopyParamFrom(const GPUTPCGMSliceTrack& t) + GPUd() void CopyParamFrom(const GPUTPCGMSectorTrack& t) { mParam.mX = t.mParam.mX; mParam.mY = t.mParam.mY; @@ -116,33 +114,32 @@ class GPUTPCGMSliceTrack mAlpha = t.mAlpha; } - GPUd() bool FilterErrors(const GPUTPCGMMerger* merger, int32_t iSlice, float maxSinPhi = GPUCA_MAX_SIN_PHI, float sinPhiMargin = 0.f); + GPUd() bool FilterErrors(const GPUTPCGMMerger* merger, int32_t iSector, float maxSinPhi = GPUCA_MAX_SIN_PHI, float sinPhiMargin = 0.f); GPUd() bool TransportToX(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov = true) const; GPUd() bool TransportToXAlpha(GPUTPCGMMerger* merger, float x, float sinAlpha, float cosAlpha, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi) const; GPUd() void CopyBaseTrackCov(); - struct sliceTrackParam { + struct sectorTrackParam { float mX, mY, mZ, mSinPhi, mDzDs, mQPt, mCosPhi, mSecPhi; // parameters float mC0, mC2, mC3, mC5, mC7, mC9, mC10, mC12, mC14; // covariances }; private: - const GPUTPCTrack* mOrigTrack; // pointer to 
original slice track - sliceTrackParam mParam; // Track parameters - sliceTrackParam mParam2; // Parameters at other side + const GPUTPCTrack* mOrigTrack; // pointer to original sector track + sectorTrackParam mParam; // Track parameters + sectorTrackParam mParam2; // Parameters at other side float mTZOffset; // Z offset with early transform, T offset otherwise float mAlpha; // alpha angle float mClusterZT[2]; // Minimum maximum cluster Z / T int32_t mNClusters; // N clusters int32_t mNeighbour[2]; // int32_t mSegmentNeighbour[2]; // - int32_t mLocalTrackId; // Corrected local track id in terms of GMSliceTracks array for extrapolated tracks, UNDEFINED for local tracks! + int32_t mLocalTrackId; // Corrected local track id in terms of GMSectorTracks array for extrapolated tracks, UNDEFINED for local tracks! int32_t mExtrapolatedTrackIds[2]; // IDs of associated extrapolated tracks - uint8_t mSlice; // slice of this track segment + uint8_t mSector; // sector of this track segment uint8_t mLeg; // Leg of this track segment - ClassDefNV(GPUTPCGMSliceTrack, 1); + ClassDefNV(GPUTPCGMSectorTrack, 1); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 790e911a1d865..be1d3803312fe 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -67,7 +67,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ prop.SetPolynomialField(¶m.polynomialField); prop.SetMaxSinPhi(maxSinPhi); prop.SetToyMCEventsFlag(param.par.toyMCEventsFlag); - if ((clusters[0].slice < 18) == (clusters[N - 1].slice < 18)) { + if ((clusters[0].sector < 18) == (clusters[N - 1].sector < 18)) { ShiftZ2(clusters, clustersXYZ, merger, N); } if (param.rec.tpc.mergerInterpolateErrors) { @@ -82,7 +82,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float covYYUpd 
= 0.f; float lastUpdateX = -1.f; uint8_t lastRow = 255; - uint8_t lastSlice = 255; + uint8_t lastSector = 255; uint8_t storeOuter = 0; for (int32_t iWay = 0; iWay < nWays; iWay++) { @@ -124,9 +124,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ bool noFollowCircle = false, noFollowCircle2 = false; int32_t goodRows = 0; for (int32_t ihit = ihitStart; ihit >= 0 && ihit < maxN; ihit += wayDirection) { - const bool crossCE = lastSlice != 255 && ((lastSlice < 18) ^ (clusters[ihit].slice < 18)); + const bool crossCE = lastSector != 255 && ((lastSector < 18) ^ (clusters[ihit].sector < 18)); if (crossCE) { - lastSlice = clusters[ihit].slice; + lastSector = clusters[ihit].sector; noFollowCircle2 = true; } @@ -151,19 +151,19 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ int32_t ihitMergeFirst = ihit; uint8_t clusterState = clusters[ihit].state; - const float clAlpha = param.Alpha(clusters[ihit].slice); + const float clAlpha = param.Alpha(clusters[ihit].sector); float xx, yy, zz; if (param.par.earlyTpcTransform) { - const float zOffset = (clusters[ihit].slice < 18) == (clusters[0].slice < 18) ? mTZOffset : -mTZOffset; + const float zOffset = (clusters[ihit].sector < 18) == (clusters[0].sector < 18) ? 
mTZOffset : -mTZOffset; xx = clustersXYZ[ihit].x; yy = clustersXYZ[ihit].y; zz = clustersXYZ[ihit].z - zOffset; } else { const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num]; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].slice, clusters[ihit].row, cl.getPad(), cl.getTime(), xx, yy, zz, mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), xx, yy, zz, mTZOffset); } // clang-format off - CADEBUG(printf("\tHit %3d/%3d Row %3d: Cluster Alpha %8.3f %3d, X %8.3f - Y %8.3f, Z %8.3f (Missed %d)\n", ihit, maxN, (int32_t)clusters[ihit].row, clAlpha, (int32_t)clusters[ihit].slice, xx, yy, zz, nMissed)); + CADEBUG(printf("\tHit %3d/%3d Row %3d: Cluster Alpha %8.3f %3d, X %8.3f - Y %8.3f, Z %8.3f (Missed %d)\n", ihit, maxN, (int32_t)clusters[ihit].row, clAlpha, (int32_t)clusters[ihit].sector, xx, yy, zz, nMissed)); // CADEBUG(if ((uint32_t)merger->GetTrackingChain()->mIOPtrs.nMCLabelsTPC > clusters[ihit].num)) // CADEBUG({printf(" MC:"); for (int32_t i = 0; i < 3; i++) {int32_t mcId = merger->GetTrackingChain()->mIOPtrs.mcLabelsTPC[clusters[ihit].num].fClusterID[i].fMCID; if (mcId >= 0) printf(" %d", mcId); } } printf("\n")); // clang-format on @@ -186,14 +186,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ bool changeDirection = (cluster.leg - lastLeg) & 1; // clang-format off CADEBUG(if (changeDirection) printf("\t\tChange direction\n")); - CADEBUG(printf("\tLeg %3d Slice %2d %4sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.leg, (int32_t)cluster.slice, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? 
" " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); + CADEBUG(printf("\tLeg %3d Sector %2d %4sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.leg, (int32_t)cluster.sector, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? " " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); // clang-format on if (allowModification && changeDirection && !noFollowCircle && !noFollowCircle2) { bool tryFollow = lastRow != 255; if (tryFollow) { const GPUTPCGMTrackParam backup = *this; const float backupAlpha = prop.GetAlpha(); - if (FollowCircle<0>(merger, prop, lastSlice, lastRow, iTrk, clAlpha, xx, yy, cluster.slice, cluster.row, inFlyDirection)) { + if (FollowCircle<0>(merger, prop, lastSector, lastRow, iTrk, clAlpha, xx, yy, cluster.sector, cluster.row, inFlyDirection)) { CADEBUG(printf("Error during follow circle, resetting track!\n")); *this = backup; prop.SetTrack(this, backupAlpha); @@ -202,10 +202,10 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } if (tryFollow) { - MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, false, cluster.slice); + MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, false, cluster.sector); lastUpdateX = mX; lastLeg = cluster.leg; - lastSlice = cluster.slice; + lastSector = cluster.sector; lastRow = 255; N++; resetT0 = initResetT0(); @@ -217,7 +217,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { bool dodEdx = param.par.dodEdx && param.dodEdxDownscaled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == 
nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; - dodEdx = AttachClustersPropagate(merger, cluster.slice, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); + dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection, param); } @@ -234,14 +234,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ err = prop.PropagateToXAlpha(xx, clAlpha, inFlyDirection); } } - if (lastRow == 255 || CAMath::Abs((int32_t)lastRow - (int32_t)cluster.row) > 5 || lastSlice != cluster.slice || (param.rec.tpc.trackFitRejectMode < 0 && -nMissed <= param.rec.tpc.trackFitRejectMode)) { + if (lastRow == 255 || CAMath::Abs((int32_t)lastRow - (int32_t)cluster.row) > 5 || lastSector != cluster.sector || (param.rec.tpc.trackFitRejectMode < 0 && -nMissed <= param.rec.tpc.trackFitRejectMode)) { goodRows = 0; } else { goodRows++; } if (err == 0) { lastRow = cluster.row; - lastSlice = cluster.slice; + lastSector = cluster.sector; } // clang-format off CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - Err %d", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[0] - yy, mP[1] - zz, sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], err)); @@ -265,9 +265,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (CAMath::Abs(yy - mP[0]) > CAMath::Abs(yy - mirrordY)) { CADEBUG(printf(" - Mirroring!!!")); if (allowModification) { - AttachClustersMirror<0>(merger, cluster.slice, cluster.row, iTrk, yy, prop); // TODO: Never true, will always call FollowCircle above, 
really??? + AttachClustersMirror<0>(merger, cluster.sector, cluster.row, iTrk, yy, prop); // TODO: Never true, will always call FollowCircle above, really??? } - MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, true, cluster.slice); + MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, true, cluster.sector); noFollowCircle = false; lastUpdateX = mX; @@ -285,7 +285,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float uncorrectedY = -1e6f; if (allowModification) { - uncorrectedY = AttachClusters(merger, cluster.slice, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop); + uncorrectedY = AttachClusters(merger, cluster.sector, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop); } const int32_t err2 = mNDF > 0 && CAMath::Abs(prop.GetSinPhi0()) >= maxSinForUpdate; @@ -335,7 +335,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ const float invCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? 
(1.f / merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; float invAvgCharge = (sumInvSqrtCharge += invSqrtCharge) / ++nAvgCharge; invAvgCharge *= invAvgCharge; - retVal = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, &interpolation.hit[ihit], refit, cluster.slice, time, invAvgCharge, invCharge GPUCA_DEBUG_STREAMER_CHECK(, &debugVals)); + retVal = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, &interpolation.hit[ihit], refit, cluster.sector, time, invAvgCharge, invCharge GPUCA_DEBUG_STREAMER_CHECK(, &debugVals)); } GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamUpdateTrack, iTrk)) { merger->DebugStreamerUpdate(iTrk, ihit, xx, yy, zz, cluster, merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num], *this, prop, interpolation.hit[ihit], rejectChi2, refit, retVal, sumInvSqrtCharge / nAvgCharge * sumInvSqrtCharge / nAvgCharge, yy, zz, clusterState, debugVals.retVal, debugVals.err2Y, debugVals.err2Z); @@ -384,7 +384,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ pad /= clusterCount; relTime /= clusterCount; relTime = relTime - CAMath::Round(relTime); - dEdx.fillCluster(qtot, qmax, cluster.row, cluster.slice, mP[2], mP[3], param, merger->GetConstantMem()->calibObjects, zz, pad, relTime); + dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], param, merger->GetConstantMem()->calibObjects, zz, pad, relTime); } } else if (retVal >= GPUTPCGMPropagator::updateErrorClusterRejected) { // cluster far away form the track if (allowModification) { @@ -398,7 +398,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ break; // bad chi2 for the whole track, stop the fit } } - if (((nWays - iWay) & 1) && (clusters[0].slice < 18) == (clusters[maxN - 1].slice < 18)) { + if (((nWays - iWay) & 1) && (clusters[0].sector < 18) == (clusters[maxN - 
1].sector < 18)) { ShiftZ2(clusters, clustersXYZ, merger, maxN); } } @@ -486,9 +486,9 @@ GPUd() void GPUTPCGMTrackParam::MirrorTo(GPUTPCGMPropagator& GPUrestrict() prop, GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* GPUrestrict() merger, GPUTPCGMPropagator& GPUrestrict() prop, float& GPUrestrict() xx, float& GPUrestrict() yy, float& GPUrestrict() zz, int32_t maxN, float clAlpha, uint8_t& GPUrestrict() clusterState, bool rejectChi2) { - if (ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].slice == clusters[ihit + wayDirection].slice && clusters[ihit].leg == clusters[ihit + wayDirection].leg) { + if (ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].sector == clusters[ihit + wayDirection].sector && clusters[ihit].leg == clusters[ihit + wayDirection].leg) { float maxDistY, maxDistZ; - prop.GetErr2(maxDistY, maxDistZ, merger->Param(), zz, clusters[ihit].row, 0, clusters[ihit].slice, -1.f, 0.f, 0.f); // TODO: Use correct time, avgCharge + prop.GetErr2(maxDistY, maxDistZ, merger->Param(), zz, clusters[ihit].row, 0, clusters[ihit].sector, -1.f, 0.f, 0.f); // TODO: Use correct time, avgCharge maxDistY = (maxDistY + mC[0]) * 20.f; maxDistZ = (maxDistZ + mC[2]) * 20.f; int32_t noReject = 0; // Cannot reject if simple estimation of y/z fails (extremely unlike case) @@ -505,7 +505,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t while (true) { float clx, cly, clz, clamp; if (merger->Param().par.earlyTpcTransform) { - const float zOffset = (clusters[ihit].slice < 18) == (clusters[0].slice < 18) ? mTZOffset : -mTZOffset; + const float zOffset = (clusters[ihit].sector < 18) == (clusters[0].sector < 18) ? 
mTZOffset : -mTZOffset; clx = clustersXYZ[ihit].x; cly = clustersXYZ[ihit].y; clz = clustersXYZ[ihit].z - zOffset; @@ -513,7 +513,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t } else { const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num]; clamp = cl.qTot; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].slice, clusters[ihit].row, cl.getPad(), cl.getTime(), clx, cly, clz, mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), clx, cly, clz, mTZOffset); } float dy = cly - projY; float dz = clz - projZ; @@ -530,7 +530,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t clusterState |= clusters[ihit].state; count += clamp; } - if (!(ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].slice == clusters[ihit + wayDirection].slice && clusters[ihit].leg == clusters[ihit + wayDirection].leg)) { + if (!(ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].sector == clusters[ihit + wayDirection].sector && clusters[ihit].leg == clusters[ihit + wayDirection].leg)) { break; } ihit += wayDirection; @@ -546,7 +546,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t return 0; } -GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop) +GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop) { float Y, Z; if (Merger->Param().par.earlyTpcTransform) { @@ -554,21 +554,21 @@ GPUd() 
float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric Z = mP[1]; } else { float X = 0; - Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(slice, iRow, mP[0], mP[1], X); + Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(sector, iRow, mP[0], mP[1], X); if (prop.GetPropagatedYZ(X, Y, Z)) { Y = mP[0]; Z = mP[1]; } } - return AttachClusters(Merger, slice, iRow, iTrack, goodLeg, Y, Z); + return AttachClusters(Merger, sector, iRow, iTrack, goodLeg, Y, Z); } -GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z) +GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z) { if (Merger->Param().rec.tpc.disableRefitAttachment & 1) { return -1e6f; } - const GPUTPCTracker& GPUrestrict() tracker = *(Merger->GetConstantMem()->tpcTrackers + slice); + const GPUTPCTracker& GPUrestrict() tracker = *(Merger->GetConstantMem()->tpcTrackers + sector); const GPUTPCRow& GPUrestrict() row = tracker.Row(iRow); #ifndef GPUCA_TEXTURE_FETCH_CONSTRUCTOR GPUglobalref() const cahit2* hits = tracker.HitData(row); @@ -578,7 +578,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return -1e6f; } - const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->OutputTracks()[iTrack].CSide() ^ (slice >= 18)) ? -mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(slice, mTZOffset, Merger->Param().continuousMaxTimeBin); + const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->OutputTracks()[iTrack].CSide() ^ (sector >= 18)) ?
-mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); const float y0 = row.Grid().YMin(); const float stepY = row.HstepY(); const float z0 = row.Grid().ZMin() - zOffset; // We can use our own ZOffset, since this is only used temporarily anyway @@ -586,7 +586,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric int32_t bin, ny, nz; float err2Y, err2Z; - Merger->Param().GetClusterErrors2(slice, iRow, Z, mP[2], mP[3], -1.f, 0.f, 0.f, err2Y, err2Z); // TODO: Use correct time/avgCharge + Merger->Param().GetClusterErrors2(sector, iRow, Z, mP[2], mP[3], -1.f, 0.f, 0.f, err2Y, err2Z); // TODO: Use correct time/avgCharge const float sy2 = CAMath::Min(Merger->Param().rec.tpc.tubeMaxSize2, Merger->Param().rec.tpc.tubeChi2 * (err2Y + CAMath::Abs(mC[0]))); // Cov can be bogus when following circle const float sz2 = CAMath::Min(Merger->Param().rec.tpc.tubeMaxSize2, Merger->Param().rec.tpc.tubeChi2 * (err2Z + CAMath::Abs(mC[2]))); // In that case we should provide the track error externally const float tubeY = CAMath::Sqrt(sy2); @@ -598,7 +598,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric uncorrectedY = Y; uncorrectedZ = Z; } else { - Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(slice, iRow, Y, Z, uncorrectedY, uncorrectedZ); + Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(sector, iRow, Y, Z, uncorrectedY, uncorrectedZ); } if (CAMath::Abs(uncorrectedY) > row.getTPCMaxY()) { @@ -640,7 +640,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return uncorrectedY; } -GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& 
GPUrestrict() prop, bool inFlyDirection, float maxSinPhi, bool dodEdx) +GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& GPUrestrict() prop, bool inFlyDirection, float maxSinPhi, bool dodEdx) { static constexpr float kSectAngle = 2 * M_PI / 18.f; if (Merger->Param().rec.tpc.disableRefitAttachment & 2) { @@ -664,14 +664,14 @@ GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GP } if (dodEdx && iRow + step == toRow) { float yUncorrected, zUncorrected; - Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(slice, iRow, mP[0], mP[1], yUncorrected, zUncorrected); - uint32_t pad = CAMath::Float2UIntRn(Merger->Param().tpcGeometry.LinearY2Pad(slice, iRow, yUncorrected)); - if (pad >= Merger->Param().tpcGeometry.NPads(iRow) || (Merger->GetConstantMem()->calibObjects.dEdxCalibContainer && Merger->GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(slice, iRow, pad))) { + Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(sector, iRow, mP[0], mP[1], yUncorrected, zUncorrected); + uint32_t pad = CAMath::Float2UIntRn(Merger->Param().tpcGeometry.LinearY2Pad(sector, iRow, yUncorrected)); + if (pad >= Merger->Param().tpcGeometry.NPads(iRow) || (Merger->GetConstantMem()->calibObjects.dEdxCalibContainer && Merger->GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(sector, iRow, pad))) { dodEdx = false; } } CADEBUG(printf("Attaching in row %d\n", iRow)); - AttachClusters(Merger, slice, iRow, iTrack, goodLeg, prop); + AttachClusters(Merger, sector, iRow, iTrack, goodLeg, prop); } return dodEdx; } @@ -696,7 +696,7 @@ GPUdii() void GPUTPCGMTrackParam::StoreOuter(gputpcgmmergertypes::GPUTPCOuterPar outerParam->alpha = prop.GetAlpha(); } -GPUdic(0, 1) void GPUTPCGMTrackParam::StoreAttachMirror(const GPUTPCGMMerger* 
GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSlice, int32_t toRow, bool inFlyDirection, float alpha) +GPUdic(0, 1) void GPUTPCGMTrackParam::StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSector, int32_t toRow, bool inFlyDirection, float alpha) { uint32_t nLoopData = CAMath::AtomicAdd(&Merger->Memory()->nLoopData, 1u); if (nLoopData >= Merger->NMaxTracks()) { @@ -711,9 +711,9 @@ GPUdic(0, 1) void GPUTPCGMTrackParam::StoreAttachMirror(const GPUTPCGMMerger* GP data.toAlpha = toAlpha; data.toY = toY; data.toX = toX; - data.slice = slice; + data.sector = sector; data.row = iRow; - data.toSlice = toSlice; + data.toSector = toSector; data.toRow = toRow; data.inFlyDirection = inFlyDirection; Merger->LoopData()[nLoopData] = data; @@ -733,28 +733,28 @@ GPUdii() void GPUTPCGMTrackParam::RefitLoop(const GPUTPCGMMerger* GPUrestrict() GPUTPCGMLoopData& data = Merger->LoopData()[loopIdx]; prop.SetTrack(&data.param, data.alpha); - if (data.toSlice == -1) { - data.param.AttachClustersMirror<1>(Merger, data.slice, data.row, data.track, data.toY, prop, true); + if (data.toSector == -1) { + data.param.AttachClustersMirror<1>(Merger, data.sector, data.row, data.track, data.toY, prop, true); } else { - data.param.FollowCircle<1>(Merger, prop, data.slice, data.row, data.track, data.toAlpha, data.toX, data.toY, data.toSlice, data.toRow, data.inFlyDirection, true); + data.param.FollowCircle<1>(Merger, prop, data.sector, data.row, data.track, data.toAlpha, data.toX, data.toY, data.toSector, data.toRow, data.inFlyDirection, true); } } template -GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& GPUrestrict() prop, int32_t slice, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSlice, int32_t toRow, bool inFlyDirection, bool 
phase2) +GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& GPUrestrict() prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection, bool phase2) { static constexpr float kSectAngle = 2 * M_PI / 18.f; if (Merger->Param().rec.tpc.disableRefitAttachment & 4) { return 1; } if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { - StoreAttachMirror(Merger, slice, iRow, iTrack, toAlpha, toY, toX, toSlice, toRow, inFlyDirection, prop.GetAlpha()); + StoreAttachMirror(Merger, sector, iRow, iTrack, toAlpha, toY, toX, toSector, toRow, inFlyDirection, prop.GetAlpha()); return 1; } const GPUParam& GPUrestrict() param = Merger->Param(); bool right; float dAlpha = toAlpha - prop.GetAlpha(); - int32_t sliceSide = slice >= (GPUCA_NSLICES / 2) ? (GPUCA_NSLICES / 2) : 0; + int32_t sectorSide = sector >= (GPUCA_NSECTORS / 2) ? (GPUCA_NSECTORS / 2) : 0; if (CAMath::Abs(dAlpha) > 0.001f) { right = CAMath::Abs(dAlpha) < CAMath::Pi() ? (dAlpha > 0) : (dAlpha < 0); } else { @@ -764,16 +764,16 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr int32_t targetRow = up ? (GPUCA_ROW_COUNT - 1) : 0; float lrFactor = mP[2] < 0 ? -1.f : 1.f; // !(right ^ down) // TODO: shouldn't it be "right ? 1.f : -1.f", but that gives worse results... 
// clang-format off - CADEBUG(printf("CIRCLE Track %d: Slice %d Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f - Next hit: Slice %d Alpha %f X %f Y %f - Right %d Up %d dAlpha %f lrFactor %f\n", iTrack, slice, prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3], toSlice, toAlpha, toX, toY, (int32_t)right, (int32_t)up, dAlpha, lrFactor)); + CADEBUG(printf("CIRCLE Track %d: Sector %d Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f - Next hit: Sector %d Alpha %f X %f Y %f - Right %d Up %d dAlpha %f lrFactor %f\n", iTrack, sector, prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3], toSector, toAlpha, toX, toY, (int32_t)right, (int32_t)up, dAlpha, lrFactor)); // clang-format on - AttachClustersPropagate(Merger, slice, iRow, targetRow, iTrack, false, prop, inFlyDirection, 0.7f); + AttachClustersPropagate(Merger, sector, iRow, targetRow, iTrack, false, prop, inFlyDirection, 0.7f); if (prop.RotateToAlpha(prop.GetAlpha() + (CAMath::Pi() / 2.f) * lrFactor)) { return 1; } CADEBUG(printf("\tRotated: X %f Y %f Z %f SinPhi %f (Alpha %f / %f)\n", mP[0], mX, mP[1], mP[2], prop.GetAlpha(), prop.GetAlpha() + CAMath::Pi() / 2.f)); - while (slice != toSlice || FollowCircleChk(lrFactor, toY, toX, up, right)) { - while ((slice != toSlice) ? (CAMath::Abs(mX) <= CAMath::Abs(mP[0]) * CAMath::Tan(kSectAngle / 2.f)) : FollowCircleChk(lrFactor, toY, toX, up, right)) { + while (sector != toSector || FollowCircleChk(lrFactor, toY, toX, up, right)) { + while ((sector != toSector) ? 
(CAMath::Abs(mX) <= CAMath::Abs(mP[0]) * CAMath::Tan(kSectAngle / 2.f)) : FollowCircleChk(lrFactor, toY, toX, up, right)) { int32_t err = prop.PropagateToXAlpha(mX + 1.f, prop.GetAlpha(), inFlyDirection); if (err) { CADEBUG(printf("\t\tpropagation error (%d)\n", err)); @@ -785,22 +785,22 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr float rowX = Merger->Param().tpcGeometry.Row2X(j); if (CAMath::Abs(rowX - (-mP[0] * lrFactor)) < 1.5f) { CADEBUG(printf("\t\tAttempt row %d (Y %f Z %f)\n", j, mX * lrFactor, mP[1])); - AttachClusters(Merger, slice, j, iTrack, false, mX * lrFactor, mP[1]); + AttachClusters(Merger, sector, j, iTrack, false, mX * lrFactor, mP[1]); } } } - if (slice != toSlice) { + if (sector != toSector) { if (right) { - if (++slice >= sliceSide + 18) { - slice -= 18; + if (++sector >= sectorSide + 18) { + sector -= 18; } } else { - if (--slice < sliceSide) { - slice += 18; + if (--sector < sectorSide) { + sector += 18; } } - CADEBUG(printf("\tRotating to slice %d\n", slice)); - if (prop.RotateToAlpha(param.Alpha(slice) + (CAMath::Pi() / 2.f) * lrFactor)) { + CADEBUG(printf("\tRotating to sector %d\n", sector)); + if (prop.RotateToAlpha(param.Alpha(sector) + (CAMath::Pi() / 2.f) * lrFactor)) { CADEBUG(printf("\t\trotation error\n")); prop.RotateToAlpha(prop.GetAlpha() - (CAMath::Pi() / 2.f) * lrFactor); return 1; @@ -835,7 +835,7 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr } } prop.PropagateToXAlpha(Merger->Param().tpcGeometry.Row2X(iRow) + dx, prop.GetAlpha(), inFlyDirection); - AttachClustersPropagate(Merger, slice, iRow, toRow, iTrack, false, prop, inFlyDirection); + AttachClustersPropagate(Merger, sector, iRow, toRow, iTrack, false, prop, inFlyDirection); } if (prop.PropagateToXAlpha(toX, prop.GetAlpha(), inFlyDirection)) { mX = toX; @@ -845,7 +845,7 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr } template -GPUdni() void 
GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& GPUrestrict() prop, bool phase2) +GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& GPUrestrict() prop, bool phase2) { static constexpr float kSectAngle = 2 * M_PI / 18.f; @@ -853,7 +853,7 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU return; } if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { - StoreAttachMirror(Merger, slice, iRow, iTrack, 0, toY, 0, -1, 0, 0, prop.GetAlpha()); + StoreAttachMirror(Merger, sector, iRow, iTrack, 0, toY, 0, -1, 0, 0, prop.GetAlpha()); return; } // Note that the coordinate system is rotated by 90 degree swapping X and Y! @@ -908,7 +908,7 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU float rowX = mX + Merger->Param().tpcGeometry.Row2X(j) - myRowX; if (CAMath::Abs(rowX - paramX) < 1.5f) { // printf("Attempt row %d\n", j); - AttachClusters(Merger, slice, j, iTrack, false, mP[2] > 0 ? X : -X, Z); + AttachClusters(Merger, sector, j, iTrack, false, mP[2] > 0 ? 
X : -X, Z); } } } @@ -933,10 +933,10 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, xInner = merger->Param().tpcGeometry.Row2X(clusters[N - 1].row); xOuter = merger->Param().tpcGeometry.Row2X(clusters[0].row); } - ShiftZ(merger, clusters[0].slice, tzInner, tzOuter, xInner, xOuter); + ShiftZ(merger, clusters[0].sector, tzInner, tzOuter, xInner, xOuter); } -GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merger, int32_t slice, float tz1, float tz2, float x1, float x2) +GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merger, int32_t sector, float tz1, float tz2, float x1, float x2) { if (!merger->Param().par.continuousTracking) { return; @@ -989,9 +989,9 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge baset = tz2; basex = x2; } - float refZ = ((slice < GPUCA_NSLICES / 2) ? merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * basex; + float refZ = ((sector < GPUCA_NSECTORS / 2) ? 
merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * basex; float basez; - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(slice, baset, basez, mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(sector, baset, basez, mTZOffset); deltaZ = basez - refZ; } } @@ -1016,11 +1016,11 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge mTZOffset += deltaZ; mP[1] -= deltaZ; } else { - float deltaT = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaZtoDeltaTimeInTimeFrame(slice, deltaZ); + float deltaT = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaZtoDeltaTimeInTimeFrame(sector, deltaZ); mTZOffset += deltaT; mP[1] -= deltaZ; const float maxT = CAMath::Min(tz1, tz2) - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getT0(); - const float minT = CAMath::Max(tz1, tz2) - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(slice); + const float minT = CAMath::Max(tz1, tz2) - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(sector); // printf("T Check: Clusters %f %f, min %f max %f vtx %f\n", tz1, tz2, minT, maxT, mTZOffset); deltaT = 0.f; if (mTZOffset < minT) { @@ -1030,7 +1030,7 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge deltaT = maxT - mTZOffset; } if (deltaT != 0.f) { - deltaZ = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(slice, deltaT); + deltaZ = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(sector, deltaT); // printf("Moving clusters to TPC Range: QPt %f, New mTZOffset %f, t1 %f, t2 %f, Shift %f in Z: %f to %f --> %f to %f in T\n", mP[4], mTZOffset + deltaT, tz1, tz2, 
deltaZ, tz2 - mTZOffset, tz1 - mTZOffset, tz2 - mTZOffset - deltaT, tz1 - mTZOffset - deltaT); mTZOffset += deltaT; mP[1] -= deltaZ; @@ -1126,7 +1126,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr if (track.OK()) { int32_t ind = track.FirstClusterRef(); const GPUParam& GPUrestrict() param = merger->Param(); - float alphaa = param.Alpha(merger->Clusters()[ind].slice); + float alphaa = param.Alpha(merger->Clusters()[ind].sector); float xx, yy, zz; if (merger->Param().par.earlyTpcTransform) { xx = merger->ClustersXYZ()[ind].x; @@ -1134,7 +1134,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr zz = merger->ClustersXYZ()[ind].z - track.Param().GetTZOffset(); } else { const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[merger->Clusters()[ind].num]; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(merger->Clusters()[ind].slice, merger->Clusters()[ind].row, cl.getPad(), cl.getTime(), xx, yy, zz, track.Param().GetTZOffset()); + merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(merger->Clusters()[ind].sector, merger->Clusters()[ind].row, cl.getPad(), cl.getTime(), xx, yy, zz, track.Param().GetTZOffset()); } float sinA, cosA; CAMath::SinCos(alphaa - track.Alpha(), sinA, cosA); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index a2d7dcf2b3e3d..e3a5b2f7c1d01 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -27,9 +27,7 @@ class AliExternalTrackParam; -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMMerger; class GPUTPCGMBorderTrack; @@ -43,7 +41,7 @@ class GPUTPCGMPropagator; * @class GPUTPCGMTrackParam * * GPUTPCGMTrackParam class describes the track parametrisation - * which is used by the GPUTPCGMTracker slice tracker. + * which is used by the GPUTPCGMTracker sector tracker. 
* */ class GPUTPCGMTrackParam @@ -148,15 +146,15 @@ class GPUTPCGMTrackParam GPUd() void MirrorTo(GPUTPCGMPropagator& prop, float toY, float toZ, bool inFlyDirection, const GPUParam& param, uint8_t row, uint8_t clusterState, bool mirrorParameters, int8_t sector); GPUd() int32_t MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* merger, GPUTPCGMPropagator& prop, float& xx, float& yy, float& zz, int32_t maxN, float clAlpha, uint8_t& clusterState, bool rejectChi2); - GPUd() bool AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop, bool inFlyDirection, float maxSinPhi = GPUCA_MAX_SIN_PHI, bool checkdEdx = false); - GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop); // Returns uncorrectedY for later use - GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z); + GPUd() bool AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop, bool inFlyDirection, float maxSinPhi = GPUCA_MAX_SIN_PHI, bool checkdEdx = false); + GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop); // Returns uncorrectedY for later use + GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z); // We force to compile these twice, for RefitLoop and for Fit, for better optimization template - GPUd() void AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t 
slice, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& prop, bool phase2 = false); + GPUd() void AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& prop, bool phase2 = false); template - GPUd() int32_t FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t slice, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSlice, int32_t toRow, bool inFlyDirection, bool phase2 = false); - GPUd() void StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSlice, int32_t toRow, bool inFlyDirection, float alpha); + GPUd() int32_t FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection, bool phase2 = false); + GPUd() void StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSector, int32_t toRow, bool inFlyDirection, float alpha); GPUd() void StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, const GPUTPCGMPropagator& prop, int32_t phase); GPUd() static void RefitLoop(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t loopIdx); @@ -187,7 +185,7 @@ class GPUTPCGMTrackParam } GPUd() void Rotate(float alpha); - GPUd() void ShiftZ(const GPUTPCGMMerger* merger, int32_t slice, float tzInner, float tzOuter, float x1, float x2); + GPUd() void ShiftZ(const GPUTPCGMMerger* merger, int32_t sector, float tzInner, float tzOuter, float x1, float x2); GPUd() void ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* merger, int32_t N); GPUd() static float Reciprocal(float x) { return 1.f / x; } @@ -235,9 
+233,9 @@ struct GPUTPCGMLoopData { float toX; float alpha; float toAlpha; - uint8_t slice; + uint8_t sector; uint8_t row; - int8_t toSlice; + int8_t toSector; uint8_t toRow; uint8_t inFlyDirection; }; @@ -285,7 +283,6 @@ GPUdi() float GPUTPCGMTrackParam::GetMirroredY(float Bz) const } return GetY() - 2.f * CAMath::Sqrt(cosPhi2) / qptBz; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx index 78015b347a8c6..78eea63edecdd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx @@ -62,7 +62,7 @@ void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPC continue; } - AliTPCtrackerRow& row = tpctracker->GetRow(cls.slice % 18, cls.row); + AliTPCtrackerRow& row = tpctracker->GetRow(cls.sector % 18, cls.row); uint32_t clIndexOffline = 0; AliTPCclusterMI* clOffline = row.FindNearest2(cls.y, cls.z, 0.01f, 0.01f, clIndexOffline); if (!clOffline) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index be057172a968f..a21593b7ba9e9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -26,7 +26,7 @@ using namespace o2::gpu; template <> GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t) { - for (int32_t i = iBlock * nThreads + iThread; i < GPUCA_NSLICES * merger.NMaxSingleSliceTracks(); i++) { + for (int32_t i = iBlock * nThreads + iThread; i < GPUCA_NSECTORS * merger.NMaxSingleSectorTracks(); i++) { merger.TrackIDs()[i] = -1; } } @@ -37,12 +37,12 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread= 0 && tmp[j] != j) { - auto getTrackIDIndex = 
[&merger](const int32_t iSlice, const int32_t iTrack) { - const int32_t kEnd = merger.NMaxSingleSliceTracks(); + auto getTrackIDIndex = [&merger](const int32_t iSector, const int32_t iTrack) { + const int32_t kEnd = merger.NMaxSingleSectorTracks(); for (int32_t k = 0; k < kEnd; k++) { - if (merger.TrackIDs()[iSlice * merger.NMaxSingleSliceTracks() + k] == iTrack) { + if (merger.TrackIDs()[iSector * merger.NMaxSingleSectorTracks() + k] == iTrack) { return k; } } @@ -70,23 +70,23 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::ThreadUniform(-1.5, 1.5); double theta = 2 * TMath::ATan(1. / TMath::Exp(eta)); double lambda = theta - TMath::Pi() / 2; // double theta = gRandom->Uniform(-60,60)*TMath::Pi()/180.; double pt = .1 * std::pow(10, gRandom->Uniform(0, 2.2)); double q = 1.; - int32_t iSlice = GetSlice(phi); - phi = phi - GetSliceAngle(iSlice); + int32_t iSector = GetSector(phi); + phi = phi - GetSectorAngle(iSector); // std::cout<<"phi = "<Load("libAliHLTTPC"); diff --git a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C index 6ffa2bbe4babe..e18f0f703b0d5 100644 --- a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C +++ b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ int32_t fitPolynomialFieldTpc() { gSystem->Load("libAliHLTTPC"); diff --git a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C index bc515e1fa5849..67eea34110ab8 100644 --- a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C +++ b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + int32_t fitPolynomialFieldTrd() { gSystem->Load("libAliHLTTPC"); diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 643ca7b7a99df..9d10d40107b8f 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -63,7 +63,9 @@ void GPUTrackingRefitProcessor::SetMaxData(const GPUTrackingInOutPointers& io) } #endif -namespace +namespace o2::gpu::internal +{ +namespace // anonymous { template struct refitTrackTypes; @@ -76,6 +78,7 @@ struct refitTrackTypes { using propagator = const Propagator*; }; } // anonymous namespace +} // namespace o2::gpu::internal template <> GPUd() void GPUTrackingRefit::initProp(GPUTPCGMPropagator& prop) // FIXME: GPUgeneric() needed to make the clang spirv output link correctly @@ -210,10 +213,10 @@ template GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov) { CADEBUG(int32_t ii; printf("\nRefitting track\n")); - typename refitTrackTypes::propagator prop; + typename internal::refitTrackTypes::propagator 
prop; S trk; float TrackParCovChi2 = 0.f; - convertTrack::propagator>(trk, trkX, prop, &TrackParCovChi2); + convertTrack::propagator>(trk, trkX, prop, &TrackParCovChi2); int32_t begin = 0, count; float tOffset; if constexpr (std::is_same_v) { @@ -271,7 +274,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov break; } row = hit.row; - sector = hit.slice; + sector = hit.sector; nextState = mPclusterState[hit.num]; } else if constexpr (std::is_same_v) { cl = &trkX.getCluster(mPtrackHitReferences, i, *mPclusterNative, sector, row); @@ -417,7 +420,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov static_assert("Invalid template"); } - convertTrack::propagator>(trkX, trk, prop, &TrackParCovChi2); + convertTrack::propagator>(trkX, trk, prop, &TrackParCovChi2); return nFitted; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h b/GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h similarity index 96% rename from GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h rename to GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h index 0eabd82e59a02..74ff251af4247 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h @@ -17,9 +17,7 @@ #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTrackParam; @@ -27,7 +25,7 @@ class GPUTPCTrackParam; * @class GPUTPCBaseTrackParam * * GPUTPCBaseTrackParam class contains track parameters - * used in output of the GPUTPCTracker slice tracker. + * used in output of the GPUTPCTracker sector tracker. 
* This class is used for transfer between tracker and merger and does not contain the covariance matrice */ struct GPUTPCBaseTrackParam { @@ -80,7 +78,6 @@ struct GPUTPCBaseTrackParam { float mZOffset; // z offset float mP[5]; // 'active' track parameters: Y, Z, SinPhi, DzDs, q/Pt }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCClusterData.h b/GPU/GPUTracking/SectorTracker/GPUTPCClusterData.h similarity index 93% rename from GPU/GPUTracking/SliceTracker/GPUTPCClusterData.h rename to GPU/GPUTracking/SectorTracker/GPUTPCClusterData.h index 1961ffabd791c..cf269a27bd6a5 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCClusterData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCClusterData.h @@ -17,9 +17,7 @@ #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCClusterData { int32_t id; @@ -37,7 +35,6 @@ struct GPUTPCClusterData { float sigmaTime2; #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // CLUSTERDATA_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.cxx similarity index 78% rename from GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.cxx index bada60b9cec80..ae71bcdb541ca 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.cxx @@ -23,15 +23,15 @@ GPUdii() void GPUTPCCreateOccupancyMap::Thread(i const GPUTrackingInOutPointers& GPUrestrict() ioPtrs = processors.ioPtrs; const o2::tpc::ClusterNativeAccess* GPUrestrict() clusters = ioPtrs.clustersNative; GPUParam& GPUrestrict() param = processors.param; - const int32_t iSliceRow = iBlock * nThreads + iThread; - if (iSliceRow >= GPUCA_ROW_COUNT * GPUCA_NSLICES) { + const int32_t iSectorRow = iBlock * nThreads + iThread; + if (iSectorRow >= 
GPUCA_ROW_COUNT * GPUCA_NSECTORS) { return; } - const uint32_t iSlice = iSliceRow / GPUCA_ROW_COUNT; - const uint32_t iRow = iSliceRow % GPUCA_ROW_COUNT; - for (uint32_t i = 0; i < clusters->nClusters[iSlice][iRow]; i++) { - const uint32_t bin = clusters->clusters[iSlice][iRow][i].getTime() / param.rec.tpc.occupancyMapTimeBins; - map[bin].bin[iSlice][iRow]++; + const uint32_t iSector = iSectorRow / GPUCA_ROW_COUNT; + const uint32_t iRow = iSectorRow % GPUCA_ROW_COUNT; + for (uint32_t i = 0; i < clusters->nClusters[iSector][iRow]; i++) { + const uint32_t bin = clusters->clusters[iSector][iRow][i].getTime() / param.rec.tpc.occupancyMapTimeBins; + map[bin].bin[iSector][iRow]++; } } @@ -47,8 +47,8 @@ GPUdii() void GPUTPCCreateOccupancyMap::Thread(i int32_t binmax = CAMath::Min(GPUTPCClusterOccupancyMapBin::getNBins(param), bin + param.rec.tpc.occupancyMapTimeBinsAverage + 1); uint32_t sum = 0; for (int32_t i = binmin; i < binmax; i++) { - for (int32_t iSliceRow = 0; iSliceRow < GPUCA_NSLICES * GPUCA_ROW_COUNT; iSliceRow++) { - sum += (&map[i].bin[0][0])[iSliceRow]; + for (int32_t iSectorRow = 0; iSectorRow < GPUCA_NSECTORS * GPUCA_ROW_COUNT; iSectorRow++) { + sum += (&map[i].bin[0][0])[iSectorRow]; } } sum /= binmax - binmin; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h b/GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.h similarity index 95% rename from GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h rename to GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.h index 91f5816f69df2..de8eb8622adb1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.h @@ -29,7 +29,7 @@ class GPUTPCCreateOccupancyMap : public GPUKernelTemplate enum K { defaultKernel = 0, fill = 0, fold = 1 }; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep 
GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, Args... args); }; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.cxx similarity index 68% rename from GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.cxx index bd33927408a26..641326a8a2caa 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.cxx @@ -9,17 +9,17 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCCreateSliceData.cxx +/// \file GPUTPCCreateTrackingData.cxx /// \author David Rohr -#include "GPUTPCCreateSliceData.h" +#include "GPUTPCCreateTrackingData.h" #include "GPUTPCTracker.h" #include "GPUCommonMath.h" using namespace o2::gpu; template <> -GPUdii() void GPUTPCCreateSliceData::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCCreateTrackingData::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { - tracker.Data().InitFromClusterData(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem(), tracker.ISlice(), s.tmp); + tracker.Data().InitFromClusterData(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem(), tracker.ISector(), s.tmp); } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h similarity index 80% rename from GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h rename to 
GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h index 9065b220bb44d..9327699c9404b 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h @@ -9,24 +9,22 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCCreateSliceData.h +/// \file GPUTPCCreateTrackingData.h /// \author David Rohr -#ifndef GPUTPCCREATESLICEDATA_H -#define GPUTPCCREATESLICEDATA_H +#ifndef GPUTPCCREATESECTORDATA_H +#define GPUTPCCREATESECTORDATA_H #include "GPUTPCDef.h" #include "GPUTPCHitId.h" #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; -class GPUTPCCreateSliceData : public GPUKernelTemplate +class GPUTPCCreateTrackingData : public GPUKernelTemplate { public: struct GPUSharedMemory { @@ -34,7 +32,7 @@ class GPUTPCCreateSliceData : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -42,7 +40,6 @@ class GPUTPCCreateSliceData : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu -#endif // GPUTPCCREATESLICEDATA_H +#endif // GPUTPCCREATESECTORDATA_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCDef.h b/GPU/GPUTracking/SectorTracker/GPUTPCDef.h similarity index 96% rename from GPU/GPUTracking/SliceTracker/GPUTPCDef.h rename to GPU/GPUTracking/SectorTracker/GPUTPCDef.h index 
3b53c3e66875a..84ea8e836007c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCDef.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCDef.h @@ -21,9 +21,7 @@ #define CALINK_INVAL ((calink) -1) #define CALINK_DEAD_CHANNEL ((calink) -2) -namespace o2 -{ -namespace gpu +namespace o2::gpu { #if defined(GPUCA_O2_LIB) || defined(GPUCA_O2_INTERFACE) typedef uint32_t calink; @@ -33,8 +31,7 @@ typedef uint32_t calink; typedef uint32_t cahit; #endif struct cahit2 { cahit x, y; }; -} -} // o2::GPU +} // namespace o2::gpu #ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME // Needs full clusterdata #define GPUCA_FULL_CLUSTERDATA diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCDefinitions.h b/GPU/GPUTracking/SectorTracker/GPUTPCDefinitions.h similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCDefinitions.h rename to GPU/GPUTracking/SectorTracker/GPUTPCDefinitions.h diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx similarity index 80% rename from GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx index 1a5e99f0f52ca..3ffead1c5902b 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx @@ -22,7 +22,7 @@ using namespace o2::gpu; -GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) +GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& GPUrestrict() sectorSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) { /*for (int32_t j = 0;j < Tracks()[j].NHits();j++) {
GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUT tParam.SetCov(5, 0.001f); tParam.SetCov(9, 0.001f); tParam.SetCov(14, 0.05f); - tParam.SetParam(sliceSource.Tracks()[iTrack].Param()); + tParam.SetParam(sectorSource.Tracks()[iTrack].Param()); // GPUInfo("Parameters X %f Y %f Z %f SinPhi %f DzDs %f QPt %f SignCosPhi %f", tParam.X(), tParam.Y(), tParam.Z(), tParam.SinPhi(), tParam.DzDs(), tParam.QPt(), tParam.SignCosPhi()); if (!tParam.Rotate(angle, GPUCA_MAX_SIN_PHI)) { @@ -73,13 +73,13 @@ GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUT // GPUInfo("%d hits found", nHits); uint32_t hitId = CAMath::AtomicAdd(&tracker.CommonMemory()->nTrackHits, (uint32_t)nHits); if (hitId + nHits > tracker.NMaxTrackHits()) { - tracker.raiseError(GPUErrors::ERROR_GLOBAL_TRACKING_TRACK_HIT_OVERFLOW, tracker.ISlice(), hitId + nHits, tracker.NMaxTrackHits()); + tracker.raiseError(GPUErrors::ERROR_GLOBAL_TRACKING_TRACK_HIT_OVERFLOW, tracker.ISector(), hitId + nHits, tracker.NMaxTrackHits()); CAMath::AtomicExch(&tracker.CommonMemory()->nTrackHits, tracker.NMaxTrackHits()); return 0; } uint32_t trackId = CAMath::AtomicAdd(&tracker.CommonMemory()->nTracks, 1u); if (trackId >= tracker.NMaxTracks()) { // >= since will increase by 1 - tracker.raiseError(GPUErrors::ERROR_GLOBAL_TRACKING_TRACK_OVERFLOW, tracker.ISlice(), trackId, tracker.NMaxTracks()); + tracker.raiseError(GPUErrors::ERROR_GLOBAL_TRACKING_TRACK_OVERFLOW, tracker.ISector(), trackId, tracker.NMaxTracks()); CAMath::AtomicExch(&tracker.CommonMemory()->nTracks, tracker.NMaxTracks()); return 0; } @@ -112,13 +112,13 @@ GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUT track.SetParam(tParam.GetParam()); track.SetNHits(nHits); track.SetFirstHitID(hitId); - track.SetLocalTrackId((sliceSource.ISlice() << 24) | sliceSource.Tracks()[iTrack].LocalTrackId()); + track.SetLocalTrackId((sectorSource.ISector() << 24) | sectorSource.Tracks()[iTrack].LocalTrackId()); } 
return (nHits >= tracker.Param().rec.tpc.extrapolationTrackingMinHits); } -GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& GPUrestrict() sliceTarget, bool right) +GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& GPUrestrict() sectorTarget, bool right) { for (int32_t i = iBlock * nThreads + iThread; i < tracker.CommonMemory()->nLocalTracks; i += nThreads * nBlocks) { { @@ -129,11 +129,11 @@ GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nB float Y = (float)tracker.Data().HitDataY(row, tracker.TrackHits()[tmpHit].HitIndex()) * row.HstepY() + row.Grid().YMin(); if (!right && Y < -row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeLower) { // GPUInfo("Track %d, lower row %d, left border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, -row.MaxY()); - PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, -1); + PerformExtrapolationTrackingRun(sectorTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, -1); } if (right && Y > row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeLower) { // GPUInfo("Track %d, lower row %d, right border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, row.MaxY()); - PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, -1); + PerformExtrapolationTrackingRun(sectorTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, -1); } } } @@ -146,11 +146,11 @@ GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nB float Y = (float)tracker.Data().HitDataY(row, tracker.TrackHits()[tmpHit].HitIndex()) * row.HstepY() + 
row.Grid().YMin(); if (!right && Y < -row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeUpper) { // GPUInfo("Track %d, upper row %d, left border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, -row.MaxY()); - PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, 1); + PerformExtrapolationTrackingRun(sectorTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, 1); } if (right && Y > row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeUpper) { // GPUInfo("Track %d, upper row %d, right border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, row.MaxY()); - PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, 1); + PerformExtrapolationTrackingRun(sectorTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, 1); } } } @@ -160,42 +160,42 @@ GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nB template <> GPUdii() void GPUTPCExtrapolationTracking::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) { - CA_SHARED_CACHE(&smem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); + CA_SHARED_CACHE(&smem.mRows[0], tracker.TrackingDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); GPUbarrier(); if (tracker.NHitsTotal() == 0) { return; } - const int32_t iSlice = tracker.ISlice(); - int32_t sliceLeft = (iSlice + (GPUDataTypes::NSLICES / 2 - 1)) % (GPUDataTypes::NSLICES / 2); - int32_t sliceRight = (iSlice + 1) % (GPUDataTypes::NSLICES / 2); - if (iSlice >= (int32_t)GPUDataTypes::NSLICES / 2) { - sliceLeft += GPUDataTypes::NSLICES / 2; - sliceRight += GPUDataTypes::NSLICES / 2; + const int32_t iSector = tracker.ISector(); + int32_t sectorLeft = (iSector + (GPUDataTypes::NSECTORS / 2 - 1)) % (GPUDataTypes::NSECTORS / 2); + int32_t sectorRight = (iSector + 1) % (GPUDataTypes::NSECTORS / 2); + if 
(iSector >= (int32_t)GPUDataTypes::NSECTORS / 2) { + sectorLeft += GPUDataTypes::NSECTORS / 2; + sectorRight += GPUDataTypes::NSECTORS / 2; } - PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sliceLeft], smem, tracker, true); - PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sliceRight], smem, tracker, false); + PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sectorLeft], smem, tracker, true); + PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sectorRight], smem, tracker, false); } -GPUd() int32_t GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceOrder(int32_t iSlice) +GPUd() int32_t GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(int32_t iSector) { - iSlice++; - if (iSlice == GPUDataTypes::NSLICES / 2) { - iSlice = 0; + iSector++; + if (iSector == GPUDataTypes::NSECTORS / 2) { + iSector = 0; } - if (iSlice == GPUDataTypes::NSLICES) { - iSlice = GPUDataTypes::NSLICES / 2; + if (iSector == GPUDataTypes::NSECTORS) { + iSector = GPUDataTypes::NSECTORS / 2; } - return iSlice; + return iSector; } -GPUd() void GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right) +GPUd() void GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(uint32_t iSector, uint32_t& left, uint32_t& right) { - left = (iSlice + (GPUDataTypes::NSLICES / 2 - 1)) % (GPUDataTypes::NSLICES / 2); - right = (iSlice + 1) % (GPUDataTypes::NSLICES / 2); - if (iSlice >= (int32_t)GPUDataTypes::NSLICES / 2) { - left += GPUDataTypes::NSLICES / 2; - right += GPUDataTypes::NSLICES / 2; + left = (iSector + (GPUDataTypes::NSECTORS / 2 - 1)) % (GPUDataTypes::NSECTORS / 2); + right = (iSector + 1) % (GPUDataTypes::NSECTORS / 2); + if (iSector >= (int32_t)GPUDataTypes::NSECTORS / 2) { + left += 
GPUDataTypes::NSECTORS / 2; + right += GPUDataTypes::NSECTORS / 2; } } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.h b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h similarity index 83% rename from GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.h rename to GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h index cd6533a3439ed..2d2b275d06399 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h @@ -18,9 +18,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -32,7 +30,7 @@ class GPUTPCExtrapolationTracking : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -40,19 +38,19 @@ class GPUTPCExtrapolationTracking : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); - GPUd() static int32_t ExtrapolationTrackingSliceOrder(int32_t iSlice); - GPUd() static void ExtrapolationTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right); + GPUd() static int32_t ExtrapolationTrackingSectorOrder(int32_t iSector); + GPUd() static void ExtrapolationTrackingSectorLeftRight(uint32_t iSector, uint32_t& left, uint32_t& right); private: - GPUd() static int32_t PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); - 
GPUd() static void PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& sliceTarget, bool right); + GPUd() static int32_t PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& sectorSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); + GPUd() static void PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& sectorTarget, bool right); }; class GPUTPCExtrapolationTrackingCopyNumbers : public GPUKernelTemplate { public: typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -61,7 +59,6 @@ class GPUTPCExtrapolationTrackingCopyNumbers : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker, int32_t n); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLETCONSTRUCTOR_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCGrid.cxx similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCGrid.cxx diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h b/GPU/GPUTracking/SectorTracker/GPUTPCGrid.h similarity index 80% rename from GPU/GPUTracking/SliceTracker/GPUTPCGrid.h rename to GPU/GPUTracking/SectorTracker/GPUTPCGrid.h index 1fbb1c5a23c45..ebb6f9e52500e 
100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCGrid.h @@ -17,9 +17,7 @@ #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCGrid @@ -37,8 +35,8 @@ class GPUTPCGrid GPUd() int32_t GetBin(float Y, float Z) const; /** - * returns -1 if the row is empty == no hits - */ + * returns -1 if the row is empty == no hits + */ GPUd() int32_t GetBinBounded(float Y, float Z) const; GPUd() void GetBin(float Y, float Z, int32_t* const bY, int32_t* const bZ) const; GPUd() void GetBinArea(float Y, float Z, float dy, float dz, int32_t& bin, int32_t& ny, int32_t& nz) const; @@ -56,17 +54,16 @@ class GPUTPCGrid private: friend class GPUTPCNeighboursFinder; - uint32_t mNy; //* N bins in Y - uint32_t mNz; //* N bins in Z - uint32_t mN; //* total N bins - float mYMin; //* minimal Y value - float mYMax; //* maximal Y value - float mZMin; //* minimal Z value - float mZMax; //* maximal Z value - float mStepYInv; //* inverse bin size in Y - float mStepZInv; //* inverse bin size in Z + uint32_t mNy; //* N bins in Y + uint32_t mNz; //* N bins in Z + uint32_t mN; //* total N bins + float mYMin; //* minimal Y value + float mYMax; //* maximal Y value + float mZMin; //* minimal Z value + float mZMax; //* maximal Z value + float mStepYInv; //* inverse bin size in Y + float mStepZInv; //* inverse bin size in Z }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCGRID_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCHit.h b/GPU/GPUTracking/SectorTracker/GPUTPCHit.h similarity index 94% rename from GPU/GPUTracking/SliceTracker/GPUTPCHit.h rename to GPU/GPUTracking/SectorTracker/GPUTPCHit.h index 34a59b2f08dd2..e064441a1dba3 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCHit.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCHit.h @@ -17,9 +17,7 @@ #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCHit @@ -43,7 +41,6 @@ class GPUTPCHit private: 
friend class GPUTPCNeighboursFinder; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCHIT_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCHitId.h b/GPU/GPUTracking/SectorTracker/GPUTPCHitId.h similarity index 93% rename from GPU/GPUTracking/SliceTracker/GPUTPCHitId.h rename to GPU/GPUTracking/SectorTracker/GPUTPCHitId.h index 19cfde1d76f4b..51c72969da8ae 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCHitId.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCHitId.h @@ -15,9 +15,7 @@ #ifndef GPUTPCHITID_H #define GPUTPCHITID_H -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCHitId { @@ -29,7 +27,6 @@ class GPUTPCHitId private: int32_t mId; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCHITID_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h b/GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h similarity index 93% rename from GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h rename to GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h index 13f2753db6c93..ffd95cd807413 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h @@ -15,9 +15,7 @@ #ifndef GPUTPCMCINFO_H #define GPUTPCMCINFO_H -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCMCInfo { int32_t charge; @@ -39,7 +37,6 @@ struct GPUTPCMCInfoCol { uint32_t first; uint32_t num; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.cxx similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.cxx diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h similarity index 93% rename from GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h rename to 
GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h index 1682e18244732..7af6e8eb1a582 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h @@ -19,9 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -40,7 +38,7 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -48,7 +46,6 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate template GPUd() static void Thread(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCNEIGHBOURSCLEANER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index 36254243e81b8..ec348b59ce7a5 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -15,7 +15,7 @@ #include "GPUTPCHit.h" #include "GPUTPCNeighboursFinder.h" #include "GPUTPCTracker.h" -//#include "GPUCommonMath.h" +// #include "GPUCommonMath.h" #include "GPUDefMacros.h" using namespace o2::gpu; @@ -26,10 +26,10 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh #ifdef GPUCA_GPUCODE for (uint32_t i = iThread; i < 
sizeof(GPUTPCRow) / sizeof(int32_t); i += nThreads) { - reinterpret_cast(&s.mRow)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock])[i]; + reinterpret_cast(&s.mRow)[i] = reinterpret_cast(&tracker.TrackingDataRows()[iBlock])[i]; if (iBlock >= 2 && iBlock < GPUCA_ROW_COUNT - 2) { - reinterpret_cast(&s.mRowUp)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock + 2])[i]; - reinterpret_cast(&s.mRowDown)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock - 2])[i]; + reinterpret_cast(&s.mRowUp)[i] = reinterpret_cast(&tracker.TrackingDataRows()[iBlock + 2])[i]; + reinterpret_cast(&s.mRowDown)[i] = reinterpret_cast(&tracker.TrackingDataRows()[iBlock - 2])[i]; } } GPUbarrier(); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h similarity index 84% rename from GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h rename to GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 2d71d948ad9e1..54dc0876f8a55 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -34,11 +32,11 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate { public: struct GPUSharedMemory { - int32_t mNHits; // n hits - float mUpDx; // x distance to the next row - float mDnDx; // x distance to the previous row - float mUpTx; // normalized x distance to the next row - float mDnTx; // normalized x distance to the previous row + int32_t mNHits; // n hits + float mUpDx; // x distance to the next row + float mDnDx; // x distance to the previous row + float mUpTx; // normalized x distance to the next row + float mDnTx; // normalized x distance to the previous row int32_t mIRow; // row number int32_t mIRowUp; // next row number int32_t mIRowDn; // previous row number @@ -51,7 +49,7 @@ class 
GPUTPCNeighboursFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -59,7 +57,6 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCNEIGHBOURSFINDER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h b/GPU/GPUTracking/SectorTracker/GPUTPCRow.h similarity index 83% rename from GPU/GPUTracking/SliceTracker/GPUTPCRow.h rename to GPU/GPUTracking/SectorTracker/GPUTPCRow.h index d401311683f28..eb787f99ea336 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCRow.h @@ -18,9 +18,7 @@ #include "GPUTPCDef.h" #include "GPUTPCGrid.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCRow @@ -31,13 +29,9 @@ namespace gpu */ class GPUTPCRow { - friend class GPUTPCSliceData; + friend class GPUTPCTrackingData; public: -#if !defined(GPUCA_GPUCODE) - GPUTPCRow(); -#endif //! 
GPUCA_GPUCODE - GPUhd() int32_t NHits() const { return mNHits; @@ -61,9 +55,9 @@ class GPUTPCRow friend class GPUTPCNeighboursFinder; friend class GPUTPCStartHitsFinder; - int32_t mNHits; // number of hits - float mX; // X coordinate of the row - float mMaxY; // maximal Y coordinate of the row + int32_t mNHits; // number of hits + float mX; // X coordinate of the row + float mMaxY; // maximal Y coordinate of the row GPUTPCGrid mGrid; // grid of hits // hit packing: @@ -74,11 +68,9 @@ class GPUTPCRow float mHstepYi; // inverse step size float mHstepZi; // inverse step size - int32_t mHitNumberOffset; // index of the first hit in the hit array, used as - // offset in GPUTPCSliceData::LinkUp/DownData/HitDataY/... + int32_t mHitNumberOffset; // index of the first hit in the hit array, used as offset in GPUTPCTrackingData::LinkUp/DownData/HitDataY/... uint32_t mFirstHitInBinOffset; // offset in Tracker::mRowData to find the FirstHitInBin }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCROW_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx similarity index 95% rename from GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx index 9f06b00f30c3f..7981ef5af26d8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx @@ -15,7 +15,7 @@ #include "GPUParam.h" #include "GPUTPCClusterData.h" #include "GPUTPCHit.h" -#include "GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUProcessor.h" #include "GPUO2DataTypes.h" #include "GPUCommonMath.h" @@ -86,7 +86,7 @@ GPUdii() void GPUTPCSectorDebugSortKernels::Thread -GPUdii() void GPUTPCSectorDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, 
processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCSectorDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) { if (iThread || iBlock) { return; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.h similarity index 94% rename from GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.h index 5617f9745311e..520a791b0eb43 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.h @@ -29,8 +29,8 @@ class GPUTPCSectorDebugSortKernels : public GPUKernelTemplate enum K { defaultKernel = 0, hitData = 0, startHits = 1, - sliceTracks = 2 }; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; } + sectorTracks = 2 }; + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } typedef GPUTPCTracker processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h similarity index 67% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h index 1d958de1ff7a4..2c62a2ca184b2 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h @@ -9,24 +9,22 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUTPCSliceOutCluster.h +/// \file GPUTPCSectorOutCluster.h /// \author Sergey Gorbunov, David Rohr -#ifndef GPUTPCSLICEOUTCLUSTER_H -#define GPUTPCSLICEOUTCLUSTER_H +#ifndef GPUTPCSECTOROUTCLUSTER_H +#define GPUTPCSECTOROUTCLUSTER_H #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** - * @class GPUTPCSliceOutCluster - * GPUTPCSliceOutCluster class contains clusters which are assigned to slice tracks. - * It is used to send the data from TPC slice trackers to the GlobalMerger + * @class GPUTPCSectorOutCluster + * GPUTPCSectorOutCluster class contains clusters which are assigned to sector tracks. + * It is used to send the data from TPC sector trackers to the GlobalMerger */ -class GPUTPCSliceOutCluster +class GPUTPCSectorOutCluster { public: GPUhd() void Set(uint32_t id, uint8_t row, uint8_t flags, uint16_t amp, float x, float y, float z) @@ -49,13 +47,13 @@ class GPUTPCSliceOutCluster GPUhd() uint8_t GetFlags() const { return mFlags; } private: - uint32_t mId; // Id - uint8_t mRow; // row - uint8_t mFlags; // flags - uint16_t mAmp; // amplitude - float mX; // coordinates - float mY; // coordinates - float mZ; // coordinates + uint32_t mId; // Id + uint8_t mRow; // row + uint8_t mFlags; // flags + uint16_t mAmp; // amplitude + float mX; // coordinates + float mY; // coordinates + float mZ; // coordinates #ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME public: @@ -63,7 +61,6 @@ class GPUTPCSliceOutCluster float mTime; #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx similarity index 71% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx index 06b87c7a682d3..864a5c6b7106e 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx @@ -9,26 +9,26 @@ 
// granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCSliceOutput.cxx +/// \file GPUTPCSectorOutput.cxx /// \author Sergey Gorbunov, Ivan Kisel, David Rohr #include "GPUOutputControl.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUCommonMath.h" #include using namespace o2::gpu; -uint32_t GPUTPCSliceOutput::EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters) +uint32_t GPUTPCSectorOutput::EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters) { // calculate the amount of memory [bytes] needed for the event - return sizeof(GPUTPCSliceOutput) + sizeof(GPUTPCTrack) * nOfTracks + sizeof(GPUTPCSliceOutCluster) * nOfTrackClusters; + return sizeof(GPUTPCSectorOutput) + sizeof(GPUTPCTrack) * nOfTracks + sizeof(GPUTPCSectorOutCluster) * nOfTrackClusters; } #ifndef GPUCA_GPUCODE -void GPUTPCSliceOutput::Allocate(GPUTPCSliceOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory) +void GPUTPCSectorOutput::Allocate(GPUTPCSectorOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory) { - // Allocate All memory needed for slice output + // Allocate All memory needed for sector output const size_t memsize = EstimateSize(nTracks, nTrackHits); if (outputControl && outputControl->useExternal()) { @@ -42,7 +42,7 @@ void GPUTPCSliceOutput::Allocate(GPUTPCSliceOutput*& ptrOutput, int32_t nTracks, lock.clear(std::memory_order_release); return; } - ptrOutput = reinterpret_cast(outputControl->ptrCurrent); + ptrOutput = reinterpret_cast(outputControl->ptrCurrent); outputControl->ptrCurrent = (char*)outputControl->ptrCurrent + memsize; lock.clear(std::memory_order_release); } else { @@ -50,7 +50,7 @@ void GPUTPCSliceOutput::Allocate(GPUTPCSliceOutput*& ptrOutput, int32_t nTracks, free(internalMemory); } internalMemory = malloc(memsize); - ptrOutput = 
reinterpret_cast(internalMemory); + ptrOutput = reinterpret_cast(internalMemory); } ptrOutput->SetMemorySize(memsize); } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h similarity index 66% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h index 3b5712ccbb8f4..cc02206dc09a7 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h @@ -9,33 +9,31 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCSliceOutput.h +/// \file GPUTPCSectorOutput.h /// \author Sergey Gorbunov, Ivan Kisel, David Rohr -#ifndef GPUTPCSLICEOUTPUT_H -#define GPUTPCSLICEOUTPUT_H +#ifndef GPUTPCSECTOROUTPUT_H +#define GPUTPCSECTOROUTPUT_H #include "GPUTPCDef.h" #include "GPUTPCTrack.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUOutputControl; /** - * @class GPUTPCSliceOutput + * @class GPUTPCSectorOutput * - * GPUTPCSliceOutput class is used to store the output of GPUTPCTracker{Component} + * GPUTPCSectorOutput class is used to store the output of GPUTPCTracker{Component} * and transport the output to GPUTPCGBMerger{Component} * - * The class contains all the necessary information about TPC tracks, reconstructed in one slice. + * The class contains all the necessary information about TPC tracks, reconstructed in one sector. * This includes the reconstructed track parameters and some compressed information * about the assigned clusters: clusterId, position and amplitude. 
* */ -class GPUTPCSliceOutput +class GPUTPCSectorOutput { public: GPUhd() uint32_t NTracks() const @@ -58,25 +56,24 @@ class GPUTPCSliceOutput } static uint32_t EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters); - static void Allocate(GPUTPCSliceOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory); + static void Allocate(GPUTPCSectorOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory); GPUhd() void SetNTracks(uint32_t v) { mNTracks = v; } GPUhd() void SetNLocalTracks(uint32_t v) { mNLocalTracks = v; } GPUhd() void SetNTrackClusters(uint32_t v) { mNTrackClusters = v; } private: - GPUTPCSliceOutput() = delete; // NOLINT: Must be private or ROOT tries to use them! - ~GPUTPCSliceOutput() = delete; // NOLINT - GPUTPCSliceOutput(const GPUTPCSliceOutput&) = delete; // NOLINT - GPUTPCSliceOutput& operator=(const GPUTPCSliceOutput&) = delete; // NOLINT + GPUTPCSectorOutput() = delete; // NOLINT: Must be private or ROOT tries to use them! 
+ ~GPUTPCSectorOutput() = delete; // NOLINT + GPUTPCSectorOutput(const GPUTPCSectorOutput&) = delete; // NOLINT + GPUTPCSectorOutput& operator=(const GPUTPCSectorOutput&) = delete; // NOLINT GPUhd() void SetMemorySize(size_t val) { mMemorySize = val; } uint32_t mNTracks; // number of reconstructed tracks uint32_t mNLocalTracks; - uint32_t mNTrackClusters; // total number of track clusters - size_t mMemorySize; // Amount of memory really used + uint32_t mNTrackClusters; // total number of track clusters + size_t mMemorySize; // Amount of memory really used }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx similarity index 92% rename from GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx index 7b60e0621e78f..af79dddae554e 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx @@ -43,7 +43,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletTmpStartHits + s.mIRow * tracker.mNMaxRowStartHits; uint32_t nextRowStartHits = CAMath::AtomicAddShared(&s.mNRowStartHits, 1u); if (nextRowStartHits >= tracker.mNMaxRowStartHits) { - tracker.raiseError(GPUErrors::ERROR_ROWSTARTHIT_OVERFLOW, tracker.ISlice() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxRowStartHits); + tracker.raiseError(GPUErrors::ERROR_ROWSTARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxRowStartHits); CAMath::AtomicExchShared(&s.mNRowStartHits, tracker.mNMaxRowStartHits); break; } @@ -51,7 +51,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletStartHits; uint32_t 
nextRowStartHits = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, 1u); if (nextRowStartHits >= tracker.mNMaxStartHits) { - tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISlice() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxStartHits); + tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxStartHits); CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); break; } @@ -66,7 +66,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr uint32_t nOffset = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, s.mNRowStartHits); tracker.mRowStartHitCountOffset[s.mIRow] = s.mNRowStartHits; if (nOffset + s.mNRowStartHits > tracker.mNMaxStartHits) { - tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISlice() * 1000 + s.mIRow, nOffset + s.mNRowStartHits, tracker.mNMaxStartHits); + tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nOffset + s.mNRowStartHits, tracker.mNMaxStartHits); CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); } } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h similarity index 93% rename from GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h rename to GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h index f818e6986dbc6..5e620180570c8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -40,7 +38,7 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() 
constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -48,7 +46,6 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCSTARTHITSFINDER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.cxx similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.cxx diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h similarity index 93% rename from GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h rename to GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h index 0877b6c15a511..b0349d660dbc1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -40,7 +38,7 @@ class GPUTPCStartHitsSorter : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -48,7 +46,6 @@ class GPUTPCStartHitsSorter : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t 
nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCSTARTHITSSORTER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.cxx similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrack.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrack.cxx diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h similarity index 70% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrack.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrack.h index fcf9d1149c588..225f5f0e2c7ad 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h @@ -17,11 +17,9 @@ #include "GPUTPCBaseTrackParam.h" #include "GPUTPCDef.h" -#include "GPUTPCSliceOutCluster.h" +#include "GPUTPCSectorOutCluster.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCTrack @@ -51,23 +49,22 @@ class GPUTPCTrack GPUhd() void SetParam(const GPUTPCBaseTrackParam& v) { mParam = v; } - // Only if used as replacement for SliceOutTrack - GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSliceOutCluster); } + // Only if used as replacement for SectorOutTrack + GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSectorOutCluster); } GPUhd() const GPUTPCTrack* GetNextTrack() const { return (const GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } GPUhd() GPUTPCTrack* NextTrack() { return (GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } - GPUhd() void SetOutTrackCluster(int32_t i, const GPUTPCSliceOutCluster& v) { ((GPUTPCSliceOutCluster*)((char*)this + sizeof(*this)))[i] = v; } - GPUhd() const GPUTPCSliceOutCluster* OutTrackClusters() const { return (const GPUTPCSliceOutCluster*)((char*)this + sizeof(*this)); } - GPUhd() 
const GPUTPCSliceOutCluster& OutTrackCluster(int32_t i) const { return OutTrackClusters()[i]; } + GPUhd() void SetOutTrackCluster(int32_t i, const GPUTPCSectorOutCluster& v) { ((GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)))[i] = v; } + GPUhd() const GPUTPCSectorOutCluster* OutTrackClusters() const { return (const GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)); } + GPUhd() const GPUTPCSectorOutCluster& OutTrackCluster(int32_t i) const { return OutTrackClusters()[i]; } private: - int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array - int32_t mNHits; // number of track cells - int32_t mLocalTrackId; // Id of local track this extrapolated track belongs to, index of this track itself if it is a local track + int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array + int32_t mNHits; // number of track cells + int32_t mLocalTrackId; // Id of local track this extrapolated track belongs to, index of this track itself if it is a local track GPUTPCBaseTrackParam mParam; // track parameters private: }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACK_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackLinearisation.h similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackLinearisation.h index c9ab6158179bd..06ead3ce00f6c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackLinearisation.h @@ -17,9 +17,7 @@ #include "GPUTPCTrackParam.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCTrackLinearisation @@ -87,7 +85,6 @@ GPUdi() void GPUTPCTrackLinearisation::Set(float SinPhi1, float CosPhi1, float D SetDzDs(DzDs1); SetQPt(QPt1); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLINEARISATION_H diff 
--git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx similarity index 99% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx index 68ced574a18a9..af6f8e6cddc08 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx @@ -709,7 +709,7 @@ GPUd() bool GPUTPCTrackParam::CheckNumericalQuality() const GPUd() void GPUTPCTrackParam::ConstrainZ(float& z, int32_t sector, float& z0, float& lastZ) { - if (sector < GPUCA_NSLICES / 2) { + if (sector < GPUCA_NSECTORS / 2) { if (z < 0) { mParam.mZOffset += z; mParam.mP[1] -= z; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h index 72f9d5fbaa23d..e31abe338d2b8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h @@ -19,9 +19,7 @@ #include "GPUTPCDef.h" #include "GPUCommonMath.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTrackLinearisation; @@ -29,7 +27,7 @@ class GPUTPCTrackLinearisation; * @class GPUTPCTrackParam * * GPUTPCTrackParam class describes the track parametrisation - * which is used by the GPUTPCTracker slice tracker. + * which is used by the GPUTPCTracker sector tracker. * */ class GPUTPCTrackParam @@ -143,7 +141,7 @@ class GPUTPCTrackParam #ifndef GPUCA_GPUCODE private: -#endif //! GPUCA_GPUCODE +#endif //! 
GPUCA_GPUCODE GPUTPCBaseTrackParam mParam; // Track Parameters private: @@ -181,7 +179,6 @@ GPUdi() void GPUTPCTrackParam::InitParam() SetCov(14, 1000.f); SetZOffset(0); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKPARAM_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx similarity index 78% rename from GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index cece49073f11b..63c64f78cc095 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -18,7 +18,7 @@ #include "GPUCommonMath.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUO2DataTypes.h" #include "GPUTPCTrackParam.h" #include "GPUParam.inc" @@ -39,11 +39,6 @@ using namespace o2::tpc; #if !defined(GPUCA_GPUCODE) -GPUTPCTracker::GPUTPCTracker() - : GPUProcessor(), mLinkTmpMemory(nullptr), mISlice(-1), mData(), mNMaxStartHits(0), mNMaxRowStartHits(0), mNMaxTracklets(0), mNMaxRowHits(0), mNMaxTracks(0), mNMaxTrackHits(0), mMemoryResLinks(-1), mMemoryResScratchHost(-1), mMemoryResCommon(-1), mMemoryResTracklets(-1), mMemoryResOutput(-1), mMemoryResSliceScratch(-1), mRowStartHitCountOffset(nullptr), mTrackletTmpStartHits(nullptr), mGPUTrackletTemp(nullptr), mGPUParametersConst(), mCommonMem(nullptr), mTrackletStartHits(nullptr), mTracklets(nullptr), mTrackletRowHits(nullptr), mTracks(nullptr), mTrackHits(nullptr), mOutput(nullptr), mOutputMemory(nullptr) -{ -} - GPUTPCTracker::~GPUTPCTracker() { if (mOutputMemory) { @@ -52,11 +47,11 @@ GPUTPCTracker::~GPUTPCTracker() } // ---------------------------------------------------------------------------------- -void GPUTPCTracker::SetSlice(int32_t iSlice) { mISlice = iSlice; } +void GPUTPCTracker::SetSector(int32_t iSector) { mISector = iSector; } void GPUTPCTracker::InitializeProcessor() { - if 
(mISlice < 0) { - throw std::runtime_error("Slice not set"); + if (mISector < 0) { + throw std::runtime_error("Sector not set"); } InitializeRows(&Param()); SetupCommonMemory(); @@ -73,7 +68,7 @@ void* GPUTPCTracker::SetPointersScratch(void* mem) if (mRec->GetProcessingSettings().memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_INDIVIDUAL) { mem = SetPointersTracklets(mem); } - if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) { + if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) { computePointerWithAlignment(mem, mTrackletTmpStartHits, GPUCA_ROW_COUNT * mNMaxRowStartHits); computePointerWithAlignment(mem, mRowStartHitCountOffset, GPUCA_ROW_COUNT); } @@ -98,17 +93,17 @@ void* GPUTPCTracker::SetPointersCommon(void* mem) void GPUTPCTracker::RegisterMemoryAllocation() { AllocateAndInitializeLate(); - bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletSelectorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); - GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; - mMemoryResLinks = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSliceLinks", reLinks); - mMemoryResSliceScratch = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSliceScratch"); - GPUMemoryReuse reWeights{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataWeights, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; - mRec->RegisterMemoryAllocation(this, 
&GPUTPCTracker::SetPointersDataWeights, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSliceWeights", reWeights); - GPUMemoryReuse reScratch{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerScratch, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; + bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletSelectorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); + GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; + mMemoryResLinks = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSectorLinks", reLinks); + mMemoryResSectorScratch = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSectorScratch"); + GPUMemoryReuse reWeights{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataWeights, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; + mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataWeights, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSectorWeights", reWeights); + GPUMemoryReuse reScratch{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerScratch, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCTrackerScratch", reScratch); mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersScratchHost, 
GPUMemoryResource::MEMORY_SCRATCH_HOST, "TPCTrackerHost"); mMemoryResCommon = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersCommon, GPUMemoryResource::MEMORY_PERMANENT, "TPCTrackerCommon"); - mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataRows, GPUMemoryResource::MEMORY_PERMANENT, "TPCSliceRows"); + mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataRows, GPUMemoryResource::MEMORY_PERMANENT, "TPCSectorRows"); uint32_t type = GPUMemoryResource::MEMORY_SCRATCH; if (mRec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { // For individual scheme, we allocate tracklets separately, and change the type for the following allocations to custom @@ -142,8 +137,8 @@ void GPUTPCTracker::SetMaxData(const GPUTrackingInOutPointers& io) if (io.clustersNative) { uint32_t maxRowHits = 0; for (uint32_t i = 0; i < GPUCA_ROW_COUNT; i++) { - if (io.clustersNative->nClusters[mISlice][i] > maxRowHits) { - maxRowHits = io.clustersNative->nClusters[mISlice][i]; + if (io.clustersNative->nClusters[mISector][i] > maxRowHits) { + maxRowHits = io.clustersNative->nClusters[mISector][i]; } } mNMaxRowStartHits = mRec->MemoryScalers()->NTPCRowStartHits(maxRowHits * GPUCA_ROW_COUNT); @@ -155,7 +150,7 @@ void GPUTPCTracker::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxTracks = mRec->MemoryScalers()->NTPCSectorTracks(mData.NumberOfHits()); mNMaxTrackHits = mRec->MemoryScalers()->NTPCSectorTrackHits(mData.NumberOfHits(), mRec->GetProcessingSettings().tpcInputWithClusterRejection); #ifdef GPUCA_SORT_STARTHITS_GPU - if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) { + if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) { if (mNMaxStartHits > mNMaxRowStartHits * GPUCA_ROW_COUNT) { mNMaxStartHits = mNMaxRowStartHits * GPUCA_ROW_COUNT; } @@ -173,9 +168,9 @@ void GPUTPCTracker::UpdateMaxData() void GPUTPCTracker::SetupCommonMemory() { new (mCommonMem) 
commonMemoryStruct; } -GPUh() int32_t GPUTPCTracker::CheckEmptySlice() +GPUh() int32_t GPUTPCTracker::CheckEmptySector() { - // Check if the Slice is empty, if so set the output apropriate and tell the reconstuct procesdure to terminate + // Check if the Sector is empty, if so set the output apropriate and tell the reconstuct procesdure to terminate if (NHitsTotal() < 1) { mCommonMem->nTracks = mCommonMem->nTrackHits = 0; if (mOutput) { @@ -188,7 +183,7 @@ GPUh() int32_t GPUTPCTracker::CheckEmptySlice() return 0; } -GPUh() void GPUTPCTracker::WriteOutputPrepare() { GPUTPCSliceOutput::Allocate(mOutput, mCommonMem->nTracks, mCommonMem->nTrackHits, &mRec->OutputControl(), mOutputMemory); } +GPUh() void GPUTPCTracker::WriteOutputPrepare() { GPUTPCSectorOutput::Allocate(mOutput, mCommonMem->nTracks, mCommonMem->nTrackHits, &mRec->OutputControl(), mOutputMemory); } template static inline bool SortComparison(const T& a, const T& b) @@ -205,7 +200,7 @@ GPUh() void GPUTPCTracker::WriteOutput() if (mCommonMem->nTracks == 0) { return; } - if (mCommonMem->nTracks > GPUCA_MAX_SLICE_NTRACK) { + if (mCommonMem->nTracks > GPUCA_MAX_SECTOR_NTRACK) { GPUError("Maximum number of tracks exceeded, cannot store"); return; } @@ -241,12 +236,12 @@ GPUh() void GPUTPCTracker::WriteOutput() int32_t clusterIndex = mData.ClusterDataIndex(row, ih); #ifdef GPUCA_ARRAY_BOUNDS_CHECKS if (ih >= row.NHits() || ih < 0) { - GPUError("Array out of bounds access (Sector Row) (Hit %d / %d - NumC %d): Sector %d Row %d Index %d", ith, iTrack.NHits(), NHitsTotal(), mISlice, iRow, ih); + GPUError("Array out of bounds access (Sector Row) (Hit %d / %d - NumC %d): Sector %d Row %d Index %d", ith, iTrack.NHits(), NHitsTotal(), mISector, iRow, ih); fflush(stdout); continue; } if (clusterIndex >= NHitsTotal() || clusterIndex < 0) { - GPUError("Array out of bounds access (Cluster Data) (Hit %d / %d - NumC %d): Sector %d Row %d Hit %d, Clusterdata Index %d", ith, iTrack.NHits(), NHitsTotal(), mISlice, iRow, ih, 
clusterIndex); + GPUError("Array out of bounds access (Cluster Data) (Hit %d / %d - NumC %d): Sector %d Row %d Hit %d, Clusterdata Index %d", ith, iTrack.NHits(), NHitsTotal(), mISector, iRow, ih, clusterIndex); fflush(stdout); continue; } @@ -265,12 +260,12 @@ GPUh() void GPUTPCTracker::WriteOutput() id = mData.ClusterData()[clusterIndex].id; } else { const ClusterNativeAccess& cls = *mConstantMem->ioPtrs.clustersNative; - id = clusterIndex + cls.clusterOffset[mISlice][0]; - GPUTPCConvertImpl::convert(*mConstantMem, mISlice, iRow, cls.clustersLinear[id].getPad(), cls.clustersLinear[id].getTime(), origX, origY, origZ); + id = clusterIndex + cls.clusterOffset[mISector][0]; + GPUTPCConvertImpl::convert(*mConstantMem, mISector, iRow, cls.clustersLinear[id].getPad(), cls.clustersLinear[id].getTime(), origX, origY, origZ); flags = cls.clustersLinear[id].getFlags(); amp = cls.clustersLinear[id].qTot; } - GPUTPCSliceOutCluster c; + GPUTPCSectorOutCluster c; c.Set(id, iRow, flags, amp, origX, origY, origZ); #ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME c.mPad = mData.ClusterData()[clusterIndex].pad; @@ -294,7 +289,7 @@ GPUh() void GPUTPCTracker::WriteOutput() mOutput->SetNLocalTracks(nStoredLocalTracks); mOutput->SetNTrackClusters(nStoredHits); if (Param().par.debugLevel >= 3) { - GPUInfo("Slice %d, Output: Tracks %d, local tracks %d, hits %d", mISlice, nStoredTracks, nStoredLocalTracks, nStoredHits); + GPUInfo("Sector %d, Output: Tracks %d, local tracks %d, hits %d", mISector, nStoredTracks, nStoredLocalTracks, nStoredHits); } } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h similarity index 67% rename from GPU/GPUTracking/SliceTracker/GPUTPCTracker.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index c5d4d40a2bef8..4a789b5adf6bf 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -22,16 +22,14 @@ #endif #include "GPUTPCHitId.h" -#include 
"GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUTPCTrackParam.h" #include "GPUTPCTracklet.h" #include "GPUProcessor.h" -namespace o2 +namespace o2::gpu { -namespace gpu -{ -class GPUTPCSliceOutput; +class GPUTPCSectorOutput; struct GPUTPCClusterData; struct GPUParam; class GPUTPCTrack; @@ -42,27 +40,27 @@ class GPUTPCTracker : public GPUProcessor { public: #ifndef GPUCA_GPUCODE_DEVICE - GPUTPCTracker(); + GPUTPCTracker() = default; ~GPUTPCTracker(); GPUTPCTracker(const GPUTPCTracker&) = delete; GPUTPCTracker& operator=(const GPUTPCTracker&) = delete; - void SetSlice(int32_t iSlice); + void SetSector(int32_t iSector); void InitializeProcessor(); void InitializeRows(const GPUParam* param) { mData.InitializeRows(*param); } - int32_t CheckEmptySlice(); + int32_t CheckEmptySector(); void WriteOutputPrepare(); void WriteOutput(); // Debugging Stuff - void DumpSliceData(std::ostream& out); // Dump Input Slice Data + void DumpTrackingData(std::ostream& out); // Dump Input Sector Data void DumpLinks(std::ostream& out, int32_t phase); // Dump all links to file (for comparison after NeighboursFinder/Cleaner) - void DumpStartHits(std::ostream& out); // Same for Start Hits - void DumpHitWeights(std::ostream& out); //.... - void DumpTrackHits(std::ostream& out); // Same for Track Hits - void DumpTrackletHits(std::ostream& out); // Same for Track Hits - void DumpOutput(std::ostream& out); // Similar for output + void DumpStartHits(std::ostream& out); // Same for Start Hits + void DumpHitWeights(std::ostream& out); //.... 
+ void DumpTrackHits(std::ostream& out); // Same for Track Hits + void DumpTrackletHits(std::ostream& out); // Same for Track Hits + void DumpOutput(std::ostream& out); // Similar for output #endif struct StructGPUParameters { @@ -75,14 +73,14 @@ class GPUTPCTracker : public GPUProcessor struct commonMemoryStruct { commonMemoryStruct() : nStartHits(0), nTracklets(0), nRowHits(0), nTracks(0), nLocalTracks(0), nTrackHits(0), nLocalTrackHits(0), gpuParameters() {} - GPUAtomic(uint32_t) nStartHits; // number of start hits - GPUAtomic(uint32_t) nTracklets; // number of tracklets - GPUAtomic(uint32_t) nRowHits; // number of tracklet hits - GPUAtomic(uint32_t) nTracks; // number of reconstructed tracks - int32_t nLocalTracks; // number of reconstructed tracks before extrapolation tracking - GPUAtomic(uint32_t) nTrackHits; // number of track hits - int32_t nLocalTrackHits; // see above - StructGPUParameters gpuParameters; // GPU parameters + GPUAtomic(uint32_t) nStartHits; // number of start hits + GPUAtomic(uint32_t) nTracklets; // number of tracklets + GPUAtomic(uint32_t) nRowHits; // number of tracklet hits + GPUAtomic(uint32_t) nTracks; // number of reconstructed tracks + int32_t nLocalTracks; // number of reconstructed tracks before extrapolation tracking + GPUAtomic(uint32_t) nTrackHits; // number of track hits + int32_t nLocalTrackHits; // see above + StructGPUParameters gpuParameters; // GPU parameters }; GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const @@ -90,7 +88,7 @@ class GPUTPCTracker : public GPUProcessor return mData.ClusterData(); } GPUhdi() const GPUTPCRow& Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } - GPUhdi() GPUglobalref() GPUTPCSliceOutput* Output() const { return mOutput; } + GPUhdi() GPUglobalref() GPUTPCSectorOutput* Output() const { return mOutput; } GPUhdni() GPUglobalref() commonMemoryStruct* CommonMemory() const { return (mCommonMem); @@ -104,13 +102,13 @@ class GPUTPCTracker : public 
GPUProcessor GPUdi() void GetErrors2Seeding(int32_t iRow, const GPUTPCTrackParam& t, float time, float& ErrY2, float& ErrZ2) const { - // Param().GetClusterErrors2(mISlice, iRow, Param().GetContinuousTracking() != 0. ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, 0.f, 0.f, ErrY2, ErrZ2); - Param().GetClusterErrorsSeeding2(mISlice, iRow, Param().par.continuousTracking != 0.f ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, ErrY2, ErrZ2); + // Param().GetClusterErrors2(mISector, iRow, Param().GetContinuousTracking() != 0. ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, 0.f, 0.f, ErrY2, ErrZ2); + Param().GetClusterErrorsSeeding2(mISector, iRow, Param().par.continuousTracking != 0.f ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, ErrY2, ErrZ2); } GPUdi() void GetErrors2Seeding(int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { - // Param().GetClusterErrors2(mISlice, iRow, Param().GetContinuousTracking() != 0. ? 125.f : z, sinPhi, DzDs, time, 0.f, 0.f, ErrY2, ErrZ2); - Param().GetClusterErrorsSeeding2(mISlice, iRow, Param().par.continuousTracking != 0.f ? 125.f : z, sinPhi, DzDs, time, ErrY2, ErrZ2); + // Param().GetClusterErrors2(mISector, iRow, Param().GetContinuousTracking() != 0. ? 125.f : z, sinPhi, DzDs, time, 0.f, 0.f, ErrY2, ErrZ2); + Param().GetClusterErrorsSeeding2(mISector, iRow, Param().par.continuousTracking != 0.f ? 
125.f : z, sinPhi, DzDs, time, ErrY2, ErrZ2); } void SetupCommonMemory(); @@ -130,15 +128,15 @@ class GPUTPCTracker : public GPUProcessor int16_t MemoryResCommon() const { return mMemoryResCommon; } int16_t MemoryResTracklets() const { return mMemoryResTracklets; } int16_t MemoryResOutput() const { return mMemoryResOutput; } - int16_t MemoryResSliceScratch() const { return mMemoryResSliceScratch; } + int16_t MemoryResSectorScratch() const { return mMemoryResSectorScratch; } void SetMaxData(const GPUTrackingInOutPointers& io); void UpdateMaxData(); - GPUhd() int32_t ISlice() const { return mISlice; } + GPUhd() int32_t ISector() const { return mISector; } - GPUhd() GPUconstantref() const GPUTPCSliceData& Data() const { return mData; } - GPUhdi() GPUconstantref() GPUTPCSliceData& Data() + GPUhd() GPUconstantref() const GPUTPCTrackingData& Data() const { return mData; } + GPUhdi() GPUconstantref() GPUTPCTrackingData& Data() { return mData; } @@ -172,13 +170,13 @@ class GPUTPCTracker : public GPUProcessor GPUhd() int32_t HitInputID(const GPUTPCRow& row, int32_t hitIndex) const { return mData.ClusterDataIndex(row, hitIndex); } /** - * The hit weight is used to determine whether a hit belongs to a certain tracklet or another one - * competing for the same hit. The tracklet that has a higher weight wins. Comparison is done - * using the the number of hits in the tracklet (the more hits it has the more it keeps). If - * tracklets have the same number of hits then it doesn't matter who gets it, but it should be - * only one. So a unique number (row index is good) is added in the least significant part of - * the weight - */ + * The hit weight is used to determine whether a hit belongs to a certain tracklet or another one + * competing for the same hit. The tracklet that has a higher weight wins. Comparison is done + * using the the number of hits in the tracklet (the more hits it has the more it keeps). 
If + * tracklets have the same number of hits then it doesn't matter who gets it, but it should be + * only one. So a unique number (row index is good) is added in the least significant part of + * the weight + */ GPUdi() static int32_t CalculateHitWeight(int32_t NHits, float chi2) { const float chi2_suppress = 6.f; @@ -210,7 +208,7 @@ class GPUTPCTracker : public GPUProcessor GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NTrackHits() const { return &mCommonMem->nTrackHits; } GPUhd() GPUglobalref() GPUTPCHitId* TrackHits() const { return mTrackHits; } - GPUhd() GPUglobalref() GPUTPCRow* SliceDataRows() const { return (mData.Rows()); } + GPUhd() GPUglobalref() GPUTPCRow* TrackingDataRows() const { return (mData.Rows()); } GPUhd() GPUglobalref() int32_t* RowStartHitCountOffset() const { return (mRowStartHitCountOffset); } GPUhd() GPUglobalref() StructGPUParameters* GPUParameters() const { return (&mCommonMem->gpuParameters); } GPUhd() StructGPUParametersConst* GPUParametersConst() @@ -222,7 +220,7 @@ class GPUTPCTracker : public GPUProcessor struct trackSortData { int32_t fTtrack; // Track ID - float fSortVal; // Value to sort for + float fSortVal; // Value to sort for }; void* LinkTmpMemory() { return mLinkTmpMemory; } @@ -235,48 +233,47 @@ class GPUTPCTracker : public GPUProcessor friend class GPUTPCNeighboursFinder; friend class GPUTPCStartHitsSorter; friend class GPUTPCStartHitsFinder; - char* mLinkTmpMemory; // tmp memory for hits after neighbours finder - - int32_t mISlice; // Number of slice - - GPUTPCSliceData mData; // The SliceData object. 
It is used to encapsulate the storage in memory from the access - - uint32_t mNMaxStartHits; - uint32_t mNMaxRowStartHits; - uint32_t mNMaxTracklets; - uint32_t mNMaxRowHits; - uint32_t mNMaxTracks; - uint32_t mNMaxTrackHits; - int16_t mMemoryResLinks; - int16_t mMemoryResScratch; - int16_t mMemoryResScratchHost; - int16_t mMemoryResCommon; - int16_t mMemoryResTracklets; - int16_t mMemoryResOutput; - int16_t mMemoryResSliceScratch; + char* mLinkTmpMemory = nullptr; // tmp memory for hits after neighbours finder + + int32_t mISector = -1; // Number of sector + + GPUTPCTrackingData mData; // The TrackingData object. It is used to encapsulate the storage in memory from the access + + uint32_t mNMaxStartHits = 0; + uint32_t mNMaxRowStartHits = 0; + uint32_t mNMaxTracklets = 0; + uint32_t mNMaxRowHits = 0; + uint32_t mNMaxTracks = 0; + uint32_t mNMaxTrackHits = 0; + uint16_t mMemoryResLinks = (uint16_t)-1; + uint16_t mMemoryResScratch = (uint16_t)-1; + uint16_t mMemoryResScratchHost = (uint16_t)-1; + uint16_t mMemoryResCommon = (uint16_t)-1; + uint16_t mMemoryResTracklets = (uint16_t)-1; + uint16_t mMemoryResOutput = (uint16_t)-1; + uint16_t mMemoryResSectorScratch = (uint16_t)-1; // GPU Temp Arrays - GPUglobalref() int32_t* mRowStartHitCountOffset; // Offset, length and new offset of start hits in row - GPUglobalref() GPUTPCHitId* mTrackletTmpStartHits; // Unsorted start hits - GPUglobalref() char* mGPUTrackletTemp; // Temp Memory for GPU Tracklet Constructor + GPUglobalref() int32_t* mRowStartHitCountOffset = nullptr; // Offset, length and new offset of start hits in row + GPUglobalref() GPUTPCHitId* mTrackletTmpStartHits = nullptr; // Unsorted start hits + GPUglobalref() char* mGPUTrackletTemp = nullptr; // Temp Memory for GPU Tracklet Constructor StructGPUParametersConst mGPUParametersConst; // Parameters for GPU if this is a GPU tracker // event - GPUglobalref() commonMemoryStruct* mCommonMem; // common event memory - GPUglobalref() GPUTPCHitId* mTrackletStartHits; 
// start hits for the tracklets - GPUglobalref() GPUTPCTracklet* mTracklets; // tracklets - GPUglobalref() calink* mTrackletRowHits; // Hits for each Tracklet in each row - GPUglobalref() GPUTPCTrack* mTracks; // reconstructed tracks - GPUglobalref() GPUTPCHitId* mTrackHits; // array of track hit numbers + GPUglobalref() commonMemoryStruct* mCommonMem = nullptr; // common event memory + GPUglobalref() GPUTPCHitId* mTrackletStartHits = nullptr; // start hits for the tracklets + GPUglobalref() GPUTPCTracklet* mTracklets = nullptr; // tracklets + GPUglobalref() calink* mTrackletRowHits = nullptr; // Hits for each Tracklet in each row + GPUglobalref() GPUTPCTrack* mTracks = nullptr; // reconstructed tracks + GPUglobalref() GPUTPCHitId* mTrackHits = nullptr; // array of track hit numbers // output - GPUglobalref() GPUTPCSliceOutput* mOutput; // address of pointer pointing to SliceOutput Object - void* mOutputMemory; // Pointer to output memory if stored internally + GPUglobalref() GPUTPCSectorOutput* mOutput; // address of pointer pointing to SectorOutput Object + void* mOutputMemory; // Pointer to output memory if stored internally static int32_t StarthitSortComparison(const void* a, const void* b); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx similarity index 90% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx index 5c2ed83d47966..ba1727fa602a4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx @@ -13,7 +13,7 @@ /// \author David Rohr #include "GPUTPCTracker.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUReconstruction.h" #include "GPUTPCHitId.h" #include "GPUTPCTrack.h" @@ -29,7 +29,7 @@ using namespace o2::gpu; void 
GPUTPCTracker::DumpOutput(std::ostream& out) { if (Param().par.earlyTpcTransform) { - out << "\nSlice " << mISlice << "\n"; + out << "\nSector " << mISector << "\n"; const GPUTPCTrack* track = (Output())->GetFirstTrack(); for (uint32_t j = 0; j < (Output())->NTracks(); j++) { out << "Track " << j << " (" << track->NHits() << "): "; @@ -42,10 +42,10 @@ void GPUTPCTracker::DumpOutput(std::ostream& out) } } -void GPUTPCTracker::DumpSliceData(std::ostream& out) +void GPUTPCTracker::DumpTrackingData(std::ostream& out) { - // Dump Slice Input Data to File - out << "\nSlice Data (Slice" << mISlice << "):" << std::endl; + // Dump Sector Input Data to File + out << "\nSector Data (Sector" << mISector << "):" << std::endl; for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { if (Row(i).NHits() == 0) { continue; @@ -64,7 +64,7 @@ void GPUTPCTracker::DumpSliceData(std::ostream& out) void GPUTPCTracker::DumpLinks(std::ostream& out, int32_t phase) { // Dump Links (after Neighbours Finder / Cleaner) to file - out << "\nHit Links (Phase " << phase << ", Slice" << mISlice << "):" << std::endl; + out << "\nHit Links (Phase " << phase << ", Sector" << mISector << "):" << std::endl; for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { if (Row(i).NHits() == 0) { continue; @@ -83,7 +83,7 @@ void GPUTPCTracker::DumpLinks(std::ostream& out, int32_t phase) void GPUTPCTracker::DumpHitWeights(std::ostream& out) { // dump hit weights to file - out << "\nHit Weights(Slice" << mISlice << "):" << std::endl; + out << "\nHit Weights(Sector" << mISector << "):" << std::endl; for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { if (Row(i).NHits() == 0) { continue; @@ -102,7 +102,7 @@ void GPUTPCTracker::DumpHitWeights(std::ostream& out) void GPUTPCTracker::DumpStartHits(std::ostream& out) { // dump start hits to file - out << "\nStart Hits: (Slice" << mISlice << ") (" << *NStartHits() << ")" << std::endl; + out << "\nStart Hits: (Sector" << mISector << ") (" << *NStartHits() << ")" << std::endl; for (uint32_t i = 
0; i < *NStartHits(); i++) { out << TrackletStartHit(i).RowIndex() << "-" << TrackletStartHit(i).HitIndex() << std::endl; } @@ -112,7 +112,7 @@ void GPUTPCTracker::DumpStartHits(std::ostream& out) void GPUTPCTracker::DumpTrackHits(std::ostream& out) { // dump tracks to file - out << "\nTracks: (Slice" << mISlice << ") (" << *NTracks() << ")" << std::endl; + out << "\nTracks: (Sector" << mISector << ") (" << *NTracks() << ")" << std::endl; for (uint32_t j = 0; j < *NTracks(); j++) { if (Tracks()[j].NHits() == 0) { continue; @@ -140,7 +140,7 @@ void GPUTPCTracker::DumpTrackletHits(std::ostream& out) if (nTracklets < 0) { nTracklets = 0; } - out << "\nTracklets: (Slice" << mISlice << ") (" << nTracklets << ")" << std::endl; + out << "\nTracklets: (Sector" << mISector << ") (" << nTracklets << ")" << std::endl; std::vector Ids(nTracklets); std::iota(Ids.begin(), Ids.end(), 0); if (mRec->GetProcessingSettings().deterministicGPUReconstruction) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx similarity index 86% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx index 3cc3e3805dce8..a3e73c377ed44 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx @@ -9,13 +9,13 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUTPCSliceData.cxx +/// \file GPUTPCTrackingData.cxx /// \author Matthias Kretz, Sergey Gorbunov, David Rohr #include "GPUParam.h" #include "GPUTPCClusterData.h" #include "GPUTPCHit.h" -#include "GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUProcessor.h" #include "GPUO2DataTypes.h" #include "GPUTPCConvertImpl.h" @@ -32,7 +32,7 @@ using namespace o2::gpu; #ifndef GPUCA_GPUCODE -void GPUTPCSliceData::InitializeRows(const GPUParam& p) +void GPUTPCTrackingData::InitializeRows(const GPUParam& p) { // initialisation of rows for (int32_t i = 0; i < GPUCA_ROW_COUNT + 1; i++) { @@ -44,34 +44,34 @@ void GPUTPCSliceData::InitializeRows(const GPUParam& p) } } -void GPUTPCSliceData::SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset) +void GPUTPCTrackingData::SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset) { mClusterData = data; mNumberOfHits = nClusters; mClusterIdOffset = clusterIdOffset; } -void GPUTPCSliceData::SetMaxData() +void GPUTPCTrackingData::SetMaxData() { int32_t hitMemCount = GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT + mNumberOfHits; const uint32_t kVectorAlignment = 256; mNumberOfHitsPlusAlign = GPUProcessor::nextMultipleOf<(kVectorAlignment > GPUCA_ROWALIGNMENT ? 
kVectorAlignment : GPUCA_ROWALIGNMENT) / sizeof(int32_t)>(hitMemCount); } -void* GPUTPCSliceData::SetPointersLinks(void* mem) +void* GPUTPCTrackingData::SetPointersLinks(void* mem) { GPUProcessor::computePointerWithAlignment(mem, mLinkUpData, mNumberOfHitsPlusAlign); GPUProcessor::computePointerWithAlignment(mem, mLinkDownData, mNumberOfHitsPlusAlign); return mem; } -void* GPUTPCSliceData::SetPointersWeights(void* mem) +void* GPUTPCTrackingData::SetPointersWeights(void* mem) { GPUProcessor::computePointerWithAlignment(mem, mHitWeights, mNumberOfHitsPlusAlign + 16 / sizeof(*mHitWeights)); return mem; } -void* GPUTPCSliceData::SetPointersScratch(void* mem, bool idsOnGPU) +void* GPUTPCTrackingData::SetPointersScratch(void* mem, bool idsOnGPU) { const int32_t firstHitInBinSize = GetGridSize(mNumberOfHits, GPUCA_ROW_COUNT) + GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT / sizeof(int32_t); GPUProcessor::computePointerWithAlignment(mem, mHitData, mNumberOfHitsPlusAlign); @@ -82,7 +82,7 @@ void* GPUTPCSliceData::SetPointersScratch(void* mem, bool idsOnGPU) return mem; } -void* GPUTPCSliceData::SetPointersClusterIds(void* mem, bool idsOnGPU) +void* GPUTPCTrackingData::SetPointersClusterIds(void* mem, bool idsOnGPU) { if (!idsOnGPU) { GPUProcessor::computePointerWithAlignment(mem, mClusterDataIndex, mNumberOfHitsPlusAlign); @@ -90,7 +90,7 @@ void* GPUTPCSliceData::SetPointersClusterIds(void* mem, bool idsOnGPU) return mem; } -void* GPUTPCSliceData::SetPointersRows(void* mem) +void* GPUTPCTrackingData::SetPointersRows(void* mem) { GPUProcessor::computePointerWithAlignment(mem, mRows, GPUCA_ROW_COUNT + 1); return mem; @@ -98,19 +98,19 @@ void* GPUTPCSliceData::SetPointersRows(void* mem) #endif -GPUd() void GPUTPCSliceData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) +GPUd() void GPUTPCTrackingData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) { 
maxY = row->mMaxY * 2.f / GPUCA_MIN_BIN_SIZE + 1; maxZ = (mem->param.continuousMaxTimeBin > 0 ? (mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : mem->param.tpcGeometry.TPCLength()) + 50; maxZ = maxZ / GPUCA_MIN_BIN_SIZE + 1; } -GPUd() uint32_t GPUTPCSliceData::GetGridSize(uint32_t nHits, uint32_t nRows) +GPUd() uint32_t GPUTPCTrackingData::GetGridSize(uint32_t nHits, uint32_t nRows) { return 128 * nRows + 4 * nHits; } -GPUdi() void GPUTPCSliceData::CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax) +GPUdi() void GPUTPCTrackingData::CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax) { float dz = zMax - zMin; float tfFactor = 1.f; @@ -144,7 +144,7 @@ GPUdi() static void UpdateMinMaxYZ(float& yMin, float& yMax, float& zMin, float& } } -GPUdii() void GPUTPCSliceData::SetRowGridEmpty(GPUTPCRow& GPUrestrict() row) +GPUdii() void GPUTPCTrackingData::SetRowGridEmpty(GPUTPCRow& GPUrestrict() row) { GPUAtomic(calink)* c = (GPUAtomic(calink)*)mFirstHitInBin + row.mFirstHitInBinOffset; row.mGrid.CreateEmpty(); @@ -161,7 +161,7 @@ GPUdii() void GPUTPCSliceData::SetRowGridEmpty(GPUTPCRow& GPUrestrict() row) } } -GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* GPUrestrict() mem, int32_t iSlice, float* tmpMinMax) +GPUdii() int32_t GPUTPCTrackingData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* GPUrestrict() mem, int32_t iSector, float* tmpMinMax) { #ifdef GPUCA_GPUCODE constexpr bool EarlyTransformWithoutClusterNative = false; @@ -220,8 +220,8 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n float zMin = 1.e6f; float zMax = 
-1.e6f; - const uint32_t NumberOfClusters = EarlyTransformWithoutClusterNative ? NumberOfClustersInRow[rowIndex] : mem->ioPtrs.clustersNative->nClusters[iSlice][rowIndex]; - const uint32_t RowOffset = EarlyTransformWithoutClusterNative ? RowOffsets[rowIndex] : (mem->ioPtrs.clustersNative->clusterOffset[iSlice][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSlice][0]); + const uint32_t NumberOfClusters = EarlyTransformWithoutClusterNative ? NumberOfClustersInRow[rowIndex] : mem->ioPtrs.clustersNative->nClusters[iSector][rowIndex]; + const uint32_t RowOffset = EarlyTransformWithoutClusterNative ? RowOffsets[rowIndex] : (mem->ioPtrs.clustersNative->clusterOffset[iSector][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSector][0]); constexpr const uint32_t maxN = 1u << (sizeof(calink) < 3 ? (sizeof(calink) * 8) : 24); GPUTPCRow& row = mRows[rowIndex]; if (iThread == 0) { @@ -229,7 +229,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n } if (NumberOfClusters >= maxN) { if (iThread == 0) { - mem->errorCodes.raiseError(GPUErrors::ERROR_SLICEDATA_HITINROW_OVERFLOW, iSlice * 1000 + rowIndex, NumberOfClusters, maxN); + mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_HITINROW_OVERFLOW, iSector * 1000 + rowIndex, NumberOfClusters, maxN); SetRowGridEmpty(row); } continue; @@ -265,7 +265,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n } else { for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) { float x, y, z; - GPUTPCConvertImpl::convert(*mem, iSlice, rowIndex, mem->ioPtrs.clustersNative->clusters[iSlice][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSlice][rowIndex][i].getTime(), x, y, z); + GPUTPCConvertImpl::convert(*mem, iSector, rowIndex, mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getTime(), x, y, z); UpdateMinMaxYZ(yMin, yMax, zMin, zMax, y, z); YZData[RowOffset + i] 
= CAMath::MakeFloat2(y, z); } @@ -310,7 +310,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n constexpr const int32_t maxBins = sizeof(calink) < 4 ? (int32_t)(1ul << (sizeof(calink) * 8)) : 0x7FFFFFFF; // NOLINT: false warning if (sizeof(calink) < 4 && numberOfBins >= maxBins) { if (iThread == 0) { - mem->errorCodes.raiseError(GPUErrors::ERROR_SLICEDATA_BIN_OVERFLOW, iSlice * 1000 + rowIndex, numberOfBins, maxBins); + mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_BIN_OVERFLOW, iSector * 1000 + rowIndex, numberOfBins, maxBins); SetRowGridEmpty(row); } continue; @@ -319,7 +319,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n const uint32_t maxnn = GetGridSize(NumberOfClusters, 1); if (nn >= maxnn) { if (iThread == 0) { - mem->errorCodes.raiseError(GPUErrors::ERROR_SLICEDATA_FIRSTHITINBIN_OVERFLOW, iSlice, nn, maxnn); + mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_FIRSTHITINBIN_OVERFLOW, iSector, nn, maxnn); SetRowGridEmpty(row); } continue; @@ -399,7 +399,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n if (iThread == 0 && !mem->param.par.continuousTracking) { const float maxAbsZ = CAMath::Max(CAMath::Abs(tmpMinMax[2]), CAMath::Abs(tmpMinMax[3])); if (maxAbsZ > 300) { - mem->errorCodes.raiseError(GPUErrors::ERROR_SLICEDATA_Z_OVERFLOW, iSlice, (uint32_t)maxAbsZ); + mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_Z_OVERFLOW, iSector, (uint32_t)maxAbsZ); SetRowGridEmpty(row); continue; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h similarity index 57% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h index 200a123b9bb83..d7d5e76bc9d44 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h @@ -9,11 +9,11 @@ // granted to it by virtue of 
its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCSliceData.h +/// \file GPUTPCTrackingData.h /// \author Matthias Kretz, Sergey Gorbunov, David Rohr -#ifndef GPUTPCSLICEDATA_H -#define GPUTPCSLICEDATA_H +#ifndef GPUTPCSECTORDATA_H +#define GPUTPCSECTORDATA_H #include "GPUTPCDef.h" #include "GPUTPCRow.h" @@ -21,20 +21,18 @@ #include "GPUParam.h" #include "GPUProcessor.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCClusterData; class GPUTPCHit; -class GPUTPCSliceData +class GPUTPCTrackingData { public: - GPUTPCSliceData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mGPUTextureBase(nullptr), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} + GPUTPCTrackingData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mGPUTextureBase(nullptr), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} #ifndef GPUCA_GPUCODE_DEVICE - ~GPUTPCSliceData() = default; + ~GPUTPCTrackingData() = default; void InitializeRows(const GPUParam& p); void SetMaxData(); void SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset); @@ -45,20 +43,20 @@ class GPUTPCSliceData void* SetPointersRows(void* mem); #endif - GPUd() int32_t InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* mem, int32_t iSlice, float* tmpMinMax); + GPUd() int32_t InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* mem, int32_t iSector, float* tmpMinMax); /** - * Return the number of hits in this slice. - */ + * Return the number of hits in this sector. 
+ */ GPUhd() int32_t NumberOfHits() const { return mNumberOfHits; } GPUhd() int32_t NumberOfHitsPlusAlign() const { return mNumberOfHitsPlusAlign; } GPUhd() int32_t ClusterIdOffset() const { return mClusterIdOffset; } /** - * Access to the hit links. - * - * The links values give the hit index in the row above/below. Or -1 if there is no link. - */ + * Access to the hit links. + * + * The links values give the hit index in the row above/below. Or -1 if there is no link. + */ GPUd() calink HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const; GPUd() calink HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const; @@ -74,41 +72,41 @@ class GPUTPCSliceData GPUd() void SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value); /** - * Return the y and z coordinate(s) of the given hit(s). - */ + * Return the y and z coordinate(s) of the given hit(s). + */ GPUd() cahit HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const; GPUd() cahit HitDataZ(const GPUTPCRow& row, const uint32_t& hitIndex) const; GPUd() cahit2 HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const; /** - * For a given bin index, content tells how many hits there are in the preceding bins. This maps - * directly to the hit index in the given row. - * - * \param binIndexes in the range 0 to row.Grid.N + row.Grid.Ny + 3. - */ + * For a given bin index, content tells how many hits there are in the preceding bins. This maps + * directly to the hit index in the given row. + * + * \param binIndexes in the range 0 to row.Grid.N + row.Grid.Ny + 3. + */ GPUd() calink FirstHitInBin(const GPUTPCRow& row, calink binIndex) const; /** - * If the given weight is higher than what is currently stored replace with the new weight. - */ + * If the given weight is higher than what is currently stored replace with the new weight. 
+ */ GPUd() void MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight); GPUd() void SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight); /** - * Return the maximal weight the given hit got from one tracklet - */ + * Return the maximal weight the given hit got from one tracklet + */ GPUd() int32_t HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const; /** - * Returns the index in the original GPUTPCClusterData object of the given hit - */ + * Returns the index in the original GPUTPCClusterData object of the given hit + */ GPUhd() int32_t ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const; GPUd() GPUglobalref() const int32_t* ClusterDataIndex() const { return mClusterDataIndex; } GPUd() GPUglobalref() int32_t* ClusterDataIndex() { return mClusterDataIndex; } /** - * Return the row object for the given row index. - */ + * Return the row object for the given row index. + */ GPUhdi() GPUglobalref() const GPUTPCRow& Row(int32_t rowIndex) const { return mRows[rowIndex]; } GPUhdi() GPUglobalref() GPUTPCRow* Rows() const { return mRows; } @@ -122,8 +120,8 @@ class GPUTPCSliceData private: #ifndef GPUCA_GPUCODE - GPUTPCSliceData& operator=(const GPUTPCSliceData&) = delete; // ROOT 5 tries to use this if it is not private - GPUTPCSliceData(const GPUTPCSliceData&) = delete; // + GPUTPCTrackingData& operator=(const GPUTPCTrackingData&) = delete; // ROOT 5 tries to use this if it is not private + GPUTPCTrackingData(const GPUTPCTrackingData&) = delete; // #endif GPUd() void CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax); GPUd() void SetRowGridEmpty(GPUTPCRow& GPUrestrict() row); @@ -133,7 +131,7 @@ class GPUTPCSliceData friend class GPUTPCNeighboursFinder; friend class GPUTPCStartHitsFinder; - int32_t mNumberOfHits; // the number of hits in this slice + int32_t mNumberOfHits; // the number of hits in this sector int32_t 
mNumberOfHitsPlusAlign; int32_t mClusterIdOffset; @@ -141,56 +139,55 @@ class GPUTPCSliceData GPUglobalref() GPUTPCRow* mRows; // The row objects needed for most accessor functions - GPUglobalref() calink* mLinkUpData; // hit index in the row above which is linked to the given (global) hit index - GPUglobalref() calink* mLinkDownData; // hit index in the row below which is linked to the given (global) hit index - GPUglobalref() cahit2* mHitData; // packed y,z coordinate of the given (global) hit index + GPUglobalref() calink* mLinkUpData; // hit index in the row above which is linked to the given (global) hit index + GPUglobalref() calink* mLinkDownData; // hit index in the row below which is linked to the given (global) hit index + GPUglobalref() cahit2* mHitData; // packed y,z coordinate of the given (global) hit index GPUglobalref() int32_t* mClusterDataIndex; // see ClusterDataIndex() /* - * The size of the array is row.Grid.N + row.Grid.Ny + 3. The row.Grid.Ny + 3 is an optimization - * to remove the need for bounds checking. The last values are the same as the entry at [N - 1]. - */ - GPUglobalref() calink* mFirstHitInBin; // see FirstHitInBin - GPUglobalref() GPUAtomic(uint32_t) * mHitWeights; // the weight of the longest tracklet crossed the cluster + * The size of the array is row.Grid.N + row.Grid.Ny + 3. The row.Grid.Ny + 3 is an optimization + * to remove the need for bounds checking. The last values are the same as the entry at [N - 1]. 
+ */ + GPUglobalref() calink* mFirstHitInBin; // see FirstHitInBin + GPUglobalref() GPUAtomic(uint32_t) * mHitWeights; // the weight of the longest tracklet crossed the cluster GPUglobalref() const GPUTPCClusterData* mClusterData; }; -GPUdi() calink GPUTPCSliceData::HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkUpData[row.mHitNumberOffset + hitIndex]; } +GPUdi() calink GPUTPCTrackingData::HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkUpData[row.mHitNumberOffset + hitIndex]; } -GPUdi() calink GPUTPCSliceData::HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkDownData[row.mHitNumberOffset + hitIndex]; } +GPUdi() calink GPUTPCTrackingData::HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkDownData[row.mHitNumberOffset + hitIndex]; } -GPUdi() void GPUTPCSliceData::SetHitLinkUpData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) +GPUdi() void GPUTPCTrackingData::SetHitLinkUpData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) { mLinkUpData[row.mHitNumberOffset + hitIndex] = value; } -GPUdi() void GPUTPCSliceData::SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) +GPUdi() void GPUTPCTrackingData::SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) { mLinkDownData[row.mHitNumberOffset + hitIndex] = value; } -GPUdi() cahit GPUTPCSliceData::HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].x; } +GPUdi() cahit GPUTPCTrackingData::HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].x; } -GPUdi() cahit GPUTPCSliceData::HitDataZ(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].y; } +GPUdi() cahit GPUTPCTrackingData::HitDataZ(const GPUTPCRow& row, const uint32_t& 
hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].y; } -GPUdi() cahit2 GPUTPCSliceData::HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex]; } +GPUdi() cahit2 GPUTPCTrackingData::HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex]; } -GPUdi() calink GPUTPCSliceData::FirstHitInBin(const GPUTPCRow& row, calink binIndex) const { return mFirstHitInBin[row.mFirstHitInBinOffset + binIndex]; } +GPUdi() calink GPUTPCTrackingData::FirstHitInBin(const GPUTPCRow& row, calink binIndex) const { return mFirstHitInBin[row.mFirstHitInBinOffset + binIndex]; } -GPUhdi() int32_t GPUTPCSliceData::ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const { return mClusterDataIndex[row.mHitNumberOffset + hitIndex]; } +GPUhdi() int32_t GPUTPCTrackingData::ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const { return mClusterDataIndex[row.mHitNumberOffset + hitIndex]; } -GPUdi() void GPUTPCSliceData::MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) +GPUdi() void GPUTPCTrackingData::MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) { CAMath::AtomicMax(&mHitWeights[row.mHitNumberOffset + hitIndex], weight); } -GPUdi() void GPUTPCSliceData::SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) +GPUdi() void GPUTPCTrackingData::SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) { mHitWeights[row.mHitNumberOffset + hitIndex] = weight; } -GPUdi() int32_t GPUTPCSliceData::HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } -} // namespace gpu -} // namespace o2 +GPUdi() int32_t GPUTPCTrackingData::HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } +} // namespace o2::gpu -#endif // GPUTPCSLICEDATA_H +#endif // GPUTPCSECTORDATA_H 
diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h similarity index 85% rename from GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h index 873368f1635a0..10ff0a32aeaf3 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h @@ -18,9 +18,7 @@ #include "GPUTPCBaseTrackParam.h" #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCTracklet @@ -48,13 +46,12 @@ class GPUTPCTracklet GPUhd() void SetHitWeight(const int32_t w) { mHitWeight = w; } private: - int32_t mFirstRow; // first TPC row // TODO: We can use smaller data format here! - int32_t mLastRow; // last TPC row + int32_t mFirstRow; // first TPC row // TODO: We can use smaller data format here! + int32_t mLastRow; // last TPC row GPUTPCBaseTrackParam mParam; // tracklet parameters - int32_t mHitWeight; // Hit Weight of Tracklet - uint32_t mFirstHit; // first hit in row hit array + int32_t mHitWeight; // Hit Weight of Tracklet + uint32_t mFirstHit; // first hit in row hit array }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLET_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx similarity index 91% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx index 04833375ad6df..3aac31c87498c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx @@ -67,13 +67,13 @@ GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_ uint32_t hitout = CAMath::AtomicAdd(tracker.NRowHits(), nHits); if (hitout + nHits > tracker.NMaxRowHits()) { - tracker.raiseError(GPUErrors::ERROR_TRACKLET_HIT_OVERFLOW, 
tracker.ISlice(), hitout + nHits, tracker.NMaxRowHits()); + tracker.raiseError(GPUErrors::ERROR_TRACKLET_HIT_OVERFLOW, tracker.ISector(), hitout + nHits, tracker.NMaxRowHits()); CAMath::AtomicExch(tracker.NRowHits(), tracker.NMaxRowHits()); return; } uint32_t itrout = CAMath::AtomicAdd(tracker.NTracklets(), 1u); if (itrout >= tracker.NMaxTracklets()) { - tracker.raiseError(GPUErrors::ERROR_TRACKLET_OVERFLOW, tracker.ISlice(), itrout, tracker.NMaxTracklets()); + tracker.raiseError(GPUErrors::ERROR_TRACKLET_OVERFLOW, tracker.ISector(), itrout, tracker.NMaxTracklets()); CAMath::AtomicExch(tracker.NTracklets(), tracker.NMaxTracklets()); return; } @@ -132,14 +132,14 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, float y = y0 + hh.x * stepY; float z = z0 + hh.y * stepZ; if (iRow != r.mStartRow || !tracker.Param().par.continuousTracking) { - tParam.ConstrainZ(z, tracker.ISlice(), z0, r.mLastZ); - tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, z); + tParam.ConstrainZ(z, tracker.ISector(), z0, r.mLastZ); + tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISector(), iRow, x, y, z); } if (iRow == r.mStartRow) { if (tracker.Param().par.continuousTracking) { float refZ = ((z > 0) ? 
tracker.Param().rec.tpc.defaultZOffsetOverR : -tracker.Param().rec.tpc.defaultZOffsetOverR) * x; float zTmp = refZ; - tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, zTmp); + tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISector(), iRow, x, y, zTmp); z += zTmp - refZ; // Add zCorrection (=zTmp - refZ) to z, such that zOffset is set such, that transformed (z - zOffset) becomes refZ tParam.SetZOffset(z - refZ); tParam.SetZ(refZ); @@ -194,7 +194,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, tracker.GetErrors2Seeding(iRow, tParam.GetZ(), sinPhi, tParam.GetDzDs(), -1.f, err2Y, err2Z); // TODO: Use correct time if (r.mNHits >= 10) { - const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISlice() >= 18); + const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISector() >= 18); err2Y += sErr2; err2Z += sErr2; const float kFactor = tracker.Param().rec.tpc.hitPickUpFactor * tracker.Param().rec.tpc.hitPickUpFactor * 3.5f * 3.5f; @@ -262,8 +262,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, rowHit = CALINK_INVAL; break; } - tParam.ConstrainZ(tmpZ, tracker.ISlice(), z0, r.mLastZ); - tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISlice(), iRow, tmpY, tmpZ, x); + tParam.ConstrainZ(tmpZ, tracker.ISector(), z0, r.mLastZ); + tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISector(), iRow, tmpY, tmpZ, x); } CADEBUG(printf("%14s: SEA TRACK ROW %3d X %8.3f -", "", iRow, tParam.X()); for (int32_t i = 0; i < 5; i++) { printf(" %8.3f", tParam.Par()[i]); } printf(" -"); for (int32_t i = 0; i < 15; i++) { printf(" %8.3f", tParam.Cov()[i]); } printf("\n")); @@ -286,7 +286,7 @@ GPUdic(2, 1) void 
GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); #endif //! GPUCA_TEXTURE_FETCH_CONSTRUCTOR - tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISlice(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected); + tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISector(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected); if (tracker.Param().rec.tpc.rejectEdgeClustersInSeeding && tracker.Param().rejectEdgeClusterByY(yUncorrected, iRow, CAMath::Sqrt(tParam.Err2Y()))) { rowHit = CALINK_INVAL; @@ -297,7 +297,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, float err2Y, err2Z; tracker.GetErrors2Seeding(iRow, *((GPUTPCTrackParam*)&tParam), -1.f, err2Y, err2Z); // TODO: Use correct time if (r.mNHits >= 10) { - const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISlice() >= 18); + const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISector() >= 18); err2Y += sErr2; err2Z += sErr2; } @@ -377,8 +377,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } while (false); (void)found; if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) { - uint32_t pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISlice(), iRow, yUncorrected)); - if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISlice(), iRow, pad)) { + uint32_t pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISector(), iRow, yUncorrected)); + if (pad < tracker.Param().tpcGeometry.NPads(iRow) && 
tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISector(), iRow, pad)) { r.mNMissed--; rowHit = CALINK_DEAD_CHANNEL; } @@ -446,12 +446,12 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() GPUTPCT { float tmpY, tmpZ; if (tParam.GetPropagatedYZ(tracker.Param().bzCLight, x, tmpY, tmpZ)) { - if (tracker.ISlice() < GPUCA_NSLICES / 2 ? (tmpZ < 0) : (tmpZ > 0)) { + if (tracker.ISector() < GPUCA_NSECTORS / 2 ? (tmpZ < 0) : (tmpZ > 0)) { tmpZ = 0; - } else if (tracker.ISlice() < GPUCA_NSLICES / 2 ? (tmpZ > GPUTPCGeometry::TPCLength()) : (tmpZ < -GPUTPCGeometry::TPCLength())) { - tmpZ = tracker.ISlice() < GPUCA_NSLICES / 2 ? GPUTPCGeometry::TPCLength() : -GPUTPCGeometry::TPCLength(); + } else if (tracker.ISector() < GPUCA_NSECTORS / 2 ? (tmpZ > GPUTPCGeometry::TPCLength()) : (tmpZ < -GPUTPCGeometry::TPCLength())) { + tmpZ = tracker.ISector() < GPUCA_NSECTORS / 2 ? GPUTPCGeometry::TPCLength() : -GPUTPCGeometry::TPCLength(); } - tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISlice(), iRow, tmpY, tmpZ, x); + tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISector(), iRow, tmpY, tmpZ, x); } else { r.mGo = 0; continue; @@ -476,12 +476,12 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() GPUTPCT } template <> -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) { if (get_local_id(0) == 0) { sMem.mNStartHits = *tracker.NStartHits(); } - CA_SHARED_CACHE(&sMem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); + CA_SHARED_CACHE(&sMem.mRows[0], tracker.TrackingDataRows(), 
GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); GPUbarrier(); GPUTPCThreadMemory rMem; @@ -492,19 +492,19 @@ GPUdii() void GPUTPCTrackletConstructor::Thread -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker0) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker0) { GPUconstantref() GPUTPCTracker* GPUrestrict() pTracker = &tracker0; #ifdef GPUCA_GPUCODE - int32_t mySlice = get_group_id(0) % GPUCA_NSLICES; - int32_t currentSlice = -1; + int32_t mySector = get_group_id(0) % GPUCA_NSECTORS; + int32_t currentSector = -1; if (get_local_id(0) == 0) { sMem.mNextStartHitFirstRun = 1; } GPUCA_UNROLL(, U()) - for (uint32_t iSlice = 0; iSlice < GPUCA_NSLICES; iSlice++) { - GPUconstantref() GPUTPCTracker& GPUrestrict() tracker = pTracker[mySlice]; + for (uint32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { + GPUconstantref() GPUTPCTracker& GPUrestrict() tracker = pTracker[mySector]; GPUTPCThreadMemory rMem; @@ -515,13 +515,13 @@ GPUdii() void GPUTPCTrackletConstructor::Thread= 0 && rMem.mISH < sMem.mNStartHits) { @@ -529,13 +529,13 @@ GPUdii() void GPUTPCTrackletConstructor::Thread= GPUCA_NSLICES) { - mySlice = 0; + if (++mySector >= GPUCA_NSECTORS) { + mySector = 0; } } #else - for (int32_t iSlice = 0; iSlice < GPUCA_NSLICES; iSlice++) { - Thread(nBlocks, nThreads, iBlock, iThread, sMem, pTracker[iSlice]); + for (int32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { + Thread(nBlocks, nThreads, iBlock, iThread, sMem, pTracker[iSector]); } #endif } @@ -549,7 +549,7 @@ GPUd() int32_t GPUTPCTrackletConstructor::FetchTracklet(GPUconstantref() GPUTPCT if (get_local_id(0) == 0) { int32_t firstStartHit = -2; if (sMem.mNextStartHitFirstRun == 1) { - firstStartHit = (get_group_id(0) - tracker.ISlice()) / 
GPUCA_NSLICES * GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor); + firstStartHit = (get_group_id(0) - tracker.ISector()) / GPUCA_NSECTORS * GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor); sMem.mNextStartHitFirstRun = 0; } else { if (tracker.GPUParameters()->nextStartHit < nStartHit) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h similarity index 74% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index 9af1eeb0ae7b2..8757ed87072da 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCTrackletConstructor @@ -34,8 +32,8 @@ class GPUTPCTrackletConstructor { public: enum K { - singleSlice = 0, - allSlices = 1 + singleSector = 0, + allSectors = 1 }; class GPUTPCThreadMemory @@ -53,31 +51,31 @@ class GPUTPCTrackletConstructor protected: // WARNING: This data is copied element by element in CopyTrackletTempData. Changes to members of this class must be reflected in CopyTrackletTempData!!! 
- int32_t mISH; // track index - int32_t mFirstRow; // first row index - int32_t mLastRow; // last row index - int32_t mStartRow; // row index of first hit in seed - int32_t mEndRow; // row index of last hit in seed - calink mCurrIH; // indef of the current hit - int8_t mGo; // do fit/searching flag - int32_t mStage; // reco stage - int32_t mNHits; // n track hits + int32_t mISH; // track index + int32_t mFirstRow; // first row index + int32_t mLastRow; // last row index + int32_t mStartRow; // row index of first hit in seed + int32_t mEndRow; // row index of last hit in seed + calink mCurrIH; // indef of the current hit + int8_t mGo; // do fit/searching flag + int32_t mStage; // reco stage + int32_t mNHits; // n track hits int32_t mNHitsEndRow; // n hits at end row int32_t mNMissed; // n missed hits during search - float mLastY; // Y of the last fitted cluster - float mLastZ; // Z of the last fitted cluster + float mLastY; // Y of the last fitted cluster + float mLastZ; // Z of the last fitted cluster }; struct GPUSharedMemory { - CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); // rows - int32_t mNextStartHitFirst; // First start hit to be processed by CUDA block during next iteration - int32_t mNextStartHitCount; // Number of start hits to be processed by CUDA block during next iteration - int32_t mNextStartHitFirstRun; // First run for dynamic scheduler? - int32_t mNStartHits; // Total number of start hits + CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); // rows + int32_t mNextStartHitFirst; // First start hit to be processed by CUDA block during next iteration + int32_t mNextStartHitCount; // Number of start hits to be processed by CUDA block during next iteration + int32_t mNextStartHitFirstRun; // First run for dynamic scheduler? 
+ int32_t mNStartHits; // Total number of start hits #ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE int32_t fMaxSync; // temporary shared variable during profile creation -#endif // GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE +#endif // GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE }; GPUd() static void InitTracklet(GPUTPCTrackParam& tParam); @@ -99,7 +97,7 @@ class GPUTPCTrackletConstructor GPUd() static int32_t GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -108,7 +106,6 @@ class GPUTPCTrackletConstructor GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLETCONSTRUCTOR_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx index d5492602a4283..8810b692e1377 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx @@ -86,13 +86,13 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread if (nHits >= minHits) { uint32_t nFirstTrackHit = CAMath::AtomicAdd(tracker.NTrackHits(), (uint32_t)nHits); if (nFirstTrackHit + nHits > 
tracker.NMaxTrackHits()) { - tracker.raiseError(GPUErrors::ERROR_TRACK_HIT_OVERFLOW, tracker.ISlice(), nFirstTrackHit + nHits, tracker.NMaxTrackHits()); + tracker.raiseError(GPUErrors::ERROR_TRACK_HIT_OVERFLOW, tracker.ISector(), nFirstTrackHit + nHits, tracker.NMaxTrackHits()); CAMath::AtomicExch(tracker.NTrackHits(), tracker.NMaxTrackHits()); return; } uint32_t itrout = CAMath::AtomicAdd(tracker.NTracks(), 1u); if (itrout >= tracker.NMaxTracks()) { - tracker.raiseError(GPUErrors::ERROR_TRACK_OVERFLOW, tracker.ISlice(), itrout, tracker.NMaxTracks()); + tracker.raiseError(GPUErrors::ERROR_TRACK_OVERFLOW, tracker.ISector(), itrout, tracker.NMaxTracks()); CAMath::AtomicExch(tracker.NTracks(), tracker.NMaxTracks()); return; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h similarity index 94% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index 80a29d21edac3..bb969d866ef29 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -44,7 +42,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -52,7 +50,6 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, 
processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLETSELECTOR_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx deleted file mode 100644 index 3d0102f2938e6..0000000000000 --- a/GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCRow.cxx -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#include "GPUTPCRow.h" -using namespace o2::gpu; - -#if !defined(GPUCA_GPUCODE) -GPUTPCRow::GPUTPCRow() : mNHits(0), mX(0), mMaxY(0), mGrid(), mHy0(0), mHz0(0), mHstepY(0), mHstepZ(0), mHstepYi(0), mHstepZi(0), mHitNumberOffset(0), mFirstHitInBinOffset(0) -{ - // dummy constructor -} - -#endif diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 53ed77fe62d8c..58866224943c0 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -578,7 +578,7 @@ int32_t LoadEvent(int32_t iEvent, int32_t x) if (!configStandalone.runTransformation) { chainTracking->mIOPtrs.clustersNative = nullptr; } else { - for (int32_t i = 0; i < chainTracking->NSLICES; i++) { + for (int32_t i = 0; i < chainTracking->NSECTORS; i++) { if (chainTracking->mIOPtrs.rawClusters[i]) { if (configStandalone.proc.debugLevel >= 2) { printf("Converting Legacy Raw Cluster to Native\n"); 
@@ -687,7 +687,7 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU chainTrackingAsync->mIOPtrs.nMCInfosTPCCol = 0; chainTrackingAsync->mIOPtrs.mcLabelsTPC = nullptr; chainTrackingAsync->mIOPtrs.nMCLabelsTPC = 0; - for (int32_t i = 0; i < chainTracking->NSLICES; i++) { + for (int32_t i = 0; i < chainTracking->NSECTORS; i++) { chainTrackingAsync->mIOPtrs.clusterData[i] = nullptr; chainTrackingAsync->mIOPtrs.nClusterData[i] = 0; chainTrackingAsync->mIOPtrs.rawClusters[i] = nullptr; diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 32cdb246cf417..1c61316ed454e 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -144,7 +144,7 @@ include_directories(${GPU_DIR}/Common ${GPUTRACKING_DIR}/Merger ${GPUTRACKING_DIR}/Refit ${GPUTRACKING_DIR}/qa - ${GPUTRACKING_DIR}/SliceTracker + ${GPUTRACKING_DIR}/SectorTracker ${GPUTRACKING_DIR}/DataCompression ${GPUTRACKING_DIR}/TRDTracking ${GPUTRACKING_DIR}/TPCClusterFinder diff --git a/GPU/GPUTracking/Standalone/tools/createGeo.sh b/GPU/GPUTracking/Standalone/tools/createGeo.sh index 33973cf26079a..a449e1997a16e 100755 --- a/GPU/GPUTracking/Standalone/tools/createGeo.sh +++ b/GPU/GPUTracking/Standalone/tools/createGeo.sh @@ -7,5 +7,5 @@ alienv load O2/latest o2-sim -n 1 -export ROOT_INCLUDE_PATH=$ROOT_INCLUDE_PATH:/home/qon/alice/GPU/Common/:/home/qon/alice/GPU/GPUTracking/Base:/home/qon/alice/GPU/GPUTracking/SliceTracker:/home/qon/alice/GPU/GPUTracking/Merger:/home/qon/alice/GPU/GPUTracking/TRDTracking +export ROOT_INCLUDE_PATH=$ROOT_INCLUDE_PATH:/home/qon/alice/GPU/Common/:/home/qon/alice/GPU/GPUTracking/Base:/home/qon/alice/GPU/GPUTracking/SectorTracker:/home/qon/alice/GPU/GPUTracking/Merger:/home/qon/alice/GPU/GPUTracking/TRDTracking root -l -q -b createGeo.C+ diff --git a/GPU/GPUTracking/TPCClusterFinder/CfConsts.h b/GPU/GPUTracking/TPCClusterFinder/CfConsts.h index 
a53f73ed69e26..62695f2ae30a5 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfConsts.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfConsts.h @@ -17,11 +17,7 @@ #include "clusterFinderDefs.h" -namespace o2 -{ -namespace gpu -{ -namespace cfconsts +namespace o2::gpu::cfconsts { GPUconstexpr() tpccf::Delta2 InnerNeighbors[8] = @@ -190,8 +186,6 @@ GPUconstexpr() uint32_t NoiseSuppressionMinima[NOISE_SUPPRESSION_NEIGHBOR_NUM] = (1 << 24), (1 << 24) | (1 << 25)}; -} // namespace cfconsts -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu::cfconsts #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/CfFragment.h b/GPU/GPUTracking/TPCClusterFinder/CfFragment.h index de5dfe9e1d683..dcea2bf8e966c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfFragment.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfFragment.h @@ -27,9 +27,9 @@ struct CfFragment { OverlapTimebins = 8, }; - // Time offset of this sub slice within the entire time slice + // Time offset of this sub sector within the entire time sector tpccf::TPCTime start = 0; - // Number of time bins to process in this slice + // Number of time bins to process in this sector tpccf::TPCFragmentTime length = 0; size_t digitsStart = 0; // Start digits in this fragment. 
Only used when zero suppression is skipped @@ -38,23 +38,23 @@ struct CfFragment { bool hasBacklog = false; bool hasFuture = false; - tpccf::TPCTime totalSliceLength = 0; - tpccf::TPCFragmentTime maxSubSliceLength = 0; + tpccf::TPCTime totalSectorLength = 0; + tpccf::TPCFragmentTime maxSubSectorLength = 0; GPUdDefault() CfFragment() = default; - GPUd() CfFragment(tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen) : CfFragment(0, false, 0, totalSliceLen, maxSubSliceLen) {} + GPUd() CfFragment(tpccf::TPCTime totalSectorLen, tpccf::TPCFragmentTime maxSubSectorLen) : CfFragment(0, false, 0, totalSectorLen, maxSubSectorLen) {} GPUdi() bool isEnd() const { return length == 0; } GPUdi() CfFragment next() const { - return CfFragment{index + 1, hasFuture, tpccf::TPCTime(start + length - (hasFuture ? 2 * OverlapTimebins : 0)), totalSliceLength, maxSubSliceLength}; + return CfFragment{index + 1, hasFuture, tpccf::TPCTime(start + length - (hasFuture ? 2 * OverlapTimebins : 0)), totalSectorLength, maxSubSectorLength}; } GPUdi() uint32_t count() const { - return (totalSliceLength + maxSubSliceLength - 4 * OverlapTimebins - 1) / (maxSubSliceLength - 2 * OverlapTimebins); + return (totalSectorLength + maxSubSectorLength - 4 * OverlapTimebins - 1) / (maxSubSectorLength - 2 * OverlapTimebins); } GPUdi() tpccf::TPCTime first() const @@ -104,16 +104,16 @@ struct CfFragment { } private: - GPUd() CfFragment(uint32_t index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen) + GPUd() CfFragment(uint32_t index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSectorLen, tpccf::TPCFragmentTime maxSubSectorLen) { this->index = index_; this->hasBacklog = hasBacklog_; this->start = start_; - tpccf::TPCTime remainder = totalSliceLen - start; - this->hasFuture = remainder > tpccf::TPCTime(maxSubSliceLen); - this->length = hasFuture ? 
maxSubSliceLen : remainder; - this->totalSliceLength = totalSliceLen; - this->maxSubSliceLength = maxSubSliceLen; + tpccf::TPCTime remainder = totalSectorLen - start; + this->hasFuture = remainder > tpccf::TPCTime(maxSubSectorLen); + this->length = hasFuture ? maxSubSectorLen : remainder; + this->totalSectorLength = totalSectorLen; + this->maxSubSectorLength = maxSubSectorLen; } }; diff --git a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h index 10e375ee6f4bd..b4a4752b0f932 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h +++ b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h @@ -17,9 +17,7 @@ #include "clusterFinderDefs.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { #define INVALID_TIME_BIN (-GPUCF_PADDING_TIME - 1) @@ -59,7 +57,6 @@ struct ChargePos { inline constexpr ChargePos INVALID_CHARGE_POS{255, 255, INVALID_TIME_BIN}; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h index d6107a6503e86..2344c089a4436 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h @@ -22,17 +22,15 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCCFChainContext { struct FragmentData { - uint32_t nDigits[GPUCA_NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - uint32_t nPages[GPUCA_NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - std::vector pageDigits[GPUCA_NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - GPUTPCClusterFinder::MinMaxCN minMaxCN[GPUCA_NSLICES][GPUTrackingInOutZS::NENDPOINTS]; + uint32_t nDigits[GPUCA_NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + uint32_t nPages[GPUCA_NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + std::vector pageDigits[GPUCA_NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + GPUTPCClusterFinder::MinMaxCN minMaxCN[GPUCA_NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; 
}; struct PtrSave { @@ -45,21 +43,21 @@ struct GPUTPCCFChainContext { std::vector fragmentData; uint32_t nPagesTotal; uint32_t nPagesFragmentMax; - uint32_t nPagesSector[GPUCA_NSLICES]; - uint32_t nDigitsEndpointMax[GPUCA_NSLICES]; + uint32_t nPagesSector[GPUCA_NSECTORS]; + uint32_t nDigitsEndpointMax[GPUCA_NSECTORS]; uint32_t tpcMaxTimeBin; bool abandonTimeframe; uint32_t nFragments; CfFragment fragmentFirst; - std::pair nextPos[GPUCA_NSLICES]; - PtrSave ptrSave[GPUCA_NSLICES]; + std::pair nextPos[GPUCA_NSECTORS]; + PtrSave ptrSave[GPUCA_NSECTORS]; const o2::tpc::ClusterNativeAccess* ptrClusterNativeSave; void prepare(bool tpcZS, const CfFragment& fragmentMax) { abandonTimeframe = false; nPagesTotal = nPagesFragmentMax = 0; - for (uint32_t i = 0; i < GPUCA_NSLICES; i++) { + for (uint32_t i = 0; i < GPUCA_NSECTORS; i++) { nPagesSector[i] = 0; nDigitsEndpointMax[i] = 0; } @@ -72,7 +70,7 @@ struct GPUTPCCFChainContext { } for (uint32_t i = 0; i < nFragments; i++) { - for (uint32_t j = 0; j < GPUCA_NSLICES; j++) { + for (uint32_t j = 0; j < GPUCA_NSECTORS; j++) { for (uint32_t k = 0; k < GPUTrackingInOutZS::NENDPOINTS; k++) { fragmentData[i].nDigits[j][k] = fragmentData[i].nPages[j][k] = 0; fragmentData[i].pageDigits[j][k].clear(); @@ -83,7 +81,6 @@ struct GPUTPCCFChainContext { } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx index 8f184836de6df..8dbc5804f8fb8 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx @@ -64,7 +64,7 @@ GPUd() void GPUTPCCFChargeMapFiller::fillFromDigitsImpl(int32_t nBlocks, int32_t ChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp())); positions[idx] = pos; float q = digit.getChargeFloat(); - q *= 
clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(clusterer.mISlice, digit.getRow(), digit.getPad()); + q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(clusterer.mISector, digit.getRow(), digit.getPad()); chargeMap[pos] = PackedCharge(q); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index ad07f2b93f3e0..1aeae812f5193 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -261,7 +261,7 @@ GPUd() uint32_t GPUTPCCFClusterizer::sortIntoBuckets(processorType& clusterer, c if (index < maxElemsPerBucket) { buckets[maxElemsPerBucket * row + index] = cluster; } else { - clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISlice * 1000 + row, index, maxElemsPerBucket); + clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISector * 1000 + row, index, maxElemsPerBucket); CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket); } return index; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index 3727e23bcf16c..e7634fa397bae 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -42,13 +42,13 @@ GPUdii() void GPUTPCCFDecodeZS::Thread(int32_t nBloc GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t firstHBF) { - const uint32_t slice = clusterer.mISlice; + const uint32_t sector = clusterer.mISector; #ifdef GPUCA_GPUCODE const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint; #else const uint32_t endpoint = iBlock; #endif - const GPUTrackingInOutZS::GPUTrackingInOutZSSlice& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->slice[slice]; + const 
GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector]; if (zs.count[endpoint] == 0) { return; } @@ -179,7 +179,7 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared if (inFragment) { float q = float(byte & mask) * decodeBitsFactor; - q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(slice, row, pad); + q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, row, pad); chargeMap[pos] = PackedCharge(q); } pad++; @@ -277,7 +277,7 @@ GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, processorT (void)nDecoded; #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (iThread == 0 && nDecoded != decHdr->nADCsamples) { - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISlice * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded); /*#ifndef GPUCA_GPUCODE FILE* foo = fopen("dump.bin", "w+b"); fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo); @@ -463,7 +463,7 @@ GPUd() bool GPUTPCCFDecodeZSLink::ChannelIsActive(const uint32_t* chan, uint8_t template GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, typename Decoder::GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF) { - const uint32_t slice = clusterer.mISlice; + const uint32_t sector = clusterer.mISector; #ifdef GPUCA_GPUCODE const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint; @@ -471,7 +471,7 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, const uint32_t endpoint = iBlock; #endif - const GPUTrackingInOutZS::GPUTrackingInOutZSSlice& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->slice[slice]; + const GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = 
clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector]; if (zs.count[endpoint] == 0) { return; } @@ -514,7 +514,7 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, if (iThread == 0 && iBlock < nBlocks - 1) { uint32_t maxOffset = clusterer.mPzsOffsets[iBlock + 1].offset; if (pageDigitOffset != maxOffset) { - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISlice * 1000 + endpoint, pageDigitOffset, maxOffset); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISector * 1000 + endpoint, pageDigitOffset, maxOffset); } } #endif @@ -550,12 +550,12 @@ GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorTy GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset) { - const uint32_t slice = clusterer.mISlice; + const uint32_t sector = clusterer.mISector; ChargePos* positions = clusterer.mPpositions; #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (padAndRow.getRow() >= GPUCA_ROW_COUNT) { positions[positionOffset] = INVALID_CHARGE_POS; - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_ROW, clusterer.mISlice * 1000 + padAndRow.getRow()); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_ROW, clusterer.mISector * 1000 + padAndRow.getRow()); return; } #endif @@ -564,7 +564,7 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, floa ChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime); positions[positionOffset] = pos; - charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(slice, padAndRow.getRow(), padAndRow.getPad()); + charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad()); chargeMap[pos] = PackedCharge(charge); } @@ -627,7 +627,7 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, pro nSamplesWrittenTB = 
FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten); #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (iThread == 0) { - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISlice * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage)); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage)); } #endif } @@ -642,7 +642,7 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, pro #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (iThread == 0 && nSamplesWritten != nSamplesInPage) { - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISlice * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten); /*#ifndef GPUCA_GPUCODE FILE* foo = fopen("dump.bin", "w+b"); fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx index edc4fd6bab56c..efed3643800b6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx @@ -120,7 +120,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread bufferSize) { - clusterer.raiseError(stage ? GPUErrors::ERROR_CF_CLUSTER_OVERFLOW : GPUErrors::ERROR_CF_PEAK_OVERFLOW, clusterer.mISlice, nFinal, bufferSize); + clusterer.raiseError(stage ? 
GPUErrors::ERROR_CF_CLUSTER_OVERFLOW : GPUErrors::ERROR_CF_PEAK_OVERFLOW, clusterer.mISector, nFinal, bufferSize); nFinal = bufferSize; } if (stage) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx index e009ac12389b4..613c4ad9e5fa6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx @@ -103,11 +103,11 @@ void GPUTPCClusterFinder::RegisterMemoryAllocation() if (mRec->GetProcessingSettings().runMC) { scratchType |= GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU; } - mScratchId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersScratch, scratchType, "TPCClustererScratch", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::ClustererScratch, (uint16_t)(mISlice % mRec->GetProcessingSettings().nTPCClustererLanes)}); + mScratchId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersScratch, scratchType, "TPCClustererScratch", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::ClustererScratch, (uint16_t)(mISector % mRec->GetProcessingSettings().nTPCClustererLanes)}); mMemoryId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersMemory, GPUMemoryResource::MEMORY_PERMANENT, "TPCClustererMemory"); mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersOutput, GPUMemoryResource::MEMORY_OUTPUT | GPUMemoryResource::MEMORY_STACK, "TPCClustererOutput"); - mZSId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersZS, GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_CUSTOM_TRANSFER | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK, "TPCClustererZSData", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::ClustererZS, (uint16_t)(mISlice % mRec->GetProcessingSettings().nTPCClustererLanes)}); + mZSId = mRec->RegisterMemoryAllocation(this, 
&GPUTPCClusterFinder::SetPointersZS, GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_CUSTOM_TRANSFER | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK, "TPCClustererZSData", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::ClustererZS, (uint16_t)(mISector % mRec->GetProcessingSettings().nTPCClustererLanes)}); mZSOffsetId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersZSOffset, GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_CUSTOM_TRANSFER | GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_STACK, "TPCClustererZSOffsets"); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index a02d32f250604..f59102aa6b5c3 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -61,7 +61,7 @@ class GPUTPCClusterFinder : public GPUProcessor tpccf::SizeT nPeaks = 0; tpccf::SizeT nClusters = 0; uint32_t maxTimeBin = 0; - uint32_t nPagesSubslice = 0; + uint32_t nPagesSubsector = 0; } counters; CfFragment fragment; }; @@ -123,7 +123,7 @@ class GPUTPCClusterFinder : public GPUProcessor uint32_t mPlabelsHeaderGlobalOffset = 0; uint32_t mPlabelsDataGlobalOffset = 0; - int32_t mISlice = 0; + int32_t mISector = 0; constexpr static int32_t mScanWorkGroupSize = GPUCA_THREAD_COUNT_SCAN; uint32_t mNMaxClusterPerRow = 0; uint32_t mNMaxClusters = 0; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index eb5d7505eea22..a9fbc1b5f40e0 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -24,7 +24,7 @@ void GPUTPCClusterFinder::DumpDigits(std::ostream& out) { const auto nPositions = mPmemory->counters.nPositions; - out << "\nClusterer - Digits - Slice " << mISlice << " - Fragment " << 
mPmemory->fragment.index << ": " << nPositions << "\n"; + out << "\nClusterer - Digits - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << ": " << nPositions << "\n"; out << std::hex; for (size_t i = 0; i < mPmemory->counters.nPositions; i++) { @@ -36,7 +36,7 @@ void GPUTPCClusterFinder::DumpDigits(std::ostream& out) void GPUTPCClusterFinder::DumpChargeMap(std::ostream& out, std::string_view title) { - out << "\nClusterer - " << title << " - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << "\n"; + out << "\nClusterer - " << title << " - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; Array2D map(mPchargeMap); out << std::hex; @@ -69,7 +69,7 @@ void GPUTPCClusterFinder::DumpChargeMap(std::ostream& out, std::string_view titl void GPUTPCClusterFinder::DumpPeakMap(std::ostream& out, std::string_view title) { - out << "\nClusterer - " << title << " - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << "\n"; + out << "\nClusterer - " << title << " - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; Array2D map(mPpeakMap); @@ -105,7 +105,7 @@ void GPUTPCClusterFinder::DumpPeakMap(std::ostream& out, std::string_view title) void GPUTPCClusterFinder::DumpPeaks(std::ostream& out) { - out << "\nClusterer - Peaks - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << "\n"; + out << "\nClusterer - Peaks - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; for (uint32_t i = 0; i < mPmemory->counters.nPositions; i++) { out << int32_t{mPisPeak[i]}; if ((i + 1) % 100 == 0) { @@ -118,7 +118,7 @@ void GPUTPCClusterFinder::DumpPeaksCompacted(std::ostream& out) { const auto nPeaks = mPmemory->counters.nPeaks; - out << "\nClusterer - Compacted Peaks - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << ": " << nPeaks << "\n"; + out << "\nClusterer - Compacted Peaks - Sector " << mISector << " - Fragment " << 
mPmemory->fragment.index << ": " << nPeaks << "\n"; for (size_t i = 0; i < nPeaks; i++) { const auto& pos = mPpeakPositions[i]; out << pos.time() << " " << int32_t{pos.pad()} << " " << int32_t{pos.row()} << "\n"; @@ -130,7 +130,7 @@ void GPUTPCClusterFinder::DumpSuppressedPeaks(std::ostream& out) const auto& fragment = mPmemory->fragment; const auto nPeaks = mPmemory->counters.nPeaks; - out << "\nClusterer - NoiseSuppression - Slice " << mISlice << " - Fragment " << fragment.index << mISlice << "\n"; + out << "\nClusterer - NoiseSuppression - Sector " << mISector << " - Fragment " << fragment.index << mISector << "\n"; for (uint32_t i = 0; i < nPeaks; i++) { out << int32_t{mPisPeak[i]}; if ((i + 1) % 100 == 0) { @@ -144,7 +144,7 @@ void GPUTPCClusterFinder::DumpSuppressedPeaksCompacted(std::ostream& out) const auto& fragment = mPmemory->fragment; const auto nPeaks = mPmemory->counters.nClusters; - out << "\nClusterer - Noise Suppression Peaks Compacted - Slice " << mISlice << " - Fragment " << fragment.index << ": " << nPeaks << "\n"; + out << "\nClusterer - Noise Suppression Peaks Compacted - Sector " << mISector << " - Fragment " << fragment.index << ": " << nPeaks << "\n"; for (size_t i = 0; i < nPeaks; i++) { const auto& peak = mPfilteredPeakPositions[i]; out << peak.time() << " " << int32_t{peak.pad()} << " " << int32_t{peak.row()} << "\n"; @@ -153,7 +153,7 @@ void GPUTPCClusterFinder::DumpSuppressedPeaksCompacted(std::ostream& out) void GPUTPCClusterFinder::DumpClusters(std::ostream& out) { - out << "\nClusterer - Clusters - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << "\n"; + out << "\nClusterer - Clusters - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { size_t N = mPclusterInRow[i]; diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h index 222c2ffa65648..9bf40417192b6 100644 --- 
a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h @@ -18,9 +18,7 @@ #include "GPUDef.h" #include "GPUProcessor.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCClusterData; @@ -39,10 +37,10 @@ class GPUTPCConvert : public GPUProcessor void* SetPointersMemory(void* mem); #endif - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; struct Memory { - GPUTPCClusterData* clusters[NSLICES]; + GPUTPCClusterData* clusters[NSECTORS]; }; protected: @@ -53,7 +51,6 @@ class GPUTPCConvert : public GPUProcessor int16_t mMemoryResOutput = -1; int16_t mMemoryResMemory = -1; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h index 8dfe4ac6c28bc..dd9a74f9b9131 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h @@ -20,33 +20,30 @@ #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCConvertImpl { public: - GPUd() static void convert(const GPUConstantMem& GPUrestrict() cm, int32_t slice, int32_t row, float pad, float time, float& GPUrestrict() x, float& GPUrestrict() y, float& GPUrestrict() z) + GPUd() static void convert(const GPUConstantMem& GPUrestrict() cm, int32_t sector, int32_t row, float pad, float time, float& GPUrestrict() x, float& GPUrestrict() y, float& GPUrestrict() z) { if (cm.param.par.continuousTracking) { - cm.calibObjects.fastTransformHelper->getCorrMap()->TransformInTimeFrame(slice, row, pad, time, x, y, z, cm.param.continuousMaxTimeBin); + cm.calibObjects.fastTransformHelper->getCorrMap()->TransformInTimeFrame(sector, row, pad, time, x, y, z, cm.param.continuousMaxTimeBin); } else { - cm.calibObjects.fastTransformHelper->Transform(slice, row, pad, time, x, y, z); + 
cm.calibObjects.fastTransformHelper->Transform(sector, row, pad, time, x, y, z); } } - GPUd() static void convert(const TPCFastTransform& GPUrestrict() transform, const GPUParam& GPUrestrict() param, int32_t slice, int32_t row, float pad, float time, float& GPUrestrict() x, float& GPUrestrict() y, float& GPUrestrict() z) + GPUd() static void convert(const TPCFastTransform& GPUrestrict() transform, const GPUParam& GPUrestrict() param, int32_t sector, int32_t row, float pad, float time, float& GPUrestrict() x, float& GPUrestrict() y, float& GPUrestrict() z) { if (param.par.continuousTracking) { - transform.TransformInTimeFrame(slice, row, pad, time, x, y, z, param.continuousMaxTimeBin); + transform.TransformInTimeFrame(sector, row, pad, time, x, y, z, param.continuousMaxTimeBin); } else { - transform.Transform(slice, row, pad, time, x, y, z); + transform.Transform(sector, row, pad, time, x, y, z); } } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx index dc01b3782daf9..e17bfc1dff025 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx @@ -24,18 +24,18 @@ using namespace o2::gpu; template <> GPUdii() void GPUTPCConvertKernel::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors) { - const int32_t iSlice = iBlock / GPUCA_ROW_COUNT; + const int32_t iSector = iBlock / GPUCA_ROW_COUNT; const int32_t iRow = iBlock % GPUCA_ROW_COUNT; GPUTPCConvert& GPUrestrict() convert = processors.tpcConverter; const o2::tpc::ClusterNativeAccess* GPUrestrict() native = processors.ioPtrs.clustersNative; - GPUTPCClusterData* GPUrestrict() clusters = convert.mMemory->clusters[iSlice]; - const int32_t idOffset = native->clusterOffset[iSlice][iRow]; - const int32_t indexOffset = 
native->clusterOffset[iSlice][iRow] - native->clusterOffset[iSlice][0]; + GPUTPCClusterData* GPUrestrict() clusters = convert.mMemory->clusters[iSector]; + const int32_t idOffset = native->clusterOffset[iSector][iRow]; + const int32_t indexOffset = native->clusterOffset[iSector][iRow] - native->clusterOffset[iSector][0]; - for (uint32_t k = get_local_id(0); k < native->nClusters[iSlice][iRow]; k += get_local_size(0)) { - const auto& GPUrestrict() clin = native->clusters[iSlice][iRow][k]; + for (uint32_t k = get_local_id(0); k < native->nClusters[iSector][iRow]; k += get_local_size(0)) { + const auto& GPUrestrict() clin = native->clusters[iSector][iRow][k]; float x, y, z; - GPUTPCConvertImpl::convert(processors, iSlice, iRow, clin.getPad(), clin.getTime(), x, y, z); + GPUTPCConvertImpl::convert(processors, iSector, iRow, clin.getPad(), clin.getTime(), x, y, z); auto& GPUrestrict() clout = clusters[indexOffset + k]; clout.x = x; clout.y = y; diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h index 085260dc48067..d62e10e682a4b 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h @@ -17,9 +17,7 @@ #include "GPUGeneralKernels.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCConvertKernel : public GPUKernelTemplate { @@ -28,7 +26,6 @@ class GPUTPCConvertKernel : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h index 2af6fbf922ed4..a99cc5f4a7a2d 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h @@ -24,9 +24,7 @@ class TObjArray; #include 
"DataFormatsTRD/Constants.h" #include "GPUCommonTransform3D.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDpadPlane : private o2::trd::PadPlane @@ -78,7 +76,6 @@ class GPUTRDGeometry : private o2::trd::GeometryFlat static constexpr int32_t kNstack = o2::trd::constants::NSTACK; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDGEOMETRY_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index 7aed063f05ad9..24624e60ceba7 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -23,23 +23,18 @@ #include "GPUTPCGMTrackParam.h" #include "GPUTRDDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template class trackInterface; template class propagatorInterface; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #include "DetectorsBase/Propagator.h" #include "GPUTRDInterfaceO2Track.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { GPUdi() trackInterface::trackInterface(const GPUTPCGMMergedTrack& trk) { set(trk.OuterParam().X, trk.OuterParam().alpha, trk.OuterParam().P, trk.OuterParam().C); } @@ -87,8 +82,7 @@ class propagatorInterface const o2::base::Propagator* mProp; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #include "GPUTPCGMPropagator.h" #include "GPUParam.h" @@ -96,9 +90,7 @@ class propagatorInterface #include "DataFormatsTPC/TrackTPC.h" #include "ReconstructionDataFormats/TrackTPCITS.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template <> @@ -235,7 +227,6 @@ class propagatorInterface : public GPUTPCGMPropagator trackInterface* mTrack; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDINTERFACES_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h b/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h index f7e89169cde24..c11e60bed26db 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h +++ 
b/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h @@ -18,9 +18,7 @@ #ifndef GPUCA_TPC_GEOMETRY_O2 // compatibility to Run 2 data types -namespace o2 -{ -namespace gpu +namespace o2::gpu { // class to hold the information on the space points @@ -44,16 +42,13 @@ class GPUTRDSpacePoint float mDy; // deflection over drift length }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #else // compatibility with Run 3 data types #include "DataFormatsTRD/CalibratedTracklet.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDSpacePoint : public o2::trd::CalibratedTracklet @@ -62,8 +57,7 @@ class GPUTRDSpacePoint : public o2::trd::CalibratedTracklet static_assert(sizeof(GPUTRDSpacePoint) == sizeof(o2::trd::CalibratedTracklet), "Incorrect memory layout"); -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUCA_TPC_GEOMETRY_O2 diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index 4e8fcd13e0801..1bd2eca769913 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -1105,12 +1105,9 @@ GPUd() bool GPUTRDTracker_t::IsGeoFindable(const TRDTRK* t, const #ifndef GPUCA_GPUCODE -namespace o2 -{ -namespace gpu +namespace o2::gpu { template class GPUTRDTracker_t; template class GPUTRDTracker_t; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 274dfd6668eaf..29a9b529b0558 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -30,9 +30,7 @@ #include #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDTrackletWord; @@ -196,7 +194,6 @@ class GPUTRDTracker_t : public GPUProcessor float mTPCTDriftOffset; // TPC drift time additive offset GPUTRDTrackerDebug* mDebug; // debug output }; -} // namespace gpu -} // namespace o2 +} // 
namespace o2::gpu #endif // GPUTRDTRACKER_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h index 45b083a4cca66..4f2199792b2b4 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h @@ -23,9 +23,7 @@ #else -namespace o2 -{ -namespace gpu +namespace o2::gpu { template @@ -65,8 +63,7 @@ template class GPUTRDTrackerDebug; template class GPUTRDTrackerDebug; #endif #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif #endif // GPUTRDTRACKERDEBUG_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h index 8745eabb02473..70b525420f294 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h @@ -17,9 +17,7 @@ #include "GPUGeneralKernels.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDTrackerKernels : public GPUKernelTemplate @@ -32,7 +30,6 @@ class GPUTRDTrackerKernels : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, T* externalInstance = nullptr); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDTRACKERKERNELSCA_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h index d56ee1cbbba5e..20e1df7c72212 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h @@ -17,15 +17,12 @@ #ifndef GPUTRDTRACKLETLABELS_H #define GPUTRDTRACKLETLABELS_H -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTRDTrackletLabels { int32_t mLabel[3]; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDTRACKLETLABELS_H diff --git 
a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index 83acbcda8e3a1..fc874070ec9b8 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -24,9 +24,7 @@ class AliTRDtrackletWord; class AliTRDtrackletMCM; -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDTrackletWord @@ -72,16 +70,13 @@ class GPUTRDTrackletWord uint32_t mTrackletWord; // tracklet word: PID | Z | deflection length | Y // bits: 8 4 7 13 }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #else // compatibility with Run 3 data types #include "DataFormatsTRD/Tracklet64.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDTrackletWord : private o2::trd::Tracklet64 @@ -108,8 +103,7 @@ class GPUTRDTrackletWord : private o2::trd::Tracklet64 static_assert(sizeof(GPUTRDTrackletWord) == sizeof(o2::trd::Tracklet64), "Incorrect memory layout"); -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUCA_TPC_GEOMETRY_O2 diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 4604a8cdbdf70..6c0a96d3adb75 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -24,9 +24,7 @@ #include "CalibdEdxContainer.h" #include "GPUDebugStreamer.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUdEdx @@ -34,7 +32,7 @@ class GPUdEdx public: // The driver must call clear(), fill clusters row by row outside-in, then run computedEdx() to get the result GPUd() void clear(); - GPUd() void fillCluster(float qtot, float qmax, int32_t padRow, uint8_t slice, float trackSnp, float trackTgl, const GPUParam& param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime); + GPUd() void fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUParam& param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime); 
GPUd() void fillSubThreshold(int32_t padRow, const GPUParam& param); GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param); @@ -94,7 +92,7 @@ GPUdi() void GPUdEdx::checkSubThresh(int32_t roc) mLastROC = roc; } -GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint8_t slice, float trackSnp, float trackTgl, const GPUParam& GPUrestrict() param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime) +GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUParam& GPUrestrict() param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime) { if (mCount >= MAX_NCL) { return; @@ -123,10 +121,10 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint const float absRelPad = CAMath::Abs(pad - padPos); const int32_t region = param.tpcGeometry.GetRegion(padRow); z = CAMath::Abs(z); - const float threshold = calibContainer->getZeroSupressionThreshold(slice, padRow, padPos); // TODO: Use the mean zero supresion threshold of all pads in the cluster? + const float threshold = calibContainer->getZeroSupressionThreshold(sector, padRow, padPos); // TODO: Use the mean zero supresion threshold of all pads in the cluster? 
const bool useFullGainMap = calibContainer->isUsageOfFullGainMap(); float qTotIn = qtot; - const float fullGainMapGain = calibContainer->getGain(slice, padRow, padPos); + const float fullGainMapGain = calibContainer->getGain(sector, padRow, padPos); if (useFullGainMap) { qmax /= fullGainMapGain; qtot /= fullGainMapGain; @@ -140,7 +138,7 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint qtot /= qTotTopologyCorr; tpc::StackID stack{ - slice, + sector, static_cast(roc)}; const float qMaxResidualCorr = calibContainer->getResidualCorrection(stack, tpc::ChargeType::Max, trackTgl, trackSnp); @@ -148,7 +146,7 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint qmax /= qMaxResidualCorr; qtot /= qTotResidualCorr; - const float residualGainMapGain = calibContainer->getResidualGain(slice, padRow, padPos); + const float residualGainMapGain = calibContainer->getResidualGain(sector, padRow, padPos); qmax /= residualGainMapGain; qtot /= residualGainMapGain; @@ -164,13 +162,13 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamdEdx)) { float padlx = param.tpcGeometry.Row2X(padRow); - float padly = param.tpcGeometry.LinearPad2Y(slice, padRow, padPos); + float padly = param.tpcGeometry.LinearPad2Y(sector, padRow, padPos); o2::utils::DebugStreamer::instance()->getStreamer("debug_dedx", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_dedx").data() << "qTot=" << mChargeTot[mCount - 1] << "qMax=" << mChargeMax[mCount - 1] << "region=" << region << "padRow=" << padRow - << "sector=" << slice + << "sector=" << sector << "lx=" << padlx << "ly=" << padly << "tanTheta=" << tanTheta @@ -198,7 +196,6 @@ GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow, const GPUParam& GPUrestri mNSubThresh++; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff 
--git a/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h b/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h index 1bbe21a5e7c20..fd3ab61ab0323 100644 --- a/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h +++ b/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h @@ -56,7 +56,7 @@ extern "C" { ** included as . ** ** glcorearb.h includes only APIs in the latest OpenGL core profile -** implementation together with APIs in newer ARB extensions which +** implementation together with APIs in newer ARB extensions which ** can be supported by the core profile. It does not, and never will ** include functionality removed from the core profile, such as ** fixed-function vertex and fragment processing. @@ -3952,9 +3952,9 @@ GLAPI void APIENTRY glMaxShaderCompilerThreadsKHR(GLuint count); #define GL_KHR_texture_compression_astc_ldr 1 #endif /* GL_KHR_texture_compression_astc_ldr */ -#ifndef GL_KHR_texture_compression_astc_sliced_3d -#define GL_KHR_texture_compression_astc_sliced_3d 1 -#endif /* GL_KHR_texture_compression_astc_sliced_3d */ +#ifndef GL_KHR_texture_compression_astc_sliced_3d +#define GL_KHR_texture_compression_astc_sliced_3d 1 +#endif /* GL_KHR_texture_compression_astc_sliced_3d */ #ifndef GL_AMD_framebuffer_multisample_advanced #define GL_AMD_framebuffer_multisample_advanced 1 diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index e42a4fa3e4bf1..1a3717fd88017 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -27,7 +27,7 @@ #include "GPUChainTracking.h" #include "GPUQA.h" -#include "GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUChainTracking.h" #include "GPUTPCTrack.h" #include "GPUTPCTracker.h" @@ -106,11 +106,11 @@ void GPUDisplay::calcXYZ(const float* matrix) }*/ } -void GPUDisplay::SetCollisionFirstCluster(uint32_t collision, int32_t slice, int32_t cluster) +void GPUDisplay::SetCollisionFirstCluster(uint32_t collision, int32_t sector, int32_t cluster)
{ mNCollissions = std::max(mNCollissions, collision + 1); mOverlayTFClusters.resize(mNCollissions); - mOverlayTFClusters[collision][slice] = cluster; + mOverlayTFClusters[collision][sector] = cluster; } void GPUDisplay::mAnimationCloseAngle(float& newangle, float lastAngle) @@ -234,7 +234,7 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla // Calculate rotation / translation scaling factors float scalefactor = mFrontend->mKeys[mFrontend->KEY_SHIFT] ? 0.2f : 1.0f; float rotatescalefactor = scalefactor * 0.25f; - if (mCfgL.drawSlice != -1) { + if (mCfgL.drawSector != -1) { scalefactor *= 0.2f; } float sqrdist = sqrtf(sqrtf(mViewMatrixP[12] * mViewMatrixP[12] + mViewMatrixP[13] * mViewMatrixP[13] + mViewMatrixP[14] * mViewMatrixP[14]) * GL_SCALE_FACTOR) * 0.8f; @@ -258,7 +258,7 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla mCfgL.pointSize = 2.0f; mCfgL.lineWidth = 1.4f; - mCfgL.drawSlice = -1; + mCfgL.drawSector = -1; mCfgH.xAdd = mCfgH.zAdd = 0; mCfgR.camLookOrigin = mCfgR.camYUp = false; mAngleRollOrigin = -1e9f; @@ -411,8 +411,15 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla void GPUDisplay::DrawGLScene_drawCommands() { -#define LOOP_SLICE for (int32_t iSlice = (mCfgL.drawSlice == -1 ? 0 : mCfgL.drawRelatedSlices ? (mCfgL.drawSlice % (NSLICES / 4)) : mCfgL.drawSlice); iSlice < NSLICES; iSlice += (mCfgL.drawSlice == -1 ? 1 : mCfgL.drawRelatedSlices ? (NSLICES / 4) : NSLICES)) -#define LOOP_SLICE2 for (int32_t iSlice = (mCfgL.drawSlice == -1 ? 0 : mCfgL.drawRelatedSlices ? (mCfgL.drawSlice % (NSLICES / 4)) : mCfgL.drawSlice) % (NSLICES / 2); iSlice < NSLICES / 2; iSlice += (mCfgL.drawSlice == -1 ? 1 : mCfgL.drawRelatedSlices ? (NSLICES / 4) : NSLICES)) +#define LOOP_SECTOR for (int32_t iSector = (mCfgL.drawSector == -1 ? 0 : mCfgL.drawRelatedSectors ? 
(mCfgL.drawSector % (NSECTORS / 4)) \ + : mCfgL.drawSector); \ + iSector < NSECTORS; iSector += (mCfgL.drawSector == -1 ? 1 : mCfgL.drawRelatedSectors ? (NSECTORS / 4) \ + : NSECTORS)) +#define LOOP_SECTOR2 for (int32_t iSector = (mCfgL.drawSector == -1 ? 0 : mCfgL.drawRelatedSectors ? (mCfgL.drawSector % (NSECTORS / 4)) \ + : mCfgL.drawSector) % \ + (NSECTORS / 2); \ + iSector < NSECTORS / 2; iSector += (mCfgL.drawSector == -1 ? 1 : mCfgL.drawRelatedSectors ? (NSECTORS / 4) \ + : NSECTORS)) #define LOOP_COLLISION for (int32_t iCol = (mCfgL.showCollision == -1 ? 0 : mCfgL.showCollision); iCol < mNCollissions; iCol += (mCfgL.showCollision == -1 ? 1 : mNCollissions)) #define LOOP_COLLISION_COL(cmd) \ LOOP_COLLISION \ @@ -426,37 +433,37 @@ void GPUDisplay::DrawGLScene_drawCommands() if (mCfgL.drawGrid) { if (mCfgL.drawTPC) { SetColorGrid(); - LOOP_SLICE drawVertices(mGlDLGrid[iSlice], GPUDisplayBackend::LINES); + LOOP_SECTOR drawVertices(mGlDLGrid[iSector], GPUDisplayBackend::LINES); } if (mCfgL.drawTRD) { SetColorGridTRD(); - LOOP_SLICE2 drawVertices(mGlDLGridTRD[iSlice], GPUDisplayBackend::LINES); + LOOP_SECTOR2 drawVertices(mGlDLGridTRD[iSector], GPUDisplayBackend::LINES); } } if (mCfgL.drawClusters) { if (mCfgL.drawTRD) { SetColorTRD(); mBackend->lineWidthFactor(2); - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tTRDCLUSTER][iCol], GPUDisplayBackend::LINES)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tTRDCLUSTER][iCol], GPUDisplayBackend::LINES)); if (mCfgL.drawFinal && mCfgL.colorClusters) { SetColorFinal(); } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tTRDATTACHED][iCol], GPUDisplayBackend::LINES)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tTRDATTACHED][iCol], GPUDisplayBackend::LINES)); mBackend->lineWidthFactor(1); } if (mCfgL.drawTOF) { SetColorTOF(); mBackend->pointSizeFactor(2); - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[0][tTOFCLUSTER][0], 
GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[0][tTOFCLUSTER][0], GPUDisplayBackend::POINTS)); mBackend->pointSizeFactor(1); } if (mCfgL.drawITS) { SetColorITS(); - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[0][tITSCLUSTER][0], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[0][tITSCLUSTER][0], GPUDisplayBackend::POINTS)); } if (mCfgL.drawTPC) { SetColorClusters(); - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tCLUSTER][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tCLUSTER][iCol], GPUDisplayBackend::POINTS)); if (mCfgL.drawInitLinks) { if (mCfgL.excludeClusters) { @@ -466,7 +473,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorInitLinks(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tINITLINK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tINITLINK][iCol], GPUDisplayBackend::POINTS)); if (mCfgL.drawLinks) { if (mCfgL.excludeClusters) { @@ -478,7 +485,7 @@ void GPUDisplay::DrawGLScene_drawCommands() } else { SetColorClusters(); } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tLINK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tLINK][iCol], GPUDisplayBackend::POINTS)); if (mCfgL.drawSeeds) { if (mCfgL.excludeClusters) { @@ -488,7 +495,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorSeeds(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tSEED][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tSEED][iCol], GPUDisplayBackend::POINTS)); skip1: SetColorClusters(); @@ -500,7 +507,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorTracklets(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tTRACKLET][iCol], 
GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tTRACKLET][iCol], GPUDisplayBackend::POINTS)); if (mCfgL.drawTracks) { if (mCfgL.excludeClusters) { @@ -510,7 +517,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorTracks(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tSLICETRACK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tSECTORTRACK][iCol], GPUDisplayBackend::POINTS)); skip2:; if (mCfgL.drawExtrapolatedTracks) { @@ -523,7 +530,7 @@ void GPUDisplay::DrawGLScene_drawCommands() } else { SetColorClusters(); } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tEXTRAPOLATEDTRACK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tEXTRAPOLATEDTRACK][iCol], GPUDisplayBackend::POINTS)); SetColorClusters(); if (mCfgL.drawFinal && mCfgL.propagateTracks < 2) { @@ -534,7 +541,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorFinal(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tFINALTRACK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tFINALTRACK][iCol], GPUDisplayBackend::POINTS)); skip3:; } } @@ -543,47 +550,47 @@ void GPUDisplay::DrawGLScene_drawCommands() if (mCfgL.drawTPC) { if (mCfgL.drawInitLinks) { SetColorInitLinks(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tINITLINK], GPUDisplayBackend::LINES); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tINITLINK], GPUDisplayBackend::LINES); } if (mCfgL.drawLinks) { SetColorLinks(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tLINK], GPUDisplayBackend::LINES); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tLINK], GPUDisplayBackend::LINES); } if (mCfgL.drawSeeds) { SetColorSeeds(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tSEED], GPUDisplayBackend::LINE_STRIP); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tSEED], 
GPUDisplayBackend::LINE_STRIP); } if (mCfgL.drawTracklets) { SetColorTracklets(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tTRACKLET], GPUDisplayBackend::LINE_STRIP); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tTRACKLET], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.drawTracks) { SetColorTracks(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tSLICETRACK], GPUDisplayBackend::LINE_STRIP); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tSECTORTRACK], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.drawExtrapolatedTracks) { SetColorExtrapolatedTracks(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tEXTRAPOLATEDTRACK], GPUDisplayBackend::LINE_STRIP); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tEXTRAPOLATEDTRACK], GPUDisplayBackend::LINE_STRIP); } } if (mCfgL.drawFinal) { SetColorFinal(); - LOOP_SLICE LOOP_COLLISION + LOOP_SECTOR LOOP_COLLISION { if (mCfgL.colorCollisions) { SetCollisionColor(iCol); } if (mCfgL.propagateTracks < 2) { - drawVertices(mGlDLFinal[iSlice][iCol][0], GPUDisplayBackend::LINE_STRIP); + drawVertices(mGlDLFinal[iSector][iCol][0], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.propagateTracks > 0 && mCfgL.propagateTracks < 3) { - drawVertices(mGlDLFinal[iSlice][iCol][1], GPUDisplayBackend::LINE_STRIP); + drawVertices(mGlDLFinal[iSector][iCol][1], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.propagateTracks == 2) { - drawVertices(mGlDLFinal[iSlice][iCol][2], GPUDisplayBackend::LINE_STRIP); + drawVertices(mGlDLFinal[iSector][iCol][2], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.propagateTracks == 3) { - drawVertices(mGlDLFinal[iSlice][iCol][3], GPUDisplayBackend::LINE_STRIP); + drawVertices(mGlDLFinal[iSector][iCol][3], GPUDisplayBackend::LINE_STRIP); } } if (mCfgH.drawTracksAndFilter ? 
(mCfgH.drawTPCTracks || mCfgH.drawTRDTracks || mCfgH.drawTOFTracks) : mCfgH.drawITSTracks) { @@ -595,7 +602,7 @@ void GPUDisplay::DrawGLScene_drawCommands() mBackend->pointSizeFactor(3); } SetColorMarked(); - LOOP_SLICE LOOP_COLLISION drawVertices(mGlDLPoints[iSlice][tMARKED][iCol], GPUDisplayBackend::POINTS); + LOOP_SECTOR LOOP_COLLISION drawVertices(mGlDLPoints[iSector][tMARKED][iCol], GPUDisplayBackend::POINTS); if (mCfgH.markFakeClusters) { mBackend->pointSizeFactor(1); } @@ -665,9 +672,9 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) char info[1024]; float fps = (double)mFramesDoneFPS / fpstime; snprintf(info, 1024, - "FPS: %6.2f (Slice: %d, 1:Clusters %d, 2:Prelinks %d, 3:Links %d, 4:Seeds %d, 5:Tracklets %d, 6:Tracks %d, 7:GTracks %d, 8:Merger %d) (%d frames, %d draw calls) " + "FPS: %6.2f (Sector: %d, 1:Clusters %d, 2:Prelinks %d, 3:Links %d, 4:Seeds %d, 5:Tracklets %d, 6:Tracks %d, 7:GTracks %d, 8:Merger %d) (%d frames, %d draw calls) " "(X %1.2f Y %1.2f Z %1.2f / R %1.2f Phi %1.1f Theta %1.1f) / Yaw %1.1f Pitch %1.1f Roll %1.1f)", - fps, mCfgL.drawSlice, mCfgL.drawClusters, mCfgL.drawInitLinks, mCfgL.drawLinks, mCfgL.drawSeeds, mCfgL.drawTracklets, mCfgL.drawTracks, mCfgL.drawExtrapolatedTracks, mCfgL.drawFinal, mFramesDone, mNDrawCalls, mXYZ[0], mXYZ[1], mXYZ[2], mRPhiTheta[0], mRPhiTheta[1] * 180 / CAMath::Pi(), + fps, mCfgL.drawSector, mCfgL.drawClusters, mCfgL.drawInitLinks, mCfgL.drawLinks, mCfgL.drawSeeds, mCfgL.drawTracklets, mCfgL.drawTracks, mCfgL.drawExtrapolatedTracks, mCfgL.drawFinal, mFramesDone, mNDrawCalls, mXYZ[0], mXYZ[1], mXYZ[2], mRPhiTheta[0], mRPhiTheta[1] * 180 / CAMath::Pi(), mRPhiTheta[2] * 180 / CAMath::Pi(), mAngle[1] * 180 / CAMath::Pi(), mAngle[0] * 180 / CAMath::Pi(), mAngle[2] * 180 / CAMath::Pi()); if (fpstime > 1.) 
{ if (mPrintInfoText & 2) { diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 1c4b751bbf85b..73f65b6b24241 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -29,9 +29,7 @@ #include "utils/timer.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; struct GPUParam; @@ -47,7 +45,7 @@ class GPUDisplay : public GPUDisplayInterface int32_t StartDisplay() override; void ShowNextEvent(const GPUTrackingInOutPointers* ptrs = nullptr) override; void WaitForNextEvent() override; - void SetCollisionFirstCluster(uint32_t collision, int32_t slice, int32_t cluster) override; + void SetCollisionFirstCluster(uint32_t collision, int32_t sector, int32_t cluster) override; void UpdateCalib(const GPUCalibObjectsConst* calib) override { mCalib = calib; } void UpdateParam(const GPUParam* param) override { mParam = param; } @@ -79,7 +77,7 @@ class GPUDisplay : public GPUDisplayInterface int32_t& drawTextFontSize() { return mDrawTextFontSize; } private: - static constexpr int32_t NSLICES = GPUChainTracking::NSLICES; + static constexpr int32_t NSECTORS = GPUChainTracking::NSECTORS; static constexpr float GL_SCALE_FACTOR = (1.f / 100.f); static constexpr const int32_t N_POINTS_TYPE = 15; @@ -95,7 +93,7 @@ class GPUDisplay : public GPUDisplayInterface tLINK = 2, tSEED = 3, tTRACKLET = 4, - tSLICETRACK = 5, + tSECTORTRACK = 5, tEXTRAPOLATEDTRACK = 6, tFINALTRACK = 7, tMARKED = 8, @@ -153,11 +151,11 @@ class GPUDisplay : public GPUDisplayInterface void updateOptions(); void disableUnsupportedOptions(); int32_t buildTrackFilter(); - const GPUTPCTracker& sliceTracker(int32_t iSlice); + const GPUTPCTracker& sectorTracker(int32_t iSector); const GPUTRDGeometry* trdGeometry(); const GPUTrackingInOutPointers* mIOPtrs = nullptr; void insertVertexList(std::pair*, vecpod*>& vBuf, size_t first, size_t last); - void insertVertexList(int32_t iSlice, size_t first, size_t last); + void 
insertVertexList(int32_t iSector, size_t first, size_t last); template void SetInfo(Args... args) { @@ -195,19 +193,19 @@ class GPUDisplay : public GPUDisplayInterface void SetColorMarked(); void SetCollisionColor(int32_t col); void updateConfig(); - void drawPointLinestrip(int32_t iSlice, int32_t cid, int32_t id, int32_t id_limit = TRACK_TYPE_ID_LIMIT); - vboList DrawClusters(int32_t iSlice, int32_t select, uint32_t iCol); - vboList DrawSpacePointsTRD(int32_t iSlice, int32_t select, int32_t iCol); - vboList DrawSpacePointsTOF(int32_t iSlice, int32_t select, int32_t iCol); - vboList DrawSpacePointsITS(int32_t iSlice, int32_t select, int32_t iCol); + void drawPointLinestrip(int32_t iSector, int32_t cid, int32_t id, int32_t id_limit = TRACK_TYPE_ID_LIMIT); + vboList DrawClusters(int32_t iSector, int32_t select, uint32_t iCol); + vboList DrawSpacePointsTRD(int32_t iSector, int32_t select, int32_t iCol); + vboList DrawSpacePointsTOF(int32_t iSector, int32_t select, int32_t iCol); + vboList DrawSpacePointsITS(int32_t iSector, int32_t select, int32_t iCol); vboList DrawLinks(const GPUTPCTracker& tracker, int32_t id, bool dodown = false); vboList DrawSeeds(const GPUTPCTracker& tracker); vboList DrawTracklets(const GPUTPCTracker& tracker); vboList DrawTracks(const GPUTPCTracker& tracker, int32_t global); - void DrawTrackITS(int32_t trackId, int32_t iSlice); + void DrawTrackITS(int32_t trackId, int32_t iSector); GPUDisplay::vboList DrawFinalITS(); template - void DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer); + void DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer); vboList DrawGrid(const GPUTPCTracker& tracker); vboList DrawGridTRD(int32_t sector); void DoScreenshot(const char* filename, std::vector& pixels, float animateTime = -1.f); @@ -252,9 +250,9 @@ class GPUDisplay : public 
GPUDisplayInterface vecpod> mOverlayTFClusters; int32_t mNCollissions = 1; - vecpod mVertexBuffer[NSLICES]; - vecpod mVertexBufferStart[NSLICES]; - vecpod mVertexBufferCount[NSLICES]; + vecpod mVertexBuffer[NSECTORS]; + vecpod mVertexBufferStart[NSECTORS]; + vecpod mVertexBufferCount[NSECTORS]; std::unique_ptr mGlobalPosPtr; std::unique_ptr mGlobalPosPtrTRD; @@ -303,25 +301,24 @@ class GPUDisplay : public GPUDisplayInterface HighResTimer mInfoText2Timer, mInfoHelpTimer; std::vector mThreadBuffers; - std::vector, 2>, NSLICES>>> mThreadTracks; + std::vector, 2>, NSECTORS>>> mThreadTracks; volatile int32_t mInitResult = 0; float mFPSScale = 1, mFPSScaleadjust = 0; int32_t mFramesDone = 0, mFramesDoneFPS = 0; HighResTimer mTimerFPS, mTimerDisplay, mTimerDraw; - vboList mGlDLLines[NSLICES][N_LINES_TYPE]; - vecpod> mGlDLFinal[NSLICES]; + vboList mGlDLLines[NSECTORS][N_LINES_TYPE]; + vecpod> mGlDLFinal[NSECTORS]; vboList mGlDLFinalITS; - vecpod mGlDLPoints[NSLICES][N_POINTS_TYPE]; - vboList mGlDLGrid[NSLICES]; - vboList mGlDLGridTRD[NSLICES / 2]; + vecpod mGlDLPoints[NSECTORS][N_POINTS_TYPE]; + vboList mGlDLGrid[NSECTORS]; + vboList mGlDLGridTRD[NSECTORS / 2]; bool mRequestScreenshot = false; std::string mScreenshotFile; float mYFactor = 1.0f; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.h b/GPU/GPUTracking/display/GPUDisplayInterface.h index 44ae35068cac3..3c6928c78e5a1 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.h +++ b/GPU/GPUTracking/display/GPUDisplayInterface.h @@ -17,9 +17,7 @@ #include "GPUSettings.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUChainTracking; @@ -33,7 +31,7 @@ class GPUDisplayInterface virtual int32_t StartDisplay() = 0; virtual void ShowNextEvent(const GPUTrackingInOutPointers* ptrs = nullptr) = 0; virtual void WaitForNextEvent() = 0; - virtual void SetCollisionFirstCluster(uint32_t collision, int32_t slice, int32_t cluster) = 0; + 
virtual void SetCollisionFirstCluster(uint32_t collision, int32_t sector, int32_t cluster) = 0; virtual void UpdateCalib(const GPUCalibObjectsConst* calib) = 0; virtual void UpdateParam(const GPUParam* param) = 0; static GPUDisplayInterface* getDisplay(GPUDisplayFrontendInterface* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param = nullptr, const GPUCalibObjectsConst* calib = nullptr, const GPUSettingsDisplay* config = nullptr); @@ -61,7 +59,6 @@ class GPUDisplayFrontendInterface GPUDisplayFrontendInterface(); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUDISPLAYINTERFACE_H diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx index 508c9d0b2e4ff..ded8803801fb7 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx @@ -138,12 +138,12 @@ std::vector GPUDisplayBackend::getPixels() void GPUDisplayBackend::fillIndirectCmdBuffer() { mCmdBuffer.clear(); - mIndirectSliceOffset.resize(GPUCA_NSLICES); + mIndirectSectorOffset.resize(GPUCA_NSECTORS); // TODO: Check if this can be parallelized - for (int32_t iSlice = 0; iSlice < GPUCA_NSLICES; iSlice++) { - mIndirectSliceOffset[iSlice] = mCmdBuffer.size(); - for (uint32_t k = 0; k < mDisplay->vertexBufferStart()[iSlice].size(); k++) { - mCmdBuffer.emplace_back(mDisplay->vertexBufferCount()[iSlice][k], 1, mDisplay->vertexBufferStart()[iSlice][k], 0); + for (int32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { + mIndirectSectorOffset[iSector] = mCmdBuffer.size(); + for (uint32_t k = 0; k < mDisplay->vertexBufferStart()[iSector].size(); k++) { + mCmdBuffer.emplace_back(mDisplay->vertexBufferCount()[iSector][k], 1, mDisplay->vertexBufferStart()[iSector][k], 0); } } } diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.h b/GPU/GPUTracking/display/backend/GPUDisplayBackend.h index c2c23f659e418..dc56dedf587ed 100644 --- 
a/GPU/GPUTracking/display/backend/GPUDisplayBackend.h +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackend.h @@ -113,7 +113,7 @@ class GPUDisplayBackend bool smoothFont(); GPUDisplay* mDisplay = nullptr; - std::vector mIndirectSliceOffset; + std::vector mIndirectSectorOffset; vecpod mCmdBuffer; bool mFreetypeInitialized = false; bool mFrontendCompatTextDraw = false; diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx index 10acbea3a2586..3ee3384c8e118 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx @@ -186,7 +186,7 @@ uint32_t GPUDisplayBackendOpenGL::drawVertices(const vboList& v, const drawType GLenum t = types[tt]; auto first = std::get<0>(v); auto count = std::get<1>(v); - auto iSlice = std::get<2>(v); + auto iSector = std::get<2>(v); if (count == 0) { return 0; } @@ -195,7 +195,7 @@ uint32_t GPUDisplayBackendOpenGL::drawVertices(const vboList& v, const drawType if (mDisplay->cfgR().openGLCore) { CHKERR(glBindVertexArray(mVertexArray)); } - CHKERR(glBindBuffer(GL_ARRAY_BUFFER, mVBOId[iSlice])); + CHKERR(glBindBuffer(GL_ARRAY_BUFFER, mVBOId[iSector])); #ifndef GPUCA_DISPLAY_OPENGL_CORE if (!mDisplay->cfgR().openGLCore) { CHKERR(glVertexPointer(3, GL_FLOAT, 0, nullptr)); @@ -208,14 +208,14 @@ uint32_t GPUDisplayBackendOpenGL::drawVertices(const vboList& v, const drawType } if (mDisplay->cfgR().useGLIndirectDraw) { - CHKERR(glMultiDrawArraysIndirect(t, (void*)(size_t)((mIndirectSliceOffset[iSlice] + first) * sizeof(DrawArraysIndirectCommand)), count, 0)); + CHKERR(glMultiDrawArraysIndirect(t, (void*)(size_t)((mIndirectSectorOffset[iSector] + first) * sizeof(DrawArraysIndirectCommand)), count, 0)); } else if (OPENGL_EMULATE_MULTI_DRAW) { for (uint32_t k = 0; k < count; k++) { - CHKERR(glDrawArrays(t, mDisplay->vertexBufferStart()[iSlice][first + k], 
mDisplay->vertexBufferCount()[iSlice][first + k])); + CHKERR(glDrawArrays(t, mDisplay->vertexBufferStart()[iSector][first + k], mDisplay->vertexBufferCount()[iSector][first + k])); } } else { - static_assert(sizeof(GLsizei) == sizeof(*mDisplay->vertexBufferCount()[iSlice].data()), "Invalid counter size does not match GLsizei"); - CHKERR(glMultiDrawArrays(t, mDisplay->vertexBufferStart()[iSlice].data() + first, ((const GLsizei*)mDisplay->vertexBufferCount()[iSlice].data()) + first, count)); + static_assert(sizeof(GLsizei) == sizeof(*mDisplay->vertexBufferCount()[iSector].data()), "Invalid counter size does not match GLsizei"); + CHKERR(glMultiDrawArrays(t, mDisplay->vertexBufferStart()[iSector].data() + first, ((const GLsizei*)mDisplay->vertexBufferCount()[iSector].data()) + first, count)); } return count; } @@ -315,7 +315,7 @@ int32_t GPUDisplayBackendOpenGL::InitBackendA() GPUError("Unsupported OpenGL runtime %d.%d < %d.%d", glVersion[0], glVersion[1], GPUDisplayFrontend::GL_MIN_VERSION_MAJOR, GPUDisplayFrontend::GL_MIN_VERSION_MINOR); return (1); } - mVBOId.resize(GPUCA_NSLICES); + mVBOId.resize(GPUCA_NSECTORS); CHKERR(glCreateBuffers(mVBOId.size(), mVBOId.data())); CHKERR(glBindBuffer(GL_ARRAY_BUFFER, mVBOId[0])); CHKERR(glGenBuffers(1, &mIndirectId)); @@ -457,7 +457,7 @@ void GPUDisplayBackendOpenGL::loadDataToGPU(size_t totalVertizes) { // TODO: Check if this can be parallelized if (mDisplay->useMultiVBO()) { - for (int32_t i = 0; i < GPUCA_NSLICES; i++) { + for (int32_t i = 0; i < GPUCA_NSECTORS; i++) { CHKERR(glNamedBufferData(mVBOId[i], mDisplay->vertexBuffer()[i].size() * sizeof(mDisplay->vertexBuffer()[i][0]), mDisplay->vertexBuffer()[i].data(), GL_STATIC_DRAW)); } } else { diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx index 6f0ebb9baf945..2324c194d04b9 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx +++ 
b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx @@ -1469,7 +1469,7 @@ uint32_t GPUDisplayBackendVulkan::drawVertices(const vboList& v, const drawType { auto first = std::get<0>(v); auto count = std::get<1>(v); - auto iSlice = std::get<2>(v); + auto iSector = std::get<2>(v); if (count == 0) { return 0; } @@ -1482,10 +1482,10 @@ uint32_t GPUDisplayBackendVulkan::drawVertices(const vboList& v, const drawType mCurrentCommandBufferLastPipeline = tt; } if (mDisplay->cfgR().useGLIndirectDraw) { - mCurrentCommandBuffer.drawIndirect(mIndirectCommandBuffer.buffer, (mIndirectSliceOffset[iSlice] + first) * sizeof(DrawArraysIndirectCommand), count, sizeof(DrawArraysIndirectCommand)); + mCurrentCommandBuffer.drawIndirect(mIndirectCommandBuffer.buffer, (mIndirectSectorOffset[iSector] + first) * sizeof(DrawArraysIndirectCommand), count, sizeof(DrawArraysIndirectCommand)); } else { for (uint32_t k = 0; k < count; k++) { - mCurrentCommandBuffer.draw(mDisplay->vertexBufferCount()[iSlice][first + k], 1, mDisplay->vertexBufferStart()[iSlice][first + k], 0); + mCurrentCommandBuffer.draw(mDisplay->vertexBufferCount()[iSector][first + k], 1, mDisplay->vertexBufferStart()[iSector][first + k], 0); } } diff --git a/GPU/GPUTracking/display/filterMacros/TRDCandidate.C b/GPU/GPUTracking/display/filterMacros/TRDCandidate.C index f00681d0ca335..4bbab658c31c4 100644 --- a/GPU/GPUTracking/display/filterMacros/TRDCandidate.C +++ b/GPU/GPUTracking/display/filterMacros/TRDCandidate.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + #include "GPUO2Interface.h" #include "GPUConstantMem.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/filterMacros/filterGPUTrack.C b/GPU/GPUTracking/display/filterMacros/filterGPUTrack.C index 886ed29611553..a27d988e84e43 100644 --- a/GPU/GPUTracking/display/filterMacros/filterGPUTrack.C +++ b/GPU/GPUTracking/display/filterMacros/filterGPUTrack.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + #include "GPUO2Interface.h" #include "GPUTPCGMMergedTrack.h" diff --git a/GPU/GPUTracking/display/filterMacros/filterTPCTrack.C b/GPU/GPUTracking/display/filterMacros/filterTPCTrack.C index 636cdd0319011..484fff3e7d4ef 100644 --- a/GPU/GPUTracking/display/filterMacros/filterTPCTrack.C +++ b/GPU/GPUTracking/display/filterMacros/filterTPCTrack.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + #include "GPUO2Interface.h" #if !defined(__CLING__) || defined(__ROOTCLING__) #include "DataFormatsTPC/TrackTPC.h" diff --git a/GPU/GPUTracking/display/filterMacros/hasTRD.C b/GPU/GPUTracking/display/filterMacros/hasTRD.C index cd98fb2fe349b..2392442c4a961 100644 --- a/GPU/GPUTracking/display/filterMacros/hasTRD.C +++ b/GPU/GPUTracking/display/filterMacros/hasTRD.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ #include "GPUO2Interface.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx index ed0d08fb24add..ad3b620ba8f55 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx @@ -39,16 +39,8 @@ using namespace o2::gpu; -GPUDisplayFrontendWayland::GPUDisplayFrontendWayland() -{ - mFrontendType = TYPE_WAYLAND; - mFrontendName = "Wayland"; -} - -void GPUDisplayFrontendWayland::OpenGLPrint(const char* s, float x, float y, float r, float g, float b, float a, bool fromBotton) +namespace o2::gpu::internal { -} - template struct CCallWrapper { std::function func; @@ -58,6 +50,17 @@ struct CCallWrapper { return funcwrap->func(std::forward(args)...); } }; +} // namespace o2::gpu::internal + +GPUDisplayFrontendWayland::GPUDisplayFrontendWayland() +{ + mFrontendType = TYPE_WAYLAND; + mFrontendName = "Wayland"; +} + +void GPUDisplayFrontendWayland::OpenGLPrint(const char* s, float x, float y, float r, float g, float b, float a, bool fromBotton) +{ +} int32_t GPUDisplayFrontendWayland::GetKey(uint32_t key, uint32_t state) { @@ -283,7 +286,7 @@ int32_t GPUDisplayFrontendWayland::FrontendMain() wl_keyboard_add_listener(mKeyboard, &keyboard_listener, this); } }; - auto seat_capabilities_c = CCallWrapper{[seat_capabilities](wl_seat* seat, uint32_t capabilities) { seat_capabilities(seat, capabilities); }}; + auto seat_capabilities_c = internal::CCallWrapper{[seat_capabilities](wl_seat* seat, uint32_t capabilities) { seat_capabilities(seat, capabilities); }}; auto seat_name = [](void* data, struct wl_seat* seat, const char* name) { if (((GPUDisplayFrontendWayland*)data)->mDisplay->param()->par.debugLevel >= 2) { @@ -317,7 +320,7 @@ int32_t GPUDisplayFrontendWayland::FrontendMain() } }; - auto registry_global_c = CCallWrapper{[registry_global](wl_registry* registry, uint32_t name, const 
char* interface, uint32_t version) { registry_global(registry, name, interface, version); }}; + auto registry_global_c = internal::CCallWrapper{[registry_global](wl_registry* registry, uint32_t name, const char* interface, uint32_t version) { registry_global(registry, name, interface, version); }}; auto registry_global_remove = [](void* a, wl_registry* b, uint32_t c) {}; const wl_registry_listener registry_listener = {.global = ®istry_global_c.callback, .global_remove = registry_global_remove}; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx index 69d24538123c6..ff7763ea62948 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx @@ -24,7 +24,7 @@ using namespace o2::gpu; -namespace o2::gpu +namespace o2::gpu::internal { struct GPUDisplayGUIWrapperObjects { std::unique_ptr app; @@ -39,7 +39,7 @@ struct GPUDisplayGUIWrapperObjects { std::mutex mutex, mutexRet; std::condition_variable signal, signalRet; }; -} // namespace o2::gpu +} // namespace o2::gpu::internal GPUDisplayGUIWrapper::GPUDisplayGUIWrapper() { @@ -52,7 +52,7 @@ GPUDisplayGUIWrapper::GPUDisplayGUIWrapper() first = true; } } - mO.reset(new GPUDisplayGUIWrapperObjects); + mO.reset(new internal::GPUDisplayGUIWrapperObjects); mO->t = std::thread(&GPUDisplayGUIWrapper::guiThread, this); } GPUDisplayGUIWrapper::~GPUDisplayGUIWrapper() diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h index 00542321d6a19..4bf88b4726532 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h @@ -20,7 +20,10 @@ namespace o2::gpu { +namespace internal +{ struct GPUDisplayGUIWrapperObjects; +} // namespace internal class GPUDisplayGUIWrapper { @@ -35,7 +38,7 @@ class GPUDisplayGUIWrapper int32_t focus(); private: - 
std::unique_ptr mO; + std::unique_ptr mO; void guiThread(); }; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx index acf5566489f49..32ff6c73e110c 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx @@ -20,8 +20,8 @@ const char* HelpText[] = { "[ESC] Quit", "[n] Next event", "[r] Reset Display Settings", - "[l] / [k] / [J] Draw single slice (next / previous slice), draw related slices (same plane in phi)", - "[;] / [:] Show splitting of TPC in slices by extruding volume, [:] resets", + "[l] / [k] / [J] Draw single sector (next / previous sector), draw related sectors (same plane in phi)", + "[;] / [:] Show splitting of TPC in sectors by extruding volume, [:] resets", "[#] Invert colors", "[y] / [Y] / [X] / [M] Start Animation, Add / remove Animation point, Reset Points, Cycle animation camera mode (resets)", "[>] / [<] Toggle config interpolation during Animation / change Animation interval (via movement)", @@ -110,27 +110,27 @@ void GPUDisplay::HandleKey(uint8_t key) } else if (key == mFrontend->KEY_ALT) { mFrontend->mKeys[mFrontend->KEY_CTRL] = false; // Release CTRL with alt, to avoid orienting along y automatically! } else if (key == 'l') { - if (mCfgL.drawSlice >= (mCfgL.drawRelatedSlices ? (NSLICES / 4 - 1) : (NSLICES - 1))) { - mCfgL.drawSlice = -1; - SetInfo("Showing all slices", 1); + if (mCfgL.drawSector >= (mCfgL.drawRelatedSectors ? (NSECTORS / 4 - 1) : (NSECTORS - 1))) { + mCfgL.drawSector = -1; + SetInfo("Showing all sectors", 1); } else { - mCfgL.drawSlice++; - SetInfo("Showing slice %d", mCfgL.drawSlice); + mCfgL.drawSector++; + SetInfo("Showing sector %d", mCfgL.drawSector); } } else if (key == 'k') { - if (mCfgL.drawSlice <= -1) { - mCfgL.drawSlice = mCfgL.drawRelatedSlices ? (NSLICES / 4 - 1) : (NSLICES - 1); + if (mCfgL.drawSector <= -1) { + mCfgL.drawSector = mCfgL.drawRelatedSectors ? 
(NSECTORS / 4 - 1) : (NSECTORS - 1); } else { - mCfgL.drawSlice--; + mCfgL.drawSector--; } - if (mCfgL.drawSlice == -1) { - SetInfo("Showing all slices", 1); + if (mCfgL.drawSector == -1) { + SetInfo("Showing all sectors", 1); } else { - SetInfo("Showing slice %d", mCfgL.drawSlice); + SetInfo("Showing sector %d", mCfgL.drawSector); } } else if (key == 'J') { - mCfgL.drawRelatedSlices ^= 1; - SetInfo("Drawing of related slices %s", mCfgL.drawRelatedSlices ? "enabled" : "disabled"); + mCfgL.drawRelatedSectors ^= 1; + SetInfo("Drawing of related sectors %s", mCfgL.drawRelatedSectors ? "enabled" : "disabled"); } else if (key == 'L') { if (mCfgL.showCollision >= mNCollissions - 1) { mCfgL.showCollision = -1; diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 8c42cfa46abb9..fbe330afa1211 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -40,10 +40,10 @@ using namespace o2::gpu; -#define GET_CID(slice, i) (mParam->par.earlyTpcTransform ? mIOPtrs->clusterData[slice][i].id : (mIOPtrs->clustersNative->clusterOffset[slice][0] + i)) +#define GET_CID(sector, i) (mParam->par.earlyTpcTransform ? 
mIOPtrs->clusterData[sector][i].id : (mIOPtrs->clustersNative->clusterOffset[sector][0] + i)) const GPUTRDGeometry* GPUDisplay::trdGeometry() { return (GPUTRDGeometry*)mCalib->trdGeometry; } -const GPUTPCTracker& GPUDisplay::sliceTracker(int32_t iSlice) { return mChain->GetTPCSliceTrackers()[iSlice]; } +const GPUTPCTracker& GPUDisplay::sectorTracker(int32_t iSector) { return mChain->GetTPCSectorTrackers()[iSector]; } inline void GPUDisplay::insertVertexList(std::pair*, vecpod*>& vBuf, size_t first, size_t last) { @@ -53,15 +53,15 @@ inline void GPUDisplay::insertVertexList(std::pair*, vecpodemplace_back(first); vBuf.second->emplace_back(last - first); } -inline void GPUDisplay::insertVertexList(int32_t iSlice, size_t first, size_t last) +inline void GPUDisplay::insertVertexList(int32_t iSector, size_t first, size_t last) { - std::pair*, vecpod*> vBuf(mVertexBufferStart + iSlice, mVertexBufferCount + iSlice); + std::pair*, vecpod*> vBuf(mVertexBufferStart + iSector, mVertexBufferCount + iSector); insertVertexList(vBuf, first, last); } -inline void GPUDisplay::drawPointLinestrip(int32_t iSlice, int32_t cid, int32_t id, int32_t id_limit) +inline void GPUDisplay::drawPointLinestrip(int32_t iSector, int32_t cid, int32_t id, int32_t id_limit) { - mVertexBuffer[iSlice].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPos[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPos[cid].z); float curVal; while ((curVal = mGlobalPos[cid].w) < id_limit) { if (GPUCommonMath::AtomicCAS(&mGlobalPos[cid].w, curVal, (float)id)) { @@ -71,66 +71,67 @@ inline void GPUDisplay::drawPointLinestrip(int32_t iSlice, int32_t cid, int32_t } } -GPUDisplay::vboList GPUDisplay::DrawSpacePointsTRD(int32_t iSlice, int32_t select, int32_t iCol) +GPUDisplay::vboList GPUDisplay::DrawSpacePointsTRD(int32_t iSector, int32_t select, int32_t iCol) { - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); if (iCol == 0) { for (uint32_t i = 0; i < mIOPtrs->nTRDTracklets; i++) { int32_t iSec = trdGeometry()->GetSector(mIOPtrs->trdTracklets[i].GetDetector()); - bool draw = iSlice == iSec && mGlobalPosTRD[i].w == select; + bool draw = iSector == iSec && mGlobalPosTRD[i].w == select; if (draw) { - mVertexBuffer[iSlice].emplace_back(mGlobalPosTRD[i].x, mGlobalPosTRD[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD[i].z); - mVertexBuffer[iSlice].emplace_back(mGlobalPosTRD2[i].x, mGlobalPosTRD2[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD2[i].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTRD[i].x, mGlobalPosTRD[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD[i].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTRD2[i].x, mGlobalPosTRD2[i].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosTRD2[i].z); } } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } -GPUDisplay::vboList GPUDisplay::DrawSpacePointsTOF(int32_t iSlice, int32_t select, int32_t iCol) +GPUDisplay::vboList GPUDisplay::DrawSpacePointsTOF(int32_t iSector, int32_t select, int32_t iCol) { - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); - if (iCol == 0 && iSlice == 0) { + if (iCol == 0 && iSector == 0) { for (uint32_t i = 0; i < mIOPtrs->nTOFClusters; i++) { - mVertexBuffer[iSlice].emplace_back(mGlobalPosTOF[i].x, mGlobalPosTOF[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTOF[i].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTOF[i].x, mGlobalPosTOF[i].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosTOF[i].z); } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } -GPUDisplay::vboList GPUDisplay::DrawSpacePointsITS(int32_t iSlice, int32_t select, int32_t iCol) +GPUDisplay::vboList GPUDisplay::DrawSpacePointsITS(int32_t iSector, int32_t select, int32_t iCol) { - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); - if (iCol == 0 && iSlice == 0 && mIOPtrs->itsClusters) { + if (iCol == 0 && iSector == 0 && mIOPtrs->itsClusters) { for (uint32_t i = 0; i < mIOPtrs->nItsClusters; i++) { - mVertexBuffer[iSlice].emplace_back(mGlobalPosITS[i].x, mGlobalPosITS[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosITS[i].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosITS[i].x, mGlobalPosITS[i].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosITS[i].z); } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } -GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSlice, int32_t select, uint32_t iCol) +GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSector, int32_t select, uint32_t iCol) { - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); if (mOverlayTFClusters.size() > 0 || iCol == 0 || mNCollissions) { - const int32_t firstCluster = (mOverlayTFClusters.size() > 1 && iCol > 0) ? mOverlayTFClusters[iCol - 1][iSlice] : 0; - const int32_t lastCluster = (mOverlayTFClusters.size() > 1 && iCol + 1 < mOverlayTFClusters.size()) ? mOverlayTFClusters[iCol][iSlice] : (mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSlice] : mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSlice] : 0); + const int32_t firstCluster = (mOverlayTFClusters.size() > 1 && iCol > 0) ? mOverlayTFClusters[iCol - 1][iSector] : 0; + const int32_t lastCluster = (mOverlayTFClusters.size() > 1 && iCol + 1 < mOverlayTFClusters.size()) ? mOverlayTFClusters[iCol][iSector] : (mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : mIOPtrs->clustersNative ? 
mIOPtrs->clustersNative->nClustersSector[iSector] + : 0); const bool checkClusterCollision = mQA && mNCollissions && mOverlayTFClusters.size() == 0 && mIOPtrs->clustersNative && mIOPtrs->clustersNative->clustersMCTruth; - for (int32_t cidInSlice = firstCluster; cidInSlice < lastCluster; cidInSlice++) { - const int32_t cid = GET_CID(iSlice, cidInSlice); + for (int32_t cidInSector = firstCluster; cidInSector < lastCluster; cidInSector++) { + const int32_t cid = GET_CID(iSector, cidInSector); #ifdef GPUCA_TPC_GEOMETRY_O2 if (checkClusterCollision) { const auto& labels = mIOPtrs->clustersNative->clustersMCTruth->getLabels(cid); @@ -170,7 +171,7 @@ GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSlice, int32_t select, uin } else if (mCfgH.markClusters) { int16_t flags; if (mParam->par.earlyTpcTransform) { - flags = mIOPtrs->clusterData[iSlice][cidInSlice].flags; + flags = mIOPtrs->clusterData[iSector][cidInSector].flags; } else { flags = mIOPtrs->clustersNative->clustersLinear[cid].getFlags(); } @@ -181,22 +182,22 @@ GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSlice, int32_t select, uin draw = (select == tMARKED) ? (fake) : (draw && !fake); } if (draw) { - mVertexBuffer[iSlice].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPos[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPos[cid].z); } } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawLinks(const GPUTPCTracker& tracker, int32_t id, bool dodown) { - int32_t iSlice = tracker.ISlice(); + int32_t iSector = tracker.ISector(); if (mCfgH.clustersOnly) { - return (vboList(0, 0, iSlice)); + return (vboList(0, 0, iSector)); } - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { const GPUTPCRow& row = tracker.Data().Row(i); @@ -204,10 +205,10 @@ GPUDisplay::vboList GPUDisplay::DrawLinks(const GPUTPCTracker& tracker, int32_t const GPUTPCRow& rowUp = tracker.Data().Row(i + 2); for (int32_t j = 0; j < row.NHits(); j++) { if (tracker.Data().HitLinkUpData(row, j) != CALINK_INVAL) { - const int32_t cid1 = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, j)); - const int32_t cid2 = GET_CID(iSlice, tracker.Data().ClusterDataIndex(rowUp, tracker.Data().HitLinkUpData(row, j))); - drawPointLinestrip(iSlice, cid1, id); - drawPointLinestrip(iSlice, cid2, id); + const int32_t cid1 = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, j)); + const int32_t cid2 = GET_CID(iSector, tracker.Data().ClusterDataIndex(rowUp, tracker.Data().HitLinkUpData(row, j))); + drawPointLinestrip(iSector, cid1, id); + drawPointLinestrip(iSector, cid2, id); } } } @@ -216,114 +217,114 @@ GPUDisplay::vboList GPUDisplay::DrawLinks(const GPUTPCTracker& tracker, int32_t const GPUTPCRow& rowDown = tracker.Data().Row(i - 2); for (int32_t j = 0; j < row.NHits(); j++) { if 
(tracker.Data().HitLinkDownData(row, j) != CALINK_INVAL) { - const int32_t cid1 = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, j)); - const int32_t cid2 = GET_CID(iSlice, tracker.Data().ClusterDataIndex(rowDown, tracker.Data().HitLinkDownData(row, j))); - drawPointLinestrip(iSlice, cid1, id); - drawPointLinestrip(iSlice, cid2, id); + const int32_t cid1 = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, j)); + const int32_t cid2 = GET_CID(iSector, tracker.Data().ClusterDataIndex(rowDown, tracker.Data().HitLinkDownData(row, j))); + drawPointLinestrip(iSector, cid1, id); + drawPointLinestrip(iSector, cid2, id); } } } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawSeeds(const GPUTPCTracker& tracker) { - int32_t iSlice = tracker.ISlice(); + int32_t iSector = tracker.ISector(); if (mCfgH.clustersOnly) { - return (vboList(0, 0, iSlice)); + return (vboList(0, 0, iSector)); } - size_t startCount = mVertexBufferStart[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t i = 0; i < *tracker.NStartHits(); i++) { const GPUTPCHitId& hit = tracker.TrackletStartHit(i); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); int32_t ir = hit.RowIndex(); calink ih = hit.HitIndex(); do { const GPUTPCRow& row = tracker.Data().Row(ir); - const int32_t cid = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, ih)); - drawPointLinestrip(iSlice, cid, tSEED); + const int32_t cid = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, ih)); + drawPointLinestrip(iSector, cid, tSEED); ir += 2; ih = tracker.Data().HitLinkUpData(row, ih); } while (ih != CALINK_INVAL); - 
insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); } - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawTracklets(const GPUTPCTracker& tracker) { - int32_t iSlice = tracker.ISlice(); + int32_t iSector = tracker.ISector(); if (mCfgH.clustersOnly) { - return (vboList(0, 0, iSlice)); + return (vboList(0, 0, iSector)); } - size_t startCount = mVertexBufferStart[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t i = 0; i < *tracker.NTracklets(); i++) { const GPUTPCTracklet& tracklet = tracker.Tracklet(i); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); float4 oldpos; for (int32_t j = tracklet.FirstRow(); j <= tracklet.LastRow(); j++) { const calink rowHit = tracker.TrackletRowHits()[tracklet.FirstHit() + (j - tracklet.FirstRow())]; if (rowHit != CALINK_INVAL && rowHit != CALINK_DEAD_CHANNEL) { const GPUTPCRow& row = tracker.Data().Row(j); - const int32_t cid = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, rowHit)); + const int32_t cid = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, rowHit)); oldpos = mGlobalPos[cid]; - drawPointLinestrip(iSlice, cid, tTRACKLET); + drawPointLinestrip(iSector, cid, tTRACKLET); } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); } - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawTracks(const GPUTPCTracker& tracker, int32_t global) { - int32_t iSlice = tracker.ISlice(); + int32_t iSector = 
tracker.ISector(); if (mCfgH.clustersOnly) { - return (vboList(0, 0, iSlice)); + return (vboList(0, 0, iSector)); } - size_t startCount = mVertexBufferStart[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t i = (global ? tracker.CommonMemory()->nLocalTracks : 0); i < (global ? *tracker.NTracks() : tracker.CommonMemory()->nLocalTracks); i++) { GPUTPCTrack& track = tracker.Tracks()[i]; - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); for (int32_t j = 0; j < track.NHits(); j++) { const GPUTPCHitId& hit = tracker.TrackHits()[track.FirstHitID() + j]; const GPUTPCRow& row = tracker.Data().Row(hit.RowIndex()); - const int32_t cid = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, hit.HitIndex())); - drawPointLinestrip(iSlice, cid, tSLICETRACK + global); + const int32_t cid = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, hit.HitIndex())); + drawPointLinestrip(iSector, cid, tSECTORTRACK + global); } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); } - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } -void GPUDisplay::DrawTrackITS(int32_t trackId, int32_t iSlice) +void GPUDisplay::DrawTrackITS(int32_t trackId, int32_t iSector) { const auto& trk = mIOPtrs->itsTracks[trackId]; for (int32_t k = 0; k < trk.getNClusters(); k++) { int32_t cid = mIOPtrs->itsTrackClusIdx[trk.getFirstClusterEntry() + k]; - mVertexBuffer[iSlice].emplace_back(mGlobalPosITS[cid].x, mGlobalPosITS[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosITS[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosITS[cid].x, mGlobalPosITS[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosITS[cid].z); mGlobalPosITS[cid].w = tITSATTACHED; } } GPUDisplay::vboList GPUDisplay::DrawFinalITS() { - const int32_t iSlice = 0; - size_t startCount = mVertexBufferStart[iSlice].size(); + const int32_t iSector = 0; + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t i = 0; i < mIOPtrs->nItsTracks; i++) { if (mITSStandaloneTracks[i]) { - size_t startCountInner = mVertexBuffer[iSlice].size(); - DrawTrackITS(i, iSlice); - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); + size_t startCountInner = mVertexBuffer[iSector].size(); + DrawTrackITS(i, iSector); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); } } - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } template -void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer) +void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer) { auto& vBuf = threadBuffer.vBuf; auto& buffer = threadBuffer.buffer; @@ -354,7 +355,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa throw std::runtime_error("invalid type"); } - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); bool drawing = false; if constexpr (std::is_same_v) { @@ -375,7 +376,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa if (mIOPtrs->tpcLinkTOF && mIOPtrs->tpcLinkTOF[i] != -1 && mIOPtrs->nTOFClusters) { int32_t cid = mIOPtrs->tpcLinkTOF[i]; drawing = true; - mVertexBuffer[iSlice].emplace_back(mGlobalPosTOF[cid].x, mGlobalPosTOF[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosTOF[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTOF[cid].x, mGlobalPosTOF[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTOF[cid].z); mGlobalPosTOF[cid].w = tTOFATTACHED; } } @@ -388,8 +389,8 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa continue; } drawing = true; - mVertexBuffer[iSlice].emplace_back(mGlobalPosTRD2[cid].x, mGlobalPosTRD2[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD2[cid].z); - mVertexBuffer[iSlice].emplace_back(mGlobalPosTRD[cid].x, mGlobalPosTRD[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTRD2[cid].x, mGlobalPosTRD2[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD2[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTRD[cid].x, mGlobalPosTRD[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD[cid].z); mGlobalPosTRD[cid].w = tTRDATTACHED; } }; @@ -429,21 +430,21 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa int32_t w = mGlobalPos[cid].w; if (drawing) { if (mCfgH.splitCETracks && lastSide != (mGlobalPos[cid].z < 0)) { - insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSector].size()); drawing = false; lastCluster = -1; } else { - drawPointLinestrip(iSlice, cid, tFINALTRACK, separateExtrapolatedTracksLimit); + drawPointLinestrip(iSector, cid, tFINALTRACK, separateExtrapolatedTracksLimit); } } if (w == separateExtrapolatedTracksLimit) { if (drawing) { - insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSector].size()); } drawing = false; } else { if (!drawing) { - startCountInner = mVertexBuffer[iSlice].size(); + startCountInner = mVertexBuffer[iSector].size(); if (lastCluster != -1 && (!mCfgH.splitCETracks || lastSide == (mGlobalPos[cid].z < 0))) { int32_t lastcid; if constexpr 
(std::is_same_v) { @@ -451,9 +452,9 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa } else { lastcid = &track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative) - mIOPtrs->clustersNative->clustersLinear; } - drawPointLinestrip(iSlice, lastcid, tFINALTRACK, separateExtrapolatedTracksLimit); + drawPointLinestrip(iSector, lastcid, tFINALTRACK, separateExtrapolatedTracksLimit); } - drawPointLinestrip(iSlice, cid, tFINALTRACK, separateExtrapolatedTracksLimit); + drawPointLinestrip(iSector, cid, tFINALTRACK, separateExtrapolatedTracksLimit); } drawing = true; } @@ -464,10 +465,10 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa // Print ITS part of track if constexpr (std::is_same_v) { if (mIOPtrs->tpcLinkITS && mIOPtrs->tpcLinkITS[i] != -1 && mIOPtrs->nItsTracks && mIOPtrs->nItsClusters) { - DrawTrackITS(mIOPtrs->tpcLinkITS[i], iSlice); + DrawTrackITS(mIOPtrs->tpcLinkITS[i], iSector); } } - insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSector].size()); break; } @@ -491,7 +492,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa } } - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); for (int32_t inFlyDirection = 0; inFlyDirection < 2; inFlyDirection++) { GPUTPCGMPhysicalTrackModel trkParam; float ZOffset = 0; @@ -503,7 +504,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa } if constexpr (std::is_same_v) { trkParam.Set(track->GetParam()); - alphaOrg = mParam->Alpha(iSlice); + alphaOrg = mParam->Alpha(iSector); } else { GPUTPCGMTrackParam t; convertTrackParam(t, *track); @@ -521,8 +522,8 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa if constexpr (std::is_same_v) { auto cl = 
mIOPtrs->mergedTrackHits[track->FirstClusterRef() + lastCluster]; const auto& cln = mIOPtrs->clustersNative->clustersLinear[cl.num]; - GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, cl.slice, cl.row, cln.getPad(), cln.getTime(), x, y, z); - ZOffset = mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(iSlice, track->GetParam().GetTZOffset(), mParam->continuousMaxTimeBin); + GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, cl.sector, cl.row, cln.getPad(), cln.getTime(), x, y, z); + ZOffset = mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(iSector, track->GetParam().GetTZOffset(), mParam->continuousMaxTimeBin); } else { uint8_t sector, row; auto cln = track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative, sector, row); @@ -539,7 +540,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa break; } - alphaOrg = mParam->Alpha(iSlice); + alphaOrg = mParam->Alpha(iSector); float c = cosf(alphaOrg); float s = sinf(alphaOrg); float mclocal[4]; @@ -577,7 +578,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa break; } float alpha = alphaOrg; - vecpod& useBuffer = iMC && inFlyDirection == 0 ? buffer : mVertexBuffer[iSlice]; + vecpod& useBuffer = iMC && inFlyDirection == 0 ? 
buffer : mVertexBuffer[iSector]; int32_t nPoints = 0; while (nPoints++ < 5000) { @@ -623,24 +624,24 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa if (inFlyDirection == 0) { if (iMC) { for (int32_t k = (int32_t)buffer.size() - 1; k >= 0; k--) { - mVertexBuffer[iSlice].emplace_back(buffer[k]); + mVertexBuffer[iSector].emplace_back(buffer[k]); } } else { - insertVertexList(vBuf[1], startCountInner, mVertexBuffer[iSlice].size()); - startCountInner = mVertexBuffer[iSlice].size(); + insertVertexList(vBuf[1], startCountInner, mVertexBuffer[iSector].size()); + startCountInner = mVertexBuffer[iSector].size(); } } } - insertVertexList(vBuf[iMC ? 3 : 2], startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(vBuf[iMC ? 3 : 2], startCountInner, mVertexBuffer[iSector].size()); } } } GPUDisplay::vboList GPUDisplay::DrawGrid(const GPUTPCTracker& tracker) { - int32_t iSlice = tracker.ISlice(); - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + int32_t iSector = tracker.ISector(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { const GPUTPCRow& row = tracker.Data().Row(i); for (int32_t j = 0; j <= (signed)row.Grid().Ny(); j++) { @@ -649,17 +650,17 @@ GPUDisplay::vboList GPUDisplay::DrawGrid(const GPUTPCTracker& tracker) float x = row.X() + mCfgH.xAdd; float y = row.Grid().YMin() + (float)j / row.Grid().StepYInv(); float zz1, zz2, yy1, yy2, xx1, xx2; - mParam->Slice2Global(tracker.ISlice(), x, y, z1, &xx1, &yy1, &zz1); - mParam->Slice2Global(tracker.ISlice(), x, y, z2, &xx2, &yy2, &zz2); - if (iSlice < 18) { + mParam->Sector2Global(tracker.ISector(), x, y, z1, &xx1, &yy1, &zz1); + mParam->Sector2Global(tracker.ISector(), x, y, z2, &xx2, &yy2, &zz2); + if (iSector < 18) { zz1 += mCfgH.zAdd; zz2 += mCfgH.zAdd; } else { zz1 -= mCfgH.zAdd; zz2 -= mCfgH.zAdd; } 
- mVertexBuffer[iSlice].emplace_back(xx1 * GL_SCALE_FACTOR, yy1 * GL_SCALE_FACTOR * mYFactor, zz1 * GL_SCALE_FACTOR); - mVertexBuffer[iSlice].emplace_back(xx2 * GL_SCALE_FACTOR, yy2 * GL_SCALE_FACTOR * mYFactor, zz2 * GL_SCALE_FACTOR); + mVertexBuffer[iSector].emplace_back(xx1 * GL_SCALE_FACTOR, yy1 * GL_SCALE_FACTOR * mYFactor, zz1 * GL_SCALE_FACTOR); + mVertexBuffer[iSector].emplace_back(xx2 * GL_SCALE_FACTOR, yy2 * GL_SCALE_FACTOR * mYFactor, zz2 * GL_SCALE_FACTOR); } for (int32_t j = 0; j <= (signed)row.Grid().Nz(); j++) { float y1 = row.Grid().YMin(); @@ -667,21 +668,21 @@ GPUDisplay::vboList GPUDisplay::DrawGrid(const GPUTPCTracker& tracker) float x = row.X() + mCfgH.xAdd; float z = row.Grid().ZMin() + (float)j / row.Grid().StepZInv(); float zz1, zz2, yy1, yy2, xx1, xx2; - mParam->Slice2Global(tracker.ISlice(), x, y1, z, &xx1, &yy1, &zz1); - mParam->Slice2Global(tracker.ISlice(), x, y2, z, &xx2, &yy2, &zz2); - if (iSlice < 18) { + mParam->Sector2Global(tracker.ISector(), x, y1, z, &xx1, &yy1, &zz1); + mParam->Sector2Global(tracker.ISector(), x, y2, z, &xx2, &yy2, &zz2); + if (iSector < 18) { zz1 += mCfgH.zAdd; zz2 += mCfgH.zAdd; } else { zz1 -= mCfgH.zAdd; zz2 -= mCfgH.zAdd; } - mVertexBuffer[iSlice].emplace_back(xx1 * GL_SCALE_FACTOR, yy1 * GL_SCALE_FACTOR * mYFactor, zz1 * GL_SCALE_FACTOR); - mVertexBuffer[iSlice].emplace_back(xx2 * GL_SCALE_FACTOR, yy2 * GL_SCALE_FACTOR * mYFactor, zz2 * GL_SCALE_FACTOR); + mVertexBuffer[iSector].emplace_back(xx1 * GL_SCALE_FACTOR, yy1 * GL_SCALE_FACTOR * mYFactor, zz1 * GL_SCALE_FACTOR); + mVertexBuffer[iSector].emplace_back(xx2 * GL_SCALE_FACTOR, yy2 * GL_SCALE_FACTOR * mYFactor, zz2 * GL_SCALE_FACTOR); } } - insertVertexList(tracker.ISlice(), startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(tracker.ISector(), startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, 
mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawGridTRD(int32_t sector) @@ -691,7 +692,7 @@ GPUDisplay::vboList GPUDisplay::DrawGridTRD(int32_t sector) size_t startCountInner = mVertexBuffer[sector].size(); auto* geo = trdGeometry(); if (geo) { - int32_t trdsector = NSLICES / 2 - 1 - sector; + int32_t trdsector = NSECTORS / 2 - 1 - sector; float alpha = geo->GetAlpha() / 2.f + geo->GetAlpha() * trdsector; if (trdsector >= 9) { alpha -= 2 * CAMath::Pi(); @@ -753,7 +754,7 @@ GPUDisplay::vboList GPUDisplay::DrawGridTRD(int32_t sector) size_t GPUDisplay::DrawGLScene_updateVertexList() { - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { mVertexBuffer[i].clear(); mVertexBufferStart[i].clear(); mVertexBufferCount[i].clear(); @@ -766,46 +767,46 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mGlobalPosTRD[i].w = tTRDCLUSTER; } - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { for (int32_t i = 0; i < N_POINTS_TYPE; i++) { - mGlDLPoints[iSlice][i].resize(mNCollissions); + mGlDLPoints[iSector][i].resize(mNCollissions); } for (int32_t i = 0; i < N_FINAL_TYPE; i++) { - mGlDLFinal[iSlice].resize(mNCollissions); + mGlDLFinal[iSector].resize(mNCollissions); } } int32_t numThreads = getNumThreads(); tbb::task_arena(numThreads).execute([&] { - if (mChain && (mChain->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking)) { - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { - GPUTPCTracker& tracker = (GPUTPCTracker&)sliceTracker(iSlice); + if (mChain && (mChain->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { + GPUTPCTracker& tracker = (GPUTPCTracker&)sectorTracker(iSector); tracker.SetPointersDataLinks(tracker.LinkTmpMemory()); - mGlDLLines[iSlice][tINITLINK] = DrawLinks(tracker, tINITLINK, true); + mGlDLLines[iSector][tINITLINK] = DrawLinks(tracker, 
tINITLINK, true); tracker.SetPointersDataLinks(mChain->rec()->Res(tracker.MemoryResLinks()).Ptr()); // clang-format off }, tbb::simple_partitioner()); // clang-format on - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { - const GPUTPCTracker& tracker = sliceTracker(iSlice); + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { + const GPUTPCTracker& tracker = sectorTracker(iSector); - mGlDLLines[iSlice][tLINK] = DrawLinks(tracker, tLINK); - mGlDLLines[iSlice][tSEED] = DrawSeeds(tracker); - mGlDLLines[iSlice][tTRACKLET] = DrawTracklets(tracker); - mGlDLLines[iSlice][tSLICETRACK] = DrawTracks(tracker, 0); - mGlDLGrid[iSlice] = DrawGrid(tracker); - if (iSlice < NSLICES / 2) { - mGlDLGridTRD[iSlice] = DrawGridTRD(iSlice); + mGlDLLines[iSector][tLINK] = DrawLinks(tracker, tLINK); + mGlDLLines[iSector][tSEED] = DrawSeeds(tracker); + mGlDLLines[iSector][tTRACKLET] = DrawTracklets(tracker); + mGlDLLines[iSector][tSECTORTRACK] = DrawTracks(tracker, 0); + mGlDLGrid[iSector] = DrawGrid(tracker); + if (iSector < NSECTORS / 2) { + mGlDLGridTRD[iSector] = DrawGridTRD(iSector); } // clang-format off }, tbb::simple_partitioner()); // clang-format on - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { - const GPUTPCTracker& tracker = sliceTracker(iSlice); - mGlDLLines[iSlice][tEXTRAPOLATEDTRACK] = DrawTracks(tracker, 1); // clang-format off + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { + const GPUTPCTracker& tracker = sectorTracker(iSector); + mGlDLLines[iSector][tEXTRAPOLATEDTRACK] = DrawTracks(tracker, 1); // clang-format off }, tbb::simple_partitioner()); // clang-format on } tbb::parallel_for(0, numThreads, [&](int32_t iThread) { mThreadTracks[iThread].resize(mNCollissions); for (int32_t i = 0; i < mNCollissions; i++) { - for (int32_t j = 0; j < NSLICES; j++) { + for (int32_t j = 0; j < NSECTORS; j++) { for (int32_t k = 0; k < 2; k++) { mThreadTracks[iThread][i][j][k].clear(); } @@ -837,19 +838,19 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() if 
(mCfgH.hideRejectedTracks && !track->OK()) { return; } - int32_t slice = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + track->NClusters() - 1].slice; + int32_t sector = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + track->NClusters() - 1].sector; uint32_t col = 0; if (mQA) { const auto& label = mQA->GetMCTrackLabel(i); #ifdef GPUCA_TPC_GEOMETRY_O2 col = mQA->GetMCLabelCol(label); #else - while (label.isValid() && col < mOverlayTFClusters.size() && mOverlayTFClusters[col][NSLICES] < label.track) { + while (label.isValid() && col < mOverlayTFClusters.size() && mOverlayTFClusters[col][NSECTORS] < label.track) { col++; } #endif } - mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][slice][0].emplace_back(i); + mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][sector][0].emplace_back(i); }); } for (uint32_t col = 0; col < mIOPtrs->nMCInfosTPCCol; col++) { @@ -866,11 +867,11 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() if (alpha < 0) { alpha += 2 * CAMath::Pi(); } - int32_t slice = alpha / (2 * CAMath::Pi()) * 18; + int32_t sector = alpha / (2 * CAMath::Pi()) * 18; if (mc.z < 0) { - slice += 18; + sector += 18; } - mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][slice][1].emplace_back(i); + mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][sector][1].emplace_back(i); }); } @@ -879,33 +880,33 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() prop.SetMaterialTPC(); prop.SetPolynomialField(&mParam->polynomialField); - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { int32_t numThread = GPUReconstruction::getHostThreadIndex(); for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { mThreadBuffers[numThread].clear(); for (int32_t iSet = 0; iSet < numThreads; iSet++) { if (mConfig.showTPCTracksFromO2Format) { - DrawFinal(iSlice, iCol, &prop, mThreadTracks[iSet][iCol][iSlice], mThreadBuffers[numThread]); + DrawFinal(iSector, iCol, &prop, 
mThreadTracks[iSet][iCol][iSector], mThreadBuffers[numThread]); } else { - DrawFinal(iSlice, iCol, &prop, mThreadTracks[iSet][iCol][iSlice], mThreadBuffers[numThread]); + DrawFinal(iSector, iCol, &prop, mThreadTracks[iSet][iCol][iSector], mThreadBuffers[numThread]); } } - vboList* list = &mGlDLFinal[iSlice][iCol][0]; + vboList* list = &mGlDLFinal[iSector][iCol][0]; for (int32_t i = 0; i < N_FINAL_TYPE; i++) { - size_t startCount = mVertexBufferStart[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t j = 0; j < mThreadBuffers[numThread].start[i].size(); j++) { - mVertexBufferStart[iSlice].emplace_back(mThreadBuffers[numThread].start[i][j]); - mVertexBufferCount[iSlice].emplace_back(mThreadBuffers[numThread].count[i][j]); + mVertexBufferStart[iSector].emplace_back(mThreadBuffers[numThread].start[i][j]); + mVertexBufferCount[iSector].emplace_back(mThreadBuffers[numThread].count[i][j]); } - list[i] = vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice); + list[i] = vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector); } } // clang-format off }, tbb::simple_partitioner()); // clang-format on - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSlice][i][iCol] = DrawClusters(iSlice, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawClusters(iSector, i, iCol); } } // clang-format off }, tbb::simple_partitioner()); // clang-format on @@ -914,35 +915,35 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mGlDLFinalITS = DrawFinalITS(); - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { for (int32_t i = N_POINTS_TYPE_TPC; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - 
mGlDLPoints[iSlice][i][iCol] = DrawSpacePointsTRD(iSlice, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawSpacePointsTRD(iSector, i, iCol); } } } - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { for (int32_t i = N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSlice][i][iCol] = DrawSpacePointsTOF(iSlice, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawSpacePointsTOF(iSector, i, iCol); } } - break; // TODO: Only slice 0 filled for now + break; // TODO: Only sector 0 filled for now } - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { for (int32_t i = N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF + N_POINTS_TYPE_ITS; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSlice][i][iCol] = DrawSpacePointsITS(iSlice, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawSpacePointsITS(iSector, i, iCol); } } - break; // TODO: Only slice 0 filled for now + break; // TODO: Only sector 0 filled for now } mUpdateVertexLists = 0; size_t totalVertizes = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { totalVertizes += mVertexBuffer[i].size(); } if (totalVertizes > 0xFFFFFFFF) { @@ -953,7 +954,7 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() if (!mUseMultiVBO) { size_t totalYet = mVertexBuffer[0].size(); mVertexBuffer[0].resize(totalVertizes); - for (int32_t i = 1; i < GPUCA_NSLICES; i++) { + for (int32_t i = 1; i < GPUCA_NSECTORS; i++) { for (uint32_t j = 0; j < mVertexBufferStart[i].size(); j++) { mVertexBufferStart[i][j] += totalYet; } @@ -963,7 +964,7 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() } } mBackend->loadDataToGPU(totalVertizes); - for (int32_t i = 0; i < 
(mUseMultiVBO ? GPUCA_NSLICES : 1); i++) { + for (int32_t i = 0; i < (mUseMultiVBO ? GPUCA_NSECTORS : 1); i++) { mVertexBuffer[i].clear(); } return totalVertizes; diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index f53fa185029f8..6fd70354c9486 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -44,8 +44,8 @@ void GPUDisplay::DrawGLScene_updateEventData() mCurrentClusters = mIOPtrs->clustersNative->nClustersTotal; } else { mCurrentClusters = 0; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - mCurrentClusters += mIOPtrs->nClusterData[iSlice]; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + mCurrentClusters += mIOPtrs->nClusterData[iSector]; } } if (mNMaxClusters < mCurrentClusters) { @@ -128,19 +128,19 @@ void GPUDisplay::DrawGLScene_updateEventData() } mUpdateTrackFilter = false; - mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, NSLICES, 1), float(0.f), [&](const tbb::blocked_range& r, float maxClusterZ) { - for (int32_t iSlice = r.begin(); iSlice < r.end(); iSlice++) { + mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, NSECTORS, 1), float(0.f), [&](const tbb::blocked_range& r, float maxClusterZ) { + for (int32_t iSector = r.begin(); iSector < r.end(); iSector++) { int32_t row = 0; - uint32_t nCls = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSlice] : (mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSlice] : 0); + uint32_t nCls = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : (mIOPtrs->clustersNative ? 
mIOPtrs->clustersNative->nClustersSector[iSector] : 0); for (uint32_t i = 0; i < nCls; i++) { int32_t cid; if (mParam->par.earlyTpcTransform) { - const auto& cl = mIOPtrs->clusterData[iSlice][i]; + const auto& cl = mIOPtrs->clusterData[iSector][i]; cid = cl.id; row = cl.row; } else { - cid = mIOPtrs->clustersNative->clusterOffset[iSlice][0] + i; - while (row < GPUCA_ROW_COUNT - 1 && mIOPtrs->clustersNative->clusterOffset[iSlice][row + 1] <= (uint32_t)cid) { + cid = mIOPtrs->clustersNative->clusterOffset[iSector][0] + i; + while (row < GPUCA_ROW_COUNT - 1 && mIOPtrs->clustersNative->clusterOffset[iSector][row + 1] <= (uint32_t)cid) { row++; } } @@ -149,22 +149,22 @@ void GPUDisplay::DrawGLScene_updateEventData() } float4* ptr = &mGlobalPos[cid]; if (mParam->par.earlyTpcTransform) { - const auto& cl = mIOPtrs->clusterData[iSlice][i]; - mParam->Slice2Global(iSlice, (mCfgH.clustersOnNominalRow ? mParam->tpcGeometry.Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); + const auto& cl = mIOPtrs->clusterData[iSector][i]; + mParam->Sector2Global(iSector, (mCfgH.clustersOnNominalRow ? mParam->tpcGeometry.Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); } else { float x, y, z; - const auto& cln = mIOPtrs->clustersNative->clusters[iSlice][0][i]; - GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSlice, row, cln.getPad(), cln.getTime(), x, y, z); + const auto& cln = mIOPtrs->clustersNative->clusters[iSector][0][i]; + GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSector, row, cln.getPad(), cln.getTime(), x, y, z); if (mCfgH.clustersOnNominalRow) { x = mParam->tpcGeometry.Row2X(row); } - mParam->Slice2Global(iSlice, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(iSector, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); } if (fabsf(ptr->z) > maxClusterZ) { maxClusterZ = fabsf(ptr->z); } - ptr->z += iSlice < 18 ? mCfgH.zAdd : -mCfgH.zAdd; + ptr->z += iSector < 18 ? 
mCfgH.zAdd : -mCfgH.zAdd; ptr->x *= GL_SCALE_FACTOR; ptr->y *= GL_SCALE_FACTOR; ptr->z *= GL_SCALE_FACTOR; @@ -186,7 +186,7 @@ void GPUDisplay::DrawGLScene_updateEventData() const auto& sp = mIOPtrs->trdSpacePoints[i]; int32_t iSec = trdGeometry()->GetSector(mIOPtrs->trdTracklets[i].GetDetector()); float4* ptr = &mGlobalPosTRD[i]; - mParam->Slice2Global(iSec, sp.getX() + mCfgH.xAdd, sp.getY(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(iSec, sp.getX() + mCfgH.xAdd, sp.getY(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); ptr->z += ptr->z > 0 ? trdZoffset : -trdZoffset; if (fabsf(ptr->z) > maxClusterZ) { maxClusterZ = fabsf(ptr->z); @@ -196,7 +196,7 @@ void GPUDisplay::DrawGLScene_updateEventData() ptr->z *= GL_SCALE_FACTOR; ptr->w = tTRDCLUSTER; ptr = &mGlobalPosTRD2[i]; - mParam->Slice2Global(iSec, sp.getX() + mCfgH.xAdd + 4.5f, sp.getY() + 1.5f * sp.getDy(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(iSec, sp.getX() + mCfgH.xAdd + 4.5f, sp.getY() + 1.5f * sp.getDy(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); ptr->z += ptr->z > 0 ? 
trdZoffset : -trdZoffset; if (fabsf(ptr->z) > maxClusterZ) { maxClusterZ = fabsf(ptr->z); @@ -212,7 +212,7 @@ void GPUDisplay::DrawGLScene_updateEventData() mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, mCurrentClustersTOF, 32), float(mMaxClusterZ), [&](const tbb::blocked_range& r, float maxClusterZ) { for (int32_t i = r.begin(); i < r.end(); i++) { float4* ptr = &mGlobalPosTOF[i]; - mParam->Slice2Global(mIOPtrs->tofClusters[i].getSector(), mIOPtrs->tofClusters[i].getX() + mCfgH.xAdd, mIOPtrs->tofClusters[i].getY(), mIOPtrs->tofClusters[i].getZ(), &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(mIOPtrs->tofClusters[i].getSector(), mIOPtrs->tofClusters[i].getX() + mCfgH.xAdd, mIOPtrs->tofClusters[i].getY(), mIOPtrs->tofClusters[i].getZ(), &ptr->x, &ptr->y, &ptr->z); float ZOffset = 0; if (mParam->par.continuousTracking) { float tofTime = mIOPtrs->tofClusters[i].getTime() * 1e-3 / o2::constants::lhc::LHCBunchSpacingNS / o2::tpc::constants::LHCBCPERTIMEBIN; diff --git a/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h b/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h index 23d382466ba22..88162ef29fda3 100644 --- a/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h +++ b/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h @@ -16,9 +16,7 @@ #define GPUDISPLAYSHADERS_H #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUDisplayShaders { @@ -468,7 +466,6 @@ void main() { } )"; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 4085bebee08c4..c973264bfde2a 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -13,13 +13,13 @@ # author David Rohr o2_gpu_kernel_file_list(ERRORS GPUErrors.cxx) -o2_gpu_kernel_file_list(TPCTRACKER ERRORS GPUTPCTrackParam.cxx GPUTPCTrack.cxx GPUTPCGrid.cxx GPUTPCRow.cxx GPUTPCTracker.cxx) +o2_gpu_kernel_file_list(TPCTRACKER ERRORS GPUTPCTrackParam.cxx 
GPUTPCTrack.cxx GPUTPCGrid.cxx GPUTPCTracker.cxx) o2_gpu_kernel_file_list(TPCTRACKLETCONS GPUTPCTrackletConstructor.cxx) -o2_gpu_kernel_file_list(TPCSLICEDATA TPCTRACKER GPUTPCSliceData.cxx) +o2_gpu_kernel_file_list(TPCSECTORDATA TPCTRACKER GPUTPCTrackingData.cxx) o2_gpu_kernel_file_list(TPCOCCUPANCY GPUTPCClusterOccupancyMap.cxx) o2_gpu_kernel_file_list(TPCDEDX GPUdEdx.cxx) o2_gpu_kernel_file_list(MATLUT MatLayerCylSet.cxx MatLayerCyl.cxx Ray.cxx) -o2_gpu_kernel_file_list(TPCMERGER ERRORS GPUTPCGMMerger.cxx GPUTPCGMSliceTrack.cxx GPUTPCGMTrackParam.cxx GPUTPCGMPhysicalTrackModel.cxx GPUTPCGMPropagator.cxx) +o2_gpu_kernel_file_list(TPCMERGER ERRORS GPUTPCGMMerger.cxx GPUTPCGMSectorTrack.cxx GPUTPCGMTrackParam.cxx GPUTPCGMPhysicalTrackModel.cxx GPUTPCGMPropagator.cxx) o2_gpu_kernel_file_list(O2PROPAGATOR TrackParametrization.cxx TrackParametrizationWithError.cxx Propagator.cxx TrackLTIntegral.cxx) o2_gpu_kernel_file_list(TPCCOMPRESSION GPUTPCCompressionTrackModel.cxx) o2_gpu_kernel_file_list(TPCDECOMPRESSION GPUTPCCompressionTrackModel.cxx ERRORS) @@ -31,17 +31,17 @@ o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRAC o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSlice" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB both) o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) 
o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO single int32_t n) o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB single) -o2_gpu_add_kernel("GPUTPCCreateSliceData" "= TPCTRACKER TPCSLICEDATA" LB single) +o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER" NO single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER" NO single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sliceTracks" "= TPCTRACKER" NO single) +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER" NO single) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO single int8_t parameter) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO single int8_t parameter) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO single int8_t parameter) @@ -51,9 +51,9 @@ o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCU o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map "uint32_t*" output) o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB simple int32_t mode) o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSlice) -o2_gpu_add_kernel("GPUTPCGMMergerSliceRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB simple int32_t iSlice) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSlice) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" 
LB simple int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector) o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t id) o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) @@ -62,10 +62,10 @@ o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMM o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB simple int8_t useOrigTrackParam int8_t mergeAll) o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB simple int8_t output) o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeSlicesPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t border0 int32_t border1 int8_t useOrigTrackParam) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSlice int8_t withinSlice int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t iSlice int8_t withinSlice int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSlice int8_t withinSlice int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t border0 int32_t border1 int8_t useOrigTrackParam) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector int8_t withinSector 
int8_t mergeMode) o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU TPCMERGER" NO simple gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB simple) o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB simple) @@ -98,7 +98,7 @@ o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" "GPUTPCCom o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB simple) o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB simple) o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB simple int32_t trackStart int32_t trackEnd) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB simple int32_t sliceStart int32_t nSlices) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB simple int32_t sectorStart int32_t nSectors) o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB simple) o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB simple) o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB simple) diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 015159fee24d7..552c82f1bd299 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -36,7 +36,7 @@ #include "GPUQA.h" #include "GPUTPCDef.h" -#include "GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUChainTracking.h" #include "GPUTPCTrack.h" #include "GPUTPCTracker.h" @@ -315,12 +315,12 @@ void GPUQA::createHist(T*& h, const char* name, Args... 
args) p.second->emplace_back(&h); } -namespace o2::gpu +namespace o2::gpu::internal { struct GPUQAGarbageCollection { std::tuple>, std::vector>, std::vector>, std::vector>, std::vector>> v; }; -} // namespace o2::gpu +} // namespace o2::gpu::internal template T* GPUQA::createGarbageCollected(Args... args) @@ -335,7 +335,7 @@ void GPUQA::clearGarbagageCollector() std::apply([](auto&&... args) { ((args.clear()), ...); }, mGarbageCollector->v); } -GPUQA::GPUQA(GPUChainTracking* chain, const GPUSettingsQA* config, const GPUParam* param) : mTracking(chain), mConfig(config ? *config : GPUQA_GetConfig(chain)), mParam(param ? param : &chain->GetParam()), mGarbageCollector(std::make_unique()) +GPUQA::GPUQA(GPUChainTracking* chain, const GPUSettingsQA* config, const GPUParam* param) : mTracking(chain), mConfig(config ? *config : GPUQA_GetConfig(chain)), mParam(param ? param : &chain->GetParam()), mGarbageCollector(std::make_unique()) { mMCEventOffset.resize(1, 0); } @@ -1027,8 +1027,8 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } else if (mTracking->GetParam().par.earlyTpcTransform) { comp = fabsf(trks[i].GetParam().GetZ() + trks[i].GetParam().GetTZOffset()) < fabsf(trks[revLabel].GetParam().GetZ() + trks[revLabel].GetParam().GetTZOffset()); } else { - float shift1 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[i].CSide() * GPUChainTracking::NSLICES / 2, trks[i].GetParam().GetTZOffset()); - float shift2 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[revLabel].CSide() * GPUChainTracking::NSLICES / 2, trks[revLabel].GetParam().GetTZOffset()); + float shift1 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[i].CSide() * GPUChainTracking::NSECTORS / 2, trks[i].GetParam().GetTZOffset()); + float shift2 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[revLabel].CSide() * 
GPUChainTracking::NSECTORS / 2, trks[revLabel].GetParam().GetTZOffset()); comp = fabsf(trks[i].GetParam().GetZ() + shift1) < fabsf(trks[revLabel].GetParam().GetZ() + shift2); } if (revLabel == -1 || !trks[revLabel].OK() || (trks[i].OK() && comp)) { @@ -1362,7 +1362,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } #ifdef GPUCA_TPC_GEOMETRY_O2 if (!mParam->par.earlyTpcTransform) { - float shift = side == 2 ? 0 : mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(side * GPUChainTracking::NSLICES / 2, param.GetTZOffset() - mc1.t0); + float shift = side == 2 ? 0 : mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(side * GPUChainTracking::NSECTORS / 2, param.GetTZOffset() - mc1.t0); return param.GetZ() + shift - mc1.z; } #endif @@ -1664,13 +1664,13 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx mNCl->Fill(track.NClustersFitted()); } if (mClNative && mTracking && mTracking->GetTPCTransformHelper()) { - for (uint32_t i = 0; i < GPUChainTracking::NSLICES; i++) { + for (uint32_t i = 0; i < GPUChainTracking::NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { for (uint32_t k = 0; k < mClNative->nClusters[i][j]; k++) { const auto& cl = mClNative->clusters[i][j][k]; float x, y, z; GPUTPCConvertImpl::convert(*mTracking->GetTPCTransformHelper()->getCorrMap(), mTracking->GetParam(), i, j, cl.getPad(), cl.getTime(), x, y, z); - mTracking->GetParam().Slice2Global(i, x, y, z, &x, &y, &z); + mTracking->GetParam().Sector2Global(i, x, y, z, &x, &y, &z); mClXY->Fill(x, y); } } @@ -1759,7 +1759,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx throw std::runtime_error("Cannot dump non o2::tpc::clusterNative clusters, need also hit attachmend and GPU tracks"); } uint32_t clid = 0; - for (uint32_t i = 0; i < GPUChainTracking::NSLICES; i++) { + for (uint32_t i = 0; i < GPUChainTracking::NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { for 
(uint32_t k = 0; k < mClNative->nClusters[i][j]; k++) { const auto& cl = mClNative->clusters[i][j][k]; @@ -1769,7 +1769,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx uint32_t track = attach & gputpcgmmergertypes::attachTrackMask; const auto& trk = mTracking->mIOPtrs.mergedTracks[track]; mTracking->GetTPCTransformHelper()->Transform(i, j, cl.getPad(), cl.getTime(), x, y, z, trk.GetParam().GetTZOffset()); - mTracking->GetParam().Slice2Global(i, x, y, z, &x, &y, &z); + mTracking->GetParam().Sector2Global(i, x, y, z, &x, &y, &z); } uint32_t extState = mTracking->mIOPtrs.mergedTrackHitStates ? mTracking->mIOPtrs.mergedTrackHitStates[clid] : 0; diff --git a/GPU/GPUTracking/qa/GPUQA.h b/GPU/GPUTracking/qa/GPUQA.h index 76774f740477f..32b0553700f90 100644 --- a/GPU/GPUTracking/qa/GPUQA.h +++ b/GPU/GPUTracking/qa/GPUQA.h @@ -33,9 +33,7 @@ typedef int16_t Color_t; #if !defined(GPUCA_BUILD_QA) || defined(GPUCA_GPUCODE) -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUQA { @@ -59,8 +57,7 @@ class GPUQA static bool IsInitialized() { return false; } void UpdateChain(GPUChainTracking* chain) {} }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #else @@ -89,7 +86,10 @@ namespace o2::gpu class GPUChainTracking; struct GPUParam; struct GPUTPCMCInfo; +namespace internal +{ struct GPUQAGarbageCollection; +} // namespace internal class GPUQA { @@ -324,7 +324,7 @@ class GPUQA template void createHist(T*& h, const char* name, Args... args); - std::unique_ptr mGarbageCollector; + std::unique_ptr mGarbageCollector; template T* createGarbageCollected(Args... 
args); void clearGarbagageCollector(); diff --git a/GPU/GPUTracking/qa/genEvents.cxx b/GPU/GPUTracking/qa/genEvents.cxx index 3bd4779dd13f0..627cfc5f9909a 100644 --- a/GPU/GPUTracking/qa/genEvents.cxx +++ b/GPU/GPUTracking/qa/genEvents.cxx @@ -47,10 +47,10 @@ namespace o2::gpu extern GPUSettingsStandalone configStandalone; } -int32_t genEvents::GetSlice(double GlobalPhi) +int32_t genEvents::GetSector(double GlobalPhi) { double phi = GlobalPhi; - // std::cout<<" GetSlice: phi = "<Fill(sigmaY); @@ -286,7 +286,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) // std::cout< 0.5 ) sigmaY = 0.5; // if( sigmaZ > 0.5 ) sigmaZ = 0.5; - c.sector = (t.GetZ() >= 0.) ? iSlice : iSlice + 18; + c.sector = (t.GetZ() >= 0.) ? iSector : iSector + 18; c.row = iRow; c.mcID = itr; c.x = t.GetX(); @@ -299,9 +299,9 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) std::vector labels; - std::unique_ptr clSlices[GPUChainTracking::NSLICES]; + std::unique_ptr clSectors[GPUChainTracking::NSECTORS]; - for (int32_t iSector = 0; iSector < (int32_t)GPUChainTracking::NSLICES; iSector++) // HLT Sector numbering, sectors go from 0 to 35, all spanning all rows from 0 to 158. + for (int32_t iSector = 0; iSector < (int32_t)GPUChainTracking::NSECTORS; iSector++) // HLT Sector numbering, sectors go from 0 to 35, all spanning all rows from 0 to 158. 
{ int32_t nNumberOfHits = 0; for (uint32_t i = 0; i < vClusters.size(); i++) { @@ -313,7 +313,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) mRec->mIOPtrs.nClusterData[iSector] = nNumberOfHits; GPUTPCClusterData* clusters = new GPUTPCClusterData[nNumberOfHits]; - clSlices[iSector].reset(clusters); + clSectors[iSector].reset(clusters); int32_t icl = 0; for (uint32_t i = 0; i < vClusters.size(); i++) { GenCluster& c = vClusters[i]; @@ -338,7 +338,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) mRec->mIOPtrs.clusterData[iSector] = clusters; } - // Create vector with cluster MC labels, clusters are counter from 0 to clusterId in the order they have been written above. No separation in slices. + // Create vector with cluster MC labels, clusters are counted from 0 to clusterId in the order they have been written above. No separation in sectors. mRec->mIOPtrs.nMCLabelsTPC = labels.size(); mRec->mIOPtrs.mcLabelsTPC = labels.data(); diff --git a/GPU/GPUTracking/qa/genEvents.h b/GPU/GPUTracking/qa/genEvents.h index 43c091099bcf0..fb3c5f22d61ef 100644 --- a/GPU/GPUTracking/qa/genEvents.h +++ b/GPU/GPUTracking/qa/genEvents.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUChainTracking; struct GPUParam; @@ -30,7 +28,7 @@ class genEvents public: genEvents(GPUChainTracking* rec) {} void InitEventGenerator() {} - int32_t GenerateEvent(const GPUParam& sliceParam, char* filename) { return 1; } + int32_t GenerateEvent(const GPUParam& sectorParam, char* filename) { return 1; } void FinishEventGenerator() {} static void RunEventGenerator(GPUChainTracking* rec){}; @@ -43,16 +41,16 @@ class genEvents public: genEvents(GPUChainTracking* rec) : mRec(rec) {} void InitEventGenerator(); - int32_t GenerateEvent(const GPUParam& sliceParam, char* filename); + int32_t GenerateEvent(const GPUParam& sectorParam, char* filename); void FinishEventGenerator(); static void 
RunEventGenerator(GPUChainTracking* rec); private: - int32_t GetSlice(double GlobalPhi); - int32_t GetDSlice(double LocalPhi); - double GetSliceAngle(int32_t iSlice); - int32_t RecalculateSlice(GPUTPCGMPhysicalTrackModel& t, int32_t& iSlice); + int32_t GetSector(double GlobalPhi); + int32_t GetDSector(double LocalPhi); + double GetSectorAngle(int32_t iSector); + int32_t RecalculateSector(GPUTPCGMPhysicalTrackModel& t, int32_t& iSector); double GetGaus(double sigma); TH1F* mClusterError[3][2] = {{nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}}; @@ -68,14 +66,13 @@ class genEvents }; const double mTwoPi = 2 * M_PI; - const double mSliceDAngle = mTwoPi / 18.; - const double mSliceAngleOffset = mSliceDAngle / 2; + const double mSectorDAngle = mTwoPi / 18.; + const double mSectorAngleOffset = mSectorDAngle / 2; GPUChainTracking* mRec; }; #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/utils/timer.h b/GPU/GPUTracking/utils/timer.h index 6365a63263cfe..44a01b04747cb 100644 --- a/GPU/GPUTracking/utils/timer.h +++ b/GPU/GPUTracking/utils/timer.h @@ -40,9 +40,7 @@ class HighResTimer static double GetFrequency(); static double GetTime(); -#ifndef GPUCODE static double Frequency; -#endif }; #endif diff --git a/GPU/Workflow/src/GPUWorkflowInternal.h b/GPU/Workflow/src/GPUWorkflowInternal.h index 2e30adbd0130f..7ac9c60048e20 100644 --- a/GPU/Workflow/src/GPUWorkflowInternal.h +++ b/GPU/Workflow/src/GPUWorkflowInternal.h @@ -29,10 +29,10 @@ namespace gpurecoworkflow_internals { struct GPURecoWorkflowSpec_TPCZSBuffers { - std::vector Pointers[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - std::vector Sizes[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - const void** Pointers2[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - const uint32_t* Sizes2[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; + std::vector 
Pointers[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + std::vector Sizes[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + const void** Pointers2[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + const uint32_t* Sizes2[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; }; struct GPURecoWorkflow_QueueObject { diff --git a/GPU/Workflow/src/GPUWorkflowPipeline.cxx b/GPU/Workflow/src/GPUWorkflowPipeline.cxx index fb23680266ae2..5aca7502d8e91 100644 --- a/GPU/Workflow/src/GPUWorkflowPipeline.cxx +++ b/GPU/Workflow/src/GPUWorkflowPipeline.cxx @@ -53,7 +53,7 @@ struct pipelinePrepareMessage { size_t magicWord = MAGIC_WORD; DataProcessingHeader::StartTime timeSliceId; GPUSettingsTF tfSettings; - size_t pointerCounts[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; + size_t pointerCounts[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; size_t pointersTotal; bool flagEndOfStream; }; @@ -181,12 +181,12 @@ int32_t GPURecoWorkflowSpec::handlePipeline(ProcessingContext& pc, GPUTrackingIn size_t ptrsTotal = 0; const void* firstPtr = nullptr; - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - if (firstPtr == nullptr && ptrs.tpcZS->slice[i].count[j]) { - firstPtr = ptrs.tpcZS->slice[i].zsPtr[j][0]; + if (firstPtr == nullptr && ptrs.tpcZS->sector[i].count[j]) { + firstPtr = ptrs.tpcZS->sector[i].zsPtr[j][0]; } - ptrsTotal += ptrs.tpcZS->slice[i].count[j]; + ptrsTotal += ptrs.tpcZS->sector[i].count[j]; } } @@ -202,11 +202,11 @@ int32_t GPURecoWorkflowSpec::handlePipeline(ProcessingContext& pc, GPUTrackingIn size_t* ptrBuffer = messageBuffer.data() + sizeof(preMessage) / sizeof(size_t); size_t ptrsCopied = 0; int32_t lastRegion = -1; - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { 
for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - preMessage.pointerCounts[i][j] = ptrs.tpcZS->slice[i].count[j]; - for (uint32_t k = 0; k < ptrs.tpcZS->slice[i].count[j]; k++) { - const void* curPtr = ptrs.tpcZS->slice[i].zsPtr[j][k]; + preMessage.pointerCounts[i][j] = ptrs.tpcZS->sector[i].count[j]; + for (uint32_t k = 0; k < ptrs.tpcZS->sector[i].count[j]; k++) { + const void* curPtr = ptrs.tpcZS->sector[i].zsPtr[j][k]; bool regionFound = lastRegion != -1 && (size_t)curPtr >= (size_t)mRegionInfos[lastRegion].ptr && (size_t)curPtr < (size_t)mRegionInfos[lastRegion].ptr + mRegionInfos[lastRegion].size; if (!regionFound) { for (uint32_t l = 0; l < mRegionInfos.size(); l++) { @@ -221,11 +221,11 @@ int32_t GPURecoWorkflowSpec::handlePipeline(ProcessingContext& pc, GPUTrackingIn LOG(fatal) << "Found a TPC ZS pointer outside of shared memory"; } ptrBuffer[ptrsCopied + k] = (size_t)curPtr - (size_t)mRegionInfos[lastRegion].ptr; - ptrBuffer[ptrsTotal + ptrsCopied + k] = ptrs.tpcZS->slice[i].nZSPtr[j][k]; + ptrBuffer[ptrsTotal + ptrsCopied + k] = ptrs.tpcZS->sector[i].nZSPtr[j][k]; ptrBuffer[2 * ptrsTotal + ptrsCopied + k] = mRegionInfos[lastRegion].managed; ptrBuffer[3 * ptrsTotal + ptrsCopied + k] = mRegionInfos[lastRegion].id; } - ptrsCopied += ptrs.tpcZS->slice[i].count[j]; + ptrsCopied += ptrs.tpcZS->sector[i].count[j]; } } @@ -353,10 +353,10 @@ void GPURecoWorkflowSpec::RunReceiveThread() context->tpcZSmeta.Pointers[0][0].resize(m->pointersTotal); context->tpcZSmeta.Sizes[0][0].resize(m->pointersTotal); int32_t lastRegion = -1; - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - context->tpcZS.slice[i].count[j] = m->pointerCounts[i][j]; - for (uint32_t k = 0; k < context->tpcZS.slice[i].count[j]; k++) { + context->tpcZS.sector[i].count[j] = m->pointerCounts[i][j]; + for (uint32_t k = 0; k < 
context->tpcZS.sector[i].count[j]; k++) { bool regionManaged = ptrBuffer[2 * m->pointersTotal + ptrsCopied + k]; size_t regionId = ptrBuffer[3 * m->pointersTotal + ptrsCopied + k]; bool regionFound = lastRegion != -1 && mRegionInfos[lastRegion].managed == regionManaged && mRegionInfos[lastRegion].id == regionId; @@ -375,9 +375,9 @@ void GPURecoWorkflowSpec::RunReceiveThread() context->tpcZSmeta.Pointers[0][0][ptrsCopied + k] = (void*)(ptrBuffer[ptrsCopied + k] + (size_t)mRegionInfos[lastRegion].ptr); context->tpcZSmeta.Sizes[0][0][ptrsCopied + k] = ptrBuffer[m->pointersTotal + ptrsCopied + k]; } - context->tpcZS.slice[i].zsPtr[j] = context->tpcZSmeta.Pointers[0][0].data() + ptrsCopied; - context->tpcZS.slice[i].nZSPtr[j] = context->tpcZSmeta.Sizes[0][0].data() + ptrsCopied; - ptrsCopied += context->tpcZS.slice[i].count[j]; + context->tpcZS.sector[i].zsPtr[j] = context->tpcZSmeta.Pointers[0][0].data() + ptrsCopied; + context->tpcZS.sector[i].nZSPtr[j] = context->tpcZSmeta.Sizes[0][0].data() + ptrsCopied; + ptrsCopied += context->tpcZS.sector[i].count[j]; } } context->ptrs.tpcZS = &context->tpcZS; diff --git a/GPU/Workflow/src/GPUWorkflowSpec.cxx b/GPU/Workflow/src/GPUWorkflowSpec.cxx index 06942eab476c6..aa4f3cfca1289 100644 --- a/GPU/Workflow/src/GPUWorkflowSpec.cxx +++ b/GPU/Workflow/src/GPUWorkflowSpec.cxx @@ -194,7 +194,7 @@ void GPURecoWorkflowSpec::init(InitContext& ic) // Configure the "GPU workflow" i.e. which steps we run on the GPU (or CPU) if (mSpecConfig.outputTracks || mSpecConfig.outputCompClusters || mSpecConfig.outputCompClustersFlat) { mConfig->configWorkflow.steps.set(GPUDataTypes::RecoStep::TPCConversion, - GPUDataTypes::RecoStep::TPCSliceTracking, + GPUDataTypes::RecoStep::TPCSectorTracking, GPUDataTypes::RecoStep::TPCMerging); mConfig->configWorkflow.outputs.set(GPUDataTypes::InOutType::TPCMergedTracks); mConfig->configWorkflow.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, mConfParam->rundEdx == -1 ? 
!mConfParam->synchronousProcessing : mConfParam->rundEdx); @@ -396,7 +396,7 @@ void GPURecoWorkflowSpec::processInputs(ProcessingContext& pc, D& tpcZSmeta, E& constexpr static size_t NEndpoints = o2::gpu::GPUTrackingInOutZS::NENDPOINTS; if (mSpecConfig.zsOnTheFly || mSpecConfig.zsDecoder) { - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { tpcZSmeta.Pointers[i][j].clear(); tpcZSmeta.Sizes[i][j].clear(); @@ -473,13 +473,13 @@ void GPURecoWorkflowSpec::processInputs(ProcessingContext& pc, D& tpcZSmeta, E& } int32_t totalCount = 0; - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { tpcZSmeta.Pointers2[i][j] = tpcZSmeta.Pointers[i][j].data(); tpcZSmeta.Sizes2[i][j] = tpcZSmeta.Sizes[i][j].data(); - tpcZS.slice[i].zsPtr[j] = tpcZSmeta.Pointers2[i][j]; - tpcZS.slice[i].nZSPtr[j] = tpcZSmeta.Sizes2[i][j]; - tpcZS.slice[i].count[j] = tpcZSmeta.Pointers[i][j].size(); + tpcZS.sector[i].zsPtr[j] = tpcZSmeta.Pointers2[i][j]; + tpcZS.sector[i].nZSPtr[j] = tpcZSmeta.Sizes2[i][j]; + tpcZS.sector[i].count[j] = tpcZSmeta.Pointers[i][j].size(); totalCount += tpcZSmeta.Pointers[i][j].size(); } } @@ -640,9 +640,9 @@ void GPURecoWorkflowSpec::run(ProcessingContext& pc) if (!(mTPCSectorMask & (1ul << i))) { if (ptrs.tpcZS) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - tpcZS.slice[i].zsPtr[j] = nullptr; - tpcZS.slice[i].nZSPtr[j] = nullptr; - tpcZS.slice[i].count[j] = 0; + tpcZS.sector[i].zsPtr[j] = nullptr; + tpcZS.sector[i].nZSPtr[j] = nullptr; + tpcZS.sector[i].count[j] = 0; } } }