From 225f744de8523575f06897410c1b35000db0f9c0 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Thu, 31 Jul 2025 23:50:54 +0200 Subject: [PATCH] bug-fix for memory allocation --- .../Global/GPUChainTrackingClusterizer.cxx | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 07b332db1fc12..846df352d3a34 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -643,26 +643,28 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) // Maximum of 4 lanes supported HighResTimer* nnTimers[12]; - if (GetProcessingSettings().nn.applyNNclusterizer && GetProcessingSettings().debugLevel >= 1) { - nnTimers[0] = &getTimer("GPUTPCNNClusterizer_ONNXClassification_0_", 0); - nnTimers[1] = &getTimer("GPUTPCNNClusterizer_ONNXRegression_1_", 1); - nnTimers[2] = &getTimer("GPUTPCNNClusterizer_ONNXRegression2_2_", 2); - nnTimers[3] = &getTimer("GPUTPCNNClusterizer_ONNXClassification_0_", 3); - nnTimers[4] = &getTimer("GPUTPCNNClusterizer_ONNXRegression_1_", 4); - nnTimers[5] = &getTimer("GPUTPCNNClusterizer_ONNXRegression2_2_", 5); - nnTimers[6] = &getTimer("GPUTPCNNClusterizer_ONNXClassification_0_", 6); - nnTimers[7] = &getTimer("GPUTPCNNClusterizer_ONNXRegression_1_", 7); - nnTimers[8] = &getTimer("GPUTPCNNClusterizer_ONNXRegression2_2_", 8); - nnTimers[9] = &getTimer("GPUTPCNNClusterizer_ONNXClassification_0_", 9); - nnTimers[10] = &getTimer("GPUTPCNNClusterizer_ONNXRegression_1_", 10); - nnTimers[11] = &getTimer("GPUTPCNNClusterizer_ONNXRegression2_2_", 11); - } if (GetProcessingSettings().nn.applyNNclusterizer) { int32_t deviceId = -1; int32_t numLanes = GetProcessingSettings().nTPCClustererLanes; int32_t maxThreads = mRec->getNKernelHostThreads(true); // bool recreateMemoryAllocator = false; + + if (GetProcessingSettings().debugLevel >= 1) { + nnTimers[0] = &getTimer("GPUTPCNNClusterizer_ONNXClassification_0_", 0); + nnTimers[1] = &getTimer("GPUTPCNNClusterizer_ONNXRegression_1_", 1); + nnTimers[2] = &getTimer("GPUTPCNNClusterizer_ONNXRegression2_2_", 2); + nnTimers[3] = &getTimer("GPUTPCNNClusterizer_ONNXClassification_0_", 3); + nnTimers[4] = &getTimer("GPUTPCNNClusterizer_ONNXRegression_1_", 4); + nnTimers[5] = &getTimer("GPUTPCNNClusterizer_ONNXRegression2_2_", 5); + nnTimers[6] = &getTimer("GPUTPCNNClusterizer_ONNXClassification_0_", 6); + nnTimers[7] = &getTimer("GPUTPCNNClusterizer_ONNXRegression_1_", 7); + nnTimers[8] = &getTimer("GPUTPCNNClusterizer_ONNXRegression2_2_", 8); + nnTimers[9] = &getTimer("GPUTPCNNClusterizer_ONNXClassification_0_", 9); + nnTimers[10] = &getTimer("GPUTPCNNClusterizer_ONNXRegression_1_", 10); + nnTimers[11] = &getTimer("GPUTPCNNClusterizer_ONNXRegression2_2_", 11); + } + mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) { nnApplications[lane].init(nn_settings, GetProcessingSettings().deterministicGPUReconstruction); if (nnApplications[lane].mModelsUsed[0]) { @@ -708,7 +710,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId; } }); - mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t sector) { + for (int32_t sector = 0; sector < NSECTORS; sector++) { GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector]; GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[sector] : clustererNN; int32_t lane = sector % numLanes; @@ -725,7 +727,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) AllocateRegisteredMemory(clustererNN.mMemoryId); // nnApplications[lane].createBoundary(clustererNNShadow); // nnApplications[lane].createIndexLookup(clustererNNShadow); - }); + } if (doGPU) { WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); }