diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 816ee43d50b15..dd4cd6ef0be96 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -977,20 +977,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN; GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane]; - // // bool recreateMemoryAllocator = false; - // if (lane == 0) { - // (nnApplications[lane].mModelClass).initEnvironment(); - // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 0); - // } - // // recreateMemoryAllocator = true; - // (nnApplications[lane].mModelClass).initSession(); - // (nnApplications[lane].mModelReg1).initSession(); - - int withMC = (doGPU && propagateMCLabels); - - if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { + // int withMC = (doGPU && propagateMCLabels); + + if (nn_settings.nnClusterizerApplyCfDeconvolution) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true); - DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } else if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, false); } @@ -1001,15 +991,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) size_t iSize = CAMath::Min((uint)clustererNNShadow.mNnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); // auto start0 = std::chrono::high_resolution_clock::now(); - runKernel({GetGrid(iSize * clustererNNShadow.mNnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, batchStart); // Filling the data + runKernel({GetGrid(iSize * clustererNNShadow.mNnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); // Filling the data if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, batchStart); // Filling the regression data + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); // Filling the regression data } - // auto stop0 = std::chrono::high_resolution_clock::now(); - // auto start1 = std::chrono::high_resolution_clock::now(); - // NN evaluations if (clustererNNShadow.mNnInferenceInputDType == 0) { if (clustererNNShadow.mNnInferenceOutputDType == 0) { @@ -1055,55 +1042,27 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } - // auto stopNNs = std::chrono::high_resolution_clock::now(); - // Publishing kernels if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Assigning class labels + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels } else { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Assigning class labels + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels } if (!clustererNNShadow.mNnClusterizerUseCfRegression) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Publishing class 1 regression results + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 1 regression results if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Publishing class 2 regression results + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 2 regression results } } - - // for(int i = 0; i < iSize; ++i) { - // if(clustererNNShadow.mOutputDataClass[i + batchStart] > 1) { - // LOG(info) << "WARNING ORT: Output of " << i + batchStart << " / " << clusterer.mPmemory->counters.nClusters << " is " << clustererNNShadow.mModelProbabilities_16[i].ToFloat() << " and " << clustererNNShadow.mOutputDataClass[i + batchStart] << " thresh " << clustererNNShadow.mNnClassThreshold << " instead of 0 or 1. Please check the model and the input data."; - // // std::string input = "["; - // // for(int j = 0; j < clustererNNShadow.mNnClusterizerElementSize; j++){ - // // input += std::to_string(clustererNNShadow.mInputData_16[i * clustererNNShadow.mNnClusterizerElementSize + j].ToFloat()) + ", "; - // // } - // // input += "]"; - // // LOG(info) << "Input is: " << input; - // } - // } - - // auto stop1 = std::chrono::high_resolution_clock::now(); - - // time_networks += std::chrono::duration_cast(stopNNs - start1).count() / 1e9; - // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; - // time_fill += std::chrono::duration_cast(stop0 - start0).count() / 1e9; } + if (clustererNNShadow.mNnClusterizerUseCfRegression) { - // auto start1 = std::chrono::high_resolution_clock::now(); - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 - // auto stop1 = std::chrono::high_resolution_clock::now(); - // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; + if(!nn_settings.nnClusterizerApplyCfDeconvolution) { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true); + } + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 } - // if (clustererNNShadow.mNnClusterizerVerbosity < 3) { - // int acceptedClusters = 0; - // for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) { - // if(clustererNNShadow.mOutputDataClass[i] > 1 || clustererNNShadow.mOutputDataClass[i] < 0) { - // LOG(info) << "WARNING ORT 2: " << clustererNNShadow.mOutputDataClass[i] << " for index " << i << " / " << clusterer.mPmemory->counters.nClusters; - // } - // acceptedClusters += clustererNNShadow.mOutputDataClass[i]; - // } - // LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; networks: " << time_networks << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << (int32_t)clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s"; - // } #else GPUFatal("Project not compiled with neural network clusterization. Aborting."); #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 58dd49630d8e6..bc8d26954b5dc 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -127,6 +127,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfChargePos peak = clusterer.mPfilteredPeakPositions[idx + batchStart]; + clustererNN.mClusterFlags[2 * idx] = 0; + clustererNN.mClusterFlags[2 * idx + 1] = 0; for (int i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; CfChargePos tmp_pos = peak.delta(d);