Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -709,19 +709,21 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId;
}
});
const int16_t maxFragmentLen = GetProcessingSettings().overrideClusterizerFragmentLen;
const uint32_t maxAllowedTimebin = param().par.continuousTracking ? std::max<int32_t>(param().continuousMaxTimeBin, maxFragmentLen) : TPC_MAX_TIME_BIN_TRIGGERED;
for (int32_t sector = 0; sector < NSECTORS; sector++) {
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector];
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[sector] : clustererNN;
int32_t lane = sector % numLanes;
clustererNN.mDeviceId = deviceId;
clustererNN.mISector = sector;
clustererNN.mNnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters;
nnApplications[lane].initClusterizer(nn_settings, clustererNN);
nnApplications[lane].initClusterizer(nn_settings, clustererNN, maxFragmentLen, maxAllowedTimebin);
if (doGPU) {
clustererNNShadow.mDeviceId = deviceId;
clustererNNShadow.mISector = sector;
clustererNNShadow.mNnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters;
nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow);
nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow, maxFragmentLen, maxAllowedTimebin);
}
if (nn_settings.nnClusterizerVerbosity > 2) {
LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Processor initialized. Sector " << sector << ", lane " << lane << ", max clusters " << clustererNN.mNnClusterizerTotalClusters << " (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
Expand Down Expand Up @@ -1051,7 +1053,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)

// NN evaluations
if(clustererNNShadow.mNnClusterizerUseClassification) {
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane]->Start(); }
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Start(); }
if (clustererNNShadow.mNnInferenceInputDType == 0) {
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16);
Expand All @@ -1065,13 +1067,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32);
}
}
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane]->Stop(); }
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Stop(); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time...
if (nn_settings.nnClusterizerVerbosity > 3) {
LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN classification inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
}
}
if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 1]->Start(); }
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Start(); }
if (clustererNNShadow.mNnInferenceInputDType == 0) {
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
(nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16);
Expand All @@ -1085,9 +1087,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
(nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32);
}
}
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 1]->Stop(); }
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Stop(); }
if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) {
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 2]->Start(); }
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Start(); }
if (clustererNNShadow.mNnInferenceInputDType == 0) {
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
(nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16);
Expand All @@ -1101,7 +1103,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
(nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32);
}
}
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 2]->Stop(); }
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Stop(); }
}
if (nn_settings.nnClusterizerVerbosity > 3) {
LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN regression inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
Expand Down
4 changes: 4 additions & 0 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ class GPUTPCNNClusterizer : public GPUProcessor
int32_t mISector = -1;
int32_t mDeviceId = -1;

// charge array boundaries
// Both members default to -1 and are assigned in GPUTPCNNClusterizerHost::initClusterizer.
// Fragment length; initClusterizer stores TPC_MAX_FRAGMENT_LEN_GPU here when it is called with -1.
int32_t maxFragmentLen = -1;
// NOTE(review): when initClusterizer is called with -1 it also stores TPC_MAX_FRAGMENT_LEN_GPU here
// (a fragment length, not a time-bin limit) -- confirm that this is the intended default.
int32_t maxAllowedTimebin = -1; // == tpcMaxTimeBin

// GPU optimizations
uint32_t mNnClusterizerFullRowSize = 0;
uint32_t mNnClusterizerFullPadSize = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "GPUReconstruction.h"
#include "GPUTPCGeometry.h"
#include "DataFormatsTPC/Constants.h"
#include "clusterFinderDefs.h"

#ifdef GPUCA_HAS_ONNX
#include <onnxruntime_cxx_api.h>
Expand Down Expand Up @@ -84,7 +85,7 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
}
}

void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN)
void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN, int32_t maxFragmentLen, int32_t maxAllowedTimebin)
{
clustererNN.mNnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression;
clustererNN.mNnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow;
Expand All @@ -109,6 +110,8 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold;
clustererNN.mNnClusterizerUseClassification = settings.nnClusterizerUseClassification;
clustererNN.mNnClusterizerSetDeconvolutionFlags = (bool)settings.nnClusterizerSetDeconvolutionFlags;
clustererNN.maxFragmentLen = maxFragmentLen == -1 ? TPC_MAX_FRAGMENT_LEN_GPU : maxFragmentLen;
clustererNN.maxAllowedTimebin = maxAllowedTimebin == -1 ? TPC_MAX_FRAGMENT_LEN_GPU : maxAllowedTimebin;
if (clustererNN.mNnSigmoidTrafoClassThreshold) {
clustererNN.mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class GPUTPCNNClusterizerHost
GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings, bool useDeterministicMode = false) { init(settings, useDeterministicMode); }

void init(const GPUSettingsProcessingNNclusterizer&, bool = false);
void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&);
void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&, int32_t = -1, int32_t = -1);
void createBoundary(GPUTPCNNClusterizer&);
void createIndexLookup(GPUTPCNNClusterizer&);

Expand Down
Loading