Skip to content

Commit 0c1cfb7

Browse files
committed
Adding separate functions (init and initClusterizer). Now the host process only needs one instance and one initialization.
1 parent c0bc918 commit 0c1cfb7

File tree

3 files changed

+26
-8
lines changed

3 files changed

+26
-8
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -612,14 +612,16 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
612612
}
613613

614614
#ifdef GPUCA_HAS_ONNX
615+
const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
616+
GPUTPCNNClusterizerHost nnApplication; // Potentially this needs to be GPUTPCNNClusterizerHost nnApplication[NSECTORS]. Technically ONNX ->Run() is thread-safe at inference time since it's read-only
615617
if (GetProcessingSettings().nn.applyNNclusterizer) {
616618
uint32_t maxClusters = 0;
619+
nnApplication.init(nn_settings);
617620
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
618621
maxClusters = std::max(maxClusters, processors()->tpcClusterer[iSector].mNMaxClusters);
619622
}
620623
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
621624
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector];
622-
const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
623625
clustererNN.nnClusterizerUseCfRegression = nn_settings.nnClusterizerUseCfRegression;
624626
clustererNN.nnClusterizerSizeInputRow = nn_settings.nnClusterizerSizeInputRow;
625627
clustererNN.nnClusterizerSizeInputPad = nn_settings.nnClusterizerSizeInputPad;
@@ -640,7 +642,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
640642
clustererNN.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity;
641643
}
642644
clustererNN.nnClusterizerDtype = nn_settings.nnInferenceDtype.find("32") != std::string::npos;
643-
GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN);
645+
nnApplication.initClusterizer(nn_settings, clustererNN);
644646
AllocateRegisteredMemory(clustererNN.mMemoryId);
645647
}
646648
}
@@ -916,7 +918,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
916918
#ifdef GPUCA_HAS_ONNX
917919
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector];
918920
const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
919-
GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN);
920921
int withMC = (doGPU && propagateMCLabels);
921922

922923
if (clustererNN.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,12 @@
2121

2222
using namespace o2::gpu;
2323

24-
GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clusterer)
24+
GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings)
25+
{
26+
init(settings);
27+
}
28+
29+
void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& settings)
2530
{
2631
OrtOptions = {
2732
{"model-path", settings.nnClassificationPath},
@@ -37,21 +42,30 @@ GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNcl
3742
{"logging-level", std::to_string(settings.nnInferenceVerbosity)}};
3843

3944
model_class.init(OrtOptions);
40-
clusterer.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1];
4145

42-
reg_model_paths = o2::utils::Str::tokenize(settings.nnRegressionPath, ':');
46+
reg_model_paths = splitString(settings.nnRegressionPath, ":");
4347

4448
if (!settings.nnClusterizerUseCfRegression) {
4549
if (model_class.getNumOutputNodes()[0][1] == 1 || reg_model_paths.size() == 1) {
4650
OrtOptions["model-path"] = reg_model_paths[0];
4751
model_reg_1.init(OrtOptions);
48-
clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1];
4952
} else {
5053
OrtOptions["model-path"] = reg_model_paths[0];
5154
model_reg_1.init(OrtOptions);
52-
clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1];
5355
OrtOptions["model-path"] = reg_model_paths[1];
5456
model_reg_2.init(OrtOptions);
57+
}
58+
}
59+
}
60+
61+
void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clusterer)
62+
{
63+
clusterer.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1];
64+
if (!settings.nnClusterizerUseCfRegression) {
65+
if (model_class.getNumOutputNodes()[0][1] == 1 || reg_model_paths.size() == 1) {
66+
clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1];
67+
} else {
68+
clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1];
5569
clusterer.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1];
5670
}
5771
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ class GPUTPCNNClusterizerHost
3939
GPUTPCNNClusterizerHost() = default;
4040
GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&);
4141

42+
void init(const GPUSettingsProcessingNNclusterizer&);
43+
void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&);
44+
4245
void networkInference(o2::ml::OrtModel model, GPUTPCNNClusterizer& clusterer, size_t size, float* output, int32_t dtype);
4346

4447
std::unordered_map<std::string, std::string> OrtOptions;

0 commit comments

Comments
 (0)