Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9c8984c
Improve GPU filling kernel speed
ChSonnabend Oct 17, 2025
a075c43
Adjusting parameter bounds and additional GPU kernel optimizations
ChSonnabend Oct 18, 2025
587c3e6
Adding back if statement for early exit
ChSonnabend Oct 18, 2025
6e43257
const'ing + fixing CPU kernel
ChSonnabend Oct 18, 2025
bb795c4
Removing print statements
ChSonnabend Oct 19, 2025
f7cdc0b
Fixing CI build issue
ChSonnabend Oct 27, 2025
8c7d5f4
Merge branch 'dev' into devel
ChSonnabend Nov 12, 2025
a2aaf8e
Merge branch 'AliceO2Group:dev' into devel
ChSonnabend Nov 20, 2025
3775044
Working version of NN CCDB fetching and loading to file
ChSonnabend Nov 21, 2025
a963c01
Cleanup
ChSonnabend Nov 21, 2025
caf20fc
Please consider the following formatting changes
alibuild Nov 21, 2025
125f3e2
Merge pull request #38 from alibuild/alibot-cleanup-14841
ChSonnabend Nov 21, 2025
5284b01
Using char* buffer for model loading
ChSonnabend Nov 23, 2025
ab19782
Please consider the following formatting changes
alibuild Nov 23, 2025
5ce258c
Merge pull request #39 from alibuild/alibot-cleanup-14841
ChSonnabend Nov 23, 2025
4fed621
Bug-fix
ChSonnabend Nov 23, 2025
e7cd6fa
Working version of CCDB fetching and loading into ROOT class of std::…
ChSonnabend Nov 25, 2025
9ed60e9
Please consider the following formatting changes
alibuild Nov 25, 2025
ae1d630
Merge pull request #40 from alibuild/alibot-cleanup-14841
ChSonnabend Nov 25, 2025
6cba1f3
Disable dumpToFile by default
ChSonnabend Nov 25, 2025
5c6d214
Moving macro, adding o2-test
ChSonnabend Nov 25, 2025
979a8d5
Merge branch 'dev' into devel
ChSonnabend Nov 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Common/ML/include/ML/OrtInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class OrtModel
void initOptions(std::unordered_map<std::string, std::string> optionsMap);
void initEnvironment();
void initSession();
void initSessionFromBuffer(const char* buffer, size_t bufferSize);
void memoryOnDevice(int32_t = 0);
bool isInitialized() { return mInitialized; }
void resetSession();
Expand Down
18 changes: 18 additions & 0 deletions Common/ML/src/OrtInterface.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,24 @@ void OrtModel::initEnvironment()
(mPImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
}

void OrtModel::initSessionFromBuffer(const char* buffer, size_t bufferSize)
{
mPImplOrt->sessionOptions.AddConfigEntry("session.load_model_format", "ONNX");
mPImplOrt->sessionOptions.AddConfigEntry("session.use_ort_model_bytes_directly", "1");

mPImplOrt->session = std::make_unique<Ort::Session>(*mPImplOrt->env,
buffer,
bufferSize,
mPImplOrt->sessionOptions);
mPImplOrt->ioBinding = std::make_unique<Ort::IoBinding>(*mPImplOrt->session);

setIO();

if (mLoggingLevel < 2) {
LOG(info) << "(ORT) Model loaded successfully from buffer! (inputs: " << printShape(mInputShapes, mInputNames) << ", outputs: " << printShape(mOutputShapes, mInputNames) << ")";
}
}

void OrtModel::initSession()
{
if (mAllocateDeviceMemory) {
Expand Down
1 change: 0 additions & 1 deletion Detectors/TPC/base/test/testTPCCDBInterface.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

// o2 includes
#include "TPCBase/CDBInterface.h"
#include "TPCBase/CDBInterface.h"
#include "TPCBase/CalArray.h"
#include "TPCBase/CalDet.h"
#include "TPCBase/Mapper.h"
Expand Down
2 changes: 0 additions & 2 deletions Detectors/TPC/calibration/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ o2_add_library(TPCCalibration
src/CalibPadGainTracksBase.cxx
src/CalibLaserTracks.cxx
src/LaserTracksCalibrator.cxx
src/NeuralNetworkClusterizer.cxx
src/SACDecoder.cxx
src/IDCAverageGroup.cxx
src/IDCAverageGroupBase.cxx
Expand Down Expand Up @@ -84,7 +83,6 @@ o2_target_root_dictionary(TPCCalibration
include/TPCCalibration/FastHisto.h
include/TPCCalibration/CalibLaserTracks.h
include/TPCCalibration/LaserTracksCalibrator.h
include/TPCCalibration/NeuralNetworkClusterizer.h
include/TPCCalibration/SACDecoder.h
include/TPCCalibration/IDCAverageGroup.h
include/TPCCalibration/IDCAverageGroupBase.h
Expand Down

This file was deleted.

48 changes: 0 additions & 48 deletions Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx

This file was deleted.

1 change: 1 addition & 0 deletions GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ set(SRCS_DATATYPES
DataTypes/TPCPadBitMap.cxx
DataTypes/TPCZSLinkMapping.cxx
DataTypes/CalibdEdxContainer.cxx
DataTypes/ORTRootSerializer.cxx
DataTypes/CalibdEdxTrackTopologyPol.cxx
DataTypes/CalibdEdxTrackTopologySpline.cxx
DataTypes/GPUTRDTrackO2.cxx)
Expand Down
4 changes: 4 additions & 0 deletions GPU/GPUTracking/DataTypes/GPUDataTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class Cluster;
namespace tpc
{
class CalibdEdxContainer;
class ORTRootSerializer;
} // namespace tpc
} // namespace o2

Expand Down Expand Up @@ -182,6 +183,9 @@ struct GPUCalibObjectsTemplate { // use only pointers on PODs or flat objects he
typename S<o2::tpc::CalibdEdxContainer>::type* dEdxCalibContainer = nullptr;
typename S<o2::base::PropagatorImpl<float>>::type* o2Propagator = nullptr;
typename S<o2::itsmft::TopologyDictionary>::type* itsPatternDict = nullptr;

// NN clusterizer objects
typename S<o2::tpc::ORTRootSerializer>::type* nnClusterizerNetworks[3] = {nullptr, nullptr, nullptr};
};
typedef GPUCalibObjectsTemplate<DefaultPtr> GPUCalibObjects; // NOTE: These 2 must have identical layout since they are memcopied
typedef GPUCalibObjectsTemplate<ConstPtr> GPUCalibObjectsConst;
Expand Down
25 changes: 25 additions & 0 deletions GPU/GPUTracking/DataTypes/ORTRootSerializer.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file ORTRootSerializer.cxx
/// \author Christian Sonnabend <christian.sonnabend@cern.ch>

#include "ORTRootSerializer.h"
#include <cstring>

using namespace o2::tpc;

/// Store a private copy of the serialized ONNX model in the internal buffer
void ORTRootSerializer::setOnnxModel(const char* onnxModel, uint32_t size)
{
  // assign() sizes the buffer and copies the bytes in one call
  mModelBuffer.assign(onnxModel, onnxModel + size);
}
43 changes: 43 additions & 0 deletions GPU/GPUTracking/DataTypes/ORTRootSerializer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file ORTRootSerializer.h
/// \brief Class to serialize ONNX objects for ROOT snapshots of CCDB objects at runtime
/// \author Christian Sonnabend <christian.sonnabend@cern.ch>

#ifndef ALICEO2_TPC_ORTROOTSERIALIZER_H_
#define ALICEO2_TPC_ORTROOTSERIALIZER_H_

#include "GPUCommonRtypes.h"
#include <vector>
#include <string>

namespace o2::tpc
{

/// Holds a serialized ONNX model as a byte buffer so it can be written to /
/// read from ROOT files (e.g. as a CCDB object) via the ROOT dictionary.
class ORTRootSerializer
{
 public:
  ORTRootSerializer() = default;
  ~ORTRootSerializer() = default;

  /// Copy `size` bytes of a serialized ONNX model into the internal buffer
  void setOnnxModel(const char* onnxModel, uint32_t size);
  /// Pointer to the stored model bytes; valid only while this object is alive
  const char* getONNXModel() const { return mModelBuffer.data(); }
  /// Size of the stored model in bytes
  uint32_t getONNXModelSize() const { return static_cast<uint32_t>(mModelBuffer.size()); }

 private:
  std::vector<char> mModelBuffer; ///< buffer for serialization
  ClassDefNV(ORTRootSerializer, 1); // ROOT dictionary macro (class version 1) enabling I/O of this type
};

} // namespace o2::tpc

#endif // ALICEO2_TPC_ORTROOTSERIALIZER_H_
12 changes: 6 additions & 6 deletions GPU/GPUTracking/Definitions/GPUSettingsList.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,22 +277,22 @@ AddOption(nnClusterizerBoundaryFillValue, int, -1, "", 0, "Fill value for the bo
AddOption(nnClusterizerApplyNoiseSuppression, int, 1, "", 0, "Applies the NoiseSuppression kernel before the digits to the network are filled")
AddOption(nnClusterizerSetDeconvolutionFlags, int, 1, "", 0, "Runs the deconvolution kernel without overwriting the charge in order to make cluster-to-track attachment identical to heuristic CF")
AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The classification network path")
AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.")
AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path")
AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.")
AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).")
AddOption(nnEvalMode, std::string, "c1:r1", "", 0, "Concatention of modes, e.g. c1:r1 (classification class 1, regression class 1)")
AddOption(nnClusterizerUseClassification, int, 1, "", 0, "If 1, the classification output of the network is used to select clusters, else only the regression output is used and no clusters are rejected by classification")
AddOption(nnClusterizerForceGpuInputFill, int, 0, "", 0, "Forces to use the fillInputNNGPU function")
// CCDB
AddOption(nnLoadFromCCDB, int, 0, "", 0, "If 1 networks are fetched from ccdb, else locally")
AddOption(nnCCDBDumpToFile, int, 0, "", 0, "If 1, additionally dump fetched CCDB networks to nnLocalFolder")
AddOption(nnLocalFolder, std::string, ".", "", 0, "Local folder in which the networks will be fetched")
AddOption(nnCCDBURL, std::string, "http://ccdb-test.cern.ch:8080", "", 0, "The CCDB URL from where the network files are fetched")
AddOption(nnCCDBPath, std::string, "Users/c/csonnabe/TPC/Clusterization", "", 0, "Folder path containing the networks")
AddOption(nnCCDBWithMomentum, int, 1, "", 0, "Distinguishes between the network with and without momentum output for the regression")
AddOption(nnCCDBWithMomentum, std::string, "", "", 0, "Distinguishes between the network with and without momentum output for the regression")
AddOption(nnCCDBClassificationLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
AddOption(nnCCDBRegressionLayerType, std::string, "CNN", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
AddOption(nnCCDBBeamType, std::string, "PbPb", "", 0, "Distinguishes between networks trained for different beam types. Options: PbPb, pp")
AddOption(nnCCDBInteractionRate, int, 50, "", 0, "Distinguishes between networks for different interaction rates [kHz].")
AddOption(nnCCDBRegressionLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
AddOption(nnCCDBBeamType, std::string, "pp", "", 0, "Distinguishes between networks trained for different beam types. Options: pp, pPb, PbPb")
AddOption(nnCCDBInteractionRate, std::string, "500", "", 0, "Distinguishes between networks for different interaction rates [kHz].")
AddHelp("help", 'h')
EndConfig()

Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,6 @@
#pragma link C++ class o2::tpc::CalibdEdxTrackTopologyPol + ;
#pragma link C++ class o2::tpc::CalibdEdxTrackTopologySpline + ;
#pragma link C++ struct o2::tpc::CalibdEdxTrackTopologyPolContainer + ;
#pragma link C++ struct o2::tpc::ORTRootSerializer + ;

#endif
21 changes: 17 additions & 4 deletions GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#ifdef GPUCA_HAS_ONNX
#include "GPUTPCNNClusterizerKernels.h"
#include "GPUTPCNNClusterizerHost.h"
#include "ORTRootSerializer.h"
#endif

#ifdef GPUCA_O2_LIB
Expand Down Expand Up @@ -639,7 +640,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
// Maximum of 4 lanes supported
HighResTimer* nnTimers[12];

if (GetProcessingSettings().nn.applyNNclusterizer) {
if (nn_settings.applyNNclusterizer) {
int32_t deviceId = -1;
int32_t numLanes = GetProcessingSettings().nTPCClustererLanes;
int32_t maxThreads = mRec->getNKernelHostThreads(true);
Expand Down Expand Up @@ -677,7 +678,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
// }
// recreateMemoryAllocator = true;
(nnApplications[lane].mModelClass).initSession();
if (!nn_settings.nnLoadFromCCDB) {
(nnApplications[lane].mModelClass).initSession(); // loads from file
} else {
(nnApplications[lane].mModelClass).initSessionFromBuffer((processors()->calibObjects.nnClusterizerNetworks[0])->getONNXModel(), (processors()->calibObjects.nnClusterizerNetworks[0])->getONNXModelSize()); // loads from CCDB
}
}
if (nnApplications[lane].mModelsUsed[1]) {
SetONNXGPUStream(*(nnApplications[lane].mModelReg1).getSessionOptions(), lane, &deviceId);
Expand All @@ -688,7 +693,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
// (nnApplications[lane].mModelReg1).setEnv((nnApplications[lane].mModelClass).getEnv());
(nnApplications[lane].mModelReg1).initEnvironment();
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator);
(nnApplications[lane].mModelReg1).initSession();
if (!nn_settings.nnLoadFromCCDB) {
(nnApplications[lane].mModelReg1).initSession(); // loads from file
} else {
(nnApplications[lane].mModelReg1).initSessionFromBuffer((processors()->calibObjects.nnClusterizerNetworks[1])->getONNXModel(), (processors()->calibObjects.nnClusterizerNetworks[1])->getONNXModelSize()); // loads from CCDB
}
}
if (nnApplications[lane].mModelsUsed[2]) {
SetONNXGPUStream(*(nnApplications[lane].mModelReg2).getSessionOptions(), lane, &deviceId);
Expand All @@ -699,7 +708,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
// (nnApplications[lane].mModelReg2).setEnv((nnApplications[lane].mModelClass).getEnv());
(nnApplications[lane].mModelReg2).initEnvironment();
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
(nnApplications[lane].mModelReg2).initSession();
if (!nn_settings.nnLoadFromCCDB) {
(nnApplications[lane].mModelReg2).initSession(); // loads from file
} else {
(nnApplications[lane].mModelReg2).initSessionFromBuffer((processors()->calibObjects.nnClusterizerNetworks[2])->getONNXModel(), (processors()->calibObjects.nnClusterizerNetworks[2])->getONNXModelSize()); // loads from CCDB
}
}
if (nn_settings.nnClusterizerVerbosity > 0) {
LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
std::vector<std::string> evalMode = o2::utils::Str::tokenize(settings.nnEvalMode, ':');

if (settings.nnLoadFromCCDB) {
reg_model_path = settings.nnLocalFolder + "/net_regression_c1.onnx"; // Needs to be set identical to NeuralNetworkClusterizer.cxx, otherwise the networks might be loaded from the wrong place
reg_model_path = settings.nnLocalFolder + "/net_regression_c1.onnx"; // Needs to be set identical to GPUWorkflowSpec.cxx, otherwise the networks might be loaded from the wrong place
if (evalMode[0] == "c1") {
class_model_path = settings.nnLocalFolder + "/net_classification_c1.onnx";
} else if (evalMode[0] == "c2") {
Expand Down
7 changes: 6 additions & 1 deletion GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ class GPURecoWorkflowSpec : public o2::framework::Task
bool tpcTriggerHandling = false;
bool isITS3 = false;
bool useFilteredOutputSpecs = false;

// NN clusterizer
bool nnLoadFromCCDB = false;
bool nnDumpToFile = false;
std::vector<std::string> nnEvalMode;
};

GPURecoWorkflowSpec(CompletionPolicyData* policyData, Config const& specconfig, std::vector<int32_t> const& tpcsectors, uint64_t tpcSectorMask, std::shared_ptr<o2::base::GRPGeomRequest>& ggr, std::function<bool(o2::framework::DataProcessingHeader::StartTime)>** gPolicyOrder = nullptr);
Expand Down Expand Up @@ -230,7 +235,7 @@ class GPURecoWorkflowSpec : public o2::framework::Task
uint32_t mNextThreadIndex = 0;
bool mUpdateGainMapCCDB = true;
std::unique_ptr<o2::gpu::GPUSettingsTF> mTFSettings;
std::unique_ptr<o2::gpu::GPUSettingsProcessingNNclusterizer> mNNClusterizerSettings;
std::map<std::string, std::string> nnCCDBSettings;

Config mSpecConfig;
std::shared_ptr<o2::base::GRPGeomRequest> mGGR;
Expand Down
Loading