Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9c8984c
Improve GPU filling kernel speed
ChSonnabend Oct 17, 2025
a075c43
Adjusting parameter bounds and additional GPU kernel optimizations
ChSonnabend Oct 18, 2025
587c3e6
Adding back if statement for early exit
ChSonnabend Oct 18, 2025
6e43257
const'ing + fixing CPU kernel
ChSonnabend Oct 18, 2025
bb795c4
Removing print statements
ChSonnabend Oct 19, 2025
f7cdc0b
Fixing CI build issue
ChSonnabend Oct 27, 2025
8c7d5f4
Merge branch 'dev' into devel
ChSonnabend Nov 12, 2025
a2aaf8e
Merge branch 'AliceO2Group:dev' into devel
ChSonnabend Nov 20, 2025
3775044
Working version of NN CCDB fetching and loading to file
ChSonnabend Nov 21, 2025
a963c01
Cleanup
ChSonnabend Nov 21, 2025
caf20fc
Please consider the following formatting changes
alibuild Nov 21, 2025
125f3e2
Merge pull request #38 from alibuild/alibot-cleanup-14841
ChSonnabend Nov 21, 2025
5284b01
Using char* buffer for model loading
ChSonnabend Nov 23, 2025
ab19782
Please consider the following formatting changes
alibuild Nov 23, 2025
5ce258c
Merge pull request #39 from alibuild/alibot-cleanup-14841
ChSonnabend Nov 23, 2025
4fed621
Bug-fix
ChSonnabend Nov 23, 2025
e7cd6fa
Working version of CCDB fetching and loading into ROOT class of std::…
ChSonnabend Nov 25, 2025
9ed60e9
Please consider the following formatting changes
alibuild Nov 25, 2025
ae1d630
Merge pull request #40 from alibuild/alibot-cleanup-14841
ChSonnabend Nov 25, 2025
6cba1f3
Disable dumpToFile by default
ChSonnabend Nov 25, 2025
5c6d214
Moving macro, adding o2-test
ChSonnabend Nov 25, 2025
979a8d5
Merge branch 'dev' into devel
ChSonnabend Nov 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Common/ML/include/ML/OrtInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class OrtModel
void initOptions(std::unordered_map<std::string, std::string> optionsMap);
void initEnvironment();
void initSession();
void initSessionFromBuffer(const char* buffer, size_t bufferSize);
void memoryOnDevice(int32_t = 0);
bool isInitialized() { return mInitialized; }
void resetSession();
Expand Down
18 changes: 18 additions & 0 deletions Common/ML/src/OrtInterface.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,24 @@ void OrtModel::initEnvironment()
(mPImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
}

void OrtModel::initSessionFromBuffer(const char* buffer, size_t bufferSize)
{
mPImplOrt->sessionOptions.AddConfigEntry("session.load_model_format", "ONNX");
mPImplOrt->sessionOptions.AddConfigEntry("session.use_ort_model_bytes_directly", "1");

mPImplOrt->session = std::make_unique<Ort::Session>(*mPImplOrt->env,
buffer,
bufferSize,
mPImplOrt->sessionOptions);
mPImplOrt->ioBinding = std::make_unique<Ort::IoBinding>(*mPImplOrt->session);

setIO();

if (mLoggingLevel < 2) {
LOG(info) << "(ORT) Model loaded successfully from buffer! (inputs: " << printShape(mInputShapes, mInputNames) << ", outputs: " << printShape(mOutputShapes, mInputNames) << ")";
}
}

void OrtModel::initSession()
{
if (mAllocateDeviceMemory) {
Expand Down
1 change: 0 additions & 1 deletion Detectors/TPC/base/test/testTPCCDBInterface.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

// o2 includes
#include "TPCBase/CDBInterface.h"
#include "TPCBase/CDBInterface.h"
#include "TPCBase/CalArray.h"
#include "TPCBase/CalDet.h"
#include "TPCBase/Mapper.h"
Expand Down
2 changes: 0 additions & 2 deletions Detectors/TPC/calibration/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ o2_add_library(TPCCalibration
src/CalibPadGainTracksBase.cxx
src/CalibLaserTracks.cxx
src/LaserTracksCalibrator.cxx
src/NeuralNetworkClusterizer.cxx
src/SACDecoder.cxx
src/IDCAverageGroup.cxx
src/IDCAverageGroupBase.cxx
Expand Down Expand Up @@ -84,7 +83,6 @@ o2_target_root_dictionary(TPCCalibration
include/TPCCalibration/FastHisto.h
include/TPCCalibration/CalibLaserTracks.h
include/TPCCalibration/LaserTracksCalibrator.h
include/TPCCalibration/NeuralNetworkClusterizer.h
include/TPCCalibration/SACDecoder.h
include/TPCCalibration/IDCAverageGroup.h
include/TPCCalibration/IDCAverageGroupBase.h
Expand Down

This file was deleted.

48 changes: 0 additions & 48 deletions Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx

This file was deleted.

1 change: 1 addition & 0 deletions GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ set(SRCS_DATATYPES
DataTypes/TPCPadBitMap.cxx
DataTypes/TPCZSLinkMapping.cxx
DataTypes/CalibdEdxContainer.cxx
DataTypes/ORTRootSerializer.cxx
DataTypes/CalibdEdxTrackTopologyPol.cxx
DataTypes/CalibdEdxTrackTopologySpline.cxx
DataTypes/GPUTRDTrackO2.cxx)
Expand Down
4 changes: 4 additions & 0 deletions GPU/GPUTracking/DataTypes/GPUDataTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class Cluster;
namespace tpc
{
class CalibdEdxContainer;
class ORTRootSerializer;
} // namespace tpc
} // namespace o2

Expand Down Expand Up @@ -182,6 +183,9 @@ struct GPUCalibObjectsTemplate { // use only pointers on PODs or flat objects he
typename S<o2::tpc::CalibdEdxContainer>::type* dEdxCalibContainer = nullptr;
typename S<o2::base::PropagatorImpl<float>>::type* o2Propagator = nullptr;
typename S<o2::itsmft::TopologyDictionary>::type* itsPatternDict = nullptr;

// NN clusterizer objects
typename S<o2::tpc::ORTRootSerializer>::type* nnClusterizerNetworks[3] = {nullptr, nullptr, nullptr};
};
typedef GPUCalibObjectsTemplate<DefaultPtr> GPUCalibObjects; // NOTE: These 2 must have identical layout since they are memcopied
typedef GPUCalibObjectsTemplate<ConstPtr> GPUCalibObjectsConst;
Expand Down
25 changes: 25 additions & 0 deletions GPU/GPUTracking/DataTypes/ORTRootSerializer.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file ORTRootSerializer.cxx
/// \author Christian Sonnabend <christian.sonnabend@cern.ch>

#include "ORTRootSerializer.h"
#include <cstring>

using namespace o2::tpc;

/// Store a private copy of the serialized ONNX model in the internal buffer
void ORTRootSerializer::setOnnxModel(const char* onnxModel, uint32_t size)
{
  // assign() sizes the buffer and copies the bytes in one call
  mModelBuffer.assign(onnxModel, onnxModel + size);
}
43 changes: 43 additions & 0 deletions GPU/GPUTracking/DataTypes/ORTRootSerializer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file ORTRootSerializer.h
/// \brief Class to serialize ONNX objects for ROOT snapshots of CCDB objects at runtime
/// \author Christian Sonnabend <christian.sonnabend@cern.ch>

#ifndef ALICEO2_TPC_ORTROOTSERIALIZER_H_
#define ALICEO2_TPC_ORTROOTSERIALIZER_H_

#include "GPUCommonRtypes.h"
#include <vector>
#include <string>

namespace o2::tpc
{

/// Holds a serialized ONNX model as a byte buffer so it can be written to /
/// read from ROOT files (e.g. as a CCDB object) via the ROOT dictionary.
class ORTRootSerializer
{
 public:
  ORTRootSerializer() = default;
  ~ORTRootSerializer() = default;

  /// Copy `size` bytes of a serialized ONNX model into the internal buffer
  void setOnnxModel(const char* onnxModel, uint32_t size);
  /// Pointer to the stored model bytes; valid only while this object is alive
  const char* getONNXModel() const { return mModelBuffer.data(); }
  /// Size of the stored model in bytes
  uint32_t getONNXModelSize() const { return static_cast<uint32_t>(mModelBuffer.size()); }

 private:
  std::vector<char> mModelBuffer; ///< buffer for serialization
  ClassDefNV(ORTRootSerializer, 1); // ROOT dictionary macro (class version 1) enabling I/O of this type
};

} // namespace o2::tpc

#endif // ALICEO2_TPC_ORTROOTSERIALIZER_H_
12 changes: 6 additions & 6 deletions GPU/GPUTracking/Definitions/GPUSettingsList.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,22 +277,22 @@ AddOption(nnClusterizerBoundaryFillValue, int, -1, "", 0, "Fill value for the bo
AddOption(nnClusterizerApplyNoiseSuppression, int, 1, "", 0, "Applies the NoiseSuppression kernel before the digits to the network are filled")
AddOption(nnClusterizerSetDeconvolutionFlags, int, 1, "", 0, "Runs the deconvolution kernel without overwriting the charge in order to make cluster-to-track attachment identical to heuristic CF")
AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The classification network path")
AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.")
AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path")
AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.")
AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).")
AddOption(nnEvalMode, std::string, "c1:r1", "", 0, "Concatention of modes, e.g. c1:r1 (classification class 1, regression class 1)")
AddOption(nnClusterizerUseClassification, int, 1, "", 0, "If 1, the classification output of the network is used to select clusters, else only the regression output is used and no clusters are rejected by classification")
AddOption(nnClusterizerForceGpuInputFill, int, 0, "", 0, "Forces to use the fillInputNNGPU function")
// CCDB
AddOption(nnLoadFromCCDB, int, 0, "", 0, "If 1 networks are fetched from ccdb, else locally")
AddOption(nnCCDBDumpToFile, int, 0, "", 0, "If 1, additionally dump fetched CCDB networks to nnLocalFolder")
AddOption(nnLocalFolder, std::string, ".", "", 0, "Local folder in which the networks will be fetched")
AddOption(nnCCDBURL, std::string, "http://ccdb-test.cern.ch:8080", "", 0, "The CCDB URL from where the network files are fetched")
AddOption(nnCCDBPath, std::string, "Users/c/csonnabe/TPC/Clusterization", "", 0, "Folder path containing the networks")
AddOption(nnCCDBWithMomentum, int, 1, "", 0, "Distinguishes between the network with and without momentum output for the regression")
AddOption(nnCCDBWithMomentum, std::string, "", "", 0, "Distinguishes between the network with and without momentum output for the regression")
AddOption(nnCCDBClassificationLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
AddOption(nnCCDBRegressionLayerType, std::string, "CNN", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
AddOption(nnCCDBBeamType, std::string, "PbPb", "", 0, "Distinguishes between networks trained for different beam types. Options: PbPb, pp")
AddOption(nnCCDBInteractionRate, int, 50, "", 0, "Distinguishes between networks for different interaction rates [kHz].")
AddOption(nnCCDBRegressionLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
AddOption(nnCCDBBeamType, std::string, "pp", "", 0, "Distinguishes between networks trained for different beam types. Options: pp, pPb, PbPb")
AddOption(nnCCDBInteractionRate, std::string, "500", "", 0, "Distinguishes between networks for different interaction rates [kHz].")
AddHelp("help", 'h')
EndConfig()

Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,6 @@
#pragma link C++ class o2::tpc::CalibdEdxTrackTopologyPol + ;
#pragma link C++ class o2::tpc::CalibdEdxTrackTopologySpline + ;
#pragma link C++ struct o2::tpc::CalibdEdxTrackTopologyPolContainer + ;
#pragma link C++ struct o2::tpc::ORTRootSerializer + ;

#endif
21 changes: 17 additions & 4 deletions GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#ifdef GPUCA_HAS_ONNX
#include "GPUTPCNNClusterizerKernels.h"
#include "GPUTPCNNClusterizerHost.h"
#include "ORTRootSerializer.h"
#endif

#ifdef GPUCA_O2_LIB
Expand Down Expand Up @@ -639,7 +640,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
// Maximum of 4 lanes supported
HighResTimer* nnTimers[12];

if (GetProcessingSettings().nn.applyNNclusterizer) {
if (nn_settings.applyNNclusterizer) {
int32_t deviceId = -1;
int32_t numLanes = GetProcessingSettings().nTPCClustererLanes;
int32_t maxThreads = mRec->getNKernelHostThreads(true);
Expand Down Expand Up @@ -677,7 +678,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
// }
// recreateMemoryAllocator = true;
(nnApplications[lane].mModelClass).initSession();
if (!nn_settings.nnLoadFromCCDB) {
(nnApplications[lane].mModelClass).initSession(); // loads from file
} else {
(nnApplications[lane].mModelClass).initSessionFromBuffer((processors()->calibObjects.nnClusterizerNetworks[0])->getONNXModel(), (processors()->calibObjects.nnClusterizerNetworks[0])->getONNXModelSize()); // loads from CCDB
}
}
if (nnApplications[lane].mModelsUsed[1]) {
SetONNXGPUStream(*(nnApplications[lane].mModelReg1).getSessionOptions(), lane, &deviceId);
Expand All @@ -688,7 +693,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
// (nnApplications[lane].mModelReg1).setEnv((nnApplications[lane].mModelClass).getEnv());
(nnApplications[lane].mModelReg1).initEnvironment();
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator);
(nnApplications[lane].mModelReg1).initSession();
if (!nn_settings.nnLoadFromCCDB) {
(nnApplications[lane].mModelReg1).initSession(); // loads from file
} else {
(nnApplications[lane].mModelReg1).initSessionFromBuffer((processors()->calibObjects.nnClusterizerNetworks[1])->getONNXModel(), (processors()->calibObjects.nnClusterizerNetworks[1])->getONNXModelSize()); // loads from CCDB
}
}
if (nnApplications[lane].mModelsUsed[2]) {
SetONNXGPUStream(*(nnApplications[lane].mModelReg2).getSessionOptions(), lane, &deviceId);
Expand All @@ -699,7 +708,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
// (nnApplications[lane].mModelReg2).setEnv((nnApplications[lane].mModelClass).getEnv());
(nnApplications[lane].mModelReg2).initEnvironment();
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
(nnApplications[lane].mModelReg2).initSession();
if (!nn_settings.nnLoadFromCCDB) {
(nnApplications[lane].mModelReg2).initSession(); // loads from file
} else {
(nnApplications[lane].mModelReg2).initSessionFromBuffer((processors()->calibObjects.nnClusterizerNetworks[2])->getONNXModel(), (processors()->calibObjects.nnClusterizerNetworks[2])->getONNXModelSize()); // loads from CCDB
}
}
if (nn_settings.nnClusterizerVerbosity > 0) {
LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
std::vector<std::string> evalMode = o2::utils::Str::tokenize(settings.nnEvalMode, ':');

if (settings.nnLoadFromCCDB) {
reg_model_path = settings.nnLocalFolder + "/net_regression_c1.onnx"; // Needs to be set identical to NeuralNetworkClusterizer.cxx, otherwise the networks might be loaded from the wrong place
reg_model_path = settings.nnLocalFolder + "/net_regression_c1.onnx"; // Needs to be set identical to GPUWorkflowSpec.cxx, otherwise the networks might be loaded from the wrong place
if (evalMode[0] == "c1") {
class_model_path = settings.nnLocalFolder + "/net_classification_c1.onnx";
} else if (evalMode[0] == "c2") {
Expand Down
7 changes: 6 additions & 1 deletion GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ class GPURecoWorkflowSpec : public o2::framework::Task
bool tpcTriggerHandling = false;
bool isITS3 = false;
bool useFilteredOutputSpecs = false;

// NN clusterizer
bool nnLoadFromCCDB = false;
bool nnDumpToFile = false;
std::vector<std::string> nnEvalMode;
};

GPURecoWorkflowSpec(CompletionPolicyData* policyData, Config const& specconfig, std::vector<int32_t> const& tpcsectors, uint64_t tpcSectorMask, std::shared_ptr<o2::base::GRPGeomRequest>& ggr, std::function<bool(o2::framework::DataProcessingHeader::StartTime)>** gPolicyOrder = nullptr);
Expand Down Expand Up @@ -230,7 +235,7 @@ class GPURecoWorkflowSpec : public o2::framework::Task
uint32_t mNextThreadIndex = 0;
bool mUpdateGainMapCCDB = true;
std::unique_ptr<o2::gpu::GPUSettingsTF> mTFSettings;
std::unique_ptr<o2::gpu::GPUSettingsProcessingNNclusterizer> mNNClusterizerSettings;
std::map<std::string, std::string> nnCCDBSettings;

Config mSpecConfig;
std::shared_ptr<o2::base::GRPGeomRequest> mGGR;
Expand Down
Loading