From 6232cbca968c965a2be53682a9da38ae1c7503e0 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 27 Aug 2025 12:28:20 +0200 Subject: [PATCH 1/2] DPL Analysis: percolate DataOrigin so that we can use it for multiple files reading. --- .../AnalysisSupport/src/DataInputDirector.cxx | 36 ++++++++++++------- .../AnalysisSupport/src/DataInputDirector.h | 8 ++--- .../Framework/AnalysisSupportHelpers.h | 4 +-- .../TestWorkflows/src/o2TestHistograms.cxx | 5 +-- 4 files changed, 33 insertions(+), 20 deletions(-) diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index 590329de146f7..5ebb87a0da018 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -124,15 +124,23 @@ void DataInputDescriptor::addFileNameHolder(FileNameHolder* fn) mfilenames.emplace_back(fn); } -bool DataInputDescriptor::setFile(int counter) +bool DataInputDescriptor::setFile(int counter, std::string_view origin) { // no files left if (counter >= getNumberInputfiles()) { return false; } + // In case the origin starts with anything but AOD, we add the origin as the suffix + // of the filename. In the future we might expand this for proper rewriting of the + // filename based on the origin and the original file information. 
+ std::string filename = mfilenames[counter]->fileName; + if (!origin.starts_with("AOD")) { + filename = std::regex_replace(filename, std::regex("[.]root$"), fmt::format("_{}.root", origin)); + std::cout << "Rewriting filename to " << filename << std::endl; + } + // open file - auto filename = mfilenames[counter]->fileName; auto rootFS = std::dynamic_pointer_cast(mCurrentFilesystem); if (rootFS.get()) { if (rootFS->GetFile()->GetName() == filename) { @@ -213,11 +221,11 @@ bool DataInputDescriptor::setFile(int counter) return true; } -uint64_t DataInputDescriptor::getTimeFrameNumber(int counter, int numTF) +uint64_t DataInputDescriptor::getTimeFrameNumber(int counter, int numTF, std::string_view origin) { // open file - if (!setFile(counter)) { + if (!setFile(counter, origin)) { return 0ul; } @@ -229,10 +237,10 @@ uint64_t DataInputDescriptor::getTimeFrameNumber(int counter, int numTF) return (mfilenames[counter]->listOfTimeFrameNumbers)[numTF]; } -arrow::dataset::FileSource DataInputDescriptor::getFileFolder(int counter, int numTF) +arrow::dataset::FileSource DataInputDescriptor::getFileFolder(int counter, int numTF, std::string_view origin) { // open file - if (!setFile(counter)) { + if (!setFile(counter, origin)) { return {}; } @@ -246,7 +254,7 @@ arrow::dataset::FileSource DataInputDescriptor::getFileFolder(int counter, int n return {fmt::format("DF_{}", mfilenames[counter]->listOfTimeFrameNumbers[numTF]), mCurrentFilesystem}; } -DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF, std::string treename) +DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF, std::string treename, std::string_view origin) { if (!mParentFileMap) { // This file has no parent map @@ -283,7 +291,7 @@ DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF, mParentFile->mdefaultFilenamesPtr = new std::vector; 
mParentFile->mdefaultFilenamesPtr->emplace_back(makeFileNameHolder(parentFileName->GetString().Data())); mParentFile->fillInputfiles(); - mParentFile->setFile(0); + mParentFile->setFile(0, origin); return mParentFile; } @@ -440,7 +448,8 @@ struct CalculateDelta { bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t& totalSizeCompressed, size_t& totalSizeUncompressed) { CalculateDelta t(mIOTime); - auto folder = getFileFolder(counter, numTF); + std::string origin = dh.dataOrigin.as(); + auto folder = getFileFolder(counter, numTF, origin); if (!folder.filesystem()) { t.deactivate(); return false; @@ -473,7 +482,7 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh if (!format) { t.deactivate(); LOGP(debug, "Could not find tree {}. Trying in parent file.", fullpath.path()); - auto parentFile = getParentFile(counter, numTF, treename); + auto parentFile = getParentFile(counter, numTF, treename, origin); if (parentFile != nullptr) { int parentNumTF = parentFile->findDFNumber(0, folder.path()); if (parentNumTF == -1) { @@ -817,8 +826,9 @@ arrow::dataset::FileSource DataInputDirector::getFileFolder(header::DataHeader d if (!didesc) { didesc = mdefaultDataInputDescriptor; } + std::string origin = dh.dataOrigin.as(); - return didesc->getFileFolder(counter, numTF); + return didesc->getFileFolder(counter, numTF, origin); } int DataInputDirector::getTimeFramesInFile(header::DataHeader dh, int counter) @@ -839,8 +849,9 @@ uint64_t DataInputDirector::getTimeFrameNumber(header::DataHeader dh, int counte if (!didesc) { didesc = mdefaultDataInputDescriptor; } + std::string origin = dh.dataOrigin.as(); - return didesc->getTimeFrameNumber(counter, numTF); + return didesc->getTimeFrameNumber(counter, numTF, origin); } bool DataInputDirector::readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, size_t& totalSizeCompressed, size_t& 
totalSizeUncompressed) @@ -858,6 +869,7 @@ bool DataInputDirector::readTree(DataAllocator& outputs, header::DataHeader dh, didesc = mdefaultDataInputDescriptor; treename = aod::datamodel::getTreeName(dh); } + std::string origin = dh.dataOrigin.as(); auto result = didesc->readTree(outputs, dh, counter, numTF, treename, totalSizeCompressed, totalSizeUncompressed); return result; diff --git a/Framework/AnalysisSupport/src/DataInputDirector.h b/Framework/AnalysisSupport/src/DataInputDirector.h index 94bdcf2c9368e..61b477bd8522d 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.h +++ b/Framework/AnalysisSupport/src/DataInputDirector.h @@ -64,7 +64,7 @@ class DataInputDescriptor void addFileNameHolder(FileNameHolder* fn); int fillInputfiles(); - bool setFile(int counter); + bool setFile(int counter, std::string_view origin); // getters std::string getInputfilesFilename(); @@ -74,9 +74,9 @@ class DataInputDescriptor int getNumberTimeFrames() { return mtotalNumberTimeFrames; } int findDFNumber(int file, std::string dfName); - uint64_t getTimeFrameNumber(int counter, int numTF); - arrow::dataset::FileSource getFileFolder(int counter, int numTF); - DataInputDescriptor* getParentFile(int counter, int numTF, std::string treename); + uint64_t getTimeFrameNumber(int counter, int numTF, std::string_view origin); + arrow::dataset::FileSource getFileFolder(int counter, int numTF, std::string_view origin); + DataInputDescriptor* getParentFile(int counter, int numTF, std::string treename, std::string_view origin); int getTimeFramesInFile(int counter); int getReadTimeFramesInFile(int counter); diff --git a/Framework/Core/include/Framework/AnalysisSupportHelpers.h b/Framework/Core/include/Framework/AnalysisSupportHelpers.h index a4e80decf2bbe..cc4d45a46c8bc 100644 --- a/Framework/Core/include/Framework/AnalysisSupportHelpers.h +++ b/Framework/Core/include/Framework/AnalysisSupportHelpers.h @@ -20,8 +20,8 @@ namespace o2::framework { -static constexpr std::array 
AODOrigins{header::DataOrigin{"AOD"}, header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}}; -static constexpr std::array extendedAODOrigins{header::DataOrigin{"AOD"}, header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}, header::DataOrigin{"DYN"}, header::DataOrigin{"AMD"}}; +static constexpr std::array AODOrigins{header::DataOrigin{"AOD"}, header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}, header::DataOrigin{"EMB"}}; +static constexpr std::array extendedAODOrigins{header::DataOrigin{"AOD"}, header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}, header::DataOrigin{"DYN"}, header::DataOrigin{"AMD"}, header::DataOrigin{"EMB"}}; static constexpr std::array writableAODOrigins{header::DataOrigin{"AOD"}, header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}, header::DataOrigin{"DYN"}}; class DataOutputDirector; diff --git a/Framework/TestWorkflows/src/o2TestHistograms.cxx b/Framework/TestWorkflows/src/o2TestHistograms.cxx index 38cfc00b6df7c..74064a29c7555 100644 --- a/Framework/TestWorkflows/src/o2TestHistograms.cxx +++ b/Framework/TestWorkflows/src/o2TestHistograms.cxx @@ -25,6 +25,7 @@ using namespace o2::framework::expressions; namespace o2::aod { +O2ORIGIN("EMB"); namespace skimmedExampleTrack { DECLARE_SOA_COLUMN(Pt, pt, float); //! 
@@ -49,7 +50,7 @@ struct EtaAndClsHistogramsSimple { } } - void process(soa::Filtered const& tracks, aod::FT0s const&) + void process(soa::Filtered const& tracks, aod::FT0s const&, aod::StoredTracksFrom> const& ortherTracks) { LOGP(info, "Invoking the simple one"); for (auto& track : tracks) { @@ -72,7 +73,7 @@ struct EtaAndClsHistogramsIUSimple { } } - void process(soa::Filtered const& tracks, aod::FT0s const&) + void process(soa::Filtered const& tracks, aod::FT0s const&, aod::TracksIUFrom> const &otherTracks) { LOGP(info, "Invoking the simple one IU"); for (auto& track : tracks) { From 4b2bd8fc61ee5c926684ab2c762938bacde09df9 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Wed, 27 Aug 2025 10:29:02 +0000 Subject: [PATCH 2/2] Please consider the following formatting changes --- Framework/TestWorkflows/src/o2TestHistograms.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/TestWorkflows/src/o2TestHistograms.cxx b/Framework/TestWorkflows/src/o2TestHistograms.cxx index 74064a29c7555..61710e1f63d5f 100644 --- a/Framework/TestWorkflows/src/o2TestHistograms.cxx +++ b/Framework/TestWorkflows/src/o2TestHistograms.cxx @@ -73,7 +73,7 @@ struct EtaAndClsHistogramsIUSimple { } } - void process(soa::Filtered const& tracks, aod::FT0s const&, aod::TracksIUFrom> const &otherTracks) + void process(soa::Filtered const& tracks, aod::FT0s const&, aod::TracksIUFrom> const& otherTracks) { LOGP(info, "Invoking the simple one IU"); for (auto& track : tracks) {