From 8fcd59e57dd810a53cd9fb065cdf76c088824bf9 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 17 Nov 2025 13:03:18 +0100 Subject: [PATCH 01/22] DPL Analysis: protect aod-spawner from empty tables --- Framework/Core/include/Framework/ASoA.h | 18 ++++++---- Framework/Core/src/AODReaderHelpers.cxx | 48 ++++++++++++------------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 10c1fc4ac3ceb..8ead895e8c814 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -34,7 +34,6 @@ #include #include #include // IWYU pragma: export -#include namespace o2::framework { @@ -53,6 +52,12 @@ void dereferenceWithWrongType(const char* getter, const char* target); void missingFilterDeclaration(int hash, int ai); void notBoundTable(const char* tableName); void* extractCCDBPayload(char* payload, size_t size, TClass const* cl, const char* what); + +template +auto createFieldsFromColumns(framework::pack) +{ + return std::vector>{C::asArrowField()...}; +} } // namespace o2::soa namespace o2::soa @@ -248,6 +253,11 @@ struct TableMetadata { return -1; } } + + static std::shared_ptr getSchema() + { + return std::make_shared([](framework::pack&& p){ return o2::soa::createFieldsFromColumns(p); }(columns{})); + } }; template @@ -406,12 +416,6 @@ struct Binding { } }; -template -auto createFieldsFromColumns(framework::pack) -{ - return std::vector>{C::asArrowField()...}; -} - using SelectionVector = std::vector; template diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/Core/src/AODReaderHelpers.cxx index 09ec16a93b087..b23b0337e473c 100644 --- a/Framework/Core/src/AODReaderHelpers.cxx +++ b/Framework/Core/src/AODReaderHelpers.cxx @@ -143,13 +143,14 @@ struct Maker { std::vector labels; std::vector> expressions; std::shared_ptr projector = nullptr; - std::shared_ptr schema; + std::shared_ptr schema = nullptr; + std::shared_ptr inputSchema = nullptr; header::DataOrigin origin; header::DataDescription description; header::DataHeader::SubSpecificationType version; - std::shared_ptr make(ProcessingContext& pc) + std::shared_ptr make(ProcessingContext& pc) const { std::vector> originals; for (auto const& label : labels) { @@ -159,15 +160,6 @@ struct Maker { if (fullTable->num_rows() == 0) { return arrow::Table::MakeEmpty(schema).ValueOrDie(); } - if (projector == nullptr) { - auto s = gandiva::Projector::Make( - fullTable->schema(), - expressions, - &projector); - if (!s.ok()) { - throw o2::framework::runtime_error_f("Failed to create projector: %s", s.ToString().c_str()); - } - } return spawnerHelper(fullTable, schema, binding.c_str(), schema->num_fields(), projector); } @@ -201,23 +193,18 @@ struct Spawnable { iws.str(loc->defaultValue.get()); outputSchema = ArrowJSONHelpers::read(iws); + std::vector> schemas; for (auto& i : spec.metadata) { if (i.name.starts_with("input:")) { labels.emplace_back(i.name.substr(6)); + iws.clear(); + iws.str(i.defaultValue.get()); + schemas.emplace_back(ArrowJSONHelpers::read(iws)); } } - std::vector> fields; - for (auto& p : projectors) { - expressions::walk(p.node.get(), - [&fields](expressions::Node* n) mutable { - if (n->self.index() == 1) { - auto& b = std::get(n->self); - if (std::find_if(fields.begin(), fields.end(), [&b](std::shared_ptr const& field) { return field->name() == b.name; }) == fields.end()) { - fields.emplace_back(std::make_shared(b.name, expressions::concreteArrowType(b.type))); - } - } - }); + for (auto& s : schemas) { + std::copy(s->fields().begin(), s->fields().end(), std::back_inserter(fields)); } inputSchema = std::make_shared(fields); @@ -233,20 +220,29 @@ struct Spawnable { } } - std::shared_ptr makeProjector() + std::shared_ptr makeProjector() const { - return expressions::createProjectorHelper(projectors.size(), projectors.data(), inputSchema, outputSchema->fields()); + std::shared_ptr p = nullptr; + auto s = gandiva::Projector::Make( + inputSchema, + expressions, + &p); + if (!s.ok()) { + throw o2::framework::runtime_error_f("Failed to create projector: %s", s.ToString().c_str()); + } + return p; } - Maker createMaker() + Maker createMaker() const { o2::framework::addLabelToSchema(outputSchema, binding.c_str()); return { binding, labels, expressions, - nullptr, + makeProjector(), outputSchema, + inputSchema, origin, description, version}; From eaf98e76fad7bbef71e2f60f24d50546f754c72b Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 18 Nov 2025 14:00:41 +0100 Subject: [PATCH 02/22] improve schema serialization --- Framework/Core/include/Framework/ASoA.h | 4 +- .../Core/include/Framework/AnalysisHelpers.h | 38 +++++++++++-- Framework/Core/include/Framework/ArrowTypes.h | 50 +++++++++++++++++ Framework/Core/src/AODReaderHelpers.cxx | 7 +-- Framework/Core/src/AnalysisHelpers.cxx | 2 +- Framework/Core/src/AnalysisSupportHelpers.cxx | 1 - Framework/Core/src/ExpressionJSONHelpers.cxx | 53 ++++++++++++++++++- Framework/Core/test/test_Expressions.cxx | 30 +++++++++++ 8 files changed, 171 insertions(+), 14 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 8ead895e8c814..98686f6222ed3 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -256,7 +256,7 @@ struct TableMetadata { static std::shared_ptr getSchema() { - return std::make_shared([](framework::pack&& p){ return o2::soa::createFieldsFromColumns(p); }(columns{})); + return std::make_shared([](framework::pack&& p){ return o2::soa::createFieldsFromColumns(p); }(persistent_columns_t{})); } }; @@ -690,7 +690,7 @@ struct Column { static auto asArrowField() { - return std::make_shared(inherited_t::mLabel, framework::expressions::concreteArrowType(framework::expressions::selectArrowType())); + return std::make_shared(inherited_t::mLabel, soa::asArrowDataType()); } /// FIXME: rather than keeping this public we should have a protected diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index fa82151c6e756..033792bb758f2 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -29,7 +29,7 @@ namespace o2::framework { std::string serializeProjectors(std::vector& projectors); -std::string serializeSchema(std::shared_ptr& schema); +std::string serializeSchema(std::shared_ptr schema); } // namespace o2::framework namespace o2::soa @@ -44,6 +44,16 @@ constexpr auto tableRef2ConfigParamSpec() {"\"\""}}; } +template +constexpr auto tableRef2Schema() +{ + return o2::framework::ConfigParamSpec{ + std::string{"input-schema:"} + o2::aod::label(), + framework::VariantType::String, + framework::serializeSchema(o2::aod::MetadataTrait>::metadata::getSchema()), + {"\"\""}}; +} + namespace { template @@ -56,6 +66,16 @@ inline constexpr auto getSources() }.template operator()(); } +template +inline constexpr auto getSourceSchemas() +{ + return [] refs>() { + return [](std::index_sequence) { + return std::vector{soa::tableRef2Schema()...}; + }(std::make_index_sequence()); + }.template operator()(); +} + template inline constexpr auto getCCDBUrls() { @@ -73,11 +93,19 @@ template constexpr auto getInputMetadata() -> std::vector { std::vector inputMetadata; + auto inputSources = getSources(); std::sort(inputSources.begin(), inputSources.end(), [](framework::ConfigParamSpec const& a, framework::ConfigParamSpec const& b) { return a.name < b.name; }); auto last = std::unique(inputSources.begin(), inputSources.end(), [](framework::ConfigParamSpec const& a, framework::ConfigParamSpec const& b) { return a.name == b.name; }); inputSources.erase(last, inputSources.end()); inputMetadata.insert(inputMetadata.end(), inputSources.begin(), inputSources.end()); + + auto inputSchemas = getSourceSchemas(); + std::sort(inputSchemas.begin(), inputSchemas.end(), [](framework::ConfigParamSpec const& a, framework::ConfigParamSpec const& b) { return a.name < b.name; }); + last = std::unique(inputSchemas.begin(), inputSchemas.end(), [](framework::ConfigParamSpec const& a, framework::ConfigParamSpec const& b) { return a.name == b.name; }); + inputSchemas.erase(last, inputSchemas.end()); + inputMetadata.insert(inputMetadata.end(), inputSchemas.begin(), inputSchemas.end()); + return inputMetadata; } @@ -115,11 +143,8 @@ constexpr auto getExpressionMetadata() -> std::vector(o2::soa::createFieldsFromColumns(expression_pack_t{})); - auto json = framework::serializeProjectors(projectors); - return {framework::ConfigParamSpec{"projectors", framework::VariantType::String, json, {"\"\""}}, - framework::ConfigParamSpec{"schema", framework::VariantType::String, framework::serializeSchema(schema), {"\"\""}}}; + return {framework::ConfigParamSpec{"projectors", framework::VariantType::String, json, {"\"\""}}}; } template @@ -141,6 +166,9 @@ constexpr auto tableRef2InputSpec() metadata.insert(metadata.end(), ccdbMetadata.begin(), ccdbMetadata.end()); auto p = getExpressionMetadata>::metadata>(); metadata.insert(metadata.end(), p.begin(), p.end()); + if constexpr(!soa::with_ccdb_urls>::metadata>) { + metadata.emplace_back(framework::ConfigParamSpec{"schema", framework::VariantType::String, framework::serializeSchema(o2::aod::MetadataTrait>::metadata::getSchema()), {"\"\""}}); + } return framework::InputSpec{ o2::aod::label(), diff --git a/Framework/Core/include/Framework/ArrowTypes.h b/Framework/Core/include/Framework/ArrowTypes.h index 6fd70113fede7..2673472a81152 100644 --- a/Framework/Core/include/Framework/ArrowTypes.h +++ b/Framework/Core/include/Framework/ArrowTypes.h @@ -11,6 +11,7 @@ #ifndef O2_FRAMEWORK_ARROWTYPES_H #define O2_FRAMEWORK_ARROWTYPES_H +#include "Framework/Traits.h" #include "arrow/type_fwd.h" #include @@ -117,5 +118,54 @@ template using arrow_array_for_t = typename arrow_array_for::type; template using value_for_t = typename arrow_array_for::value_type; + +template +using array_element_t = std::decay_t()[0])>; + +template +std::shared_ptr asArrowDataType(int list_size = 1) +{ + auto typeGenerator = [](std::shared_ptr const& type, int list_size) -> std::shared_ptr { + switch (list_size) { + case -1: + return arrow::list(type); + case 1: + return std::move(type); + default: + return arrow::fixed_size_list(type, list_size); + } + }; + + if constexpr (std::is_arithmetic_v) { + if constexpr (std::same_as) { + return typeGenerator(arrow::boolean(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::uint8(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::uint16(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::uint32(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::uint64(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::int8(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::int16(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::int32(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::int64(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::float32(), list_size); + } else if constexpr (std::same_as) { + return typeGenerator(arrow::float64(), list_size); + } + } else if constexpr (std::is_bounded_array_v) { + return asArrowDataType>(std::extent_v); + } else if constexpr (o2::framework::is_specialization_v) { + return asArrowDataType(-1); + } + return nullptr; +} } // namespace o2::soa #endif // O2_FRAMEWORK_ARROWTYPES_H diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/Core/src/AODReaderHelpers.cxx index b23b0337e473c..e73ea6634c3a3 100644 --- a/Framework/Core/src/AODReaderHelpers.cxx +++ b/Framework/Core/src/AODReaderHelpers.cxx @@ -195,10 +195,11 @@ struct Spawnable { std::vector> schemas; for (auto& i : spec.metadata) { - if (i.name.starts_with("input:")) { - labels.emplace_back(i.name.substr(6)); + if (i.name.starts_with("input-schema:")) { + labels.emplace_back(i.name.substr(13)); iws.clear(); - iws.str(i.defaultValue.get()); + auto json = i.defaultValue.get(); + iws.str(json); schemas.emplace_back(ArrowJSONHelpers::read(iws)); } } diff --git a/Framework/Core/src/AnalysisHelpers.cxx b/Framework/Core/src/AnalysisHelpers.cxx index 4f78cc42f3f98..63923008f5a70 100644 --- a/Framework/Core/src/AnalysisHelpers.cxx +++ b/Framework/Core/src/AnalysisHelpers.cxx @@ -35,7 +35,7 @@ std::string serializeProjectors(std::vector& return osm.str(); } -std::string serializeSchema(std::shared_ptr& schema) +std::string serializeSchema(std::shared_ptr schema) { std::stringstream osm; ArrowJSONHelpers::write(osm, schema); diff --git a/Framework/Core/src/AnalysisSupportHelpers.cxx b/Framework/Core/src/AnalysisSupportHelpers.cxx index 7cfab22885671..b5c898faa515a 100644 --- a/Framework/Core/src/AnalysisSupportHelpers.cxx +++ b/Framework/Core/src/AnalysisSupportHelpers.cxx @@ -219,7 +219,6 @@ void AnalysisSupportHelpers::addMissingOutputsToAnalysisCCDBFetcher( // FIXME: good enough for now... for (auto& i : input.metadata) { if ((i.type == VariantType::String) && (i.name.find("input:") != std::string::npos)) { - auto value = i.defaultValue.get(); auto spec = DataSpecUtils::fromMetadataString(i.defaultValue.get()); auto j = std::find_if(publisher.inputs.begin(), publisher.inputs.end(), [&](auto x) { return x.binding == spec.binding; }); if (j == publisher.inputs.end()) { diff --git a/Framework/Core/src/ExpressionJSONHelpers.cxx b/Framework/Core/src/ExpressionJSONHelpers.cxx index 8d4907a721f7e..28685fecad468 100644 --- a/Framework/Core/src/ExpressionJSONHelpers.cxx +++ b/Framework/Core/src/ExpressionJSONHelpers.cxx @@ -637,6 +637,18 @@ void o2::framework::ExpressionJSONHelpers::write(std::ostream& o, std::vector arrowDataTypeFromId(atype::type type, int list_size = 1, atype::type element = atype::NA) +{ + switch (list_size) { + case -1: + return arrow::list(expressions::concreteArrowType(element)); + case 1: + return expressions::concreteArrowType(type); + default: + return arrow::fixed_size_list(expressions::concreteArrowType(element), list_size); + } +} + struct SchemaReader : public rapidjson::BaseReaderHandler, SchemaReader> { using Ch = rapidjson::UTF8<>::Ch; using SizeType = rapidjson::SizeType; @@ -658,6 +670,8 @@ struct SchemaReader : public rapidjson::BaseReaderHandler, Sch std::string name; atype::type type; + atype::type element; + int list_size = 1; SchemaReader() { @@ -706,6 +720,12 @@ struct SchemaReader : public rapidjson::BaseReaderHandler, Sch if (currentKey.compare("type") == 0) { return true; } + if (currentKey.compare("size") == 0) { + return true; + } + if (currentKey.compare("element") == 0) { + return true; + } } states.push(State::IN_ERROR); @@ -721,6 +741,9 @@ struct SchemaReader : public rapidjson::BaseReaderHandler, Sch if (states.top() == State::IN_LIST) { states.push(State::IN_FIELD); + list_size = 1; + element = atype::NA; + type = atype::NA; return true; } @@ -734,7 +757,7 @@ struct SchemaReader : public rapidjson::BaseReaderHandler, Sch if (states.top() == State::IN_FIELD) { states.pop(); // add a field - fields.emplace_back(std::make_shared(name, expressions::concreteArrowType(type))); + fields.emplace_back(std::make_shared(name, arrowDataTypeFromId(type, list_size, element))); return true; } @@ -754,6 +777,14 @@ struct SchemaReader : public rapidjson::BaseReaderHandler, Sch type = (atype::type)i; return true; } + if (currentKey.compare("element") == 0) { + element = (atype::type)i; + return true; + } + if (currentKey.compare("size") == 0) { + list_size = i; + return true; + } } states.push(State::IN_ERROR); @@ -777,6 +808,10 @@ struct SchemaReader : public rapidjson::BaseReaderHandler, Sch bool Int(int i) { debug << "Int(" << i << ")" << std::endl; + if (states.top() == State::IN_FIELD && currentKey.compare("size") == 0) { + list_size = i; + return true; + } return Uint(i); } }; @@ -791,7 +826,7 @@ std::shared_ptr o2::framework::ArrowJSONHelpers::read(std::istrea bool ok = reader.Parse(isw, sreader); if (!ok) { - throw framework::runtime_error_f("Cannot parse serialized Expression, error: %s at offset: %d", rapidjson::GetParseError_En(reader.GetParseErrorCode()), reader.GetErrorOffset()); + throw framework::runtime_error_f("Cannot parse serialized Schema, error: %s at offset: %d", rapidjson::GetParseError_En(reader.GetParseErrorCode()), reader.GetErrorOffset()); } return sreader.schema; } @@ -804,6 +839,20 @@ void writeSchema(rapidjson::Writer& w, arrow::Schema* w.StartObject(); w.Key("name"); w.String(f->name().c_str()); + auto fixedList = dynamic_cast(f->type().get()); + if (fixedList != nullptr) { + w.Key("size"); + w.Int(fixedList->list_size()); + w.Key("element"); + w.Int(fixedList->field(0)->type()->id()); + } + auto varList = dynamic_cast(f->type().get()); + if (varList != nullptr) { + w.Key("size"); + w.Int(-1); + w.Key("element"); + w.Int(varList->field(0)->type()->id()); + } w.Key("type"); w.Int(f->type()->id()); w.EndObject(); diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 41be7d53d2276..09a3061585f94 100644 --- a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -454,4 +454,34 @@ TEST_CASE("TestExpressionSerialization") ism.str(osm.str()); auto newSchemap = ArrowJSONHelpers::read(ism); REQUIRE(schemap->ToString() == newSchemap->ToString()); + + osm.clear(); + osm.str(""); + ArrowJSONHelpers::write(osm, schemap1); + + ism.clear(); + ism.str(osm.str()); + auto newSchemap1 = ArrowJSONHelpers::read(ism); + REQUIRE(schemap1->ToString() == newSchemap1->ToString()); + + osm.clear(); + osm.str(""); + auto realisticSchema = std::make_shared(o2::soa::createFieldsFromColumns(o2::aod::MetadataTrait>::metadata::persistent_columns_t{})); + ArrowJSONHelpers::write(osm, realisticSchema); + + ism.clear(); + ism.str(osm.str()); + auto restoredSchema = ArrowJSONHelpers::read(ism); + REQUIRE(realisticSchema->ToString() == restoredSchema->ToString()); + + osm.clear(); + osm.str(""); + auto realisticSchema1 = std::make_shared(o2::soa::createFieldsFromColumns(o2::aod::MetadataTrait>::metadata::persistent_columns_t{})); + ArrowJSONHelpers::write(osm, realisticSchema1); + + ism.clear(); + ism.str(osm.str()); + auto restoredSchema1 = ArrowJSONHelpers::read(ism); + REQUIRE(realisticSchema1->ToString() == restoredSchema1->ToString()); + } From 4198191b76220e9df853b514fabbbd0c9d5274e0 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Wed, 19 Nov 2025 09:06:07 +0000 Subject: [PATCH 03/22] Please consider the following formatting changes --- Framework/Core/include/Framework/ASoA.h | 2 +- .../Core/include/Framework/AnalysisHelpers.h | 22 +++++++++---------- Framework/Core/test/test_Expressions.cxx | 1 - 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 98686f6222ed3..13560bd22c054 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -256,7 +256,7 @@ struct TableMetadata { static std::shared_ptr getSchema() { - return std::make_shared([](framework::pack&& p){ return o2::soa::createFieldsFromColumns(p); }(persistent_columns_t{})); + return std::make_shared([](framework::pack&& p) { return o2::soa::createFieldsFromColumns(p); }(persistent_columns_t{})); } }; diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 033792bb758f2..660149b2154e1 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -47,11 +47,11 @@ constexpr auto tableRef2ConfigParamSpec() template constexpr auto tableRef2Schema() { - return o2::framework::ConfigParamSpec{ - std::string{"input-schema:"} + o2::aod::label(), - framework::VariantType::String, - framework::serializeSchema(o2::aod::MetadataTrait>::metadata::getSchema()), - {"\"\""}}; + return o2::framework::ConfigParamSpec{ + std::string{"input-schema:"} + o2::aod::label(), + framework::VariantType::String, + framework::serializeSchema(o2::aod::MetadataTrait>::metadata::getSchema()), + {"\"\""}}; } namespace @@ -69,11 +69,11 @@ inline constexpr auto getSources() template inline constexpr auto getSourceSchemas() { - return [] refs>() { - return [](std::index_sequence) { - return std::vector{soa::tableRef2Schema()...}; - }(std::make_index_sequence()); - }.template operator()(); + return [] refs>() { + return [](std::index_sequence) { + return std::vector{soa::tableRef2Schema()...}; + }(std::make_index_sequence()); + }.template operator()(); } template @@ -166,7 +166,7 @@ constexpr auto tableRef2InputSpec() metadata.insert(metadata.end(), ccdbMetadata.begin(), ccdbMetadata.end()); auto p = getExpressionMetadata>::metadata>(); metadata.insert(metadata.end(), p.begin(), p.end()); - if constexpr(!soa::with_ccdb_urls>::metadata>) { + if constexpr (!soa::with_ccdb_urls>::metadata>) { metadata.emplace_back(framework::ConfigParamSpec{"schema", framework::VariantType::String, framework::serializeSchema(o2::aod::MetadataTrait>::metadata::getSchema()), {"\"\""}}); } diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 09a3061585f94..b4a65fb0c7b48 100644 --- a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -483,5 +483,4 @@ TEST_CASE("TestExpressionSerialization") ism.str(osm.str()); auto restoredSchema1 = ArrowJSONHelpers::read(ism); REQUIRE(realisticSchema1->ToString() == restoredSchema1->ToString()); - } From b1779806ce2b4d08115f1b001b855ae4a667f90c Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 12 Nov 2025 19:08:12 +0100 Subject: [PATCH 04/22] move to a plugin --- .../Core/include/Framework/AODReaderHelpers.h | 2 +- Framework/Core/src/AODReaderHelpers.cxx | 32 +++---------------- Framework/Core/src/ArrowSupport.cxx | 2 +- Framework/Core/src/WorkflowHelpers.cxx | 2 +- 4 files changed, 8 insertions(+), 30 deletions(-) diff --git a/Framework/Core/include/Framework/AODReaderHelpers.h b/Framework/Core/include/Framework/AODReaderHelpers.h index 800d26c2aeae0..505bc95b863c0 100644 --- a/Framework/Core/include/Framework/AODReaderHelpers.h +++ b/Framework/Core/include/Framework/AODReaderHelpers.h @@ -22,7 +22,7 @@ namespace o2::framework::readers struct AODReaderHelpers { static AlgorithmSpec rootFileReaderCallback(); static AlgorithmSpec aodSpawnerCallback(ConfigContext const& ctx); - static AlgorithmSpec indexBuilderCallback(std::vector& requested); + static AlgorithmSpec indexBuilderCallback(ConfigContext const& ctx); }; } // namespace o2::framework::readers diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/Core/src/AODReaderHelpers.cxx index e73ea6634c3a3..3b5eb7635db90 100644 --- a/Framework/Core/src/AODReaderHelpers.cxx +++ b/Framework/Core/src/AODReaderHelpers.cxx @@ -15,38 +15,14 @@ #include "Framework/ExpressionHelpers.h" #include "Framework/DataProcessingHelpers.h" #include "Framework/AlgorithmSpec.h" -#include "Framework/ControlService.h" #include "Framework/CallbackService.h" -#include "Framework/EndOfStreamContext.h" #include "Framework/DataSpecUtils.h" #include "ExpressionJSONHelpers.h" #include "Framework/ConfigContext.h" #include "Framework/AnalysisContext.h" -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - namespace o2::framework::readers { -auto setEOSCallback(InitContext& ic) -{ - ic.services().get().set( - [](EndOfStreamContext& eosc) { - auto& control = eosc.services().get(); - control.endOfStream(); - control.readyToQuit(QuitRequest::Me); - }); -} - template refs> static inline auto extractOriginals(ProcessingContext& pc) { @@ -83,9 +59,10 @@ auto make_build(D metadata, InputSpec const& input, ProcessingContext& pc) } } // namespace -AlgorithmSpec AODReaderHelpers::indexBuilderCallback(std::vector& requested) +AlgorithmSpec AODReaderHelpers::indexBuilderCallback(ConfigContext const& ctx) { - return AlgorithmSpec::InitCallback{[requested](InitContext& /*ic*/) { + auto& ac = ctx.services().get(); + return AlgorithmSpec::InitCallback{[requested = ac.requestedIDXs](InitContext& /*ic*/) { return [requested](ProcessingContext& pc) { auto outputs = pc.outputs(); // spawn tables @@ -252,7 +229,7 @@ struct Spawnable { } // namespace -AlgorithmSpec AODReaderHelpers::aodSpawnerCallback(/*std::vector& requested*/ ConfigContext const& ctx) +AlgorithmSpec AODReaderHelpers::aodSpawnerCallback(ConfigContext const& ctx) { auto& ac = ctx.services().get(); return AlgorithmSpec::InitCallback{[requested = ac.spawnerInputs](InitContext& /*ic*/) { @@ -268,6 +245,7 @@ AlgorithmSpec AODReaderHelpers::aodSpawnerCallback(/*std::vector& req return [makers](ProcessingContext& pc) mutable { auto outputs = pc.outputs(); for (auto& maker : makers) { + LOGP(info, ">>> Spawning: {}", maker.binding); outputs.adopt(Output{maker.origin, maker.description, maker.version}, maker.make(pc)); } }; diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 4150fda9f63f1..f795db6b45383 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -619,7 +619,7 @@ o2::framework::ServiceSpec ArrowSupport::arrowBackendSpec() builder->outputs.clear(); // replace AlgorithmSpec // FIXME: it should be made more generic, so it does not need replacement... - builder->algorithm = readers::AODReaderHelpers::indexBuilderCallback(ac.requestedIDXs); + builder->algorithm = readers::AODReaderHelpers::indexBuilderCallback(ctx); AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, *builder); } diff --git a/Framework/Core/src/WorkflowHelpers.cxx b/Framework/Core/src/WorkflowHelpers.cxx index b3af5636127f9..b5fa2ffe5d41b 100644 --- a/Framework/Core/src/WorkflowHelpers.cxx +++ b/Framework/Core/src/WorkflowHelpers.cxx @@ -416,7 +416,7 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext "internal-dpl-aod-index-builder", {}, {}, - readers::AODReaderHelpers::indexBuilderCallback(ac.requestedIDXs), + readers::AODReaderHelpers::indexBuilderCallback(ctx), {}}; AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, indexBuilder); From 9e7a7c81d4a51a03b421edce5ab511a53eac5580 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 13 Nov 2025 10:11:50 +0100 Subject: [PATCH 05/22] move to plugin --- Framework/AnalysisSupport/CMakeLists.txt | 6 ++++ .../src/AODReaderHelpers.cxx | 0 .../src}/AODReaderHelpers.h | 0 .../AnalysisSupport/src/OnDemandPlugin.cxx | 32 +++++++++++++++++++ Framework/Core/src/ArrowSupport.cxx | 5 ++- Framework/Core/src/WorkflowHelpers.cxx | 5 ++- 6 files changed, 42 insertions(+), 6 deletions(-) rename Framework/{Core => AnalysisSupport}/src/AODReaderHelpers.cxx (100%) rename Framework/{Core/include/Framework => AnalysisSupport/src}/AODReaderHelpers.h (100%) create mode 100644 Framework/AnalysisSupport/src/OnDemandPlugin.cxx diff --git a/Framework/AnalysisSupport/CMakeLists.txt b/Framework/AnalysisSupport/CMakeLists.txt index 92fd55b86a33d..6024134a5495d 100644 --- a/Framework/AnalysisSupport/CMakeLists.txt +++ b/Framework/AnalysisSupport/CMakeLists.txt @@ -16,6 +16,12 @@ if(TARGET JAliEn::JAliEn) set(EXTRA_TARGETS XRootD::Client JAliEn::JAliEn) endif() +o2_add_library(FrameworkOnDemandTablesSupport + SOURCES src/OnDemandPlugin.cxx + src/AODReaderHelpers.cxx + PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_LIST_DIR}/src + PUBLIC_LINK_LIBRARIES O2::Framework ${EXTRA_TARGETS}) + o2_add_library(FrameworkAnalysisSupport SOURCES src/Plugin.cxx src/DataInputDirector.cxx diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx similarity index 100% rename from Framework/Core/src/AODReaderHelpers.cxx rename to Framework/AnalysisSupport/src/AODReaderHelpers.cxx diff --git a/Framework/Core/include/Framework/AODReaderHelpers.h b/Framework/AnalysisSupport/src/AODReaderHelpers.h similarity index 100% rename from Framework/Core/include/Framework/AODReaderHelpers.h rename to Framework/AnalysisSupport/src/AODReaderHelpers.h diff --git a/Framework/AnalysisSupport/src/OnDemandPlugin.cxx b/Framework/AnalysisSupport/src/OnDemandPlugin.cxx new file mode 100644 index 0000000000000..9438f9bf69c96 --- /dev/null +++ b/Framework/AnalysisSupport/src/OnDemandPlugin.cxx @@ -0,0 +1,32 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. +#include "Framework/Plugins.h" +#include "Framework/AlgorithmSpec.h" +#include "AODReaderHelpers.h" + +struct ExtendedTableSpawner : o2::framework::AlgorithmPlugin { + o2::framework::AlgorithmSpec create(o2::framework::ConfigContext const& config) override + { + return o2::framework::readers::AODReaderHelpers::aodSpawnerCallback(config); + } +}; + +struct IndexTableBuilder : o2::framework::AlgorithmPlugin { + o2::framework::AlgorithmSpec create(o2::framework::ConfigContext const& config) override + { + return o2::framework::readers::AODReaderHelpers::indexBuilderCallback(config); + } +}; + +DEFINE_DPL_PLUGINS_BEGIN +DEFINE_DPL_PLUGIN_INSTANCE(ExtendedTableSpawner, CustomAlgorithm); +DEFINE_DPL_PLUGIN_INSTANCE(IndexTableBuilder, CustomAlgorithm); +DEFINE_DPL_PLUGINS_END diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index f795db6b45383..17ee95d004118 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -10,7 +10,6 @@ // or submit itself to any jurisdiction. #include "ArrowSupport.h" -#include "Framework/AODReaderHelpers.h" #include "Framework/ArrowContext.h" #include "Framework/ArrowTableSlicingCache.h" #include "Framework/DataProcessor.h" @@ -619,7 +618,7 @@ o2::framework::ServiceSpec ArrowSupport::arrowBackendSpec() builder->outputs.clear(); // replace AlgorithmSpec // FIXME: it should be made more generic, so it does not need replacement... - builder->algorithm = readers::AODReaderHelpers::indexBuilderCallback(ctx); + builder->algorithm = PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "IndexTablesBuilder", ctx);//readers::AODReaderHelpers::indexBuilderCallback(ctx); AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, *builder); } @@ -654,7 +653,7 @@ o2::framework::ServiceSpec ArrowSupport::arrowBackendSpec() spawner->inputs.clear(); // replace AlgorithmSpec // FIXME: it should be made more generic, so it does not need replacement... - spawner->algorithm = readers::AODReaderHelpers::aodSpawnerCallback(ctx); + spawner->algorithm = PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "ExtendedTableSpawner", ctx); AnalysisSupportHelpers::addMissingOutputsToSpawner({}, ac.spawnerInputs, ac.requestedAODs, *spawner); } diff --git a/Framework/Core/src/WorkflowHelpers.cxx b/Framework/Core/src/WorkflowHelpers.cxx index b5fa2ffe5d41b..08660ebf5c3ad 100644 --- a/Framework/Core/src/WorkflowHelpers.cxx +++ b/Framework/Core/src/WorkflowHelpers.cxx @@ -11,7 +11,6 @@ #include "WorkflowHelpers.h" #include "Framework/AnalysisSupportHelpers.h" #include "Framework/AlgorithmSpec.h" -#include "Framework/AODReaderHelpers.h" #include "Framework/ConfigParamSpec.h" #include "Framework/ConfigParamsHelper.h" #include "Framework/CommonDataProcessors.h" @@ -416,7 +415,7 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext "internal-dpl-aod-index-builder", {}, {}, - readers::AODReaderHelpers::indexBuilderCallback(ctx), + PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "IndexTableBuilder", ctx),//readers::AODReaderHelpers::indexBuilderCallback(ctx), {}}; AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, indexBuilder); @@ -436,7 +435,7 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext "internal-dpl-aod-spawner", {}, {}, - readers::AODReaderHelpers::aodSpawnerCallback(ctx), + PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "ExtendedTableSpawner", ctx),//readers::AODReaderHelpers::aodSpawnerCallback(ctx), {}}; AnalysisSupportHelpers::addMissingOutputsToSpawner({}, ac.spawnerInputs, ac.requestedAODs, aodSpawner); From e64c439f11eaddde55c132b5616c6734223da345 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 13 Nov 2025 10:16:15 +0100 Subject: [PATCH 06/22] fixup! move to plugin --- Framework/Core/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index cefb903c29895..8552cfe504a26 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -10,8 +10,7 @@ # or submit itself to any jurisdiction. o2_add_library(Framework - SOURCES src/AODReaderHelpers.cxx - src/AnalysisHelpers.cxx + SOURCES src/AnalysisHelpers.cxx src/AlgorithmSpec.cxx src/ArrowSupport.cxx src/ArrowTableSlicingCache.cxx From 71a32aca2c55f80fda23913b91d03439276c4c91 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 13 Nov 2025 14:54:24 +0100 Subject: [PATCH 07/22] fixes --- Framework/AnalysisSupport/src/AODReaderHelpers.cxx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx index 3b5eb7635db90..ae9b291ed3ada 100644 --- a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -#include "Framework/AODReaderHelpers.h" +#include "AODReaderHelpers.h" #include "Framework/AnalysisHelpers.h" #include "Framework/AnalysisDataModelHelpers.h" #include "Framework/ExpressionHelpers.h" @@ -17,7 +17,7 @@ #include "Framework/AlgorithmSpec.h" #include "Framework/CallbackService.h" #include "Framework/DataSpecUtils.h" -#include "ExpressionJSONHelpers.h" +#include "../src/ExpressionJSONHelpers.h" #include "Framework/ConfigContext.h" #include "Framework/AnalysisContext.h" @@ -245,7 +245,6 @@ AlgorithmSpec AODReaderHelpers::aodSpawnerCallback(ConfigContext const& ctx) return [makers](ProcessingContext& pc) mutable { auto outputs = pc.outputs(); for (auto& maker : makers) { - LOGP(info, ">>> Spawning: {}", maker.binding); outputs.adopt(Output{maker.origin, maker.description, maker.version}, maker.make(pc)); } }; From dc5465ad9bbb59ca57b2307fce1693e6a0a26005 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 17 Nov 2025 09:18:42 +0100 Subject: [PATCH 08/22] index builder info and serialization --- .../AnalysisSupport/src/AODReaderHelpers.cxx | 32 +++++++++++++++++++ Framework/Core/include/Framework/ASoA.h | 5 +++ .../Core/include/Framework/AnalysisHelpers.h | 27 ++++++++++++++++ .../include/Framework/IndexBuilderHelpers.h | 2 -- Framework/Core/src/IndexJSONHelpers.cxx | 24 ++++++++++++++ Framework/Core/src/IndexJSONHelpers.h | 25 +++++++++++++++ 6 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 Framework/Core/src/IndexJSONHelpers.cxx create mode 100644 Framework/Core/src/IndexJSONHelpers.h diff --git a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx index ae9b291ed3ada..96be2ecc4aac7 100644 --- a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx @@ -57,6 +57,38 @@ auto make_build(D metadata, InputSpec const& input, ProcessingContext& pc) extractOriginals(pc), index_pack_t{}); } + +struct Builder { + +}; + +struct Buildable { + std::string binding; + + header::DataOrigin origin; + header::DataDescription description; + header::DataHeader::SubSpecificationType version; + + Buildable(InputSpec const& spec) + : binding{spec.binding} + { + auto&& [origin_, description_, version_] = DataSpecUtils::asConcreteDataMatcher(spec); + origin = origin_; + description = description_; + version = version_; + + // The following components are needed to build an index table + // 1. the labels of the source tables to extract from inputRecord -> extracted from input metadata + // 2. the mapping, in the order of the definition of columns, of the + // position in each source table of an index column pointing to the Key + // and the types of index to write (self, single-valued, slice or array) + // the mapping has to be created at the point where the type information is available and + // put into the input spec metadata as a vector of (type, label, pos) + + } + +}; + } // namespace AlgorithmSpec AODReaderHelpers::indexBuilderCallback(ConfigContext const& ctx) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 13560bd22c054..34eb48a9d3834 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1307,6 +1307,11 @@ concept with_expression_pack = requires { typename T::expression_pack_t{}; }; +template +concept with_index_pack = requires { + typename T::index_pack_t{}; +}; + template os1, size_t N2, std::array os2> consteval bool is_compatible() { diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 660149b2154e1..a7828a9544c92 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -54,6 +54,19 @@ constexpr auto tableRef2Schema() {"\"\""}}; } +enum struct IndexKind : unsigned { + IdxSelf = 0, + IdxSingle = 1, + IdxSlice = 2, + IdxArray = 3 +}; + +struct IndexRecord { + std::string label; + IndexKind kind; + size_t pos; +}; + namespace { template @@ -154,6 +167,20 @@ constexpr auto getExpressionMetadata() -> std::vector +constexpr auto getIndexMetadata() -> std::vector +{ + + return {}; +} + +template + requires(!soa::with_index_pack) +constexpr auto getIndexMetadata() -> std::vector +{ + return {}; +} + } // namespace template diff --git a/Framework/Core/include/Framework/IndexBuilderHelpers.h b/Framework/Core/include/Framework/IndexBuilderHelpers.h index d02d5cfc59b3f..eef76f8340078 100644 --- a/Framework/Core/include/Framework/IndexBuilderHelpers.h +++ b/Framework/Core/include/Framework/IndexBuilderHelpers.h @@ -11,13 +11,11 @@ #ifndef O2_FRAMEWORK_INDEXBUILDERHELPERS_H_ #define O2_FRAMEWORK_INDEXBUILDERHELPERS_H_ -#include "arrow/array.h" #include #include #include #include #include -#include namespace o2::framework { diff --git a/Framework/Core/src/IndexJSONHelpers.cxx b/Framework/Core/src/IndexJSONHelpers.cxx new file mode 100644 index 0000000000000..81cda2d28e113 --- /dev/null +++ b/Framework/Core/src/IndexJSONHelpers.cxx @@ -0,0 +1,24 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "IndexJSONHelpers.h" + +namespace o2::framework { +std::vector IndexJSONHelpers::read(std::istream& s) +{ + +} + +void IndexJSONHelpers::write(std::ostream& o, std::vector& irs) +{ + +} +} diff --git a/Framework/Core/src/IndexJSONHelpers.h b/Framework/Core/src/IndexJSONHelpers.h new file mode 100644 index 0000000000000..dee534ae390f5 --- /dev/null +++ b/Framework/Core/src/IndexJSONHelpers.h @@ -0,0 +1,25 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. +#ifndef INDEXJSONHELPERS_H +#define INDEXJSONHELPERS_H + +#include + +namespace o2::framework +{ +struct IndexJSONHelpers { + static std::vector read(std::istream& s); + static void write(std::ostream& o, std::vector& irs); +}; + +} // namespace o2::framework + +#endif // INDEXJSONHELPERS_H From 279fcaae30a2379ab63db1dcd8210fa08fe1de13 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 19 Nov 2025 18:24:08 +0100 Subject: [PATCH 09/22] index mapping serialization --- .../AnalysisSupport/src/AODReaderHelpers.cxx | 71 ++++--- Framework/Core/CMakeLists.txt | 1 + Framework/Core/include/Framework/ASoA.h | 24 ++- .../Core/include/Framework/AnalysisHelpers.h | 79 +++++-- Framework/Core/src/AnalysisHelpers.cxx | 9 + Framework/Core/src/IndexJSONHelpers.cxx | 197 +++++++++++++++++- Framework/Core/test/test_IndexBuilder.cxx | 39 +++- 7 files changed, 366 insertions(+), 54 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx index 96be2ecc4aac7..2c5b5e6a5284f 100644 --- a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx @@ -18,11 +18,14 @@ #include "Framework/CallbackService.h" #include "Framework/DataSpecUtils.h" #include "../src/ExpressionJSONHelpers.h" +#include "../src/IndexJSONHelpers.h" #include "Framework/ConfigContext.h" #include "Framework/AnalysisContext.h" namespace o2::framework::readers { +namespace +{ template refs> static inline auto extractOriginals(ProcessingContext& pc) { @@ -30,8 +33,7 @@ static inline auto extractOriginals(ProcessingContext& pc) return {pc.inputs().get(o2::aod::label())->asArrowTable()...}; }(std::make_index_sequence()); } -namespace -{ + template requires(D::exclusive) auto make_build(D metadata, InputSpec const& input, ProcessingContext& pc) @@ -58,16 +60,39 @@ auto make_build(D metadata, InputSpec const& input, ProcessingContext& pc) index_pack_t{}); } +static inline auto extractSources(ProcessingContext& pc, std::vector const& labels) +{ + std::vector> tables; + for (auto const& label : labels) { + tables.emplace_back(pc.inputs().get(label.c_str())->asArrowTable()); + } + return tables; +} + struct Builder { + std::string binding; + std::vector labels; + std::vector records; + header::DataOrigin origin; + header::DataDescription description; + header::DataHeader::SubSpecificationType version; + + std::shared_ptr build(ProcessingContext& pc) const + { + std::shared_ptr result; + auto tables = extractSources(pc, labels); + return result; + } }; struct Buildable { std::string binding; - + std::vector labels; header::DataOrigin origin; header::DataDescription description; header::DataHeader::SubSpecificationType version; + std::vector records; Buildable(InputSpec const& spec) : binding{spec.binding} @@ -77,14 +102,25 @@ struct Buildable { description = description_; version = version_; - // The following components are needed to build an index table - // 1. the labels of the source tables to extract from inputRecord -> extracted from input metadata - // 2. the mapping, in the order of the definition of columns, of the - // position in each source table of an index column pointing to the Key - // and the types of index to write (self, single-valued, slice or array) - // the mapping has to be created at the point where the type information is available and - // put into the input spec metadata as a vector of (type, label, pos) + auto loc = std::find_if(spec.metadata.begin(), spec.metadata.end(), [](ConfigParamSpec const& cps) { return cps.name.compare("index-records") == 0; }); + std::stringstream iws(loc->defaultValue.get()); + records = IndexJSONHelpers::read(iws); + for (auto const& r : records) { + labels.emplace_back(r.label); + } + } + + Builder createBuilder() const + { + return Builder{ + binding, + labels, + records, + origin, + description, + version + }; } }; @@ -132,21 +168,6 @@ AlgorithmSpec AODReaderHelpers::indexBuilderCallback(ConfigContext const& ctx) namespace { -template -auto make_spawn(InputSpec const& input, ProcessingContext& pc) -{ - using metadata_t = o2::aod::MetadataTrait::metadata; - constexpr auto sources = metadata_t::sources; - static std::shared_ptr projector = nullptr; - static std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(typename metadata_t::expression_pack_t{})); - static auto projectors = [](framework::pack) -> std::array - { - return {{std::move(C::Projector())...}}; - } - (typename metadata_t::expression_pack_t{}); - return o2::framework::spawner(extractOriginals(pc), input.binding.c_str(), projectors.data(), projector, schema); -} - struct Maker { std::string binding; std::vector labels; diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index 8552cfe504a26..ce8fbb0dc55f7 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -142,6 +142,7 @@ o2_add_library(Framework src/Variant.cxx src/VariantJSONHelpers.cxx src/ExpressionJSONHelpers.cxx + src/IndexJSONHelpers.cxx src/VariantPropertyTreeHelpers.cxx src/WorkflowCustomizationHelpers.cxx src/WorkflowHelpers.cxx diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 34eb48a9d3834..918a27f784bbc 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -217,6 +217,19 @@ using is_self_index_t = typename std::conditional_t, std namespace o2::aod { +namespace { +template map> +static consteval int getIndexPosToKey_impl() +{ + constexpr const auto pos = std::find(map.begin(), map.end(), true); + if constexpr (pos != map.end()) { + return std::distance(map.begin(), pos); + } else { + return -1; + } +} +} + /// Base type for table metadata template struct TableMetadata { @@ -243,17 +256,6 @@ struct TableMetadata { return getIndexPosToKey_impl(persistent_columns_t{})>(); } - template map> - static consteval int getIndexPosToKey_impl() - { - constexpr const auto pos = std::find(map.begin(), map.end(), true); - if constexpr (pos != map.end()) { - return std::distance(map.begin(), pos); - } else { - return -1; - } - } - static std::shared_ptr getSchema() { return std::make_shared([](framework::pack&& p) { return o2::soa::createFieldsFromColumns(p); }(persistent_columns_t{})); diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index a7828a9544c92..5bea174cc875c 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -25,11 +25,30 @@ #include "Framework/TableBuilder.h" #include "Framework/Traits.h" +#include #include +namespace o2::soa { +enum struct IndexKind : int { + IdxInvalid = -1, + IdxSelf = 0, + IdxSingle = 1, + IdxSlice = 2, + IdxArray = 3 +}; + +struct IndexRecord { + std::string label; + IndexKind kind; + int pos; + auto operator<=>(const IndexRecord&) const = default; +}; +} // namespace o2::soa + namespace o2::framework { std::string serializeProjectors(std::vector& projectors); std::string serializeSchema(std::shared_ptr schema); +std::string serializeIndexRecords(std::vector& irs); } // namespace o2::framework namespace o2::soa @@ -54,19 +73,6 @@ constexpr auto tableRef2Schema() {"\"\""}}; } -enum struct IndexKind : unsigned { - IdxSelf = 0, - IdxSingle = 1, - IdxSlice = 2, - IdxArray = 3 -}; - -struct IndexRecord { - std::string label; - IndexKind kind; - size_t pos; -}; - namespace { template @@ -102,6 +108,47 @@ inline constexpr auto getCCDBUrls() return result; } +template + requires(std::same_as) +consteval IndexKind getIndexKind() +{ + return IndexKind::IdxSingle; +} + +template + requires(std::is_bounded_array_v) +consteval IndexKind getIndexKind() +{ + return IndexKind::IdxSlice; +} + +template + requires(framework::is_specialization_v) +consteval IndexKind getIndexKind() +{ + return IndexKind::IdxArray; +} + +template +inline constexpr auto getIndexMapping() +{ + std::vector idx; + using indices = T::index_pack_t; + using Key = T::Key; + [&idx](std::index_sequence) mutable { + constexpr auto refs = T::sources; + ([&idx]() mutable { + constexpr auto pos = o2::aod::MetadataTrait>::metadata::template getIndexPosToKey(); + if constexpr (pos == -1) { + idx.emplace_back(o2::aod::label(), IndexKind::IdxSelf, pos); + } else { + idx.emplace_back(o2::aod::label(), getIndexKind(), pos); + } + }.template operator()::type>(), ...); + }(std::make_index_sequence());; + return idx; +} + template constexpr auto getInputMetadata() -> std::vector { @@ -170,8 +217,8 @@ constexpr auto getExpressionMetadata() -> std::vector constexpr auto getIndexMetadata() -> std::vector { - - return {}; + auto map = getIndexMapping(); + return {framework::ConfigParamSpec{"index-records", framework::VariantType::String, framework::serializeIndexRecords(map), {"\"\""}}}; } template @@ -193,6 +240,8 @@ constexpr auto tableRef2InputSpec() metadata.insert(metadata.end(), ccdbMetadata.begin(), ccdbMetadata.end()); auto p = getExpressionMetadata>::metadata>(); metadata.insert(metadata.end(), p.begin(), p.end()); + auto idx = getIndexMetadata>::metadata>(); + metadata.insert(metadata.end(), idx.begin(), idx.end()); if constexpr (!soa::with_ccdb_urls>::metadata>) { metadata.emplace_back(framework::ConfigParamSpec{"schema", framework::VariantType::String, framework::serializeSchema(o2::aod::MetadataTrait>::metadata::getSchema()), {"\"\""}}); } diff --git a/Framework/Core/src/AnalysisHelpers.cxx b/Framework/Core/src/AnalysisHelpers.cxx index 63923008f5a70..91ccfe3dcf13f 100644 --- a/Framework/Core/src/AnalysisHelpers.cxx +++ b/Framework/Core/src/AnalysisHelpers.cxx @@ -8,8 +8,10 @@ // In applying this license CERN does not waive the privileges and immunities // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. +#include "Framework/AnalysisHelpers.h" #include "Framework/ExpressionHelpers.h" #include "ExpressionJSONHelpers.h" +#include "IndexJSONHelpers.h" namespace o2::framework { @@ -41,4 +43,11 @@ std::string serializeSchema(std::shared_ptr schema) ArrowJSONHelpers::write(osm, schema); return osm.str(); } + +std::string serializeIndexRecords(std::vector& irs) +{ + std::stringstream osm; + IndexJSONHelpers::write(osm, irs); + return osm.str(); +} } // namespace o2::framework diff --git a/Framework/Core/src/IndexJSONHelpers.cxx b/Framework/Core/src/IndexJSONHelpers.cxx index 81cda2d28e113..7334915954737 100644 --- a/Framework/Core/src/IndexJSONHelpers.cxx +++ b/Framework/Core/src/IndexJSONHelpers.cxx @@ -8,17 +8,210 @@ // In applying this license CERN does not waive the privileges and immunities // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. - #include "IndexJSONHelpers.h" +#include +#include +#include +#include +#include + +#include +#include + namespace o2::framework { +namespace { +struct IndexRecordsReader : public rapidjson::BaseReaderHandler, IndexRecordsReader> { + using Ch = rapidjson::UTF8<>::Ch; + using SizeType = rapidjson::SizeType; + + enum struct State { + IN_START, + IN_LIST, + IN_RECORD, + IN_ERROR + }; + + std::stack states; + std::ostringstream debug; + + std::vector records; + std::string currentKey; + std::string label; + o2::soa::IndexKind kind; + int pos; + + IndexRecordsReader() + { + debug << ">>> Start" << std::endl; + states.push(State::IN_START); + } + + bool StartArray() + { + debug << "StartArray()" << std::endl; + if (states.top() == State::IN_START && currentKey.compare("records") == 0) { + states.push(State::IN_LIST); + return true; + } + states.push(State::IN_ERROR); + return false; + } + + bool EndArray(SizeType) + { + debug << "EndArray()" << std::endl; + if (states.top() == State::IN_LIST) { + // records done + states.pop(); + return true; + } + states.push(State::IN_ERROR); + return false; + } + + bool Key(const Ch* str, SizeType, bool) + { + debug << "Key(" << str << ")" << std::endl; + currentKey = str; + if (states.top() == State::IN_START) { + if (currentKey.compare("records") == 0) { + return true; + } + } + + if (states.top() == State::IN_RECORD) { + if (currentKey.compare("label") == 0) { + return true; + } + if (currentKey.compare("kind") == 0) { + return true; + } + if (currentKey.compare("pos") == 0) { + return true; + } + } + + states.push(State::IN_ERROR); + return false; + } + + bool StartObject() + { + debug << "StartObject()" << std::endl; + if (states.top() == State::IN_START) { + return true; + } + + if (states.top() == State::IN_LIST) { + states.push(State::IN_RECORD); + label = ""; + kind = soa::IndexKind::IdxInvalid; + pos = -2; + return true; + } + + states.push(State::IN_ERROR); + return false; + } + + bool EndObject(SizeType) + { + debug << "EndObject()" << std::endl; + if (states.top() == State::IN_RECORD) { + states.pop(); + // add a record + records.emplace_back(label, kind, pos); + return true; + } + + if (states.top() == State::IN_START) { + return true; + } + + states.push(State::IN_ERROR); + return false; + } + + bool Uint(unsigned i) + { + debug << "Uint(" << i << ") passed to Int()" << std::endl; + return Int(i); + } + + bool Int(int i) + { + debug << "Int(" << i << ")" << std::endl; + if (states.top() == State::IN_RECORD) { + if (currentKey.compare("kind") == 0) { + kind = (soa::IndexKind)i; + return true; + } + if (currentKey.compare("pos") == 0) { + pos = i; + return true; + } + } + + states.push(State::IN_ERROR); + return false; + } + + bool String(const Ch* str, SizeType, bool) + { + debug << "String(" << str << ")" << std::endl; + if (states.top() == State::IN_RECORD) { + if (currentKey.compare("label") == 0) { + label = str; + return true; + } + } + + states.push(State::IN_ERROR); + return false; + } +}; +} + std::vector IndexJSONHelpers::read(std::istream& s) { + rapidjson::Reader reader; + rapidjson::IStreamWrapper isw(s); + IndexRecordsReader irreader; + + bool ok = reader.Parse(isw, irreader); + if (!ok) { + throw framework::runtime_error_f("Cannot parse serialized index records vector, error: %s at offset: %d", rapidjson::GetParseError_En(reader.GetParseErrorCode()), reader.GetErrorOffset()); + } + return irreader.records; } -void IndexJSONHelpers::write(std::ostream& o, std::vector& irs) +namespace { +void writeRecords(rapidjson::Writer& w, std::vector& records) { + for (auto& r : records) { + w.StartObject(); + w.Key("label"); + w.String(r.label.c_str()); + w.Key("kind"); + w.Int((int)r.kind); + w.Key("pos"); + w.Int(r.pos); + w.EndObject(); + } +} +} +void IndexJSONHelpers::write(std::ostream& o, std::vector& irs) +{ + rapidjson::OStreamWrapper osw(o); + rapidjson::Writer w(osw); + w.StartObject(); + w.Key("records"); + w.StartArray(); + writeRecords(w, irs); + w.EndArray(); + w.EndObject(); } } diff --git a/Framework/Core/test/test_IndexBuilder.cxx b/Framework/Core/test/test_IndexBuilder.cxx index ea9f715f20c8a..b3931862ca945 100644 --- a/Framework/Core/test/test_IndexBuilder.cxx +++ b/Framework/Core/test/test_IndexBuilder.cxx @@ -10,8 +10,9 @@ // or submit itself to any jurisdiction. #include "Framework/AnalysisDataModel.h" -#include "Framework/AnalysisTask.h" +#include "../src/IndexJSONHelpers.h" #include +#include using namespace o2::framework; using namespace arrow; @@ -235,3 +236,39 @@ TEST_CASE("AdvancedIndexTables") ++count; } } + + +TEST_CASE("IndexRecordsSerialization") +{ + auto map = getIndexMapping>::metadata>(); + + std::stringstream osm; + IndexJSONHelpers::write(osm, map); + + std::stringstream ism; + ism.str(osm.str()); + auto rmap = IndexJSONHelpers::read(ism); + REQUIRE(map == rmap); + + map = getIndexMapping>::metadata>(); + + osm.clear(); + osm.str(""); + IndexJSONHelpers::write(osm, map); + + ism.clear(); + ism.str(osm.str()); + rmap = IndexJSONHelpers::read(ism); + REQUIRE(map == rmap); + + map = getIndexMapping>::metadata>(); + + osm.clear(); + osm.str(""); + IndexJSONHelpers::write(osm, map); + + ism.clear(); + ism.str(osm.str()); + rmap = IndexJSONHelpers::read(ism); + REQUIRE(map == rmap); +} From ca6cc941de93e9019b65de573d8d7d62f346549d Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 20 Nov 2025 14:11:31 +0100 Subject: [PATCH 10/22] working generic index builder --- .../AnalysisSupport/src/AODReaderHelpers.cxx | 173 ++-------- .../Core/include/Framework/AnalysisHelpers.h | 306 ++++++++++-------- .../Core/include/Framework/AnalysisManagers.h | 2 +- .../Core/include/Framework/Expressions.h | 2 + .../include/Framework/IndexBuilderHelpers.h | 19 +- Framework/Core/src/ArrowSupport.cxx | 2 +- Framework/Core/src/Expressions.cxx | 16 + Framework/Core/src/IndexJSONHelpers.cxx | 12 +- Framework/Core/test/test_IndexBuilder.cxx | 13 +- 9 files changed, 250 insertions(+), 295 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx index 2c5b5e6a5284f..30592f0a497dc 100644 --- a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx @@ -10,15 +10,14 @@ // or submit itself to any jurisdiction. #include "AODReaderHelpers.h" +#include "../src/ExpressionJSONHelpers.h" +#include "../src/IndexJSONHelpers.h" + +#include "Framework/AnalysisDataModel.h" #include "Framework/AnalysisHelpers.h" -#include "Framework/AnalysisDataModelHelpers.h" -#include "Framework/ExpressionHelpers.h" #include "Framework/DataProcessingHelpers.h" #include "Framework/AlgorithmSpec.h" -#include "Framework/CallbackService.h" #include "Framework/DataSpecUtils.h" -#include "../src/ExpressionJSONHelpers.h" -#include "../src/IndexJSONHelpers.h" #include "Framework/ConfigContext.h" #include "Framework/AnalysisContext.h" @@ -26,67 +25,8 @@ namespace o2::framework::readers { namespace { -template refs> -static inline auto extractOriginals(ProcessingContext& pc) -{ - return [&](std::index_sequence) -> std::vector> { - return {pc.inputs().get(o2::aod::label())->asArrowTable()...}; - }(std::make_index_sequence()); -} - -template - requires(D::exclusive) -auto make_build(D metadata, InputSpec const& input, ProcessingContext& pc) -{ - using metadata_t = decltype(metadata); - using Key = typename metadata_t::Key; - using index_pack_t = typename metadata_t::index_pack_t; - constexpr auto sources = metadata_t::sources; - return o2::framework::IndexBuilder::indexBuilder(input.binding.c_str(), - extractOriginals(pc), - index_pack_t{}); -} - -template - requires(!D::exclusive) -auto make_build(D metadata, InputSpec const& input, ProcessingContext& pc) -{ - using metadata_t = decltype(metadata); - using Key = typename metadata_t::Key; - using index_pack_t = typename metadata_t::index_pack_t; - constexpr auto sources = metadata_t::sources; - return o2::framework::IndexBuilder::indexBuilder(input.binding.c_str(), - extractOriginals(pc), - index_pack_t{}); -} - -static inline auto extractSources(ProcessingContext& pc, std::vector const& labels) -{ - std::vector> tables; - for (auto const& label : labels) { - tables.emplace_back(pc.inputs().get(label.c_str())->asArrowTable()); - } - return tables; -} - -struct Builder { - std::string binding; - std::vector labels; - std::vector records; - header::DataOrigin origin; - header::DataDescription description; - header::DataHeader::SubSpecificationType version; - - std::shared_ptr build(ProcessingContext& pc) const - { - std::shared_ptr result; - auto tables = extractSources(pc, labels); - return result; - } - -}; - struct Buildable { + bool exclusive = false; std::string binding; std::vector labels; header::DataOrigin origin; @@ -106,14 +46,18 @@ struct Buildable { std::stringstream iws(loc->defaultValue.get()); records = IndexJSONHelpers::read(iws); + loc = std::find_if(spec.metadata.begin(), spec.metadata.end(), [](ConfigParamSpec const& cps){ return cps.name.compare("index-exclusive") == 0; }); + exclusive = loc->defaultValue.get(); + for (auto const& r : records) { labels.emplace_back(r.label); } } - Builder createBuilder() const + framework::Builder createBuilder() const { - return Builder{ + return { + exclusive, binding, labels, records, @@ -131,36 +75,18 @@ AlgorithmSpec AODReaderHelpers::indexBuilderCallback(ConfigContext const& ctx) { auto& ac = ctx.services().get(); return AlgorithmSpec::InitCallback{[requested = ac.requestedIDXs](InitContext& /*ic*/) { - return [requested](ProcessingContext& pc) { + std::vector buildables; + for (auto& i : requested) { + buildables.emplace_back(i); + } + std::vector builders; + for (auto& b : buildables) { + builders.push_back(b.createBuilder()); + } + return [builders](ProcessingContext& pc) { auto outputs = pc.outputs(); - // spawn tables - for (auto& input : requested) { - auto&& [origin, description, version] = DataSpecUtils::asConcreteDataMatcher(input); - if (description == header::DataDescription{"MA_RN2_EX"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::Run2MatchedExclusiveMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_RN2_SP"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::Run2MatchedSparseMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_RN3_EX"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::Run3MatchedExclusiveMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_RN3_SP"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::Run3MatchedSparseMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_BCCOL_EX"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::MatchedBCCollisionsExclusiveMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_BCCOL_SP"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::MatchedBCCollisionsSparseMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_BCCOLS_EX"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::MatchedBCCollisionsExclusiveMultiMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_BCCOLS_SP"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::MatchedBCCollisionsSparseMultiMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_RN3_BC_SP"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::Run3MatchedToBCSparseMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_RN3_BC_EX"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::Run3MatchedToBCExclusiveMetadata{}, input, pc)); - } else if (description == header::DataDescription{"MA_RN2_BC_SP"}) { - outputs.adopt(Output{origin, description, version}, make_build(o2::aod::Run2MatchedToBCSparseMetadata{}, input, pc)); - } else { - throw std::runtime_error("Not an index table"); - } + for (auto& builder : builders) { + outputs.adopt(Output{builder.origin, builder.description, builder.version}, builder.materialize(pc)); } }; }}; @@ -168,33 +94,6 @@ AlgorithmSpec AODReaderHelpers::indexBuilderCallback(ConfigContext const& ctx) namespace { -struct Maker { - std::string binding; - std::vector labels; - std::vector> expressions; - std::shared_ptr projector = nullptr; - std::shared_ptr schema = nullptr; - std::shared_ptr inputSchema = nullptr; - - header::DataOrigin origin; - header::DataDescription description; - header::DataHeader::SubSpecificationType version; - - std::shared_ptr make(ProcessingContext& pc) const - { - std::vector> originals; - for (auto const& label : labels) { - originals.push_back(pc.inputs().get(label)->asArrowTable()); - } - auto fullTable = soa::ArrowHelpers::joinTables(std::move(originals), std::span{labels.begin(), labels.size()}); - if (fullTable->num_rows() == 0) { - return arrow::Table::MakeEmpty(schema).ValueOrDie(); - } - - return spawnerHelper(fullTable, schema, binding.c_str(), schema->num_fields(), projector); - } -}; - struct Spawnable { std::string binding; std::vector labels; @@ -222,6 +121,7 @@ struct Spawnable { iws.clear(); iws.str(loc->defaultValue.get()); outputSchema = ArrowJSONHelpers::read(iws); + o2::framework::addLabelToSchema(outputSchema, binding.c_str()); std::vector> schemas; for (auto& i : spec.metadata) { @@ -233,22 +133,14 @@ struct Spawnable { schemas.emplace_back(ArrowJSONHelpers::read(iws)); } } + std::vector> fields; for (auto& s : schemas) { std::copy(s->fields().begin(), s->fields().end(), std::back_inserter(fields)); } - inputSchema = std::make_shared(fields); - int i = 0; - for (auto& p : projectors) { - expressions.push_back( - expressions::makeExpression( - expressions::createExpressionTree( - expressions::createOperations(p), - inputSchema), - outputSchema->field(i))); - ++i; - } + inputSchema = std::make_shared(fields); + expressions = expressions::materializeProjectors(projectors, inputSchema, outputSchema->fields()); } std::shared_ptr makeProjector() const @@ -264,9 +156,8 @@ struct Spawnable { return p; } - Maker createMaker() const + framework::Spawner createMaker() const { - o2::framework::addLabelToSchema(outputSchema, binding.c_str()); return { binding, labels, @@ -290,15 +181,15 @@ AlgorithmSpec AODReaderHelpers::aodSpawnerCallback(ConfigContext const& ctx) for (auto& i : requested) { spawnables.emplace_back(i); } - std::vector makers; + std::vector spawners; for (auto& s : spawnables) { - makers.push_back(s.createMaker()); + spawners.push_back(s.createMaker()); } - return [makers](ProcessingContext& pc) mutable { + return [spawners](ProcessingContext& pc) mutable { auto outputs = pc.outputs(); - for (auto& maker : makers) { - outputs.adopt(Output{maker.origin, maker.description, maker.version}, maker.make(pc)); + for (auto& spawner : spawners) { + outputs.adopt(Output{spawner.origin, spawner.description, spawner.version}, spawner.materialize(pc)); } }; }}; diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 5bea174cc875c..199a66183412e 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -38,10 +38,111 @@ enum struct IndexKind : int { struct IndexRecord { std::string label; + std::string columnLabel; IndexKind kind; int pos; auto operator<=>(const IndexRecord&) const = default; }; + +namespace +{ +inline constexpr int listSize(soa::IndexKind kind) +{ + switch (kind) { + case soa::IndexKind::IdxSingle: + return 1; + break; + case soa::IndexKind::IdxSlice: + return 2; + break; + case soa::IndexKind::IdxArray: + return -1; + break; + default: + return -2; + break; + } +} +} // namespace + +struct IndexBuilder { + template + static auto materialize(const char* label, std::vector>&& tables, std::vector const& records) + { + auto pool = arrow::default_memory_pool(); + std::vector> builders; + framework::SelfIndexColumnBuilder self{records[0].columnLabel.c_str(), pool}; + std::unique_ptr keyIndex = nullptr; + if (records[0].kind != soa::IndexKind::IdxSelf) { + keyIndex = std::make_unique(tables[0]->column(records[0].pos)); + } + + for (auto i = 1U; i < records.size(); ++i) { + if (records[i].kind == soa::IndexKind::IdxSelf) { + builders.emplace_back(std::make_shared(records[i].columnLabel.c_str(), pool)); + } else { + builders.emplace_back(std::make_shared(tables[i]->column(records[i].pos), records[i].columnLabel.c_str(), listSize(records[i].kind), pool)); + } + } + + std::vector finds; + finds.resize(builders.size()); + for (int64_t counter = 0; counter < tables[0]->num_rows(); ++counter) { + int64_t idx = -1; + if (keyIndex == nullptr) { + idx = counter; + } else { + idx = keyIndex->valueAt(counter); + } + for (auto i = 0U; i < builders.size(); ++i) { + if (records[i+1].kind == soa::IndexKind::IdxSelf) { + finds[i] = builders[i]->find(idx); + } else { + finds[i] = std::static_pointer_cast(builders[i])->find(idx); + } + } + if constexpr (Exclusive) { + if (std::none_of(finds.begin(), finds.end(), [](bool const x) { return x == false; })) { + for (auto i = 0U; i < builders.size(); ++i) { + if (records[i+1].kind == soa::IndexKind::IdxSelf) { + builders[i]->fill(idx); + } else { + std::static_pointer_cast(builders[i])->fill(idx); + } + } + self.fill(counter); + } + } else { + for (auto i = 0U; i < builders.size(); ++i) { + if (records[i+1].kind == soa::IndexKind::IdxSelf) { + builders[i]->fill(idx); + } else { + std::static_pointer_cast(builders[i])->fill(idx); + } + } + self.fill(counter); + } + } + + std::vector> arrays; + arrays.reserve(records.size()); + std::vector> fields; + fields.reserve(records.size()); + arrays.push_back(self.result()); + fields.push_back(self.field()); + for (auto i = 0U; i < builders.size(); ++i) { + if (records[i+1].kind == soa::IndexKind::IdxSelf) { + arrays.push_back(builders[i]->result()); + fields.push_back(builders[i]->field()); + } else { + arrays.push_back(std::static_pointer_cast(builders[i])->result()); + fields.push_back(std::static_pointer_cast(builders[i])->field()); + } + } + + return framework::makeArrowTable(label, std::move(arrays), std::move(fields)); + } +}; } // namespace o2::soa namespace o2::framework @@ -49,6 +150,66 @@ namespace o2::framework std::string serializeProjectors(std::vector& projectors); std::string serializeSchema(std::shared_ptr schema); std::string serializeIndexRecords(std::vector& irs); + +struct Spawner { + std::string binding; + std::vector labels; + std::vector> expressions; + std::shared_ptr projector = nullptr; + std::shared_ptr schema = nullptr; + std::shared_ptr inputSchema = nullptr; + + header::DataOrigin origin; + header::DataDescription description; + header::DataHeader::SubSpecificationType version; + + std::shared_ptr materialize(ProcessingContext& pc) const + { + std::vector> originals; + for (auto const& label : labels) { + originals.push_back(pc.inputs().get(label)->asArrowTable()); + } + auto fullTable = soa::ArrowHelpers::joinTables(std::move(originals), std::span{labels.begin(), labels.size()}); + if (fullTable->num_rows() == 0) { + return arrow::Table::MakeEmpty(schema).ValueOrDie(); + } + + return spawnerHelper(fullTable, schema, binding.c_str(), schema->num_fields(), projector); + } +}; + +namespace { +static inline auto extractSources(ProcessingContext& pc, std::vector const& labels) +{ + std::vector> tables; + for (auto const& label : labels) { + tables.emplace_back(pc.inputs().get(label.c_str())->asArrowTable()); + } + return tables; +} +} + +struct Builder { + bool exclusive; + std::string binding; + std::vector labels; + std::vector records; + header::DataOrigin origin; + header::DataDescription description; + header::DataHeader::SubSpecificationType version; + + std::shared_ptr materialize(ProcessingContext& pc) const + { + std::shared_ptr result; + auto tables = extractSources(pc, labels); + if (exclusive) { + result = o2::soa::IndexBuilder::materialize(binding.c_str(), std::move(tables), records); + } else { + result = o2::soa::IndexBuilder::materialize(binding.c_str(), std::move(tables), records); + } + return result; + } +}; } // namespace o2::framework namespace o2::soa @@ -137,14 +298,14 @@ inline constexpr auto getIndexMapping() using Key = T::Key; [&idx](std::index_sequence) mutable { constexpr auto refs = T::sources; - ([&idx]() mutable { + ([&idx]() mutable { constexpr auto pos = o2::aod::MetadataTrait>::metadata::template getIndexPosToKey(); if constexpr (pos == -1) { - idx.emplace_back(o2::aod::label(), IndexKind::IdxSelf, pos); + idx.emplace_back(o2::aod::label(), C::columnLabel(), IndexKind::IdxSelf, pos); } else { - idx.emplace_back(o2::aod::label(), getIndexKind(), pos); + idx.emplace_back(o2::aod::label(), C::columnLabel(), getIndexKind(), pos); } - }.template operator()::type>(), ...); + }.template operator()>(), ...); }(std::make_index_sequence());; return idx; } @@ -218,7 +379,8 @@ template constexpr auto getIndexMetadata() -> std::vector { auto map = getIndexMapping(); - return {framework::ConfigParamSpec{"index-records", framework::VariantType::String, framework::serializeIndexRecords(map), {"\"\""}}}; + return {framework::ConfigParamSpec{"index-records", framework::VariantType::String, framework::serializeIndexRecords(map), {"\"\""}}, + {framework::ConfigParamSpec{"index-exclusive", framework::VariantType::Bool, T::exclusive, {"\"\""}}}}; } template @@ -581,129 +743,6 @@ struct Exclusive { struct Sparse { }; -namespace -{ -template -inline std::shared_ptr getIndexToKey(arrow::Table* table) -{ - using IC = framework::pack_element_t(typename T::external_index_columns_t{}), typename T::external_index_columns_t>; - return table->column(framework::has_type_at_v(typename T::persistent_columns_t{})); -} - -template -struct ColumnTrait { - using column_t = C; - - static consteval auto listSize() - { - if constexpr (std::same_as>) { - return -1; - } else if constexpr (std::same_as) { - return 2; - } else { - return 1; - } - } - - template - static std::shared_ptr makeColumnBuilder(arrow::Table* table, arrow::MemoryPool* pool) - { - if constexpr (!std::same_as) { - return std::make_shared(getIndexToKey(table), C::columnLabel(), listSize(), pool); - } else { - return std::make_shared(C::columnLabel(), pool); - } - } -}; - -template -struct Reduction { - using type = typename std::conditional(), SelfIndexColumnBuilder, IndexColumnBuilder>::type; -}; - -template -using reduced_t = Reduction::type; -} // namespace - -template -struct IndexBuilder { - template refs, typename C1, typename... Cs> - static auto indexBuilder(const char* label, std::vector>&& tables, framework::pack) - { - auto pool = arrow::default_memory_pool(); - SelfIndexColumnBuilder self{C1::columnLabel(), pool}; - std::unique_ptr keyIndex = nullptr; - if constexpr (!Key::template hasOriginal()) { - keyIndex = std::make_unique(tables[0]->column(o2::aod::MetadataTrait>::metadata::template getIndexPosToKey())); - } - - auto sq = std::make_index_sequence(); - - auto columnBuilders = [&tables, &pool ](std::index_sequence) -> std::array, sizeof...(Cs)> - { - return {[](arrow::Table* table, arrow::MemoryPool* pool) { - using T = framework::pack_element_t>; - if constexpr (!Key::template hasOriginal()) { - constexpr auto pos = o2::aod::MetadataTrait>::metadata::template getIndexPosToKey(); - return std::make_shared(table->column(pos), T::columnLabel(), ColumnTrait::listSize(), pool); - } else { - return std::make_shared(T::columnLabel(), pool); - } - }(tables[Is + 1].get(), pool)...}; - } - (sq); - - std::array finds; - - for (int64_t counter = 0; counter < tables[0]->num_rows(); ++counter) { - int64_t idx = -1; - if constexpr (Key::template hasOriginal()) { - idx = counter; - } else { - idx = keyIndex->valueAt(counter); - } - finds = [&idx, &columnBuilders](std::index_sequence) { - return std::array{ - [&idx, &columnBuilders]() { - using T = typename framework::pack_element_t>; - return std::static_pointer_cast>(columnBuilders[Is])->template find(idx); - }()...}; - }(sq); - if constexpr (std::same_as) { - [&idx, &columnBuilders](std::index_sequence) { - ([&idx, &columnBuilders]() { - using T = typename framework::pack_element_t>; - return std::static_pointer_cast>(columnBuilders[Is])->template fill(idx); }(), ...); - }(sq); - self.fill(counter); - } else if constexpr (std::same_as) { - if (std::none_of(finds.begin(), finds.end(), [](bool const x) { return x == false; })) { - [&idx, &columnBuilders](std::index_sequence) { - ([&idx, &columnBuilders]() { - using T = typename framework::pack_element_t>; - return std::static_pointer_cast>(columnBuilders[Is])->template fill(idx); - }(), - ...); - }(sq); - self.fill(counter); - } - } - } - - return [&label, &columnBuilders, &self](std::index_sequence) { - return makeArrowTable(label, - {self.template result(), [&columnBuilders]() { - using T = typename framework::pack_element_t>; - return std::static_pointer_cast>(columnBuilders[Is])->template result(); - }()...}, - {self.field(), [&columnBuilders]() { - using T = typename framework::pack_element_t>; - return std::static_pointer_cast>(columnBuilders[Is])->field(); - }()...}); - }(sq); - } -}; - /// This helper struct allows you to declare index tables to be created in a task template @@ -717,12 +756,13 @@ template struct Builds : decltype(transformBase()) { using buildable_t = T; using metadata = decltype(transformBase())::metadata; - using IP = std::conditional_t, IndexBuilder>; using Key = metadata::Key; using H = typename T::first_t; using Ts = typename T::rest_t; using index_pack_t = metadata::index_pack_t; + std::vector map; + T* operator->() { return table.get(); @@ -743,10 +783,12 @@ struct Builds : decltype(transformBase()) { return index_pack_t{}; } - template - auto build(framework::pack, std::vector>&& tables) + auto build(std::vector>&& tables) { - this->table = std::make_shared(IP::template indexBuilder(o2::aod::label(), std::forward>>(tables), framework::pack{})); + if (map.empty()) { + map = soa::getIndexMapping(); + } + this->table = std::make_shared(soa::IndexBuilder::materialize(o2::aod::label(), std::forward>>(tables), map)); return (this->table != nullptr); } }; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 596f3da6a557a..4603c76ea4319 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -308,7 +308,7 @@ template bool prepareOutput(ProcessingContext& context, T& builds) { using metadata = o2::aod::MetadataTrait>::metadata; - return builds.template build(builds.pack(), extractOriginals(context)); + return builds.build(extractOriginals(context)); } template diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index e08bf8db52bb4..0be19954f1faa 100644 --- a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -712,6 +712,8 @@ std::shared_ptr createProjectorHelper(size_t nColumns, expre std::shared_ptr schema, std::vector> const& fields); +std::vector> materializeProjectors(std::vector const& projectors, std::shared_ptr const& inputSchema, std::vector> outputFields); + template std::shared_ptr createProjectors(framework::pack, std::vector> const& fields, gandiva::SchemaPtr schema) { diff --git a/Framework/Core/include/Framework/IndexBuilderHelpers.h b/Framework/Core/include/Framework/IndexBuilderHelpers.h index eef76f8340078..340a3ffe6440d 100644 --- a/Framework/Core/include/Framework/IndexBuilderHelpers.h +++ b/Framework/Core/include/Framework/IndexBuilderHelpers.h @@ -44,7 +44,6 @@ struct SelfIndexColumnBuilder { SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool); virtual ~SelfIndexColumnBuilder() = default; - template inline std::shared_ptr result() const { std::shared_ptr array; @@ -56,13 +55,12 @@ struct SelfIndexColumnBuilder { return std::make_shared(array); } std::shared_ptr field() const; - template + inline bool find(int) { return true; } - template inline void fill(int idx) { (void)static_cast(mBuilder.get())->Append(idx); @@ -79,37 +77,34 @@ class IndexColumnBuilder : public SelfIndexColumnBuilder, public ChunkedArrayIte IndexColumnBuilder(std::shared_ptr source, const char* name, int listSize, arrow::MemoryPool* pool); ~IndexColumnBuilder() override = default; - template inline std::shared_ptr result() const { - if constexpr (std::same_as>) { + if (mListSize == -1) { return resultMulti(); - } else if constexpr (std::same_as) { + } else if (mListSize == 2) { return resultSlice(); } else { return resultSingle(); } } - template inline bool find(int idx) { - if constexpr (std::same_as>) { + if (mListSize == -1) { return findMulti(idx); - } else if constexpr (std::same_as) { + } else if (mListSize == 2) { return findSlice(idx); } else { return findSingle(idx); } } - template inline void fill(int idx) { ++mResultSize; - if constexpr (std::same_as>) { + if (mListSize == -1) { fillMulti(idx); - } else if constexpr (std::same_as) { + } else if (mListSize == 2) { fillSlice(idx); } else { fillSingle(idx); diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 17ee95d004118..98b0c07e55551 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -618,7 +618,7 @@ o2::framework::ServiceSpec ArrowSupport::arrowBackendSpec() builder->outputs.clear(); // replace AlgorithmSpec // FIXME: it should be made more generic, so it does not need replacement... - builder->algorithm = PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "IndexTablesBuilder", ctx);//readers::AODReaderHelpers::indexBuilderCallback(ctx); + builder->algorithm = PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "IndexTableBuilder", ctx);//readers::AODReaderHelpers::indexBuilderCallback(ctx); AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, *builder); } diff --git a/Framework/Core/src/Expressions.cxx b/Framework/Core/src/Expressions.cxx index 05a3462d6e4da..43143f781ddf4 100644 --- a/Framework/Core/src/Expressions.cxx +++ b/Framework/Core/src/Expressions.cxx @@ -1348,4 +1348,20 @@ OpNode Parser::opFromToken(std::string const& token) return OpNode{static_cast(std::distance(mapping.begin(), locate))}; } +std::vector> materializeProjectors(std::vector const& projectors, std::shared_ptr const& inputSchema, std::vector> outputFields) +{ + std::vector> expressions; + int i = 0; + for (auto& p : projectors) { + expressions.push_back( + expressions::makeExpression( + expressions::createExpressionTree( + expressions::createOperations(p), + inputSchema), + outputFields[i])); + ++i; + } + return expressions; +} + } // namespace o2::framework::expressions diff --git a/Framework/Core/src/IndexJSONHelpers.cxx b/Framework/Core/src/IndexJSONHelpers.cxx index 7334915954737..d18af7e43ddba 100644 --- a/Framework/Core/src/IndexJSONHelpers.cxx +++ b/Framework/Core/src/IndexJSONHelpers.cxx @@ -38,6 +38,7 @@ struct IndexRecordsReader : public rapidjson::BaseReaderHandler records; std::string currentKey; std::string label; + std::string columnLabel; o2::soa::IndexKind kind; int pos; @@ -84,6 +85,9 @@ struct IndexRecordsReader : public rapidjson::BaseReaderHandler& w, std::vector -#include using namespace o2::framework; using namespace arrow; @@ -103,8 +102,8 @@ TEST_CASE("TestIndexBuilder") auto t4 = b4.finalize(); Categorys st4{t4}; - using m1 = MetadataTrait>::metadata; - auto t5 = IndexBuilder::indexBuilder("test1a", {t1, t2, t3, t4}, typename IDXs::persistent_columns_t{}); + auto map = getIndexMapping>::metadata>(); + auto t5 = IndexBuilder::materialize("test1a", {t1, t2, t3, t4}, map); REQUIRE(t5->num_rows() == 4); IDXs idxt{t5}; idxt.bindExternalIndices(&st1, &st2, &st3, &st4); @@ -114,8 +113,8 @@ TEST_CASE("TestIndexBuilder") REQUIRE(row.category().pointId() == row.pointId()); } - using m2 = MetadataTrait>::metadata; - auto t6 = IndexBuilder::indexBuilder("test3", {t2, t1, t3, t4}, typename IDX2s::persistent_columns_t{}); + map = getIndexMapping>::metadata>(); + auto t6 = IndexBuilder::materialize("test2", {t2, t1, t3, t4}, map); REQUIRE(t6->num_rows() == st2.size()); IDX2s idxs{t6}; std::array fs{0, 1, 2, -1, -1, 4, -1}; @@ -213,8 +212,8 @@ TEST_CASE("AdvancedIndexTables") {14, 34}, {8, 31, 42, 46, 58}}}; - using m3 = MetadataTrait>::metadata; - auto t3 = IndexBuilder::indexBuilder("test4", {t1, t2, tc}, typename IDX3s::persistent_columns_t{}); + auto map = getIndexMapping>::metadata>(); + auto t3 = IndexBuilder::materialize("test3", {t1, t2, tc}, map); REQUIRE(t3->num_rows() == st1.size()); IDX3s idxs{t3}; idxs.bindExternalIndices(&st1, &st2, &st3); From 9210426c4d4f6cabd9087d3ea8a64eb23eb2bf4a Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 20 Nov 2025 14:38:38 +0100 Subject: [PATCH 11/22] cleanup and out-of-line --- .../Core/include/Framework/AnalysisHelpers.h | 122 +----------------- Framework/Core/src/AnalysisHelpers.cxx | 106 +++++++++++++++ Framework/Core/test/test_IndexBuilder.cxx | 6 +- 3 files changed, 115 insertions(+), 119 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 199a66183412e..9e26cf6f9e6e9 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -66,82 +66,7 @@ inline constexpr int listSize(soa::IndexKind kind) } // namespace struct IndexBuilder { - template - static auto materialize(const char* label, std::vector>&& tables, std::vector const& records) - { - auto pool = arrow::default_memory_pool(); - std::vector> builders; - framework::SelfIndexColumnBuilder self{records[0].columnLabel.c_str(), pool}; - std::unique_ptr keyIndex = nullptr; - if (records[0].kind != soa::IndexKind::IdxSelf) { - keyIndex = std::make_unique(tables[0]->column(records[0].pos)); - } - - for (auto i = 1U; i < records.size(); ++i) { - if (records[i].kind == soa::IndexKind::IdxSelf) { - builders.emplace_back(std::make_shared(records[i].columnLabel.c_str(), pool)); - } else { - builders.emplace_back(std::make_shared(tables[i]->column(records[i].pos), records[i].columnLabel.c_str(), listSize(records[i].kind), pool)); - } - } - - std::vector finds; - finds.resize(builders.size()); - for (int64_t counter = 0; counter < tables[0]->num_rows(); ++counter) { - int64_t idx = -1; - if (keyIndex == nullptr) { - idx = counter; - } else { - idx = keyIndex->valueAt(counter); - } - for (auto i = 0U; i < builders.size(); ++i) { - if (records[i+1].kind == soa::IndexKind::IdxSelf) { - finds[i] = builders[i]->find(idx); - } else { - finds[i] = std::static_pointer_cast(builders[i])->find(idx); - } - } - if constexpr (Exclusive) { - if (std::none_of(finds.begin(), finds.end(), [](bool const x) { return x == false; })) { - for (auto i = 0U; i < builders.size(); ++i) { - if (records[i+1].kind == soa::IndexKind::IdxSelf) { - builders[i]->fill(idx); - } else { - std::static_pointer_cast(builders[i])->fill(idx); - } - } - self.fill(counter); - } - } else { - for (auto i = 0U; i < builders.size(); ++i) { - if (records[i+1].kind == soa::IndexKind::IdxSelf) { - builders[i]->fill(idx); - } else { - std::static_pointer_cast(builders[i])->fill(idx); - } - } - self.fill(counter); - } - } - - std::vector> arrays; - arrays.reserve(records.size()); - std::vector> fields; - fields.reserve(records.size()); - arrays.push_back(self.result()); - fields.push_back(self.field()); - for (auto i = 0U; i < builders.size(); ++i) { - if (records[i+1].kind == soa::IndexKind::IdxSelf) { - arrays.push_back(builders[i]->result()); - fields.push_back(builders[i]->field()); - } else { - arrays.push_back(std::static_pointer_cast(builders[i])->result()); - fields.push_back(std::static_pointer_cast(builders[i])->field()); - } - } - - return framework::makeArrowTable(label, std::move(arrays), std::move(fields)); - } + static std::shared_ptr materialize(const char* label, std::vector>&& tables, std::vector const& records, bool exclusive); }; } // namespace o2::soa @@ -150,6 +75,7 @@ namespace o2::framework std::string serializeProjectors(std::vector& projectors); std::string serializeSchema(std::shared_ptr schema); std::string serializeIndexRecords(std::vector& irs); +std::vector> extractSources(ProcessingContext& pc, std::vector const& labels); struct Spawner { std::string binding; @@ -163,32 +89,9 @@ struct Spawner { header::DataDescription description; header::DataHeader::SubSpecificationType version; - std::shared_ptr materialize(ProcessingContext& pc) const - { - std::vector> originals; - for (auto const& label : labels) { - originals.push_back(pc.inputs().get(label)->asArrowTable()); - } - auto fullTable = soa::ArrowHelpers::joinTables(std::move(originals), std::span{labels.begin(), labels.size()}); - if (fullTable->num_rows() == 0) { - return arrow::Table::MakeEmpty(schema).ValueOrDie(); - } - - return spawnerHelper(fullTable, schema, binding.c_str(), schema->num_fields(), projector); - } + std::shared_ptr materialize(ProcessingContext& pc) const; }; -namespace { -static inline auto extractSources(ProcessingContext& pc, std::vector const& labels) -{ - std::vector> tables; - for (auto const& label : labels) { - tables.emplace_back(pc.inputs().get(label.c_str())->asArrowTable()); - } - return tables; -} -} - struct Builder { bool exclusive; std::string binding; @@ -198,17 +101,7 @@ struct Builder { header::DataDescription description; header::DataHeader::SubSpecificationType version; - std::shared_ptr materialize(ProcessingContext& pc) const - { - std::shared_ptr result; - auto tables = extractSources(pc, labels); - if (exclusive) { - result = o2::soa::IndexBuilder::materialize(binding.c_str(), std::move(tables), records); - } else { - result = o2::soa::IndexBuilder::materialize(binding.c_str(), std::move(tables), records); - } - return result; - } + std::shared_ptr materialize(ProcessingContext& pc) const; }; } // namespace o2::framework @@ -761,7 +654,7 @@ struct Builds : decltype(transformBase()) { using Ts = typename T::rest_t; using index_pack_t = metadata::index_pack_t; - std::vector map; + std::vector map = soa::getIndexMapping(); T* operator->() { @@ -785,10 +678,7 @@ struct Builds : decltype(transformBase()) { auto build(std::vector>&& tables) { - if (map.empty()) { - map = soa::getIndexMapping(); - } - this->table = std::make_shared(soa::IndexBuilder::materialize(o2::aod::label(), std::forward>>(tables), map)); + this->table = std::make_shared(soa::IndexBuilder::materialize(o2::aod::label(), std::forward>>(tables), map, metadata::exclusive)); return (this->table != nullptr); } }; diff --git a/Framework/Core/src/AnalysisHelpers.cxx b/Framework/Core/src/AnalysisHelpers.cxx index 91ccfe3dcf13f..58f351e37d508 100644 --- a/Framework/Core/src/AnalysisHelpers.cxx +++ b/Framework/Core/src/AnalysisHelpers.cxx @@ -13,6 +13,84 @@ #include "ExpressionJSONHelpers.h" #include "IndexJSONHelpers.h" +namespace o2::soa { +std::shared_ptr IndexBuilder::materialize(const char* label, std::vector>&& tables, std::vector const& records, bool exclusive) +{ + auto pool = arrow::default_memory_pool(); + std::vector> builders; + framework::SelfIndexColumnBuilder self{records[0].columnLabel.c_str(), pool}; + std::unique_ptr keyIndex = nullptr; + if (records[0].kind != soa::IndexKind::IdxSelf) { + keyIndex = std::make_unique(tables[0]->column(records[0].pos)); + } + + for (auto i = 1U; i < records.size(); ++i) { + if (records[i].kind == soa::IndexKind::IdxSelf) { + builders.emplace_back(std::make_shared(records[i].columnLabel.c_str(), pool)); + } else { + builders.emplace_back(std::make_shared(tables[i]->column(records[i].pos), records[i].columnLabel.c_str(), listSize(records[i].kind), pool)); + } + } + + std::vector finds; + finds.resize(builders.size()); + for (int64_t counter = 0; counter < tables[0]->num_rows(); ++counter) { + int64_t idx = -1; + if (keyIndex == nullptr) { + idx = counter; + } else { + idx = keyIndex->valueAt(counter); + } + for (auto i = 0U; i < builders.size(); ++i) { + if (records[i+1].kind == soa::IndexKind::IdxSelf) { + finds[i] = builders[i]->find(idx); + } else { + finds[i] = std::static_pointer_cast(builders[i])->find(idx); + } + } + if (exclusive) { + if (std::none_of(finds.begin(), finds.end(), [](bool const x) { return x == false; })) { + for (auto i = 0U; i < builders.size(); ++i) { + if (records[i+1].kind == soa::IndexKind::IdxSelf) { + builders[i]->fill(idx); + } else { + std::static_pointer_cast(builders[i])->fill(idx); + } + } + self.fill(counter); + } + } else { + for (auto i = 0U; i < builders.size(); ++i) { + if (records[i+1].kind == soa::IndexKind::IdxSelf) { + builders[i]->fill(idx); + } else { + std::static_pointer_cast(builders[i])->fill(idx); + } + } + self.fill(counter); + } + } + + std::vector> arrays; + arrays.reserve(records.size()); + std::vector> fields; + fields.reserve(records.size()); + arrays.push_back(self.result()); + fields.push_back(self.field()); + for (auto i = 0U; i < builders.size(); ++i) { + if (records[i+1].kind == soa::IndexKind::IdxSelf) { + arrays.push_back(builders[i]->result()); + fields.push_back(builders[i]->field()); + } else { + arrays.push_back(std::static_pointer_cast(builders[i])->result()); + fields.push_back(std::static_pointer_cast(builders[i])->field()); + } + } + + return framework::makeArrowTable(label, std::move(arrays), std::move(fields)); +} +} // namespace o2::soa + namespace o2::framework { void initializePartitionCaches(std::set const& hashes, std::shared_ptr const& schema, expressions::Filter const& filter, gandiva::NodePtr& tree, gandiva::FilterPtr& gfilter) @@ -50,4 +128,32 @@ std::string serializeIndexRecords(std::vector& irs) IndexJSONHelpers::write(osm, irs); return osm.str(); } + +std::vector> extractSources(ProcessingContext& pc, std::vector const& labels) +{ + std::vector> tables; + for (auto const& label : labels) { + tables.emplace_back(pc.inputs().get(label.c_str())->asArrowTable()); + } + return tables; +} + +std::shared_ptr Spawner::materialize(ProcessingContext& pc) const +{ + auto tables = extractSources(pc, labels); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{labels.begin(), labels.size()}); + if (fullTable->num_rows() == 0) { + return arrow::Table::MakeEmpty(schema).ValueOrDie(); + } + + return spawnerHelper(fullTable, schema, binding.c_str(), schema->num_fields(), projector); +} + +std::shared_ptr Builder::materialize(ProcessingContext& pc) const +{ + std::shared_ptr result; + auto tables = extractSources(pc, labels); + result = o2::soa::IndexBuilder::materialize(binding.c_str(), std::move(tables), records, exclusive); + return result; +} } // namespace o2::framework diff --git a/Framework/Core/test/test_IndexBuilder.cxx b/Framework/Core/test/test_IndexBuilder.cxx index 25dbe573a92e9..b08de3d975c56 100644 --- a/Framework/Core/test/test_IndexBuilder.cxx +++ b/Framework/Core/test/test_IndexBuilder.cxx @@ -103,7 +103,7 @@ TEST_CASE("TestIndexBuilder") Categorys st4{t4}; auto map = getIndexMapping>::metadata>(); - auto t5 = IndexBuilder::materialize("test1a", {t1, t2, t3, t4}, map); + auto t5 = IndexBuilder::materialize("test1a", {t1, t2, t3, t4}, map, true); REQUIRE(t5->num_rows() == 4); IDXs idxt{t5}; idxt.bindExternalIndices(&st1, &st2, &st3, &st4); @@ -114,7 +114,7 @@ TEST_CASE("TestIndexBuilder") } map = getIndexMapping>::metadata>(); - auto t6 = IndexBuilder::materialize("test2", {t2, t1, t3, t4}, map); + auto t6 = IndexBuilder::materialize("test2", {t2, t1, t3, t4}, map, false); REQUIRE(t6->num_rows() == st2.size()); IDX2s idxs{t6}; std::array fs{0, 1, 2, -1, -1, 4, -1}; @@ -213,7 +213,7 @@ TEST_CASE("AdvancedIndexTables") {8, 31, 42, 46, 58}}}; auto map = getIndexMapping>::metadata>(); - auto t3 = IndexBuilder::materialize("test3", {t1, t2, tc}, map); + auto t3 = IndexBuilder::materialize("test3", {t1, t2, tc}, map, false); REQUIRE(t3->num_rows() == st1.size()); IDX3s idxs{t3}; idxs.bindExternalIndices(&st1, &st2, &st3); From 8f3b0b9e7f974531da4eb7049edeb3386d591f89 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 24 Nov 2025 09:24:30 +0100 Subject: [PATCH 12/22] rework index builder --- .../AnalysisSupport/src/AODReaderHelpers.cxx | 14 +- Framework/Core/include/Framework/ASoA.h | 1 + .../Core/include/Framework/AnalysisHelpers.h | 150 +++++++++++++----- .../Core/include/Framework/AnalysisManagers.h | 21 +-- .../include/Framework/IndexBuilderHelpers.h | 76 ++++++++- .../Core/include/Framework/TableBuilder.h | 89 +---------- Framework/Core/src/AnalysisHelpers.cxx | 92 +++++++++-- Framework/Core/src/IndexBuilderHelpers.cxx | 86 +++++++--- Framework/Core/src/TableBuilder.cxx | 89 +---------- 9 files changed, 343 insertions(+), 275 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx index 30592f0a497dc..da77fbfe4afed 100644 --- a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx @@ -33,6 +33,7 @@ struct Buildable { header::DataDescription description; header::DataHeader::SubSpecificationType version; std::vector records; + std::shared_ptr outputSchema; Buildable(InputSpec const& spec) : binding{spec.binding} @@ -52,19 +53,26 @@ struct Buildable { for (auto const& r : records) { labels.emplace_back(r.label); } + outputSchema = std::make_shared([](std::vector const& recs) { + std::vector> fields; + for (auto& r : recs) { + fields.push_back(r.field()); + } + return fields; + }(records)) + ->WithMetadata(std::make_shared(std::vector{std::string{"label"}}, std::vector{std::string{binding}})); } framework::Builder createBuilder() const { return { exclusive, - binding, labels, records, + outputSchema, origin, description, - version - }; + version}; } }; diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 918a27f784bbc..d8f3d1749544b 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -3274,6 +3274,7 @@ consteval auto getIndexTargets() constexpr auto a = o2::soa::mergeOriginals(); \ return o2::aod::filterForKey(); \ }(framework::pack<__VA_ARGS__>{}); \ + static_assert(sources.size() == framework::pack_size(index_pack_t{}), "One of the referred tables does not have index to Key"); \ }; \ using _Name_##Metadata = _Name_##MetadataFrom>; \ \ diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 9e26cf6f9e6e9..f52c32b2a8ac9 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -27,51 +27,111 @@ #include #include -namespace o2::soa { -enum struct IndexKind : int { - IdxInvalid = -1, - IdxSelf = 0, - IdxSingle = 1, - IdxSlice = 2, - IdxArray = 3 -}; - +namespace o2::soa +{ struct IndexRecord { std::string label; std::string columnLabel; IndexKind kind; int pos; - auto operator<=>(const IndexRecord&) const = default; -}; + std::shared_ptr type = [](IndexKind kind) -> std::shared_ptr { + switch (kind) { + case IndexKind::IdxSingle: + case IndexKind::IdxSelf: + return arrow::int32(); + case IndexKind::IdxSlice: + return arrow::fixed_size_list(arrow::int32(), 2); + case IndexKind::IdxArray: + return arrow::list(arrow::int32()); + default: + return {nullptr}; + } + }(kind); -namespace -{ -inline constexpr int listSize(soa::IndexKind kind) -{ - switch (kind) { - case soa::IndexKind::IdxSingle: - return 1; - break; - case soa::IndexKind::IdxSlice: - return 2; - break; - case soa::IndexKind::IdxArray: - return -1; - break; - default: - return -2; - break; + auto operator==(IndexRecord const& other) const + { + return (this->label == other.label) && (this->columnLabel == other.columnLabel) && (this->kind == other.kind) && (this->pos == other.pos); } -} -} // namespace + + std::shared_ptr field() const + { + return std::make_shared(columnLabel, type); + } +}; struct IndexBuilder { - static std::shared_ptr materialize(const char* label, std::vector>&& tables, std::vector const& records, bool exclusive); + static std::shared_ptr materialize(std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive); }; } // namespace o2::soa namespace o2::framework { +std::shared_ptr makeEmptyTableImpl(const char* name, std::shared_ptr& schema); + +template +auto makeEmptyTable(const char* name) +{ + auto schema = std::make_shared(soa::createFieldsFromColumns(typename T::table_t::persistent_columns_t{})); + return makeEmptyTableImpl(name, schema); +} + +template +auto makeEmptyTable() +{ + auto schema = std::make_shared(soa::createFieldsFromColumns(typename aod::MetadataTrait>::metadata::persistent_columns_t{})); + return makeEmptyTableImpl(o2::aod::label(), schema); +} + +template +auto makeEmptyTable(const char* name, framework::pack p) +{ + auto schema = std::make_shared(soa::createFieldsFromColumns(p)); + return makeEmptyTableImpl(name, schema); +} + +template +auto makeEmptyTable(const char* name) +{ + auto schema = std::make_shared(soa::createFieldsFromColumns(typename aod::MetadataTrait::metadata::persistent_columns_t{})); + return makeEmptyTableImpl(name, schema); +} + +std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, + expressions::Projector* projectors, const char* name, std::shared_ptr& projector); + +std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, + const char* name, size_t nColumns, + const std::shared_ptr& projector); + +/// Expression-based column generator to materialize columns +template + requires(soa::has_extension::metadata>) +auto spawner(std::shared_ptr const& fullTable, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) +{ + if (fullTable->num_rows() == 0) { + return makeEmptyTable(name); + } + constexpr auto Ncol = []() { + if constexpr (soa::has_configurable_extension) { + return framework::pack_size(typename M::placeholders_pack_t{}); + } else { + return framework::pack_size(typename M::expression_pack_t{}); + } + }.template operator()::metadata>(); + return spawnerHelper(fullTable, schema, Ncol, projectors, name, projector); +} + +template +auto spawner(framework::pack, std::vector>&& tables, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) +{ + std::array labels{"original"}; + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{labels}); + if (fullTable->num_rows() == 0) { + return makeEmptyTable(name, framework::pack{}); + } + return spawnerHelper(fullTable, schema, sizeof...(C), projectors, name, projector); +} + std::string serializeProjectors(std::vector& projectors); std::string serializeSchema(std::shared_ptr schema); std::string serializeIndexRecords(std::vector& irs); @@ -94,9 +154,9 @@ struct Spawner { struct Builder { bool exclusive; - std::string binding; std::vector labels; std::vector records; + std::shared_ptr outputSchema; header::DataOrigin origin; header::DataDescription description; header::DataHeader::SubSpecificationType version; @@ -478,29 +538,29 @@ struct TableTransform { constexpr static auto sources = M::sources; template - static constexpr auto base_spec() + static auto base_spec() { return soa::tableRef2InputSpec(); } static auto base_specs() { - return [](std::index_sequence) -> std::vector { - return {base_spec()...}; + return [](std::index_sequence) { + return std::array{base_spec()...}; }(std::make_index_sequence{}); } - constexpr auto spec() const + static constexpr auto spec() { return soa::tableRef2OutputSpec(); } - constexpr auto output() const + static constexpr auto output() { return soa::tableRef2Output(); } - constexpr auto ref() const + static constexpr auto ref() { return soa::tableRef2OutputRef(); } @@ -526,11 +586,10 @@ struct Spawns : decltype(transformBase()) { using spawnable_t = T; using metadata = decltype(transformBase())::metadata; using extension_t = typename metadata::extension_table_t; - using base_table_t = typename metadata::base_table_t; using expression_pack_t = typename metadata::expression_pack_t; static constexpr size_t N = framework::pack_size(expression_pack_t{}); - constexpr auto pack() + static consteval auto pack() { return expression_pack_t{}; } @@ -548,6 +607,7 @@ struct Spawns : decltype(transformBase()) { { return extension->asArrowTable(); } + std::shared_ptr table = nullptr; std::shared_ptr extension = nullptr; std::array projectors = [](framework::pack) -> std::array @@ -556,7 +616,11 @@ struct Spawns : decltype(transformBase()) { } (expression_pack_t{}); std::shared_ptr projector = nullptr; - std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); + std::shared_ptr schema = []() { + auto s = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); + s->WithMetadata(std::make_shared(std::vector{std::string{"label"}}, std::vector{std::string{o2::aod::label()}})); + return s; + }(); }; template @@ -654,6 +718,8 @@ struct Builds : decltype(transformBase()) { using Ts = typename T::rest_t; using index_pack_t = metadata::index_pack_t; + std::shared_ptr outputSchema = []() { return std::make_shared(soa::createFieldsFromColumns(index_pack_t{}))->WithMetadata(std::make_shared(std::vector{std::string{"label"}}, std::vector{std::string{o2::aod::label()}})); }(); + std::vector map = soa::getIndexMapping(); T* operator->() @@ -678,7 +744,7 @@ struct Builds : decltype(transformBase()) { auto build(std::vector>&& tables) { - this->table = std::make_shared(soa::IndexBuilder::materialize(o2::aod::label(), std::forward>>(tables), map, metadata::exclusive)); + this->table = std::make_shared(soa::IndexBuilder::materialize(std::forward>>(tables), map, outputSchema, metadata::exclusive)); return (this->table != nullptr); } }; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 4603c76ea4319..e6f1ed86914ae 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -34,18 +34,6 @@ namespace o2::framework namespace { -template -static inline auto extractOriginal(ProcessingContext& pc) -{ - return pc.inputs().get(aod::MetadataTrait::metadata::tableLabel())->asArrowTable(); -} - -template -static inline std::vector> extractOriginals(framework::pack, ProcessingContext& pc) -{ - return {extractOriginal(pc)...}; -} - template refs> static inline auto extractOriginals(ProcessingContext& pc) { @@ -160,12 +148,12 @@ const char* controlOption() } template -concept with_base_table = requires(T const& t) { t.base_specs(); }; +concept with_base_table = requires { T::base_specs(); }; template bool requestInputs(std::vector& inputs, T const& entity) { - auto base_specs = entity.base_specs(); + auto base_specs = T::base_specs(); for (auto base_spec : base_specs) { base_spec.metadata.push_back(ConfigParamSpec{std::string{controlOption()}, VariantType::Bool, true, {"\"\""}}); DataSpecUtils::updateInputList(inputs, std::forward(base_spec)); @@ -289,9 +277,8 @@ bool prepareOutput(ProcessingContext& context, T& spawns) { using metadata = o2::aod::MetadataTrait>::metadata; auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context), std::span{metadata::base_table_t::originalLabels}); - if (originalTable->schema()->fields().empty() == true) { - using base_table_t = typename T::base_table_t::table_t; - originalTable = makeEmptyTable(o2::aod::label()); + if (originalTable->num_rows() == 0) { + originalTable = makeEmptyTable(); } using D = o2::aod::Hash; diff --git a/Framework/Core/include/Framework/IndexBuilderHelpers.h b/Framework/Core/include/Framework/IndexBuilderHelpers.h index 340a3ffe6440d..1a758008db728 100644 --- a/Framework/Core/include/Framework/IndexBuilderHelpers.h +++ b/Framework/Core/include/Framework/IndexBuilderHelpers.h @@ -17,15 +17,26 @@ #include #include +namespace o2::soa +{ +enum struct IndexKind : int { + IdxInvalid = -1, + IdxSelf = 0, + IdxSingle = 1, + IdxSlice = 2, + IdxArray = 3 +}; +} // namespace o2::soa + namespace o2::framework { void cannotBuildAnArray(); +void cannotCreateIndexBuilder(); struct ChunkedArrayIterator { ChunkedArrayIterator(std::shared_ptr source); - virtual ~ChunkedArrayIterator() = default; - std::shared_ptr mSource; + std::shared_ptr mSource = nullptr; size_t mPosition = 0; int mChunk = 0; size_t mOffset = 0; @@ -40,6 +51,63 @@ struct ChunkedArrayIterator { int valueAt(size_t pos); }; +struct SelfBuilder { + std::unique_ptr mBuilder = nullptr; + SelfBuilder(arrow::MemoryPool* pool); +}; + +struct SingleBuilder { + ChunkedArrayIterator arrayIterator; + std::unique_ptr mBuilder = nullptr; + SingleBuilder(std::shared_ptr source, arrow::MemoryPool* pool); +}; + +struct SliceBuilder { + ChunkedArrayIterator arrayIterator; + arrow::ArrayBuilder* mValueBuilder = nullptr; + std::unique_ptr mListBuilder = nullptr; + std::shared_ptr> mValues = nullptr; + std::shared_ptr> mCounts = nullptr; + SliceBuilder(std::shared_ptr source, arrow::MemoryPool* pool); + + arrow::Status preSlice(); +}; + +struct ArrayBuilder { + ChunkedArrayIterator arrayIterator; + arrow::ArrayBuilder* mValueBuilder = nullptr; + std::vector mValues; + std::vector> mIndices; + std::unique_ptr mListBuilder = nullptr; + ArrayBuilder(std::shared_ptr source, arrow::MemoryPool* pool); + + arrow::Status preFind(); +}; + +struct IndexColumnBuilderNG { + std::variant builder; + + IndexColumnBuilderNG(soa::IndexKind kind, arrow::MemoryPool* pool, std::shared_ptr source = nullptr) + { + switch (kind) { + case soa::IndexKind::IdxSelf: + builder = SelfBuilder{pool}; + break; + case soa::IndexKind::IdxSingle: + builder = SingleBuilder{source, pool}; + break; + case soa::IndexKind::IdxSlice: + builder = SliceBuilder{source, pool}; + break; + case soa::IndexKind::IdxArray: + builder = ArrayBuilder{source, pool}; + break; + default: + cannotCreateIndexBuilder(); + } + } +}; + struct SelfIndexColumnBuilder { SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool); virtual ~SelfIndexColumnBuilder() = default; @@ -54,7 +122,6 @@ struct SelfIndexColumnBuilder { return std::make_shared(array); } - std::shared_ptr field() const; inline bool find(int) { @@ -67,7 +134,6 @@ struct SelfIndexColumnBuilder { } std::string mColumnName; - std::shared_ptr mArrowType; std::unique_ptr mBuilder = nullptr; }; @@ -141,8 +207,6 @@ class IndexColumnBuilder : public SelfIndexColumnBuilder, public ChunkedArrayIte int mFillOffset = 0; int mValuePos = 0; }; - -std::shared_ptr makeArrowTable(const char* label, std::vector>&& columns, std::vector>&& fields); } // namespace o2::framework #endif // O2_FRAMEWORK_INDEXBUILDERHELPERS_H_ diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 7707afe45b380..845820dfe4bff 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -15,7 +15,6 @@ #include "Framework/ASoA.h" #include "Framework/StructToTuple.h" #include "Framework/RuntimeError.h" -#include "arrow/type_traits.h" // Apparently needs to be on top of the arrow includes. @@ -26,6 +25,7 @@ #include #include #include +#include #include #include @@ -764,92 +764,5 @@ class TableBuilder std::shared_ptr mSchema; std::vector> mArrays; }; - -template -auto makeEmptyTable(const char* name) -{ - TableBuilder b; - [[maybe_unused]] auto writer = b.cursor(); - b.setLabel(name); - return b.finalize(); -} - -template -auto makeEmptyTable() -{ - TableBuilder b; - [[maybe_unused]] auto writer = b.cursor(typename aod::MetadataTrait>::metadata::persistent_columns_t{}); - b.setLabel(aod::label()); - return b.finalize(); -} - -template -auto makeEmptyTable(const char* name, framework::pack p) -{ - TableBuilder b; - [[maybe_unused]] auto writer = b.cursor(p); - b.setLabel(name); - return b.finalize(); -} - -std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, - expressions::Projector* projectors, const char* name, std::shared_ptr& projector); - -std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, - const char* name, size_t nColumns, - const std::shared_ptr& projector); - -/// Expression-based column generator to materialize columns -template - requires(soa::has_configurable_extension::metadata>) -auto spawner(std::shared_ptr const& fullTable, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) -{ - using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - if (fullTable->num_rows() == 0) { - return makeEmptyTable(name, placeholders_pack_t{}); - } - return spawnerHelper(fullTable, schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); -} - -template - requires(soa::has_configurable_extension::metadata>) -auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) -{ - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); - return spawner(fullTable, name, projectors, projector, schema); -} - -template - requires(soa::has_extension::metadata> && !soa::has_configurable_extension::metadata>) -auto spawner(std::shared_ptr const& fullTable, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) -{ - using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - if (fullTable->num_rows() == 0) { - return makeEmptyTable(name, expression_pack_t{}); - } - return spawnerHelper(fullTable, schema, framework::pack_size(expression_pack_t{}), projectors, name, projector); -} - -template - requires(soa::has_extension::metadata> && !soa::has_configurable_extension::metadata>) -auto spawner(std::vector>&& tables, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) -{ - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); - return spawner(fullTable, name, projectors, projector, schema); -} - -template -auto spawner(framework::pack, std::vector>&& tables, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) -{ - std::array labels{"original"}; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{labels}); - if (fullTable->num_rows() == 0) { - return makeEmptyTable(name, framework::pack{}); - } - return spawnerHelper(fullTable, schema, sizeof...(C), projectors, name, projector); -} - -template -using iterator_tuple_t = std::tuple; } // namespace o2::framework #endif // FRAMEWORK_TABLEBUILDER_H diff --git a/Framework/Core/src/AnalysisHelpers.cxx b/Framework/Core/src/AnalysisHelpers.cxx index 58f351e37d508..4628e970369e3 100644 --- a/Framework/Core/src/AnalysisHelpers.cxx +++ b/Framework/Core/src/AnalysisHelpers.cxx @@ -14,10 +14,11 @@ #include "IndexJSONHelpers.h" namespace o2::soa { -std::shared_ptr IndexBuilder::materialize(const char* label, std::vector>&& tables, std::vector const& records, bool exclusive) +std::shared_ptr IndexBuilder::materialize(std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive) { auto pool = arrow::default_memory_pool(); - std::vector> builders; + std::vector> builders; // this needs to become a state to avoid reallocations + // can builders be reset and re-used? framework::SelfIndexColumnBuilder self{records[0].columnLabel.c_str(), pool}; std::unique_ptr keyIndex = nullptr; if (records[0].kind != soa::IndexKind::IdxSelf) { @@ -28,7 +29,23 @@ std::shared_ptr IndexBuilder::materialize(const char* label, std:: if (records[i].kind == soa::IndexKind::IdxSelf) { builders.emplace_back(std::make_shared(records[i].columnLabel.c_str(), pool)); } else { - builders.emplace_back(std::make_shared(tables[i]->column(records[i].pos), records[i].columnLabel.c_str(), listSize(records[i].kind), pool)); + builders.emplace_back( + std::make_shared( + tables[i]->column(records[i].pos), + records[i].columnLabel.c_str(), + [](IndexKind kind) { + switch (kind) { + case IndexKind::IdxSingle: + return 1; + case IndexKind::IdxSlice: + return 2; + case IndexKind::IdxArray: + return -1; + default: + return -2; + } + }(records[i].kind), + pool)); } } @@ -71,28 +88,81 @@ std::shared_ptr IndexBuilder::materialize(const char* label, std:: } } - std::vector> arrays; + std::vector> arrays; // same arrays.reserve(records.size()); - std::vector> fields; - fields.reserve(records.size()); arrays.push_back(self.result()); - fields.push_back(self.field()); for (auto i = 0U; i < builders.size(); ++i) { if (records[i+1].kind == soa::IndexKind::IdxSelf) { arrays.push_back(builders[i]->result()); - fields.push_back(builders[i]->field()); } else { arrays.push_back(std::static_pointer_cast(builders[i])->result()); - fields.push_back(std::static_pointer_cast(builders[i])->field()); } } - return framework::makeArrowTable(label, std::move(arrays), std::move(fields)); + return arrow::Table::Make(schema, arrays); } } // namespace o2::soa namespace o2::framework { +std::shared_ptr makeEmptyTableImpl(const char* name, std::shared_ptr& schema) +{ + schema = schema->WithMetadata(std::make_shared(std::vector{std::string{"label"}}, std::vector{std::string{name}})); + return arrow::Table::MakeEmpty(schema).ValueOrDie(); +} + +std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, + expressions::Projector* projectors, const char* name, + std::shared_ptr& projector) +{ + if (projector == nullptr) { + projector = framework::expressions::createProjectorHelper(nColumns, projectors, fullTable->schema(), newSchema->fields()); + } + + return spawnerHelper(fullTable, newSchema, name, nColumns, projector); +} + +std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, + const char* name, size_t nColumns, + std::shared_ptr const& projector) +{ + arrow::TableBatchReader reader(*fullTable); + std::shared_ptr batch; + arrow::ArrayVector v; + std::vector chunks; + chunks.resize(nColumns); + std::vector> arrays; + + while (true) { + auto s = reader.ReadNext(&batch); + if (!s.ok()) { + throw runtime_error_f("Cannot read batches from the source table to spawn %s: %s", name, s.ToString().c_str()); + } + if (batch == nullptr) { + break; + } + try { + s = projector->Evaluate(*batch, arrow::default_memory_pool(), &v); + if (!s.ok()) { + throw runtime_error_f("Cannot apply projector to the source table of %s: %s", name, s.ToString().c_str()); + } + } catch (std::exception& e) { + throw runtime_error_f("Cannot apply projector to the source table of %s: exception caught: %s", name, e.what()); + } + + for (auto i = 0U; i < nColumns; ++i) { + chunks[i].emplace_back(v.at(i)); + } + } + + arrays.reserve(nColumns); + for (auto i = 0U; i < nColumns; ++i) { + arrays.push_back(std::make_shared(chunks[i])); + } + + return arrow::Table::Make(newSchema, arrays); +} + void initializePartitionCaches(std::set const& hashes, std::shared_ptr const& schema, expressions::Filter const& filter, gandiva::NodePtr& tree, gandiva::FilterPtr& gfilter) { if (tree == nullptr) { @@ -153,7 +223,7 @@ std::shared_ptr Builder::materialize(ProcessingContext& pc) const { std::shared_ptr result; auto tables = extractSources(pc, labels); - result = o2::soa::IndexBuilder::materialize(binding.c_str(), std::move(tables), records, exclusive); + result = o2::soa::IndexBuilder::materialize(std::move(tables), records, outputSchema, exclusive); return result; } } // namespace o2::framework diff --git a/Framework/Core/src/IndexBuilderHelpers.cxx b/Framework/Core/src/IndexBuilderHelpers.cxx index 52d6080690fe1..604d1e7e5b064 100644 --- a/Framework/Core/src/IndexBuilderHelpers.cxx +++ b/Framework/Core/src/IndexBuilderHelpers.cxx @@ -22,7 +22,12 @@ namespace o2::framework { void cannotBuildAnArray() { - throw runtime_error("Cannot build an array"); + throw framework::runtime_error("Cannot build an array"); +} + +void cannotCreateIndexBuilder() +{ + throw framework::runtime_error("Cannot create index column builder: invalid kind of index column"); } ChunkedArrayIterator::ChunkedArrayIterator(std::shared_ptr source) @@ -33,19 +38,73 @@ ChunkedArrayIterator::ChunkedArrayIterator(std::shared_ptr mLast = mCurrent + mCurrentArray->length(); } -SelfIndexColumnBuilder::SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool) - : mColumnName{name}, - mArrowType{arrow::int32()} +SelfBuilder::SelfBuilder(arrow::MemoryPool* pool) +{ + auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder); + if (!status.ok()) { + throw framework::runtime_error("Cannot create array builder for the self-index!"); + } +} + +SingleBuilder::SingleBuilder(std::shared_ptr source, arrow::MemoryPool* pool) + : arrayIterator{source} { auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder); if (!status.ok()) { - throw runtime_error("Cannot create array builder!"); + throw framework::runtime_error("Cannot create array builder for the single-valued index!"); } } -std::shared_ptr SelfIndexColumnBuilder::field() const +SliceBuilder::SliceBuilder(std::shared_ptr source, arrow::MemoryPool* pool) + : arrayIterator{source} { - return std::make_shared(mColumnName, mArrowType); + if (!preSlice().ok()) { + throw framework::runtime_error("Cannot pre-slice the source for slice-index building"); + } + + std::unique_ptr builder; + auto status = arrow::MakeBuilder(pool, arrow::int32(), &builder); + if (!status.ok()) { + throw framework::runtime_error("Cannot create array for the slice-index builder!"); + } + mListBuilder = std::make_unique(pool, std::move(builder), 2); + mValueBuilder = static_cast(mListBuilder.get())->value_builder(); +} + +arrow::Status SliceBuilder::SliceBuilder::preSlice() +{ + arrow::Datum value_counts; + auto options = arrow::compute::ScalarAggregateOptions::Defaults(); + ARROW_ASSIGN_OR_RAISE(value_counts, arrow::compute::CallFunction("value_counts", {arrayIterator.mSource}, &options)); + auto pair = static_cast(value_counts.array()); + mValues = std::make_shared>(pair.field(0)->data()); + mCounts = std::make_shared>(pair.field(1)->data()); + return arrow::Status::OK(); +} + +ArrayBuilder::ArrayBuilder(std::shared_ptr source, arrow::MemoryPool* pool) + : arrayIterator{source} +{ + if (!preFind().ok()) { + throw framework::runtime_error("Cannot pre-find in a source for array-index building"); + } + + std::unique_ptr builder; + auto status = arrow::MakeBuilder(pool, arrow::int32(), &builder); + if (!status.ok()) { + throw framework::runtime_error("Cannot create array for the array-index builder!"); + } + mListBuilder = std::make_unique(pool, std::move(builder)); + mValueBuilder = static_cast(mListBuilder.get())->value_builder(); +} + +SelfIndexColumnBuilder::SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool) + : mColumnName{name} +{ + auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder); + if (!status.ok()) { + throw framework::runtime_error("Cannot create array builder!"); + } } IndexColumnBuilder::IndexColumnBuilder(std::shared_ptr source, const char* name, int listSize, arrow::MemoryPool* pool) @@ -57,13 +116,11 @@ IndexColumnBuilder::IndexColumnBuilder(std::shared_ptr sour switch (mListSize) { case 1: { mValueBuilder = mBuilder.get(); - mArrowType = arrow::int32(); }; break; case 2: { if (preSlice().ok()) { mListBuilder = std::make_unique(pool, std::move(mBuilder), mListSize); mValueBuilder = static_cast(mListBuilder.get())->value_builder(); - mArrowType = arrow::fixed_size_list(arrow::int32(), 2); } else { throw runtime_error("Cannot pre-slice an array"); } @@ -72,7 +129,6 @@ IndexColumnBuilder::IndexColumnBuilder(std::shared_ptr sour if (preFind().ok()) { mListBuilder = std::make_unique(pool, std::move(mBuilder)); mValueBuilder = static_cast(mListBuilder.get())->value_builder(); - mArrowType = arrow::list(arrow::int32()); } else { throw runtime_error("Cannot pre-find array groups"); } @@ -265,14 +321,4 @@ int ChunkedArrayIterator::valueAt(size_t pos) } return *(mCurrent + pos); } - -std::shared_ptr makeArrowTable(const char* label, std::vector>&& columns, std::vector>&& fields) -{ - auto schema = std::make_shared(fields); - schema->WithMetadata( - std::make_shared( - std::vector{std::string{"label"}}, - std::vector{std::string{label}})); - return arrow::Table::Make(schema, columns); -} } // namespace o2::framework diff --git a/Framework/Core/src/TableBuilder.cxx b/Framework/Core/src/TableBuilder.cxx index c80fef9f0533c..955fe686e12a8 100644 --- a/Framework/Core/src/TableBuilder.cxx +++ b/Framework/Core/src/TableBuilder.cxx @@ -81,94 +81,7 @@ void TableBuilder::validate() const void TableBuilder::setLabel(const char* label) { - mSchema = mSchema->WithMetadata(std::make_shared(std::vector{std::string{"label"}}, std::vector{std::string{label}})); -} - -std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, - expressions::Projector* projectors, const char* name, - std::shared_ptr& projector) -{ - if (projector == nullptr) { - projector = framework::expressions::createProjectorHelper(nColumns, projectors, fullTable->schema(), newSchema->fields()); - } - - arrow::TableBatchReader reader(*fullTable); - std::shared_ptr batch; - arrow::ArrayVector v; - std::vector chunks; - chunks.resize(nColumns); - std::vector> arrays; - - while (true) { - auto s = reader.ReadNext(&batch); - if (!s.ok()) { - throw runtime_error_f("Cannot read batches from source table to spawn %s: %s", name, s.ToString().c_str()); - } - if (batch == nullptr) { - break; - } - try { - s = projector->Evaluate(*batch, arrow::default_memory_pool(), &v); - if (!s.ok()) { - throw runtime_error_f("Cannot apply projector to source table of %s: %s", name, s.ToString().c_str()); - } - } catch (std::exception& e) { - throw runtime_error_f("Cannot apply projector to source table of %s: exception caught: %s", name, e.what()); - } - - for (auto i = 0U; i < nColumns; ++i) { - chunks[i].emplace_back(v.at(i)); - } - } - - arrays.reserve(nColumns); - for (auto i = 0U; i < nColumns; ++i) { - arrays.push_back(std::make_shared(chunks[i])); - } - - addLabelToSchema(newSchema, name); - return arrow::Table::Make(newSchema, arrays); -} - -std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, - const char* name, size_t nColumns, - std::shared_ptr const& projector) -{ - arrow::TableBatchReader reader(*fullTable); - std::shared_ptr batch; - arrow::ArrayVector v; - std::vector chunks; - chunks.resize(nColumns); - std::vector> arrays; - - while (true) { - auto s = reader.ReadNext(&batch); - if (!s.ok()) { - throw runtime_error_f("Cannot read batches from the source table to spawn %s: %s", name, s.ToString().c_str()); - } - if (batch == nullptr) { - break; - } - try { - s = projector->Evaluate(*batch, arrow::default_memory_pool(), &v); - if (!s.ok()) { - throw runtime_error_f("Cannot apply projector to the source table of %s: %s", name, s.ToString().c_str()); - } - } catch (std::exception& e) { - throw runtime_error_f("Cannot apply projector to the source table of %s: exception caught: %s", name, e.what()); - } - - for (auto i = 0U; i < nColumns; ++i) { - chunks[i].emplace_back(v.at(i)); - } - } - - arrays.reserve(nColumns); - for (auto i = 0U; i < nColumns; ++i) { - arrays.push_back(std::make_shared(chunks[i])); - } - - return arrow::Table::Make(newSchema, arrays); + addLabelToSchema(mSchema, label); } } // namespace o2::framework From 54fb57ab5697a27d161665c93901cde3d3afd9d7 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 24 Nov 2025 14:30:00 +0100 Subject: [PATCH 13/22] fixup! rework index builder --- .../AnalysisSupport/src/AODReaderHelpers.cxx | 4 +- .../Core/include/Framework/AnalysisHelpers.h | 14 +- .../include/Framework/IndexBuilderHelpers.h | 261 +++++---- Framework/Core/src/AnalysisHelpers.cxx | 193 +++++-- Framework/Core/src/IndexBuilderHelpers.cxx | 540 +++++++++++++----- Framework/Core/test/test_IndexBuilder.cxx | 13 +- 6 files changed, 693 insertions(+), 332 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx index da77fbfe4afed..fd0f74abfb287 100644 --- a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx @@ -72,7 +72,7 @@ struct Buildable { outputSchema, origin, description, - version}; + version, nullptr}; } }; @@ -91,7 +91,7 @@ AlgorithmSpec AODReaderHelpers::indexBuilderCallback(ConfigContext const& ctx) for (auto& b : buildables) { builders.push_back(b.createBuilder()); } - return [builders](ProcessingContext& pc) { + return [builders](ProcessingContext& pc) mutable { auto outputs = pc.outputs(); for (auto& builder : builders) { outputs.adopt(Output{builder.origin, builder.description, builder.version}, builder.materialize(pc)); diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index f52c32b2a8ac9..fb612424c2ef1 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -60,7 +60,11 @@ struct IndexRecord { }; struct IndexBuilder { - static std::shared_ptr materialize(std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive); + static std::vector makeBuilders(std::vector>&& tables, std::vector const& records); + static void resetBuilders(std::vector& builders, std::vector>&& tables); + + // static std::shared_ptr materialize(std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive); + static std::shared_ptr materializeNG(std::vector& builders, std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive); }; } // namespace o2::soa @@ -161,7 +165,9 @@ struct Builder { header::DataDescription description; header::DataHeader::SubSpecificationType version; - std::shared_ptr materialize(ProcessingContext& pc) const; + std::shared_ptr> builders = nullptr; + + std::shared_ptr materialize(ProcessingContext& pc); }; } // namespace o2::framework @@ -722,6 +728,8 @@ struct Builds : decltype(transformBase()) { std::vector map = soa::getIndexMapping(); + std::vector builders; + T* operator->() { return table.get(); @@ -744,7 +752,7 @@ struct Builds : decltype(transformBase()) { auto build(std::vector>&& tables) { - this->table = std::make_shared(soa::IndexBuilder::materialize(std::forward>>(tables), map, outputSchema, metadata::exclusive)); + this->table = std::make_shared(soa::IndexBuilder::materializeNG(builders, std::forward>>(tables), map, outputSchema, metadata::exclusive)); return (this->table != nullptr); } }; diff --git a/Framework/Core/include/Framework/IndexBuilderHelpers.h b/Framework/Core/include/Framework/IndexBuilderHelpers.h index 1a758008db728..94fc322094a23 100644 --- a/Framework/Core/include/Framework/IndexBuilderHelpers.h +++ b/Framework/Core/include/Framework/IndexBuilderHelpers.h @@ -14,7 +14,6 @@ #include #include #include -#include #include namespace o2::soa @@ -35,6 +34,7 @@ void cannotCreateIndexBuilder(); struct ChunkedArrayIterator { ChunkedArrayIterator(std::shared_ptr source); + void reset(std::shared_ptr& source); std::shared_ptr mSource = nullptr; size_t mPosition = 0; @@ -44,6 +44,7 @@ struct ChunkedArrayIterator { int const* mCurrent = nullptr; int const* mLast = nullptr; size_t mFirstIndex = 0; + size_t mSourceSize = 0; std::shared_ptr getCurrentArray(); void nextChunk(); @@ -53,160 +54,170 @@ struct ChunkedArrayIterator { struct SelfBuilder { std::unique_ptr mBuilder = nullptr; + std::unique_ptr keyIndex = nullptr; SelfBuilder(arrow::MemoryPool* pool); + void reset(std::shared_ptr); + + inline bool find(int) const + { + return true; + } + void fill(int idx); + std::shared_ptr result() const; }; -struct SingleBuilder { - ChunkedArrayIterator arrayIterator; +struct SingleBuilder : public ChunkedArrayIterator { std::unique_ptr mBuilder = nullptr; SingleBuilder(std::shared_ptr source, arrow::MemoryPool* pool); + void reset(std::shared_ptr source); + + bool find(int idx); + void fill(int idx); + std::shared_ptr result() const; }; -struct SliceBuilder { - ChunkedArrayIterator arrayIterator; +struct SliceBuilder : public ChunkedArrayIterator { arrow::ArrayBuilder* mValueBuilder = nullptr; std::unique_ptr mListBuilder = nullptr; std::shared_ptr> mValues = nullptr; std::shared_ptr> mCounts = nullptr; + int mValuePos = 0; SliceBuilder(std::shared_ptr source, arrow::MemoryPool* pool); + void reset(std::shared_ptr source); + + bool find(int idx); + void fill(int idx); + std::shared_ptr result() const; arrow::Status preSlice(); }; -struct ArrayBuilder { - ChunkedArrayIterator arrayIterator; +struct ArrayBuilder : public ChunkedArrayIterator { arrow::ArrayBuilder* mValueBuilder = nullptr; std::vector mValues; std::vector> mIndices; std::unique_ptr mListBuilder = nullptr; ArrayBuilder(std::shared_ptr source, arrow::MemoryPool* pool); + void reset(std::shared_ptr source); + + bool find(int idx); + void fill(int idx); + std::shared_ptr result() const; arrow::Status preFind(); }; struct IndexColumnBuilderNG { std::variant builder; - - IndexColumnBuilderNG(soa::IndexKind kind, arrow::MemoryPool* pool, std::shared_ptr source = nullptr) - { - switch (kind) { - case soa::IndexKind::IdxSelf: - builder = SelfBuilder{pool}; - break; - case soa::IndexKind::IdxSingle: - builder = SingleBuilder{source, pool}; - break; - case soa::IndexKind::IdxSlice: - builder = SliceBuilder{source, pool}; - break; - case soa::IndexKind::IdxArray: - builder = ArrayBuilder{source, pool}; - break; - default: - cannotCreateIndexBuilder(); - } - } -}; - -struct SelfIndexColumnBuilder { - SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool); - virtual ~SelfIndexColumnBuilder() = default; - - inline std::shared_ptr result() const - { - std::shared_ptr array; - auto status = static_cast(mBuilder.get())->Finish(&array); - if (!status.ok()) { - cannotBuildAnArray(); - } - - return std::make_shared(array); - } - - inline bool find(int) - { - return true; - } - - inline void fill(int idx) - { - (void)static_cast(mBuilder.get())->Append(idx); - } - - std::string mColumnName; - std::unique_ptr mBuilder = nullptr; -}; - -class IndexColumnBuilder : public SelfIndexColumnBuilder, public ChunkedArrayIterator -{ - public: - IndexColumnBuilder(std::shared_ptr source, const char* name, int listSize, arrow::MemoryPool* pool); - ~IndexColumnBuilder() override = default; - - inline std::shared_ptr result() const - { - if (mListSize == -1) { - return resultMulti(); - } else if (mListSize == 2) { - return resultSlice(); - } else { - return resultSingle(); - } - } - - inline bool find(int idx) - { - if (mListSize == -1) { - return findMulti(idx); - } else if (mListSize == 2) { - return findSlice(idx); - } else { - return findSingle(idx); - } - } - - inline void fill(int idx) - { - ++mResultSize; - if (mListSize == -1) { - fillMulti(idx); - } else if (mListSize == 2) { - fillSlice(idx); - } else { - fillSingle(idx); - } - } - - private: - arrow::Status preSlice(); - arrow::Status preFind(); - - bool findSingle(int idx); - bool findSlice(int idx); - bool findMulti(int idx); - - void fillSingle(int idx); - void fillSlice(int idx); - void fillMulti(int idx); - - std::shared_ptr resultSingle() const; - std::shared_ptr resultSlice() const; - std::shared_ptr resultMulti() const; - - int mListSize = 1; - arrow::ArrayBuilder* mValueBuilder = nullptr; - std::unique_ptr mListBuilder = nullptr; - - size_t mSourceSize = 0; size_t mResultSize = 0; + int mColumnPos = -1; + IndexColumnBuilderNG(soa::IndexKind kind, int pos, arrow::MemoryPool* pool, std::shared_ptr source = nullptr); + void reset(std::shared_ptr source = nullptr); - std::shared_ptr> mValuesArrow = nullptr; - std::shared_ptr> mCounts = nullptr; - std::vector mValues; - std::vector> mIndices; - int mFillOffset = 0; - int mValuePos = 0; + bool find(int idx); + void fill(int idx); + std::shared_ptr result() const; }; + +// struct SelfIndexColumnBuilder { +// SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool); +// virtual ~SelfIndexColumnBuilder() = default; + +// inline std::shared_ptr result() const +// { +// std::shared_ptr array; +// auto status = static_cast(mBuilder.get())->Finish(&array); +// if (!status.ok()) { +// cannotBuildAnArray(); +// } + +// return std::make_shared(array); +// } + +// inline bool find(int) +// { +// return true; +// } + +// inline void fill(int idx) +// { +// (void)static_cast(mBuilder.get())->Append(idx); +// } + +// std::string mColumnName; +// std::unique_ptr mBuilder = nullptr; +// }; + +// class IndexColumnBuilder : public SelfIndexColumnBuilder, public ChunkedArrayIterator +// { +// public: +// IndexColumnBuilder(std::shared_ptr source, const char* name, int listSize, arrow::MemoryPool* pool); +// ~IndexColumnBuilder() override = default; + +// inline std::shared_ptr result() const +// { +// if (mListSize == -1) { +// return resultMulti(); +// } else if (mListSize == 2) { +// return resultSlice(); +// } else { +// return resultSingle(); +// } +// } + +// inline bool find(int idx) +// { +// if (mListSize == -1) { +// return findMulti(idx); +// } else if (mListSize == 2) { +// return findSlice(idx); +// } else { +// return findSingle(idx); +// } +// } + +// inline void fill(int idx) +// { +// ++mResultSize; +// if (mListSize == -1) { +// fillMulti(idx); +// } else if (mListSize == 2) { +// fillSlice(idx); +// } else { +// fillSingle(idx); +// } +// } + +// private: +// arrow::Status preSlice(); +// arrow::Status preFind(); + +// bool findSingle(int idx); +// bool findSlice(int idx); +// bool findMulti(int idx); + +// void fillSingle(int idx); +// void fillSlice(int idx); +// void fillMulti(int idx); + +// std::shared_ptr resultSingle() const; +// std::shared_ptr resultSlice() const; +// std::shared_ptr resultMulti() const; + +// int mListSize = 1; +// arrow::ArrayBuilder* mValueBuilder = nullptr; +// std::unique_ptr mListBuilder = nullptr; + +// size_t mSourceSize = 0; +// size_t mResultSize = 0; + +// std::shared_ptr> mValuesArrow = nullptr; +// std::shared_ptr> mCounts = nullptr; +// std::vector mValues; +// std::vector> mIndices; +// int mFillOffset = 0; +// int mValuePos = 0; +// }; } // namespace o2::framework #endif // O2_FRAMEWORK_INDEXBUILDERHELPERS_H_ diff --git a/Framework/Core/src/AnalysisHelpers.cxx b/Framework/Core/src/AnalysisHelpers.cxx index 4628e970369e3..8330ef4efeb13 100644 --- a/Framework/Core/src/AnalysisHelpers.cxx +++ b/Framework/Core/src/AnalysisHelpers.cxx @@ -14,93 +14,165 @@ #include "IndexJSONHelpers.h" namespace o2::soa { -std::shared_ptr IndexBuilder::materialize(std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive) +std::vector IndexBuilder::makeBuilders(std::vector>&& tables, std::vector const& records) { + std::vector builders; auto pool = arrow::default_memory_pool(); - std::vector> builders; // this needs to become a state to avoid reallocations - // can builders be reset and re-used? - framework::SelfIndexColumnBuilder self{records[0].columnLabel.c_str(), pool}; - std::unique_ptr keyIndex = nullptr; + builders.emplace_back(IndexKind::IdxSelf, records[0].pos, pool); if (records[0].kind != soa::IndexKind::IdxSelf) { - keyIndex = std::make_unique(tables[0]->column(records[0].pos)); + std::get(builders[0].builder).keyIndex = std::make_unique(tables[0]->column(records[0].pos)); } for (auto i = 1U; i < records.size(); ++i) { - if (records[i].kind == soa::IndexKind::IdxSelf) { - builders.emplace_back(std::make_shared(records[i].columnLabel.c_str(), pool)); - } else { - builders.emplace_back( - std::make_shared( - tables[i]->column(records[i].pos), - records[i].columnLabel.c_str(), - [](IndexKind kind) { - switch (kind) { - case IndexKind::IdxSingle: - return 1; - case IndexKind::IdxSlice: - return 2; - case IndexKind::IdxArray: - return -1; - default: - return -2; - } - }(records[i].kind), - pool)); - } + builders.emplace_back(records[i].kind, records[i].pos, pool, records[i].pos >= 0 ? tables[i]->column(records[i].pos) : nullptr); + } + + return builders; +} + +void IndexBuilder::resetBuilders(std::vector& builders, std::vector>&& tables) +{ + for (auto i = 0U; i < builders.size(); ++i) { + builders[i].reset(tables[i]->column(builders[i].mColumnPos)); + } + + if (std::get(builders[0].builder).keyIndex != nullptr) { + std::get(builders[0].builder).keyIndex = std::make_unique(tables[0]->column(builders[0].mColumnPos)); + } +} + +std::shared_ptr IndexBuilder::materializeNG(std::vector& builders, std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive) +{ + auto size = tables[0]->num_rows(); + if (builders.empty()) { + builders = makeBuilders(std::move(tables), records); + } else { + resetBuilders(builders, std::move(tables)); } std::vector finds; finds.resize(builders.size()); - for (int64_t counter = 0; counter < tables[0]->num_rows(); ++counter) { + for (int64_t counter = 0; counter < size; ++counter) { int64_t idx = -1; - if (keyIndex == nullptr) { + if (std::get(builders[0].builder).keyIndex == nullptr) { idx = counter; } else { - idx = keyIndex->valueAt(counter); + idx = std::get(builders[0].builder).keyIndex->valueAt(counter); } for (auto i = 0U; i < builders.size(); ++i) { - if (records[i+1].kind == soa::IndexKind::IdxSelf) { - finds[i] = builders[i]->find(idx); - } else { - finds[i] = std::static_pointer_cast(builders[i])->find(idx); - } + finds[i] = builders[i].find(idx); } if (exclusive) { if (std::none_of(finds.begin(), finds.end(), [](bool const x) { return x == false; })) { - for (auto i = 0U; i < builders.size(); ++i) { - if (records[i+1].kind == soa::IndexKind::IdxSelf) { - builders[i]->fill(idx); - } else { - std::static_pointer_cast(builders[i])->fill(idx); - } + builders[0].fill(counter); + for (auto i = 1U; i < builders.size(); ++i) { + builders[i].fill(idx); } - self.fill(counter); } } else { - for (auto i = 0U; i < builders.size(); ++i) { - if (records[i+1].kind == soa::IndexKind::IdxSelf) { - builders[i]->fill(idx); - } else { - std::static_pointer_cast(builders[i])->fill(idx); - } + builders[0].fill(counter); + for (auto i = 1U; i < builders.size(); ++i) { + builders[i].fill(idx); } - self.fill(counter); } } std::vector> arrays; // same - arrays.reserve(records.size()); - arrays.push_back(self.result()); - for (auto i = 0U; i < builders.size(); ++i) { - if (records[i+1].kind == soa::IndexKind::IdxSelf) { - arrays.push_back(builders[i]->result()); - } else { - arrays.push_back(std::static_pointer_cast(builders[i])->result()); - } + arrays.reserve(builders.size()); + for (auto& builder : builders) { + arrays.push_back(builder.result()); } return arrow::Table::Make(schema, arrays); } + +// std::shared_ptr IndexBuilder::materialize(std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive) +// { +// auto pool = arrow::default_memory_pool(); +// std::vector> builders; // this needs to become a state to avoid reallocations +// // can builders be reset and re-used? +// framework::SelfIndexColumnBuilder self{records[0].columnLabel.c_str(), pool}; +// std::unique_ptr keyIndex = nullptr; +// if (records[0].kind != soa::IndexKind::IdxSelf) { +// keyIndex = std::make_unique(tables[0]->column(records[0].pos)); +// } + +// for (auto i = 1U; i < records.size(); ++i) { +// if (records[i].kind == soa::IndexKind::IdxSelf) { +// builders.emplace_back(std::make_shared(records[i].columnLabel.c_str(), pool)); +// } else { +// builders.emplace_back( +// std::make_shared( +// tables[i]->column(records[i].pos), +// records[i].columnLabel.c_str(), +// [](IndexKind kind) { +// switch (kind) { +// case IndexKind::IdxSingle: +// return 1; +// case IndexKind::IdxSlice: +// return 2; +// case IndexKind::IdxArray: +// return -1; +// default: +// return -2; +// } +// }(records[i].kind), +// pool)); +// } +// } + +// std::vector finds; +// finds.resize(builders.size()); +// for (int64_t counter = 0; counter < tables[0]->num_rows(); ++counter) { +// int64_t idx = -1; +// if (keyIndex == nullptr) { +// idx = counter; +// } else { +// idx = keyIndex->valueAt(counter); +// } +// for (auto i = 0U; i < builders.size(); ++i) { +// if (records[i+1].kind == soa::IndexKind::IdxSelf) { +// finds[i] = builders[i]->find(idx); +// } else { +// finds[i] = std::static_pointer_cast(builders[i])->find(idx); +// } +// } +// if (exclusive) { +// if (std::none_of(finds.begin(), finds.end(), [](bool const x) { return x == false; })) { +// for (auto i = 0U; i < builders.size(); ++i) { +// if (records[i+1].kind == soa::IndexKind::IdxSelf) { +// builders[i]->fill(idx); +// } else { +// std::static_pointer_cast(builders[i])->fill(idx); +// } +// } +// self.fill(counter); +// } +// } else { +// for (auto i = 0U; i < builders.size(); ++i) { +// if (records[i+1].kind == soa::IndexKind::IdxSelf) { +// builders[i]->fill(idx); +// } else { +// std::static_pointer_cast(builders[i])->fill(idx); +// } +// } +// self.fill(counter); +// } +// } + +// std::vector> arrays; // same +// arrays.reserve(records.size()); +// arrays.push_back(self.result()); +// for (auto i = 0U; i < builders.size(); ++i) { +// if (records[i+1].kind == soa::IndexKind::IdxSelf) { +// arrays.push_back(builders[i]->result()); +// } else { +// arrays.push_back(std::static_pointer_cast(builders[i])->result()); +// } +// } + +// return arrow::Table::Make(schema, arrays); +// } } // namespace o2::soa namespace o2::framework @@ -219,11 +291,14 @@ std::shared_ptr Spawner::materialize(ProcessingContext& pc) const return spawnerHelper(fullTable, schema, binding.c_str(), schema->num_fields(), projector); } -std::shared_ptr Builder::materialize(ProcessingContext& pc) const +std::shared_ptr Builder::materialize(ProcessingContext& pc) { + if (builders == nullptr) { + builders = std::make_shared>(); + } std::shared_ptr result; auto tables = extractSources(pc, labels); - result = o2::soa::IndexBuilder::materialize(std::move(tables), records, outputSchema, exclusive); + result = o2::soa::IndexBuilder::materializeNG(*builders.get(), std::move(tables), records, outputSchema, exclusive); return result; } } // namespace o2::framework diff --git a/Framework/Core/src/IndexBuilderHelpers.cxx b/Framework/Core/src/IndexBuilderHelpers.cxx index 604d1e7e5b064..871071881fe1c 100644 --- a/Framework/Core/src/IndexBuilderHelpers.cxx +++ b/Framework/Core/src/IndexBuilderHelpers.cxx @@ -12,6 +12,7 @@ #include "Framework/RuntimeError.h" #include "Framework/IndexBuilderHelpers.h" #include "Framework/CompilerBuiltins.h" +#include "Framework/VariantHelpers.h" #include #include #include @@ -31,8 +32,27 @@ void cannotCreateIndexBuilder() } ChunkedArrayIterator::ChunkedArrayIterator(std::shared_ptr source) - : mSource{source} + : mSource{source}, + mSourceSize{(size_t)source->length()} +{ + mCurrentArray = getCurrentArray(); + mCurrent = reinterpret_cast(mCurrentArray->values()->data()) + mOffset; + mLast = mCurrent + mCurrentArray->length(); +} + +void ChunkedArrayIterator::reset(std::shared_ptr& source) { + mPosition = 0; + mChunk = 0; + mOffset = 0; + mCurrentArray = nullptr; + mCurrent = nullptr; + mLast = nullptr; + mFirstIndex = 0; + mSourceSize = 0; + + mSource = source; + mSourceSize = (size_t)source->length(); mCurrentArray = getCurrentArray(); mCurrent = reinterpret_cast(mCurrentArray->values()->data()) + mOffset; mLast = mCurrent + mCurrentArray->length(); @@ -45,9 +65,31 @@ SelfBuilder::SelfBuilder(arrow::MemoryPool* pool) throw framework::runtime_error("Cannot create array builder for the self-index!"); } } +// static_cast(this)->reset(pool); +void SelfBuilder::reset(std::shared_ptr) +{ + mBuilder->Reset(); + keyIndex = nullptr; +} + +void SelfBuilder::fill(int idx) +{ + (void)static_cast(mBuilder.get())->Append(idx); +} + +std::shared_ptr SelfBuilder::result() const +{ + std::shared_ptr array; + auto status = static_cast(mBuilder.get())->Finish(&array); + if (!status.ok()) { + cannotBuildAnArray(); + } + + return std::make_shared(array); +} SingleBuilder::SingleBuilder(std::shared_ptr source, arrow::MemoryPool* pool) - : arrayIterator{source} + : ChunkedArrayIterator{source} { auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder); if (!status.ok()) { @@ -55,8 +97,54 @@ SingleBuilder::SingleBuilder(std::shared_ptr source, arrow: } } +void SingleBuilder::reset(std::shared_ptr source) +{ + static_cast(this)->reset(source); + mBuilder->Reset(); +} + +bool SingleBuilder::find(int idx) +{ + auto count = mSourceSize - mPosition; + while (count > 0) { + size_t step = count / 2; + mPosition += step; + if (valueAt(mPosition) <= idx) { + count -= step + 1; + } else { + mPosition -= step; + count = step; + } + } + + if (mPosition < mSourceSize && valueAt(mPosition) < idx) { + ++mPosition; + } + + return (mPosition < mSourceSize && valueAt(mPosition) == idx); +} + +void SingleBuilder::fill(int idx) +{ + if (mPosition < mSourceSize && valueAt(mPosition) == idx) { + (void)static_cast(mBuilder.get())->Append((int)mPosition); + } else { + (void)static_cast(mBuilder.get())->Append(-1); + } +} + +std::shared_ptr SingleBuilder::result() const +{ + std::shared_ptr array; + auto status = static_cast(mBuilder.get())->Finish(&array); + if (!status.ok()) { + throw runtime_error("Cannot build an array"); + } + return std::make_shared(array); +} + SliceBuilder::SliceBuilder(std::shared_ptr source, arrow::MemoryPool* pool) - : arrayIterator{source} + : ChunkedArrayIterator{source} { if (!preSlice().ok()) { throw framework::runtime_error("Cannot pre-slice the source for slice-index building"); @@ -71,11 +159,68 @@ SliceBuilder::SliceBuilder(std::shared_ptr source, arrow::M mValueBuilder = static_cast(mListBuilder.get())->value_builder(); } +void SliceBuilder::reset(std::shared_ptr source) +{ + static_cast(this)->reset(source); + if (!preSlice().ok()) { + throw framework::runtime_error("Cannot pre-slice the source for slice-index building"); + } + mListBuilder->Reset(); + mValues = nullptr; + mCounts = nullptr; + mValuePos = 0; +} + +bool SliceBuilder::find(int idx) +{ + auto count = mValues->length() - mValuePos; + while (count > 0) { + auto step = count / 2; + mValuePos += step; + if (mValues->Value(mValuePos) <= idx) { + count -= step + 1; + } else { + mValuePos -= step; + count = step; + } + } + + if (mValuePos < mValues->length() && mValues->Value(mValuePos) <= idx) { + ++mPosition; + } + + return (mValuePos < mValues->length() && mValues->Value(mValuePos) == idx); +} + +void SliceBuilder::fill(int idx) +{ + int data[2] = {-1, -1}; + if (mValuePos < mValues->length() && mValues->Value(mValuePos) == idx) { + for (auto i = 0; i < mValuePos; ++i) { + data[0] += mCounts->Value(i); + } + data[0] += 1; + data[1] = data[0] + mCounts->Value(mValuePos) - 1; + } + (void)static_cast(mListBuilder.get())->AppendValues(1); + (void)static_cast(mValueBuilder)->AppendValues(data, 2); +} + +std::shared_ptr SliceBuilder::result() const +{ + std::shared_ptr array; + auto status = static_cast(mListBuilder.get())->Finish(&array); + if (!status.ok()) { + throw runtime_error("Cannot build an array"); + } + return std::make_shared(array); +} + arrow::Status SliceBuilder::SliceBuilder::preSlice() { arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); - ARROW_ASSIGN_OR_RAISE(value_counts, arrow::compute::CallFunction("value_counts", {arrayIterator.mSource}, &options)); + ARROW_ASSIGN_OR_RAISE(value_counts, arrow::compute::CallFunction("value_counts", {mSource}, &options)); auto pair = static_cast(value_counts.array()); mValues = std::make_shared>(pair.field(0)->data()); mCounts = std::make_shared>(pair.field(1)->data()); @@ -83,7 +228,7 @@ arrow::Status SliceBuilder::SliceBuilder::preSlice() } ArrayBuilder::ArrayBuilder(std::shared_ptr source, arrow::MemoryPool* pool) - : arrayIterator{source} + : ChunkedArrayIterator{source} { if (!preFind().ok()) { throw framework::runtime_error("Cannot pre-find in a source for array-index building"); @@ -98,58 +243,43 @@ ArrayBuilder::ArrayBuilder(std::shared_ptr source, arrow::M mValueBuilder = static_cast(mListBuilder.get())->value_builder(); } -SelfIndexColumnBuilder::SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool) - : mColumnName{name} +void ArrayBuilder::reset(std::shared_ptr source) { - auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder); - if (!status.ok()) { - throw framework::runtime_error("Cannot create array builder!"); + static_cast(this)->reset(source); + if (!preFind().ok()) { + throw framework::runtime_error("Cannot pre-find in a source for array-index building"); } + mValues.clear(); + mIndices.clear(); + mListBuilder->Reset(); } -IndexColumnBuilder::IndexColumnBuilder(std::shared_ptr source, const char* name, int listSize, arrow::MemoryPool* pool) - : SelfIndexColumnBuilder{name, pool}, - ChunkedArrayIterator{source}, - mListSize{listSize}, - mSourceSize{(size_t)source->length()} +bool ArrayBuilder::find(int idx) { - switch (mListSize) { - case 1: { - mValueBuilder = mBuilder.get(); - }; break; - case 2: { - if (preSlice().ok()) { - mListBuilder = std::make_unique(pool, std::move(mBuilder), mListSize); - mValueBuilder = static_cast(mListBuilder.get())->value_builder(); - } else { - throw runtime_error("Cannot pre-slice an array"); - } - }; break; - case -1: { - if (preFind().ok()) { - mListBuilder = std::make_unique(pool, std::move(mBuilder)); - mValueBuilder = static_cast(mListBuilder.get())->value_builder(); - } else { - throw runtime_error("Cannot pre-find array groups"); - } - }; break; - default: - throw runtime_error_f("Invalid list size for index column: %d", mListSize); + return (std::find(mValues.begin(), mValues.end(), idx) != mValues.end()); +} + +void ArrayBuilder::fill(int idx) +{ + (void)static_cast(mListBuilder.get())->Append(); + if (std::find(mValues.begin(), mValues.end(), idx) != mValues.end()) { + (void)static_cast(mValueBuilder)->AppendValues(mIndices[idx].data(), mIndices[idx].size()); + } else { + (void)static_cast(mValueBuilder)->AppendValues(nullptr, 0); } } -arrow::Status IndexColumnBuilder::preSlice() +std::shared_ptr ArrayBuilder::result() const { - arrow::Datum value_counts; - auto options = arrow::compute::ScalarAggregateOptions::Defaults(); - ARROW_ASSIGN_OR_RAISE(value_counts, arrow::compute::CallFunction("value_counts", {mSource}, &options)); - auto pair = static_cast(value_counts.array()); - mValuesArrow = std::make_shared>(pair.field(0)->data()); - mCounts = std::make_shared>(pair.field(1)->data()); - return arrow::Status::OK(); + std::shared_ptr array; + auto status = static_cast(mListBuilder.get())->Finish(&array); + if (!status.ok()) { + throw runtime_error("Cannot build an array"); + } + return std::make_shared(array); } -arrow::Status IndexColumnBuilder::preFind() +arrow::Status ArrayBuilder::preFind() { arrow::Datum max; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); @@ -171,116 +301,246 @@ arrow::Status IndexColumnBuilder::preFind() return arrow::Status::OK(); } -std::shared_ptr IndexColumnBuilder::resultSingle() const -{ - std::shared_ptr array; - auto status = static_cast(mValueBuilder)->Finish(&array); - if (!status.ok()) { - throw runtime_error("Cannot build an array"); - } - return std::make_shared(array); -} - -std::shared_ptr IndexColumnBuilder::resultSlice() const -{ - std::shared_ptr array; - auto status = static_cast(mListBuilder.get())->Finish(&array); - if (!status.ok()) { - throw runtime_error("Cannot build an array"); - } - return std::make_shared(array); -} - -std::shared_ptr IndexColumnBuilder::resultMulti() const -{ - std::shared_ptr array; - auto status = static_cast(mListBuilder.get())->Finish(&array); - if (!status.ok()) { - throw runtime_error("Cannot build an array"); - } - return std::make_shared(array); -} - -bool IndexColumnBuilder::findSingle(int idx) +IndexColumnBuilderNG::IndexColumnBuilderNG(soa::IndexKind kind, int pos, arrow::MemoryPool* pool, std::shared_ptr source) + : mColumnPos{pos} { - auto count = mSourceSize - mPosition; - while (count > 0) { - size_t step = count / 2; - mPosition += step; - if (valueAt(mPosition) <= idx) { - count -= step + 1; - } else { - mPosition -= step; - count = step; - } - } - - if (mPosition < mSourceSize && valueAt(mPosition) < idx) { - ++mPosition; + switch (kind) { + case soa::IndexKind::IdxSelf: + builder = SelfBuilder{pool}; + break; + case soa::IndexKind::IdxSingle: + builder = SingleBuilder{source, pool}; + break; + case soa::IndexKind::IdxSlice: + builder = SliceBuilder{source, pool}; + break; + case soa::IndexKind::IdxArray: + builder = ArrayBuilder{source, pool}; + break; + default: + cannotCreateIndexBuilder(); } - - return (mPosition < mSourceSize && valueAt(mPosition) == idx); } -bool IndexColumnBuilder::findSlice(int idx) +void IndexColumnBuilderNG::reset(std::shared_ptr source) { - auto count = mValuesArrow->length() - mValuePos; - while (count > 0) { - auto step = count / 2; - mValuePos += step; - if (mValuesArrow->Value(mValuePos) <= idx) { - count -= step + 1; - } else { - mValuePos -= step; - count = step; - } - } - - if (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) <= idx) { - ++mPosition; - } - - return (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) == idx); + std::visit( + overloaded{ + [](std::monostate) {}, + [&source](auto& b) { b.reset(source); }}, + builder); } -bool IndexColumnBuilder::findMulti(int idx) +bool IndexColumnBuilderNG::find(int idx) { - return (std::find(mValues.begin(), mValues.end(), idx) != mValues.end()); + return std::visit( + overloaded{ + [](std::monostate) { return false; }, + [&idx](auto& b) { return b.find(idx); }, + }, + builder); } -void IndexColumnBuilder::fillSingle(int idx) +void IndexColumnBuilderNG::fill(int idx) { - // entry point - if (mPosition < mSourceSize && valueAt(mPosition) == idx) { - (void)static_cast(mValueBuilder)->Append((int)mPosition); - } else { - (void)static_cast(mValueBuilder)->Append(-1); - } + std::visit( + overloaded{ + [](std::monostate) {}, + [&idx](auto& b) { b.fill(idx); }}, + builder); } -void IndexColumnBuilder::fillSlice(int idx) +std::shared_ptr IndexColumnBuilderNG::result() const { - int data[2] = {-1, -1}; - if (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) == idx) { - for (auto i = 0; i < mValuePos; ++i) { - data[0] += mCounts->Value(i); - } - data[0] += 1; - data[1] = data[0] + mCounts->Value(mValuePos) - 1; - } - (void)static_cast(mListBuilder.get())->AppendValues(1); - (void)static_cast(mValueBuilder)->AppendValues(data, 2); + return std::visit( + overloaded{ + [](std::monostate) -> std::shared_ptr { return nullptr; }, + [](auto& b) { return b.result(); }}, + builder); } -void IndexColumnBuilder::fillMulti(int idx) -{ - (void)static_cast(mListBuilder.get())->Append(); - if (std::find(mValues.begin(), mValues.end(), idx) != mValues.end()) { - (void)static_cast(mValueBuilder)->AppendValues(mIndices[idx].data(), mIndices[idx].size()); - } else { - (void)static_cast(mValueBuilder)->AppendValues(nullptr, 0); - } -} +// SelfIndexColumnBuilder::SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool) +// : mColumnName{name} +// { +// auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder); +// if (!status.ok()) { +// throw framework::runtime_error("Cannot create array builder!"); +// } +// } +// IndexColumnBuilder::IndexColumnBuilder(std::shared_ptr source, const char* name, int listSize, arrow::MemoryPool* pool) +// : SelfIndexColumnBuilder{name, pool}, +// ChunkedArrayIterator{source}, +// mListSize{listSize}, +// mSourceSize{(size_t)source->length()} +// { +// switch (mListSize) { +// case 1: { +// mValueBuilder = mBuilder.get(); +// }; break; +// case 2: { +// if (preSlice().ok()) { +// mListBuilder = std::make_unique(pool, std::move(mBuilder), mListSize); +// mValueBuilder = static_cast(mListBuilder.get())->value_builder(); +// } else { +// throw runtime_error("Cannot pre-slice an array"); +// } +// }; break; +// case -1: { +// if (preFind().ok()) { +// mListBuilder = std::make_unique(pool, std::move(mBuilder)); +// mValueBuilder = static_cast(mListBuilder.get())->value_builder(); +// } else { +// throw runtime_error("Cannot pre-find array groups"); +// } +// }; break; +// default: +// throw runtime_error_f("Invalid list size for index column: %d", mListSize); +// } +// } + +// arrow::Status IndexColumnBuilder::preSlice() +// { +// arrow::Datum value_counts; +// auto options = arrow::compute::ScalarAggregateOptions::Defaults(); +// ARROW_ASSIGN_OR_RAISE(value_counts, arrow::compute::CallFunction("value_counts", {mSource}, &options)); +// auto pair = static_cast(value_counts.array()); +// mValuesArrow = std::make_shared>(pair.field(0)->data()); +// mCounts = std::make_shared>(pair.field(1)->data()); +// return arrow::Status::OK(); +// } + +// arrow::Status IndexColumnBuilder::preFind() +// { +// arrow::Datum max; +// auto options = arrow::compute::ScalarAggregateOptions::Defaults(); +// ARROW_ASSIGN_OR_RAISE(max, arrow::compute::CallFunction("max", {mSource}, &options)); +// auto maxValue = std::dynamic_pointer_cast(max.scalar())->value; +// mIndices.resize(maxValue + 1); + +// auto row = 0; +// for (auto i = 0; i < mSource->length(); ++i) { +// auto v = valueAt(i); +// if (v >= 0) { +// mValues.emplace_back(v); +// mIndices[v].push_back(row); +// } +// ++row; +// } +// std::sort(mValues.begin(), mValues.end()); + +// return arrow::Status::OK(); +// } + +// std::shared_ptr IndexColumnBuilder::resultSingle() const +// { +// std::shared_ptr array; +// auto status = static_cast(mValueBuilder)->Finish(&array); +// if (!status.ok()) { +// throw runtime_error("Cannot build an array"); +// } +// return std::make_shared(array); +// } + +// std::shared_ptr IndexColumnBuilder::resultSlice() const +// { +// std::shared_ptr array; +// auto status = static_cast(mListBuilder.get())->Finish(&array); +// if (!status.ok()) { +// throw runtime_error("Cannot build an array"); +// } +// return std::make_shared(array); +// } + +// std::shared_ptr IndexColumnBuilder::resultMulti() const +// { +// std::shared_ptr array; +// auto status = static_cast(mListBuilder.get())->Finish(&array); +// if (!status.ok()) { +// throw runtime_error("Cannot build an array"); +// } +// return std::make_shared(array); +// } + +// bool IndexColumnBuilder::findSingle(int idx) +// { +// auto count = mSourceSize - mPosition; +// while (count > 0) { +// size_t step = count / 2; +// mPosition += step; +// if (valueAt(mPosition) <= idx) { +// count -= step + 1; +// } else { +// mPosition -= step; +// count = step; +// } +// } + +// if (mPosition < mSourceSize && valueAt(mPosition) < idx) { +// ++mPosition; +// } + +// return (mPosition < mSourceSize && valueAt(mPosition) == idx); +// } + +// bool IndexColumnBuilder::findSlice(int idx) +// { +// auto count = mValuesArrow->length() - mValuePos; +// while (count > 0) { +// auto step = count / 2; +// mValuePos += step; +// if (mValuesArrow->Value(mValuePos) <= idx) { +// count -= step + 1; +// } else { +// mValuePos -= step; +// count = step; +// } +// } + +// if (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) <= idx) { +// ++mPosition; +// } + +// return (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) == idx); +// } + +// bool IndexColumnBuilder::findMulti(int idx) +// { +// return (std::find(mValues.begin(), mValues.end(), idx) != mValues.end()); +// } + +// void IndexColumnBuilder::fillSingle(int idx) +// { +// // entry point +// if (mPosition < mSourceSize && valueAt(mPosition) == idx) { +// (void)static_cast(mValueBuilder)->Append((int)mPosition); +// } else { +// (void)static_cast(mValueBuilder)->Append(-1); +// } +// } + +// void IndexColumnBuilder::fillSlice(int idx) +// { +// int data[2] = {-1, -1}; +// if (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) == idx) { +// for (auto i = 0; i < mValuePos; ++i) { +// data[0] += mCounts->Value(i); +// } +// data[0] += 1; +// data[1] = data[0] + mCounts->Value(mValuePos) - 1; +// } +// (void)static_cast(mListBuilder.get())->AppendValues(1); +// (void)static_cast(mValueBuilder)->AppendValues(data, 2); +// } + +// void IndexColumnBuilder::fillMulti(int idx) +// { +// (void)static_cast(mListBuilder.get())->Append(); +// if (std::find(mValues.begin(), mValues.end(), idx) != mValues.end()) { +// (void)static_cast(mValueBuilder)->AppendValues(mIndices[idx].data(), mIndices[idx].size()); +// } else { +// (void)static_cast(mValueBuilder)->AppendValues(nullptr, 0); +// } +// } std::shared_ptr ChunkedArrayIterator::getCurrentArray() { diff --git a/Framework/Core/test/test_IndexBuilder.cxx b/Framework/Core/test/test_IndexBuilder.cxx index b08de3d975c56..a73ae7fbb5eb4 100644 --- a/Framework/Core/test/test_IndexBuilder.cxx +++ b/Framework/Core/test/test_IndexBuilder.cxx @@ -103,7 +103,10 @@ TEST_CASE("TestIndexBuilder") Categorys st4{t4}; auto map = getIndexMapping>::metadata>(); - auto t5 = IndexBuilder::materialize("test1a", {t1, t2, t3, t4}, map, true); + auto schema1 = o2::aod::MetadataTrait>::metadata::getSchema(); + std::vector builders1; + auto t5 = IndexBuilder::materializeNG(builders1, {t1, t2, t3, t4}, map, schema1, true); + // auto t5 = IndexBuilder::materialize({t1, t2, t3, t4}, map, schema1, true); REQUIRE(t5->num_rows() == 4); IDXs idxt{t5}; idxt.bindExternalIndices(&st1, &st2, &st3, &st4); @@ -114,7 +117,9 @@ TEST_CASE("TestIndexBuilder") } map = getIndexMapping>::metadata>(); - auto t6 = IndexBuilder::materialize("test2", {t2, t1, t3, t4}, map, false); + auto schema2 = o2::aod::MetadataTrait>::metadata::getSchema(); + std::vector builders2; + auto t6 = IndexBuilder::materializeNG(builders2, {t2, t1, t3, t4}, map, schema2, false); REQUIRE(t6->num_rows() == st2.size()); IDX2s idxs{t6}; std::array fs{0, 1, 2, -1, -1, 4, -1}; @@ -213,7 +218,9 @@ TEST_CASE("AdvancedIndexTables") {8, 31, 42, 46, 58}}}; auto map = getIndexMapping>::metadata>(); - auto t3 = IndexBuilder::materialize("test3", {t1, t2, tc}, map, false); + auto schema3 = o2::aod::MetadataTrait>::metadata::getSchema(); + std::vector builders3; + auto t3 = IndexBuilder::materializeNG(builders3, {t1, t2, tc}, map, schema3, false); REQUIRE(t3->num_rows() == st1.size()); IDX3s idxs{t3}; idxs.bindExternalIndices(&st1, &st2, &st3); From 7ddffeb4e5bfdc203b2a10594f7e0e4f4770ed41 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 24 Nov 2025 16:47:29 +0100 Subject: [PATCH 14/22] fixup! rework index builder --- Framework/Core/include/Framework/ASoA.h | 46 ++++++++++++------------- Framework/Core/src/AnalysisHelpers.cxx | 6 ++-- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index d8f3d1749544b..6ec7fde930aa6 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -3262,29 +3262,29 @@ consteval auto getIndexTargets() O2HASH(#_Name_ "CfgExtension"); \ DECLARE_SOA_CONFIGURABLE_EXTENDED_TABLE_FULL(_Name_, #_Name_ "CfgExtension", _Table_, "AOD", "EX" _Description_, 0, __VA_ARGS__) -#define DECLARE_SOA_INDEX_TABLE_FULL(_Name_, _Key_, _Origin_, _Version_, _Desc_, _Exclusive_, ...) \ - O2HASH(#_Name_); \ - O2HASH(_Desc_ "/" #_Version_); \ - template > \ - struct _Name_##MetadataFrom : o2::aod::TableMetadata, soa::Index<>, __VA_ARGS__> { \ - static constexpr bool exclusive = _Exclusive_; \ - using Key = _Key_; \ - using index_pack_t = framework::pack<__VA_ARGS__>; \ - static constexpr const auto sources = [](framework::pack) { \ - constexpr auto a = o2::soa::mergeOriginals(); \ - return o2::aod::filterForKey(); \ - }(framework::pack<__VA_ARGS__>{}); \ - static_assert(sources.size() == framework::pack_size(index_pack_t{}), "One of the referred tables does not have index to Key"); \ - }; \ - using _Name_##Metadata = _Name_##MetadataFrom>; \ - \ - template > \ - using _Name_##From = o2::soa::IndexTable, o2::aod::Hash<_Desc_ "/" #_Version_ ""_h>, O, _Key_, __VA_ARGS__>; \ - using _Name_ = _Name_##From>; \ - \ - template <> \ - struct MetadataTrait> { \ - using metadata = _Name_##Metadata; \ +#define DECLARE_SOA_INDEX_TABLE_FULL(_Name_, _Key_, _Origin_, _Version_, _Desc_, _Exclusive_, ...) \ + O2HASH(#_Name_); \ + O2HASH(_Desc_ "/" #_Version_); \ + template > \ + struct _Name_##MetadataFrom : o2::aod::TableMetadata, soa::Index<>, __VA_ARGS__> { \ + static constexpr bool exclusive = _Exclusive_; \ + using Key = _Key_; \ + using index_pack_t = framework::pack<__VA_ARGS__>; \ + static constexpr const auto sources = [](framework::pack) { \ + constexpr auto a = o2::soa::mergeOriginals(); \ + return o2::aod::filterForKey(); \ + }(framework::pack<__VA_ARGS__>{}); \ + static_assert(sources.size() - Key::originals.size() + 1 == framework::pack_size(index_pack_t{}), "One of the referred tables does not have index to Key"); \ + }; \ + using _Name_##Metadata = _Name_##MetadataFrom>; \ + \ + template > \ + using _Name_##From = o2::soa::IndexTable, o2::aod::Hash<_Desc_ "/" #_Version_ ""_h>, O, _Key_, __VA_ARGS__>; \ + using _Name_ = _Name_##From>; \ + \ + template <> \ + struct MetadataTrait> { \ + using metadata = _Name_##Metadata; \ }; // Declare were each row is associated to a timestamp column of an _TimestampSource_ diff --git a/Framework/Core/src/AnalysisHelpers.cxx b/Framework/Core/src/AnalysisHelpers.cxx index 8330ef4efeb13..48ec4db57047c 100644 --- a/Framework/Core/src/AnalysisHelpers.cxx +++ b/Framework/Core/src/AnalysisHelpers.cxx @@ -19,7 +19,7 @@ std::vector IndexBuilder::makeBuilders(std::vec std::vector builders; auto pool = arrow::default_memory_pool(); builders.emplace_back(IndexKind::IdxSelf, records[0].pos, pool); - if (records[0].kind != soa::IndexKind::IdxSelf) { + if (records[0].pos >= 0) { std::get(builders[0].builder).keyIndex = std::make_unique(tables[0]->column(records[0].pos)); } @@ -33,10 +33,10 @@ std::vector IndexBuilder::makeBuilders(std::vec void IndexBuilder::resetBuilders(std::vector& builders, std::vector>&& tables) { for (auto i = 0U; i < builders.size(); ++i) { - builders[i].reset(tables[i]->column(builders[i].mColumnPos)); + builders[i].reset(builders[i].mColumnPos >= 0 ? tables[i]->column(builders[i].mColumnPos) : nullptr); } - if (std::get(builders[0].builder).keyIndex != nullptr) { + if (builders[0].mColumnPos >= 0) { std::get(builders[0].builder).keyIndex = std::make_unique(tables[0]->column(builders[0].mColumnPos)); } } From 7d97a972b0b50d1fe39502ab95b200fbb14f9cb8 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 24 Nov 2025 16:56:59 +0100 Subject: [PATCH 15/22] cleanup --- .../Core/include/Framework/AnalysisHelpers.h | 16 +- .../Core/include/Framework/AnalysisManagers.h | 10 +- .../include/Framework/IndexBuilderHelpers.h | 100 ---------- Framework/Core/src/AnalysisHelpers.cxx | 88 --------- Framework/Core/src/IndexBuilderHelpers.cxx | 183 ------------------ 5 files changed, 9 insertions(+), 388 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index fb612424c2ef1..5559a37a3aca1 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -595,11 +595,6 @@ struct Spawns : decltype(transformBase()) { using expression_pack_t = typename metadata::expression_pack_t; static constexpr size_t N = framework::pack_size(expression_pack_t{}); - static consteval auto pack() - { - return expression_pack_t{}; - } - typename T::table_t* operator->() { return table.get(); @@ -651,11 +646,6 @@ struct Defines : decltype(transformBase()) { using placeholders_pack_t = typename metadata::placeholders_pack_t; static constexpr size_t N = framework::pack_size(placeholders_pack_t{}); - constexpr auto pack() - { - return placeholders_pack_t{}; - } - typename T::table_t* operator->() { return table.get(); @@ -674,7 +664,11 @@ struct Defines : decltype(transformBase()) { std::array projectors; std::shared_ptr projector = nullptr; - std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); + std::shared_ptr schema = []() { + auto s = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); + s->WithMetadata(std::make_shared(std::vector{std::string{"label"}}, std::vector{std::string{o2::aod::label()}})); + return s; + }(); std::shared_ptr inputSchema = nullptr; bool needRecompilation = false; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index e6f1ed86914ae..fbb499940b9b9 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -304,9 +304,8 @@ bool prepareOutput(ProcessingContext& context, T& defines) { using metadata = o2::aod::MetadataTrait>::metadata; auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context), std::span{metadata::base_table_t::originalLabels}); - if (originalTable->schema()->fields().empty() == true) { - using base_table_t = typename T::base_table_t::table_t; - originalTable = makeEmptyTable(o2::aod::label()); + if (originalTable->num_rows() == 0) { + originalTable = makeEmptyTable(); } if (defines.inputSchema == nullptr) { defines.inputSchema = originalTable->schema(); @@ -337,9 +336,8 @@ bool prepareDelayedOutput(ProcessingContext& context, T& defines) } using metadata = o2::aod::MetadataTrait>::metadata; auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context), std::span{metadata::base_table_t::originalLabels}); - if (originalTable->schema()->fields().empty() == true) { - using base_table_t = typename T::base_table_t::table_t; - originalTable = makeEmptyTable(o2::aod::label()); + if (originalTable->num_rows() == 0) { + originalTable = makeEmptyTable(); } if (defines.inputSchema == nullptr) { defines.inputSchema = originalTable->schema(); diff --git a/Framework/Core/include/Framework/IndexBuilderHelpers.h b/Framework/Core/include/Framework/IndexBuilderHelpers.h index 94fc322094a23..539219eb8137d 100644 --- a/Framework/Core/include/Framework/IndexBuilderHelpers.h +++ b/Framework/Core/include/Framework/IndexBuilderHelpers.h @@ -118,106 +118,6 @@ struct IndexColumnBuilderNG { void fill(int idx); std::shared_ptr result() const; }; - -// struct SelfIndexColumnBuilder { -// SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool); -// virtual ~SelfIndexColumnBuilder() = default; - -// inline std::shared_ptr result() const -// { -// std::shared_ptr array; -// auto status = static_cast(mBuilder.get())->Finish(&array); -// if (!status.ok()) { -// cannotBuildAnArray(); -// } - -// return std::make_shared(array); -// } - -// inline bool find(int) -// { -// return true; -// } - -// inline void fill(int idx) -// { -// (void)static_cast(mBuilder.get())->Append(idx); -// } - -// std::string mColumnName; -// std::unique_ptr mBuilder = nullptr; -// }; - -// class IndexColumnBuilder : public SelfIndexColumnBuilder, public ChunkedArrayIterator -// { -// public: -// IndexColumnBuilder(std::shared_ptr source, const char* name, int listSize, arrow::MemoryPool* pool); -// ~IndexColumnBuilder() override = default; - -// inline std::shared_ptr result() const -// { -// if (mListSize == -1) { -// return resultMulti(); -// } else if (mListSize == 2) { -// return resultSlice(); -// } else { -// return resultSingle(); -// } -// } - -// inline bool find(int idx) -// { -// if (mListSize == -1) { -// return findMulti(idx); -// } else if (mListSize == 2) { -// return findSlice(idx); -// } else { -// return findSingle(idx); -// } -// } - -// inline void fill(int idx) -// { -// ++mResultSize; -// if (mListSize == -1) { -// fillMulti(idx); -// } else if (mListSize == 2) { -// fillSlice(idx); -// } else { -// fillSingle(idx); -// } -// } - -// private: -// arrow::Status preSlice(); -// arrow::Status preFind(); - -// bool findSingle(int idx); -// bool findSlice(int idx); -// bool findMulti(int idx); - -// void fillSingle(int idx); -// void fillSlice(int idx); -// void fillMulti(int idx); - -// std::shared_ptr resultSingle() const; -// std::shared_ptr resultSlice() const; -// std::shared_ptr resultMulti() const; - -// int mListSize = 1; -// arrow::ArrayBuilder* mValueBuilder = nullptr; -// std::unique_ptr mListBuilder = nullptr; - -// size_t mSourceSize = 0; -// size_t mResultSize = 0; - -// std::shared_ptr> mValuesArrow = nullptr; -// std::shared_ptr> mCounts = nullptr; -// std::vector mValues; -// std::vector> mIndices; -// int mFillOffset = 0; -// int mValuePos = 0; -// }; } // namespace o2::framework #endif // O2_FRAMEWORK_INDEXBUILDERHELPERS_H_ diff --git a/Framework/Core/src/AnalysisHelpers.cxx b/Framework/Core/src/AnalysisHelpers.cxx index 48ec4db57047c..a12fb1ae020c3 100644 --- a/Framework/Core/src/AnalysisHelpers.cxx +++ b/Framework/Core/src/AnalysisHelpers.cxx @@ -85,94 +85,6 @@ std::shared_ptr IndexBuilder::materializeNG(std::vector IndexBuilder::materialize(std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive) -// { -// auto pool = arrow::default_memory_pool(); -// std::vector> builders; // this needs to become a state to avoid reallocations -// // can builders be reset and re-used? -// framework::SelfIndexColumnBuilder self{records[0].columnLabel.c_str(), pool}; -// std::unique_ptr keyIndex = nullptr; -// if (records[0].kind != soa::IndexKind::IdxSelf) { -// keyIndex = std::make_unique(tables[0]->column(records[0].pos)); -// } - -// for (auto i = 1U; i < records.size(); ++i) { -// if (records[i].kind == soa::IndexKind::IdxSelf) { -// builders.emplace_back(std::make_shared(records[i].columnLabel.c_str(), pool)); -// } else { -// builders.emplace_back( -// std::make_shared( -// tables[i]->column(records[i].pos), -// records[i].columnLabel.c_str(), -// [](IndexKind kind) { -// switch (kind) { -// case IndexKind::IdxSingle: -// return 1; -// case IndexKind::IdxSlice: -// return 2; -// case IndexKind::IdxArray: -// return -1; -// default: -// return -2; -// } -// }(records[i].kind), -// pool)); -// } -// } - -// std::vector finds; -// finds.resize(builders.size()); -// for (int64_t counter = 0; counter < tables[0]->num_rows(); ++counter) { -// int64_t idx = -1; -// if (keyIndex == nullptr) { -// idx = counter; -// } else { -// idx = keyIndex->valueAt(counter); -// } -// for (auto i = 0U; i < builders.size(); ++i) { -// if (records[i+1].kind == soa::IndexKind::IdxSelf) { -// finds[i] = builders[i]->find(idx); -// } else { -// finds[i] = std::static_pointer_cast(builders[i])->find(idx); -// } -// } -// if (exclusive) { -// if (std::none_of(finds.begin(), finds.end(), [](bool const x) { return x == false; })) { -// for (auto i = 0U; i < builders.size(); ++i) { -// if (records[i+1].kind == soa::IndexKind::IdxSelf) { -// builders[i]->fill(idx); -// } else { -// std::static_pointer_cast(builders[i])->fill(idx); -// } -// } -// self.fill(counter); -// } -// } else { -// for (auto i = 0U; i < builders.size(); ++i) { -// if (records[i+1].kind == soa::IndexKind::IdxSelf) { -// builders[i]->fill(idx); -// } else { -// std::static_pointer_cast(builders[i])->fill(idx); -// } -// } -// self.fill(counter); -// } -// } - -// std::vector> arrays; // same -// arrays.reserve(records.size()); -// arrays.push_back(self.result()); -// for (auto i = 0U; i < builders.size(); ++i) { -// if (records[i+1].kind == soa::IndexKind::IdxSelf) { -// arrays.push_back(builders[i]->result()); -// } else { -// arrays.push_back(std::static_pointer_cast(builders[i])->result()); -// } -// } - -// return arrow::Table::Make(schema, arrays); -// } } // namespace o2::soa namespace o2::framework diff --git a/Framework/Core/src/IndexBuilderHelpers.cxx b/Framework/Core/src/IndexBuilderHelpers.cxx index 871071881fe1c..e23851b9d5cc7 100644 --- a/Framework/Core/src/IndexBuilderHelpers.cxx +++ b/Framework/Core/src/IndexBuilderHelpers.cxx @@ -359,189 +359,6 @@ std::shared_ptr IndexColumnBuilderNG::result() const builder); } -// SelfIndexColumnBuilder::SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool) -// : mColumnName{name} -// { -// auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder); -// if (!status.ok()) { -// throw framework::runtime_error("Cannot create array builder!"); -// } -// } -// IndexColumnBuilder::IndexColumnBuilder(std::shared_ptr source, const char* name, int listSize, arrow::MemoryPool* pool) -// : SelfIndexColumnBuilder{name, pool}, -// ChunkedArrayIterator{source}, -// mListSize{listSize}, -// mSourceSize{(size_t)source->length()} -// { -// switch (mListSize) { -// case 1: { -// mValueBuilder = mBuilder.get(); -// }; break; -// case 2: { -// if (preSlice().ok()) { -// mListBuilder = std::make_unique(pool, std::move(mBuilder), mListSize); -// mValueBuilder = static_cast(mListBuilder.get())->value_builder(); -// } else { -// throw runtime_error("Cannot pre-slice an array"); -// } -// }; break; -// case -1: { -// if (preFind().ok()) { -// mListBuilder = std::make_unique(pool, std::move(mBuilder)); -// mValueBuilder = static_cast(mListBuilder.get())->value_builder(); -// } else { -// throw runtime_error("Cannot pre-find array groups"); -// } -// }; break; -// default: -// throw runtime_error_f("Invalid list size for index column: %d", mListSize); -// } -// } - -// arrow::Status IndexColumnBuilder::preSlice() -// { -// arrow::Datum value_counts; -// auto options = arrow::compute::ScalarAggregateOptions::Defaults(); -// ARROW_ASSIGN_OR_RAISE(value_counts, arrow::compute::CallFunction("value_counts", {mSource}, &options)); -// auto pair = static_cast(value_counts.array()); -// mValuesArrow = std::make_shared>(pair.field(0)->data()); -// mCounts = std::make_shared>(pair.field(1)->data()); -// return arrow::Status::OK(); -// } - -// arrow::Status IndexColumnBuilder::preFind() -// { -// arrow::Datum max; -// auto options = arrow::compute::ScalarAggregateOptions::Defaults(); -// ARROW_ASSIGN_OR_RAISE(max, arrow::compute::CallFunction("max", {mSource}, &options)); -// auto maxValue = std::dynamic_pointer_cast(max.scalar())->value; -// mIndices.resize(maxValue + 1); - -// auto row = 0; -// for (auto i = 0; i < mSource->length(); ++i) { -// auto v = valueAt(i); -// if (v >= 0) { -// mValues.emplace_back(v); -// mIndices[v].push_back(row); -// } -// ++row; -// } -// std::sort(mValues.begin(), mValues.end()); - -// return arrow::Status::OK(); -// } - -// std::shared_ptr IndexColumnBuilder::resultSingle() const -// { -// std::shared_ptr array; -// auto status = static_cast(mValueBuilder)->Finish(&array); -// if (!status.ok()) { -// throw runtime_error("Cannot build an array"); -// } -// return std::make_shared(array); -// } - -// std::shared_ptr IndexColumnBuilder::resultSlice() const -// { -// std::shared_ptr array; -// auto status = static_cast(mListBuilder.get())->Finish(&array); -// if (!status.ok()) { -// throw runtime_error("Cannot build an array"); -// } -// return std::make_shared(array); -// } - -// std::shared_ptr IndexColumnBuilder::resultMulti() const -// { -// std::shared_ptr array; -// auto status = static_cast(mListBuilder.get())->Finish(&array); -// if (!status.ok()) { -// throw runtime_error("Cannot build an array"); -// } -// return std::make_shared(array); -// } - -// bool IndexColumnBuilder::findSingle(int idx) -// { -// auto count = mSourceSize - mPosition; -// while (count > 0) { -// size_t step = count / 2; -// mPosition += step; -// if (valueAt(mPosition) <= idx) { -// count -= step + 1; -// } else { -// mPosition -= step; -// count = step; -// } -// } - -// if (mPosition < mSourceSize && valueAt(mPosition) < idx) { -// ++mPosition; -// } - -// return (mPosition < mSourceSize && valueAt(mPosition) == idx); -// } - -// bool IndexColumnBuilder::findSlice(int idx) -// { -// auto count = mValuesArrow->length() - mValuePos; -// while (count > 0) { -// auto step = count / 2; -// mValuePos += step; -// if (mValuesArrow->Value(mValuePos) <= idx) { -// count -= step + 1; -// } else { -// mValuePos -= step; -// count = step; -// } -// } - -// if (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) <= idx) { -// ++mPosition; -// } - -// return (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) == idx); -// } - -// bool IndexColumnBuilder::findMulti(int idx) -// { -// return (std::find(mValues.begin(), mValues.end(), idx) != mValues.end()); -// } - -// void IndexColumnBuilder::fillSingle(int idx) -// { -// // entry point -// if (mPosition < mSourceSize && valueAt(mPosition) == idx) { -// (void)static_cast(mValueBuilder)->Append((int)mPosition); -// } else { -// (void)static_cast(mValueBuilder)->Append(-1); -// } -// } - -// void IndexColumnBuilder::fillSlice(int idx) -// { -// int data[2] = {-1, -1}; -// if (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) == idx) { -// for (auto i = 0; i < mValuePos; ++i) { -// data[0] += mCounts->Value(i); -// } -// data[0] += 1; -// data[1] = data[0] + mCounts->Value(mValuePos) - 1; -// } -// (void)static_cast(mListBuilder.get())->AppendValues(1); -// (void)static_cast(mValueBuilder)->AppendValues(data, 2); -// } - -// void IndexColumnBuilder::fillMulti(int idx) -// { -// (void)static_cast(mListBuilder.get())->Append(); -// if (std::find(mValues.begin(), mValues.end(), idx) != mValues.end()) { -// (void)static_cast(mValueBuilder)->AppendValues(mIndices[idx].data(), mIndices[idx].size()); -// } else { -// (void)static_cast(mValueBuilder)->AppendValues(nullptr, 0); -// } -// } - std::shared_ptr ChunkedArrayIterator::getCurrentArray() { auto chunk = mSource->chunk(mChunk); From 2c44b9f7841b86cddc65af449a619b092c941d29 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 25 Nov 2025 09:36:20 +0100 Subject: [PATCH 16/22] fixup! cleanup --- .../AnalysisSupport/src/AODReaderHelpers.cxx | 3 ++- .../Core/include/Framework/AnalysisHelpers.h | 13 ++++++------- .../include/Framework/IndexBuilderHelpers.h | 4 ++-- Framework/Core/src/AnalysisHelpers.cxx | 16 +++++++++------- Framework/Core/src/ExpressionJSONHelpers.cxx | 4 ++-- Framework/Core/src/IndexBuilderHelpers.cxx | 18 +++++++++--------- 6 files changed, 30 insertions(+), 28 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx index fd0f74abfb287..a91aa52c05c80 100644 --- a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx @@ -72,7 +72,8 @@ struct Buildable { outputSchema, origin, description, - version, nullptr}; + version, + nullptr}; } }; diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 5559a37a3aca1..06b91ffcc39da 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -60,11 +60,10 @@ struct IndexRecord { }; struct IndexBuilder { - static std::vector makeBuilders(std::vector>&& tables, std::vector const& records); - static void resetBuilders(std::vector& builders, std::vector>&& tables); + static std::vector makeBuilders(std::vector>&& tables, std::vector const& records); + static void resetBuilders(std::vector& builders, std::vector>&& tables); - // static std::shared_ptr materialize(std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive); - static std::shared_ptr materializeNG(std::vector& builders, std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive); + static std::shared_ptr materialize(std::vector& builders, std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive); }; } // namespace o2::soa @@ -165,7 +164,7 @@ struct Builder { header::DataDescription description; header::DataHeader::SubSpecificationType version; - std::shared_ptr> builders = nullptr; + std::shared_ptr> builders = nullptr; std::shared_ptr materialize(ProcessingContext& pc); }; @@ -722,7 +721,7 @@ struct Builds : decltype(transformBase()) { std::vector map = soa::getIndexMapping(); - std::vector builders; + std::vector builders; T* operator->() { @@ -746,7 +745,7 @@ struct Builds : decltype(transformBase()) { auto build(std::vector>&& tables) { - this->table = std::make_shared(soa::IndexBuilder::materializeNG(builders, std::forward>>(tables), map, outputSchema, metadata::exclusive)); + this->table = std::make_shared(soa::IndexBuilder::materialize(builders, std::forward>>(tables), map, outputSchema, metadata::exclusive)); return (this->table != nullptr); } }; diff --git a/Framework/Core/include/Framework/IndexBuilderHelpers.h b/Framework/Core/include/Framework/IndexBuilderHelpers.h index 539219eb8137d..30754e62a8dc3 100644 --- a/Framework/Core/include/Framework/IndexBuilderHelpers.h +++ b/Framework/Core/include/Framework/IndexBuilderHelpers.h @@ -107,11 +107,11 @@ struct ArrayBuilder : public ChunkedArrayIterator { arrow::Status preFind(); }; -struct IndexColumnBuilderNG { +struct IndexColumnBuilder { std::variant builder; size_t mResultSize = 0; int mColumnPos = -1; - IndexColumnBuilderNG(soa::IndexKind kind, int pos, arrow::MemoryPool* pool, std::shared_ptr source = nullptr); + IndexColumnBuilder(soa::IndexKind kind, int pos, arrow::MemoryPool* pool, std::shared_ptr source = nullptr); void reset(std::shared_ptr source = nullptr); bool find(int idx); diff --git a/Framework/Core/src/AnalysisHelpers.cxx b/Framework/Core/src/AnalysisHelpers.cxx index a12fb1ae020c3..b15bcec0516d3 100644 --- a/Framework/Core/src/AnalysisHelpers.cxx +++ b/Framework/Core/src/AnalysisHelpers.cxx @@ -14,9 +14,10 @@ #include "IndexJSONHelpers.h" namespace o2::soa { -std::vector IndexBuilder::makeBuilders(std::vector>&& tables, std::vector const& records) +std::vector IndexBuilder::makeBuilders(std::vector>&& tables, std::vector const& records) { - std::vector builders; + std::vector builders; + builders.reserve(records.size()); auto pool = arrow::default_memory_pool(); builders.emplace_back(IndexKind::IdxSelf, records[0].pos, pool); if (records[0].pos >= 0) { @@ -30,7 +31,7 @@ std::vector IndexBuilder::makeBuilders(std::vec return builders; } -void IndexBuilder::resetBuilders(std::vector& builders, std::vector>&& tables) +void IndexBuilder::resetBuilders(std::vector& builders, std::vector>&& tables) { for (auto i = 0U; i < builders.size(); ++i) { builders[i].reset(builders[i].mColumnPos >= 0 ? tables[i]->column(builders[i].mColumnPos) : nullptr); @@ -41,7 +42,7 @@ void IndexBuilder::resetBuilders(std::vector& b } } -std::shared_ptr IndexBuilder::materializeNG(std::vector& builders, std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive) +std::shared_ptr IndexBuilder::materialize(std::vector& builders, std::vector>&& tables, std::vector const& records, std::shared_ptr const& schema, bool exclusive) { auto size = tables[0]->num_rows(); if (builders.empty()) { @@ -77,7 +78,7 @@ std::shared_ptr IndexBuilder::materializeNG(std::vector> arrays; // same + std::vector> arrays; arrays.reserve(builders.size()); for (auto& builder : builders) { arrays.push_back(builder.result()); @@ -206,11 +207,12 @@ std::shared_ptr Spawner::materialize(ProcessingContext& pc) const std::shared_ptr Builder::materialize(ProcessingContext& pc) { if (builders == nullptr) { - builders = std::make_shared>(); + builders = std::make_shared>(); + builders->reserve(records.size()); } std::shared_ptr result; auto tables = extractSources(pc, labels); - result = o2::soa::IndexBuilder::materializeNG(*builders.get(), std::move(tables), records, outputSchema, exclusive); + result = o2::soa::IndexBuilder::materialize(*builders.get(), std::move(tables), records, outputSchema, exclusive); return result; } } // namespace o2::framework diff --git a/Framework/Core/src/ExpressionJSONHelpers.cxx b/Framework/Core/src/ExpressionJSONHelpers.cxx index 28685fecad468..a6e19875381cd 100644 --- a/Framework/Core/src/ExpressionJSONHelpers.cxx +++ b/Framework/Core/src/ExpressionJSONHelpers.cxx @@ -681,7 +681,7 @@ struct SchemaReader : public rapidjson::BaseReaderHandler, Sch bool StartArray() { - debug << "Starting array" << std::endl; + debug << "StartArray()" << std::endl; if (states.top() == State::IN_START && currentKey.compare("fields") == 0) { states.push(State::IN_LIST); return true; @@ -692,7 +692,7 @@ struct SchemaReader : public rapidjson::BaseReaderHandler, Sch bool EndArray(SizeType) { - debug << "Ending array" << std::endl; + debug << "EndArray()" << std::endl; if (states.top() == State::IN_LIST) { // finalize schema schema = std::make_shared(fields); diff --git a/Framework/Core/src/IndexBuilderHelpers.cxx b/Framework/Core/src/IndexBuilderHelpers.cxx index e23851b9d5cc7..d7231f72cbee8 100644 --- a/Framework/Core/src/IndexBuilderHelpers.cxx +++ b/Framework/Core/src/IndexBuilderHelpers.cxx @@ -23,7 +23,7 @@ namespace o2::framework { void cannotBuildAnArray() { - throw framework::runtime_error("Cannot build an array"); + throw framework::runtime_error("Cannot finish an array"); } void cannotCreateIndexBuilder() @@ -138,7 +138,7 @@ std::shared_ptr SingleBuilder::result() const std::shared_ptr array; auto status = static_cast(mBuilder.get())->Finish(&array); if (!status.ok()) { - throw runtime_error("Cannot build an array"); + cannotBuildAnArray(); } return std::make_shared(array); } @@ -211,7 +211,7 @@ std::shared_ptr SliceBuilder::result() const std::shared_ptr array; auto status = static_cast(mListBuilder.get())->Finish(&array); if (!status.ok()) { - throw runtime_error("Cannot build an array"); + cannotBuildAnArray(); } return std::make_shared(array); } @@ -274,7 +274,7 @@ std::shared_ptr ArrayBuilder::result() const std::shared_ptr array; auto status = static_cast(mListBuilder.get())->Finish(&array); if (!status.ok()) { - throw runtime_error("Cannot build an array"); + cannotBuildAnArray(); } return std::make_shared(array); } @@ -301,7 +301,7 @@ arrow::Status ArrayBuilder::preFind() return arrow::Status::OK(); } -IndexColumnBuilderNG::IndexColumnBuilderNG(soa::IndexKind kind, int pos, arrow::MemoryPool* pool, std::shared_ptr source) +IndexColumnBuilder::IndexColumnBuilder(soa::IndexKind kind, int pos, arrow::MemoryPool* pool, std::shared_ptr source) : mColumnPos{pos} { switch (kind) { @@ -322,7 +322,7 @@ IndexColumnBuilderNG::IndexColumnBuilderNG(soa::IndexKind kind, int pos, arrow:: } } -void IndexColumnBuilderNG::reset(std::shared_ptr source) +void IndexColumnBuilder::reset(std::shared_ptr source) { std::visit( overloaded{ @@ -331,7 +331,7 @@ void IndexColumnBuilderNG::reset(std::shared_ptr source) builder); } -bool IndexColumnBuilderNG::find(int idx) +bool IndexColumnBuilder::find(int idx) { return std::visit( overloaded{ @@ -341,7 +341,7 @@ bool IndexColumnBuilderNG::find(int idx) builder); } -void IndexColumnBuilderNG::fill(int idx) +void IndexColumnBuilder::fill(int idx) { std::visit( overloaded{ @@ -350,7 +350,7 @@ void IndexColumnBuilderNG::fill(int idx) builder); } -std::shared_ptr IndexColumnBuilderNG::result() const +std::shared_ptr IndexColumnBuilder::result() const { return std::visit( overloaded{ From ac6f3bd2496c3ab98d578e348416605d0c473587 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 25 Nov 2025 10:12:09 +0100 Subject: [PATCH 17/22] fixup! cleanup --- Framework/Core/test/test_IndexBuilder.cxx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Framework/Core/test/test_IndexBuilder.cxx b/Framework/Core/test/test_IndexBuilder.cxx index a73ae7fbb5eb4..e34c5946ad55a 100644 --- a/Framework/Core/test/test_IndexBuilder.cxx +++ b/Framework/Core/test/test_IndexBuilder.cxx @@ -104,8 +104,8 @@ TEST_CASE("TestIndexBuilder") auto map = getIndexMapping>::metadata>(); auto schema1 = o2::aod::MetadataTrait>::metadata::getSchema(); - std::vector builders1; - auto t5 = IndexBuilder::materializeNG(builders1, {t1, t2, t3, t4}, map, schema1, true); + std::vector builders1; + auto t5 = IndexBuilder::materialize(builders1, {t1, t2, t3, t4}, map, schema1, true); // auto t5 = IndexBuilder::materialize({t1, t2, t3, t4}, map, schema1, true); REQUIRE(t5->num_rows() == 4); IDXs idxt{t5}; @@ -118,8 +118,8 @@ TEST_CASE("TestIndexBuilder") map = getIndexMapping>::metadata>(); auto schema2 = o2::aod::MetadataTrait>::metadata::getSchema(); - std::vector builders2; - auto t6 = IndexBuilder::materializeNG(builders2, {t2, t1, t3, t4}, map, schema2, false); + std::vector builders2; + auto t6 = IndexBuilder::materialize(builders2, {t2, t1, t3, t4}, map, schema2, false); REQUIRE(t6->num_rows() == st2.size()); IDX2s idxs{t6}; std::array fs{0, 1, 2, -1, -1, 4, -1}; @@ -219,8 +219,8 @@ TEST_CASE("AdvancedIndexTables") auto map = getIndexMapping>::metadata>(); auto schema3 = o2::aod::MetadataTrait>::metadata::getSchema(); - std::vector builders3; - auto t3 = IndexBuilder::materializeNG(builders3, {t1, t2, tc}, map, schema3, false); + std::vector builders3; + auto t3 = IndexBuilder::materialize(builders3, {t1, t2, tc}, map, schema3, false); REQUIRE(t3->num_rows() == st1.size()); IDX3s idxs{t3}; idxs.bindExternalIndices(&st1, &st2, &st3); From ab27751de15a16ac1a02c8079235d2ed4d24b90f Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 25 Nov 2025 10:15:45 +0100 Subject: [PATCH 18/22] fixup! rework index builder --- Framework/Core/include/Framework/AnalysisHelpers.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 06b91ffcc39da..92ed7fc7dc9de 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -626,7 +626,6 @@ struct Spawns : decltype(transformBase()) { template concept is_spawns = requires(T t) { typename T::metadata; - requires std::same_as; requires std::same_as>; }; @@ -684,7 +683,6 @@ using DefinesDelayed = Defines; template concept is_defines = requires(T t) { typename T::metadata; - requires std::same_as; requires std::same_as>; requires std::same_as; &T::recompile; @@ -754,7 +752,7 @@ template concept is_builds = requires(T t) { typename T::metadata; typename T::Key; - requires std::same_as; + requires std::same_as>; }; /// This helper class allows you to declare things which will be created by a From 181cd9a23aab9e46c78b57fa7f8a60755b181481 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 25 Nov 2025 12:14:13 +0100 Subject: [PATCH 19/22] intersecting concepts --- Framework/Core/include/Framework/AnalysisHelpers.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 92ed7fc7dc9de..7ad0fdd23f66e 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -640,7 +640,6 @@ struct Defines : decltype(transformBase()) { using spawnable_t = T; using metadata = decltype(transformBase())::metadata; using extension_t = typename metadata::extension_table_t; - using base_table_t = typename metadata::base_table_t; using placeholders_pack_t = typename metadata::placeholders_pack_t; static constexpr size_t N = framework::pack_size(placeholders_pack_t{}); @@ -682,8 +681,6 @@ using DefinesDelayed = Defines; template concept is_defines = requires(T t) { - typename T::metadata; - requires std::same_as>; requires std::same_as; &T::recompile; }; From 8a650a8bdaa9e721cb9ab71358fb217c468ede2b Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 25 Nov 2025 12:18:48 +0100 Subject: [PATCH 20/22] fixup! intersecting concepts --- Framework/Core/include/Framework/AnalysisHelpers.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 7ad0fdd23f66e..2f5b5001d97d6 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -626,6 +626,7 @@ struct Spawns : decltype(transformBase()) { template concept is_spawns = requires(T t) { typename T::metadata; + typename T::expression_pack_t; requires std::same_as>; }; @@ -681,6 +682,9 @@ using DefinesDelayed = Defines; template concept is_defines = requires(T t) { + typename T::metadata; + typename T::placeholders_pack_t; + requires std::same_as>; requires std::same_as; &T::recompile; }; From d548c7bf2503baa18e4adf09975b74ff8dcd3222 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Tue, 25 Nov 2025 13:25:40 +0000 Subject: [PATCH 21/22] Please consider the following formatting changes --- .../AnalysisSupport/src/AODReaderHelpers.cxx | 3 +-- Framework/AnalysisSupport/src/AODReaderHelpers.h | 1 - Framework/Core/include/Framework/ASoA.h | 5 +++-- .../Core/include/Framework/AnalysisHelpers.h | 6 ++++-- Framework/Core/src/AnalysisHelpers.cxx | 3 ++- Framework/Core/src/ArrowSupport.cxx | 2 +- Framework/Core/src/IndexJSONHelpers.cxx | 15 +++++++++------ Framework/Core/src/WorkflowHelpers.cxx | 4 ++-- Framework/Core/test/test_IndexBuilder.cxx | 1 - 9 files changed, 22 insertions(+), 18 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx index a91aa52c05c80..40aa5a9537c7f 100644 --- a/Framework/AnalysisSupport/src/AODReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODReaderHelpers.cxx @@ -47,7 +47,7 @@ struct Buildable { std::stringstream iws(loc->defaultValue.get()); records = IndexJSONHelpers::read(iws); - loc = std::find_if(spec.metadata.begin(), spec.metadata.end(), [](ConfigParamSpec const& cps){ return cps.name.compare("index-exclusive") == 0; }); + loc = std::find_if(spec.metadata.begin(), spec.metadata.end(), [](ConfigParamSpec const& cps) { return cps.name.compare("index-exclusive") == 0; }); exclusive = loc->defaultValue.get(); for (auto const& r : records) { @@ -75,7 +75,6 @@ struct Buildable { version, nullptr}; } - }; } // namespace diff --git a/Framework/AnalysisSupport/src/AODReaderHelpers.h b/Framework/AnalysisSupport/src/AODReaderHelpers.h index 505bc95b863c0..197907ca3ccb1 100644 --- a/Framework/AnalysisSupport/src/AODReaderHelpers.h +++ b/Framework/AnalysisSupport/src/AODReaderHelpers.h @@ -18,7 +18,6 @@ namespace o2::framework::readers { - struct AODReaderHelpers { static AlgorithmSpec rootFileReaderCallback(); static AlgorithmSpec aodSpawnerCallback(ConfigContext const& ctx); diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 6ec7fde930aa6..a30363605af36 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -217,7 +217,8 @@ using is_self_index_t = typename std::conditional_t, std namespace o2::aod { -namespace { +namespace +{ template map> static consteval int getIndexPosToKey_impl() { @@ -228,7 +229,7 @@ static consteval int getIndexPosToKey_impl() return -1; } } -} +} // namespace /// Base type for table metadata template diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 2f5b5001d97d6..b973118abd417 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -263,8 +263,10 @@ inline constexpr auto getIndexMapping() } else { idx.emplace_back(o2::aod::label(), C::columnLabel(), getIndexKind(), pos); } - }.template operator()>(), ...); - }(std::make_index_sequence());; + }.template operator()>(), + ...); + }(std::make_index_sequence()); + ; return idx; } diff --git a/Framework/Core/src/AnalysisHelpers.cxx b/Framework/Core/src/AnalysisHelpers.cxx index b15bcec0516d3..b8e0348d5df9c 100644 --- a/Framework/Core/src/AnalysisHelpers.cxx +++ b/Framework/Core/src/AnalysisHelpers.cxx @@ -13,7 +13,8 @@ #include "ExpressionJSONHelpers.h" #include "IndexJSONHelpers.h" -namespace o2::soa { +namespace o2::soa +{ std::vector IndexBuilder::makeBuilders(std::vector>&& tables, std::vector const& records) { std::vector builders; diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 98b0c07e55551..cf2d364027932 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -618,7 +618,7 @@ o2::framework::ServiceSpec ArrowSupport::arrowBackendSpec() builder->outputs.clear(); // replace AlgorithmSpec // FIXME: it should be made more generic, so it does not need replacement... - builder->algorithm = PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "IndexTableBuilder", ctx);//readers::AODReaderHelpers::indexBuilderCallback(ctx); + builder->algorithm = PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "IndexTableBuilder", ctx); // readers::AODReaderHelpers::indexBuilderCallback(ctx); AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, *builder); } diff --git a/Framework/Core/src/IndexJSONHelpers.cxx b/Framework/Core/src/IndexJSONHelpers.cxx index d18af7e43ddba..19ae94a4bcd4c 100644 --- a/Framework/Core/src/IndexJSONHelpers.cxx +++ b/Framework/Core/src/IndexJSONHelpers.cxx @@ -19,8 +19,10 @@ #include #include -namespace o2::framework { -namespace { +namespace o2::framework +{ +namespace +{ struct IndexRecordsReader : public rapidjson::BaseReaderHandler, IndexRecordsReader> { using Ch = rapidjson::UTF8<>::Ch; using SizeType = rapidjson::SizeType; @@ -179,7 +181,7 @@ struct IndexRecordsReader : public rapidjson::BaseReaderHandler IndexJSONHelpers::read(std::istream& s) { @@ -195,7 +197,8 @@ std::vector IndexJSONHelpers::read(std::istream& s) return irreader.records; } -namespace { +namespace +{ void writeRecords(rapidjson::Writer& w, std::vector& records) { for (auto& r : records) { @@ -211,7 +214,7 @@ void writeRecords(rapidjson::Writer& w, std::vector& irs) { @@ -224,4 +227,4 @@ void IndexJSONHelpers::write(std::ostream& o, std::vector& w.EndArray(); w.EndObject(); } -} +} // namespace o2::framework diff --git a/Framework/Core/src/WorkflowHelpers.cxx b/Framework/Core/src/WorkflowHelpers.cxx index 08660ebf5c3ad..61443f5f71616 100644 --- a/Framework/Core/src/WorkflowHelpers.cxx +++ b/Framework/Core/src/WorkflowHelpers.cxx @@ -415,7 +415,7 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext "internal-dpl-aod-index-builder", {}, {}, - PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "IndexTableBuilder", ctx),//readers::AODReaderHelpers::indexBuilderCallback(ctx), + PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "IndexTableBuilder", ctx), // readers::AODReaderHelpers::indexBuilderCallback(ctx), {}}; AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, indexBuilder); @@ -435,7 +435,7 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext "internal-dpl-aod-spawner", {}, {}, - PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "ExtendedTableSpawner", ctx),//readers::AODReaderHelpers::aodSpawnerCallback(ctx), + PluginManager::loadAlgorithmFromPlugin("O2FrameworkOnDemandTablesSupport", "ExtendedTableSpawner", ctx), // readers::AODReaderHelpers::aodSpawnerCallback(ctx), {}}; AnalysisSupportHelpers::addMissingOutputsToSpawner({}, ac.spawnerInputs, ac.requestedAODs, aodSpawner); diff --git a/Framework/Core/test/test_IndexBuilder.cxx b/Framework/Core/test/test_IndexBuilder.cxx index e34c5946ad55a..e357b1164af80 100644 --- a/Framework/Core/test/test_IndexBuilder.cxx +++ b/Framework/Core/test/test_IndexBuilder.cxx @@ -243,7 +243,6 @@ TEST_CASE("AdvancedIndexTables") } } - TEST_CASE("IndexRecordsSerialization") { auto map = getIndexMapping>::metadata>(); From 290177da54b3902a3619c07e1c5882efeec002bd Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 26 Nov 2025 14:46:34 +0100 Subject: [PATCH 22/22] Apply suggestion from @ktf --- Framework/Core/include/Framework/AnalysisHelpers.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index b973118abd417..3666fe1299489 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -25,7 +25,6 @@ #include "Framework/TableBuilder.h" #include "Framework/Traits.h" -#include #include namespace o2::soa {