From 25ba295a1d7aaba245b16b29cc3c3cfc492fe85b Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 8 Aug 2025 11:38:49 +0200 Subject: [PATCH] DPL Analysis: add support for BinaryView columns The idea is to be able to have BinaryViews on top of the CCDB object blobs which are already cached in shared memory, so that we can have a table with rows of the kind: (timestamp, blob-requested-ccdb-object-1, blob-requested-ccdb-object-2) which then can be joined to the timestamps to provide access to the associated CCDB Object. --- Framework/Core/include/Framework/ASoA.h | 21 +++++++---- Framework/Core/include/Framework/ArrowTypes.h | 5 +++ .../Core/include/Framework/TableBuilder.h | 33 ++++++++++++++++- Framework/Core/test/test_TableBuilder.cxx | 35 ++++++++++++++++++- 4 files changed, 86 insertions(+), 8 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index ccf2cab5e6807..872f4d9fc5a79 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -23,11 +23,12 @@ #include "Framework/ArrowTableSlicingCache.h" // IWYU pragma: export #include "Framework/SliceCache.h" // IWYU pragma: export #include "Framework/VariantHelpers.h" // IWYU pragma: export -#include // IWYU pragma: export -#include // IWYU pragma: export -#include // IWYU pragma: export -#include // IWYU pragma: export -#include // IWYU pragma: export +#include +#include // IWYU pragma: export +#include // IWYU pragma: export +#include // IWYU pragma: export +#include // IWYU pragma: export +#include // IWYU pragma: export #include #include #include @@ -579,7 +580,15 @@ class ColumnIterator : ChunkingPolicy } decltype(auto) operator*() const - requires((!std::same_as>) && !std::same_as, arrow::ListArray>) + requires((!std::same_as>) && std::same_as, arrow::BinaryViewArray>) + { + checkSkipChunk(); + auto array = std::static_pointer_cast(mColumn->chunk(mCurrentChunk)); + return 
array->GetView(*mCurrentPos - mFirstIndex); + } + + decltype(auto) operator*() const + requires((!std::same_as>) && !std::same_as, arrow::ListArray> && !std::same_as, arrow::BinaryViewArray>) { checkSkipChunk(); return *(mCurrent + (*mCurrentPos >> SCALE_FACTOR)); diff --git a/Framework/Core/include/Framework/ArrowTypes.h b/Framework/Core/include/Framework/ArrowTypes.h index 69946b6f35a50..6fd70113fede7 100644 --- a/Framework/Core/include/Framework/ArrowTypes.h +++ b/Framework/Core/include/Framework/ArrowTypes.h @@ -12,6 +12,7 @@ #ifndef O2_FRAMEWORK_ARROWTYPES_H #define O2_FRAMEWORK_ARROWTYPES_H #include "arrow/type_fwd.h" +#include namespace o2::soa { @@ -62,6 +63,10 @@ template <> struct arrow_array_for { using type = arrow::DoubleArray; }; +template <> +struct arrow_array_for> { + using type = arrow::BinaryViewArray; +}; template struct arrow_array_for { using type = arrow::FixedSizeListArray; diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index eb56791acfd3c..74395a2680077 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -98,6 +98,7 @@ O2_ARROW_STL_CONVERSION(long unsigned, UInt64Type) O2_ARROW_STL_CONVERSION(float, FloatType) O2_ARROW_STL_CONVERSION(double, DoubleType) O2_ARROW_STL_CONVERSION(std::string, StringType) +O2_ARROW_STL_CONVERSION(std::span, BinaryViewType) } // namespace detail void addLabelToSchema(std::shared_ptr& schema, const char* label); @@ -274,6 +275,29 @@ struct BuilderMaker { } }; +template <> +struct BuilderMaker> { + using FillType = std::span; + using STLValueType = std::span; + using ArrowType = typename detail::ConversionTraits>::ArrowType; + using BuilderType = typename arrow::TypeTraits::BuilderType; + + static std::unique_ptr make(arrow::MemoryPool* pool) + { + return std::make_unique(pool); + } + + static std::shared_ptr make_datatype() + { + return arrow::TypeTraits::type_singleton(); + } + + 
static arrow::Status append(BuilderType& builder, std::span value) + { + return builder.Append((char*)value.data(), (int64_t)value.size()); + } +}; + template struct BuilderMaker> { using FillType = std::pair; @@ -422,6 +446,13 @@ struct DirectInsertion { return builder->Append(value); } + template + requires std::same_as, T> + arrow::Status append(BUILDER& builder, T value) + { + return builder->Append((char*)value.data(), (int64_t)value.size()); + } + template arrow::Status flush(BUILDER&) { @@ -569,7 +600,7 @@ template using IndexedHoldersTuple = decltype(makeHolderTypes()); template -concept ShouldNotDeconstruct = std::is_bounded_array_v || std::is_arithmetic_v || framework::is_base_of_template_v; +concept ShouldNotDeconstruct = std::is_bounded_array_v || std::is_arithmetic_v || framework::is_base_of_template_v || std::same_as, T>; /// Helper class which creates a lambda suitable for building /// an arrow table from a tuple. This can be used, for example diff --git a/Framework/Core/test/test_TableBuilder.cxx b/Framework/Core/test/test_TableBuilder.cxx index 00cbbbc59b725..02bc8fbe67833 100644 --- a/Framework/Core/test/test_TableBuilder.cxx +++ b/Framework/Core/test/test_TableBuilder.cxx @@ -1,4 +1,4 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. // See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. // All rights not expressly granted are reserved. 
// @@ -19,6 +19,8 @@ #include #include +#include + using namespace o2::framework; // We use a different namespace to avoid clashes with the @@ -27,10 +29,12 @@ namespace test2 { DECLARE_SOA_COLUMN_FULL(X, x, uint64_t, "x"); DECLARE_SOA_COLUMN_FULL(Y, y, uint64_t, "y"); +DECLARE_SOA_COLUMN_FULL(Blob, blob, std::span, "blob"); DECLARE_SOA_COLUMN_FULL(Pos, pos, int[4], "pos"); } // namespace test2 using TestTable = o2::soa::InPlaceTable<0, test2::X, test2::Y>; +using SpanTable = o2::soa::InPlaceTable<0, test2::Blob>; using ArrayTable = o2::soa::InPlaceTable<0, test2::Pos>; TEST_CASE("TestTableBuilder") @@ -189,6 +193,35 @@ TEST_CASE("TestTableBuilderMore") REQUIRE(table->schema()->field(3)->type()->id() == arrow::boolean()->id()); } +TEST_CASE("TestSpan") +{ + TableBuilder builder; + std::vector buffer{10, std::byte{'c'}}; + std::vector buffer1{10, std::byte{'a'}}; + + auto rowWriter = builder.persist>({"blob"}); + rowWriter(0, std::span(buffer)); + rowWriter(0, std::span(buffer.data() + 1, 9)); + rowWriter(0, std::span(buffer1.data(), 3)); + rowWriter(0, std::span(buffer1.data(), 1)); + auto table = builder.finalize(); + + REQUIRE(table->num_columns() == 1); + REQUIRE(table->num_rows() == 4); + REQUIRE(table->schema()->field(0)->name() == "blob"); + REQUIRE(table->schema()->field(0)->type()->id() == arrow::binary_view()->id()); + + auto readBack = SpanTable{table}; + auto row = readBack.begin(); + REQUIRE(row.blob() == "cccccccccc"); + ++row; + REQUIRE(row.blob() == "ccccccccc"); + ++row; + REQUIRE(row.blob() == "aaa"); + ++row; + REQUIRE(row.blob() == "a"); +} + TEST_CASE("TestSoAIntegration") { TableBuilder builder;