From bd0c539101b43c6d8765781df64b3ad5a73b5c1f Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 11 Aug 2025 11:55:55 +0200 Subject: [PATCH 1/6] add offset cache --- .../Framework/ArrowTableSlicingCache.h | 4 ++ Framework/Core/src/ArrowTableSlicingCache.cxx | 64 ++++++++++++------- 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index 292a67023fc5e..e5e595e560377 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -23,6 +23,8 @@ using ListVector = std::vector>; struct SliceInfoPtr { gsl::span values; gsl::span counts; + std::vector const* offsets; + std::vector const* sizes; std::pair getSliceFor(int value) const; }; @@ -66,6 +68,8 @@ struct ArrowTableSlicingCache { Cache bindingsKeys; std::vector>> values; std::vector>> counts; + std::vector> offsets; + std::vector> sizes; Cache bindingsKeysUnsorted; std::vector> valuesUnsorted; diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index e001e293c4733..a640ce799e540 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -35,25 +35,8 @@ std::pair SliceInfoPtr::getSliceFor(int value) const if (values.empty()) { return {offset, 0}; } - int64_t p = static_cast(values.size()) - 1; - while (values[p] < 0) { - --p; - if (p < 0) { - return {offset, 0}; - } - } - if (value > values[p]) { - return {offset, 0}; - } - - for (auto i = 0U; i < values.size(); ++i) { - if (values[i] == value) { - return {offset, counts[i]}; - } - offset += counts[i]; - } - return {offset, 0}; + return {(*offsets)[value], (*sizes)[value]}; } gsl::span SliceInfoUnsortedPtr::getSliceFor(int value) const @@ -84,6 +67,8 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorte { values.resize(bindingsKeys.size()); counts.resize(bindingsKeys.size()); + offsets.resize(bindingsKeys.size()); + sizes.resize(bindingsKeys.size()); valuesUnsorted.resize(bindingsKeysUnsorted.size()); groups.resize(bindingsKeysUnsorted.size()); @@ -97,6 +82,10 @@ void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted) values.resize(bindingsKeys.size()); counts.clear(); counts.resize(bindingsKeys.size()); + offsets.clear(); + offsets.resize(bindingsKeys.size()); + sizes.clear(); + sizes.resize(bindingsKeys.size()); valuesUnsorted.clear(); valuesUnsorted.resize(bindingsKeysUnsorted.size()); groups.clear(); @@ -108,6 +97,8 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< if (table->num_rows() == 0) { values[pos].reset(); counts[pos].reset(); + offsets[pos].clear(); + sizes[pos].clear(); return arrow::Status::OK(); } auto& [b, k, e] = bindingsKeys[pos]; @@ -125,6 +116,31 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< counts[pos].reset(); values[pos] = std::make_shared>(pair.field(0)->data()); counts[pos] = std::make_shared>(pair.field(1)->data()); + + int maxValue = 0; + for (auto i = values[pos]->length() - 1; i >= 0; --i) { + if (values[pos]->Value(i) < 0) { + continue; + } else { + maxValue = values[pos]->Value(i); + break; + } + } + + offsets[pos].resize(maxValue + 1); + sizes[pos].resize(maxValue); + std::fill(offsets[pos].begin(), offsets[pos].end(), 0); + std::fill(sizes[pos].begin(), sizes[pos].end(), 0); + int64_t offset = 0; + for (auto i = 0U; i < values[pos]->length(); ++i) { + auto value = values[pos]->Value(i); + if (value >= 0) { + offsets[pos][value] = offset; + sizes[pos][value] = counts[pos]->Value(i); + } + offset += counts[pos]->Value(i); + } + offsets[pos][maxValue] = offset; return arrow::Status::OK(); } @@ -221,14 +237,18 @@ SliceInfoPtr ArrowTableSlicingCache::getCacheForPos(int pos) const { if (values[pos] == nullptr && counts[pos] == nullptr) { return { - {}, - {} // + {},// + {},// + nullptr, // + nullptr // }; } return { - {reinterpret_cast(values[pos]->values()->data()), static_cast(values[pos]->length())}, - {reinterpret_cast(counts[pos]->values()->data()), static_cast(counts[pos]->length())} // + {reinterpret_cast(values[pos]->values()->data()), static_cast(values[pos]->length())}, // + {reinterpret_cast(counts[pos]->values()->data()), static_cast(counts[pos]->length())}, // + &(offsets[pos]), // + &(sizes[pos]) // }; } From 0315fa16cd9d4c6bff13206c5d26e1358d08be69 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 12 Aug 2025 08:25:53 +0200 Subject: [PATCH 2/6] fix failing tests --- .../include/Framework/ArrowTableSlicingCache.h | 2 -- Framework/Core/include/Framework/GroupSlicer.h | 4 +--- Framework/Core/src/ArrowTableSlicingCache.cxx | 14 ++++++-------- Framework/Core/test/test_GroupSlicer.cxx | 14 +++++--------- 4 files changed, 12 insertions(+), 22 deletions(-) diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index e5e595e560377..3285db458bd71 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -21,8 +21,6 @@ namespace o2::framework using ListVector = std::vector>; struct SliceInfoPtr { - gsl::span values; - gsl::span counts; std::vector const* offsets; std::vector const* sizes; diff --git a/Framework/Core/include/Framework/GroupSlicer.h b/Framework/Core/include/Framework/GroupSlicer.h index b8436314b057e..112bf7e147ff0 100644 --- a/Framework/Core/include/Framework/GroupSlicer.h +++ b/Framework/Core/include/Framework/GroupSlicer.h @@ -246,9 +246,7 @@ struct GroupSlicer { pos = position; } // optimized split - auto oc = sliceInfos[index].getSliceFor(pos); - uint64_t offset = oc.first; - auto count = oc.second; + auto [offset, count] = sliceInfos[index].getSliceFor(pos); auto groupedElementsTable = originalTable.rawSlice(offset, offset + count - 1); groupedElementsTable.bindInternalIndicesTo(&originalTable); return groupedElementsTable; diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index a640ce799e540..91e47e60fb9d2 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -32,7 +32,10 @@ void updatePairList(Cache& list, std::string const& binding, std::string const& std::pair SliceInfoPtr::getSliceFor(int value) const { int64_t offset = 0; - if (values.empty()) { + if ((*offsets).empty()) { + return {offset, 0}; + } + if ((size_t)value > (*offsets).size()) { return {offset, 0}; } @@ -117,7 +120,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< values[pos] = std::make_shared>(pair.field(0)->data()); counts[pos] = std::make_shared>(pair.field(1)->data()); - int maxValue = 0; + int maxValue = -1; for (auto i = values[pos]->length() - 1; i >= 0; --i) { if (values[pos]->Value(i) < 0) { continue; @@ -128,7 +131,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< } offsets[pos].resize(maxValue + 1); - sizes[pos].resize(maxValue); + sizes[pos].resize(maxValue + 1); std::fill(offsets[pos].begin(), offsets[pos].end(), 0); std::fill(sizes[pos].begin(), sizes[pos].end(), 0); int64_t offset = 0; @@ -140,7 +143,6 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< } offset += counts[pos]->Value(i); } - offsets[pos][maxValue] = offset; return arrow::Status::OK(); } @@ -237,16 +239,12 @@ SliceInfoPtr ArrowTableSlicingCache::getCacheForPos(int pos) const { if (values[pos] == nullptr && counts[pos] == nullptr) { return { - {},// - {},// nullptr, // nullptr // }; } return { - {reinterpret_cast(values[pos]->values()->data()), static_cast(values[pos]->length())}, // - {reinterpret_cast(counts[pos]->values()->data()), static_cast(counts[pos]->length())}, // &(offsets[pos]), // &(sizes[pos]) // }; diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 091c21eeae229..9e64dc6738ed0 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -260,21 +260,19 @@ TEST_CASE("GroupSlicerMismatchedGroups") auto s = slices.updateCacheEntry(0, trkTable); o2::framework::GroupSlicer g(e, tt, slices); - auto count = 0; for (auto& slice : g) { auto as = slice.associatedTables(); auto gg = slice.groupingElement(); - REQUIRE(gg.globalIndex() == count); + REQUIRE(gg.globalIndex() == (int64_t)slice.position); auto trks = std::get(as); - if (count == 3 || count == 10 || count == 12 || count == 16 || count == 19) { + if (slice.position == 3 || slice.position == 10 || slice.position == 12 || slice.position == 16 || slice.position == 19) { REQUIRE(trks.size() == 0); } else { REQUIRE(trks.size() == 10); } for (auto& trk : trks) { - REQUIRE(trk.eventId() == count); + REQUIRE(trk.eventId() == (int64_t)slice.position); } - ++count; } } @@ -510,7 +508,7 @@ TEST_CASE("GroupSlicerMismatchedUnsortedFilteredGroupsWithSelfIndex") { TableBuilder builderE; auto evtsWriter = builderE.cursor(); - for (auto i = 0; i < 20; ++i) { + for (auto i = 0; i < 10; ++i) { evtsWriter(0, i, 0.5f * i, 2.f * i, 3.f * i); } auto evtTable = builderE.finalize(); @@ -523,13 +521,12 @@ TEST_CASE("GroupSlicerMismatchedUnsortedFilteredGroupsWithSelfIndex") std::uniform_int_distribution<> distrib(0, 99); for (auto i = 0; i < 100; ++i) { - filler[0] = distrib(gen); filler[1] = distrib(gen); if (filler[0] > filler[1]) { std::swap(filler[0], filler[1]); } - partsWriter(0, std::floor(i / 10.), i, filler); + partsWriter(0, std::floor( i / 10.), i, filler); } auto partsTable = builderP.finalize(); @@ -541,7 +538,6 @@ TEST_CASE("GroupSlicerMismatchedUnsortedFilteredGroupsWithSelfIndex") auto thingsTable = builderT.finalize(); aod::Events e{evtTable}; - // aod::Parts p{partsTable}; aod::Things t{thingsTable}; using FilteredParts = soa::Filtered; auto size = distrib(gen); From 28e0239f27e3a0c55a0e3fe6f61ca36acb0e2e70 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Tue, 12 Aug 2025 06:26:57 +0000 Subject: [PATCH 3/6] Please consider the following formatting changes --- Framework/Core/src/ArrowTableSlicingCache.cxx | 6 +++--- Framework/Core/test/test_GroupSlicer.cxx | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 91e47e60fb9d2..1baa2fed6db05 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -240,13 +240,13 @@ SliceInfoPtr ArrowTableSlicingCache::getCacheForPos(int pos) const if (values[pos] == nullptr && counts[pos] == nullptr) { return { nullptr, // - nullptr // + nullptr // }; } return { - &(offsets[pos]), // - &(sizes[pos]) // + &(offsets[pos]), // + &(sizes[pos]) // }; } diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 9e64dc6738ed0..7f2d301a6203a 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -526,7 +526,7 @@ TEST_CASE("GroupSlicerMismatchedUnsortedFilteredGroupsWithSelfIndex") if (filler[0] > filler[1]) { std::swap(filler[0], filler[1]); } - partsWriter(0, std::floor( i / 10.), i, filler); + partsWriter(0, std::floor(i / 10.), i, filler); } auto partsTable = builderP.finalize(); From 7bfaf51512152b865efa650f21145927073bcbfa Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 12 Aug 2025 10:17:27 +0200 Subject: [PATCH 4/6] fixup! fix failing tests --- Framework/Core/src/ArrowTableSlicingCache.cxx | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 1baa2fed6db05..88f05764da4a7 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -97,11 +97,11 @@ void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted) arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr const& table) { + values[pos].reset(); + counts[pos].reset(); + offsets[pos].clear(); + sizes[pos].clear(); if (table->num_rows() == 0) { - values[pos].reset(); - counts[pos].reset(); - offsets[pos].clear(); - sizes[pos].clear(); return arrow::Status::OK(); } auto& [b, k, e] = bindingsKeys[pos]; @@ -137,11 +137,12 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< int64_t offset = 0; for (auto i = 0U; i < values[pos]->length(); ++i) { auto value = values[pos]->Value(i); + auto count = counts[pos]->Value(i); if (value >= 0) { offsets[pos][value] = offset; - sizes[pos][value] = counts[pos]->Value(i); + sizes[pos][value] = count; } - offset += counts[pos]->Value(i); + offset += count; } return arrow::Status::OK(); } From 93239b68e993a656a79f6fb364fde810985e134d Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 13 Aug 2025 10:14:55 +0200 Subject: [PATCH 5/6] use span instead of a pointer --- .../include/Framework/ArrowTableSlicingCache.h | 4 ++-- Framework/Core/src/ArrowTableSlicingCache.cxx | 14 +++++++------- Framework/Core/test/test_GroupSlicer.cxx | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index 3285db458bd71..41d6b33e48476 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -21,8 +21,8 @@ namespace o2::framework using ListVector = std::vector>; struct SliceInfoPtr { - std::vector const* offsets; - std::vector const* sizes; + gsl::span offsets; + gsl::span sizes; std::pair getSliceFor(int value) const; }; diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 88f05764da4a7..f4bda85537828 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -32,14 +32,14 @@ void updatePairList(Cache& list, std::string const& binding, std::string const& std::pair SliceInfoPtr::getSliceFor(int value) const { int64_t offset = 0; - if ((*offsets).empty()) { + if (offsets.empty()) { return {offset, 0}; } - if ((size_t)value > (*offsets).size()) { + if ((size_t)value >= offsets.size()) { return {offset, 0}; } - return {(*offsets)[value], (*sizes)[value]}; + return {offsets[value], sizes[value]}; } gsl::span SliceInfoUnsortedPtr::getSliceFor(int value) const @@ -240,14 +240,14 @@ SliceInfoPtr ArrowTableSlicingCache::getCacheForPos(int pos) const { if (values[pos] == nullptr && counts[pos] == nullptr) { return { - nullptr, // - nullptr // + {}, // + {} // }; } return { - &(offsets[pos]), // - &(sizes[pos]) // + gsl::span{offsets[pos].data(), offsets[pos].size()}, // + gsl::span(sizes[pos].data(), sizes[pos].size()) // }; } diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 7f2d301a6203a..2f21d7dd17975 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -245,8 +245,8 @@ TEST_CASE("GroupSlicerMismatchedGroups") if (i == 3 || i == 10 || i == 12 || i == 16 || i == 19) { continue; } - for (auto j = 0.f; j < 5; j += 0.5f) { - trksWriter(0, i, 0.5f * j); + for (auto j = 0; j < 10; ++j) { + trksWriter(0, i, 0.5f * (j / 2.)); } } auto trkTable = builderT.finalize(); @@ -297,8 +297,8 @@ TEST_CASE("GroupSlicerMismatchedUnassignedGroups") ++skip; continue; } - for (auto j = 0.f; j < 5; j += 0.5f) { - trksWriter(0, i, 0.5f * j); + for (auto j = 0; j < 10; ++j) { + trksWriter(0, i, 0.5f * (j / 2.)); } } for (auto i = 0; i < 5; ++i) { From 1a4b80ff44bad4e7acf2a2b61f8a4122e7fb32ff Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Wed, 13 Aug 2025 08:15:38 +0000 Subject: [PATCH 6/6] Please consider the following formatting changes --- Framework/Core/src/ArrowTableSlicingCache.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index f4bda85537828..26bb9bcee80eb 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -247,7 +247,7 @@ SliceInfoPtr ArrowTableSlicingCache::getCacheForPos(int pos) const return { gsl::span{offsets[pos].data(), offsets[pos].size()}, // - gsl::span(sizes[pos].data(), sizes[pos].size()) // + gsl::span(sizes[pos].data(), sizes[pos].size()) // }; }