From e95ab9af3b10a348b9dbfa11c40f823bfb820e53 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Fri, 5 Dec 2025 11:13:02 +0100 Subject: [PATCH 01/19] Add commented fix to check failure --- src/serialize_utils.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/serialize_utils.cpp b/src/serialize_utils.cpp index 45ab9708..1fff3e96 100644 --- a/src/serialize_utils.cpp +++ b/src/serialize_utils.cpp @@ -93,6 +93,10 @@ namespace sparrow_ipc return col.data_type(); } ); +// for (const auto& col : rb.columns()) +// { +// dtypes.push_back(col.data_type()); +// } return dtypes; } } From c85ac857d7c9603c7530abc7736c33dff1a39d9f Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Fri, 5 Dec 2025 11:19:23 +0100 Subject: [PATCH 02/19] Uncomment fix --- src/serialize_utils.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/serialize_utils.cpp b/src/serialize_utils.cpp index 1fff3e96..6fa824e2 100644 --- a/src/serialize_utils.cpp +++ b/src/serialize_utils.cpp @@ -85,18 +85,18 @@ namespace sparrow_ipc { std::vector dtypes; dtypes.reserve(rb.nb_columns()); - std::ranges::transform( - rb.columns(), - std::back_inserter(dtypes), - [](const auto& col) - { - return col.data_type(); - } - ); -// for (const auto& col : rb.columns()) -// { -// dtypes.push_back(col.data_type()); -// } +// std::ranges::transform( +// rb.columns(), +// std::back_inserter(dtypes), +// [](const auto& col) +// { +// return col.data_type(); +// } +// ); + for (const auto& col : rb.columns()) + { + dtypes.push_back(col.data_type()); + } return dtypes; } } From 6bae15f8a0d133e2af43948c96abc322d8537063 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Fri, 5 Dec 2025 11:33:42 +0100 Subject: [PATCH 03/19] Attempt on metadata.cpp --- src/metadata.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/metadata.cpp b/src/metadata.cpp index 3638f766..1b2ceead 100644 --- a/src/metadata.cpp +++ b/src/metadata.cpp @@ -10,14 +10,10 @@ namespace sparrow_ipc { std::vector sparrow_metadata; sparrow_metadata.reserve(metadata.size()); - std::ranges::transform( - metadata, - std::back_inserter(sparrow_metadata), - [](const auto& kv) - { - return sparrow::metadata_pair{kv->key()->str(), kv->value()->str()}; - } - ); + for (const auto& kv : metadata) + { + sparrow_metadata.emplace_back(kv->key()->str(), kv->value()->str()); + } return sparrow_metadata; } } \ No newline at end of file From 2608881a6e02a85d5db2e74ca1226ee7b13fd1e6 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Fri, 5 Dec 2025 11:48:03 +0100 Subject: [PATCH 04/19] Attempt on equal --- include/sparrow_ipc/magic_values.hpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/sparrow_ipc/magic_values.hpp b/include/sparrow_ipc/magic_values.hpp index e90187cd..f97240ea 100644 --- a/include/sparrow_ipc/magic_values.hpp +++ b/include/sparrow_ipc/magic_values.hpp @@ -36,13 +36,21 @@ namespace sparrow_ipc template [[nodiscard]] bool is_continuation(const R& buf) { - return std::ranges::equal(buf, continuation); + if (std::ranges::size(buf) != continuation.size()) + { + return false; + } + return std::equal(std::ranges::begin(buf), std::ranges::end(buf), continuation.begin()); } template [[nodiscard]] bool is_end_of_stream(const R& buf) { - return std::ranges::equal(buf, end_of_stream); + if (std::ranges::size(buf) != end_of_stream.size()) + { + return false; + } + return std::equal(std::ranges::begin(buf), std::ranges::end(buf), end_of_stream.begin()); } template From cdd45610f8479ec65a9546a80b8c931340887b21 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Fri, 5 Dec 2025 11:57:42 +0100 Subject: [PATCH 05/19] Try debug without junit report --- .github/workflows/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index f695e56d..743f4248 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -55,7 +55,7 @@ jobs: - name: Run tests working-directory: build run: | - cmake --build . --config ${{ matrix.build_type }} --target run_tests_with_junit_report + cmake --build . --config ${{ matrix.build_type }} --target run_tests - name: Build example working-directory: build From 74e0d210263ac71ac12ce6ecb1fff2ef519964dc Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Fri, 5 Dec 2025 13:08:34 +0100 Subject: [PATCH 06/19] Debug --- .github/workflows/windows.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 743f4248..ea44e247 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -53,9 +53,18 @@ jobs: run: cmake --build . --config ${{ matrix.build_type }} --target test_sparrow_ipc_lib - name: Run tests + id: run_tests_step working-directory: build run: | cmake --build . --config ${{ matrix.build_type }} --target run_tests + continue-on-error: true + + - name: Display test log on failure + if: steps.run_tests_step.outcome == 'failure' + working-directory: build + run: | + echo "Test run failed. Displaying log file:" + cat ./tests/Testing/Temporary/LastTest.log - name: Build example working-directory: build From aa81bd7ec0864af1768f9da2318b969aac0a58ee Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Fri, 5 Dec 2025 14:18:00 +0100 Subject: [PATCH 07/19] Another try --- include/sparrow_ipc/flatbuffer_utils.hpp | 3 ++- src/flatbuffer_utils.cpp | 31 ++++++++++++------------ src/serialize_utils.cpp | 18 +++++--------- src/serializer.cpp | 4 +-- 4 files changed, 25 insertions(+), 31 deletions(-) diff --git a/include/sparrow_ipc/flatbuffer_utils.hpp b/include/sparrow_ipc/flatbuffer_utils.hpp index 592c006a..4b3e42f3 100644 --- a/include/sparrow_ipc/flatbuffer_utils.hpp +++ b/include/sparrow_ipc/flatbuffer_utils.hpp @@ -200,8 +200,9 @@ namespace sparrow_ipc { std::vector buffers; int64_t offset = 0; - for (const auto& column : record_batch.columns()) + for (size_t i = 0; i < record_batch.nb_columns(); ++i) { + const auto& column = record_batch.get_column(i); const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(column); fill_buffers_func(arrow_proxy, buffers, offset); } diff --git a/src/flatbuffer_utils.cpp b/src/flatbuffer_utils.cpp index 4c4e79a7..ec453c9d 100644 --- a/src/flatbuffer_utils.cpp +++ b/src/flatbuffer_utils.cpp @@ -468,13 +468,13 @@ namespace sparrow_ipc ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> create_children(flatbuffers::FlatBufferBuilder& builder, const sparrow::record_batch& record_batch) { - const auto& columns = record_batch.columns(); std::vector> children_vec; - children_vec.reserve(columns.size()); + children_vec.reserve(record_batch.nb_columns()); const auto names = record_batch.names(); - for (size_t i = 0; i < columns.size(); ++i) + for (size_t i = 0; i < record_batch.nb_columns(); ++i) { - const auto& arrow_schema = sparrow::detail::array_access::get_arrow_proxy(columns[i]).schema(); + const auto& column = record_batch.get_column(i); + const auto& arrow_schema = sparrow::detail::array_access::get_arrow_proxy(column).schema(); flatbuffers::Offset field = create_field( builder, arrow_schema, @@ -523,9 +523,10 @@ namespace sparrow_ipc create_fieldnodes(const sparrow::record_batch& record_batch) { std::vector nodes; - nodes.reserve(record_batch.columns().size()); - for (const auto& column : record_batch.columns()) + nodes.reserve(record_batch.nb_columns()); + for (size_t i = 0; i < record_batch.nb_columns(); ++i) { + const auto& column = record_batch.get_column(i); fill_fieldnodes(sparrow::detail::array_access::get_arrow_proxy(column), nodes); } return nodes; @@ -608,16 +609,14 @@ namespace sparrow_ipc std::optional compression, std::optional> cache) { - return std::accumulate( - record_batch.columns().begin(), - record_batch.columns().end(), - int64_t{0}, - [&](int64_t acc, const sparrow::array& arr) - { - const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); - return acc + calculate_body_size(arrow_proxy, compression, cache); - } - ); + int64_t acc = 0; + for (size_t i = 0; i < record_batch.nb_columns(); ++i) + { + const auto& arr = record_batch.get_column(i); + const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); + acc += calculate_body_size(arrow_proxy, compression, cache); + } + return acc; } flatbuffers::FlatBufferBuilder get_record_batch_message_builder(const sparrow::record_batch& record_batch, diff --git a/src/serialize_utils.cpp b/src/serialize_utils.cpp index 6fa824e2..1efa8647 100644 --- a/src/serialize_utils.cpp +++ b/src/serialize_utils.cpp @@ -38,10 +38,12 @@ namespace sparrow_ipc std::optional compression, std::optional> cache) { - std::for_each(record_batch.columns().begin(), record_batch.columns().end(), [&](const auto& column) { + for (size_t i = 0; i < record_batch.nb_columns(); ++i) + { + const auto& column = record_batch.get_column(i); const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(column); fill_body(arrow_proxy, stream, compression, cache); - }); + } } std::size_t calculate_schema_message_size(const sparrow::record_batch& record_batch) @@ -85,17 +87,9 @@ namespace sparrow_ipc { std::vector dtypes; dtypes.reserve(rb.nb_columns()); -// std::ranges::transform( -// rb.columns(), -// std::back_inserter(dtypes), -// [](const auto& col) -// { -// return col.data_type(); -// } -// ); - for (const auto& col : rb.columns()) + for (size_t i = 0; i < rb.nb_columns(); ++i) { - dtypes.push_back(col.data_type()); + dtypes.push_back(rb.get_column(i).data_type()); } return dtypes; } diff --git a/src/serializer.cpp b/src/serializer.cpp index b12459c3..0b222bb0 100644 --- a/src/serializer.cpp +++ b/src/serializer.cpp @@ -23,9 +23,9 @@ namespace sparrow_ipc { std::vector dtypes; dtypes.reserve(rb.nb_columns()); - for (const auto& col : rb.columns()) + for (size_t i = 0; i < rb.nb_columns(); ++i) { - dtypes.push_back(col.data_type()); + dtypes.push_back(rb.get_column(i).data_type()); } return dtypes; } From 4904b0efd8df718d23c00adba6a7ffa699bcdf54 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Fri, 5 Dec 2025 16:18:56 +0100 Subject: [PATCH 08/19] Remove unnecessary fixes --- .github/workflows/windows.yml | 11 +---------- include/sparrow_ipc/magic_values.hpp | 12 ++---------- src/metadata.cpp | 14 +++++++++----- 3 files changed, 12 insertions(+), 25 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index ea44e247..f695e56d 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -53,18 +53,9 @@ jobs: run: cmake --build . --config ${{ matrix.build_type }} --target test_sparrow_ipc_lib - name: Run tests - id: run_tests_step working-directory: build run: | - cmake --build . --config ${{ matrix.build_type }} --target run_tests - continue-on-error: true - - - name: Display test log on failure - if: steps.run_tests_step.outcome == 'failure' - working-directory: build - run: | - echo "Test run failed. Displaying log file:" - cat ./tests/Testing/Temporary/LastTest.log + cmake --build . --config ${{ matrix.build_type }} --target run_tests_with_junit_report - name: Build example working-directory: build diff --git a/include/sparrow_ipc/magic_values.hpp b/include/sparrow_ipc/magic_values.hpp index f97240ea..e90187cd 100644 --- a/include/sparrow_ipc/magic_values.hpp +++ b/include/sparrow_ipc/magic_values.hpp @@ -36,21 +36,13 @@ namespace sparrow_ipc template [[nodiscard]] bool is_continuation(const R& buf) { - if (std::ranges::size(buf) != continuation.size()) - { - return false; - } - return std::equal(std::ranges::begin(buf), std::ranges::end(buf), continuation.begin()); + return std::ranges::equal(buf, continuation); } template [[nodiscard]] bool is_end_of_stream(const R& buf) { - if (std::ranges::size(buf) != end_of_stream.size()) - { - return false; - } - return std::equal(std::ranges::begin(buf), std::ranges::end(buf), end_of_stream.begin()); + return std::ranges::equal(buf, end_of_stream); } template diff --git a/src/metadata.cpp b/src/metadata.cpp index 1b2ceead..699229b0 100644 --- a/src/metadata.cpp +++ b/src/metadata.cpp @@ -10,10 +10,14 @@ namespace sparrow_ipc { std::vector sparrow_metadata; sparrow_metadata.reserve(metadata.size()); - for (const auto& kv : metadata) - { - sparrow_metadata.emplace_back(kv->key()->str(), kv->value()->str()); - } + std::ranges::transform( + metadata, + std::back_inserter(sparrow_metadata), + [](const auto& kv) + { + return sparrow::metadata_pair{kv->key()->str(), kv->value()->str()}; + } + ); return sparrow_metadata; } -} \ No newline at end of file +} From 1d3a6b653a478f51163cdd2e692e90d150785ac3 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 10:49:05 +0100 Subject: [PATCH 09/19] Try another syntax --- src/serializer.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/serializer.cpp b/src/serializer.cpp index 0b222bb0..45b851dc 100644 --- a/src/serializer.cpp +++ b/src/serializer.cpp @@ -23,9 +23,10 @@ namespace sparrow_ipc { std::vector dtypes; dtypes.reserve(rb.nb_columns()); - for (size_t i = 0; i < rb.nb_columns(); ++i) + auto columns = rb.columns(); + for (const auto& col : columns) { - dtypes.push_back(rb.get_column(i).data_type()); + dtypes.push_back(col.data_type()); } return dtypes; } @@ -39,4 +40,4 @@ namespace sparrow_ipc m_stream.write(end_of_stream); m_ended = true; } -} \ No newline at end of file +} From 03cd8b3b6e1470c7ff4ddc9c8dcf467be04f9f25 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 11:29:20 +0100 Subject: [PATCH 10/19] Not use intermediate var --- src/serializer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/serializer.cpp b/src/serializer.cpp index 45b851dc..9821cb6e 100644 --- a/src/serializer.cpp +++ b/src/serializer.cpp @@ -23,8 +23,8 @@ namespace sparrow_ipc { std::vector dtypes; dtypes.reserve(rb.nb_columns()); - auto columns = rb.columns(); - for (const auto& col : columns) +// auto columns = rb.columns(); + for (const auto& col : rb.columns()) { dtypes.push_back(col.data_type()); } From 84e59e15002dfe99b6346fdb231c65e90e15376d Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 11:58:58 +0100 Subject: [PATCH 11/19] Try another syntax change --- src/serialize_utils.cpp | 16 ++++++++++++---- src/serializer.cpp | 1 - 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/serialize_utils.cpp b/src/serialize_utils.cpp index 1efa8647..1f074d4c 100644 --- a/src/serialize_utils.cpp +++ b/src/serialize_utils.cpp @@ -87,10 +87,18 @@ namespace sparrow_ipc { std::vector dtypes; dtypes.reserve(rb.nb_columns()); - for (size_t i = 0; i < rb.nb_columns(); ++i) - { - dtypes.push_back(rb.get_column(i).data_type()); - } + std::ranges::transform( + rb.columns(), + std::back_inserter(dtypes), + [](const auto& col) + { + return col.data_type(); + } + ); +// for (size_t i = 0; i < rb.nb_columns(); ++i) +// { +// dtypes.push_back(rb.get_column(i).data_type()); +// } return dtypes; } } diff --git a/src/serializer.cpp b/src/serializer.cpp index 9821cb6e..f0e99dbc 100644 --- a/src/serializer.cpp +++ b/src/serializer.cpp @@ -23,7 +23,6 @@ namespace sparrow_ipc { std::vector dtypes; dtypes.reserve(rb.nb_columns()); -// auto columns = rb.columns(); for (const auto& col : rb.columns()) { dtypes.push_back(col.data_type()); From 23c18fd19b6ab1ecd68d846867a276d3b8a356c3 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 13:40:08 +0100 Subject: [PATCH 12/19] Restore for_each --- src/serialize_utils.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/serialize_utils.cpp b/src/serialize_utils.cpp index 1f074d4c..7e3eed43 100644 --- a/src/serialize_utils.cpp +++ b/src/serialize_utils.cpp @@ -38,12 +38,14 @@ namespace sparrow_ipc std::optional compression, std::optional> cache) { - for (size_t i = 0; i < record_batch.nb_columns(); ++i) - { - const auto& column = record_batch.get_column(i); + std::for_each(record_batch.columns().begin(), record_batch.columns().end(), [&](const auto& column) { +// for (size_t i = 0; i < record_batch.nb_columns(); ++i) +// { +// const auto& column = record_batch.get_column(i); const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(column); fill_body(arrow_proxy, stream, compression, cache); - } + }); +// } } std::size_t calculate_schema_message_size(const sparrow::record_batch& record_batch) @@ -95,10 +97,6 @@ namespace sparrow_ipc return col.data_type(); } ); -// for (size_t i = 0; i < rb.nb_columns(); ++i) -// { -// dtypes.push_back(rb.get_column(i).data_type()); -// } return dtypes; } } From 37fb507e30e737a46c10fa941d52d71aea8d1a5f Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 13:50:55 +0100 Subject: [PATCH 13/19] Replace for_each --- src/serialize_utils.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/serialize_utils.cpp b/src/serialize_utils.cpp index 7e3eed43..df5c50d3 100644 --- a/src/serialize_utils.cpp +++ b/src/serialize_utils.cpp @@ -38,14 +38,11 @@ namespace sparrow_ipc std::optional compression, std::optional> cache) { - std::for_each(record_batch.columns().begin(), record_batch.columns().end(), [&](const auto& column) { -// for (size_t i = 0; i < record_batch.nb_columns(); ++i) -// { -// const auto& column = record_batch.get_column(i); + for (const auto& column : record_batch.columns()) + { const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(column); fill_body(arrow_proxy, stream, compression, cache); - }); -// } + } } std::size_t calculate_schema_message_size(const sparrow::record_batch& record_batch) From 147cb50bad48d4bb6d5ccc90dedba5bf7213b488 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 14:17:49 +0100 Subject: [PATCH 14/19] Replace indexes loop --- include/sparrow_ipc/flatbuffer_utils.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/sparrow_ipc/flatbuffer_utils.hpp b/include/sparrow_ipc/flatbuffer_utils.hpp index 4b3e42f3..592c006a 100644 --- a/include/sparrow_ipc/flatbuffer_utils.hpp +++ b/include/sparrow_ipc/flatbuffer_utils.hpp @@ -200,9 +200,8 @@ namespace sparrow_ipc { std::vector buffers; int64_t offset = 0; - for (size_t i = 0; i < record_batch.nb_columns(); ++i) + for (const auto& column : record_batch.columns()) { - const auto& column = record_batch.get_column(i); const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(column); fill_buffers_func(arrow_proxy, buffers, offset); } From 6f45cba9509eea9e728849d8b36f30bf47264f38 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 15:12:00 +0100 Subject: [PATCH 15/19] Try column loop --- src/flatbuffer_utils.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/flatbuffer_utils.cpp b/src/flatbuffer_utils.cpp index ec453c9d..a2da49d9 100644 --- a/src/flatbuffer_utils.cpp +++ b/src/flatbuffer_utils.cpp @@ -468,13 +468,13 @@ namespace sparrow_ipc ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> create_children(flatbuffers::FlatBufferBuilder& builder, const sparrow::record_batch& record_batch) { + const auto& columns = record_batch.columns(); std::vector> children_vec; - children_vec.reserve(record_batch.nb_columns()); + children_vec.reserve(columns.size()); const auto names = record_batch.names(); - for (size_t i = 0; i < record_batch.nb_columns(); ++i) + for (size_t i = 0; i < columns.size(); ++i) { - const auto& column = record_batch.get_column(i); - const auto& arrow_schema = sparrow::detail::array_access::get_arrow_proxy(column).schema(); + const auto& arrow_schema = sparrow::detail::array_access::get_arrow_proxy(columns[i]).schema(); flatbuffers::Offset field = create_field( builder, arrow_schema, @@ -524,9 +524,8 @@ namespace sparrow_ipc { std::vector nodes; nodes.reserve(record_batch.nb_columns()); - for (size_t i = 0; i < record_batch.nb_columns(); ++i) + for (const auto& column : record_batch.columns()) { - const auto& column = record_batch.get_column(i); fill_fieldnodes(sparrow::detail::array_access::get_arrow_proxy(column), nodes); } return nodes; From 79de94f63f3e73fdc8459e40f6a7cbc6141205ce Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 15:22:30 +0100 Subject: [PATCH 16/19] Try accumulate on iterators --- src/flatbuffer_utils.cpp | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/flatbuffer_utils.cpp b/src/flatbuffer_utils.cpp index a2da49d9..6f70158c 100644 --- a/src/flatbuffer_utils.cpp +++ b/src/flatbuffer_utils.cpp @@ -608,14 +608,24 @@ namespace sparrow_ipc std::optional compression, std::optional> cache) { - int64_t acc = 0; - for (size_t i = 0; i < record_batch.nb_columns(); ++i) - { - const auto& arr = record_batch.get_column(i); - const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); - acc += calculate_body_size(arrow_proxy, compression, cache); - } - return acc; + return std::accumulate( + record_batch.columns().begin(), + record_batch.columns().end(), + int64_t{0}, + [&](int64_t acc, const sparrow::array& arr) + { + const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); + return acc + calculate_body_size(arrow_proxy, compression, cache); + } + ); +// int64_t acc = 0; +// for (size_t i = 0; i < record_batch.nb_columns(); ++i) +// { +// const auto& arr = record_batch.get_column(i); +// const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); +// acc += calculate_body_size(arrow_proxy, compression, cache); +// } +// return acc; } flatbuffers::FlatBufferBuilder get_record_batch_message_builder(const sparrow::record_batch& record_batch, From 54a750e523ab45e2b693b1377263eedd9919e4bb Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 15:28:28 +0100 Subject: [PATCH 17/19] Revert "Try accumulate on iterators" This reverts commit 79de94f63f3e73fdc8459e40f6a7cbc6141205ce. --- src/flatbuffer_utils.cpp | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/flatbuffer_utils.cpp b/src/flatbuffer_utils.cpp index 6f70158c..a2da49d9 100644 --- a/src/flatbuffer_utils.cpp +++ b/src/flatbuffer_utils.cpp @@ -608,24 +608,14 @@ namespace sparrow_ipc std::optional compression, std::optional> cache) { - return std::accumulate( - record_batch.columns().begin(), - record_batch.columns().end(), - int64_t{0}, - [&](int64_t acc, const sparrow::array& arr) - { - const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); - return acc + calculate_body_size(arrow_proxy, compression, cache); - } - ); -// int64_t acc = 0; -// for (size_t i = 0; i < record_batch.nb_columns(); ++i) -// { -// const auto& arr = record_batch.get_column(i); -// const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); -// acc += calculate_body_size(arrow_proxy, compression, cache); -// } -// return acc; + int64_t acc = 0; + for (size_t i = 0; i < record_batch.nb_columns(); ++i) + { + const auto& arr = record_batch.get_column(i); + const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); + acc += calculate_body_size(arrow_proxy, compression, cache); + } + return acc; } flatbuffers::FlatBufferBuilder get_record_batch_message_builder(const sparrow::record_batch& record_batch, From 947881e862152d80ae11d4b7645aabba43a7c1e4 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 16:18:22 +0100 Subject: [PATCH 18/19] Try something else --- src/flatbuffer_utils.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/flatbuffer_utils.cpp b/src/flatbuffer_utils.cpp index a2da49d9..7ce90ffd 100644 --- a/src/flatbuffer_utils.cpp +++ b/src/flatbuffer_utils.cpp @@ -1,3 +1,4 @@ +#include #include #include "compression_impl.hpp" @@ -608,14 +609,25 @@ namespace sparrow_ipc std::optional compression, std::optional> cache) { - int64_t acc = 0; - for (size_t i = 0; i < record_batch.nb_columns(); ++i) - { - const auto& arr = record_batch.get_column(i); - const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); - acc += calculate_body_size(arrow_proxy, compression, cache); - } - return acc; + auto cols = record_batch.columns(); + return std::accumulate( + cols.begin(), + cols.end(), + int64_t{0}, + [&](int64_t acc, const sparrow::array& arr) + { + const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); + return acc + calculate_body_size(arrow_proxy, compression, cache); + } + ); +// int64_t acc = 0; +// for (size_t i = 0; i < record_batch.nb_columns(); ++i) +// { +// const auto& arr = record_batch.get_column(i); +// const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); +// acc += calculate_body_size(arrow_proxy, compression, cache); +// } +// return acc; } flatbuffers::FlatBufferBuilder get_record_batch_message_builder(const sparrow::record_batch& record_batch, From cad78a4b756bbee2eef7bdfc5b797ca628a3366b Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 8 Dec 2025 16:40:07 +0100 Subject: [PATCH 19/19] Use std::ranges::for_each --- src/flatbuffer_utils.cpp | 8 -------- src/serialize_utils.cpp | 6 +++--- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/flatbuffer_utils.cpp b/src/flatbuffer_utils.cpp index 7ce90ffd..e8c39bdd 100644 --- a/src/flatbuffer_utils.cpp +++ b/src/flatbuffer_utils.cpp @@ -620,14 +620,6 @@ namespace sparrow_ipc return acc + calculate_body_size(arrow_proxy, compression, cache); } ); -// int64_t acc = 0; -// for (size_t i = 0; i < record_batch.nb_columns(); ++i) -// { -// const auto& arr = record_batch.get_column(i); -// const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(arr); -// acc += calculate_body_size(arrow_proxy, compression, cache); -// } -// return acc; } flatbuffers::FlatBufferBuilder get_record_batch_message_builder(const sparrow::record_batch& record_batch, diff --git a/src/serialize_utils.cpp b/src/serialize_utils.cpp index df5c50d3..528ff6f4 100644 --- a/src/serialize_utils.cpp +++ b/src/serialize_utils.cpp @@ -1,3 +1,4 @@ +#include #include #include "sparrow_ipc/flatbuffer_utils.hpp" @@ -38,11 +39,10 @@ namespace sparrow_ipc std::optional compression, std::optional> cache) { - for (const auto& column : record_batch.columns()) - { + std::ranges::for_each(record_batch.columns(), [&](const auto& column) { const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(column); fill_body(arrow_proxy, stream, compression, cache); - } + }); } std::size_t calculate_schema_message_size(const sparrow::record_batch& record_batch)