From e7c05732cdac9a8eda8f7bd63ae0b0049896d130 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Fri, 26 Dec 2025 21:07:07 +0100 Subject: [PATCH] attempt to fix --- src/Processors/Formats/Impl/Parquet/Decoding.cpp | 2 ++ .../DataLakes/Iceberg/IcebergWrites.cpp | 9 +++++++++ .../DataLakes/Iceberg/ManifestFile.cpp | 13 ++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/Parquet/Decoding.cpp b/src/Processors/Formats/Impl/Parquet/Decoding.cpp index 060d14f2cefc..e667182d0c89 100644 --- a/src/Processors/Formats/Impl/Parquet/Decoding.cpp +++ b/src/Processors/Formats/Impl/Parquet/Decoding.cpp @@ -1096,6 +1096,8 @@ void IntConverter::convertField(std::span data, bool /*is_max*/, Fie UInt64 val = 0; switch (input_size) { + case 1: val = unalignedLoad(data.data()); break; + case 2: val = unalignedLoad(data.data()); break; case 4: val = unalignedLoad(data.data()); break; case 8: val = unalignedLoad(data.data()); break; default: chassert(false); diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp index 75fe44315faa..dce4b87c58a3 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp @@ -116,6 +116,7 @@ bool canDumpIcebergStats(const Field & field, DataTypePtr type) case TypeIndex::Int64: case TypeIndex::DateTime64: case TypeIndex::String: + case TypeIndex::UInt8: /// Boolean return true; default: return false; @@ -156,6 +157,14 @@ std::vector dumpFieldToBytes(const Field & field, DataTypePtr type) return dumpValue(field.safeGet()); case TypeIndex::Float32: return dumpValue(field.safeGet()); + case TypeIndex::UInt8: /// Boolean - stored as single byte in Iceberg + { + /// Field can be Bool or UInt64 type depending on source + UInt8 value = (field.getType() == Field::Types::Bool) + ? static_cast(field.safeGet()) + : static_cast(field.safeGet()); + return dumpValue(value); + } default: { throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not dump such stats"); diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp index d5c50a6c9994..82ab834e6a48 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp @@ -116,9 +116,20 @@ namespace return std::nullopt; } } + else if (DB::isBool(non_nullable_type)) + { + /// Boolean type needs special handling: + /// When UInt8 is extracted from column to Field, it becomes Field(Types::UInt64). + /// But query conditions like WHERE bool_col = true create Field(Types::Bool). + /// While accurateLess/accurateEquals handle type differences correctly, + /// we create a Bool-typed Field for consistency. + if (str.empty()) + return std::nullopt; + return static_cast(static_cast(str[0]) != 0); + } else { - /// For all other types except decimal binary representation + /// For all other types except decimal and boolean, binary representation /// matches our internal representation column->insertData(str.data(), str.length()); DB::Field result;