Skip to content

Commit e64a67e

Browse files
committed
Intermediate changes
commit_hash:c0613ad0911a3c98d16b2ef283ddb4c101b05f36
1 parent 42e8af2 commit e64a67e

File tree

5 files changed

+149
-12
lines changed

5 files changed

+149
-12
lines changed

contrib/libs/apache/arrow/cpp/src/arrow/python/arrow_to_pandas.cc

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -234,7 +234,8 @@ Status SetBufferBase(PyArrayObject* arr, const std::shared_ptr<Buffer>& buffer)
234234
}
235235

236236
inline void set_numpy_metadata(int type, const DataType* datatype, PyArray_Descr* out) {
237-
auto metadata = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(out->c_metadata);
237+
auto metadata =
238+
reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(out));
238239
if (type == NPY_DATETIME) {
239240
if (datatype->id() == Type::TIMESTAMP) {
240241
const auto& timestamp_type = checked_cast<const TimestampType&>(*datatype);
@@ -255,7 +256,7 @@ Status PyArray_NewFromPool(int nd, npy_intp* dims, PyArray_Descr* descr, MemoryP
255256
//
256257
// * Track allocations
257258
// * Get better performance through custom allocators
258-
int64_t total_size = descr->elsize;
259+
int64_t total_size = PyDataType_ELSIZE(descr);
259260
for (int i = 0; i < nd; ++i) {
260261
total_size *= dims[i];
261262
}
@@ -511,8 +512,9 @@ class PandasWriter {
511512

512513
void SetDatetimeUnit(NPY_DATETIMEUNIT unit) {
513514
PyAcquireGIL lock;
514-
auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(
515-
PyArray_DESCR(reinterpret_cast<PyArrayObject*>(block_arr_.obj()))->c_metadata);
515+
auto date_dtype =
516+
reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(
517+
PyArray_DESCR(reinterpret_cast<PyArrayObject*>(block_arr_.obj()))));
516518
date_dtype->meta.base = unit;
517519
}
518520

contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_convert.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) {
4646
PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(ao);
4747
auto ptr = reinterpret_cast<uint8_t*>(PyArray_DATA(ndarray));
4848
data_ = const_cast<const uint8_t*>(ptr);
49-
size_ = PyArray_SIZE(ndarray) * PyArray_DESCR(ndarray)->elsize;
49+
size_ = PyArray_NBYTES(ndarray);
5050
capacity_ = size_;
5151
is_mutable_ = !!(PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE);
5252
}
@@ -148,7 +148,7 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
148148
TO_ARROW_TYPE_CASE(UNICODE, utf8);
149149
case NPY_DATETIME: {
150150
auto date_dtype =
151-
reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
151+
reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(descr));
152152
switch (date_dtype->meta.base) {
153153
case NPY_FR_s:
154154
*out = timestamp(TimeUnit::SECOND);
@@ -173,7 +173,7 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
173173
} break;
174174
case NPY_TIMEDELTA: {
175175
auto timedelta_dtype =
176-
reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
176+
reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(descr));
177177
switch (timedelta_dtype->meta.base) {
178178
case NPY_FR_s:
179179
*out = duration(TimeUnit::SECOND);

contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_interop.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@
6767
#define NPY_INT32_IS_INT 0
6868
#endif
6969

70+
// Backported NumPy 2 API (can be removed if numpy 2 is required)
71+
#if NPY_ABI_VERSION < 0x02000000
72+
#define PyDataType_ELSIZE(descr) ((descr)->elsize)
73+
#define PyDataType_C_METADATA(descr) ((descr)->c_metadata)
74+
#define PyDataType_FIELDS(descr) ((descr)->fields)
75+
#endif
76+
7077
namespace arrow {
7178
namespace py {
7279

contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_to_arrow.cc

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ class NumPyConverter {
193193
mask_ = reinterpret_cast<PyArrayObject*>(mo);
194194
}
195195
length_ = static_cast<int64_t>(PyArray_SIZE(arr_));
196-
itemsize_ = static_cast<int>(PyArray_DESCR(arr_)->elsize);
196+
itemsize_ = static_cast<int64_t>(PyArray_ITEMSIZE(arr_));
197197
stride_ = static_cast<int64_t>(PyArray_STRIDES(arr_)[0]);
198198
}
199199

@@ -470,7 +470,8 @@ inline Status NumPyConverter::ConvertData<Date32Type>(std::shared_ptr<Buffer>* d
470470

471471
RETURN_NOT_OK(PrepareInputData<Date32Type>(data));
472472

473-
auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(dtype_->c_metadata);
473+
auto date_dtype =
474+
reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(dtype_));
474475
if (dtype_->type_num == NPY_DATETIME) {
475476
// If we have inbound datetime64[D] data, this needs to be downcasted
476477
// separately here from int64_t to int32_t, because this data is not
@@ -506,7 +507,8 @@ inline Status NumPyConverter::ConvertData<Date64Type>(std::shared_ptr<Buffer>* d
506507

507508
RETURN_NOT_OK(PrepareInputData<Date64Type>(data));
508509

509-
auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(dtype_->c_metadata);
510+
auto date_dtype =
511+
reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(dtype_));
510512
if (dtype_->type_num == NPY_DATETIME) {
511513
// If we have inbound datetime64[D] data, this needs to be downcasted
512514
// separately here from int64_t to int32_t, because this data is not
@@ -736,12 +738,13 @@ Status NumPyConverter::Visit(const StructType& type) {
736738
PyAcquireGIL gil_lock;
737739

738740
// Create converters for each struct type field
739-
if (dtype_->fields == NULL || !PyDict_Check(dtype_->fields)) {
741+
if (PyDataType_FIELDS(dtype_) == NULL || !PyDict_Check(PyDataType_FIELDS(dtype_))) {
740742
return Status::TypeError("Expected struct array");
741743
}
742744

743745
for (auto field : type.fields()) {
744-
PyObject* tup = PyDict_GetItemString(dtype_->fields, field->name().c_str());
746+
PyObject* tup =
747+
PyDict_GetItemString(PyDataType_FIELDS(dtype_), field->name().c_str());
745748
if (tup == NULL) {
746749
return Status::Invalid("Missing field '", field->name(), "' in struct array");
747750
}
Lines changed: 125 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,125 @@
1+
--- contrib/libs/apache/arrow/cpp/src/arrow/python/arrow_to_pandas.cc (index)
2+
+++ contrib/libs/apache/arrow/cpp/src/arrow/python/arrow_to_pandas.cc (working tree)
3+
@@ -234,7 +234,8 @@ Status SetBufferBase(PyArrayObject* arr, const std::shared_ptr<Buffer>& buffer)
4+
}
5+
6+
inline void set_numpy_metadata(int type, const DataType* datatype, PyArray_Descr* out) {
7+
- auto metadata = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(out->c_metadata);
8+
+ auto metadata =
9+
+ reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(out));
10+
if (type == NPY_DATETIME) {
11+
if (datatype->id() == Type::TIMESTAMP) {
12+
const auto& timestamp_type = checked_cast<const TimestampType&>(*datatype);
13+
@@ -255,7 +256,7 @@ Status PyArray_NewFromPool(int nd, npy_intp* dims, PyArray_Descr* descr, MemoryP
14+
//
15+
// * Track allocations
16+
// * Get better performance through custom allocators
17+
- int64_t total_size = descr->elsize;
18+
+ int64_t total_size = PyDataType_ELSIZE(descr);
19+
for (int i = 0; i < nd; ++i) {
20+
total_size *= dims[i];
21+
}
22+
@@ -511,8 +512,9 @@ class PandasWriter {
23+
24+
void SetDatetimeUnit(NPY_DATETIMEUNIT unit) {
25+
PyAcquireGIL lock;
26+
- auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(
27+
- PyArray_DESCR(reinterpret_cast<PyArrayObject*>(block_arr_.obj()))->c_metadata);
28+
+ auto date_dtype =
29+
+ reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(
30+
+ PyArray_DESCR(reinterpret_cast<PyArrayObject*>(block_arr_.obj()))));
31+
date_dtype->meta.base = unit;
32+
}
33+
34+
--- contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_convert.cc (index)
35+
+++ contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_convert.cc (working tree)
36+
@@ -46,7 +46,7 @@ NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) {
37+
PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(ao);
38+
auto ptr = reinterpret_cast<uint8_t*>(PyArray_DATA(ndarray));
39+
data_ = const_cast<const uint8_t*>(ptr);
40+
- size_ = PyArray_SIZE(ndarray) * PyArray_DESCR(ndarray)->elsize;
41+
+ size_ = PyArray_NBYTES(ndarray);
42+
capacity_ = size_;
43+
is_mutable_ = !!(PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE);
44+
}
45+
@@ -148,7 +148,7 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
46+
TO_ARROW_TYPE_CASE(UNICODE, utf8);
47+
case NPY_DATETIME: {
48+
auto date_dtype =
49+
- reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
50+
+ reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(descr));
51+
switch (date_dtype->meta.base) {
52+
case NPY_FR_s:
53+
*out = timestamp(TimeUnit::SECOND);
54+
@@ -173,7 +173,7 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
55+
} break;
56+
case NPY_TIMEDELTA: {
57+
auto timedelta_dtype =
58+
- reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
59+
+ reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(descr));
60+
switch (timedelta_dtype->meta.base) {
61+
case NPY_FR_s:
62+
*out = duration(TimeUnit::SECOND);
63+
--- contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_interop.h (index)
64+
+++ contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_interop.h (working tree)
65+
@@ -67,6 +67,13 @@
66+
#define NPY_INT32_IS_INT 0
67+
#endif
68+
69+
+// Backported NumPy 2 API (can be removed if numpy 2 is required)
70+
+#if NPY_ABI_VERSION < 0x02000000
71+
+#define PyDataType_ELSIZE(descr) ((descr)->elsize)
72+
+#define PyDataType_C_METADATA(descr) ((descr)->c_metadata)
73+
+#define PyDataType_FIELDS(descr) ((descr)->fields)
74+
+#endif
75+
+
76+
namespace arrow {
77+
namespace py {
78+
79+
--- contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_to_arrow.cc (index)
80+
+++ contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_to_arrow.cc (working tree)
81+
@@ -193,7 +193,7 @@ class NumPyConverter {
82+
mask_ = reinterpret_cast<PyArrayObject*>(mo);
83+
}
84+
length_ = static_cast<int64_t>(PyArray_SIZE(arr_));
85+
- itemsize_ = static_cast<int>(PyArray_DESCR(arr_)->elsize);
86+
+ itemsize_ = static_cast<int64_t>(PyArray_ITEMSIZE(arr_));
87+
stride_ = static_cast<int64_t>(PyArray_STRIDES(arr_)[0]);
88+
}
89+
90+
@@ -470,7 +470,8 @@ inline Status NumPyConverter::ConvertData<Date32Type>(std::shared_ptr<Buffer>* d
91+
92+
RETURN_NOT_OK(PrepareInputData<Date32Type>(data));
93+
94+
- auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(dtype_->c_metadata);
95+
+ auto date_dtype =
96+
+ reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(dtype_));
97+
if (dtype_->type_num == NPY_DATETIME) {
98+
// If we have inbound datetime64[D] data, this needs to be downcasted
99+
// separately here from int64_t to int32_t, because this data is not
100+
@@ -506,7 +507,8 @@ inline Status NumPyConverter::ConvertData<Date64Type>(std::shared_ptr<Buffer>* d
101+
102+
RETURN_NOT_OK(PrepareInputData<Date64Type>(data));
103+
104+
- auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(dtype_->c_metadata);
105+
+ auto date_dtype =
106+
+ reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(dtype_));
107+
if (dtype_->type_num == NPY_DATETIME) {
108+
// If we have inbound datetime64[D] data, this needs to be downcasted
109+
// separately here from int64_t to int32_t, because this data is not
110+
@@ -736,12 +738,13 @@ Status NumPyConverter::Visit(const StructType& type) {
111+
PyAcquireGIL gil_lock;
112+
113+
// Create converters for each struct type field
114+
- if (dtype_->fields == NULL || !PyDict_Check(dtype_->fields)) {
115+
+ if (PyDataType_FIELDS(dtype_) == NULL || !PyDict_Check(PyDataType_FIELDS(dtype_))) {
116+
return Status::TypeError("Expected struct array");
117+
}
118+
119+
for (auto field : type.fields()) {
120+
- PyObject* tup = PyDict_GetItemString(dtype_->fields, field->name().c_str());
121+
+ PyObject* tup =
122+
+ PyDict_GetItemString(PyDataType_FIELDS(dtype_), field->name().c_str());
123+
if (tup == NULL) {
124+
return Status::Invalid("Missing field '", field->name(), "' in struct array");
125+
}

0 commit comments

Comments
 (0)