Skip to content

Commit 597a4d4

Browse files
Added support for all of the signed and unsigned fixed-width integer
types when ingesting data frames supporting the Arrow PyCapsule interface. Previously, only int64 was supported.
1 parent 4dd92ab commit 597a4d4

File tree

10 files changed

+175
-17
lines changed

10 files changed

+175
-17
lines changed

doc/src/release_notes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ Thick Mode Changes
3737
Common Changes
3838
++++++++++++++
3939

40+
#) Added support for all of the signed and unsigned fixed width integer types
41+
when ingesting data frames supporting the Arrow PyCapsule interface.
42+
Previously only int64 was supported.
4043
#) Added ``fetch_lobs`` and ``fetch_decimals`` parameters where applicable to
4144
the methods used for fetching rows or dataframes from the database. Note
4245
that for the creation of pipeline operations, if these parameters are not

src/oracledb/arrow_impl.pxd

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ cdef extern from "nanoarrow.h":
6868
NANOARROW_TYPE_FIXED_SIZE_LIST
6969
NANOARROW_TYPE_FLOAT
7070
NANOARROW_TYPE_INT8
71+
NANOARROW_TYPE_INT16
72+
NANOARROW_TYPE_INT32
7173
NANOARROW_TYPE_INT64
7274
NANOARROW_TYPE_LARGE_BINARY
7375
NANOARROW_TYPE_LARGE_STRING
@@ -77,7 +79,9 @@ cdef extern from "nanoarrow.h":
7779
NANOARROW_TYPE_STRUCT
7880
NANOARROW_TYPE_TIMESTAMP
7981
NANOARROW_TYPE_UINT8
82+
NANOARROW_TYPE_UINT16
8083
NANOARROW_TYPE_UINT32
84+
NANOARROW_TYPE_UINT64
8185
NANOARROW_TYPE_UNINITIALIZED
8286

8387
cpdef enum ArrowTimeUnit:
@@ -139,10 +143,12 @@ cdef class ArrowArrayImpl:
139143
double* value) except -1
140144
cdef int get_float(self, int64_t index, bint* is_null,
141145
float* value) except -1
142-
cdef int get_int64(self, int64_t index, bint* is_null,
143-
int64_t* value) except -1
146+
cdef int get_int(self, ArrowType arrow_type, int64_t index, bint* is_null,
147+
int64_t* value) except -1
144148
cdef int get_length(self, int64_t* length) except -1
145149
cdef object get_sparse_vector(self, int64_t index, bint* is_null)
150+
cdef int get_uint(self, ArrowType arrow_type, int64_t index, bint* is_null,
151+
uint64_t* value) except -1
146152
cdef object get_vector(self, int64_t index, bint* is_null)
147153
cdef int populate_from_array(self, ArrowSchema* schema,
148154
ArrowArray* array) except -1

src/oracledb/base_impl.pyx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ from libc.stdint cimport UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX
4141
from libc.stdlib cimport atoi, atof
4242
from libc.string cimport memcpy
4343
from cpython cimport array
44+
from cpython.conversion cimport PyOS_snprintf
4445

4546
from .constants import VECTOR_META_FLAG_SPARSE_VECTOR
4647

@@ -59,6 +60,8 @@ from .arrow_impl cimport (
5960
NANOARROW_TYPE_FIXED_SIZE_LIST,
6061
NANOARROW_TYPE_FLOAT,
6162
NANOARROW_TYPE_INT8,
63+
NANOARROW_TYPE_INT16,
64+
NANOARROW_TYPE_INT32,
6265
NANOARROW_TYPE_INT64,
6366
NANOARROW_TYPE_LIST,
6467
NANOARROW_TYPE_LARGE_BINARY,
@@ -67,6 +70,9 @@ from .arrow_impl cimport (
6770
NANOARROW_TYPE_STRUCT,
6871
NANOARROW_TYPE_TIMESTAMP,
6972
NANOARROW_TYPE_UINT8,
73+
NANOARROW_TYPE_UINT16,
74+
NANOARROW_TYPE_UINT32,
75+
NANOARROW_TYPE_UINT64,
7076
ArrowArrayImpl,
7177
)
7278

src/oracledb/impl/arrow/array.pyx

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -351,16 +351,24 @@ cdef class ArrowArrayImpl:
351351
ptr = <float*> self.arrow_array.buffers[1]
352352
value[0] = ptr[index]
353353

354-
cdef int get_int64(self, int64_t index, bint* is_null,
355-
int64_t* value) except -1:
354+
cdef int get_int(self, ArrowType arrow_type, int64_t index, bint* is_null,
355+
int64_t* value) except -1:
356356
"""
357-
Return an int64_t value at the specified index from the Arrow array.
357+
Return an int64_t value at the specified index from the Arrow array
358+
for all signed integer types.
358359
"""
359-
cdef int64_t* ptr
360+
cdef const void* ptr
360361
self._get_is_null(index, is_null)
361362
if not is_null[0]:
362-
ptr = <int64_t*> self.arrow_array.buffers[1]
363-
value[0] = ptr[index]
363+
ptr = self.arrow_array.buffers[1]
364+
if arrow_type == NANOARROW_TYPE_INT8:
365+
value[0] = (<int8_t*> ptr)[index]
366+
elif arrow_type == NANOARROW_TYPE_INT16:
367+
value[0] = (<int16_t*> ptr)[index]
368+
elif arrow_type == NANOARROW_TYPE_INT32:
369+
value[0] = (<int32_t*> ptr)[index]
370+
else:
371+
value[0] = (<int64_t*> ptr)[index]
364372

365373
cdef int get_length(self, int64_t* length) except -1:
366374
"""
@@ -415,6 +423,25 @@ cdef class ArrowArrayImpl:
415423
num_elements * self.schema_impl.child_element_size)
416424
return (num_dimensions, indices, values)
417425

426+
cdef int get_uint(self, ArrowType arrow_type, int64_t index, bint* is_null,
427+
uint64_t* value) except -1:
428+
"""
429+
Return a uint64_t value at the specified index from the Arrow array
430+
for all unsigned integer types.
431+
"""
432+
cdef const void* ptr
433+
self._get_is_null(index, is_null)
434+
if not is_null[0]:
435+
ptr = self.arrow_array.buffers[1]
436+
if arrow_type == NANOARROW_TYPE_UINT8:
437+
value[0] = (<uint8_t*> ptr)[index]
438+
elif arrow_type == NANOARROW_TYPE_UINT16:
439+
value[0] = (<uint16_t*> ptr)[index]
440+
elif arrow_type == NANOARROW_TYPE_UINT32:
441+
value[0] = (<uint32_t*> ptr)[index]
442+
else:
443+
value[0] = (<uint64_t*> ptr)[index]
444+
418445
cdef object get_vector(self, int64_t index, bint* is_null):
419446
"""
420447
Return a vector value at the specified index from the Arrow array.

src/oracledb/impl/arrow/schema.pyx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,17 @@ cdef class ArrowSchemaImpl:
146146
NANOARROW_TYPE_DOUBLE,
147147
NANOARROW_TYPE_FIXED_SIZE_BINARY,
148148
NANOARROW_TYPE_FLOAT,
149+
NANOARROW_TYPE_INT8,
150+
NANOARROW_TYPE_INT16,
151+
NANOARROW_TYPE_INT32,
149152
NANOARROW_TYPE_INT64,
150153
NANOARROW_TYPE_LARGE_BINARY,
151154
NANOARROW_TYPE_LARGE_STRING,
152155
NANOARROW_TYPE_STRING,
156+
NANOARROW_TYPE_UINT8,
157+
NANOARROW_TYPE_UINT16,
158+
NANOARROW_TYPE_UINT32,
159+
NANOARROW_TYPE_UINT64,
153160
) and not (
154161
schema_view.type == NANOARROW_TYPE_STRUCT
155162
and self._is_sparse_vector()

src/oracledb/impl/base/converters.pyx

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,39 @@ cdef object convert_arrow_to_oracle_data(OracleMetadata metadata,
3737
Converts the value stored in Arrow format to an OracleData structure.
3838
"""
3939
cdef:
40-
int64_t int64_value, days, seconds, useconds
40+
int64_t int_value, days, seconds, useconds
4141
SparseVectorImpl sparse_impl
4242
ArrowType arrow_type
43+
uint64_t uint_value
4344
OracleRawBytes* rb
4445
tuple sparse_info
4546
bytes temp_bytes
47+
ssize_t buf_len
48+
char buf[21]
4649

4750
arrow_type = metadata._schema_impl.arrow_type
48-
if arrow_type == NANOARROW_TYPE_INT64:
49-
array_impl.get_int64(array_index, &data.is_null, &int64_value)
51+
if arrow_type in (
52+
NANOARROW_TYPE_INT8,
53+
NANOARROW_TYPE_INT16,
54+
NANOARROW_TYPE_INT32,
55+
NANOARROW_TYPE_INT64,
56+
):
57+
array_impl.get_int(arrow_type, array_index, &data.is_null, &int_value)
58+
if not data.is_null:
59+
buf_len = PyOS_snprintf(buf, sizeof(buf), "%lld", int_value)
60+
temp_bytes = buf[:buf_len]
61+
convert_bytes_to_oracle_data(&data.buffer, temp_bytes)
62+
return temp_bytes
63+
elif arrow_type in (
64+
NANOARROW_TYPE_UINT8,
65+
NANOARROW_TYPE_UINT16,
66+
NANOARROW_TYPE_UINT32,
67+
NANOARROW_TYPE_UINT64,
68+
):
69+
array_impl.get_uint(arrow_type, array_index, &data.is_null, &uint_value)
5070
if not data.is_null:
51-
temp_bytes = str(int64_value).encode()
71+
buf_len = PyOS_snprintf(buf, sizeof(buf), "%llu", uint_value)
72+
temp_bytes = buf[:buf_len]
5273
convert_bytes_to_oracle_data(&data.buffer, temp_bytes)
5374
return temp_bytes
5475
elif arrow_type == NANOARROW_TYPE_DOUBLE:
@@ -70,10 +91,10 @@ cdef object convert_arrow_to_oracle_data(OracleMetadata metadata,
7091
array_impl.get_bytes(array_index, &data.is_null, <char**> &rb.ptr,
7192
&rb.num_bytes)
7293
elif arrow_type == NANOARROW_TYPE_TIMESTAMP:
73-
array_impl.get_int64(array_index, &data.is_null, &int64_value)
94+
array_impl.get_int(arrow_type, array_index, &data.is_null, &int_value)
7495
if not data.is_null:
75-
seconds = int64_value // array_impl.schema_impl.time_factor
76-
useconds = int64_value % array_impl.schema_impl.time_factor
96+
seconds = int_value // array_impl.schema_impl.time_factor
97+
useconds = int_value % array_impl.schema_impl.time_factor
7798
days = seconds // (24 * 60 * 60)
7899
seconds = seconds % (24 * 60 * 60)
79100
if array_impl.schema_impl.time_factor == 1_000:

src/oracledb/impl/base/metadata.pyx

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,17 @@ cdef class OracleMetadata:
162162
cdef:
163163
OracleMetadata metadata = OracleMetadata.__new__(OracleMetadata)
164164
ArrowType arrow_type = schema_impl.arrow_type
165-
if arrow_type in (NANOARROW_TYPE_DECIMAL128,
166-
NANOARROW_TYPE_INT64):
165+
if arrow_type in (
166+
NANOARROW_TYPE_DECIMAL128,
167+
NANOARROW_TYPE_INT8,
168+
NANOARROW_TYPE_INT16,
169+
NANOARROW_TYPE_INT32,
170+
NANOARROW_TYPE_INT64,
171+
NANOARROW_TYPE_UINT8,
172+
NANOARROW_TYPE_UINT16,
173+
NANOARROW_TYPE_UINT32,
174+
NANOARROW_TYPE_UINT64,
175+
):
167176
metadata.dbtype = DB_TYPE_NUMBER
168177
elif arrow_type == NANOARROW_TYPE_STRING:
169178
metadata.dbtype = DB_TYPE_VARCHAR

tests/sql/create_schema.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,7 @@ create table &main_user..TestDataframe (
410410
CreditScore number(3, 0),
411411
LastUpdated timestamp,
412412
DecimalData number(15, 4),
413+
IntegerData number(20),
413414
FloatData binary_float,
414415
DoubleData binary_double,
415416
RawData raw(100),

tests/test_8900_dataframe_ingestion.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,45 @@ def test_8918(self):
859859
fetched_df = pyarrow.table(odf)
860860
self.assertTrue(fetched_df.equals(df))
861861

862+
def test_8919(self):
863+
"8919 - test ingestion with various integer data types"
864+
scenarios = [
865+
([-(2**7), 0, 2**7 - 1], pyarrow.int8()),
866+
([-(2**15), 0, 2**15 - 1], pyarrow.int16()),
867+
([-(2**31), 0, 2**31 - 1], pyarrow.int32()),
868+
([-(2**63), 0, 2**63 - 1], pyarrow.int64()),
869+
([0, 2**7, 2**8 - 1], pyarrow.uint8()),
870+
([0, 2**15, 2**16 - 1], pyarrow.uint16()),
871+
([0, 2**31, 2**32 - 1], pyarrow.uint32()),
872+
([0, 2**63, 2**64 - 1], pyarrow.uint64()),
873+
]
874+
names = ["Id", "IntegerData"]
875+
for values, dtype in scenarios:
876+
with self.subTest(dtype=str(dtype)):
877+
arrays = [
878+
pyarrow.array([1, 2, 3], pyarrow.int8()),
879+
pyarrow.array(values, dtype),
880+
]
881+
df = pyarrow.table(arrays, names)
882+
self.cursor.execute("delete from TestDataFrame")
883+
self.cursor.executemany(
884+
"""
885+
insert into TestDataFrame (Id, IntegerData)
886+
values (:1, :2)
887+
""",
888+
df,
889+
)
890+
self.conn.commit()
891+
self.cursor.execute(
892+
"""
893+
select to_char(IntegerData)
894+
from TestDataFrame
895+
order by Id
896+
"""
897+
)
898+
fetched_values = [int(s) for s, in self.cursor]
899+
self.assertEqual(fetched_values, values)
900+
862901

863902
if __name__ == "__main__":
864903
test_env.run_test_cases()

tests/test_9000_dataframe_ingestion_async.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,45 @@ async def test_9018(self):
860860
fetched_df = pyarrow.table(odf)
861861
self.assertTrue(fetched_df.equals(df))
862862

863+
async def test_9019(self):
864+
"9019 - test ingestion with various integer data types"
865+
scenarios = [
866+
([-(2**7), 0, 2**7 - 1], pyarrow.int8()),
867+
([-(2**15), 0, 2**15 - 1], pyarrow.int16()),
868+
([-(2**31), 0, 2**31 - 1], pyarrow.int32()),
869+
([-(2**63), 0, 2**63 - 1], pyarrow.int64()),
870+
([0, 2**7, 2**8 - 1], pyarrow.uint8()),
871+
([0, 2**15, 2**16 - 1], pyarrow.uint16()),
872+
([0, 2**31, 2**32 - 1], pyarrow.uint32()),
873+
([0, 2**63, 2**64 - 1], pyarrow.uint64()),
874+
]
875+
names = ["Id", "IntegerData"]
876+
for values, dtype in scenarios:
877+
with self.subTest(dtype=str(dtype)):
878+
arrays = [
879+
pyarrow.array([1, 2, 3], pyarrow.int8()),
880+
pyarrow.array(values, dtype),
881+
]
882+
df = pyarrow.table(arrays, names)
883+
await self.cursor.execute("delete from TestDataFrame")
884+
await self.cursor.executemany(
885+
"""
886+
insert into TestDataFrame (Id, IntegerData)
887+
values (:1, :2)
888+
""",
889+
df,
890+
)
891+
await self.conn.commit()
892+
await self.cursor.execute(
893+
"""
894+
select to_char(IntegerData)
895+
from TestDataFrame
896+
order by Id
897+
"""
898+
)
899+
fetched_values = [int(s) async for s, in self.cursor]
900+
self.assertEqual(fetched_values, values)
901+
863902

864903
if __name__ == "__main__":
865904
test_env.run_test_cases()

0 commit comments

Comments
 (0)