Skip to content

Commit ff47fe9

Browse files
Further refactoring to simplify the code and set the stage for further
enhancements.
1 parent f9c435b commit ff47fe9

File tree

4 files changed

+108
-88
lines changed

4 files changed

+108
-88
lines changed

src/oracledb/arrow_impl.pxd

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,11 +84,12 @@ cdef class ArrowArrayImpl:
8484
str name
8585
ArrowType arrow_type
8686
ArrowTimeUnit time_unit
87-
double factor
87+
int time_factor
8888
ArrowArray *arrow_array
8989
ArrowSchema *arrow_schema
9090
ArrowType child_arrow_type
9191

92+
cdef int _set_time_unit(self, ArrowTimeUnit time_unit) except -1
9293
cdef int append_bytes(self, void* ptr, int64_t num_bytes) except -1
9394
cdef int append_decimal(self, void* ptr, int64_t num_bytes) except -1
9495
cdef int append_double(self, double value) except -1
@@ -101,6 +102,10 @@ cdef class ArrowArrayImpl:
101102
array.array values) except -1
102103
cdef int append_vector(self, array.array value) except -1
103104
cdef int finish_building(self) except -1
105+
cdef int populate_from_metadata(self, ArrowType arrow_type, str name,
106+
int8_t precision, int8_t scale,
107+
ArrowTimeUnit time_unit,
108+
ArrowType child_arrow_type) except -1
104109

105110

106111
cdef class DataFrameImpl:

src/oracledb/impl/arrow/array.pyx

Lines changed: 99 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -30,93 +30,11 @@
3030

3131
cdef class ArrowArrayImpl:
3232

33-
def __cinit__(self, ArrowType arrow_type, str name, int8_t precision,
34-
int8_t scale, ArrowTimeUnit time_unit,
35-
ArrowType child_arrow_type):
36-
cdef ArrowType storage_type = arrow_type
37-
self.arrow_type = arrow_type
38-
self.child_arrow_type = child_arrow_type
39-
self.time_unit = time_unit
40-
self.name = name
33+
def __cinit__(self):
4134
self.arrow_array = \
42-
<ArrowArray*> cpython.PyMem_Malloc(sizeof(ArrowArray))
43-
if arrow_type == NANOARROW_TYPE_TIMESTAMP:
44-
storage_type = NANOARROW_TYPE_INT64
45-
if time_unit == NANOARROW_TIME_UNIT_MILLI:
46-
self.factor = 1e3
47-
elif time_unit == NANOARROW_TIME_UNIT_MICRO:
48-
self.factor = 1e6
49-
elif time_unit == NANOARROW_TIME_UNIT_NANO:
50-
self.factor = 1e9
51-
else:
52-
self.factor = 1
53-
35+
<ArrowArray*> cpython.PyMem_Calloc(1, sizeof(ArrowArray))
5436
self.arrow_schema = \
55-
<ArrowSchema*> cpython.PyMem_Malloc(sizeof(ArrowSchema))
56-
if arrow_type == NANOARROW_TYPE_DECIMAL128:
57-
self.precision = precision
58-
self.scale = scale
59-
ArrowSchemaInit(self.arrow_schema)
60-
_check_nanoarrow(
61-
ArrowSchemaSetTypeDecimal(
62-
self.arrow_schema,
63-
arrow_type,
64-
precision,
65-
scale
66-
)
67-
)
68-
elif arrow_type == NANOARROW_TYPE_STRUCT:
69-
# Currently struct is used for Sparse vector only
70-
build_arrow_schema_for_sparse_vector(self.arrow_schema,
71-
child_arrow_type)
72-
else:
73-
_check_nanoarrow(
74-
ArrowSchemaInitFromType(
75-
self.arrow_schema,
76-
storage_type
77-
)
78-
)
79-
if arrow_type == NANOARROW_TYPE_TIMESTAMP:
80-
_check_nanoarrow(
81-
ArrowSchemaSetTypeDateTime(
82-
self.arrow_schema,
83-
arrow_type,
84-
time_unit,
85-
NULL
86-
)
87-
)
88-
if arrow_type == NANOARROW_TYPE_LIST:
89-
# Set the schema for child using child_arrow_type
90-
_check_nanoarrow(
91-
ArrowSchemaSetType(
92-
self.arrow_schema.children[0],
93-
child_arrow_type
94-
)
95-
)
96-
_check_nanoarrow(
97-
ArrowArrayInitFromSchema(
98-
self.arrow_array,
99-
self.arrow_schema,
100-
NULL
101-
)
102-
)
103-
elif arrow_type == NANOARROW_TYPE_STRUCT:
104-
_check_nanoarrow(
105-
ArrowArrayInitFromSchema(
106-
self.arrow_array,
107-
self.arrow_schema,
108-
NULL
109-
)
110-
)
111-
else: # primitive type array init
112-
_check_nanoarrow(
113-
ArrowArrayInitFromType(
114-
self.arrow_array,
115-
storage_type
116-
)
117-
)
118-
_check_nanoarrow(ArrowArrayStartAppending(self.arrow_array))
119-
_check_nanoarrow(ArrowSchemaSetName(self.arrow_schema, name.encode()))
37+
<ArrowSchema*> cpython.PyMem_Calloc(1, sizeof(ArrowSchema))
12038

12139
def __dealloc__(self):
12240
if self.arrow_array != NULL:
@@ -128,6 +46,20 @@ cdef class ArrowArrayImpl:
12846
ArrowSchemaRelease(self.arrow_schema)
12947
cpython.PyMem_Free(self.arrow_schema)
13048

49+
cdef int _set_time_unit(self, ArrowTimeUnit time_unit) except -1:
50+
"""
51+
Sets the time unit and the corresponding factor.
52+
"""
53+
self.time_unit = time_unit
54+
if time_unit == NANOARROW_TIME_UNIT_MILLI:
55+
self.time_factor = 1_000
56+
elif time_unit == NANOARROW_TIME_UNIT_MICRO:
57+
self.time_factor = 1_000_000
58+
elif time_unit == NANOARROW_TIME_UNIT_NANO:
59+
self.time_factor = 1_000_000_000
60+
else:
61+
self.time_factor = 1
62+
13163
cdef int append_bytes(self, void* ptr, int64_t num_bytes) except -1:
13264
"""
13365
Append a value of type bytes to the array.
@@ -318,6 +250,88 @@ cdef class ArrowArrayImpl:
318250
_check_nanoarrow(ArrowArrayFinishBuildingDefault(self.arrow_array,
319251
NULL))
320252

253+
cdef int populate_from_metadata(self, ArrowType arrow_type, str name,
254+
int8_t precision, int8_t scale,
255+
ArrowTimeUnit time_unit,
256+
ArrowType child_arrow_type) except -1:
257+
"""
258+
Populate the array from the supplied metadata.
259+
"""
260+
cdef ArrowType storage_type = arrow_type
261+
self.arrow_type = arrow_type
262+
self._set_time_unit(time_unit)
263+
self.name = name
264+
self.child_arrow_type = child_arrow_type
265+
if arrow_type == NANOARROW_TYPE_TIMESTAMP:
266+
storage_type = NANOARROW_TYPE_INT64
267+
268+
_check_nanoarrow(ArrowArrayInitFromType(self.arrow_array,
269+
storage_type))
270+
if arrow_type == NANOARROW_TYPE_DECIMAL128:
271+
self.precision = precision
272+
self.scale = scale
273+
ArrowSchemaInit(self.arrow_schema)
274+
_check_nanoarrow(
275+
ArrowSchemaSetTypeDecimal(
276+
self.arrow_schema,
277+
arrow_type,
278+
precision,
279+
scale
280+
)
281+
)
282+
elif arrow_type == NANOARROW_TYPE_STRUCT:
283+
# Currently struct is used for Sparse vector only
284+
build_arrow_schema_for_sparse_vector(self.arrow_schema,
285+
child_arrow_type)
286+
else:
287+
_check_nanoarrow(
288+
ArrowSchemaInitFromType(
289+
self.arrow_schema,
290+
storage_type
291+
)
292+
)
293+
if arrow_type == NANOARROW_TYPE_TIMESTAMP:
294+
_check_nanoarrow(
295+
ArrowSchemaSetTypeDateTime(
296+
self.arrow_schema,
297+
arrow_type,
298+
time_unit,
299+
NULL
300+
)
301+
)
302+
if arrow_type == NANOARROW_TYPE_LIST:
303+
# Set the schema for child using child_arrow_type
304+
_check_nanoarrow(
305+
ArrowSchemaSetType(
306+
self.arrow_schema.children[0],
307+
child_arrow_type
308+
)
309+
)
310+
_check_nanoarrow(
311+
ArrowArrayInitFromSchema(
312+
self.arrow_array,
313+
self.arrow_schema,
314+
NULL
315+
)
316+
)
317+
elif arrow_type == NANOARROW_TYPE_STRUCT:
318+
_check_nanoarrow(
319+
ArrowArrayInitFromSchema(
320+
self.arrow_array,
321+
self.arrow_schema,
322+
NULL
323+
)
324+
)
325+
else: # primitive type array init
326+
_check_nanoarrow(
327+
ArrowArrayInitFromType(
328+
self.arrow_array,
329+
storage_type
330+
)
331+
)
332+
_check_nanoarrow(ArrowArrayStartAppending(self.arrow_array))
333+
_check_nanoarrow(ArrowSchemaSetName(self.arrow_schema, name.encode()))
334+
321335
def get_array_capsule(self):
322336
"""
323337
Internal method for getting a PyCapsule pointer to the array.

src/oracledb/impl/base/converters.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ cdef int convert_date_to_arrow_timestamp(ArrowArrayImpl arrow_array,
5959
int64_t ts
6060
dt = convert_date_to_python(buffer)
6161
td = dt - EPOCH_DATE
62-
ts = int(cydatetime.total_seconds(td) * arrow_array.factor)
62+
ts = int(cydatetime.total_seconds(td) * arrow_array.time_factor)
6363
arrow_array.append_int64(ts)
6464

6565

src/oracledb/impl/base/var.pyx

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -280,7 +280,8 @@ cdef class BaseVarImpl:
280280
else:
281281
errors._raise_err(errors.ERR_ARROW_UNSUPPORTED_VECTOR_FORMAT)
282282

283-
self._arrow_array = ArrowArrayImpl(
283+
self._arrow_array = ArrowArrayImpl.__new__(ArrowArrayImpl)
284+
self._arrow_array.populate_from_metadata(
284285
arrow_type=self.metadata._arrow_type,
285286
name=self.metadata.name,
286287
precision=self.metadata.precision,

0 commit comments

Comments
 (0)