Skip to content

Commit 253d569

Browse files
Support LOB types in the fetch data frame APIs.
1 parent 92ac5a6 commit 253d569

File tree

11 files changed

+114
-8
lines changed

11 files changed

+114
-8
lines changed

doc/src/release_notes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ Common Changes
6262
data frame
6363
- eliminated small memory leak with production of all data frames
6464

65+
#) Added support for CLOB, BLOB and RAW data types in
66+
:meth:`Connection.fetch_df_all()` and :meth:`Connection.fetch_df_batches()`
6567
#) Fixed bug when NUMBER data is fetched with
6668
:meth:`Connection.fetch_df_all()` or :meth:`Connection.fetch_df_batches()`
6769
that does not have precision or scale specified and

doc/src/user_guide/sql_execution.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -850,6 +850,12 @@ Oracle Database will result in an exception.
850850
- TIMESTAMP
851851
* - DB_TYPE_TIMESTAMP_TZ
852852
- TIMESTAMP
853+
* - DB_TYPE_CLOB
854+
- LARGE_STRING
855+
* - DB_TYPE_BLOB
856+
- LARGE_BINARY
857+
* - DB_TYPE_RAW
858+
- BINARY
853859

854860
When converting Oracle Database NUMBERs:
855861

@@ -864,6 +870,10 @@ When converting Oracle Database NUMBERs:
864870

865871
- In all other cases, the Arrow data type is DOUBLE.
866872

873+
When converting Oracle Database CLOBs and BLOBs:
874+
875+
- The LOBs must be no more than 1 GB in length.
876+
867877
When converting Oracle Database DATEs and TIMESTAMPs:
868878

869879
- For Oracle Database DATE types, the Arrow TIMESTAMP will have a time unit of

src/oracledb/base_impl.pyx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,13 @@ from .interchange.nanoarrow_bridge cimport (
4848
NANOARROW_TIME_UNIT_MICRO,
4949
NANOARROW_TIME_UNIT_NANO,
5050
NANOARROW_TYPE_BOOL,
51+
NANOARROW_TYPE_BINARY,
5152
NANOARROW_TYPE_DECIMAL128,
5253
NANOARROW_TYPE_DOUBLE,
5354
NANOARROW_TYPE_FLOAT,
5455
NANOARROW_TYPE_INT64,
56+
NANOARROW_TYPE_LARGE_BINARY,
57+
NANOARROW_TYPE_LARGE_STRING,
5558
NANOARROW_TYPE_STRING,
5659
NANOARROW_TYPE_TIMESTAMP,
5760
)

src/oracledb/impl/base/converters.pyx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,12 @@ cdef int convert_oracle_data_to_arrow(OracleMetadata from_metadata,
232232
arrow_array.append_double(data.buffer.as_double)
233233
elif arrow_type == NANOARROW_TYPE_FLOAT:
234234
arrow_array.append_float(data.buffer.as_float)
235-
elif arrow_type == NANOARROW_TYPE_STRING:
235+
elif arrow_type in (
236+
NANOARROW_TYPE_BINARY,
237+
NANOARROW_TYPE_STRING,
238+
NANOARROW_TYPE_LARGE_BINARY,
239+
NANOARROW_TYPE_LARGE_STRING
240+
):
236241
rb = &data.buffer.as_raw_bytes
237242
arrow_array.append_bytes(<void*> rb.ptr, rb.num_bytes)
238243
elif arrow_type == NANOARROW_TYPE_TIMESTAMP:

src/oracledb/impl/base/cursor.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ cdef class BaseCursorImpl:
221221
var_impl.outconverter = conn_impl.decode_oson
222222
elif metadata.is_json and db_type_num != DB_TYPE_NUM_JSON:
223223
var_impl.outconverter = self._build_json_converter_fn()
224-
elif not C_DEFAULTS.fetch_lobs:
224+
elif not C_DEFAULTS.fetch_lobs or self.fetching_arrow:
225225
if db_type_num == DB_TYPE_NUM_BLOB:
226226
var_impl.metadata.dbtype = DB_TYPE_LONG_RAW
227227
var_impl._fetch_metadata.dbtype = DB_TYPE_LONG_RAW

src/oracledb/impl/base/metadata.pyx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,12 @@ cdef class OracleMetadata:
8484
DB_TYPE_NUM_TIMESTAMP_LTZ,
8585
DB_TYPE_NUM_TIMESTAMP_TZ):
8686
self._arrow_type = NANOARROW_TYPE_TIMESTAMP
87+
elif db_type_num == DB_TYPE_NUM_LONG_RAW:
88+
self._arrow_type = NANOARROW_TYPE_LARGE_BINARY
89+
elif db_type_num == DB_TYPE_NUM_LONG_VARCHAR:
90+
self._arrow_type = NANOARROW_TYPE_LARGE_STRING
91+
elif db_type_num == DB_TYPE_NUM_RAW:
92+
self._arrow_type = NANOARROW_TYPE_BINARY
8793
else:
8894
errors._raise_err(errors.ERR_ARROW_UNSUPPORTED_DATA_TYPE,
8995
db_type_name=self.dbtype.name)

src/oracledb/interchange/column.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,12 @@
4545
NANOARROW_TIME_UNIT_MILLI,
4646
NANOARROW_TIME_UNIT_MICRO,
4747
NANOARROW_TIME_UNIT_NANO,
48+
NANOARROW_TYPE_BINARY,
4849
NANOARROW_TYPE_DOUBLE,
4950
NANOARROW_TYPE_FLOAT,
5051
NANOARROW_TYPE_INT64,
52+
NANOARROW_TYPE_LARGE_BINARY,
53+
NANOARROW_TYPE_LARGE_STRING,
5154
NANOARROW_TYPE_STRING,
5255
NANOARROW_TYPE_TIMESTAMP,
5356
NANOARROW_TYPE_DECIMAL128,
@@ -88,7 +91,13 @@ def _offsets_buffer(self):
8891
offsets_buffer = OracleColumnBuffer(
8992
size_in_bytes=size_bytes, address=address, buffer_type="offsets"
9093
)
91-
dtype = (DtypeKind.INT, 32, "i", "=")
94+
if self.ora_arrow_array.arrow_type in (
95+
NANOARROW_TYPE_LARGE_STRING,
96+
NANOARROW_TYPE_LARGE_BINARY,
97+
):
98+
dtype = (DtypeKind.INT, 64, "l", "=")
99+
else:
100+
dtype = (DtypeKind.INT, 32, "i", "=")
92101
return offsets_buffer, dtype
93102

94103
def _validity_buffer(self):
@@ -149,6 +158,12 @@ def dtype(self) -> Dtype:
149158
f"d:{array.precision}.{array.scale}",
150159
"=",
151160
)
161+
elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_BINARY:
162+
return (DtypeKind.STRING, 8, "z", "=")
163+
elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_LARGE_BINARY:
164+
return (DtypeKind.STRING, 8, "Z", "=")
165+
elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_LARGE_STRING:
166+
return (DtypeKind.STRING, 8, "U", "=")
152167

153168
def get_buffers(self) -> ColumnBuffers:
154169
"""

src/oracledb/interchange/nanoarrow_bridge.pxd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,13 @@ cdef extern from "nanoarrow.h":
4848

4949
cpdef enum ArrowType:
5050
NANOARROW_TYPE_BOOL
51+
NANOARROW_TYPE_BINARY
5152
NANOARROW_TYPE_DECIMAL128
5253
NANOARROW_TYPE_DOUBLE
5354
NANOARROW_TYPE_FLOAT
5455
NANOARROW_TYPE_INT64
56+
NANOARROW_TYPE_LARGE_BINARY
57+
NANOARROW_TYPE_LARGE_STRING
5558
NANOARROW_TYPE_STRING
5659
NANOARROW_TYPE_TIMESTAMP
5760

src/oracledb/interchange/nanoarrow_bridge.pyx

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,10 @@ cdef class OracleArrowArray:
302302
ArrowDecimalSetBytes(&decimal, ptr)
303303
_check_nanoarrow(ArrowArrayAppendDecimal(self.arrow_array,
304304
&decimal))
305-
elif array.arrow_type == NANOARROW_TYPE_STRING:
305+
elif array.arrow_type in (
306+
NANOARROW_TYPE_BINARY,
307+
NANOARROW_TYPE_STRING
308+
):
306309
offsets_buffer = ArrowArrayBuffer(array.arrow_array, 1)
307310
data_buffer = ArrowArrayBuffer(array.arrow_array, 2)
308311
as_int32 = <int32_t*> offsets_buffer.data
@@ -316,6 +319,23 @@ cdef class OracleArrowArray:
316319
finally:
317320
cpython.PyMem_Free(temp)
318321

322+
elif array.arrow_type in (
323+
NANOARROW_TYPE_LARGE_BINARY,
324+
NANOARROW_TYPE_LARGE_STRING
325+
):
326+
offsets_buffer = ArrowArrayBuffer(array.arrow_array, 1)
327+
data_buffer = ArrowArrayBuffer(array.arrow_array, 2)
328+
as_int64 = <int64_t*> offsets_buffer.data
329+
start_offset = as_int64[index]
330+
end_offset = as_int64[index + 1]
331+
temp = cpython.PyMem_Malloc(end_offset - start_offset)
332+
memcpy(temp, &data_buffer.data[start_offset],
333+
end_offset - start_offset)
334+
try:
335+
self.append_bytes(temp, end_offset - start_offset)
336+
finally:
337+
cpython.PyMem_Free(temp)
338+
319339
cdef int append_null(self) except -1:
320340
"""
321341
Append a null value to the array.

tests/test_8000_dataframe.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,8 @@ def test_8015(self):
466466
self.assertEqual(col.null_count, 1)
467467

468468
def test_8016(self):
469-
"8016 - check unsupported error for LOBs"
470-
statement = "select to_clob('test_8016') from dual"
469+
"8016 - check unsupported error"
470+
statement = "select cursor(select user from dual) from dual"
471471
with self.assertRaisesFullCode("DPY-3030"):
472472
self.conn.fetch_df_all(statement)
473473

@@ -538,6 +538,48 @@ def test_8022(self):
538538
fetched_data = self.__get_data_from_df(fetched_df)
539539
self.assertEqual(fetched_data, data)
540540

541+
def test_8023(self):
542+
"8023 - fetch clob"
543+
data = [("test_8023",)]
544+
self.__check_interop()
545+
ora_df = self.conn.fetch_df_all(
546+
"select to_clob('test_8023') from dual"
547+
)
548+
fetched_tab = pyarrow.Table.from_arrays(
549+
ora_df.column_arrays(), names=ora_df.column_names()
550+
)
551+
fetched_df = fetched_tab.to_pandas()
552+
fetched_data = self.__get_data_from_df(fetched_df)
553+
self.assertEqual(fetched_data, data)
554+
555+
def test_8024(self):
556+
"8024 - fetch blob"
557+
data = [(b"test_8024",)]
558+
self.__check_interop()
559+
ora_df = self.conn.fetch_df_all(
560+
"select to_blob(utl_raw.cast_to_raw('test_8024')) from dual"
561+
)
562+
fetched_tab = pyarrow.Table.from_arrays(
563+
ora_df.column_arrays(), names=ora_df.column_names()
564+
)
565+
fetched_df = fetched_tab.to_pandas()
566+
fetched_data = self.__get_data_from_df(fetched_df)
567+
self.assertEqual(fetched_data, data)
568+
569+
def test_8025(self):
570+
"8025 - fetch raw"
571+
data = [(b"test_8025",)]
572+
self.__check_interop()
573+
ora_df = self.conn.fetch_df_all(
574+
"select utl_raw.cast_to_raw('test_8025') from dual"
575+
)
576+
fetched_tab = pyarrow.Table.from_arrays(
577+
ora_df.column_arrays(), names=ora_df.column_names()
578+
)
579+
fetched_df = fetched_tab.to_pandas()
580+
fetched_data = self.__get_data_from_df(fetched_df)
581+
self.assertEqual(fetched_data, data)
582+
541583

542584
if __name__ == "__main__":
543585
test_env.run_test_cases()

tests/test_8100_dataframe_async.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -478,8 +478,8 @@ async def test_8115(self):
478478
self.assertEqual(col.null_count, 1)
479479

480480
async def test_8116(self):
481-
"8116 - check unsupported error for LOBs"
482-
statement = "select to_clob('test_8116') from dual"
481+
"8116 - check unsupported error"
482+
statement = "select cursor(select user from dual) from dual"
483483
with self.assertRaisesFullCode("DPY-3030"):
484484
await self.conn.fetch_df_all(statement)
485485

0 commit comments

Comments
 (0)