Added support for fetching data frames using asyncio.

anthony-tuininga · anthony-tuininga · commit c73b9373d35a · 2025-02-18T14:24:59.000-07:00
diff --git a/doc/src/api_manual/async_connection.rst b/doc/src/api_manual/async_connection.rst
@@ -124,6 +124,61 @@ AsyncConnection Methods
     This is a shortcut for calling :meth:`AsyncConnection.cursor()`,
     :meth:`AsyncCursor.executemany()`, and then :meth:`AsyncCursor.close()`.
 
+.. method:: AsyncConnection.fetch_df_all(statement, parameters=None, \
+            arraysize=None)
+
+    Fetches all rows of the SQL query ``statement``, returning them in an
+    :ref:`OracleDataFrame <oracledataframeobj>` object. An empty
+    OracleDataFrame is returned if there are no rows available.
+
+    The ``parameters`` parameter can be a list or tuple, where each item maps
+    to one :ref:`bind variable placeholder <bind>` in ``statement``. It can
+    also be a dictionary, where the keys match the bind variable placeholder
+    names in ``statement``.
+
+    The ``arraysize`` parameter can be specified to tune performance of fetching
+    data across the network. It defaults to :attr:`defaults.arraysize`.
+    Internally, the ``fetch_df_all()``'s :attr:`Cursor.prefetchrows` size is
+    always set to the value of the explicit or default ``arraysize`` parameter
+    value.
+
+    See :ref:`dataframeformat` for the supported data types and examples.
+
+    .. note::
+
+        The data frame support in python-oracledb 3.0.0 is a pre-release and
+        may change in the next version.
+
+    .. versionadded:: 3.0.0
+
+.. method:: AsyncConnection.fetch_df_batches(statement, parameters=None, \
+            size=None)
+
+    This returns an asynchronous iterator yielding the next ``size`` rows of
+    the SQL query ``statement`` in each iteration as an :ref:`OracleDataFrame
+    <oracledataframeobj>` object. An empty OracleDataFrame is returned if there
+    are no rows available.
+
+    The ``parameters`` parameter can be a list or tuple, where each item maps
+    to one :ref:`bind variable placeholder <bind>` in ``statement``. It can
+    also be a dictionary, where the keys match the bind variable placeholder
+    names in ``statement``.
+
+    The ``size`` parameter controls the number of records fetched in each
+    batch. It defaults to :attr:`defaults.arraysize`. Internally, the
+    ``fetch_df_batches()``'s :attr:`Cursor.arraysize` and
+    :attr:`Cursor.prefetchrows` sizes are always set to the value of the
+    explicit or default ``size`` parameter value.
+
+    See :ref:`dataframeformat` for the supported data types and examples.
+
+    .. note::
+
+        The data frame support in python-oracledb 3.0.0 is a pre-release and
+        may change in the next version.
+
+    .. versionadded:: 3.0.0
+
 .. method:: AsyncConnection.fetchall(statement, parameters=None, \
                 arraysize=None, rowfactory=None)
 
diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst
@@ -92,8 +92,10 @@ Thick Mode Changes
 Common Changes
 ++++++++++++++
 
-#)  Added new methods :meth:`Connection.fetch_df_all()` and
-    :meth:`Connection.fetch_df_batches()` to fetch data as DataFrames
+#)  Added new methods :meth:`Connection.fetch_df_all()`,
+    :meth:`Connection.fetch_df_batches()`,
+    :meth:`AsyncConnection.fetch_df_all()`, and
+    :meth:`AsyncConnection.fetch_df_batches()` to fetch data as DataFrames
     compliant with the Python DataFrame Interchange protocol.  See
     :ref:`dataframeformat`.
 #)  Added support for Oracle Database 23ai SPARSE vectors.
diff --git a/doc/src/user_guide/asyncio.rst b/doc/src/user_guide/asyncio.rst
@@ -171,7 +171,9 @@ You can also use shortcut methods on the :ref:`asyncconnobj` object such as
 :meth:`AsyncConnection.execute()` or
 :meth:`AsyncConnection.executemany()`. Rows can be fetched using one of the
 shortcut methods :meth:`AsyncConnection.fetchone()`,
-:meth:`AsyncConnection.fetchmany()`, or :meth:`AsyncConnection.fetchall()`.
+:meth:`AsyncConnection.fetchmany()`, :meth:`AsyncConnection.fetchall()`,
+:meth:`AsyncConnection.fetch_df_all()`, or
+:meth:`AsyncConnection.fetch_df_batches()`.
 
 An example of using :meth:`AsyncConnection.fetchall()`:
 
diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst
@@ -13,7 +13,9 @@ executed. Statements are executed using one of these methods
 :meth:`Cursor.execute()`, :meth:`Cursor.executemany()`,
 :meth:`Connection.fetch_df_all()`, :meth:`Connection.fetch_df_batches()`,
 :meth:`AsyncCursor.execute()`, :meth:`AsyncCursor.executemany()`,
-:meth:`AsyncConnection.execute()`, :meth:`AsyncConnection.executemany()`, or
+:meth:`AsyncConnection.execute()`, :meth:`AsyncConnection.executemany()`,
+:meth:`AsyncConnection.fetch_df_all()`,
+:meth:`AsyncConnection.fetch_df_batches()`, or
 :meth:`AsyncConnection.run_pipeline()`.
 
 This chapter discusses python-oracledb's synchronous methods. The asynchronous
diff --git a/samples/dataframe_pandas_async.py b/samples/dataframe_pandas_async.py
@@ -0,0 +1,97 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2025, Oracle and/or its affiliates.
+#
+# This software is dual-licensed to you under the Universal Permissive License
+# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
+# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
+# either license.
+#
+# If you elect to accept the software under the Apache License, Version 2.0,
+# the following applies:
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# dataframe_pandas_async.py
+#
+# An asynchronous version of dataframe_pandas.py
+#
+# Shows how to use AsyncConnection.fetch_df_all() and
+# AsyncConnection.fetch_df_batches(). This example then creates Pandas
+# dataframes. Alternative dataframe libraries could be used similar to the
+# other, synchronous, data frame samples.
+# -----------------------------------------------------------------------------
+
+import asyncio
+
+import pandas
+import oracledb
+import sample_env
+
+
+async def main():
+    connection = await oracledb.connect_async(
+        user=sample_env.get_main_user(),
+        password=sample_env.get_main_password(),
+        dsn=sample_env.get_connect_string(),
+        params=sample_env.get_connect_params(),
+    )
+
+    SQL = "select id, name from SampleQueryTab order by id"
+
+    # Get an OracleDataFrame.
+    # Adjust arraysize to tune the query fetch performance
+    odf = await connection.fetch_df_all(statement=SQL, arraysize=100)
+
+    # Get a Pandas DataFrame from the data.
+    # This is a zero copy call
+    df = pandas.api.interchange.from_dataframe(odf)
+
+    # Perform various Pandas operations on the DataFrame
+
+    print("Columns:")
+    print(df.columns)
+
+    print("\nDataframe description:")
+    print(df.describe())
+
+    print("\nLast three rows:")
+    print(df.tail(3))
+
+    print("\nTransform:")
+    print(df.T)
+
+    # -------------------------------------------------------------------------
+
+    # An example of batch fetching
+    #
+    # Note that since this particular example ends up with all query rows being
+    # held in memory, it would be more efficient to use fetch_df_all() as shown
+    # above.
+
+    print("\nFetching in batches:")
+    df = pandas.DataFrame()
+
+    # Tune 'size' for your data set. Here it is small to show the batch fetch
+    # behavior on the sample table.
+    async for odf in connection.fetch_df_batches(statement=SQL, size=10):
+        df_b = pandas.api.interchange.from_dataframe(odf)
+        print(f"Appending {df_b.shape[0]} rows")
+        df = pandas.concat([df, df_b], ignore_index=True)
+
+    print("\nLast three rows:")
+    print(df.tail(3))
+
+
+asyncio.run(main())
diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py
@@ -1781,6 +1781,44 @@ async def fetchall(
             cursor.rowfactory = rowfactory
             return await cursor.fetchall()
 
+    async def fetch_df_all(
+        self,
+        statement: str,
+        parameters: Optional[Union[list, tuple, dict]] = None,
+        arraysize: Optional[int] = None,
+    ):
+        """
+        Fetch all data as OracleDataFrame.
+        """
+        cursor = self.cursor()
+        cursor._impl.fetching_arrow = True
+        if arraysize is not None:
+            cursor.arraysize = arraysize
+        cursor.prefetchrows = cursor.arraysize
+        await cursor.execute(statement, parameters)
+        return await cursor._impl.fetch_df_all(cursor)
+
+    async def fetch_df_batches(
+        self,
+        statement: str,
+        parameters: Optional[Union[list, tuple, dict]] = None,
+        size: Optional[int] = None,
+    ):
+        """
+        Fetch data in batches. Each batch is an OracleDataFrame
+        """
+        cursor = self.cursor()
+        cursor._impl.fetching_arrow = True
+        if size is not None:
+            cursor.arraysize = size
+        cursor.prefetchrows = cursor.arraysize
+        await cursor.execute(statement, parameters)
+        if size is None:
+            yield await cursor._impl.fetch_df_all(cursor)
+        else:
+            async for df in cursor._impl.fetch_df_batches(cursor, size):
+                yield df
+
     async def fetchmany(
         self,
         statement: str,
diff --git a/src/oracledb/impl/thin/cursor.pyx b/src/oracledb/impl/thin/cursor.pyx
@@ -340,6 +340,27 @@ cdef class AsyncThinCursorImpl(BaseThinCursorImpl):
             await protocol._process_single_message(message)
         self.warning = message.warning
 
+    async def fetch_df_all(self, cursor):
+        """
+        Internal method used for fetching all data as OracleDataFrame.
+
+        Awaits _fetch_rows_async() for the supplied cursor repeatedly for as
+        long as _more_rows_to_fetch is set, then returns the result of
+        _finish_building_arrow_arrays().
+        """
+        # keep fetching until the implementation reports no more rows
+        while self._more_rows_to_fetch:
+            await self._fetch_rows_async(cursor)
+        return self._finish_building_arrow_arrays()
+
+    async def fetch_df_batches(self, cursor, int batch_size):
+        """
+        Internal method used for fetching data in batches, each batch being
+        yielded as an OracleDataFrame.
+
+        This is an asynchronous generator. The first value is yielded
+        unconditionally, before any further fetch is awaited; subsequent
+        values are only yielded when _buffer_rowcount is greater than zero
+        after a fetch.
+
+        NOTE(review): batch_size is not referenced in this body; presumably
+        the number of rows per batch is governed by the arraysize the caller
+        set on the cursor -- confirm.
+        """
+        # Return the prefetched batch
+        yield self._finish_building_arrow_arrays()
+
+        while self._more_rows_to_fetch:
+            # presumably starts a fresh set of arrays so that the batch
+            # already handed to the caller is not modified -- TODO confirm
+            self._create_arrow_arrays()
+            await self._fetch_rows_async(cursor)
+            # skip the yield when the fetch left nothing in the buffer
+            if self._buffer_rowcount > 0:
+                yield self._finish_building_arrow_arrays()
+
     async def fetch_next_row(self, cursor):
         """
         Internal method used for fetching the next row from a cursor.
diff --git a/tests/test_8100_dataframe_async.py b/tests/test_8100_dataframe_async.py
diff --git a/utils/templates/connection.py b/utils/templates/connection.py