Update doc and samples for new features.

anthony-tuininga · anthony-tuininga · commit 3bc7d376207f · 2025-02-19T18:36:16.000-07:00
diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt
@@ -697,3 +697,29 @@ software distributed under the License is distributed on an
 KIND, either express or implied.  See the License for the
 specific language governing permissions and limitations
 under the License.
+
+___________________________________________________________________________________________
+
+Python dataframe interchange protocol
+
+MIT License
+
+Copyright (c) 2020 Consortium for Python Data API Standards contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst
@@ -96,7 +96,8 @@ Common Changes
     :meth:`AsyncConnection.fetch_df_batches()` to fetch data as DataFrames
     compliant with the Python DataFrame Interchange protocol.  See
     :ref:`dataframeformat`.
-#)  Added support for Oracle Database 23.7 SPARSE vectors.
+#)  Added support for Oracle Database 23.7
+    :ref:`SPARSE vectors <sparsevectors>`.
 #)  Added support for :ref:`naming and caching connection pools
     <connpoolcache>` during creation, and retrieving them later from the
     python-oracledb pool cache with :meth:`oracledb.get_pool()`.
diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst
@@ -743,12 +743,13 @@ Fetching using the DataFrame Interchange Protocol
 
 Python-oracledb can fetch directly to the `Python DataFrame Interchange
 Protocol <https://data-apis.org/dataframe-protocol/latest/index.html>`__
-format. This then allows zero-copy data interchanges between Python data frame
-libraries. It is an efficient way to work with data using Python libraries such
-as `Apache Arrow <https://arrow.apache.org/>`__, `Pandas
-<https://pandas.pydata.org>`__, `Polars <https://pola.rs/>`__, `NumPy
-<https://numpy.org/>`__, `PyTorch <https://pytorch.org/>`__, or to write files
-in `Apache Parquet <https://parquet.apache.org/>`__ format.
+format. This can reduce application memory requirements and allow zero-copy
+data interchanges between Python data frame libraries. It is an efficient way
+to work with data using Python libraries such as `Apache Arrow
+<https://arrow.apache.org/>`__, `Pandas <https://pandas.pydata.org>`__, `Polars
+<https://pola.rs/>`__, `NumPy <https://numpy.org/>`__, `PyTorch
+<https://pytorch.org/>`__, or to write files in `Apache Parquet
+<https://parquet.apache.org/>`__ format.
 
 .. note::
 
@@ -914,7 +915,6 @@ org/docs/reference/api/pandas.DataFrame.html#pandas.DataFrame>`__ is:
     odf = connection.fetch_df_all(statement=sql, parameters=[myid], arraysize=1000)
 
     # Get a Pandas DataFrame from the data.
-    # This is a zero copy call
     df = pandas.api.interchange.from_dataframe(odf)
 
     # Perform various Pandas operations on the DataFrame
diff --git a/doc/src/user_guide/vector_data_type.rst b/doc/src/user_guide/vector_data_type.rst
@@ -218,14 +218,30 @@ Using SPARSE Vectors
 ====================
 
 A sparse vector is a vector that has a zero value for most of its dimensions.
-This vector only physically stores the non-zero values. A sparse vector is
-supported when you are using Oracle Database 23.7 or later.
+This vector only physically stores the non-zero values. For more information
+on sparse vectors, see the `Oracle AI Vector Search User's Guide <https://
+www.oracle.com/pls/topic/lookup?ctx=dblatest&id=GUID-6015566C-3277-4A3C-8DD0-
+08B346A05478>`__.
 
-Sparse vectors can store the total number of dimensions, an array of indices,
-and an array of values. The storage formats that can be used with sparse
-vectors are float32, float64, and int8. Note that the binary storage format
-cannot be used with sparse vectors. You can define a column for a sparse
-vector using the following format::
+Sparse vectors are supported when you are using Oracle Database 23.7 or later.
+
+Sparse vectors are represented by the total number of vector dimensions, an
+array of indices, and an array of values where each value's location in the
+vector is indicated by the corresponding indices array position. All other
+vector values are treated as zero.  The storage formats that can be used with
+sparse vectors are float32, float64, and int8. Note that the binary storage
+format cannot be used with sparse vectors.
+
+For example, a string representation could be::
+
+    [25, [5, 8, 11], [25.25, 6.125, 8.25]]
+
+In this example, the sparse vector has 25 dimensions. Only indices 5, 8, and 11
+have values, which are 25.25, 6.125, and 8.25 respectively. All of the other
+values are zero.
+
+In Oracle Database, you can define a column for a sparse vector using the
+following format::
 
     VECTOR(number_of_dimensions, dimension_storage_format, sparse)
 
@@ -239,7 +255,7 @@ For example, to create a table with three columns for sparse vectors:
         int8sparsecol vector(35, int8, sparse)
     )
 
-In this example the:
+In this example:
 
 - The float32sparsecol column can store sparse vector data of 25 dimensions
   where each dimension value is a 32-bit floating-point number.
@@ -256,18 +272,9 @@ Inserting SPARSE Vectors
 ------------------------
 
 With python-oracledb, sparse vector data can be inserted using
-:ref:`SparseVector objects <sparsevectorsobj>`. You can specify the number of
-dimensions, an array of indices, and an array of values as the data for a
-sparse vector. For example, the string representation is::
-
-    [25, [5,8,11], [25.25, 6.125, 8.25]]
-
-In this example, the sparse vector has 25 dimensions. Only indices 5, 8, and
-11 have values 25.25, 6.125, and 8.25 respectively. All of the other values
-are zero.
-
-The SparseVector objects are used as bind values when inserting sparse vector
-columns. For example:
+:ref:`SparseVector objects <sparsevectorsobj>`.  The SparseVector objects are
+used when fetching vectors, and as bind values when inserting sparse vector
+columns. For example, to insert data:
 
 .. code-block:: python
 
@@ -289,7 +296,7 @@ columns. For example:
     )
 
     cursor.execute(
-        "insert into vector_sparse_table (:1, :2, :3)",
+        "insert into vector_sparse_table values (:1, :2, :3)",
         [float32_val, float64_val, int8_val]
     )
 
@@ -298,23 +305,43 @@ columns. For example:
 Fetching Sparse Vectors
 -----------------------
 
-With python-oracledb, sparse vector columns are fetched in the same format
-accepted by Oracle Database by using the str() function. For example:
+With python-oracledb, sparse vector columns are fetched as :ref:`SparseVector
+objects <sparsevectorsobj>`:
 
 .. code-block:: python
 
-    cursor.execute("select * from vec_sparse")
+    cursor.execute("select * from vector_sparse_table")
+    for row in cursor:
+        print(row)
+
+
+This prints::
+
+    (oracledb.SparseVector(25, array('I', [6, 10, 18]), array('f', [26.25, 129.625, 579.875])),
+     oracledb.SparseVector(30, array('I', [9, 16, 24]), array('d', [19.125, 78.5, 977.375])),
+     oracledb.SparseVector(35, array('I', [10, 20, 30]), array('b', [26, 125, -37])))
+
+When a SparseVector value is printed on its own, its string form is shown:
+
+.. code-block:: python
+
+    cursor.execute("select * from vector_sparse_table")
     for float32_val, float64_val, int8_val in cursor:
-        print("float32:", str(float32_val))
-        print("float64:", str(float64_val))
-        print("int8:", str(int8_val))
+        print("float32:", float32_val)
+        print("float64:", float64_val)
+        print("int8:", int8_val)
 
-This prints the following output::
+This prints::
 
     float32: [25, [6, 10, 18], [26.25, 129.625, 579.875]]
     float64: [30, [9, 16, 24], [19.125, 78.5, 977.375]]
     int8: [35, [10, 20, 30], [26, 125, -37]]
 
+Values can also be explicitly passed to `str()
+<https://docs.python.org/3/library/stdtypes.html#str>`__, if needed.
+
+**SPARSE Vector Metadata**
+
 The :ref:`FetchInfo <fetchinfoobj>` object that is returned as part of the
 fetched metadata contains attributes :attr:`FetchInfo.vector_dimensions`,
 :attr:`FetchInfo.vector_format`, and :attr:`FetchInfo.vector_is_sparse` which
diff --git a/samples/create_schema.py b/samples/create_schema.py
@@ -54,7 +54,7 @@
     sample_env.run_sql_script(
         conn, "create_schema_21", main_user=sample_env.get_main_user()
     )
-if sample_env.get_server_version() >= (23, 5):
+if sample_env.get_server_version() >= (23, 7):
     sample_env.run_sql_script(
         conn, "create_schema_23", main_user=sample_env.get_main_user()
     )
diff --git a/samples/dataframe_pandas.py b/samples/dataframe_pandas.py
@@ -51,7 +51,6 @@
 odf = connection.fetch_df_all(statement=SQL, arraysize=100)
 
 # Get a Pandas DataFrame from the data.
-# This is a zero copy call
 df = pandas.api.interchange.from_dataframe(odf)
 
 # Perform various Pandas operations on the DataFrame
diff --git a/samples/dataframe_pandas_async.py b/samples/dataframe_pandas_async.py
@@ -55,7 +55,6 @@ async def main():
     odf = await connection.fetch_df_all(statement=SQL, arraysize=100)
 
     # Get a Pandas DataFrame from the data.
-    # This is a zero copy call
     df = pandas.api.interchange.from_dataframe(odf)
 
     # Perform various Pandas operations on the DataFrame
diff --git a/samples/sql/create_schema_23.sql b/samples/sql/create_schema_23.sql
@@ -27,15 +27,16 @@
  *
  * Performs the actual work of creating and populating the schemas with the
  * database objects used by the python-oracledb samples that require Oracle
- * Database 23.5 or higher. It is executed by the Python script
+ * Database 23.7 or higher. It is executed by the Python script
  * create_schema.py.
  *---------------------------------------------------------------------------*/
 
 create table &main_user..SampleVectorTab (
-    v32  vector(3, float32),
-    v64  vector(3, float64),
-    v8   vector(3, int8),
-    vbin vector(24, binary)
+    v32       vector(3, float32),
+    v64       vector(3, float64),
+    v8        vector(3, int8),
+    vbin      vector(24, binary),
+    v64sparse vector(30, float64, sparse)
 )
 /
 
diff --git a/samples/vector.py b/samples/vector.py
@@ -45,16 +45,21 @@
     params=sample_env.get_connect_params(),
 )
 
-# this script only works with Oracle Database 23.5 or later
-if sample_env.get_server_version() < (23, 5):
-    sys.exit("This example requires Oracle Database 23.5 or later.")
+# this script only works with Oracle Database 23.7 or later
+#
+# The VECTOR datatype was initially introduced in Oracle Database 23.4.
+# The BINARY vector format was introduced in Oracle Database 23.5.
+# The SPARSE vector format was introduced in Oracle Database 23.7.
+
+if sample_env.get_server_version() < (23, 7):
+    sys.exit("This example requires Oracle Database 23.7 or later.")
 
-# this script works with thin mode, or with thick mode using Oracle Client 23.5
+# this script works with thin mode, or with thick mode using Oracle Client 23.7
 # or later
-if not connection.thin and oracledb.clientversion()[:2] < (23, 5):
+if not connection.thin and oracledb.clientversion()[:2] < (23, 7):
     sys.exit(
         "This example requires python-oracledb thin mode, or Oracle Client"
-        " 23.5 or later"
+        " 23.7 or later"
     )
 
 with connection.cursor() as cursor:
@@ -63,38 +68,66 @@
     vector1_data_64 = array.array("d", [11.25, 11.75, 11.5])
     vector1_data_8 = array.array("b", [1, 2, 3])
     vector1_data_bin = array.array("B", [180, 150, 100])
+    vector1_data_sparse64 = oracledb.SparseVector(
+        30, [9, 16, 24], array.array("d", [19.125, 78.5, 977.375])
+    )
 
     cursor.execute(
-        """insert into SampleVectorTab (v32, v64, v8, vbin)
-           values (:1, :2, :3, :4)""",
-        [vector1_data_32, vector1_data_64, vector1_data_8, vector1_data_bin],
+        """insert into SampleVectorTab (v32, v64, v8, vbin, v64sparse)
+           values (:1, :2, :3, :4, :5)""",
+        [
+            vector1_data_32,
+            vector1_data_64,
+            vector1_data_8,
+            vector1_data_bin,
+            vector1_data_sparse64,
+        ],
     )
 
     # Multi-row insert
     vector2_data_32 = array.array("f", [2.625, 2.5, 2.0])
     vector2_data_64 = array.array("d", [22.25, 22.75, 22.5])
     vector2_data_8 = array.array("b", [4, 5, 6])
     vector2_data_bin = array.array("B", [40, 15, 255])
+    vector2_data_sparse64 = oracledb.SparseVector(
+        30, [3, 10, 12], array.array("d", [2.5, 2.5, 1.0])
+    )
 
     vector3_data_32 = array.array("f", [3.625, 3.5, 3.0])
     vector3_data_64 = array.array("d", [33.25, 33.75, 33.5])
     vector3_data_8 = array.array("b", [7, 8, 9])
     vector3_data_bin = array.array("B", [0, 17, 101])
+    vector3_data_sparse64 = oracledb.SparseVector(
+        30, [8, 15, 29], array.array("d", [1.125, 200.5, 100.0])
+    )
 
     rows = [
-        (vector2_data_32, vector2_data_64, vector2_data_8, vector2_data_bin),
-        (vector3_data_32, vector3_data_64, vector3_data_8, vector3_data_bin),
+        (
+            vector2_data_32,
+            vector2_data_64,
+            vector2_data_8,
+            vector2_data_bin,
+            vector2_data_sparse64,
+        ),
+        (
+            vector3_data_32,
+            vector3_data_64,
+            vector3_data_8,
+            vector3_data_bin,
+            vector3_data_sparse64,
+        ),
     ]
 
     cursor.executemany(
-        """insert into SampleVectorTab (v32, v64, v8, vbin)
-           values (:1, :2, :3, :4)""",
+        """insert into SampleVectorTab (v32, v64, v8, vbin, v64sparse)
+           values (:1, :2, :3, :4, :5)""",
         rows,
     )
 
     # Query
     cursor.execute("select * from SampleVectorTab")
 
-    # Each vector is represented as an array.array type
+    # Each non-sparse vector is represented as an array.array type.
+    # Sparse vectors are represented as oracledb.SparseVector() instances
     for row in cursor:
         print(row)
diff --git a/samples/vector_async.py b/samples/vector_async.py

Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,7 @@`
`54`	`54`	`sample_env.run_sql_script(`
`55`	`55`	`conn, "create_schema_21", main_user=sample_env.get_main_user()`
`56`	`56`	`)`
`57`		`-if sample_env.get_server_version() >= (23, 5):`
	`57`	`+if sample_env.get_server_version() >= (23, 7):`
`58`	`58`	`sample_env.run_sql_script(`
`59`	`59`	`conn, "create_schema_23", main_user=sample_env.get_main_user()`
`60`	`60`	`)`