From 34817b252c749d609b9c8e5300f428e071768fc1 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:18:02 -0700 Subject: [PATCH 001/178] Update ODPI-C. --- src/oracledb/impl/thick/odpi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oracledb/impl/thick/odpi b/src/oracledb/impl/thick/odpi index 56f155ed..31e7bd40 160000 --- a/src/oracledb/impl/thick/odpi +++ b/src/oracledb/impl/thick/odpi @@ -1 +1 @@ -Subproject commit 56f155ed070c0b6ed44942aea12fab7ef9d07dc3 +Subproject commit 31e7bd40301e264459fe2654ecf55c150c6277a7 From 52d836412d2b270fa3df60c636808a1584dcff83 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:28:05 -0700 Subject: [PATCH 002/178] Test suite improvements. --- tests/ext/test_env.py | 6 - tests/sql/create_schema.sql | 27 +++ tests/test_4100_cursor_callproc.py | 300 +++++++++++++++++++++++++++++ tests/test_4500_connect_params.py | 77 +++++++- tests/test_6400_vector_var.py | 85 +++++--- tests/test_7200_tnsnames.py | 40 ++++ tests/test_env.py | 6 + 7 files changed, 505 insertions(+), 36 deletions(-) diff --git a/tests/ext/test_env.py b/tests/ext/test_env.py index 3c0fca7c..b2fa1129 100644 --- a/tests/ext/test_env.py +++ b/tests/ext/test_env.py @@ -29,8 +29,6 @@ import configparser import os -import secrets -import string dir_name = os.path.dirname(os.path.dirname(__file__)) file_name = os.path.join(dir_name, os.path.basename(__file__)) @@ -75,7 +73,3 @@ def get_extended_config_str(name, fallback=None): return _extended_config.parser.get( _extended_config.section_name, name, fallback=fallback ) - - -def get_random_string(length=10): - return "".join(secrets.choice(string.ascii_letters) for i in range(length)) diff --git a/tests/sql/create_schema.sql b/tests/sql/create_schema.sql index 14074d5b..ede9aa9f 100644 --- a/tests/sql/create_schema.sql +++ b/tests/sql/create_schema.sql @@ -632,6 +632,20 @@ begin end; / +create procedure &main_user..proc_Test2 ( + a_InValue varchar2, + a_InOutValue in out number, + a_OutValue out boolean +) as +begin + a_InOutValue := a_InOutValue * length(a_InValue); + a_OutValue := false; + if length(a_InValue) >= 2 then + a_OutValue := true; + end if; +end; +/ + create procedure &main_user..proc_TestNoArgs as begin null; @@ -659,6 +673,19 @@ begin end; / +create function &main_user..func_Test2 ( + a_String varchar2, + a_ExtraAmount number, + a_Boolean boolean +) return number as +begin + if a_Boolean then + return length(a_String) + a_ExtraAmount; + end if; + return length(a_String) - a_ExtraAmount; +end; +/ + create function &main_user..func_TestNoArgs return number as begin diff --git a/tests/test_4100_cursor_callproc.py b/tests/test_4100_cursor_callproc.py index 267380e3..62a11006 100644 --- a/tests/test_4100_cursor_callproc.py +++ b/tests/test_4100_cursor_callproc.py @@ -144,6 +144,306 @@ def test_4110(self): self.assertEqual(results, ["hi", 10]) self.assertEqual(out_value.getvalue(), 2.0) + def test_4111(self): + "4111 - test callproc with setinputsizes" + out_value = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, oracledb.DB_TYPE_NUMBER, out_value + ) + results = self.cursor.callproc("proc_Test2", ("hi", 5, out_value)) + self.assertEqual(results, ["hi", 10, True]) + self.assertTrue(out_value.getvalue()) + + def test_4112(self): + "4112 - test callfunc with setinputsizes" + self.cursor.setinputsizes( + oracledb.DB_TYPE_NUMBER, + oracledb.DB_TYPE_VARCHAR, + oracledb.DB_TYPE_NUMBER, + oracledb.DB_TYPE_BOOLEAN, + ) + results = 
self.cursor.callfunc( + "func_Test2", oracledb.NUMBER, ("hi", 5, True) + ) + self.assertEqual(results, 7) + + def test_4113(self): + "4113 - test callproc with setinputsizes with kwargs" + out_value = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, oracledb.DB_TYPE_NUMBER, out_value + ) + kwargs = dict(a_OutValue=out_value) + results = self.cursor.callproc("proc_Test2", ("hi", 5), kwargs) + self.assertEqual(results, ["hi", 10]) + self.assertTrue(out_value.getvalue()) + + out_value = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, oracledb.DB_TYPE_NUMBER, out_value + ) + kwargs = dict(a_InValue="hi", a_InOutValue=5, a_OutValue=out_value) + results = self.cursor.callproc("proc_Test2", [], kwargs) + self.assertEqual(results, []) + self.assertTrue(out_value.getvalue()) + + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, + oracledb.DB_TYPE_NUMBER, + oracledb.DB_TYPE_BOOLEAN, + ) + kwargs = dict(a_InValue="hi", a_InOutValue=5, a_OutValue=out_value) + results = self.cursor.callproc("proc_Test2", [], kwargs) + self.assertEqual(results, []) + self.assertTrue(out_value.getvalue()) + + def test_4114(self): + "4114 - test callproc with setinputsizes with kwargs in mixed order" + out_value = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, oracledb.DB_TYPE_NUMBER, out_value + ) + kwargs = dict(a_OutValue=out_value, a_InValue="hi", a_InOutValue=5) + with self.assertRaisesFullCode("ORA-06550"): + results = self.cursor.callproc( + "proc_Test2", keyword_parameters=kwargs + ) + self.assertEqual(results, []) + self.assertTrue(out_value.getvalue()) + + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, + oracledb.DB_TYPE_NUMBER, + oracledb.DB_TYPE_BOOLEAN, + ) + with self.assertRaisesFullCode("ORA-06550"): + self.cursor.callproc("proc_Test2", keyword_parameters=kwargs) + + def test_4115(self): + "4115 - test callfunc with setinputsizes with kwargs" + extra_amount = self.cursor.var(oracledb.DB_TYPE_NUMBER) + extra_amount.setvalue(0, 5) + test_values = [ + (["hi"], dict(a_ExtraAmount=extra_amount, a_Boolean=True)), + ( + [], + dict( + a_String="hi", a_ExtraAmount=extra_amount, a_Boolean=True + ), + ), + ] + for args, kwargs in test_values: + self.cursor.setinputsizes( + oracledb.DB_TYPE_NUMBER, + oracledb.DB_TYPE_VARCHAR, + oracledb.DB_TYPE_NUMBER, + oracledb.DB_TYPE_BOOLEAN, + ) + results = self.cursor.callfunc( + "func_Test2", oracledb.DB_TYPE_NUMBER, args, kwargs + ) + self.assertEqual(results, 7) + + def test_4116(self): + "4116 - test callproc with setinputsizes with extra arguments" + out_value = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + test_values = [ + (("hi", 5, out_value), None), + (("hi",), dict(a_InOutValue=5, a_OutValue=out_value)), + ([], dict(a_InValue="hi", a_InOutValue=5, a_OutValue=out_value)), + ] + for args, kwargs in test_values: + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, + oracledb.NUMBER, + out_value, + oracledb.DB_TYPE_VARCHAR, # extra argument + ) + with self.assertRaisesFullCode("ORA-01036", "DPY-4009"): + self.cursor.callproc("proc_Test2", args, kwargs) + + def test_4117(self): + "4117 - test callfunc with setinputsizes with extra arguments" + extra_amount = self.cursor.var(oracledb.DB_TYPE_NUMBER) + extra_amount.setvalue(0, 5) + test_values = [ + (["hi", extra_amount], None), + (["hi"], dict(a_ExtraAmount=extra_amount)), + ([], dict(a_ExtraAmount=extra_amount, a_String="hi")), + ] + for args, kwargs in 
test_values: + self.cursor.setinputsizes( + oracledb.DB_TYPE_NUMBER, + oracledb.DB_TYPE_VARCHAR, + oracledb.DB_TYPE_NUMBER, + oracledb.DB_TYPE_BOOLEAN, + oracledb.DB_TYPE_VARCHAR, # extra argument + ) + with self.assertRaisesFullCode("ORA-01036", "DPY-4009"): + self.cursor.callfunc( + "func_Test2", oracledb.DB_TYPE_NUMBER, args, kwargs + ) + + def test_4118(self): + "4118 - test callproc with setinputsizes with too few parameters" + out_value = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + + # setinputsizes for 2 args (missed 1 args) + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, oracledb.DB_TYPE_NUMBER + ) + results = self.cursor.callproc("proc_Test2", ("hi", 5, out_value)) + self.assertEqual(results, ["hi", 10, out_value.getvalue()]) + self.assertTrue(out_value.getvalue()) + + # setinputsizes for 2 args (missed 1 kwargs) + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, oracledb.DB_TYPE_NUMBER + ) + kwargs = dict(a_OutValue=out_value) + results = self.cursor.callproc("proc_Test2", ("hi", 5), kwargs) + self.assertEqual(results, ["hi", 10]) + self.assertTrue(out_value.getvalue()) + + # setinputsizes for 1 args (missed 2 args) + self.cursor.setinputsizes(oracledb.DB_TYPE_VARCHAR) + results = self.cursor.callproc("proc_Test2", ("hi", 5, out_value)) + self.assertEqual(results, ["hi", 10, out_value.getvalue()]) + self.assertTrue(out_value.getvalue()) + + # setinputsizes for 1 args (missed 1 args and 1 kwargs) + self.cursor.setinputsizes(oracledb.DB_TYPE_VARCHAR) + kwargs = dict(a_OutValue=out_value) + results = self.cursor.callproc("proc_Test2", ("hi", 5), kwargs) + self.assertEqual(results, ["hi", 10]) + self.assertTrue(out_value.getvalue()) + + # setinputsizes for 2 kwargs (missed 1 kwargs) + self.cursor.setinputsizes( + oracledb.DB_TYPE_VARCHAR, oracledb.DB_TYPE_NUMBER + ) + kwargs = dict(a_InValue="hi", a_InOutValue=5, a_OutValue=out_value) + results = self.cursor.callproc("proc_Test2", [], kwargs) + self.assertEqual(results, []) + self.assertTrue(out_value.getvalue()) + + def test_4119(self): + """ + 4119 - test callproc with setinputsizes with wrong order of parameters + """ + # setinputsizes for 2 args (missed 1 kwargs) + out_value = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + self.cursor.setinputsizes(bool, oracledb.DB_TYPE_VARCHAR) + kwargs = dict(a_OutValue=out_value) + with self.assertRaisesFullCode("ORA-06550"): + self.cursor.callproc("proc_Test2", ["hi", 5], kwargs) + + # setinputsizes for 2 kwargs (missed 1 kwargs) + self.cursor.setinputsizes(bool, oracledb.DB_TYPE_VARCHAR) + kwargs = dict(a_InValue="hi", a_InOutValue=5, a_OutValue=out_value) + with self.assertRaisesFullCode("ORA-06550"): + self.cursor.callproc("proc_Test2", [], kwargs) + + def test_4120(self): + "4120 - test callfunc with setinputsizes with too few parameters" + # setinputsizes for return_type and 1 kwargs (missed 2 kwargs) + bool_var = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + bool_var.setvalue(0, False) + kwargs = dict(a_Boolean=bool_var, a_String="hi", a_ExtraAmount=3) + self.cursor.setinputsizes(oracledb.NUMBER, oracledb.DB_TYPE_VARCHAR) + results = self.cursor.callfunc( + "func_Test2", oracledb.NUMBER, [], kwargs + ) + self.assertEqual(results, -1) + + # setinputsizes for return_type (missed 3 kwargs) + bool_var.setvalue(0, False) + kwargs = dict(a_Boolean=bool_var, a_String="hi", a_ExtraAmount=1) + self.cursor.setinputsizes(oracledb.NUMBER) + results = self.cursor.callfunc( + "func_Test2", oracledb.NUMBER, [], kwargs + ) + self.assertEqual(results, 1) + + # setinputsizes for return_type 
(missed 3 args) + bool_var.setvalue(0, True) + self.cursor.setinputsizes(oracledb.NUMBER) + results = self.cursor.callfunc( + "func_Test2", oracledb.NUMBER, ["hi", 2, bool_var] + ) + self.assertEqual(results, 4) + + def test_4121(self): + """ + 4121 - test callfunc with setinputsizes with wrong order of parameters + """ + # setinputsizes for 2 args (missed 2 kwargs) + bool_var = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + bool_var.setvalue(0, True) + self.cursor.setinputsizes(oracledb.NUMBER, oracledb.DB_TYPE_BOOLEAN) + kwargs = dict(a_Boolean=bool_var) + with self.assertRaisesFullCode("ORA-06550"): + self.cursor.callfunc( + "func_Test2", oracledb.NUMBER, ["hi", bool_var], kwargs + ) + + def test_4122(self): + "4122 - test callfunc with setinputsizes without type for return_type" + # setinputsizes for 1 args and 1 kwargs + bool_var = self.cursor.var(oracledb.DB_TYPE_BOOLEAN) + bool_var.setvalue(0, False) + self.cursor.setinputsizes(oracledb.NUMBER, oracledb.DB_TYPE_BOOLEAN) + kwargs = dict(a_Boolean=bool_var) + with self.assertRaisesFullCode("ORA-06550"): + self.cursor.callfunc( + "func_Test2", oracledb.DB_TYPE_NUMBER, ["hi"], kwargs + ) + + # setinputsizes for 2 kwargs (missed 1 kwargs) + bool_var.setvalue(0, False) + kwargs = dict(a_Boolean=bool_var, a_String="hi", a_ExtraAmount=0) + self.cursor.setinputsizes( + oracledb.DB_TYPE_BOOLEAN, oracledb.DB_TYPE_VARCHAR + ) + results = self.cursor.callfunc( + "func_Test2", oracledb.DB_TYPE_NUMBER, [], kwargs + ) + self.assertEqual(results, 2) + + # setinputsizes for 2 args and 1 kwargs + bool_var.setvalue(0, False) + self.cursor.setinputsizes( + oracledb.DB_TYPE_BOOLEAN, oracledb.DB_TYPE_NUMBER + ) + kwargs = dict(a_Boolean=bool_var) + results = self.cursor.callfunc( + "func_Test2", oracledb.DB_TYPE_NUMBER, ["Bye", 2], kwargs + ) + self.assertEqual(results, 1) + + # setinputsizes for 2 args (missed 1 args) + bool_var.setvalue(0, False) + self.cursor.setinputsizes( + oracledb.DB_TYPE_BOOLEAN, oracledb.DB_TYPE_NUMBER + ) + kwargs = dict(a_Boolean=bool_var) + results = self.cursor.callfunc( + "func_Test2", oracledb.DB_TYPE_NUMBER, ["Light", -1, bool_var] + ) + self.assertEqual(results, 6) + + def test_4123(self): + "4123 - test executing a procedure with callfunc" + with self.assertRaisesFullCode("ORA-06550"): + self.cursor.callfunc( + "proc_Test2", oracledb.NUMBER, ("hello", 3, False) + ) + + def test_4124(self): + "4124 - test executing a function with callproc" + with self.assertRaisesFullCode("ORA-06550"): + self.cursor.callproc("func_Test2", ("hello", 5, True)) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_4500_connect_params.py b/tests/test_4500_connect_params.py index 46827cea..01f1f169 100644 --- a/tests/test_4500_connect_params.py +++ b/tests/test_4500_connect_params.py @@ -26,6 +26,7 @@ 4500 - Module for testing connection parameters. 
""" +import base64 import random import ssl @@ -1019,7 +1020,7 @@ def hook2(passed_protocol): for hook in [hook1, hook2]: oracledb.register_protocol(protocol, hook) params = oracledb.ConnectParams() - with self.assertRaisesFullCode("DPY-4018"): + with self.assertRaisesFullCode("DPY-2056"): params.parse_connect_string(f"{protocol}://args") finally: oracledb.register_protocol(protocol, None) @@ -1110,6 +1111,80 @@ def test_4564(self): self.assertEqual(params.service_name, service_name) self.assertEqual(getattr(params, name), actual_value) + def test_4565(self): + "4565 - test set_from_config() with no user and password set" + host = "host_4565" + service_name = "service_4565" + connect_string = f"{host}/{service_name}" + user = "user_4565" + password = test_env.get_random_string() + config = dict( + connect_descriptor=connect_string, + user=user, + password=dict( + type="base64", + value=base64.b64encode(password.encode()).decode(), + ), + ) + params = oracledb.ConnectParams() + params.set_from_config(config) + self.assertEqual(params.host, host) + self.assertEqual(params.service_name, service_name) + self.assertEqual(params.user, user) + + def test_4566(self): + "4566 - test set_from_config() with user and password already set" + host = "host_4566" + service_name = "service_4566" + connect_string = f"{host}/{service_name}" + user = "user_4566" + password = test_env.get_random_string() + config_user = "user_4566_in_config" + config_password = test_env.get_random_string() + config = dict( + connect_descriptor=connect_string, + user=config_user, + password=dict( + type="base64", + value=base64.b64encode(config_password.encode()).decode(), + ), + ) + params = oracledb.ConnectParams(user=user, password=password) + params.set_from_config(config) + self.assertEqual(params.host, host) + self.assertEqual(params.service_name, service_name) + self.assertEqual(params.user, user) + + def test_4567(self): + "4567 - test set_from_config() without connect_descriptor" + params = oracledb.ConnectParams() + with self.assertRaisesFullCode("DPY-2059"): + params.set_from_config(dict(connect_descriptor_missing="missing")) + + def test_4568(self): + "4568 - test set_from_config() with extended parameters" + host = "host_4566" + service_name = "service_4566" + connect_string = f"{host}/{service_name}" + stmtcachesize = 35 + user = "user_4566" + password = test_env.get_random_string() + config = dict( + connect_descriptor=connect_string, + user=user, + password=dict( + type="base64", + value=base64.b64encode(password.encode()).decode(), + ), + pyo=dict(stmtcachesize=stmtcachesize), + ) + params = oracledb.ConnectParams(user=user, password=password) + params.set_from_config(config) + self.assertEqual(params.host, host) + self.assertEqual(params.service_name, service_name) + self.assertEqual(params.user, user) + self.assertEqual(params.stmtcachesize, stmtcachesize) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_6400_vector_var.py b/tests/test_6400_vector_var.py index 24f8ec95..2a106afd 100644 --- a/tests/test_6400_vector_var.py +++ b/tests/test_6400_vector_var.py @@ -64,6 +64,36 @@ def __test_insert_and_fetch(self, value, column_name, expected_typecode): self.assertEqual(fetched_value, expected_value) self.assertEqual(fetched_value.typecode, expected_typecode) + def __test_plsql_insert_and_fetch(self, vec1, vec2, expected_distance): + in_out_vec = self.cursor.var(oracledb.DB_TYPE_VECTOR) + in_out_vec.setvalue(0, vec2) + + distance = self.cursor.var(oracledb.DB_TYPE_BINARY_DOUBLE) + 
output_vec = self.cursor.var(oracledb.DB_TYPE_VECTOR) + + plsql_block = """ + BEGIN + select + vector_distance(:in_vec, :in_out_vec, euclidean) + into :distance; + :output_vec := :in_out_vec; + :in_out_vec := :in_vec; + END; + """ + + self.cursor.execute( + plsql_block, + in_vec=vec1, + in_out_vec=in_out_vec, + distance=distance, + output_vec=output_vec, + ) + self.assertEqual(output_vec.getvalue(), vec2) + self.assertEqual(in_out_vec.getvalue(), vec1) + self.assertAlmostEqual( + distance.getvalue(), expected_distance, places=2 + ) + def test_6400(self): "6400 - test binding in a vector from a Python list" value = [1, 2] @@ -661,37 +691,34 @@ def test_6444(self): self.__test_insert_and_fetch(value, "Vector8Col", "b") def test_6445(self): - "6445 - test setting a PL-SQL type to a vector" + "6445 - test setting a PL-SQL type to a float32 vector" vec1 = array.array("f", [1, 1.5, 2, 2.5]) vec2 = array.array("f", [4, 4.5, 5, 5.5]) - - in_out_vec = self.cursor.var(oracledb.DB_TYPE_VECTOR) - in_out_vec.setvalue(0, vec2) - - distance = self.cursor.var(oracledb.DB_TYPE_BINARY_DOUBLE) - output_vec = self.cursor.var(oracledb.DB_TYPE_VECTOR) - - plsql_block = """ - DECLARE - dist BINARY_DOUBLE; - BEGIN - select vector_distance(:in_vec,:in_out_vec,euclidean) into dist; - :distance := dist; - :output_vec := :in_out_vec; - :in_out_vec :=:in_vec; - END; - """ - - self.cursor.execute( - plsql_block, - in_vec=vec1, - in_out_vec=in_out_vec, - distance=distance, - output_vec=output_vec, - ) - self.assertEqual(output_vec.getvalue(), vec2) - self.assertEqual(in_out_vec.getvalue(), vec1) - self.assertEqual(distance.getvalue(), 6) + self.__test_plsql_insert_and_fetch(vec1, vec2, 6) + + vec3 = array.array("f", [3.5] * 65535) + vec4 = array.array("f", [2.5] * 65535) + self.__test_plsql_insert_and_fetch(vec3, vec4, 256) + + def test_6446(self): + "6446 - test setting a PL-SQL type to a float64 vector" + vec1 = array.array("d", [1, 1.5, 2, 2.5]) + vec2 = array.array("d", [4, 4.5, 5, 5.5]) + self.__test_plsql_insert_and_fetch(vec1, vec2, 6) + + vec3 = array.array("d", [3.5] * 65535) + vec4 = array.array("d", [2.5] * 65535) + self.__test_plsql_insert_and_fetch(vec3, vec4, 256) + + def test_6447(self): + "6447 - test setting a PL-SQL type to a int8 vector" + vec1 = array.array("b", [1, 2, 3, 4]) + vec2 = array.array("b", [5, 6, 7, 8]) + self.__test_plsql_insert_and_fetch(vec1, vec2, 8) + + vec3 = array.array("b", [3] * 65535) + vec4 = array.array("b", [2] * 65535) + self.__test_plsql_insert_and_fetch(vec3, vec4, 256) if __name__ == "__main__": diff --git a/tests/test_7200_tnsnames.py b/tests/test_7200_tnsnames.py index 77af21fd..0126b465 100644 --- a/tests/test_7200_tnsnames.py +++ b/tests/test_7200_tnsnames.py @@ -501,6 +501,46 @@ def test_7220(self): [network_service_name.upper()], ) + def test_7221(self): + "7221 - test tnsnames.ora with a comment between aliases" + test_values = [ + ("nsn_7221_1", "tcp://host_7221:7221/service_7222_1"), + ("nsn_7221_2", "tcp://host_7222:7222/service_7222_2"), + ] + with tempfile.TemporaryDirectory() as temp_dir: + file_name = os.path.join(temp_dir, "tnsnames.ora") + for i in range(3): + entries = [f"{n} = {c}\n" for n, c in test_values] + entries.insert(i, "# COMMENT \n") + with open(file_name, "w") as f: + f.writelines(entries) + params = oracledb.ConnectParams(config_dir=temp_dir) + self.assertEqual( + params.get_network_service_names(), + [n.upper() for n, _ in test_values], + ) + + def test_7222(self): + "7222 - test tnsnames.ora with easy connect and connect descriptors" + 
network_service_name1 = "nsn_7222_1" + connect_string1 = """ + (DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=host_7220)(PORT=7222)) + (CONNECT_DATA=(SERVICE_NAME=service_7222_1)))""" + + network_service_name2 = "nsn_7222_2" + connect_string2 = "tcp://host_7222:7222/service_7222_2" + + with tempfile.TemporaryDirectory() as temp_dir: + file_name = os.path.join(temp_dir, "tnsnames.ora") + with open(file_name, "w") as f: + f.write(f"{network_service_name1} = {connect_string1}\n") + f.write(f"{network_service_name2} = {connect_string2}\n") + params = oracledb.ConnectParams(config_dir=temp_dir) + self.assertEqual( + params.get_network_service_names(), + [network_service_name1.upper(), network_service_name2.upper()], + ) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_env.py b/tests/test_env.py index 82b47766..2a7c3958 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -66,7 +66,9 @@ import getpass import os +import secrets import sys +import string import unittest import oracledb @@ -368,6 +370,10 @@ def get_external_user(): return get_value("EXTERNAL_USER", "External User") +def get_random_string(length=10): + return "".join(secrets.choice(string.ascii_letters) for i in range(length)) + + def is_on_oracle_cloud(connection): server = get_server_version() if server < (18, 0): From 6989c1f6f3d739ae7a7a74483bea16e6bb5c2ea6 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:28:57 -0700 Subject: [PATCH 003/178] Documentation improvements. --- doc/src/api_manual/aq.rst | 18 +- doc/src/api_manual/async_connection.rst | 53 +-- doc/src/api_manual/async_connection_pool.rst | 16 +- doc/src/api_manual/async_cursor.rst | 62 +-- doc/src/api_manual/connect_params.rst | 53 +-- doc/src/api_manual/connection.rst | 287 +++++++------- doc/src/api_manual/connection_pool.rst | 38 +- doc/src/api_manual/cursor.rst | 103 ++--- doc/src/api_manual/dbobject_type.rst | 24 +- doc/src/api_manual/defaults.rst | 41 +- doc/src/api_manual/fetch_info.rst | 21 +- doc/src/api_manual/module.rst | 391 ++++++++++--------- doc/src/api_manual/pipeline.rst | 13 +- doc/src/api_manual/pool_params.rst | 167 ++++---- doc/src/api_manual/soda.rst | 89 +++-- doc/src/api_manual/subscription.rst | 32 +- doc/src/api_manual/variable.rst | 12 +- doc/src/user_guide/installation.rst | 11 +- 18 files changed, 735 insertions(+), 696 deletions(-) diff --git a/doc/src/api_manual/aq.rst b/doc/src/api_manual/aq.rst index dcbe14d5..e171668f 100644 --- a/doc/src/api_manual/aq.rst +++ b/doc/src/api_manual/aq.rst @@ -25,7 +25,7 @@ Queue Methods Dequeues up to the specified number of messages from the queue and returns a list of these messages. Each element of the returned list is a - :ref:`message property` object. + :ref:`message property ` object. For consistency and compliance with the PEP 8 naming style, the name of the method was changed from `deqMany()`. The old name will continue to @@ -34,8 +34,8 @@ Queue Methods .. method:: Queue.deqone() Dequeues at most one message from the queue. If a message is dequeued, it - will be a :ref:`message property` object; otherwise, it will - be the value None. + will be a :ref:`message property ` object; otherwise, it will + be the value *None*. For consistency and compliance with the PEP 8 naming style, the name of the method was changed from `deqOne()`. The old name will continue to @@ -85,7 +85,7 @@ Queue Attributes ` that will be used when dequeuing messages from the queue. 
For consistency and compliance with the PEP 8 naming style, the name of - the attribute was changed from `deqOptions`. The old name will continue + the attribute was changed from ``deqOptions``. The old name will continue to work for a period of time. .. attribute:: Queue.enqoptions @@ -94,7 +94,7 @@ Queue Attributes ` that will be used when enqueuing messages into the queue. For consistency and compliance with the PEP 8 naming style, the name of - the attribute was changed from `enqOptions`. The old name will continue + the attribute was changed from ``enqOptions``. The old name will continue to work for a period of time. .. attribute:: Queue.name @@ -105,10 +105,10 @@ Queue Attributes This read-only attribute returns the object type for payloads that can be enqueued and dequeued. If using a JSON queue, this returns the value - ``"JSON"``. If using a raw queue, this returns the value ``None``. + ``"JSON"``. If using a raw queue, this returns the value *None*. For consistency and compliance with the PEP 8 naming style, the name of - the attribute was changed from `payloadType`. The old name will + the attribute was changed from ``payloadType``. The old name will continue to work for a period of time. @@ -311,7 +311,7 @@ Message Properties This read-only attribute specifies the id of the message in the last queue that enqueued or dequeued the message. If the message has never been - dequeued or enqueued, the value will be `None`. + dequeued or enqueued, the value will be *None*. .. attribute:: MessageProperties.payload @@ -327,7 +327,7 @@ Message Properties This read-write attribute specifies the priority of the message. A smaller number indicates a higher priority. The priority can be any integer, including - negative numbers. The default value is zero. + negative numbers. The default value is *0*. .. attribute:: MessageProperties.state diff --git a/doc/src/api_manual/async_connection.rst b/doc/src/api_manual/async_connection.rst index 3ef001ba..51c0faf8 100644 --- a/doc/src/api_manual/async_connection.rst +++ b/doc/src/api_manual/async_connection.rst @@ -160,10 +160,10 @@ AsyncConnection Methods rowfactory=None) Executes a query and returns the first row of the result set if one exists - (or None if no rows exist). + (or *None* if no rows exist). Internally, this method's :attr:`Cursor.prefetchrows` and - :attr:`Cursor.arraysize` sizes will be set to 1. + :attr:`Cursor.arraysize` sizes will be set to *1*. Since only one fetch is performed for a query, consider adding a ``WHERE`` condition or using a ``FETCH NEXT`` clause in the statement to prevent the @@ -191,8 +191,8 @@ AsyncConnection Methods existing standalone connection. Pooled connections internally perform this check before returning a connection to the application. - If this function returns False, the connection should be not be used by the - application and a new connection should be established instead. + If this function returns *False*, the connection should be not be used by + the application and a new connection should be established instead. This function performs a local check. To fully check a connection's health, use :meth:`AsyncConnection.ping()` which performs a :ref:`round-trip @@ -214,11 +214,11 @@ AsyncConnection Methods The ``continue_on_error`` parameter determines whether operations should continue to run after an error has occurred. 
If this parameter is set to - True, then the :attr:`PipelineOpResult.error` attribute will be populated + *True*, then the :attr:`PipelineOpResult.error` attribute will be populated with an :ref:`_Error ` instance which identifies the error - that occurred. If this parameter is set to False, then an exception will be - raised as soon as an error is detected and all subsequent operations will - be terminated. The default value is False. + that occurred. If this parameter is set to *False*, then an exception will + be raised as soon as an error is detected and all subsequent operations + will be terminated. The default value is *False*. See :ref:`pipelining` for more information. @@ -258,7 +258,7 @@ AsyncConnection Methods transaction can be inactive before it is automatically terminated by the system. A transaction is inactive between the time it is detached with :meth:`AsyncConnection.tpc_end()` and the time it is resumed with - :meth:`AsyncConnection.tpc_begin()`.The default is 0 seconds. + :meth:`AsyncConnection.tpc_begin()`.The default is *0* seconds. The following code sample demonstrates the ``tpc_begin()`` function:: @@ -284,8 +284,8 @@ AsyncConnection Methods transaction and is intended for use in recovery. The ``one_phase`` parameter is a boolean identifying whether to perform a - one-phase or two-phase commit. If ``one_phase`` parameter is True, a - single-phase commit is performed. The default value is False. This + one-phase or two-phase commit. If ``one_phase`` parameter is *True*, a + single-phase commit is performed. The default value is *False*. This parameter is only examined if a value is provided for the ``xid`` parameter. Otherwise, the driver already knows whether :meth:`~AsyncConnection.tpc_prepare()` was called for the transaction and @@ -356,8 +356,8 @@ AsyncConnection Methods ``ORA-24756: transaction does not exist``. If an ``xid`` parameter is passed, then an object should be returned by the - :meth:`~Connection.xid()` function. If an xid parameter is not passed, then - the transaction identifier used by the previous + :meth:`~Connection.xid()` function. If an ``xid`` parameter is not passed, + then the transaction identifier used by the previous :meth:`~AsyncConnection.tpc_begin()` is used. The following code sample demonstrates the ``tpc_prepare()`` function:: @@ -417,7 +417,7 @@ AsyncConnection Attributes .. attribute:: AsyncConnection.action This write-only attribute sets the ACTION column in the V$SESSION view. It - is a string attribute but the value None is accepted and treated as an + is a string attribute but the value *None* is accepted and treated as an empty string. .. attribute:: AsyncConnection.autocommit @@ -430,10 +430,10 @@ AsyncConnection Attributes This read-write attribute specifies the amount of time (in milliseconds) that a single round-trip to the database may take before a timeout will - occur. A value of 0 means that no timeout will take place. + occur. A value of *0* means that no timeout will take place. - If a timeout occurs, the error *DPI-1067* will be returned if the - connection is still usable. Alternatively the error *DPI-1080* will be + If a timeout occurs, the error ``DPI-1067`` will be returned if the + connection is still usable. Alternatively, the error ``DPI-1080`` will be returned if the connection has become invalid and can no longer be used. .. 
attribute:: AsyncConnection.client_identifier @@ -498,9 +498,10 @@ AsyncConnection Attributes This read-write attribute specifies a method called for each value that is bound to a statement executed on any cursor associated with this connection. The method signature is handler(cursor, value, arraysize) and - the return value is expected to be a variable object or None in which case - a default variable object will be created. If this attribute is None, the - default behavior will take place for all values bound to statements. + the return value is expected to be a variable object or *None* in which + case a default variable object will be created. If this attribute is + *None*, the default behavior will take place for all values bound to + statements. .. attribute:: AsyncConnection.instance_name @@ -545,15 +546,15 @@ AsyncConnection Attributes This write-only attribute sets the MODULE column in the V$SESSION view. The maximum length for this string is 48 and if you exceed this length you - will get ORA-24960. + will get ``ORA-24960``. .. attribute:: AsyncConnection.outputtypehandler This read-write attribute specifies a method called for each column that is going to be fetched from any cursor associated with this connection. The method signature is ``handler(cursor, metadata)`` and the return value is - expected to be a :ref:`variable object` or None in which case a - default variable object will be created. If this attribute is None, the + expected to be a :ref:`variable object ` or *None* in which case a + default variable object will be created. If this attribute is *None*, the default behavior will take place for all columns fetched from cursors. See :ref:`outputtypehandlers`. @@ -615,15 +616,15 @@ AsyncConnection Attributes value can make a significant difference in performance if you have a small number of statements that you execute repeatedly. - The default value is 20. + The default value is *20*. See :ref:`Statement Caching ` for more information. .. attribute:: AsyncConnection.thin This read-only attribute returns a boolean indicating if the connection was - established with the python-oracledb Thin mode (True) or python-oracledb - Thick mode (False). + established with the python-oracledb Thin mode (*True*) or python-oracledb + Thick mode (*False*). .. attribute:: AsyncConnection.transaction_in_progress diff --git a/doc/src/api_manual/async_connection_pool.rst b/doc/src/api_manual/async_connection_pool.rst index f4e5368e..96cd0eff 100644 --- a/doc/src/api_manual/async_connection_pool.rst +++ b/doc/src/api_manual/async_connection_pool.rst @@ -124,7 +124,7 @@ AsyncConnectionPool Attributes closed. They become candidates for termination only when they are released back to the pool and have existed for longer than max_lifetime_session seconds. Note that termination only occurs when the pool is accessed. A - value of 0 means that there is no maximum length of time that a pooled + value of *0* means that there is no maximum length of time that a pooled connection may exist. This attribute is only available in Oracle Database 12.1 or later. @@ -158,11 +158,11 @@ AsyncConnectionPool Attributes ` ping to the database is performed. If the connection is unusable, it is discarded and a different connection is selected to be returned by :meth:`AsyncConnectionPool.acquire()`. Setting - ``ping_interval`` to a negative value disables pinging. Setting it to 0 + ``ping_interval`` to a negative value disables pinging. 
Setting it to *0* forces a ping for every :meth:`AsyncConnectionPool.acquire()` and is not recommended. - Prior to cx_Oracle 8.2, the ping interval was fixed at 60 seconds. + Prior to cx_Oracle 8.2, the ping interval was fixed at *60* seconds. .. attribute:: AsyncConnectionPool.soda_metadata_cache @@ -175,23 +175,23 @@ AsyncConnectionPool Attributes This read-write attribute specifies the size of the statement cache that will be used for connections obtained from the pool. Once a connection is created, that connection’s statement cache size can only be changed by - setting the stmtcachesize attribute on the connection itself. + setting the ``stmtcachesize`` attribute on the connection itself. See :ref:`Statement Caching ` for more information. .. attribute:: AsyncConnectionPool.thin This attribute returns a boolean which indicates the python-oracledb mode - in which the pool was created. If the value of this attribute is True, it + in which the pool was created. If the value of this attribute is *True*, it indicates that the pool was created in the python-oracledb Thin mode. If - the value of this attribute is False, it indicates that the pool was created - in the python-oracledb Thick mode. + the value of this attribute is *False*, it indicates that the pool was + created in the python-oracledb Thick mode. .. attribute:: AsyncConnectionPool.timeout This read-write attribute specifies the time (in seconds) after which idle connections will be terminated in order to maintain an optimum number of - open connections. A value of 0 means that no idle connections are + open connections. A value of *0* means that no idle connections are terminated. .. attribute:: AsyncConnectionPool.username diff --git a/doc/src/api_manual/async_cursor.rst b/doc/src/api_manual/async_cursor.rst index 253e6a2a..6733bcd6 100644 --- a/doc/src/api_manual/async_cursor.rst +++ b/doc/src/api_manual/async_cursor.rst @@ -126,8 +126,8 @@ AsyncCursor Methods to the bind variable name used by the statement and the value maps to the Python value you wish bound to that bind variable. - A reference to the statement will be retained by the cursor. If None or the - same string object is passed in again, the cursor will execute that + A reference to the statement will be retained by the cursor. If *None* or + the same string object is passed in again, the cursor will execute that statement again without performing a prepare or rebinding and redefining. This is most effective for algorithms where the same statement is used, but different parameters are bound to it (many times). Note that parameters @@ -136,13 +136,13 @@ AsyncCursor Methods For maximum efficiency when reusing a statement, it is best to use the :meth:`AsyncCursor.setinputsizes()` method to specify the parameter types and - sizes ahead of time; in particular, None is assumed to be a string of + sizes ahead of time; in particular, *None* is assumed to be a string of length 1 so any values that are later bound as numbers or dates will raise a TypeError exception. If the statement is a query, the cursor is returned as a convenience to the caller (so it can be used directly as an iterator over the rows in the - cursor); otherwise, ``None`` is returned. + cursor); otherwise, *None* is returned. .. method:: AsyncCursor.executemany(statement, parameters, batcherrors=False, \ arraydmlrowcounts=False) @@ -163,9 +163,9 @@ AsyncCursor Methods specifying the number of iterations. 
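    As an illustrative sketch only (``connection`` and ``cursor`` are assumed
    to be existing async connection and cursor objects, and the table
    ``test_tab`` and its columns are hypothetical), a list of tuples can be
    inserted with a single call::

        rows = [(1, "first"), (2, "second"), (3, "third")]
        await cursor.executemany(
            "insert into test_tab (id, data) values (:1, :2)", rows
        )
        await connection.commit()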
In python-oracledb Thick mode, if the size of the buffers allocated for any - of the parameters exceeds 2 GB, you will receive the error "DPI-1015: array - size of is too large". If you receive this error, decrease the number - of rows being inserted. + of the parameters exceeds 2 GB, you will receive the error ``DPI-1015: + array size of is too large``. If you receive this error, decrease the + number of rows being inserted. When True, the ``batcherrors`` parameter enables batch error support within Oracle and ensures that the call succeeds even if an exception takes place @@ -182,7 +182,7 @@ AsyncCursor Methods For maximum efficiency, it is best to use the :meth:`AsyncCursor.setinputsizes()` method to specify the parameter types - and sizes ahead of time. In particular, the value None is assumed to be a + and sizes ahead of time. In particular, the value *None* is assumed to be a string of length 1 so any values that are later bound as numbers or dates will raise a TypeError exception. @@ -216,7 +216,7 @@ AsyncCursor Methods .. method:: AsyncCursor.fetchone() Fetches the next row of a query result set, returning a single tuple or - None when no more data is available. + *None* when no more data is available. An exception is raised if the previous call to :meth:`AsyncCursor.execute()` did not produce any result set or no call @@ -225,7 +225,7 @@ AsyncCursor Methods .. method:: AsyncCursor.getarraydmlrowcounts() A synchronous method that retrieves the DML row counts after a call to - :meth:`AsyncCursor.executemany()` with arraydmlrowcounts enabled. This + :meth:`AsyncCursor.executemany()` with ``arraydmlrowcounts`` enabled. This will return a list of integers corresponding to the number of rows affected by the DML statement for each element of the array passed to :meth:`AsyncCursor.executemany()`. @@ -237,8 +237,8 @@ AsyncCursor Methods .. method:: AsyncCursor.getbatcherrors() A synchronous method that retrieves the exceptions that took place after a - call to :meth:`AsyncCursor.executemany()` with batcherrors enabled. This - will return a list of Error objects, one error for each iteration that + call to :meth:`AsyncCursor.executemany()` with ``batcherrors`` enabled. + This will return a list of Error objects, one error for each iteration that failed. The offset can be determined by looking at the offset attribute of the error object. @@ -277,14 +277,14 @@ AsyncCursor Methods A synchronous method that can be used before a call to :meth:`AsyncCursor.execute()` to define the statement that will be executed. When this is done, the prepare phase will not be performed when - the call to :meth:`AsyncCursor.execute()` is made with None or the same + the call to :meth:`AsyncCursor.execute()` is made with *None* or the same string object as the statement. If the ``tag`` parameter is specified and the ``cache_statement`` parameter - is True, the statement will be returned to the statement cache with the + is *True*, the statement will be returned to the statement cache with the given tag. - If the ``cache_statement`` parameter is False, the statement will be + If the ``cache_statement`` parameter is *False*, the statement will be removed from the statement cache (if it was found there) or will simply not be cached. @@ -302,7 +302,7 @@ AsyncCursor Methods Use keyword parameters when :ref:`binding by name `. Use positional parameters when :ref:`binding by position `. 
The - parameter value can be None to indicate that python-oracledb should + parameter value can be *None* to indicate that python-oracledb should determine the required space from the data value provided. The parameters or keyword names correspond to the bind variable @@ -378,10 +378,10 @@ AsyncCursor Methods The ``size`` parameter specifies the length of string and raw variables and is ignored in all other cases. If not specified for string and raw variables, - the value 4000 is used. + the value *4000* is used. The ``arraysize`` parameter specifies the number of elements the variable will - have. If not specified the bind array size (usually 1) is used. When a + have. If not specified the bind array size (usually *1*) is used. When a variable is created in an output type handler this parameter should be set to the cursor's array size. @@ -408,7 +408,7 @@ AsyncCursor Methods data ` for more information. The ``convert_nulls`` parameter, if specified, should be passed as a boolean - value. Passing the value ``True`` causes the ``outconverter`` to be called + value. Passing the value *True* causes the ``outconverter`` to be called when a null value is fetched from the database; otherwise, the ``outconverter`` is only called when non-null values are fetched from the database. @@ -432,8 +432,8 @@ AsyncCursor Attributes The attribute is only used for tuning row and SODA document fetches from the database. It does not affect data inserts. - Due to the performance benefits, the default ``Cursor.arraysize`` is 100 - instead of the 1 that the Python DB API recommends. + Due to the performance benefits, the default ``Cursor.arraysize`` is *100* + instead of the *1* that the Python DB API recommends. See :ref:`Tuning Fetch Performance ` for more information. @@ -448,8 +448,8 @@ AsyncCursor Attributes .. attribute:: AsyncCursor.description This read-only attribute is a sequence of :ref:`FetchInfo` - objects. This attribute will be None for operations that do not return rows - or if the cursor has not had an operation invoked via the + objects. This attribute will be *None* for operations that do not return + rows or if the cursor has not had an operation invoked via the :meth:`AsyncCursor.execute()` method yet. .. attribute:: AsyncCursor.fetchvars @@ -465,23 +465,23 @@ AsyncCursor Attributes bound to a statement executed on the cursor and overrides the attribute with the same name on the connection if specified. The method signature is handler(cursor, value, arraysize) and the return value is expected to be a - variable object or None in which case a default variable object will be - created. If this attribute is None, the default behavior will take place + variable object or *None* in which case a default variable object will be + created. If this attribute is *None*, the default behavior will take place for all values bound to the statements. .. attribute:: AsyncCursor.lastrowid This read-only attribute returns the rowid of the last row modified by the cursor. If no row was modified by the last operation performed on the - cursor, the value None is returned. + cursor, the value *None* is returned. .. attribute:: AsyncCursor.outputtypehandler This read-write attribute specifies a method called for each column that is to be fetched from this cursor. The method signature is handler(cursor, metadata) and the return value is expected to be a - :ref:`variable object` or None in which case a default variable - object will be created. 
If this attribute is None, then the default + :ref:`variable object` or *None* in which case a default variable + object will be created. If this attribute is *None*, then the default behavior will take place for all columns fetched from this cursor. See :ref:`outputtypehandlers`. @@ -491,7 +491,7 @@ AsyncCursor Attributes This read-write attribute can be used to tune the number of rows that the python-oracledb fetches when a SELECT statement is executed. This value can reduce the number of round-trips to the database that are required to fetch - rows but at the cost of additional memory. Setting this value to 0 can be + rows but at the cost of additional memory. Setting this value to *0* can be useful when the timing of fetches must be explicitly controlled. The attribute is only used for tuning row fetches from the database. It @@ -504,8 +504,8 @@ AsyncCursor Attributes This read-only attribute specifies the number of rows that have currently been fetched from the cursor (for select statements) or that have been affected by the operation (for insert, update, delete and merge - statements). For all other statements the value is always zero. If the - cursor or connection is closed, the value returned is -1. + statements). For all other statements the value is always *0*. If the + cursor or connection is closed, the value returned is *-1*. .. attribute:: AsyncCursor.rowfactory diff --git a/doc/src/api_manual/connect_params.rst b/doc/src/api_manual/connect_params.rst index e824a9cc..bfe80edd 100644 --- a/doc/src/api_manual/connect_params.rst +++ b/doc/src/api_manual/connect_params.rst @@ -44,7 +44,7 @@ ConnectParams Methods Parses a DSN in the form /@ or in the form / and returns a 3-tuple containing the parsed user, password and connect string. Empty strings are returned as the value - ``None``. + *None*. .. versionadded:: 1.3.0 @@ -174,8 +174,8 @@ ConnectParams Attributes .. attribute:: ConnectParams.disable_oob This read-only attribute is a boolean that indicates whether out-of-band - breaks should be disabled. The default value is False. Note that this value - has no effect on Windows, which does not support this functionality. + breaks should be disabled. The default value is *False*. Note that this + value has no effect on Windows, which does not support this functionality. This attribute is only supported in python-oracledb Thin mode. @@ -209,7 +209,7 @@ ConnectParams Attributes mode should be enabled. This attribute is needed for continuous query notification (CQN) and high - availability event notifications. The default value is False. + availability event notifications. The default value is *False*. This attribute is only supported in python-oracledb Thick mode. @@ -218,7 +218,7 @@ ConnectParams Attributes This read-only attribute is an integer that returns the number of minutes between the sending of keepalive probes. - The default value is 0. If this attribute is set to a value greater than + The default value is *0*. If this attribute is set to a value greater than zero, it enables keepalive. This attribute is supported in both python-oracledb Thin and Thick modes. @@ -226,12 +226,12 @@ ConnectParams Attributes .. attribute:: ConnectParams.externalauth This read-only attribute is a boolean that specifies whether external - authentication should be used. The default value is False. + authentication should be used. The default value is *False*. For standalone connections, external authentication occurs when the ``user`` and ``password`` attributes are not used. 
If these attributes, are not used, you can optionally set the ``externalauth`` attribute to - True, which may aid code auditing. + *True*, which may aid code auditing. This attribute is only supported in python-oracledb Thick mode. @@ -252,7 +252,7 @@ ConnectParams Attributes .. attribute:: ConnectParams.https_proxy_port This read-only attribute is an integer that returns the port to be used to - communicate with the proxy host. The default value is 0. + communicate with the proxy host. The default value is *0*. This attribute is supported in both python-oracledb Thin and Thick modes. @@ -271,7 +271,8 @@ ConnectParams Attributes .. attribute:: ConnectParams.matchanytag This read-only attribute is a boolean that specifies whether any tag can be - used when acquiring a connection from the pool. The default value is False. + used when acquiring a connection from the pool. The default value is + *False*. This attribute is only supported in python-oracledb Thick mode. @@ -296,13 +297,13 @@ ConnectParams Attributes .. attribute:: ConnectParams.pool_boundary - This read-only attribute is one of the strings "statement" or "transaction" + This read-only attribute is one of the strings *statement* or *transaction* which indicates when pooled :ref:`DRCP ` or PRCP connections can be - returned to the pool. If the value is "statement", then pooled DRCP or PRCP + returned to the pool. If the value is *statement*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when the connection is stateless (that is, there are no active cursors, active transactions, temporary tables, or temporary LOBs). If the value is - "transaction", then pooled DRCP or PRCP connections are implicitly released + *transaction*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when either one of the methods :meth:`Connection.commit()` or :meth:`Connection.rollback()` are called. This attribute requires the use of DRCP or PRCP with Oracle Database 23ai @@ -315,7 +316,7 @@ ConnectParams Attributes .. attribute:: ConnectParams.port This read-only attribute is an integer that returns the port number on - which the database listener is listening. The default value is 1521. + which the database listener is listening. The default value is *1521*. This attribute is supported in both python-oracledb Thin and Thick modes. @@ -336,7 +337,7 @@ ConnectParams Attributes This read-only attribute is a string that indicates whether unencrypted network traffic or encrypted network traffic (TLS) is used and it can have - the value "tcp" or "tcps". The default value is "tcp". + the value *tcp* or *tcps*. The default value is *tcp*. This attribute is supported in both python-oracledb Thin and Thick modes. @@ -363,20 +364,20 @@ ConnectParams Attributes This read-only attribute is an integer that returns the number of times that a connection attempt should be retried before the attempt is - terminated. The default value is 0. + terminated. The default value is *0*. This attribute is supported in both python-oracledb Thin and Thick modes. .. attribute:: ConnectParams.retry_delay This read-only attribute is an integer that returns the number of seconds - to wait before making a new connection attempt. The default value is 1. + to wait before making a new connection attempt. The default value is *1*. This attribute is supported in both python-oracledb Thin and Thick modes. .. 
versionchanged:: 2.3.0 - The default value of this attribute was changed from 0 seconds to 1 + The default value of this attribute was changed from *0* seconds to *1* second. .. attribute:: ConnectParams.sdu @@ -399,7 +400,7 @@ ConnectParams Attributes This read-only attribute is a string that returns the type of server connection that should be established. If specified, it should be one of - `dedicated`, `shared`, or `pooled`. + *dedicated*, *shared*, or *pooled*. This attribute is supported in both python-oracledb Thin and Thick modes. @@ -445,7 +446,7 @@ ConnectParams Attributes then it is used for any verification. Otherwise, the hostname will be used. This value is ignored if the :attr:`~ConnectParams.ssl_server_dn_match` - attribute is not set to the value `True`. + attribute is not set to the value *True*. This attribute is supported in both python-oracledb Thin and Thick modes. @@ -454,7 +455,7 @@ ConnectParams Attributes This read-only attribute is a boolean that indicates whether the server certificate distinguished name (DN) should be matched in addition to the regular certificate verification that is performed. The default value is - True. + *True*. Note that if the :attr:`~ConnectParams.ssl_server_cert_dn` attribute is not specified, then host name matching is performed instead. @@ -463,8 +464,8 @@ ConnectParams Attributes .. attribute:: ConnectParams.ssl_version - This read-only attribute is one of the constants "ssl.TLSVersion.TLSv1_2" - or "ssl.TLSVersion.TLSv1_3" which identifies the TLS protocol version + This read-only attribute is one of the constants *ssl.TLSVersion.TLSv1_2* + or *ssl.TLSVersion.TLSv1_3* which identifies the TLS protocol version used. These constants are defined in the Python `ssl `__ module. @@ -499,14 +500,14 @@ ConnectParams Attributes This read-only attribute is a float that indicates the maximum number of seconds to wait for a connection to be established to the database host. - The default value is 20.0. + The default value is *20.0*. This attribute is supported in both python-oracledb Thin and Thick modes. .. versionchanged:: 2.3.0 - The default value of this attribute was changed from 60.0 seconds to - 20.0 seconds. + The default value of this attribute was changed from *60.0* seconds to + *20.0* seconds. .. attribute:: ConnectParams.terminal @@ -528,7 +529,7 @@ ConnectParams Attributes adbsb/adbsb-overview.html#GUID-A7435462-9D74-44B4-8240-4A6F06E92348>`__ specific feature that can reduce the latency in round-trips to the database after a connection has been established. This feature is only available - with certain versions of ADB-S. The default value is False. + with certain versions of ADB-S. The default value is *False*. This attribute is supported in both python-oracledb Thin and Thick modes. diff --git a/doc/src/api_manual/connection.rst b/doc/src/api_manual/connection.rst index 0faaa491..ec02e85c 100644 --- a/doc/src/api_manual/connection.rst +++ b/doc/src/api_manual/connection.rst @@ -112,18 +112,17 @@ Connection Methods .. method:: Connection.decode_oson(data) - Decodes `OSON-encoded - `__ - bytes and returns the object encoded in those bytes. This is useful for - fetching columns which have the check constraint ``IS JSON FORMAT OSON`` - enabled. + Decodes `OSON-encoded `__ bytes and returns the + object encoded in those bytes. This is useful for fetching columns which + have the check constraint ``IS JSON FORMAT OSON`` enabled. .. versionadded:: 2.1.0 .. 
method:: Connection.encode_oson(value) - Encodes a Python value into `OSON-encoded - `__ + Encodes a Python value into `OSON-encoded `__ bytes and returns them. This is useful for inserting into columns which have the check constraint ``IS JSON FORMAT OSON`` enabled. @@ -144,8 +143,8 @@ Connection Methods .. method:: Connection.gettype(name) - Returns a :ref:`type object ` given its name. This can then be - used to create objects which can be bound to cursors created by this + Returns a :ref:`type object ` given its name. This can then + be used to create objects which can be bound to cursors created by this connection. .. note:: @@ -154,11 +153,12 @@ Connection Methods .. method:: Connection.is_healthy() - This function returns a boolean indicating the health status of a connection. + This function returns a boolean indicating the health status of a + connection. - Connections may become unusable in several cases, such as, if the network socket - is broken, if an Oracle error indicates the connection is unusable, or, after - receiving a planned down notification from the database. + Connections may become unusable in several cases, such as, if the network + socket is broken, if an Oracle error indicates the connection is unusable, + or, after receiving a planned down notification from the database. This function is best used before starting a new database request on an existing :ref:`standalone connections `. For pooled @@ -166,8 +166,8 @@ Connection Methods performs this check before returning a connection to the application, see :ref:`poolhealth`. - If this function returns False, the connection should be not be used by the - application and a new connection should be established instead. + If this function returns *False*, the connection should be not be used by + the application and a new connection should be established instead. This function performs a local check. To fully check a connection's health, use :meth:`Connection.ping()` which performs a round-trip to the database. @@ -200,9 +200,9 @@ Connection Methods .. method:: Connection.prepare() - Prepares the distributed (global) transaction for commit. Return a boolean + Prepares the distributed (global) transaction for commit. Returns a boolean indicating if a transaction was actually prepared in order to avoid the - error ORA-24756 (transaction does not exist). + error ``ORA-24756 (transaction does not exist)``. .. deprecated:: python-oracledb 1.0 @@ -226,7 +226,7 @@ Connection Methods dequeued. If not specified, RAW data is enqueued and dequeued. For consistency and compliance with the PEP 8 naming style, the - parameter `payloadType` was renamed to `payload_type`. The old name + parameter ``payloadType`` was renamed to ``payload_type``. The old name will continue to work as a keyword parameter for a period of time. .. note:: @@ -239,9 +239,10 @@ Connection Methods .. method:: Connection.shutdown([mode]) - Shuts down the database. In order to do this the connection must be connected - as :data:`~oracledb.SYSDBA` or :data:`~oracledb.SYSOPER`. Two calls must - be made unless the mode specified is :data:`~oracledb.DBSHUTDOWN_ABORT`. + Shuts down the database. In order to do this the connection must be + connected as :data:`~oracledb.SYSDBA` or :data:`~oracledb.SYSOPER`. Two + calls must be made unless the mode specified is + :data:`~oracledb.DBSHUTDOWN_ABORT`. An example is shown below: :: @@ -262,14 +263,14 @@ Connection Methods .. method:: Connection.startup(force=False, restrict=False, pfile=None) - Starts up the database. 
This is equivalent to the SQL\*Plus command "startup - nomount". The connection must be connected as :data:`~oracledb.SYSDBA` or - :data:`~oracledb.SYSOPER` with the :data:`~oracledb.PRELIM_AUTH` option - specified for this to work. + Starts up the database. This is equivalent to the SQL\*Plus command + ``startup nomount``. The connection must be connected as + :data:`~oracledb.SYSDBA` or :data:`~oracledb.SYSOPER` with the + :data:`~oracledb.PRELIM_AUTH` option specified for this to work. - The ``pfile`` parameter, if specified, is expected to be a string identifying - the location of the parameter file (PFILE) which will be used instead of - the stored parameter file (SPFILE). + The ``pfile`` parameter, if specified, is expected to be a string + identifying the location of the parameter file (PFILE) which will be used + instead of the stored parameter file (SPFILE). An example is shown below: @@ -299,39 +300,40 @@ Connection Methods notifications for events that take place in the database that match the given parameters. - The ``namespace`` parameter specifies the namespace the subscription uses. It - can be one of :data:`oracledb.SUBSCR_NAMESPACE_DBCHANGE` or + The ``namespace`` parameter specifies the namespace the subscription uses. + It can be one of :data:`oracledb.SUBSCR_NAMESPACE_DBCHANGE` or :data:`oracledb.SUBSCR_NAMESPACE_AQ`. - The ``protocol`` parameter specifies the protocol to use when notifications are - sent. Currently the only valid value is :data:`oracledb.SUBSCR_PROTO_OCI`. + The ``protocol`` parameter specifies the protocol to use when notifications + are sent. Currently the only valid value is + :data:`oracledb.SUBSCR_PROTO_OCI`. - The ``callback`` is expected to be a callable that accepts a single parameter. - A :ref:`message object ` is passed to this callback whenever a - notification is received. + The ``callback`` is expected to be a callable that accepts a single + parameter. A :ref:`message object ` is passed to this callback + whenever a notification is received. - The ``timeout`` value specifies that the subscription expires after the given - time in seconds. The default value of 0 indicates that the subscription - never expires. + The ``timeout`` value specifies that the subscription expires after the + given time in seconds. The default value of *0* indicates that the + subscription never expires. - The ``operations`` parameter enables filtering of the messages that are sent - (insert, update, delete). The default value will send notifications for all - operations. This parameter is only used when the namespace is set to - :data:`oracledb.SUBSCR_NAMESPACE_DBCHANGE`. + The ``operations`` parameter enables filtering of the messages that are + sent (insert, update, delete). The default value will send notifications + for all operations. This parameter is only used when the namespace is set + to :data:`oracledb.SUBSCR_NAMESPACE_DBCHANGE`. - The ``port`` parameter specifies the listening port for callback notifications - from the database server. If not specified, an unused port will be selected - by the Oracle Client libraries. + The ``port`` parameter specifies the listening port for callback + notifications from the database server. If not specified, an unused port + will be selected by the Oracle Client libraries. - The ``qos`` parameter specifies quality of service options. It should be one or - more of the following flags, OR'ed together: + The ``qos`` parameter specifies quality of service options. 
It should be + one or more of the following flags, OR'ed together: :data:`oracledb.SUBSCR_QOS_RELIABLE`, :data:`oracledb.SUBSCR_QOS_DEREG_NFY`, :data:`oracledb.SUBSCR_QOS_ROWIDS`, :data:`oracledb.SUBSCR_QOS_QUERY`, :data:`oracledb.SUBSCR_QOS_BEST_EFFORT`. - The ``ip_address`` parameter specifies the IP address (IPv4 or IPv6) in + The ``ip_address`` parameter specifies the IP address (*IPv4* or *IPv6*) in standard string notation to bind for callback notifications from the database server. If not specified, the client IP address will be determined by the Oracle Client libraries. @@ -340,12 +342,12 @@ Connection Methods notifications should take place. Currently, if set, this value can only be set to the value :data:`oracledb.SUBSCR_GROUPING_CLASS_TIME`, which will group notifications by the number of seconds specified in the - ``grouping_value`` parameter. The ``grouping_type`` parameter should be one of the - values :data:`oracledb.SUBSCR_GROUPING_TYPE_SUMMARY` (the default) or - :data:`oracledb.SUBSCR_GROUPING_TYPE_LAST`. + ``grouping_value`` parameter. The ``grouping_type`` parameter should be one + of the values :data:`oracledb.SUBSCR_GROUPING_TYPE_SUMMARY` (the default) + or :data:`oracledb.SUBSCR_GROUPING_TYPE_LAST`. - The ``name`` parameter is used to identify the subscription and is specific to - the selected namespace. If the namespace parameter is + The ``name`` parameter is used to identify the subscription and is + specific to the selected namespace. If the namespace parameter is :data:`oracledb.SUBSCR_NAMESPACE_DBCHANGE` then the name is optional and can be any value. If the namespace parameter is :data:`oracledb.SUBSCR_NAMESPACE_AQ`, however, the name must be in the @@ -360,11 +362,11 @@ Connection Methods Client 19.4 and Oracle Database 19.4 and higher. For consistency and compliance with the PEP 8 naming style, the - parameter `ipAddress` was renamed to `ip_address`, the parameter - `groupingClass` was renamed to `grouping_class`, the parameter - `groupingValue` was renamed to `grouping_value`, the parameter - `groupingType` was renamed to `grouping_type` and the parameter - `clientInitiated` was renamed to `client_initiated`. The old names will + parameter ``ipAddress`` was renamed to ``ip_address``, the parameter + ``groupingClass`` was renamed to ``grouping_class``, the parameter + ``groupingValue`` was renamed to ``grouping_value``, the parameter + ``groupingType`` was renamed to ``grouping_type`` and the parameter + ``clientInitiated`` was renamed to ``client_initiated``. The old names will continue to work as keyword parameters for a period of time. .. note:: @@ -384,21 +386,23 @@ Connection Methods Begins a Two-Phase Commit (TPC) on a global transaction using the specified transaction identifier (xid). - The ``xid`` parameter should be an object returned by the :meth:`~Connection.xid()` - method. + The ``xid`` parameter should be an object returned by the + :meth:`~Connection.xid()` method. - The ``flags`` parameter is one of the constants :data:`oracledb.TPC_BEGIN_JOIN`, - :data:`oracledb.TPC_BEGIN_NEW`, :data:`oracledb.TPC_BEGIN_PROMOTE`, or - :data:`oracledb.TPC_BEGIN_RESUME`. The default is :data:`oracledb.TPC_BEGIN_NEW`. + The ``flags`` parameter is one of the constants + :data:`oracledb.TPC_BEGIN_JOIN`, :data:`oracledb.TPC_BEGIN_NEW`, + :data:`oracledb.TPC_BEGIN_PROMOTE`, or :data:`oracledb.TPC_BEGIN_RESUME`. + The default is :data:`oracledb.TPC_BEGIN_NEW`. 
- The ``timeout`` parameter is the number of seconds to wait for a transaction to - become available for resumption when :data:`~oracledb.TPC_BEGIN_RESUME` is - specified in the ``flags`` parameter. When :data:`~oracledb.TPC_BEGIN_NEW` is - specified in the ``flags`` parameter, the ``timeout`` parameter indicates the - number of seconds the transaction can be inactive before it is automatically - terminated by the system. A transaction is inactive between the time it is - detached with :meth:`Connection.tpc_end()` and the time it is resumed with - :meth:`Connection.tpc_begin()`.The default is 0 seconds. + The ``timeout`` parameter is the number of seconds to wait for a + transaction to become available for resumption when + :data:`~oracledb.TPC_BEGIN_RESUME` is specified in the ``flags`` parameter. + When :data:`~oracledb.TPC_BEGIN_NEW` is specified in the ``flags`` + parameter, the ``timeout`` parameter indicates the number of seconds the + transaction can be inactive before it is automatically terminated by the + system. A transaction is inactive between the time it is detached with + :meth:`Connection.tpc_end()` and the time it is resumed with + :meth:`Connection.tpc_begin()`.The default is *0* seconds. The following code sample demonstrates the ``tpc_begin()`` function:: @@ -409,22 +413,25 @@ Connection Methods .. method:: Connection.tpc_commit(xid, one_phase) - Commits a global transaction. When called with no arguments, this method commits - a transaction previously prepared with :meth:`~Connection.tpc_begin()` and optionally - prepared with :meth:`~Connection.tpc_prepare()`. If :meth:`~Connection.tpc_prepare()` - is not called, a single phase commit is performed. A transaction manager may choose - to do this if only a single resource is participating in the global transaction. + Commits a global transaction. When called with no arguments, this method + commits a transaction previously prepared with + :meth:`~Connection.tpc_begin()` and optionally prepared with + :meth:`~Connection.tpc_prepare()`. If :meth:`~Connection.tpc_prepare()` + is not called, a single phase commit is performed. A transaction manager + may choose to do this if only a single resource is participating in the + global transaction. If an ``xid`` parameter is passed, then an object should be returned by the :meth:`~Connection.xid()` function. This form should be called outside of a transaction and is intended for use in recovery. - The ``one_phase`` parameter is a boolean identifying whether to perform a one-phase - or two-phase commit. If ``one_phase`` parameter is True, a single-phase commit is performed. - The default value is False. This parameter is only examined if a value is provided - for the ``xid`` parameter. Otherwise, the driver already knows whether - :meth:`~Connection.tpc_prepare()` was called for the transaction and whether a - one-phase or two-phase commit is required. + The ``one_phase`` parameter is a boolean identifying whether to perform a + one-phase or two-phase commit. If ``one_phase`` parameter is *True*, a + single-phase commit is performed. The default value is *False*. This + parameter is only examined if a value is provided for the ``xid`` + parameter. Otherwise, the driver already knows whether + :meth:`~Connection.tpc_prepare()` was called for the transaction and + whether a one-phase or two-phase commit is required. The following code sample demonstrates the ``tpc_commit()`` function:: @@ -435,15 +442,17 @@ Connection Methods .. 
method:: Connection.tpc_end(xid, flags) - Ends or suspends work on a global transaction. This function is only intended - for use by transaction managers. + Ends or suspends work on a global transaction. This function is only + intended for use by transaction managers. If an ``xid`` parameter is passed, then an object should be returned by the :meth:`~Connection.xid()` function. If no xid parameter is passed, then the - transaction identifier used by the previous :meth:`~Connection.tpc_begin()` is used. + transaction identifier used by the previous :meth:`~Connection.tpc_begin()` + is used. - The ``flags`` parameter is one of the constants :data:`oracledb.TPC_END_NORMAL` or - :data:`oracledb.TPC_END_SUSPEND`. The default is :data:`oracledb.TPC_END_NORMAL`. + The ``flags`` parameter is one of the constants + :data:`oracledb.TPC_END_NORMAL` or :data:`oracledb.TPC_END_SUSPEND`. The + default is :data:`oracledb.TPC_END_NORMAL`. If the flag is :data:`oracledb.TPC_END_SUSPEND` then the transaction may be resumed later by calling :meth:`Connection.tpc_begin()` with the flag @@ -458,11 +467,11 @@ Connection Methods .. method:: Connection.tpc_forget(xid) - Causes the database to forget a heuristically completed TPC transaction. This - function is only intended to be called by transaction managers. + Causes the database to forget a heuristically completed TPC transaction. + This function is only intended to be called by transaction managers. - The ``xid`` parameter is mandatory and should be an object should be returned by - the :meth:`~Connection.xid()` function. + The ``xid`` parameter is mandatory and should be an object should be + returned by the :meth:`~Connection.xid()` function. The following code sample demonstrates the ``tpc_forget()`` function:: @@ -478,13 +487,14 @@ Connection Methods :meth:`~Connection.tpc_commit()` or :meth:`~Connection.tpc_rollback()` have been called. - Returns a boolean indicating whether a commit is needed or not. If you attempt to - commit when not needed, then it results in the error ``ORA-24756: transaction does not - exist``. + Returns a boolean indicating whether a commit is needed or not. If you + attempt to commit when not needed, then it results in the error + ``ORA-24756: transaction does not exist``. - If an ``xid`` parameter is passed, then an object should be returned by the - :meth:`~Connection.xid()` function. If an xid parameter is not passed, then the - transaction identifier used by the previous :meth:`~Connection.tpc_begin()` is used. + If an ``xid`` parameter is passed, then an object should be returned by + the :meth:`~Connection.xid()` function. If an ``xid`` parameter is not + passed, then the transaction identifier used by the previous + :meth:`~Connection.tpc_begin()` is used. The following code sample demonstrates the ``tpc_prepare()`` function:: @@ -495,13 +505,14 @@ Connection Methods .. method:: Connection.tpc_recover() - Returns a list of pending transaction identifiers that require recovery. Objects of - type ``Xid`` (as returned by the :meth:`~Connection.xid()` function) are returned and - these can be passed to :meth:`~Connection.tpc_commit()` or :meth:`~Connection.tpc_rollback()` - as needed. + Returns a list of pending transaction identifiers that require recovery. + Objects of type ``Xid`` (as returned by the :meth:`~Connection.xid()` + function) are returned and these can be passed to + :meth:`~Connection.tpc_commit()` or :meth:`~Connection.tpc_rollback()` as + needed. 
- This function queries the view ``DBA_PENDING_TRANSACTIONS`` and requires ``SELECT`` - privilege on that view. + This function queries the DBA_PENDING_TRANSACTIONS view and requires + "SELECT" privilege on that view. The following code sample demonstrates the ``tpc_recover()`` function:: @@ -514,12 +525,13 @@ Connection Methods Rolls back a global transaction. - If an ``xid`` parameter is not passed, then it rolls back the transaction that was previously - started with :meth:`~Connection.tpc_begin()`. + If an ``xid`` parameter is not passed, then it rolls back the transaction + that was previously started with :meth:`~Connection.tpc_begin()`. If an ``xid`` parameter is passed, then an object should be returned by - :meth:`~Connection.xid()` and the specified transaction is rolled back. This form - should be called outside of a transaction and is intended for use in recovery. + :meth:`~Connection.xid()` and the specified transaction is rolled back. + This form should be called outside of a transaction and is intended for + use in recovery. The following code sample demonstrates the ``tpc_rollback()`` function:: @@ -545,13 +557,15 @@ Connection Methods values are checked by ODPI-C when they are passed to the relevant functions. .. When this functionality is also supported in the thin driver the checks will be performed at the Python level as well. - The ``format_id`` parameter should be a positive 32-bit integer. This value identifies - the format of the global_transaction_id and branch_qualifier parameters and the - value is determined by the Transaction Manager (TM), if one is in use. + The ``format_id`` parameter should be a positive 32-bit integer. This + value identifies the format of the ``global_transaction_id`` and + ``branch_qualifier`` parameters and the value is determined by the + Transaction Manager (TM), if one is in use. - The ``global_transaction_id`` and branch_qualifier parameters should be of type - bytes or string. If a value of type string is passed, then this value will be - UTF-8 encoded to bytes. The values cannot exceed 64 bytes in length. + The ``global_transaction_id`` and ``branch_qualifier`` parameters should + be of type bytes or string. If a value of type string is passed, then + this value will be UTF-8 encoded to bytes. The values cannot exceed 64 + bytes in length. The following code sample demonstrates the ``xid()`` function:: @@ -567,7 +581,7 @@ Connection Attributes .. attribute:: Connection.action This write-only attribute sets the ACTION column in the V$SESSION view. It - is a string attribute but the value None is accepted and treated as an + is a string attribute but the value *None* is accepted and treated as an empty string. .. note:: @@ -588,15 +602,15 @@ Connection Attributes This read-write attribute specifies the amount of time (in milliseconds) that a single round-trip to the database may take before a timeout will - occur. A value of 0 means that no timeout will take place. + occur. A value of *0* means that no timeout will take place. - If a timeout occurs, the error *DPI-1067* will be returned if the - connection is still usable. Alternatively the error *DPI-1080* will be + If a timeout occurs, the error ``DPI-1067`` will be returned if the + connection is still usable. Alternatively the error ``DPI-1080`` will be returned if the connection has become invalid and can no longer be used. For consistency and compliance with the PEP 8 naming style, the - attribute `callTimeout` was renamed to `call_timeout`. 
The old name - will continue to work for a period of time. The error *DPI-1080* was + attribute ``callTimeout`` was renamed to ``call_timeout``. The old name + will continue to work for a period of time. The error ``DPI-1080`` was also introduced in this release. .. note:: @@ -718,9 +732,10 @@ Connection Attributes This read-write attribute specifies a method called for each value that is bound to a statement executed on any cursor associated with this connection. The method signature is handler(cursor, value, arraysize) and - the return value is expected to be a variable object or None in which case - a default variable object will be created. If this attribute is None, the - default behavior will take place for all values bound to statements. + the return value is expected to be a variable object or *None* in which + case a default variable object will be created. If this attribute is + *None*, the default behavior will take place for all values bound to + statements. See :ref:`inputtypehandlers`. @@ -769,7 +784,7 @@ Connection Attributes established. See `Database Object Naming Rules `__. The value may be - ``None``, 30, or 128. The value ``None`` indicates the size cannot be + *None*, *30*, or *128*. The value *None* indicates the size cannot be reliably determined by python-oracledb, which occurs when using Thick mode with Oracle Client libraries 12.1 (or older) to connect to Oracle Database 12.2, or later. @@ -792,7 +807,7 @@ Connection Attributes This write-only attribute sets the MODULE column in the V$SESSION view. The maximum length for this string is 48 and if you exceed this length you - will get ORA-24960. + will get ``ORA-24960``. .. note: @@ -804,8 +819,8 @@ Connection Attributes This read-write attribute specifies a method called for each column that is going to be fetched from any cursor associated with this connection. The method signature is ``handler(cursor, metadata)`` and the return value is - expected to be a :ref:`variable object` or None in which case a - default variable object will be created. If this attribute is None, the + expected to be a :ref:`variable object` or *None* in which case a + default variable object will be created. If this attribute is *None*, the default behavior will take place for all columns fetched from cursors. See :ref:`outputtypehandlers`. @@ -901,7 +916,7 @@ Connection Attributes value can make a significant difference in performance if you have a small number of statements that you execute repeatedly. - The default value is 20. + The default value is *20*. See :ref:`Statement Caching ` for more information. @@ -914,13 +929,13 @@ Connection Attributes This read-write attribute initially contains the actual tag of the session that was acquired from a pool by :meth:`ConnectionPool.acquire()`. If the connection was not acquired from a pool or no tagging parameters were - specified (``tag`` and ``matchanytag``) when the connection was acquired from the - pool, this value will be None. If the value is changed, it must be a string - containing name=value pairs like "k1=v1;k2=v2". + specified (``tag`` and ``matchanytag``) when the connection was acquired + from the pool, this value will be None. If the value is changed, it must + be a string containing name=value pairs like "k1=v1;k2=v2". - If this value is not None when the connection is released back to the pool - it will be used to retag the session. This value can be overridden in the - call to :meth:`ConnectionPool.release()`. 
+ If this value is not *None* when the connection is released back to the + pool it will be used to retag the session. This value can be overridden in + the call to :meth:`ConnectionPool.release()`. .. note:: @@ -929,8 +944,8 @@ Connection Attributes .. attribute:: Connection.thin This read-only attribute returns a boolean indicating if the connection was - established with the python-oracledb Thin mode (True) or python-oracledb - Thick mode (False). + established with the python-oracledb Thin mode (*True*) or python-oracledb + Thick mode (*False*). .. note:: @@ -994,7 +1009,7 @@ Connection Attributes connection creations, in which case those connection warnings will be returned. - If no warning was generated the value ``None`` is returned. + If no warning was generated the value *None* is returned. .. versionadded:: 2.0.0 diff --git a/doc/src/api_manual/connection_pool.rst b/doc/src/api_manual/connection_pool.rst index 6b53999b..214feb27 100644 --- a/doc/src/api_manual/connection_pool.rst +++ b/doc/src/api_manual/connection_pool.rst @@ -66,7 +66,7 @@ ConnectionPool Methods The ``tag`` parameter, if specified, is expected to be a string with name=value pairs like "k1=v1;k2=v2" and will limit the connections that can be returned from a connection pool unless the ``matchanytag`` parameter is - set to True. In that case, connections with the specified tag will be + set to *True*. In that case, connections with the specified tag will be preferred over others, but if no such connections are available, then a connection with a different tag may be returned instead. In any case, untagged connections will always be returned if no connections with the @@ -75,8 +75,8 @@ ConnectionPool Methods The ``shardingkey`` and ``supershardingkey`` parameters, if specified, are expected to be a sequence of values which will be used to identify the - database shard to connect to. The key values can be strings, numbers, bytes - or dates. See :ref:`connsharding`. + database shard to connect to. The key values can be strings, numbers, + bytes, or dates. See :ref:`connsharding`. When using the :ref:`connection pool cache `, calling :meth:`oracledb.connect()` with a ``pool_alias`` parameter is the same as @@ -88,7 +88,7 @@ ConnectionPool Methods released, which makes it unusable for further work. If any connections have been acquired and not released back to the pool, - this method will fail unless the ``force`` parameter is set to True. + this method will fail unless the ``force`` parameter is set to *True*. .. method:: ConnectionPool.drop(connection) @@ -163,11 +163,11 @@ ConnectionPool Methods connections back to the pool in order to ensure sufficient resources are available. - If the tag is not None, it is expected to be a string with name=value pairs - like "k1=v1;k2=v2" and will override the value in the property + If the tag is not *None*, it is expected to be a string with name=value + pairs like "k1=v1;k2=v2" and will override the value in the property :attr:`Connection.tag`. If either :attr:`Connection.tag` or the tag - parameter are not None, the connection will be retagged when it is released - back to the pool. + parameter are not *None*, the connection will be retagged when it is + released back to the pool. .. _connpoolattr: @@ -221,7 +221,7 @@ ConnectionPool Attributes closed. They become candidates for termination only when they are released back to the pool and have existed for longer than max_lifetime_session seconds. Note that termination only occurs when the pool is accessed. 
A - value of 0 means that there is no maximum length of time that a pooled + value of *0* means that there is no maximum length of time that a pooled connection may exist. This attribute is only available in Oracle Database 12.1 or later. @@ -231,7 +231,7 @@ ConnectionPool Attributes created per shard in the pool. Setting this attribute greater than zero specifies the maximum number of sessions in the pool that can be used for any given shard in a sharded database. This lets connections in the pool be - balanced across the shards. A value of zero will not set any maximum number + balanced across the shards. A value of *0* will not set any maximum number of sessions for each shard. This attribute is only available in Oracle Client 18.3 and higher. @@ -259,17 +259,17 @@ ConnectionPool Attributes ` ping to the database is performed. If the connection is unusable, it is discarded and a different connection is selected to be returned by :meth:`ConnectionPool.acquire()`. Setting ``ping_interval`` to - a negative value disables pinging. Setting it to 0 forces a ping for every - :meth:`ConnectionPool.acquire()` and is not recommended. + a negative value disables pinging. Setting it to *0* forces a ping for + every :meth:`ConnectionPool.acquire()` and is not recommended. - Prior to cx_Oracle 8.2, the ping interval was fixed at 60 seconds. + Prior to cx_Oracle 8.2, the ping interval was fixed at *60* seconds. .. attribute:: ConnectionPool.soda_metadata_cache This read-write boolean attribute returns whether the SODA metadata cache is enabled or not. Enabling the cache significantly improves the performance of methods :meth:`SodaDatabase.createCollection()` (when not - specifying a value for the metadata parameter) and + specifying a value for the ``metadata`` parameter) and :meth:`SodaDatabase.openCollection()`. Note that the cache can become out of date if changes to the metadata of cached collections are made externally. @@ -279,23 +279,23 @@ ConnectionPool Attributes This read-write attribute specifies the size of the statement cache that will be used for connections obtained from the pool. Once a connection is created, that connection’s statement cache size can only be changed by - setting the stmtcachesize attribute on the connection itself. + setting the ``stmtcachesize`` attribute on the connection itself. See :ref:`Statement Caching ` for more information. .. attribute:: ConnectionPool.thin This attribute returns a boolean which indicates the python-oracledb mode - in which the pool was created. If the value of this attribute is True, it + in which the pool was created. If the value of this attribute is *True*, it indicates that the pool was created in the python-oracledb Thin mode. If - the value of this attribute is False, it indicates that the pool was created - in the python-oracledb Thick mode. + the value of this attribute is *False*, it indicates that the pool was + created in the python-oracledb Thick mode. .. attribute:: ConnectionPool.timeout This read-write attribute specifies the time (in seconds) after which idle connections will be terminated in order to maintain an optimum number of - open connections. A value of 0 means that no idle connections are + open connections. A value of *0* means that no idle connections are terminated. Note that in python-oracledb Thick mode with older Oracle Client Libraries, the termination only occurs when the pool is accessed. 
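As an illustrative aside, the pool settings covered by the connection pool documentation above are commonly tuned together. The following sketch shows one possible combination; the credentials, DSN, and sizing values are placeholders::

    import oracledb

    pool = oracledb.create_pool(
        user="user", password="password", dsn="dbhost.example.com/orclpdb",
        min=1, max=4, increment=1
    )

    pool.ping_interval = 30   # ping connections that have been idle for 30 seconds
    pool.timeout = 120        # terminate connections idle for more than 2 minutes
    pool.stmtcachesize = 40   # statement cache size used by pooled connections

    connection = pool.acquire()
    with connection.cursor() as cursor:
        cursor.execute("select user from dual")
        print(cursor.fetchone())
    pool.release(connection)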
diff --git a/doc/src/api_manual/cursor.rst b/doc/src/api_manual/cursor.rst index e9372ac2..a2b4d14d 100644 --- a/doc/src/api_manual/cursor.rst +++ b/doc/src/api_manual/cursor.rst @@ -152,8 +152,8 @@ Cursor Methods to the bind variable name used by the statement and the value maps to the Python value you wish bound to that bind variable. - A reference to the statement will be retained by the cursor. If None or the - same string object is passed in again, the cursor will execute that + A reference to the statement will be retained by the cursor. If *None* or + the same string object is passed in again, the cursor will execute that statement again without performing a prepare or rebinding and redefining. This is most effective for algorithms where the same statement is used, but different parameters are bound to it (many times). Note that parameters @@ -162,13 +162,13 @@ Cursor Methods For maximum efficiency when reusing a statement, it is best to use the :meth:`Cursor.setinputsizes()` method to specify the parameter types and - sizes ahead of time; in particular, None is assumed to be a string of + sizes ahead of time; in particular, *None* is assumed to be a string of length 1 so any values that are later bound as numbers or dates will raise a TypeError exception. If the statement is a query, the cursor is returned as a convenience to the caller (so it can be used directly as an iterator over the rows in the - cursor); otherwise, ``None`` is returned. + cursor); otherwise, *None* is returned. .. note:: @@ -193,27 +193,27 @@ Cursor Methods specifying the number of iterations. In python-oracledb Thick mode, if the size of the buffers allocated for any - of the parameters exceeds 2 GB, you will receive the error "DPI-1015: array - size of is too large". If you receive this error, decrease the number - of rows being inserted. + of the parameters exceeds 2 GB, you will receive the error ``DPI-1015: + array size of is too large``. If you receive this error, decrease the + number of rows being inserted. - When True, the ``batcherrors`` parameter enables batch error support within - Oracle Database and ensures that the call succeeds even if an exception - takes place in one or more of the sequence of bind values. The errors can - then be retrieved using :meth:`Cursor.getbatcherrors()`. + When *True*, the ``batcherrors`` parameter enables batch error support + within Oracle Database and ensures that the call succeeds even if an + exception takes place in one or more of the sequence of bind values. The + errors can then be retrieved using :meth:`Cursor.getbatcherrors()`. - When True, the ``arraydmlrowcounts`` parameter enables DML row counts to be - retrieved from Oracle after the method has completed. The row counts can + When *True*, the ``arraydmlrowcounts`` parameter enables DML row counts to + be retrieved from Oracle after the method has completed. The row counts can then be retrieved using :meth:`Cursor.getarraydmlrowcounts()`. Both the ``batcherrors`` parameter and the ``arraydmlrowcounts`` parameter - can only be True when executing an insert, update, delete or merge + can only be *True* when executing an insert, update, delete, or merge statement; in all other cases an error will be raised. For maximum efficiency, it is best to use the :meth:`Cursor.setinputsizes()` method to specify the bind value types and sizes. 
In particular, if the type is not explicitly specified, the value - None is assumed to be a string of length 1 so any values that are later + *None* is assumed to be a string of length 1 so any values that are later bound as numbers or dates will raise a TypeError exception. .. method:: Cursor.fetchall() @@ -236,9 +236,9 @@ Cursor Methods cursor's arraysize attribute can affect the performance of this operation. The number of rows to fetch is specified by the parameter. If it is not - given, the cursor's arraysize attribute determines the number of rows to be - fetched. If the number of rows available to be fetched is fewer than the - amount requested, fewer rows will be returned. + given, the cursor's ``arraysize`` attribute determines the number of rows + to be fetched. If the number of rows available to be fetched is fewer than + the amount requested, fewer rows will be returned. An exception is raised if the previous call to :meth:`Cursor.execute()` did not produce any result set or no call was issued yet. @@ -247,8 +247,8 @@ Cursor Methods .. method:: Cursor.fetchone() - Fetches the next row of a query result set, returning a single tuple or None - when no more data is available. + Fetches the next row of a query result set, returning a single tuple or + *None* when no more data is available. An exception is raised if the previous call to :meth:`Cursor.execute()` did not produce any result set or no call was issued yet. @@ -258,7 +258,7 @@ Cursor Methods .. method:: Cursor.getarraydmlrowcounts() Retrieves the DML row counts after a call to :meth:`Cursor.executemany()` - with arraydmlrowcounts enabled. This will return a list of integers + with ``arraydmlrowcounts`` enabled. This will return a list of integers corresponding to the number of rows affected by the DML statement for each element of the array passed to :meth:`Cursor.executemany()`. @@ -270,7 +270,7 @@ Cursor Methods .. method:: Cursor.getbatcherrors() Retrieves the exceptions that took place after a call to - :meth:`Cursor.executemany()` with batcherrors enabled. This will return a + :meth:`Cursor.executemany()` with ``batcherrors`` enabled. This will return a list of Error objects, one error for each iteration that failed. The offset can be determined by looking at the offset attribute of the error object. @@ -321,13 +321,13 @@ Cursor Methods :meth:`Cursor.executemany()` to define the statement that will be executed. When this is done, the prepare phase will not be performed when the call to :meth:`Cursor.execute()` or :meth:`Cursor.executemany()` is - made with None or the same string object as the statement. + made with *None* or the same string object as the statement. If the ``tag`` parameter is specified and the ``cache_statement`` parameter - is True, the statement will be returned to the statement cache with the + is *True*, the statement will be returned to the statement cache with the given tag. - If the ``cache_statement`` parameter is False, the statement will be + If the ``cache_statement`` parameter is *False*, the statement will be removed from the statement cache (if it was found there) or will simply not be cached. @@ -342,13 +342,13 @@ Cursor Methods Scrolls the cursor in the result set to a new position according to the mode. - If mode is "relative" (the default value), the value is taken as an offset - to the current position in the result set. If set to "absolute", value - states an absolute target position. 
If set to "first", the cursor is - positioned at the first row and if set to "last", the cursor is set to the + If mode is *relative* (the default value), the value is taken as an offset + to the current position in the result set. If set to *absolute*, value + states an absolute target position. If set to *first*, the cursor is + positioned at the first row and if set to *last*, the cursor is set to the last row in the result set. - An error is raised if the mode is "relative" or "absolute" and the scroll + An error is raised if the mode is *relative* or *absolute* and the scroll operation would position the cursor outside of the result set. .. note:: @@ -367,7 +367,7 @@ Cursor Methods Use keyword parameters when :ref:`binding by name `. Use positional parameters when :ref:`binding by position `. The - parameter value can be None to indicate that python-oracledb should + parameter value can be *None* to indicate that python-oracledb should determine the required space from the data value provided. The parameters or keyword names correspond to the bind variable @@ -442,10 +442,10 @@ Cursor Methods The ``size`` parameter specifies the length of string and raw variables and is ignored in all other cases. If not specified for string and raw variables, - the value 4000 is used. + the value *4000* is used. The ``arraysize`` parameter specifies the number of elements the variable will - have. If not specified the bind array size (usually 1) is used. When a + have. If not specified the bind array size (usually *1*) is used. When a variable is created in an output type handler this parameter should be set to the cursor's array size. @@ -464,15 +464,15 @@ Cursor Methods function. The ``bypass_decode`` parameter, if specified, should be passed as a - boolean value. Passing a `True` value causes values of database types + boolean value. Passing a *True* value causes values of database types :data:`~oracledb.DB_TYPE_VARCHAR`, :data:`~oracledb.DB_TYPE_CHAR`, :data:`~oracledb.DB_TYPE_NVARCHAR`, :data:`~oracledb.DB_TYPE_NCHAR` and - :data:`~oracledb.DB_TYPE_LONG` to be returned as `bytes` instead of `str`, + :data:`~oracledb.DB_TYPE_LONG` to be returned as bytes instead of str, meaning that python-oracledb does not do any decoding. See :ref:`Fetching raw data ` for more information. The ``convert_nulls`` parameter, if specified, should be passed as a boolean - value. Passing the value ``True`` causes the ``outconverter`` to be called + value. Passing the value *True* causes the ``outconverter`` to be called when a null value is fetched from the database; otherwise, the ``outconverter`` is only called when non-null values are fetched from the database. @@ -506,8 +506,8 @@ Cursor Attributes The attribute is only used for tuning row and SODA document fetches from the database. It does not affect data inserts. - Due to the performance benefits, the default ``Cursor.arraysize`` is 100 - instead of the 1 that the Python DB API recommends. + Due to the performance benefits, the default ``Cursor.arraysize`` is *100* + instead of the *1* that the Python DB API recommends. See :ref:`Tuning Fetch Performance ` for more information. @@ -537,7 +537,7 @@ Cursor Attributes This read-only attribute contains information about the columns used in a query. It is a sequence of :ref:`FetchInfo ` objects, one per - column. This attribute will be None for statements that are not SELECT or + column. 
This attribute will be *None* for statements that are not SELECT or WITH statements, or if the cursor has not had :meth:`Cursor.execute()` invoked yet. @@ -564,8 +564,8 @@ Cursor Attributes bound to a statement executed on the cursor and overrides the attribute with the same name on the connection if specified. The method signature is handler(cursor, value, arraysize) and the return value is expected to be a - variable object or None in which case a default variable object will be - created. If this attribute is None, the default behavior will take place + variable object or *None* in which case a default variable object will be + created. If this attribute is *None*, the default behavior will take place for all values bound to the statements. See :ref:`inputtypehandlers`. @@ -578,15 +578,15 @@ Cursor Attributes This read-only attribute returns the rowid of the last row modified by the cursor. If no row was modified by the last operation performed on the - cursor, the value None is returned. + cursor, the value *None* is returned. .. attribute:: Cursor.outputtypehandler This read-write attribute specifies a method called for each column that is to be fetched from this cursor. The method signature is handler(cursor, metadata) and the return value is expected to be a - :ref:`variable object` or None in which case a default variable - object will be created. If this attribute is None, then the default + :ref:`variable object ` or *None* in which case a default variable + object will be created. If this attribute is *None*, then the default behavior will take place for all columns fetched from this cursor. See :ref:`outputtypehandlers`. @@ -605,9 +605,10 @@ Cursor Attributes This read-write attribute can be used to tune the number of rows that the Oracle Client library fetches when a SELECT statement is executed. This - value can reduce the number of round-trips to the database that are required - to fetch rows but at the cost of additional memory. Setting this value to 0 - can be useful when the timing of fetches must be explicitly controlled. + value can reduce the number of round-trips to the database that are + required to fetch rows but at the cost of additional memory. Setting this + value to *0* can be useful when the timing of fetches must be explicitly + controlled. The attribute is only used for tuning row fetches from the database. It does not affect data inserts. @@ -625,9 +626,9 @@ Cursor Attributes This read-only attribute specifies the number of rows that have currently been fetched from the cursor (for select statements) or that have been - affected by the operation (for insert, update, delete and merge - statements). For all other statements the value is always zero. If the - cursor or connection is closed, the value returned is -1. + affected by the operation (for insert, update, delete, and merge + statements). For all other statements the value is always *0*. If the + cursor or connection is closed, the value returned is *-1*. .. attribute:: Cursor.rowfactory @@ -673,7 +674,7 @@ Cursor Attributes :meth:`Cursor.execute()` or :meth:`Cursor.executemany()`. This value is automatically cleared on the next call to :meth:`Cursor.execute()` or :meth:`Cursor.executemany()`. If no warning was generated the value - ``None`` is returned. + *None* is returned. See :ref:`plsqlwarning` for more information. 
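The ``batcherrors`` and ``getbatcherrors()`` behaviour described in the cursor documentation above can be sketched as follows; this is illustrative only and assumes an existing ``connection`` and a hypothetical table ``mytab``::

    rows = [(1, "First"), (2, "Second"), (2, "Duplicate key")]

    with connection.cursor() as cursor:
        cursor.executemany(
            "insert into mytab (id, data) values (:1, :2)",
            rows,
            batcherrors=True,
        )
        # each error object reports the offset of the failing row in the
        # data sequence passed to executemany()
        for error in cursor.getbatcherrors():
            print("Row", error.offset, "failed with", error.message)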
diff --git a/doc/src/api_manual/dbobject_type.rst b/doc/src/api_manual/dbobject_type.rst index 123ec699..1591a657 100644 --- a/doc/src/api_manual/dbobject_type.rst +++ b/doc/src/api_manual/dbobject_type.rst @@ -38,8 +38,8 @@ DbObjectType Attributes .. attribute:: DbObjectType.element_type This read-only attribute returns the type of elements found in collections - of this type, if :attr:`~DbObjectType.iscollection` is True; otherwise, - it returns None. If the collection contains objects, this will be + of this type, if :attr:`~DbObjectType.iscollection` is *True*; otherwise, + it returns *None*. If the collection contains objects, this will be another object type; otherwise, it will be one of the :ref:`database type constants `. @@ -58,7 +58,7 @@ DbObjectType Attributes .. attribute:: DbObjectType.package_name This read-only attribute returns the name of the package, if the type - refers to a PL/SQL type (otherwise, it returns the value `None`). + refers to a PL/SQL type (otherwise, it returns the value *None*). .. attribute:: DbObjectType.schema @@ -115,8 +115,8 @@ DbObject Methods .. method:: DbObject.exists(index) - Returns True or False indicating if an element exists in the collection at - the specified index. + Returns *True* or *False* indicating if an element exists in the collection + at the specified index. .. method:: DbObject.extend(sequence) @@ -129,7 +129,7 @@ DbObject Methods .. method:: DbObject.first() Returns the index of the first element in the collection. If the collection - is empty, None is returned. + is empty, *None* is returned. .. method:: DbObject.getelement(index) @@ -141,21 +141,21 @@ DbObject Methods .. method:: DbObject.last() Returns the index of the last element in the collection. If the collection - is empty, None is returned. + is empty, *None* is returned. .. method:: DbObject.next(index) Returns the index of the next element in the collection following the specified index. If there are no elements in the collection following the - specified index, None is returned. + specified index, *None* is returned. .. method:: DbObject.prev(index) Returns the index of the element in the collection preceding the specified index. If there are no elements in the collection preceding the - specified index, None is returned. + specified index, *None* is returned. .. method:: DbObject.setelement(index, value) @@ -199,7 +199,7 @@ DbObjectAttribute Objects :data:`oracledb.DB_TYPE_CHAR`, :data:`oracledb.DB_TYPE_NCHAR`, :data:`oracledb.DB_TYPE_NVARCHAR`, :data:`oracledb.DB_TYPE_RAW`, or :data:`oracledb.DB_TYPE_VARCHAR`. For all other types the value returned is - `None`. + *None*. .. versionadded:: 3.0.0 @@ -213,7 +213,7 @@ DbObjectAttribute Objects This read-only attribute returns the precision of the attribute when the attribute's type is :data:`oracledb.DB_TYPE_NUMBER`. For all other types - the value returned is `None`. + the value returned is *None*. .. versionadded:: 3.0.0 @@ -222,7 +222,7 @@ DbObjectAttribute Objects This read-only attribute returns the scale of the attribute when the attribute's type is :data:`oracledb.DB_TYPE_NUMBER`. For all other types - the value returned is `None`. + the value returned is *None*. .. versionadded:: 3.0.0 diff --git a/doc/src/api_manual/defaults.rst b/doc/src/api_manual/defaults.rst index 938e52d1..11b44ae1 100644 --- a/doc/src/api_manual/defaults.rst +++ b/doc/src/api_manual/defaults.rst @@ -25,7 +25,7 @@ Defaults Attributes The default value for :attr:`Cursor.arraysize`. 
This is a query tuning attribute, see :ref:`Tuning Fetch Performance `. - This attribute has an initial value of 100. + This attribute has an initial value of *100*. .. attribute:: defaults.config_dir @@ -43,7 +43,7 @@ Defaults Attributes to Oracle Database. This is the value used in the CLIENT_DRIVER column of the V$SESSION_CONNECT_INFO view. - This attribute has an initial value of None. It is used as required in + This attribute has an initial value of *None*. It is used as required in python-oracledb Thick and Thin mode. In python-oracledb Thick mode, this attribute is used if the @@ -52,7 +52,7 @@ Defaults Attributes used if the ``driver_name`` parameter is not specified in :meth:`oracledb.connect()`, :meth:`oracledb.connect_async()`, :meth:`oracledb.create_pool()`, or :meth:`oracledb.create_pool_async()`. - If the value of this attribute is None, the value set when connecting in + If the value of this attribute is *None*, the value set when connecting in python-oracledb Thick mode is like "python-oracledb thk : " and in Thin mode is like "python-oracledb thn : ". See :ref:`otherinit`. @@ -71,31 +71,32 @@ Defaults Attributes blob/main/samples/return_numbers_as_decimals.py>`__) can alternatively be used to adjust the returned type. If a type handler exists and returns a variable (that is, ``cursor.var(...)``), then that return variable is used. - If the type handler returns None, then the value of + If the type handler returns *None*, then the value of ``oracledb.defaults.fetch_decimals`` is used to determine whether to return ``decimal.Decimal`` values. - This attribute has an initial value of False. + This attribute has an initial value of *False*. .. attribute:: defaults.fetch_lobs - When the value of this attribute is True, then queries to LOB columns - return LOB locators. When the value of this attribute is False, then CLOBs - and NCLOBs are fetched as strings, and BLOBs are fetched as bytes. If LOBs - are larger than 1 GB, then this attribute should be set to True and the - LOBs should be streamed. See :ref:`lobdata`. + When the value of this attribute is *True*, then queries to LOB columns + return LOB locators. When the value of this attribute is *False*, then + CLOBs and NCLOBs are fetched as strings, and BLOBs are fetched as bytes. If + LOBs are larger than 1 GB, then this attribute should be set to *True* and + the LOBs should be streamed. See :ref:`lobdata`. - An output type handler such as the one previously required in cx_Oracle (see - `return_lobs_as_strings.py `__) can alternatively be used to adjust the returned type. - If a type handler exists and returns a variable (that is, `cursor.var(...)`), then - that return variable is used. If the type handler returns None, then the value of - ``oracledb.defaults.fetch_lobs`` is used. + An output type handler such as the one previously required in cx_Oracle + (see `return_lobs_as_strings.py `__) can + alternatively be used to adjust the returned type. If a type handler + exists and returns a variable (that is, `cursor.var(...)`), then that + return variable is used. If the type handler returns *None*, then the value + of ``oracledb.defaults.fetch_lobs`` is used. The value of ``oracledb.defaults.fetch_lobs`` does not affect LOBs returned as OUT binds. - This attribute has an initial value of True. + This attribute has an initial value of *True*. .. attribute:: defaults.machine @@ -127,7 +128,7 @@ Defaults Attributes The default value for :attr:`Cursor.prefetchrows`. 
This is a query tuning attribute, see :ref:`Tuning Fetch Performance `. - This attribute has an initial value of 2. + This attribute has an initial value of *2*. .. attribute:: defaults.program @@ -148,7 +149,7 @@ Defaults Attributes :attr:`ConnectionPool.stmtcachesize`. This is a tuning attribute, see :ref:`stmtcache`. - This attribute has an initial value of 20. + This attribute has an initial value of *20*. .. attribute:: defaults.terminal @@ -156,7 +157,7 @@ Defaults Attributes connection originates. This is the value used in the TERMINAL column of the V$SESSION view. - This attribute has an initial value of "unknown". + This attribute has an initial value of *unknown*. This attribute is only used in python-oracledb Thin mode. diff --git a/doc/src/api_manual/fetch_info.rst b/doc/src/api_manual/fetch_info.rst index 61bb2ed5..3cf75fd6 100644 --- a/doc/src/api_manual/fetch_info.rst +++ b/doc/src/api_manual/fetch_info.rst @@ -26,7 +26,7 @@ FetchInfo Attributes This read-only attribute returns a dictionary containing the `annotations `__ associated with the fetched column. If - there are no annotations, the value ``None`` is returned. Annotations + there are no annotations, the value *None* is returned. Annotations require Oracle Database 23ai. If using python-oracledb Thick mode, Oracle Client 23ai is also required. @@ -42,7 +42,7 @@ FetchInfo Attributes This read-only attribute returns the name of the `data use case domain `__ associated with the fetched column. If - there is no data use case domain, the value ``None`` is returned. `Data + there is no data use case domain, the value *None* is returned. `Data use case domains `__ require Oracle Database 23ai. If using python-oracledb Thick mode, Oracle Client 23ai is also required. @@ -54,7 +54,7 @@ FetchInfo Attributes This read-only attribute returns the schema of the `data use case domain `__ associated with the fetched column. If - there is no data use case domain, the value ``None`` is returned. `Data + there is no data use case domain, the value *None* is returned. `Data use case domains `__ require Oracle Database 23ai. If using python-oracledb Thick mode, Oracle Client 23ai is also required. @@ -69,17 +69,16 @@ FetchInfo Attributes .. attribute:: FetchInfo.is_json This read-only attribute returns whether the column is known to contain - JSON data. This will be ``True`` when the type code is - ``oracledb.DB_TYPE_JSON`` as well as when an "IS JSON" constraint is + JSON data. This will be *True* when the type code is + :data:`oracledb.DB_TYPE_JSON` as well as when an "IS JSON" constraint is enabled on LOB and VARCHAR2 columns. .. attribute:: FetchInfo.is_oson This read-only attribute returns whether the column is known to contain - binary encoded `OSON - `__ - data. This will be ``True`` when an "IS JSON FORMAT OSON" check constraint - is enabled on BLOB columns. + binary encoded `OSON `__ data. This will be *True* + when an "IS JSON FORMAT OSON" check constraint is enabled on BLOB columns. .. versionadded:: 2.1.0 @@ -121,7 +120,7 @@ FetchInfo Attributes This read-only attribute returns the number of dimensions required by VECTOR columns. If the column is not a VECTOR column or allows for any - number of dimensions, the value returned is ``None``. + number of dimensions, the value returned is *None*. .. versionadded:: 2.2.0 @@ -140,6 +139,6 @@ FetchInfo Attributes floating-point numbers If the column is not a VECTOR column or allows for any type of storage, - the value returned is ``None``. + the value returned is *None*. .. 
versionadded:: 2.2.0 diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index 10593163..cb1b683e 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -147,12 +147,13 @@ Oracledb Methods python-oracledb Thin and Thick modes. The ``port`` parameter is expected to be an integer which indicates the - port number on which the listener is listening. The default value is 1521. - This value is used in both the python-oracledb Thin and Thick modes. + port number on which the listener is listening. The default value is + *1521*. This value is used in both the python-oracledb Thin and Thick + modes. - The ``protocol`` parameter is expected to be one of the strings "tcp" or - "tcps" which indicates whether to use unencrypted network traffic or - encrypted network traffic (TLS). The default value is tcp. This value is + The ``protocol`` parameter is expected to be one of the strings *tcp* or + *tcps* which indicates whether to use unencrypted network traffic or + encrypted network traffic (TLS). The default value is *tcp*. This value is used in both the python-oracledb Thin and Thick modes. The ``https_proxy`` parameter is expected to be a string which indicates @@ -162,8 +163,8 @@ Oracledb Methods The ``https_proxy_port`` parameter is expected to be an integer which indicates the port that is to be used to communicate with the proxy host. - The default value is 0. This value is used in both the python-oracledb Thin - and Thick modes. + The default value is *0*. This value is used in both the python-oracledb + Thin and Thick modes. The ``service_name`` parameter is expected to be a string which indicates the service name of the database. This value is used in both the @@ -175,7 +176,7 @@ Oracledb Methods The ``server_type`` parameter is expected to be a string that indicates the type of server connection that should be established. If specified, it - should be one of `dedicated`, `shared`, or `pooled`. This value is used in + should be one of *dedicated*, *shared*, or *pooled*. This value is used in both the python-oracledb Thin and Thick modes. The ``cclass`` parameter is expected to be a string that identifies the @@ -194,34 +195,34 @@ Oracledb Methods the number of minutes between the sending of keepalive probes. If this parameter is set to a value greater than zero it enables keepalive. This value is used in both the python-oracledb Thin and Thick modes. The default - value is 0. + value is *0*. The ``retry_count`` parameter is expected to be an integer that identifies the number of times that a connection attempt should be retried before the attempt is terminated. This value is used in both the python-oracledb Thin - and Thick modes. The default value is 0. + and Thick modes. The default value is *0*. The ``retry_delay`` parameter is expected to be an integer that identifies the number of seconds to wait before making a new connection attempt. This value is used in both the python-oracledb Thin and Thick modes. The default - value is 1. + value is *1*. The ``tcp_connect_timeout`` parameter is expected to be a float that indicates the maximum number of seconds to wait for establishing a connection to the database host. This value is used in both the - python-oracledb Thin and Thick modes. The default value is 20.0. + python-oracledb Thin and Thick modes. The default value is *20.0*. 
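For illustration only, the retry and timeout parameters just described might be combined as in the following sketch; the host, service name, and credentials are placeholders::

    import oracledb

    connection = oracledb.connect(
        user="user",
        password="password",
        host="dbhost.example.com",
        port=1521,
        service_name="orclpdb",
        retry_count=3,
        retry_delay=2,
        tcp_connect_timeout=10.0,
    )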
The ``ssl_server_dn_match`` parameter is expected to be a boolean that indicates whether the server certificate distinguished name (DN) should be matched in addition to the regular certificate verification that is performed. Note that if the ``ssl_server_cert_dn`` parameter is not provided, host name matching is performed instead. This value is used in - both the python-oracledb Thin and Thick modes. The default value is True. + both the python-oracledb Thin and Thick modes. The default value is *True*. The ``ssl_server_cert_dn`` parameter is expected to be a string that indicates the distinguished name (DN) which should be matched with the server. This value is ignored if the ``ssl_server_dn_match`` parameter is - not set to the value True. This value is used in both the python-oracledb + not set to the value *True*. This value is used in both the python-oracledb Thin and Thick modes. The ``wallet_location`` parameter is expected to be a string that @@ -235,15 +236,15 @@ Oracledb Methods the events mode should be enabled. This value is only used in the python-oracledb Thick mode and is ignored in the Thin mode. This parameter is needed for continuous query notification and high availability event - notifications. The default value is False. + notifications. The default value is *False*. The ``externalauth`` parameter is a boolean that specifies whether external authentication should be used. This value is only used in the python-oracledb Thick mode and is ignored in the Thin mode. The default - value is False. For standalone connections, external authentication occurs - when the ``user`` and ``password`` attributes are not used. If these + value is *False*. For standalone connections, external authentication + occurs when the ``user`` and ``password`` attributes are not used. If these attributes are not used, you can optionally set the ``externalauth`` - attribute to True, which may aid code auditing. + attribute to *True*, which may aid code auditing. If the ``mode`` parameter is specified, it must be one of the :ref:`connection authorization modes ` @@ -254,7 +255,7 @@ Oracledb Methods The ``disable_oob`` parameter is expected to be a boolean that indicates whether out-of-band breaks should be disabled. This value is only used in the python-oracledb Thin mode and has no effect on Windows which - does not support this functionality. The default value is False. + does not support this functionality. The default value is *False*. The ``stmtcachesize`` parameter is expected to be an integer which specifies the initial size of the statement cache. This value is used in @@ -274,7 +275,7 @@ Oracledb Methods whether any tag can be used when acquiring a connection from the pool. This value is only used in the python-oracledb Thick mode when acquiring a connection from a pool. This value is ignored in the python-oracledb Thin - mode. The default value is False. + mode. The default value is *False*. The ``config_dir`` parameter is expected to be a string that indicates the directory in which configuration files (tnsnames.ora) are found. This value @@ -325,15 +326,15 @@ Oracledb Methods configuration value. See the `SQL*Net documentation `__ for more details. This value is used in both the - python-oracledb Thin and Thick modes. The default value is 8192 bytes. + python-oracledb Thin and Thick modes. The default value is *8192* bytes. 
The ``pool_boundary`` parameter is expected to be one of the strings - "statement" or "transaction" which indicates when pooled :ref:`DRCP ` + *statement* or *transaction* which indicates when pooled :ref:`DRCP ` or PRCP connections can be returned to the pool. If the value is - "statement", then pooled DRCP or PRCP connections are implicitly released + *statement*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when the connection is stateless (that is, there are no active cursors, active transactions, temporary tables, or - temporary LOBs). If the value is "transaction", then pooled DRCP or PRCP + temporary LOBs). If the value is *transaction*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when either one of the methods :meth:`Connection.commit()` or :meth:`Connection.rollback()` are called. This parameter requires the use @@ -349,15 +350,15 @@ Oracledb Methods reduce the latency in round-trips to the database after a connection has been established. This feature is only available with certain versions of ADB-S. This value is used in both python-oracledb Thin and Thick modes. - The default value is False. + The default value is *False*. The ``ssl_version`` parameter is expected to be one of the constants - "ssl.TLSVersion.TLSv1_2" or "ssl.TLSVersion.TLSv1_3" which identifies the + *ssl.TLSVersion.TLSv1_2* or *ssl.TLSVersion.TLSv1_3* which identifies the TLS protocol version used. These constants are defined in the Python `ssl `__ module. This parameter can be specified when establishing connections with the protocol - "tcps". This value is used in both python-oracledb Thin and Thick modes. - The value "ssl.TLSVersion.TLSv1_3" requires Oracle Database 23ai. If you + *tcps*. This value is used in both python-oracledb Thin and Thick modes. + The value *ssl.TLSVersion.TLSv1_3* requires Oracle Database 23ai. If you are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. @@ -392,7 +393,7 @@ Oracledb Methods created should *never* be used after the source handle has been closed or destroyed. This value is only used in the python-oracledb Thick mode and is ignored in the Thin mode. It should be used with extreme caution. The - default value is 0. + default value is *0*. .. versionchanged:: 3.0.0 @@ -525,11 +526,12 @@ Oracledb Methods initial connection to the database. The ``port`` parameter is expected to be an integer which indicates the - port number on which the listener is listening. The default value is 1521. + port number on which the listener is listening. The default value is + *1521*. - The ``protocol`` parameter is expected to be one of the strings "tcp" or - "tcps" which indicates whether to use unencrypted network traffic or - encrypted network traffic (TLS). The default value is tcp. + The ``protocol`` parameter is expected to be one of the strings *tcp* or + *tcps* which indicates whether to use unencrypted network traffic or + encrypted network traffic (TLS). The default value is *tcp*. The ``https_proxy`` parameter is expected to be a string which indicates the name or IP address of a proxy host to use for tunneling secure @@ -537,7 +539,7 @@ Oracledb Methods The ``https_proxy_port`` parameter is expected to be an integer which indicates the port that is to be used to communicate with the proxy host. - The default value is 0. + The default value is *0*. The ``service_name`` parameter is expected to be a string which indicates the service name of the database. 
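A short sketch of restricting the TLS protocol version with the ``ssl_version`` parameter described above; the Easy Connect string and credentials are placeholders::

    import ssl
    import oracledb

    connection = oracledb.connect(
        user="app_user",
        password="app_password",
        dsn="tcps://dbhost.example.com:2484/orclpdb1",   # placeholder connect string
        ssl_version=ssl.TLSVersion.TLSv1_2,   # TLSv1_3 requires Oracle Database 23ai
    )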
@@ -547,7 +549,7 @@ Oracledb Methods The ``server_type`` parameter is expected to be a string that indicates the type of server connection that should be established. If specified, it - should be one of `dedicated`, `shared`, or `pooled`. + should be one of *dedicated*, *shared*, or *pooled*. The ``cclass`` parameter is expected to be a string that identifies the connection class to use for :ref:`drcp`. @@ -562,31 +564,31 @@ Oracledb Methods The ``expire_time`` parameter is expected to be an integer which indicates the number of minutes between the sending of keepalive probes. If this parameter is set to a value greater than zero it enables keepalive. The - default value is 0. + default value is *0*. The ``retry_count`` parameter is expected to be an integer that identifies the number of times that a connection attempt should be retried before the - attempt is terminated. The default value is 0. + attempt is terminated. The default value is *0*. The ``retry_delay`` parameter is expected to be an integer that identifies the number of seconds to wait before making a new connection attempt. The - default value is 1. + default value is *1*. The ``tcp_connect_timeout`` parameter is expected to be a float that indicates the maximum number of seconds to wait for establishing a - connection to the database host. The default value is 20.0. + connection to the database host. The default value is *20.0*. The ``ssl_server_dn_match`` parameter is expected to be a boolean that indicates whether the server certificate distinguished name (DN) should be matched in addition to the regular certificate verification that is performed. Note that if the ``ssl_server_cert_dn`` parameter is not provided, host name matching is performed instead. The default value is - True. + *True*. The ``ssl_server_cert_dn`` parameter is expected to be a string that indicates the distinguished name (DN) which should be matched with the server. This value is ignored if the ``ssl_server_dn_match`` parameter is - not set to the value True. + not set to the value *True*. The ``wallet_location`` parameter is expected to be a string that identifies the directory where the wallet can be found. In python-oracledb @@ -605,7 +607,7 @@ Oracledb Methods The ``disable_oob`` parameter is expected to be a boolean that indicates whether out-of-band breaks should be disabled. This value has no effect on Windows which does not support this functionality. The default value is - False. + *False*. The ``stmtcachesize`` parameter is expected to be an integer which specifies the initial size of the statement cache. The default is the @@ -651,15 +653,15 @@ Oracledb Methods negotiated down to the lower of this value and the database network SDU configuration value. See the `SQL*Net documentation `__ for more details. The default value is 8192 bytes. + 77949C8A2B04>`__ for more details. The default value is *8192* bytes. The ``pool_boundary`` parameter is expected to be one of the strings - "statement" or "transaction" which indicates when pooled :ref:`DRCP ` + *statement* or *transaction* which indicates when pooled :ref:`DRCP ` or PRCP connections can be returned to the pool. If the value is - "statement", then pooled DRCP or PRCP connections are implicitly released + *statement*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when the connection is stateless (that is, there are no active cursors, active transactions, temporary tables, or - temporary LOBs). 
If the value is "transaction", then pooled DRCP or PRCP + temporary LOBs). If the value is *transaction*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when either one of the methods :meth:`AsyncConnection.commit()` or :meth:`AsyncConnection.rollback()` are called. This parameter requires the @@ -675,15 +677,15 @@ Oracledb Methods reduce the latency in round-trips to the database after a connection has been established. This feature is only available with certain versions of ADB-S. This value is used in both python-oracledb Thin and Thick modes. - The default value is False. + The default value is *False*. The ``ssl_version`` parameter is expected to be one of the constants - "ssl.TLSVersion.TLSv1_2" or "ssl.TLSVersion.TLSv1_3" which identifies the + *ssl.TLSVersion.TLSv1_2* or *ssl.TLSVersion.TLSv1_3* which identifies the TLS protocol version used. These constants are defined in the Python `ssl `__ module. This parameter can be specified when establishing connections with the protocol - "tcps". This value is used in both python-oracledb Thin and Thick modes. - The value "ssl.TLSVersion.TLSv1_3" requires Oracle Database 23ai. If you + *tcps*. This value is used in both python-oracledb Thin and Thick modes. + The value *ssl.TLSVersion.TLSv1_3* requires Oracle Database 23ai. If you are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. @@ -811,12 +813,13 @@ Oracledb Methods python-oracledb Thin and Thick modes. The ``port`` parameter is expected to be an integer which indicates the - port number on which the listener is listening. The default value is 1521. - This value is used in both the python-oracledb Thin and Thick modes. + port number on which the listener is listening. The default value is + *1521*. This value is used in both the python-oracledb Thin and Thick + modes. - The ``protocol`` parameter is expected to be one of the strings "tcp" or - "tcps" which indicates whether to use unencrypted network traffic or - encrypted network traffic (TLS). The default value is tcp. This value is + The ``protocol`` parameter is expected to be one of the strings *tcp* or + *tcps* which indicates whether to use unencrypted network traffic or + encrypted network traffic (TLS). The default value is *tcp*. This value is used in both the python-oracledb Thin and Thick modes. The ``https_proxy`` parameter is expected to be a string which indicates @@ -826,7 +829,7 @@ Oracledb Methods The ``https_proxy_port`` parameter is expected to be an integer which indicates the port that is to be used to communicate with the proxy host. - The default value is 0. This value is used in both the python-oracledb Thin + The default value is *0*. This value is used in both the python-oracledb Thin and Thick modes. The ``service_name`` parameter is expected to be a string which indicates @@ -839,7 +842,7 @@ Oracledb Methods The ``server_type`` parameter is expected to be a string that indicates the type of server connection that should be established. If specified, it - should be one of "dedicated", "shared", or "pooled". This value is used in + should be one of *dedicated*, *shared*, or *pooled*. This value is used in both the python-oracledb Thin and Thick modes. The ``cclass`` parameter is expected to be a string that identifies the @@ -858,34 +861,34 @@ Oracledb Methods the number of minutes between the sending of keepalive probes. If this parameter is set to a value greater than zero it enables keepalive. 
This value is used in both the python-oracledb Thin and Thick modes. The default - value is 0. + value is *0*. The ``retry_count`` parameter is expected to be an integer that identifies the number of times that a connection attempt should be retried before the attempt is terminated. This value is used in both the python-oracledb Thin - and Thick modes. The default value is 0. + and Thick modes. The default value is *0*. The ``retry_delay`` parameter is expected to be an integer that identifies the number of seconds to wait before making a new connection attempt. This value is used in both the python-oracledb Thin and Thick modes. The default - value is 1. + value is *1*. The ``tcp_connect_timeout`` parameter is expected to be a float that indicates the maximum number of seconds to wait for establishing a connection to the database host. This value is used in both the - python-oracledb Thin and Thick modes. The default value is 20.0. + python-oracledb Thin and Thick modes. The default value is *20.0*. The ``ssl_server_dn_match`` parameter is expected to be a boolean that indicates whether the server certificate distinguished name (DN) should be matched in addition to the regular certificate verification that is performed. Note that if the ``ssl_server_cert_dn`` parameter is not provided, host name matching is performed instead. This value is used in - both the python-oracledb Thin and Thick modes. The default value is True. + both the python-oracledb Thin and Thick modes. The default value is *True*. The ``ssl_server_cert_dn`` parameter is expected to be a string that indicates the distinguished name (DN) which should be matched with the server. This value is ignored if the ``ssl_server_dn_match`` parameter is - not set to the value True. This value is used in both the python-oracledb + not set to the value *True*. This value is used in both the python-oracledb Thin and Thick modes. The ``wallet_location`` parameter is expected to be a string that @@ -899,14 +902,14 @@ Oracledb Methods the events mode should be enabled. This value is only used in the python-oracledb Thick mode. This parameter is needed for continuous query notification and high availability event notifications. The default - value is False. + value is *False*. The ``externalauth`` parameter is a boolean that specifies whether external authentication should be used. This value is only used in the - python-oracledb Thick mode. The default value is False. For standalone + python-oracledb Thick mode. The default value is *False*. For standalone connections, external authentication occurs when the ``user`` and ``password`` attributes are not used. If these attributes are not used, you - can optionally set the ``externalauth`` attribute to True, which may aid + can optionally set the ``externalauth`` attribute to *True*, which may aid code auditing. The ``mode`` parameter is expected to be an integer that identifies the @@ -917,7 +920,7 @@ Oracledb Methods The ``disable_oob`` parameter is expected to be a boolean that indicates whether out-of-band breaks should be disabled. This value is only used in the python-oracledb Thin mode and has no effect on Windows which - does not support this functionality. The default value is False. + does not support this functionality. The default value is *False*. The ``stmtcachesize`` parameter is expected to be an integer that identifies the initial size of the statement cache. 
This value is used in @@ -936,7 +939,7 @@ Oracledb Methods The ``matchanytag`` parameter is expected to be a boolean specifying whether any tag can be used when acquiring a connection from the pool. This value is only used in the python-oracledb Thick mode when acquiring a - connection from a pool. The default value is False. + connection from a pool. The default value is *False*. The ``config_dir`` parameter is expected to be a string that indicates the directory in which configuration files (tnsnames.ora) are found. This value @@ -987,15 +990,15 @@ Oracledb Methods configuration value. See the `SQL*Net documentation `__ for more details. This value is used in both the - python-oracledb Thin and Thick modes. The default value is 8192 bytes. + python-oracledb Thin and Thick modes. The default value is *8192* bytes. The ``pool_boundary`` parameter is expected to be one of the strings - "statement" or "transaction" which indicates when pooled :ref:`DRCP ` + *statement* or *transaction* which indicates when pooled :ref:`DRCP ` or PRCP connections can be returned to the pool. If the value is - "statement", then pooled DRCP or PRCP connections are implicitly released + *statement*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when the connection is stateless (that is, there are no active cursors, active transactions, temporary tables, or - temporary LOBs). If the value is "transaction", then pooled DRCP or PRCP + temporary LOBs). If the value is *transaction*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when either one of the methods :meth:`Connection.commit()` or :meth:`Connection.rollback()` are called. This parameter requires the use @@ -1011,15 +1014,15 @@ Oracledb Methods reduce the latency in round-trips to the database after a connection has been established. This feature is only available with certain versions of ADB-S. This value is used in both python-oracledb Thin and Thick modes. - The default value is False. + The default value is *False*. The ``ssl_version`` parameter is expected to be one of the constants - "ssl.TLSVersion.TLSv1_2" or "ssl.TLSVersion.TLSv1_3" which identifies the + *ssl.TLSVersion.TLSv1_2* or *ssl.TLSVersion.TLSv1_3* which identifies the TLS protocol version used. These constants are defined in the Python `ssl `__ module. This parameter can be specified when establishing connections with the protocol "tcps". This value is used in both python-oracledb Thin and Thick modes. - The value "ssl.TLSVersion.TLSv1_3" requires Oracle Database 23ai. If you + The value *ssl.TLSVersion.TLSv1_3* requires Oracle Database 23ai. If you are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. @@ -1051,7 +1054,7 @@ Oracledb Methods The ``handle`` parameter is expected to be an integer which represents a pointer to a valid service context handle. This value is only used in the python-oracledb Thick mode. It should be used with extreme caution. The - default value is 0. + default value is *0*. .. versionchanged:: 2.5.0 @@ -1180,12 +1183,12 @@ Oracledb Methods :ref:`recommended ` to help prevent connection storms and to help overall system stability. The ``min`` parameter is the number of connections opened when the pool is created. The default value of the - ``min`` parameter is 1. The ``increment`` parameter is the number of + ``min`` parameter is *1*. 
The ``increment`` parameter is the number of connections that are opened whenever a connection request exceeds the number of currently open connections. The default value of the - ``increment`` parameter is 1. The ``max`` parameter is the maximum number + ``increment`` parameter is *1*. The ``max`` parameter is the maximum number of connections that can be open in the connection pool. The default value - of the ``max`` parameter is 2. + of the ``max`` parameter is *2*. If the ``connectiontype`` parameter is specified, all calls to :meth:`ConnectionPool.acquire()` will create connection objects of that @@ -1200,23 +1203,24 @@ Oracledb Methods The ``homogeneous`` parameter is a boolean that indicates whether the connections are homogeneous (same user) or heterogeneous (multiple - users). The default value is True. + users). The default value is *True*. The ``timeout`` parameter is the length of time (in seconds) that a connection may remain idle in the pool before it is terminated. This applies only when the pool has more than ``min`` connections open, allowing it to shrink to the specified minimum size. If the value of this parameter - is 0, then the connections are never terminated. The default value is 0. + is 0, then the connections are never terminated. The default value is *0* + seconds. The ``wait_timeout`` parameter is the length of time (in milliseconds) that a caller should wait when acquiring a connection from the pool with ``getmode`` set to :data:`oracledb.POOL_GETMODE_TIMEDWAIT`. The default - value is 0. + value is *0* milliseconds. The ``max_lifetime_session`` parameter is the length of time (in seconds) that connections can remain in the pool. If the value of this parameter is 0, then the connections may remain in the pool indefinitely. The default - value is 0. + value is *0* seconds. The ``session_callback`` parameter is a callable that is invoked when a connection is returned from the pool for the first time, or when the @@ -1225,12 +1229,12 @@ Oracledb Methods The ``max_sessions_per_shard`` parameter is the maximum number of connections that may be associated with a particular shard. This value is only used in the python-oracledb Thick mode and is ignored in the - python-oracledb Thin mode. The default value is 0. + python-oracledb Thin mode. The default value is *0*. The ``soda_metadata_cache`` parameter is a boolean that indicates whether or not the SODA metadata cache should be enabled. This value is only used in the python-oracledb Thick mode and is ignored in the python-oracledb - Thin mode. The default value is False. + Thin mode. The default value is *False*. The ``ping_interval`` parameter is the length of time (in seconds) after which an unused connection in the pool will be a candidate for pinging when @@ -1238,7 +1242,7 @@ Oracledb Methods indicates the connection is not alive a replacement connection will be returned by :meth:`~ConnectionPool.acquire()`. If ``ping_interval`` is a negative value, then the ping functionality will be disabled. The default - value is 60 seconds. + value is *60* seconds. The ``ping_timeout`` parameter is the maximum length of time (in milliseconds) that :meth:`ConnectionPool.acquire()` waits for a connection @@ -1246,7 +1250,7 @@ Oracledb Methods respond within the specified time, then the connection is destroyed and :meth:`~ConnectionPool.acquire()` returns a different connection. This value is used in both the python-oracledb Thin and Thick modes. The default - value is 5000 milliseconds. + value is *5000* milliseconds. 
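As an illustration of the pool sizing, idle timeout, and ping parameters described above, a sketch of creating and using a pool with placeholder connection details::

    import oracledb

    pool = oracledb.create_pool(
        user="app_user",
        password="app_password",
        dsn="dbhost.example.com/orclpdb1",   # placeholder connect string
        min=1,             # connections opened when the pool is created
        max=4,             # maximum connections the pool may hold
        increment=1,       # connections added when demand grows
        timeout=300,       # close idle connections above "min" after 300 seconds
        ping_interval=60,  # ping connections unused for 60 seconds on acquire()
    )

    with pool.acquire() as connection:
        with connection.cursor() as cursor:
            cursor.execute("select user from dual")
            print(cursor.fetchone())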
The ``proxy_user`` parameter is expected to be a string which indicates the name of the proxy user to connect to. If this value is not specified, it @@ -1281,12 +1285,13 @@ Oracledb Methods python-oracledb Thin and Thick modes. The ``port`` parameter is expected to be an integer which indicates the - port number on which the listener is listening. The default value is 1521. - This value is used in both the python-oracledb Thin and Thick modes. + port number on which the listener is listening. The default value is + *1521*. This value is used in both the python-oracledb Thin and Thick + modes. - The ``protocol`` parameter is expected to be one of the strings "tcp" or - "tcps" which indicates whether to use unencrypted network traffic or - encrypted network traffic (TLS). The default value is tcp. This value is + The ``protocol`` parameter is expected to be one of the strings *tcp* or + *tcps* which indicates whether to use unencrypted network traffic or + encrypted network traffic (TLS). The default value is *tcp*. This value is used in both the python-oracledb Thin and Thick modes. The ``https_proxy`` parameter is expected to be a string which indicates @@ -1296,8 +1301,8 @@ Oracledb Methods The ``https_proxy_port`` parameter is expected to be an integer which indicates the port that is to be used to communicate with the proxy host. - The default value is 0. This value is used in both the python-oracledb Thin - and Thick modes. + The default value is *0*. This value is used in both the python-oracledb + Thin and Thick modes. The ``service_name`` parameter is expected to be a string which indicates the service name of the database. This value is used in both the @@ -1309,7 +1314,7 @@ Oracledb Methods The ``server_type`` parameter is expected to be a string that indicates the type of server connection that should be established. If specified, it - should be one of `dedicated`, `shared`, or `pooled`. This value is used in + should be one of *dedicated*, *shared*, or *pooled*. This value is used in both the python-oracledb Thin and Thick modes. The ``cclass`` parameter is expected to be a string that identifies the @@ -1326,34 +1331,34 @@ Oracledb Methods the number of minutes between the sending of keepalive probes. If this parameter is set to a value greater than zero it enables keepalive. This value is used in both the python-oracledb Thin and Thick modes. The default - value is 0. + value is *0* minutes. The ``retry_count`` parameter is expected to be an integer that identifies the number of times that a connection attempt should be retried before the attempt is terminated. This value is used in both the python-oracledb Thin - and Thick modes. The default value is 0. + and Thick modes. The default value is *0*. The ``retry_delay`` parameter is expected to be an integer that identifies the number of seconds to wait before making a new connection attempt. This value is used in both the python-oracledb Thin and Thick modes. The default - value is 1. + value is *1* seconds. The ``tcp_connect_timeout`` parameter is expected to be a float that indicates the maximum number of seconds to wait for establishing a connection to the database host. This value is used in both the - python-oracledb Thin and Thick modes. The default value is 20.0. + python-oracledb Thin and Thick modes. The default value is *20.0* seconds. 
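The ``server_type`` and ``cclass`` parameters described above are most often combined for DRCP; a hedged sketch, assuming DRCP is enabled on the database and using placeholder connection details::

    import oracledb

    pool = oracledb.create_pool(
        user="app_user",
        password="app_password",
        host="dbhost.example.com",
        port=1521,
        service_name="orclpdb1",
        server_type="pooled",   # request a DRCP pooled server
        cclass="MYAPP",         # assumed DRCP connection class name
        expire_time=2,          # send keepalive probes every two minutes
        min=1,
        max=4,
    )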
The ``ssl_server_dn_match`` parameter is expected to be a boolean that indicates whether the server certificate distinguished name (DN) should be matched in addition to the regular certificate verification that is performed. Note that if the ``ssl_server_cert_dn`` parameter is not provided, host name matching is performed instead. This value is used in - both the python-oracledb Thin and Thick modes. The default value is True. + both the python-oracledb Thin and Thick modes. The default value is *True*. The ``ssl_server_cert_dn`` parameter is expected to be a string that indicates the distinguished name (DN) which should be matched with the server. This value is ignored if the ``ssl_server_dn_match`` parameter is - not set to the value True. This value is used in both the python-oracledb + not set to the value *True*. This value is used in both the python-oracledb Thin and Thick modes. The ``wallet_location`` parameter is expected to be a string that @@ -1367,14 +1372,14 @@ Oracledb Methods the events mode should be enabled. This value is only used in the python-oracledb Thick mode and is ignored in the Thin mode. This parameter is needed for continuous query notification and high availability event - notifications. The default value is False. + notifications. The default value is *False*. The ``externalauth`` parameter is a boolean that determines whether to use external authentication. This value is only used in python-oracledb Thick - mode and is ignored in Thin mode. The default value is False. For pooled + mode and is ignored in Thin mode. The default value is *False*. For pooled connections in Thick mode, external authentication requires the use of a heterogeneous pool. For this reason, you must set the ``homogeneous`` - parameter to False. See :ref:`extauth`. + parameter to *False*. See :ref:`extauth`. If the ``mode`` parameter is specified, it must be one of the :ref:`connection authorization modes ` @@ -1385,7 +1390,7 @@ Oracledb Methods The ``disable_oob`` parameter is expected to be a boolean that indicates whether out-of-band breaks should be disabled. This value is only used in the python-oracledb Thin mode and has no effect on Windows which - does not support this functionality. The default value is False. + does not support this functionality. The default value is *False*. The ``stmtcachesize`` parameter is expected to be an integer which specifies the initial size of the statement cache. This value is used in @@ -1405,7 +1410,7 @@ Oracledb Methods whether any tag can be used when acquiring a connection from the pool. This value is only used in the python-oracledb Thick mode when acquiring a connection from a pool. This value is ignored in the python-oracledb Thin - mode. The default value is False. + mode. The default value is *False*. The ``config_dir`` parameter is expected to be a string that indicates the directory in which configuration files (tnsnames.ora) are found. This value @@ -1456,15 +1461,15 @@ Oracledb Methods configuration value. See the `SQL*Net documentation `__ for more details. This value is used in both the - python-oracledb Thin and Thick modes. The default value is 8192 bytes. + python-oracledb Thin and Thick modes. The default value is *8192* bytes. The ``pool_boundary`` parameter is expected to be one of the strings - "statement" or "transaction" which indicates when pooled :ref:`DRCP ` + *statement* or *transaction* which indicates when pooled :ref:`DRCP ` or PRCP connections can be returned to the pool. 
If the value is - "statement", then pooled DRCP or PRCP connections are implicitly released + *statement*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when the connection is stateless (that is, there are no active cursors, active transactions, temporary tables, or - temporary LOBs). If the value is "transaction", then pooled DRCP or PRCP + temporary LOBs). If the value is *transaction*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when either one of the methods :meth:`Connection.commit()` or :meth:`Connection.rollback()` are called. This parameter requires the use @@ -1480,15 +1485,15 @@ Oracledb Methods reduce the latency in round-trips to the database after a connection has been established. This feature is only available with certain versions of ADB-S. This value is used in both python-oracledb Thin and Thick modes. - The default value is False. + The default value is *False*. The ``ssl_version`` parameter is expected to be one of the constants - "ssl.TLSVersion.TLSv1_2" or "ssl.TLSVersion.TLSv1_3" which identifies the + *ssl.TLSVersion.TLSv1_2* or *ssl.TLSVersion.TLSv1_3* which identifies the TLS protocol version used. These constants are defined in the Python `ssl `__ module. This parameter can be specified when establishing connections with the protocol "tcps". This value is used in both python-oracledb Thin and Thick modes. - The value "ssl.TLSVersion.TLSv1_3" requires Oracle Database 23ai. If you + The value *ssl.TLSVersion.TLSv1_3* requires Oracle Database 23ai. If you are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. @@ -1523,7 +1528,7 @@ Oracledb Methods created should *never* be used after the source handle has been closed or destroyed. This value is only used in the python-oracledb Thick mode and is ignored in the Thin mode. It should be used with extreme caution. The - default value is 0. + default value is *0*. .. versionchanged:: 3.0.0 @@ -1537,9 +1542,9 @@ Oracledb Methods .. versionchanged:: 2.3.0 - The default value of the ``retry_delay`` parameter was changed from 0 - seconds to 1 second. The default value of the ``tcp_connect_timeout`` - parameter was changed from 60.0 seconds to 20.0 seconds. The + The default value of the ``retry_delay`` parameter was changed from *0* + seconds to *1* second. The default value of the ``tcp_connect_timeout`` + parameter was changed from *60.0* seconds to *20.0* seconds. The ``ping_timeout`` and ``ssl_version`` parameters were added. .. versionchanged:: 2.1.0 @@ -1626,12 +1631,12 @@ Oracledb Methods :ref:`recommended ` to help prevent connection storms and to help overall system stability. The ``min`` parameter is the number of connections opened when the pool is created. The default value of the - ``min`` parameter is 1. The ``increment`` parameter is the number of + ``min`` parameter is *1*. The ``increment`` parameter is the number of connections that are opened whenever a connection request exceeds the number of currently open connections. The default value of the - ``increment`` parameter is 1. The ``max`` parameter is the maximum number + ``increment`` parameter is *1*. The ``max`` parameter is the maximum number of connections that can be open in the connection pool. The default value - of the ``max`` parameter is 2. + of the ``max`` parameter is *2*. 
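For the asynchronous pool creation covered here, a brief sketch with placeholder credentials and connect string; acquiring a connection and closing the pool are awaited inside an asyncio application::

    import asyncio
    import oracledb

    async def main():
        pool = oracledb.create_pool_async(
            user="app_user",
            password="app_password",
            dsn="dbhost.example.com/orclpdb1",   # placeholder connect string
            min=1,
            increment=1,
            max=2,   # the documented default maximum
        )
        async with pool.acquire() as connection:
            cursor = connection.cursor()
            await cursor.execute("select sysdate from dual")
            print(await cursor.fetchone())
        await pool.close()

    asyncio.run(main())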
If the ``connectiontype`` parameter is specified, all calls to :meth:`AsyncConnectionPool.acquire()` will create connection objects of @@ -1646,23 +1651,24 @@ Oracledb Methods The ``homogeneous`` parameter is a boolean that indicates whether the connections are homogeneous (same user) or heterogeneous (multiple - users). The default value is True. + users). The default value is *True*. The ``timeout`` parameter is the length of time (in seconds) that a connection may remain idle in the pool before it is terminated. This applies only when the pool has more than ``min`` connections open, allowing it to shrink to the specified minimum size. If the value of this parameter - is 0, then the connections are never terminated. The default value is 0. + is 0, then the connections are never terminated. The default value is *0* + seconds. The ``wait_timeout`` parameter is the length of time (in milliseconds) that a caller should wait when acquiring a connection from the pool with ``getmode`` set to :data:`oracledb.POOL_GETMODE_TIMEDWAIT`. The default - value is 0. + value is *0* milliseconds. The ``max_lifetime_session`` parameter is the length of time (in seconds) that connections can remain in the pool. If the value of this parameter is 0, then the connections may remain in the pool indefinitely. The default - value is 0. + value is *0* seconds. The ``session_callback`` parameter is a callable that is invoked when a connection is returned from the pool for the first time, or when the @@ -1680,7 +1686,7 @@ Oracledb Methods database indicates the connection is not alive a replacement connection will be returned by :meth:`~AsyncConnectionPool.acquire()`. If ``ping_interval`` is a negative value, then the ping functionality will be - disabled. The default value is 60 seconds. + disabled. The default value is *60* seconds. The ``ping_timeout`` parameter is the maximum length of time (in milliseconds) that :meth:`AsyncConnectionPool.acquire()` waits for a @@ -1688,7 +1694,7 @@ Oracledb Methods does not respond within the specified time, then the connection is destroyed and :meth:`~AsyncConnectionPool.acquire()` returns a different connection. This value is used in both the python-oracledb Thin and Thick - modes. The default value is 5000 milliseconds. + modes. The default value is *5000* milliseconds. The ``proxy_user`` parameter is expected to be a string which indicates the name of the proxy user to connect to. If this value is not specified, it @@ -1717,11 +1723,12 @@ Oracledb Methods initial connection to the database. The ``port`` parameter is expected to be an integer which indicates the - port number on which the listener is listening. The default value is 1521. + port number on which the listener is listening. The default value is + *1521*. - The ``protocol`` parameter is expected to be one of the strings "tcp" or - "tcps" which indicates whether to use unencrypted network traffic or - encrypted network traffic (TLS). The default value is tcp. + The ``protocol`` parameter is expected to be one of the strings *tcp* or + *tcps* which indicates whether to use unencrypted network traffic or + encrypted network traffic (TLS). The default value is *tcp*. The ``https_proxy`` parameter is expected to be a string which indicates the name or IP address of a proxy host to use for tunneling secure @@ -1729,7 +1736,7 @@ Oracledb Methods The ``https_proxy_port`` parameter is expected to be an integer which indicates the port that is to be used to communicate with the proxy host. - The default value is 0. 
+ The default value is *0*. The ``service_name`` parameter is expected to be a string which indicates the service name of the database. @@ -1739,7 +1746,7 @@ Oracledb Methods The ``server_type`` parameter is expected to be a string that indicates the type of server connection that should be established. If specified, it - should be one of `dedicated`, `shared`, or `pooled`. + should be one of *dedicated*, *shared*, or *pooled*. The ``cclass`` parameter is expected to be a string that identifies the connection class to use for :ref:`drcp`. @@ -1752,31 +1759,31 @@ Oracledb Methods The ``expire_time`` parameter is expected to be an integer which indicates the number of minutes between the sending of keepalive probes. If this parameter is set to a value greater than zero it enables keepalive. The - default value is 0. + default value is *0* minutes. The ``retry_count`` parameter is expected to be an integer that identifies the number of times that a connection attempt should be retried before the - attempt is terminated. The default value is 0. + attempt is terminated. The default value is *0*. The ``retry_delay`` parameter is expected to be an integer that identifies the number of seconds to wait before making a new connection attempt. The - default value is 1. + default value is *1* seconds. The ``tcp_connect_timeout`` parameter is expected to be a float that indicates the maximum number of seconds to wait for establishing a - connection to the database host. The default value is 20.0. + connection to the database host. The default value is *20.0* seconds. The ``ssl_server_dn_match`` parameter is expected to be a boolean that indicates whether the server certificate distinguished name (DN) should be matched in addition to the regular certificate verification that is performed. Note that if the ``ssl_server_cert_dn`` parameter is not provided, host name matching is performed instead. The default value is - True. + *True*. The ``ssl_server_cert_dn`` parameter is expected to be a string that indicates the distinguished name (DN) which should be matched with the server. This value is ignored if the ``ssl_server_dn_match`` parameter is - not set to the value True. + not set to the value *True*. The ``wallet_location`` parameter is expected to be a string that identifies the directory where the wallet can be found. In python-oracledb @@ -1795,7 +1802,7 @@ Oracledb Methods The ``disable_oob`` parameter is expected to be a boolean that indicates whether out-of-band breaks should be disabled. This value has no effect on Windows which does not support this functionality. The default value - is False. + is *False*. The ``stmtcachesize`` parameter is expected to be an integer which specifies the initial size of the statement cache. The default is the @@ -1841,15 +1848,15 @@ Oracledb Methods negotiated down to the lower of this value and the database network SDU configuration value. See the `SQL*Net documentation `__ for more details. The default value is 8192 bytes. + 77949C8A2B04>`__ for more details. The default value is *8192* bytes. The ``pool_boundary`` parameter is expected to be one of the strings - "statement" or "transaction" which indicates when pooled :ref:`DRCP ` + *statement* or *transaction* which indicates when pooled :ref:`DRCP ` or PRCP connections can be returned to the pool. 
If the value is - "statement", then pooled DRCP or PRCP connections are implicitly released + *statement*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when the connection is stateless (that is, there are no active cursors, active transactions, temporary tables, or - temporary LOBs). If the value is "transaction", then pooled DRCP or PRCP + temporary LOBs). If the value is *transaction*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when either one of the methods :meth:`AsyncConnection.commit()` or :meth:`AsyncConnection.rollback()` are called. This parameter requires the @@ -1865,15 +1872,15 @@ Oracledb Methods reduce the latency in round-trips to the database after a connection has been established. This feature is only available with certain versions of ADB-S. This value is used in both python-oracledb Thin and Thick modes. - The default value is False. + The default value is *False*. The ``ssl_version`` parameter is expected to be one of the constants - "ssl.TLSVersion.TLSv1_2" or "ssl.TLSVersion.TLSv1_3" which identifies the + *ssl.TLSVersion.TLSv1_2* or *ssl.TLSVersion.TLSv1_3* which identifies the TLS protocol version used. These constants are defined in the Python `ssl `__ module. This parameter can be specified when establishing connections with the protocol - "tcps". This value is used in both python-oracledb Thin and Thick modes. - The value "ssl.TLSVersion.TLSv1_3" requires Oracle Database 23ai. If you + *tcps*. This value is used in both python-oracledb Thin and Thick modes. + The value *ssl.TLSVersion.TLSv1_3* requires Oracle Database 23ai. If you are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. @@ -1980,7 +1987,7 @@ Oracledb Methods ``pool_alias`` value to :meth:`oracledb.create_pool()` or :meth:`oracledb.create_pool_async()`. - If a pool with the given name does not exist, None is returned. + If a pool with the given name does not exist, *None* is returned. See :ref:`connpoolcache` for more information. @@ -2045,7 +2052,7 @@ Oracledb Methods version. There should be a single space character before and after the colon. If this parameter is not set, then the value specified in :attr:`oracledb.defaults.driver_name ` is used. If - the value of this attribute is None, then the default value in + the value of this attribute is *None*, then the default value in python-oracledb Thick mode is like "python-oracledb thk : ". See :ref:`otherinit`. @@ -2068,7 +2075,7 @@ Oracledb Methods Returns a boolean indicating if Thin mode is in use. Immediately after python-oracledb is imported, this function will return - True indicating that python-oracledb defaults to Thin mode. If + *True* indicating that python-oracledb defaults to Thin mode. If :func:`oracledb.init_oracle_client()` is called, then a subsequent call to ``is_thin_mode()`` will return False indicating that Thick mode is enabled. Once the first standalone connection or connection pool is @@ -2130,14 +2137,14 @@ Oracledb Methods All the parameters are optional. The ``min`` parameter is the minimum number of connections that the pool - should contain. The default value is 1. + should contain. The default value is *1*. The ``max`` parameter is the maximum number of connections that the pool - should contain. The default value is 2. + should contain. The default value is *2*. The ``increment`` parameter is the number of connections that should be added to the pool whenever a new connection needs to be created. 
The - default value is 1. + default value is *1*. The ``connectiontype`` parameter is the class of the connection that should be returned during calls to :meth:`ConnectionPool.acquire()`. It must be a @@ -2152,23 +2159,24 @@ Oracledb Methods The ``homogeneous`` parameter is a boolean that indicates whether the connections are homogeneous (same user) or heterogeneous (multiple users). - The default value is True. + The default value is *True*. The ``timeout`` parameter is the length of time (in seconds) that a connection may remain idle in the pool before it is terminated. This applies only when the pool has more than ``min`` connections open, allowing it to shrink to the specified minimim size. If the value of this parameter - is 0, then the connections are never terminated. The default value is 0. + is 0, then the connections are never terminated. The default value is *0* + seconds. The ``wait_timeout`` parameter is the length of time (in milliseconds) that a caller should wait when acquiring a connection from the pool with ``getmode`` set to :data:`oracledb.POOL_GETMODE_TIMEDWAIT`. The default - value is 0. + value is *0* milliseconds. The ``max_lifetime_session`` parameter is the length of time (in seconds) that connections can remain in the pool. If the value of this parameter is 0, then the connections may remain in the pool indefinitely. The default - value is 0. + value is *0* seconds. The ``session_callback`` parameter is a callable that is invoked when a connection is returned from the pool for the first time, or when the @@ -2176,11 +2184,11 @@ Oracledb Methods The ``max_sessions_per_shard`` parameter is the maximum number of connections that may be associated with a particular shard. The default - value is 0. + value is *0*. The ``soda_metadata_cache`` parameter is a boolean that indicates whether or not the SODA metadata cache should be enabled. The default value is - False. + *False*. The ``ping_interval`` parameter is the length of time (in seconds) after which an unused connection in the pool will be a candidate for pinging when @@ -2188,7 +2196,7 @@ Oracledb Methods indicates the connection is not alive a replacement connection will be returned by :meth:`ConnectionPool.acquire()`. If ping_interval is a negative value, then the ping functionality will be disabled. The default - value is 60 seconds. + value is *60* seconds. The ``ping_timeout`` parameter is the maximum length of time (in milliseconds) that :meth:`ConnectionPool.acquire()` waits for a connection @@ -2196,7 +2204,7 @@ Oracledb Methods respond within the specified time, then the connection is destroyed and :meth:`~ConnectionPool.acquire()` returns a different connection. This value is used in both the python-oracledb Thin and Thick modes. The default - value is 5000 milliseconds. + value is *5000* milliseconds. The ``user`` parameter is expected to be a string which indicates the name of the user to connect to. This value is used in both the python-oracledb @@ -2239,12 +2247,13 @@ Oracledb Methods python-oracledb Thin and Thick modes. The ``port`` parameter is expected to be an integer which indicates the - port number on which the listener is listening. The default value is 1521. - This value is used in both the python-oracledb Thin and Thick modes. + port number on which the listener is listening. The default value is + *1521*. This value is used in both the python-oracledb Thin and Thick + modes. 
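The same sizing and acquisition settings can also be collected in a ``PoolParams`` instance and passed to pool creation; a sketch with assumed credentials and connect string::

    import oracledb

    params = oracledb.PoolParams(
        min=1,
        max=4,
        increment=1,
        getmode=oracledb.POOL_GETMODE_TIMEDWAIT,
        wait_timeout=3000,   # milliseconds to wait for a free connection
        ping_interval=60,
    )
    pool = oracledb.create_pool(
        user="app_user",
        password="app_password",
        dsn="dbhost.example.com/orclpdb1",   # placeholder connect string
        params=params,
    )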
- The ``protocol`` parameter is expected to be one of the strings "tcp" or - "tcps" which indicates whether to use unencrypted network traffic or - encrypted network traffic (TLS). The default value is tcp. This value is + The ``protocol`` parameter is expected to be one of the strings *tcp* or + *tcps* which indicates whether to use unencrypted network traffic or + encrypted network traffic (TLS). The default value is *tcp*. This value is used in both the python-oracledb Thin and Thick modes. The ``https_proxy`` parameter is expected to be a string which indicates @@ -2254,7 +2263,7 @@ Oracledb Methods The ``https_proxy_port`` parameter is expected to be an integer which indicates the port that is to be used to communicate with the proxy host. - The default value is 0. This value is used in both the python-oracledb Thin + The default value is *0*. This value is used in both the python-oracledb Thin and Thick modes. The ``service_name`` parameter is expected to be a string which indicates @@ -2267,7 +2276,7 @@ Oracledb Methods The ``server_type`` parameter is expected to be a string that indicates the type of server connection that should be established. If specified, it - should be one of `dedicated`, `shared`, or `pooled`. This value is used in + should be one of *dedicated*, *shared*, or *pooled*. This value is used in both the python-oracledb Thin and Thick modes. The ``cclass`` parameter is expected to be a string that identifies the @@ -2284,34 +2293,34 @@ Oracledb Methods the number of minutes between the sending of keepalive probes. If this parameter is set to a value greater than zero it enables keepalive. This value is used in both the python-oracledb Thin and Thick modes. The default - value is 0. + value is *0* minutes. The ``retry_count`` parameter is expected to be an integer that identifies the number of times that a connection attempt should be retried before the attempt is terminated. This value is used in both the python-oracledb Thin - and Thick modes. The default value is 0. + and Thick modes. The default value is *0*. The ``retry_delay`` parameter is expected to be an integer that identifies the number of seconds to wait before making a new connection attempt. This value is used in both the python-oracledb Thin and Thick modes. The default - value is 1. + value is *1* seconds. The ``tcp_connect_timeout`` parameter is expected to be a float that indicates the maximum number of seconds to wait for establishing a connection to the database host. This value is used in both the - python-oracledb Thin and Thick modes. The default value is 20.0. + python-oracledb Thin and Thick modes. The default value is *20.0* seconds. The ``ssl_server_dn_match`` parameter is expected to be a boolean that indicates whether the server certificate distinguished name (DN) should be matched in addition to the regular certificate verification that is performed. Note that if the ssl_server_cert_dn parameter is not provided, host name matching is performed instead. This value is used in both the - python-oracledb Thin and Thick modes. The default value is True. + python-oracledb Thin and Thick modes. The default value is *True*. The ``ssl_server_cert_dn`` parameter is expected to be a string that indicates the distinguished name (DN) which should be matched with the server. This value is ignored if the ssl_server_dn_match parameter is not - set to the value True. This value is used in both the python-oracledb Thin + set to the value *True*. 
This value is used in both the python-oracledb Thin and Thick modes. The ``wallet_location`` parameter is expected to be a string that @@ -2323,13 +2332,13 @@ Oracledb Methods The ``externalauth`` parameter is a boolean that determines whether to use external authentication. This value is only used in the python-oracledb - Thick mode. The default value is False. + Thick mode. The default value is *False*. The ``events`` parameter is expected to be a boolean that specifies whether the events mode should be enabled. This value is only used in the python-oracledb Thick mode. This parameter is needed for continuous query notification and high availability event notifications. The default - value is False. + value is *False*. The ``mode`` parameter is expected to be an integer that identifies the authorization mode to use. This value is used in both the python-oracledb @@ -2339,7 +2348,7 @@ Oracledb Methods The ``disable_oob`` parameter is expected to be a boolean that indicates whether out-of-band breaks should be disabled. This value is only used in the python-oracledb Thin mode and has no effect on Windows which - does not support this functionality. The default value is False. + does not support this functionality. The default value is *False*. The ``stmtcachesize`` parameter is expected to be an integer that identifies the initial size of the statement cache. This value is used in @@ -2358,7 +2367,7 @@ Oracledb Methods The ``matchanytag`` parameter is expected to be a boolean specifying whether any tag can be used when acquiring a connection from the pool. This value is only used in the python-oracledb Thick mode when acquiring a - connection from a pool. The default value is False. + connection from a pool. The default value is *False*. The ``config_dir`` parameter is expected to be a string that indicates the directory in which configuration files (tnsnames.ora) are found. This value @@ -2409,15 +2418,15 @@ Oracledb Methods configuration value. See the `SQL*Net documentation `__ for more details. This value is used in both the - python-oracledb Thin and Thick modes. The default value is 8192 bytes. + python-oracledb Thin and Thick modes. The default value is *8192* bytes. The ``pool_boundary`` parameter is expected to be one of the strings - "statement" or "transaction" which indicates when pooled :ref:`DRCP ` + *statement* or *transaction* which indicates when pooled :ref:`DRCP ` or PRCP connections can be returned to the pool. If the value is - "statement", then pooled DRCP or PRCP connections are implicitly released + *statement*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when the connection is stateless (that is, there are no active cursors, active transactions, temporary tables, or - temporary LOBs). If the value is "transaction", then pooled DRCP or PRCP + temporary LOBs). If the value is *transaction*, then pooled DRCP or PRCP connections are implicitly released back to the DRCP or PRCP pool when either one of the methods :meth:`Connection.commit()` or :meth:`Connection.rollback()` are called. This parameter requires the use @@ -2433,15 +2442,15 @@ Oracledb Methods reduce the latency in round-trips to the database after a connection has been established. This feature is only available with certain versions of ADB-S. This value is used in both python-oracledb Thin and Thick modes. - The default value is False. + The default value is *False*. 
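The ``mode`` parameter mentioned above takes one of the module's authorization constants; as a hedged illustration with placeholder credentials, a standalone SYSDBA connection looks like this::

    import oracledb

    connection = oracledb.connect(
        user="sys",
        password="sys_password",             # placeholder
        dsn="dbhost.example.com/orclpdb1",   # placeholder connect string
        mode=oracledb.AUTH_MODE_SYSDBA,      # connect with SYSDBA privileges
    )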
The ``ssl_version`` parameter is expected to be one of the constants - "ssl.TLSVersion.TLSv1_2" or "ssl.TLSVersion.TLSv1_3" which identifies the + *ssl.TLSVersion.TLSv1_2* or *ssl.TLSVersion.TLSv1_3* which identifies the TLS protocol version used. These constants are defined in the Python `ssl `__ module. This parameter can be specified when establishing connections with the protocol "tcps". This value is used in both python-oracledb Thin and Thick modes. - The value "ssl.TLSVersion.TLSv1_3" requires Oracle Database 23ai. If you + The value *ssl.TLSVersion.TLSv1_3* requires Oracle Database 23ai. If you are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. @@ -2473,7 +2482,7 @@ Oracledb Methods The ``handle`` parameter is expected to be an integer which represents a pointer to a valid service context handle. This value is only used in the python-oracledb Thick mode. It should be used with extreme caution. The - default value is 0. + default value is *0*. .. versionchanged:: 2.5.0 @@ -2483,9 +2492,9 @@ Oracledb Methods .. versionchanged:: 2.3.0 - The default value of the ``retry_delay`` parameter was changed from 0 - seconds to 1 second. The default value of the ``tcp_connect_timeout`` - parameter was changed from 60.0 seconds to 20.0 seconds. The + The default value of the ``retry_delay`` parameter was changed from *0* + seconds to *1* second. The default value of the ``tcp_connect_timeout`` + parameter was changed from *60.0* seconds to *20.0* seconds. The ``ping_timeout`` and ``ssl_version`` parameters were added. .. versionchanged:: 2.1.0 @@ -2515,7 +2524,7 @@ Oracledb Methods application and return the valid password. Calling :meth:`~oracledb.register_password_type()` with the - ``hook_function`` parameter set to None will result in a previously + ``hook_function`` parameter set to *None* will result in a previously registered user function being removed and the default behavior restored. See :ref:`registerpasswordtype`. @@ -2586,7 +2595,7 @@ Oracledb Methods invalid protocol``. Calling :meth:`~oracledb.register_protocol()` with the ``hook_function`` - parameter set to None will result in a previously registered user function + parameter set to *None* will result in a previously registered user function being removed and the default behavior restored. See :ref:`connectionhook` for more information. diff --git a/doc/src/api_manual/pipeline.rst b/doc/src/api_manual/pipeline.rst index 93dd02e9..62f2d74a 100644 --- a/doc/src/api_manual/pipeline.rst +++ b/doc/src/api_manual/pipeline.rst @@ -173,7 +173,7 @@ PipelineOp Attributes This read-only attribute returns the :ref:`array size ` that will be used when fetching query rows with :meth:`Pipeline.add_fetchall()`. - For all other operations, the value returned is 0. + For all other operations, the value returned is *0*. .. attribute:: PipelineOp.keyword_parameters @@ -189,7 +189,7 @@ PipelineOp Attributes This read-only attribute returns the number of rows to fetch when performing a query of a specific number of rows. For all other operations, - the value returned is 0. + the value returned is *0*. .. attribute:: PipelineOp.op_type @@ -232,7 +232,7 @@ PipelineOpResult Attributes .. attribute:: PipelineOpResult.columns This read-only attribute is a list of :ref:`FetchInfo` - objects. This attribute will be None for operations that do not return + objects. This attribute will be *None* for operations that do not return rows. .. versionadded:: 2.5.0 @@ -240,7 +240,7 @@ PipelineOpResult Attributes .. 
attribute:: PipelineOpResult.error This read-only attribute returns the error that occurred when running this - operation. If no error occurred, then the value None is returned. + operation. If no error occurred, then the value *None* is returned. .. attribute:: PipelineOpResult.operation @@ -260,7 +260,8 @@ PipelineOpResult Attributes .. attribute:: PipelineOpResult.warning This read-only attribute returns any warning that was encountered when - running this operation. If no warning was encountered, then the value None - is returned. See :ref:`PL/SQL Compilation Warnings `. + running this operation. If no warning was encountered, then the value + *None* is returned. See :ref:`PL/SQL Compilation Warnings + `. .. versionadded:: 2.5.0 diff --git a/doc/src/api_manual/pool_params.rst b/doc/src/api_manual/pool_params.rst index 9b998b47..eb05cb12 100644 --- a/doc/src/api_manual/pool_params.rst +++ b/doc/src/api_manual/pool_params.rst @@ -18,19 +18,19 @@ PoolParams Methods .. method:: PoolParams.copy() - Creates a copy of the parameters and returns it. + Creates a copy of the parameters and returns it. .. method:: PoolParams.get_connect_string() - Returns the connection string associated with the PoolParams instance. + Returns the connection string associated with the PoolParams instance. .. method:: PoolParams.parse_connect_string(connect_string) - Parses the connect string into its components and stores the parameters. + Parses the connect string into its components and stores the parameters. - The connect string can be an Easy Connect string, name-value pairs, or a simple alias - which is looked up in ``tnsnames.ora``. Parameters that are found in the connect string - override any currently stored values. + The connect string can be an Easy Connect string, name-value pairs, or a + simple alias which is looked up in ``tnsnames.ora``. Parameters that are + found in the connect string override any currently stored values. .. method:: PoolParams.set(min=None, max=None, increment=None, \ connectiontype=None, getmode=None, homogeneous=None, timeout=None, \ @@ -53,21 +53,21 @@ PoolParams Methods terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, handle=None) - Sets one or more of the parameters. + Sets one or more of the parameters. - .. versionchanged:: 2.5.0 + .. versionchanged:: 2.5.0 - The ``program``, ``machine``, ``terminal``, ``osuser``, and ``driver_name`` - parameters were added. Support for ``edition`` and ``appcontext`` was - added to python-oracledb Thin mode. + The ``program``, ``machine``, ``terminal``, ``osuser``, and + ``driver_name`` parameters were added. Support for ``edition`` and + ``appcontext`` was added to python-oracledb Thin mode. - .. versionchanged:: 2.3.0 + .. versionchanged:: 2.3.0 - The ``ping_timeout`` and ``ssl_version`` parameters were added. + The ``ping_timeout`` and ``ssl_version`` parameters were added. - .. versionchanged:: 2.1.0 + .. versionchanged:: 2.1.0 - The ``pool_boundary`` and ``use_tcp_fast_open`` parameters were added. + The ``pool_boundary`` and ``use_tcp_fast_open`` parameters were added. .. _poolparamsattr: @@ -76,127 +76,128 @@ PoolParams Attributes .. attribute:: PoolParams.connectiontype - This read-only attribute specifies the class of the connection that should - be returned during calls to :meth:`ConnectionPool.acquire()`. It must be - Connection or a subclass of Connection. This attribute is of type - Type["oracledb.connection"]. The default value is ``oracledb.Connection``. 
+ This read-only attribute specifies the class of the connection that should + be returned during calls to :meth:`ConnectionPool.acquire()`. It must be + Connection or a subclass of Connection. This attribute is of type + Type["oracledb.connection"]. The default value is ``oracledb.Connection``. - This attribute is supported in both python-oracledb Thin and Thick modes. + This attribute is supported in both python-oracledb Thin and Thick modes. .. attribute:: PoolParams.getmode - This read-write attribute is an integer that determines the behavior of - :meth:`ConnectionPool.acquire()`. The value of this attribute can be one of - the constants :data:`oracledb.POOL_GETMODE_WAIT`, - :data:`oracledb.POOL_GETMODE_NOWAIT`, :data:`oracledb.POOL_GETMODE_FORCEGET`, - or :data:`oracledb.POOL_GETMODE_TIMEDWAIT`. The default value is - :data:`oracledb.POOL_GETMODE_WAIT`. + This read-write attribute is an integer that determines the behavior of + :meth:`ConnectionPool.acquire()`. The value of this attribute can be one of + the constants :data:`oracledb.POOL_GETMODE_WAIT`, + :data:`oracledb.POOL_GETMODE_NOWAIT`, :data:`oracledb.POOL_GETMODE_FORCEGET`, + or :data:`oracledb.POOL_GETMODE_TIMEDWAIT`. The default value is + :data:`oracledb.POOL_GETMODE_WAIT`. - This attribute is supported in both python-oracledb Thin and Thick modes. + This attribute is supported in both python-oracledb Thin and Thick modes. .. attribute:: PoolParams.homogeneous - This read-only attribute is a boolean which indicates whether the connections - are :ref:`homogeneous ` (same user) or heterogeneous (multiple - users). The default value is True. + This read-only attribute is a boolean which indicates whether the + connections are :ref:`homogeneous ` (same user) or + heterogeneous (multiple users). The default value is *True*. - This attribute is only supported in python-oracledb Thick mode. The - python-oracledb Thin mode supports only homogeneous modes. + This attribute is only supported in python-oracledb Thick mode. The + python-oracledb Thin mode supports only homogeneous modes. .. attribute:: PoolParams.increment - This read-only attribute specifies the number of connections that should - be added to the pool whenever a new connection needs to be created. The - default value is 1. + This read-only attribute specifies the number of connections that should + be added to the pool whenever a new connection needs to be created. The + default value is *1*. - This attribute is supported in both python-oracledb Thin and Thick modes. + This attribute is supported in both python-oracledb Thin and Thick modes. .. attribute:: PoolParams.min - This read-only attribute is an integer that specifies the minimum number of - connections that the pool should contain. The default value is 1. + This read-only attribute is an integer that specifies the minimum number + of connections that the pool should contain. The default value is *1*. - This attribute is supported in both python-oracledb Thin and Thick modes. + This attribute is supported in both python-oracledb Thin and Thick modes. .. attribute:: PoolParams.max - This read-only attribute specifies the maximum number of connections that - the pool should contain. The default value is 2. + This read-only attribute specifies the maximum number of connections that + the pool should contain. The default value is *2*. - This attribute is supported in both python-oracledb Thin and Thick modes. + This attribute is supported in both python-oracledb Thin and Thick modes. .. 
attribute:: PoolParams.max_lifetime_session - This read-only attribute is an integer that determines the length of time - (in seconds) that connections can remain in the pool. If the value of this - attribute is 0, then the connections may remain in the pool indefinitely. - The default value is 0 seconds. + This read-only attribute is an integer that determines the length of time + (in seconds) that connections can remain in the pool. If the value of this + attribute is *0*, then the connections may remain in the pool indefinitely. + The default value is *0* seconds. - This attribute is only supported in python-oracledb Thick mode. + This attribute is only supported in python-oracledb Thick mode. .. attribute:: PoolParams.max_sessions_per_shard - This read-only attribute is an integer that determines the maximum number of - connections that may be associated with a particular shard. The default value - is 0. + This read-only attribute is an integer that determines the maximum number + of connections that may be associated with a particular shard. The default + value is *0*. - This attribute is only supported in python-oracledb Thick mode. + This attribute is only supported in python-oracledb Thick mode. .. attribute:: PoolParams.ping_interval - This read-only attribute is an integer that specifies the length of time - (in seconds) after which an unused connection in the pool will be a - candidate for pinging when :meth:`ConnectionPool.acquire()` is called. - If the ping to the database indicates that the connection is not alive, - then a replacement connection will be returned by - :meth:`ConnectionPool.acquire()`. If the ``ping_interval`` is a negative - value, then the ping functionality will be disabled. The default value is 60 - seconds. + This read-only attribute is an integer that specifies the length of time + (in seconds) after which an unused connection in the pool will be a + candidate for pinging when :meth:`ConnectionPool.acquire()` is called. + If the ping to the database indicates that the connection is not alive, + then a replacement connection will be returned by + :meth:`ConnectionPool.acquire()`. If the ``ping_interval`` is a negative + value, then the ping functionality will be disabled. The default value is + *60* seconds. This attribute is supported in both python-oracledb Thin and Thick modes. .. attribute:: PoolParams.ping_timeout - This read-only attribute is an integer that specifies the maximum length of - time (in milliseconds) that :meth:`ConnectionPool.acquire()` waits for a - connection to respond to any internal ping to the database. If the ping does - not respond within the specified time, then the connection is destroyed and - :meth:`~ConnectionPool.acquire()` returns a different connection. The default - value is 5000 milliseconds. + This read-only attribute is an integer that specifies the maximum length of + time (in milliseconds) that :meth:`ConnectionPool.acquire()` waits for a + connection to respond to any internal ping to the database. If the ping + does not respond within the specified time, then the connection is + destroyed and :meth:`~ConnectionPool.acquire()` returns a different + connection. The default value is *5000* milliseconds. - This attribute is supported in both python-oracledb Thin and Thick modes. + This attribute is supported in both python-oracledb Thin and Thick modes. .. versionadded:: 2.3.0 .. 
attribute:: PoolParams.session_callback - This read-only attribute specifies a callback that is invoked when - a connection is returned from the pool for the first time, or when the - connection tag differs from the one requested. + This read-only attribute specifies a callback that is invoked when a + connection is returned from the pool for the first time, or when the + connection tag differs from the one requested. - This attribute is supported in both python-oracledb Thin and Thick modes. + This attribute is supported in both python-oracledb Thin and Thick modes. .. attribute:: PoolParams.soda_metadata_cache - This read-only attribute is a boolean that indicates whether - SODA metadata cache should be enabled or not. The default value is False. + This read-only attribute is a boolean that indicates whether SODA + metadata cache should be enabled or not. The default value is *False*. - This attribute is only supported in python-oracledb Thick mode. + This attribute is only supported in python-oracledb Thick mode. .. attribute:: PoolParams.timeout - This read-only attribute is an integer that specifies the length of time - (in seconds) that a connection may remain idle in the pool before it is - terminated. If the value of this attribute is 0, then the connections are - never terminated. The default value is 0 seconds. + This read-only attribute is an integer that specifies the length of time + (in seconds) that a connection may remain idle in the pool before it is + terminated. If the value of this attribute is *0*, then the connections + are never terminated. The default value is *0* seconds. - This attribute is only supported in python-oracledb Thick mode. + This attribute is only supported in python-oracledb Thick mode. .. attribute:: PoolParams.wait_timeout - This read-only attribute is an integer that specifies the length of time - (in milliseconds) that a caller should wait when acquiring a connection - from the pool with :attr:`~PoolParams.getmode` set to - :data:`~oracledb.POOLGETMODE_TIMEDWAIT`. The default value is 0 milliseconds. + This read-only attribute is an integer that specifies the length of time + (in milliseconds) that a caller should wait when acquiring a connection + from the pool with :attr:`~PoolParams.getmode` set to + :data:`~oracledb.POOLGETMODE_TIMEDWAIT`. The default value is *0* + milliseconds. - This attribute is supported in both python-oracledb Thin and Thick modes. + This attribute is supported in both python-oracledb Thin and Thick modes. diff --git a/doc/src/api_manual/soda.rst b/doc/src/api_manual/soda.rst index 0fc1e620..719e8c4e 100644 --- a/doc/src/api_manual/soda.rst +++ b/doc/src/api_manual/soda.rst @@ -106,9 +106,10 @@ SodaDatabase Methods lookup?ctx=dblatest&id=GUID-49EFF3D3-9FAB-4DA6-BDE2-2650383566A3>`__ for more information. - If the ``mapMode`` parameter is set to True, the new collection is mapped to an - existing table instead of creating a table. If a collection is created in - this way, dropping the collection will not drop the existing table either. + If the ``mapMode`` parameter is set to *True*, the new collection is mapped + to an existing table instead of creating a table. If a collection is + created in this way, dropping the collection will not drop the existing + table either. .. 
method:: SodaDatabase.createDocument(content, key=None, mediaType="application/json") @@ -117,7 +118,8 @@ SodaDatabase Methods You only need to use this method if your collection requires client-assigned keys or has non-JSON content; otherwise, you can pass your content directly to SODA write operations. SodaDocument attributes - 'createdOn', 'lastModified' and 'version' will be None. + :attr:`~SodaDoc.createdOn`, :attr:`~SodaDoc.lastModified`, and + :attr:`~SodaDoc.version` will be *None*. The ``content`` parameter can be a dictionary or list which will be transformed into a JSON string and then UTF-8 encoded. It can also be a @@ -152,7 +154,7 @@ SodaDatabase Methods Opens an existing collection with the given name and returns a new :ref:`SODA collection object `. If a collection with that name - does not exist, None is returned. + does not exist, *None* is returned. .. _sodacoll: @@ -183,8 +185,8 @@ SodaCollection Methods .. method:: SodaCollection.drop() Drops the collection from the database, if it exists. Note that if the - collection was created with mapMode set to True the underlying table will - not be dropped. + collection was created with the ``mapMode`` parameter set to *True* the + underlying table will not be dropped. A boolean value is returned indicating if the collection was actually dropped. @@ -194,12 +196,12 @@ SodaCollection Methods Drops the index with the specified name, if it exists. - The ``force`` parameter, if set to True, can be used to force the dropping of - an index that the underlying Oracle Database domain index doesn't normally - permit. This is only applicable to spatial and JSON search indexes. - See `here `__ - for more information. + The ``force`` parameter, if set to *True*, can be used to force the + dropping of an index that the underlying Oracle Database domain index + does not normally permit. This is only applicable to spatial and JSON + search indexes. See `here `__ for more + information. A boolean value is returned indicating if the index was actually dropped. @@ -220,7 +222,7 @@ SodaCollection Methods It can be useful for exploring the schema of a collection. Note that this method is only supported for JSON-only collections where a JSON search index has been created with the 'dataguide' option enabled. If there are - no documents in the collection, None is returned. + no documents in the collection, *None* is returned. .. method:: SodaCollection.insertMany(docs) @@ -244,8 +246,8 @@ SodaCollection Methods The ``hint`` parameter, if specified, supplies a hint to the database when processing the SODA operation. This is expected to be a string in the same format as a SQL hint but without any comment characters, for example - ``hint="MONITOR"``. Pass only the hint ``"MONITOR"`` (turn on monitoring) - or ``"NO_MONITOR"`` (turn off monitoring). See the Oracle Database SQL + ``hint="MONITOR"``. Pass only the hint "MONITOR" (turn on monitoring) + or "NO_MONITOR" (turn off monitoring). See the Oracle Database SQL Tuning Guide documentation `MONITOR and NO_MONITOR Hints `__ @@ -258,7 +260,8 @@ SodaCollection Methods - This method requires Oracle Client 18.5 and higher. - - Use of the ``hint`` parameter requires Oracle Client 21.3 or higher (or Oracle Client 19 from 19.11). + - Use of the ``hint`` parameter requires Oracle Client 21.3 or higher + (or Oracle Client 19 from 19.11). .. 
method:: SodaCollection.insertOne(doc) @@ -277,8 +280,8 @@ SodaCollection Methods The ``hint`` parameter, if specified, supplies a hint to the database when processing the SODA operation. This is expected to be a string in the same format as a SQL hint but without any comment characters, for example - ``hint="MONITOR"``. Pass only the hint ``"MONITOR"`` (turn on monitoring) - or ``"NO_MONITOR"`` (turn off monitoring). See the Oracle Database SQL + ``hint="MONITOR"``. Pass only the hint "MONITOR" (turn on monitoring) + or "NO_MONITOR" (turn off monitoring). See the Oracle Database SQL Tuning Guide documentation `MONITOR and NO_MONITOR Hints `__ @@ -289,7 +292,8 @@ SodaCollection Methods .. note:: - Use of the ``hint`` parameter requires Oracle Client 21.3 or higher (or Oracle Client 19 from 19.11). + Use of the ``hint`` parameter requires Oracle Client 21.3 or higher + (or Oracle Client 19 from 19.11). .. method:: SodaCollection.listIndexes() @@ -321,8 +325,8 @@ SodaCollection Methods The ``hint`` parameter, if specified, supplies a hint to the database when processing the SODA operation. This is expected to be a string in the same format as a SQL hint but without any comment characters, for example - ``hint="MONITOR"``. Pass only the hint ``"MONITOR"`` (turn on monitoring) - or ``"NO_MONITOR"`` (turn off monitoring). See the Oracle Database SQL + ``hint="MONITOR"``. Pass only the hint "MONITOR" (turn on monitoring) + or "NO_MONITOR" (turn off monitoring). See the Oracle Database SQL Tuning Guide documentation `MONITOR and NO_MONITOR Hints `__ @@ -336,7 +340,8 @@ SodaCollection Methods .. note:: - Use of the ``hint`` parameter requires Oracle Client 21.3 or higher (or Oracle Client 19 from 19.11). + Use of the ``hint`` parameter requires Oracle Client 21.3 or higher + (or Oracle Client 19 from 19.11). .. method:: SodaCollection.truncate() @@ -351,9 +356,8 @@ SodaCollection Attributes This read-only attribute returns a dictionary containing the metadata that was used to create the collection. See this `collection metadata reference - `__ - for more information. + `__ for more information. .. attribute:: SodaCollection.name @@ -378,20 +382,20 @@ SodaDoc Methods Returns the content of the document as a dictionary or list. This method assumes that the content is application/json and will raise an exception if - this is not the case. If there is no content, however, None will be + this is not the case. If there is no content, however, *None* will be returned. .. method:: SodaDoc.getContentAsBytes() Returns the content of the document as a bytes object. If there is no - content, however, None will be returned. + content, however, *None* will be returned. .. method:: SodaDoc.getContentAsString() Returns the content of the document as a string. If the document encoding - is not known, UTF-8 will be used. If there is no content, however, None + is not known, UTF-8 will be used. If there is no content, however, *None* will be returned. SodaDoc Attributes @@ -403,13 +407,13 @@ SodaDoc Attributes `ISO 8601 `__ format. Documents created by :meth:`SodaDatabase.createDocument()` or fetched from collections where this attribute is not stored will return - None. + *None*. .. attribute:: SodaDoc.key This read-only attribute returns the unique key assigned to this document. Documents created by :meth:`SodaDatabase.createDocument()` may not have a - value assigned to them and return None. + value assigned to them and return *None*. .. 
attribute:: SodaDoc.lastModified @@ -418,7 +422,7 @@ SodaDoc Attributes `ISO 8601 `__ format. Documents created by :meth:`SodaDatabase.createDocument()` or fetched from collections where this attribute is not stored will return - None. + *None*. .. attribute:: SodaDoc.mediaType @@ -435,7 +439,7 @@ SodaDoc Attributes This read-only attribute returns the version assigned to this document. Documents created by :meth:`SodaDatabase.createDocument()` or fetched - from collections where this attribute is not stored will return None. + from collections where this attribute is not stored will return *None*. .. _sodadoccur: @@ -483,8 +487,9 @@ SodaOperation Methods This is a tuning method to specify the number of documents that are internally fetched in batches by calls to :meth:`~SodaOperation.getCursor()` and :meth:`~SodaOperation.getDocuments()`. It does not affect how many - documents are returned to the application. A value of 0 will use the default - value (100). This method is only available in Oracle Client 19.5 and higher. + documents are returned to the application. A value of *0* will use the + default value (*100*). This method is only available in Oracle Client 19.5 + and higher. As a convenience, the SodaOperation object is returned so that further criteria can be specified by chaining methods together. @@ -529,13 +534,13 @@ SodaOperation Methods Specifies a hint that will be provided to the SODA operation when it is performed. This is expected to be a string in the same format as a SQL hint but without any comment characters, for example ``hint("MONITOR")``. Pass - only the hint ``"MONITOR"`` (turn on monitoring) or ``"NO_MONITOR"`` (turn - off monitoring). See the Oracle Database SQL Tuning Guide documentation - `MONITOR and NO_MONITOR Hints - `__ - and `Monitoring Database Operations - `__ - for more information. + only the hint "MONITOR" (turn on monitoring) or "NO_MONITOR" (turn off + monitoring). See the Oracle Database SQL Tuning Guide documentation + `MONITOR and NO_MONITOR Hints `__ and + `Monitoring Database Operations `__ for more + information. As a convenience, the SodaOperation object is returned so that further criteria can be specified by chaining methods together. diff --git a/doc/src/api_manual/subscription.rst b/doc/src/api_manual/subscription.rst index 1c44bc6a..de9b5b9c 100644 --- a/doc/src/api_manual/subscription.rst +++ b/doc/src/api_manual/subscription.rst @@ -14,11 +14,11 @@ Subscription Methods .. method:: Subscription.registerquery(statement, [args]) Registers the query for subsequent notification when tables referenced by - the query are changed. This behaves similarly to :meth:`Cursor.execute()` but only - queries are permitted and the ``args`` parameter must be a sequence or - dictionary. If the ``qos`` parameter included the flag - :data:`oracledb.SUBSCR_QOS_QUERY` when the subscription was created, then the ID - for the registered query is returned; otherwise, None is returned. + the query are changed. This behaves similarly to :meth:`Cursor.execute()` + but only queries are permitted and the ``args`` parameter must be a + sequence or dictionary. If the ``qos`` parameter included the flag + :data:`oracledb.SUBSCR_QOS_QUERY` when the subscription was created, then + the ID for the registered query is returned; otherwise, *None* is returned. Subscription Attributes ======================= @@ -37,20 +37,20 @@ Subscription Attributes .. 
attribute:: Subscription.id - This read-only attribute returns the value of ``REGID`` found in the - database view ``USER_CHANGE_NOTIFICATION_REGS`` or the value of ``REG_ID`` - found in the database view ``USER_SUBSCR_REGISTRATIONS``. For AQ - subscriptions, the value is 0. + This read-only attribute returns the value of the REGID column found in the + database view USER_CHANGE_NOTIFICATION_REGS or the value of the REG_ID + column found in the database view USER_SUBSCR_REGISTRATIONS. For AQ + subscriptions, the value is *0*. .. attribute:: Subscription.ip_address This read-only attribute returns the IP address used for callback notifications from the database server. If not set during construction, - this value is None. + this value is *None*. For consistency and compliance with the PEP 8 naming style, the - attribute `ipAddress` was renamed to `ip_address`. The old name will + attribute ``ipAddress`` was renamed to ``ip_address``. The old name will continue to work for a period of time. @@ -77,7 +77,7 @@ Subscription Attributes This read-only attribute returns the port used for callback notifications from the database server. If not set during construction, this value is - zero. + *0*. .. attribute:: Subscription.protocol @@ -95,7 +95,7 @@ Subscription Attributes .. attribute:: Subscription.timeout This read-only attribute returns the timeout (in seconds) that was - specified when the subscription was created. A value of 0 indicates that + specified when the subscription was created. A value of *0* indicates that there is no timeout. @@ -117,7 +117,7 @@ Message Objects multiple consumer queue. For consistency and compliance with the PEP 8 naming style, the - attribute `consumerName` was renamed to `consumer_name`. The old name + attribute ``consumerName`` was renamed to ``consumer_name``. The old name will continue to work for a period of time. @@ -148,7 +148,7 @@ Message Objects with the namespace :data:`oracledb.SUBSCR_NAMESPACE_AQ`. For consistency and compliance with the PEP 8 naming style, the - attribute `queueName` was renamed to `queue_name`. The old name will + attribute ``queueName`` was renamed to ``queue_name``. The old name will continue to work for a period of time. @@ -251,7 +251,7 @@ MessageQuery Objects This read-only attribute returns the query id of the query for which the result set changed. The value will match the value returned by - Subscription.registerquery when the related query was registered. + :meth:`Subscription.registerquery()` when the related query was registered. .. attribute:: MessageQuery.operation diff --git a/doc/src/api_manual/variable.rst b/doc/src/api_manual/variable.rst index 11cfb2ee..d6453ac5 100644 --- a/doc/src/api_manual/variable.rst +++ b/doc/src/api_manual/variable.rst @@ -39,7 +39,7 @@ Variable Attributes identical to the attribute :attr:`~Variable.numElements`. For consistency and compliance with the PEP 8 naming style, the - attribute `actualElements` was renamed to `actual_elements`. The old + attribute ``actualElements`` was renamed to ``actual_elements``. The old name will continue to work for a period of time. @@ -49,7 +49,7 @@ Variable Attributes element in bytes. For consistency and compliance with the PEP 8 naming style, the - attribute `bufferSize` was renamed to `buffer_size`. The old + attribute ``bufferSize`` was renamed to ``buffer_size``. The old name will continue to work for a period of time. 
@@ -65,7 +65,7 @@ Variable Attributes This read-only attribute specifies the method used to convert data from Python to the Oracle database. The method signature is converter(value) and the expected return value is the value to bind to the database. If this - attribute is None, the value is bound directly without any conversion. + attribute is *None*, the value is bound directly without any conversion. .. attribute:: Variable.num_elements @@ -75,8 +75,8 @@ Variable Attributes or bound to the variable. For consistency and compliance with the PEP 8 naming style, the - attribute `numElements` was renamed to `num_elements`. The old - name will continue to work for a period of time. + attribute ``numElements`` was renamed to ``num_elements``. The old name + will continue to work for a period of time. .. attribute:: Variable.outconverter @@ -84,7 +84,7 @@ Variable Attributes This read-only attribute specifies the method used to convert data from the Oracle database to Python. The method signature is converter(value) and the expected return value is the value to return to Python. If this - attribute is None, the value is returned directly without any conversion. + attribute is *None*, the value is returned directly without any conversion. .. attribute:: Variable.size diff --git a/doc/src/user_guide/installation.rst b/doc/src/user_guide/installation.rst index 581b1e85..9db86272 100644 --- a/doc/src/user_guide/installation.rst +++ b/doc/src/user_guide/installation.rst @@ -929,9 +929,14 @@ To use python-oracledb without the cryptography package: Installing from Source Code =========================== -You can build and install python-oracledb either -:ref:`locally from source code `, or -by using a :ref:`presupplied GitHub Action ` which builds +For platforms that do not have pre-built binaries on `PyPI +`__, using the normal ``python -m pip +install oracledb`` command will download the python-oracledb source bundle, +build, and install it. + +Alternatively, to create your own package files for installation, you can build +and install python-oracledb either :ref:`locally from source code `, +or by using a :ref:`presupplied GitHub Action ` which builds packages for all architectures and Python versions. .. _installgh: From ad8fadd99bf36848116fae55146c46e1efd4ebaf Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:29:21 -0700 Subject: [PATCH 004/178] Optimization: the connect descriptor sent to the database does not include the RETRY_DELAY parameter unless the RETRY_COUNT parameter is also specified. --- doc/src/release_notes.rst | 3 +++ src/oracledb/impl/base/connect_params.pyx | 4 ++-- tests/test_4500_connect_params.py | 3 +-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index a77855e5..083a7bd8 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -40,6 +40,9 @@ Thin Mode Changes #) Error ``DPY-3001: bequeath is only supported in python-oracledb thick mode`` is now raised when attempting to connect to the database without a connect string. +#) Optimization: the connect descriptor sent to the database does not include + the RETRY_DELAY parameter unless the RETRY_COUNT parameter is also + specified. #) Internal change: improve low-level encoding and decoding routines. #) Internal change: send buffer length for bind variables without unneeded adjustment. 
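As a rough illustration of the RETRY_DELAY optimization noted in the release notes above, the generated connect descriptor can be inspected with ``ConnectParams.get_connect_string()``. The host and service names below are hypothetical, and the comments describe only the behavior this change leads one to expect:

.. code-block:: python

    import oracledb

    # RETRY_DELAY given on its own: after this change the generated
    # descriptor should no longer contain a RETRY_DELAY parameter
    params = oracledb.ConnectParams(
        host="dbhost.example.com", port=1521, service_name="orclpdb",
        retry_delay=3,
    )
    print(params.get_connect_string())

    # RETRY_COUNT and RETRY_DELAY together: both parameters are included
    params = oracledb.ConnectParams(
        host="dbhost.example.com", port=1521, service_name="orclpdb",
        retry_count=20, retry_delay=3,
    )
    print(params.get_connect_string())
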
diff --git a/src/oracledb/impl/base/connect_params.pyx b/src/oracledb/impl/base/connect_params.pyx index 134ba195..6d7036cd 100644 --- a/src/oracledb/impl/base/connect_params.pyx +++ b/src/oracledb/impl/base/connect_params.pyx @@ -785,8 +785,8 @@ cdef class Description(ConnectParamsNode): parts.append("(SOURCE_ROUTE=ON)") if self.retry_count != 0: parts.append(f"(RETRY_COUNT={self.retry_count})") - if self.retry_delay != 0: - parts.append(f"(RETRY_DELAY={self.retry_delay})") + if self.retry_delay != 0: + parts.append(f"(RETRY_DELAY={self.retry_delay})") if self.expire_time != 0: parts.append(f"(EXPIRE_TIME={self.expire_time})") if self.tcp_connect_timeout != DEFAULT_TCP_CONNECT_TIMEOUT: diff --git a/tests/test_4500_connect_params.py b/tests/test_4500_connect_params.py index 01f1f169..008a751c 100644 --- a/tests/test_4500_connect_params.py +++ b/tests/test_4500_connect_params.py @@ -898,8 +898,7 @@ def test_4549(self): self.assertEqual(params.port, [1521, 4549, 4549, 1521]) self.assertEqual(params.service_name, "service_name_4549") expected_conn_string = ( - "(DESCRIPTION=(RETRY_DELAY=1)" - "(ADDRESS=(PROTOCOL=tcp)(HOST=host4549a)(PORT=1521))" + "(DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST=host4549a)(PORT=1521))" "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=host4549b)(PORT=4549))" "(ADDRESS=(PROTOCOL=tcp)(HOST=host4549c)(PORT=4549)))" "(ADDRESS=(PROTOCOL=tcp)(HOST=host4549d)(PORT=1521))" From 1c738786050a3bcd37b96eefed95d57128d659a8 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:34:26 -0700 Subject: [PATCH 005/178] Bumped Cython requirement to 3.0.10 to avoid bug in earlier versions. --- doc/src/release_notes.rst | 2 ++ setup.cfg | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 083a7bd8..2e618114 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -72,6 +72,8 @@ Common Changes arg "{arg}"`` is now raised when an exception occurs when calling the registered handler for a protocol. #) Internal change: improve handling of metadata. +#) Internal build tool change: bumped minimum Cython version to 3.0.10 to + avoid bug in earlier versions. oracledb 2.5.1 (December 2024) diff --git a/setup.cfg b/setup.cfg index ed72e412..dbf00933 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,7 +41,7 @@ classifiers = [options] zip_safe = false python_requires = >=3.8 -setup_requires = cython>=3.0 +setup_requires = cython>=3.0.10 install_requires = cryptography>=3.2.1 test_suite = tests packages = find: From 60225f677b15b981d2b2195450bf4a7f2fb1bccf Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:34:43 -0700 Subject: [PATCH 006/178] Ensure that Python 3.11 and earlier work as expected. 
--- src/oracledb/impl/base/utils.pyx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/oracledb/impl/base/utils.pyx b/src/oracledb/impl/base/utils.pyx index 315fa5e9..fb7e9ca7 100644 --- a/src/oracledb/impl/base/utils.pyx +++ b/src/oracledb/impl/base/utils.pyx @@ -101,8 +101,13 @@ cdef int _set_enum_param(dict args, str name, object enum_obj, if in_val is not None: if not isinstance(in_val, str) or in_val.isdigit(): out_val[0] = int(in_val) - if out_val[0] in enum_obj: + if isinstance(in_val, enum_obj): return 0 + try: + enum_obj(out_val[0]) + return 0 + except ValueError: + pass else: enum_val = getattr(enum_obj, in_val.upper(), None) if enum_val is not None: From 732e7d68e9da6ed430cb26504654542edbfc6e3c Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:35:21 -0700 Subject: [PATCH 007/178] AQ doc updates. --- doc/src/user_guide/appendix_a.rst | 4 +- doc/src/user_guide/aq.rst | 143 +++++++++++++++++++----------- 2 files changed, 91 insertions(+), 56 deletions(-) diff --git a/doc/src/user_guide/appendix_a.rst b/doc/src/user_guide/appendix_a.rst index faf96cd2..46b0f59b 100644 --- a/doc/src/user_guide/appendix_a.rst +++ b/doc/src/user_guide/appendix_a.rst @@ -250,9 +250,9 @@ see :ref:`driverdiff` and :ref:`compatibility`. - No - Yes - Yes - * - Advanced Queuing (AQ) (see :ref:`aqusermanual`) + * - Oracle Transactional Event Queues and Advanced Queuing (AQ) (see :ref:`aqusermanual`) - No - - Yes - must use new API introduced in cx_Oracle 7.2 + - Yes - Yes * - Call timeouts (see :attr:`Connection.call_timeout`) - Yes diff --git a/doc/src/user_guide/aq.rst b/doc/src/user_guide/aq.rst index 10ababd0..72c87c06 100644 --- a/doc/src/user_guide/aq.rst +++ b/doc/src/user_guide/aq.rst @@ -1,40 +1,64 @@ .. _aqusermanual: -*********************************** -Using Oracle Advanced Queuing (AQ) -*********************************** - -`Oracle Advanced Queuing -`__ is a highly -configurable and scalable messaging feature of Oracle Database. It has -interfaces in various languages, letting you integrate multiple tools in your -architecture. +************************************************************ +Using Oracle Transactional Event Queues and Advanced Queuing +************************************************************ + +`Oracle Transactional Event Queues and Advanced Queuing +`__ are highly +configurable and scalable messaging features of Oracle Database allowing +data-driven and event-driven applications to stream events and communicate with +each other. They have interfaces in various languages, letting you integrate +multiple tools in your architecture. Both Oracle Transactional Event Queues +(TxEventQ) and Advanced Queuing (AQ) "Classic" queues support sending and +receiving of various payloads, such as RAW values, JSON, JMS, and objects. +Transactional Event Queues use a highly optimized implementation of Advanced +Queuing. They were previously called AQ Sharded Queues. .. note:: - Oracle Advanced Queuing is only supported in the python-oracledb Thick + TxEventQ and AQ Classic queues are only supported in python-oracledb Thick mode. See :ref:`enablingthick`. -Python-oracledb uses the updated interface for Oracle Advanced Queuing that -was first introduced in cx_Oracle 7.2. +Python-oracledb API calls are the same for Transactional Event Queues and +Classic Queues, however there are differences in support for some payload +types. + +**Classic Queue Support** + +- RAW, named Oracle objects, and JMS payloads are supported. 
-Starting from Oracle Database 21c, Advanced Queuing also supports the JSON -payload type. To use the JSON payload type, the Oracle Client libraries must -be version 21 or later. +- The JSON payload requires Oracle Client libraries 21c (or later) and Oracle + Database 21c (or later). -There are Advanced Queuing examples in the `GitHub examples +There are examples of AQ Classic Queues in the `GitHub examples `__ directory. +**Transactional Event Queue Support** + +- RAW and named Oracle object payloads are supported for single and array + message enqueuing and dequeuing when using Oracle Client 19c (or later) and + connected to Oracle Database 19c (or later). + +- JMS payloads are supported for single and array message enqueuing and + dequeuing when using Oracle Client 19c (or later) and Oracle Database 23ai. + +- JSON payloads are supported for single message enqueuing and dequeuing when + using Oracle Client libraries 21c (or later) and Oracle Database 21c (or + later). Array enqueuing and dequeuing is not supported for JSON payloads. + +Transactional Event Queues do not support :attr:`EnqOptions.transformation`, +:attr:`DeqOptions.transformation`, or :ref:`Recipient Lists `. Creating a Queue ================ -Before being used, queues need to be created in the database. +Before being used in applications, queues need to be created in the database. **Using RAW Payloads** -Queues can be created using the RAW payload type, for example in -SQL*Plus: +To use SQL*Plus to create a Classic Queue for the RAW payload which is suitable +for sending string or bytes messages: .. code-block:: sql @@ -45,13 +69,20 @@ SQL*Plus: end; / -This example creates a RAW queue suitable for sending string or bytes -messages. +To create a Transactional Event Queue for RAW payloads: + +.. code-block:: sql + + begin + dbms_aqadm.create_sharded_queue('RAW_SHQ', queue_payload_type=>'RAW'); + dbms_aqadm.start_queue('RAW_SHQ'); + end; + / **Using JSON Payloads** -Also, queues can be created using the JSON payload type. For example, -in SQL*Plus: +Queues can also be created for JSON payloads. For example, to create a Classic +Queue in SQL*Plus: .. code-block:: sql @@ -62,14 +93,11 @@ in SQL*Plus: end; / -This example creates a JSON queue suitable for sending JSON data -messages. - Enqueuing Messages ================== To send messages in Python, you connect and get a :ref:`queue `. The -queue can be used for enqueuing, dequeuing, or both as needed. +queue can then be used for enqueuing, dequeuing, or for both. **Using RAW Payloads** @@ -94,9 +122,12 @@ messages: queue.enqone(connection.msgproperties(payload=data)) connection.commit() -Since the queue sending the messages is a RAW queue, the strings in this -example will be internally encoded to bytes using ``message.encode()`` -before being enqueued. +Since the queue is a RAW queue, strings are internally encoded to bytes using +``message.encode()`` before being enqueued. + +The use of :meth:`~Connection.commit()` means that messages are sent only when +any database transaction related to them is committed. This behavior can be +altered, see :ref:`aqoptions`. **Using JSON Payloads** @@ -105,8 +136,8 @@ payload type by using: .. code-block:: python + # The argument "JSON" indicates the queue is of JSON payload type queue = connection.queue("DEMO_JSON_QUEUE", "JSON") - # The second argument (JSON) indicates that the queue is of JSON payload type. Now the message can be enqueued using :meth:`~Queue.enqone()`. 
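For example, a minimal sketch of enqueuing a JSON payload, assuming the ``connection`` and ``queue`` objects created as shown above (the payload contents are illustrative only):

.. code-block:: python

    # for a JSON queue, the payload can be a value such as a dict or list
    # that python-oracledb converts to the database JSON type
    json_data = {
        "name": "Rod",
        "dept": "Sales",
        "location": "Germany",
    }

    queue.enqone(connection.msgproperties(payload=json_data))
    connection.commit()
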
@@ -133,7 +164,7 @@ Dequeuing Messages Dequeuing is performed similarly. To dequeue a message call the method :meth:`~Queue.deqone()` as shown in the examples below. -**Using RAW Payload Type** +**Using RAW Payloads** .. code-block:: python @@ -142,10 +173,10 @@ Dequeuing is performed similarly. To dequeue a message call the method connection.commit() print(message.payload.decode()) -Note that if the message is expected to be a string, the bytes must -be decoded using ``message.payload.decode()``, as shown. +Note that if the message is expected to be a string, the bytes must be decoded +by the application using ``message.payload.decode()``, as shown. -**Using JSON Payload Type** +**Using JSON Payloads** .. code-block:: python @@ -179,7 +210,7 @@ And a queue that accepts this type: end; / -You can queue messages: +You can enqueue messages: .. code-block:: python @@ -194,7 +225,7 @@ You can queue messages: queue.enqone(connection.msgproperties(payload=book)) connection.commit() -Dequeuing is done like this: +Dequeuing can be done like this: .. code-block:: python @@ -205,18 +236,20 @@ Dequeuing is done like this: connection.commit() print(message.payload.TITLE) # will print Quick Brown Fox +.. _reciplists: Using Recipient Lists ===================== -A list of recipient names can be associated with a message at the time -a message is enqueued. This allows a limited set of recipients to -dequeue each message. The recipient list associated with the message -overrides the queue subscriber list, if there is one. The recipient -names need not be in the subscriber list but can be, if desired. +Classic Queues support Recipient Lists. A list of recipient names can be +associated with a message at the time a message is enqueued. This allows a +limited set of recipients to dequeue each message. The recipient list +associated with the message overrides the queue subscriber list, if there is +one. The recipient names need not be in the subscriber list but can be, if +desired. Transactional Event Queues do not support Recipient Lists. -To dequeue a message, the ``consumername`` attribute can be set to -one of the recipient names. The original message recipient list is +To dequeue a message, the :attr:`~DeqOptions.consumername` attribute can be +set to one of the recipient names. The original message recipient list is not available on dequeued messages. All recipients have to dequeue a message before it gets removed from the queue. @@ -237,6 +270,8 @@ messages intended for that recipient using the ``consumername`` attribute:: queue.deqoptions.consumername = "sub3" m = queue.deqone() +.. _aqoptions: + Changing Queue and Message Options ================================== @@ -245,8 +280,8 @@ Refer to the :ref:`python-oracledb AQ API ` and `__ for details on all of the enqueue and dequeue options available. -Enqueue options can be set. For example, to make it so that an explicit -call to :meth:`~Connection.commit()` on the connection is not needed to commit +Enqueue options can be set. For example, to make it so that an explicit call +to :meth:`~Connection.commit()` on the connection is not needed to send messages: .. code-block:: python @@ -269,7 +304,7 @@ expiration of 60 seconds on a message: queue.enqone(connection.msgproperties(payload="Message", expiration=60)) -This means that if no dequeue operation occurs within 60 seconds that the +This means that if no dequeue operation occurs within 60 seconds then the message will be dropped from the queue. 
@@ -279,8 +314,8 @@ Bulk Enqueue and Dequeue The :meth:`~Queue.enqmany()` and :meth:`~Queue.deqmany()` methods can be used for efficient bulk message handling. -:meth:`~Queue.enqmany()` is similar to :meth:`~Queue.enqone()` but accepts an -array of messages: +The :meth:`~Queue.enqmany()` method is similar to :meth:`~Queue.enqone()` but +accepts an array of messages: .. code-block:: python @@ -296,11 +331,11 @@ array of messages: .. warning:: Calling :meth:`~Queue.enqmany()` in parallel on different connections - acquired from the same pool may fail due to Oracle bug 29928074. Ensure - that this function is not run in parallel, use standalone connections or - connections from different pools, or make multiple calls to - :meth:`~Queue.enqone()` instead. The function :meth:`~Queue.deqmany()` call - is not affected. + acquired from the same pool may fail due to Oracle bug 29928074. To avoid + this, ensure that :meth:`~Queue.enqmany()` is not run in parallel, use + standalone connections or connections from different pools, or make + multiple calls to :meth:`~Queue.enqone()` instead. The function + :meth:`~Queue.deqmany()` call is not affected. To dequeue multiple messages at one time, use :meth:`~Queue.deqmany()`. This takes an argument specifying the maximum number of messages to dequeue at one From d5c0b82e65a0e916c29e4c051566c18ef1db21f7 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:37:19 -0700 Subject: [PATCH 008/178] Added namespace package oracledb.plugins to support extending the capability of python-oracledb. Added plugins for Oracle Cloud Infrastructure Storage and Microsoft Azure App Configuration configuration providers. --- MANIFEST.in | 1 + doc/src/api_manual/module.rst | 51 ++ doc/src/index.rst | 1 + doc/src/release_notes.rst | 7 +- doc/src/user_guide/connection_handling.rst | 513 +++++++++++++++++- doc/src/user_guide/extending.rst | 185 +++++++ doc/src/user_guide/installation.rst | 45 ++ doc/src/user_guide/tracing.rst | 69 --- setup.cfg | 4 +- src/oracledb/plugins/azure_config_provider.py | 216 ++++++++ src/oracledb/plugins/oci_config_provider.py | 203 +++++++ 11 files changed, 1201 insertions(+), 94 deletions(-) create mode 100644 doc/src/user_guide/extending.rst create mode 100644 src/oracledb/plugins/azure_config_provider.py create mode 100644 src/oracledb/plugins/oci_config_provider.py diff --git a/MANIFEST.in b/MANIFEST.in index 431af34d..5bdd1ca4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,7 @@ include *.txt recursive-include src/oracledb *.pxd recursive-include src/oracledb *.pxi recursive-include src/oracledb *.pyx +recursive-include src/oracledb/plugins *.py recursive-include src/oracledb/impl/thick/odpi *.c recursive-include src/oracledb/impl/thick/odpi *.h prune src/oracledb/impl/thick/odpi/test diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index cb1b683e..afcca1e7 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -4234,3 +4234,54 @@ See :ref:`exception` for usage information. Boolean attribute representing whether the error is recoverable or not. This is False in all cases unless both Oracle Database 12.1 (or later) and Oracle Client 12.1 (or later) are being used. + +.. _oracledbplugins: + +Oracledb Plugins +================ + +The `namespace package `__ +``oracledb.plugins`` can contain plugins to extend the capability of +python-oracledb. See :ref:`customplugins`. 
Note that the namespace +``oracledb.plugins.ldap_support`` is reserved for future use by the +python-oracledb project. + +.. versionadded:: 3.0.0 + +.. _configociplugin: + +Oracle Cloud Infrastructure (OCI) Object Storage Configuration Provider Plugin +------------------------------------------------------------------------------ + +``oracledb.plugins.oci_config_provider`` is a plugin that provides access to +the configuration information stored in the :ref:`OCI Object Storage +` configuration provider. Importing this plugin defines and +:meth:`registers ` the hook function that +handles :ref:`OCI Object Storage connection strings ` prefixed +with ``config-oci``. The hook function parses this connection string, and +extracts the authentication details and URI details from the connection +string. Using the information, the hook function accesses the configuration +information in OCI Object Storage, which python-oracledb will use to connect +to Oracle Database. See :ref:`importconfigociplugin` for more information. + +.. versionadded:: 3.0.0 + +.. _configazureplugin: + +Azure App Configuration Provider Plugin +--------------------------------------- + +``oracledb.plugins.azure_config_provider`` is a plugin that provides access to +the configuration information stored in :ref:`Azure App Configuration +` provider. Importing this plugin defines and +:meth:`registers ` the hook function that +handles :ref:`Azure App Configuration connection string ` +prefixed with ``config-azure``. The hook function parses this connection +string, and extracts the authentication details and URI details from the +connection string. Using the information, the hook function accesses the +configuration information in Azure App Configuration, which python-oracledb +will use to connect to Oracle Database. See :ref:`importconfigazureplugin` +for more information. + +.. versionadded:: 3.0.0 diff --git a/doc/src/index.rst b/doc/src/index.rst index 9a3898cf..2021def1 100644 --- a/doc/src/index.rst +++ b/doc/src/index.rst @@ -39,6 +39,7 @@ User Guide user_guide/asyncio.rst user_guide/exception_handling.rst user_guide/tracing.rst + user_guide/extending.rst user_guide/troubleshooting.rst user_guide/appendix_a.rst user_guide/appendix_b.rst diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 2e618114..c38bce62 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -17,6 +17,8 @@ oracledb 3.0.0 (TBD) Thin Mode Changes +++++++++++++++++ +#) Added namespace package :ref:`oracledb.plugins ` for plugins that + can be used to extend the capability of python-oracledb. #) Perform TLS server matching in python-oracledb instead of the Python SSL library to allow alternate names to be checked (`issue 415 `__). @@ -56,8 +58,9 @@ Common Changes #) Added support for :ref:`naming and caching connection pools ` during creation, and retrieving them later from the python-oracledb pool cache with :meth:`oracledb.get_pool()`. -#) Added Centralized Configuration Provider support for :ref:`file-based - configurations `. +#) Added :ref:`Centralized Configuration Provider ` + support for Oracle Cloud Infrastructure Object Storage, Microsoft Azure App + Configuration, and file-based configurations. #) Added :meth:`oracledb.register_password_type()` to allow users to register a function that will be called when a password is supplied as a dictionary containing the key "type". 
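The following is a rough sketch of how the new :meth:`oracledb.register_password_type()` hook might be used; the handler name, the "base64" password type, and the assumption that the hook is passed the password dictionary and returns the cleartext password are illustrative only and not part of this patch:

.. code-block:: python

    import base64
    import oracledb

    # hypothetical handler: assumed to receive the password dictionary and
    # to return the actual password string
    def base64_password_hook(password):
        return base64.b64decode(password["value"].encode()).decode()

    oracledb.register_password_type("base64", base64_password_hook)

    # the password can then be supplied as a dictionary with a "type" key
    connection = oracledb.connect(
        user="hr",
        password={"type": "base64", "value": "dGlnZXI="},
        dsn="dbhost.example.com/orclpdb",
    )
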
diff --git a/doc/src/user_guide/connection_handling.rst b/doc/src/user_guide/connection_handling.rst index c69bd65a..04be5043 100644 --- a/doc/src/user_guide/connection_handling.rst +++ b/doc/src/user_guide/connection_handling.rst @@ -289,11 +289,7 @@ of Oracle Database's naming methods: * A :ref:`TNS Alias ` mapping to a Connect Descriptor in a :ref:`tnsnames.ora ` file * An :ref:`LDAP URL ` - -The ``dsn`` can additionally refer to a :ref:`Centralized Configuration -Provider `. The following providers are supported: - -* :ref:`File Configuration Provider ` +* A :ref:`Configuration Provider URL ` Connection strings used for JDBC and Oracle SQL Developer need to be altered to be usable as the ``dsn`` value, see :ref:`jdbcconnstring`. @@ -449,6 +445,41 @@ Client 23ai could connect using: This syntax is also usable in python-oracledb Thin mode via a :ref:`connection hook function `, see :ref:`ldapconnections`. +.. _configproviderurl: + +Centralized Configuration Provider URL Connection Strings +--------------------------------------------------------- + +A :ref:`Centralized Configuration Provider ` URL +contains the details of where the configuration information is located. The +information that can be stored in configuration providers includes connect +descriptors, database credentials (user name and password), and python-oracledb +specific attributes. With this URL, python-oracledb can access the information +stored in the configuration providers listed below and connect to Oracle +Database: + +- :ref:`Oracle Cloud Infrastructure (OCI) Object Storage configuration + provider ` +- :ref:`Microsoft Azure App Configuration provider ` +- :ref:`File Configuration Provider ` + +The configuration provider URL can be set in the ``dsn`` parameter of +connection functions :meth:`oracledb.connect()`, +:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, and +:meth:`oracledb.create_pool_async()`. This URL must begin with +"config-" where the configuration-provider value can +be set to *ociobject*, *azure*, or *file*, depending on the location of your +configuration information. For example, to use connection configuration stored +in a local file ``/opt/oracle/my-config.json``, you need to specify the ``dsn`` +parameter as shown: + +.. code-block:: python + + connection = oracledb.connect(user="hr", password=userpwd, + dsn="config-file:///opt/oracle/my-config.json") + +See the respective configuration provider sections for more details. + .. _jdbcconnstring: JDBC and Oracle SQL Developer Connection Strings @@ -507,28 +538,466 @@ This can be referenced in python-oracledb: connection = oracledb.connect(user="hr", password=userpwd, dsn="finance") -.. _builtinconfigproviders: +.. _configurationproviders: Centralized Configuration Providers =================================== -Oracle Database Centralized Configuration Providers allow the storage and -management of database connection credentials and application configuration -information in a central location. +Centralized Configuration Providers allow the storage and management of +database connection credentials and application configuration information in a +central location. These providers allow you to separately store the +configuration information from the code of your application. The information +that can be stored in these providers includes connect descriptors, database +credentials such as user name and password, and python-oracledb specific +attributes. 
+ +You can access the information stored in configuration providers using both +python-oracledb Thin and Thick modes. With this information, python-oracledb +can connect to Oracle Database using :meth:`oracledb.connect()`, +:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, or +:meth:`oracledb.create_pool_async()`. + +The following configuration providers are supported by python-oracledb: + +- :ref:`Oracle Cloud Infrastructure (OCI) Object Storage ` +- :ref:`Microsoft Azure App Configuration ` +- :ref:`File Configuration Provider ` + +**Precedence of Attributes** + +If you have defined the values of ``user`` and ``password`` in both the +application and the configuration provider, then the values defined in the +application will have the higher precedence. If the ``externalauth`` +parameter is set to *True*, then the ``user`` and ``password`` values +specified in the configuration provider is ignored. + +If you have defined the python-oracledb specific attributes in both the +application and in the configuration provider, then the values defined in the +configuration provider will have the higher precedence. + +.. _ociobjstorage: + +OCI Object Storage Configuration Provider +----------------------------------------- -A configuration provider is used by setting the ``dsn`` parameter of connection -and pool creation methods to specify where the configuration is located. For -example to use connection configuration stored in a local file -``/opt/oracle/my-config.json``: +The `Oracle Cloud Infrastructure (OCI) Object Storage `__ configuration +provider enables the storage and management of Oracle Database connection +information in a JSON file. + +To use python-oracledb to access the configuration information from OCI Object +Storage, you must install the `OCI module `__, +see :ref:`ocimodules`. + +The JSON configuration file must contain the ``connect_descriptor`` property. +Optionally, you can specify the database user name, password, and +python-oracledb specific properties in the file. The database password can also +be stored securely as a secret using `OCI Vault `__. The properties that +can be added in the JSON file are listed below: + +.. list-table-with-summary:: JSON Properties for OCI Object Storage Configuration Provider + :header-rows: 1 + :class: wy-table-responsive + :widths: 15 25 15 + :name: _oci_object_storage_sub-objects + :summary: The first column displays the name of the property. The second column displays the description of the property. The third column displays whether the property is required or optional. + + * - Property + - Description + - Required or Optional + * - ``user`` + - The database user name. + - Optional + * - ``password`` + - The password of the database user, or a dictionary containing the key "type" and password-type specific properties. + - Optional + * - ``connect_descriptor`` + - The database :ref:`connection string `. + - Required + * - ``pyo`` + - Python-oracledb specific properties. + - Optional + +The following sample is an example of OCI Object Storage configuration +provider syntax:: + + { + "user": "scott", + "password": { + "type": "oci-vault", + "value": "oci.vaultsecret.my-secret-id" + "authentication": "OCI_INSTANCE_PRINCIPAL" + }, + "connect_descriptor": "dbhost.example.com:1522/orclpdb", + "pyo": { + "stmtcachesize": 30, + "min": 2, + "max": 10 + } + } + +If the password key has a reference to Azure Key Vault, then you must define +the Azure Key Vault credentials in the ``password`` property. 
The +``azure_client_id`` and ``azure_tenant_id`` must be specified in the password +property. Also, either the ``azure_client_secret`` or +``azure_client_certificate_path`` should be specified. The password format +should be:: + + "password": { + "type": "azure-vault", + "value": "", + "azure_tenant_id":"", + "azure_client_id":"", + "azure_client_secret": "", or "azure_client_certificate_path" : "" + } + +.. _useociconfigprovider: + +Using python-oracledb with OCI Object Storage Configuration Provider +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +To use python-oracledb with an OCI Object Storage configuration provider, you +must: + +1. :ref:`Import the oracledb.plugins.oci_config_provider plugin + ` in your code. + +2. :ref:`Use an OCI Object Storage connection string URL ` + in the ``dsn`` parameter of connection and pool creation methods. + +An example using a :ref:`standalone connection ` is +shown below: .. code-block:: python - connection = oracledb.connect(user="hr", password=userpwd, - dsn="config-file:///opt/oracle/my-config.json") + import oracledb.plugins.oci_config_provider + + configociurl = "config-ociobject://abc.oraclecloud.com/n/abcnamespace/b/abcbucket/o/abcobject?oci_tenancy=abc123&oci_user=ociuser1&oci_fingerprint=ab:14:ba:13&oci_key_file=ociabc/ocikeyabc.pem" + + oracledb.connect(dsn=configociurl) + +An example using a :ref:`connection pool ` is shown below: + +.. code-block:: python + + import oracledb.plugins.oci_config_provider + + configociurl = "config-ociobject://abc.oraclecloud.com/n/abcnamespace/b/abcbucket/o/abcobject?oci_tenancy=abc123&oci_user=ociuser1&oci_fingerprint=ab:14:ba:13&oci_key_file=ociabc/ocikeyabc.pem" + + oracledb.create_pool(dsn=configociurl) + +.. _importconfigociplugin: + +Importing ``oracledb.plugins.oci_config_provider`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You must import a :ref:`oracledb.plugins.oci_config_provider ` +plugin provided by python-oracledb to access the configuration information +stored in :ref:`OCI Object Storage ` such as database connect +descriptor, user name, password, and python-oracledb specific attributes. + +Importing this plugin defines and +:meth:`registers ` a built-in +:ref:`connection hook function ` that handles :ref:`connection +strings prefixed with config-ociobject `. This function is +internally invoked when the ``dsn`` parameter is prefixed with +``config-ociobject`` in calls to :meth:`oracledb.connect()`, +:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, or +:meth:`oracledb.create_pool_async()`. This hook function parses the connection +string, and extracts the following details: + +- URL of the OCI Object Storage endpoint +- OCI Object Storage namespace where the JSON file is stored +- OCI Object Storage bucket name where the JSON file is stored +- JSON file name +- Network service name or alias if the JSON file contains one or more aliases +- OCI Object Storage authentication details + +Using the above details, the hook function accesses the configuration +information stored in OCI Object Storage. The hook function sets the +connection information from OCI Object Storage in its ``connect_params`` +parameter which is a :ref:`ConnectParams ` object. This object is +used by python-oracledb to establish a connection to Oracle Database. + +.. 
_connstringoci: + +Defining a Connection String URL for OCI Object Storage +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You must define a connection string URL in a specific format in the ``dsn`` +property of :meth:`oracledb.connect()`, :meth:`oracledb.create_pool()`, +:meth:`oracledb.connect_async()`, or :meth:`oracledb.create_pool_async()` to +access the information stored in OCI Object Storage. The syntax of the OCI +Object Storage connection string URL is:: + + config-ociobject:/n/{namespaceName}/b/{bucketName}/o/ + [/c/][?=&=...] + +The parameters of the connection string are detailed in the table below. + +.. list-table-with-summary:: Connection String Parameters for OCI Object Storage + :header-rows: 1 + :class: wy-table-responsive + :widths: 15 25 15 + :name: _connection_string_for_oci_object_storage + :summary: The first row displays the name of the connection string parameter. The second row displays whether the connection string parameter is required or optional. The third row displays the description of the connection string parameter. -The following providers are supported by python-oracledb: + * - Parameter + - Description + - Required or Optional + * - ``config-ociobject`` + - Indicates that the configuration provider is OCI Object Storage. + - Required + * - + - The URL of OCI Object Storage endpoint. + - Required + * - + - The OCI Object Storage namespace where the JSON file is stored. + - Required + * - + - The OCI Object Storage bucket name where the JSON file is stored. + - Required + * - + - The JSON file name. + - Required + * - + - The network service name or alias if the JSON file contains one or more network service names. + - Optional + * - and + - The authentication method and corresponding authentication parameters to access the OCI Object Storage configuration provider. Depending on the specified authentication method, you must also set the corresponding authentication parameters in the ``option=value`` syntax of the connection string. You can specify one of the following authentication methods: + + - **API Key-based Authentication**: The authentication to OCI is done using API key-related values. This is the default authentication method. To use this method, you must set the option value to OCI_DEFAULT. Note that this value is also used when no authentication value is set. + + You can set optional authentication parameters for this method such as OCI_PROFILE, OCI_TENANCY, OCI_USER, OCI_FINGERPRINT, OCI_KEY_FILE, and OCI_PASS_PHRASE. See `Authentication Parameters for Oracle Cloud Infrastructure (OCI) Object Storage `__. These authentication parameters can also be set in an OCI Authentication Configuration file which can be stored in a default location ~/.oci/config, or in location ~/.oraclebmc/config, or in the location specified by the OCI_CONFIG_FILE environment variable. + + - **Instance Principal Authentication**: The authentication to OCI is done using VM instance credentials running on OCI. To use this method, you must set the option value to OCI_INSTANCE_PRINCIPAL. There are no optional authentication parameters for this method. + + - **Resource Principal Authentication**: The authentication to OCI is done using OCI resource principals. To use this method, you must set the option value to OCI_RESOURCE_PRINCIPAL. There are no optional authentication parameters for this method. + + See `OCI Authentication Methods `__ for more information. 
+ - Optional + +You can store the authentication details in an OCI Authentication Configuration +file which can be stored in a default location (~/.oci/config). The +``oci_from_file()`` method will check this location for the configuration file. +The OCI Object Storage configuration provider uses this method when the +default authentication method is specified or when the authentication details +are not provided in the connection string. + +An example of a connection string for OCI Object Storage configuration provider +is shown below: + +.. code-block:: python + + configociurl = "config-ociobject://abc.oraclecloud.com/n/abcnamespace/b/abcbucket/o/abcobject?oci_tenancy=abc123&oci_user=ociuser1&oci_fingerprint=ab:14:ba:13&oci_key_file=ociabc/ocikeyabc.pem" + +.. _azureappconfig: + +Azure App Configuration Provider +-------------------------------- + +`Azure App Configuration `__ is a cloud-based service provided by Microsoft +Azure that enables the storage and management of Oracle Database connection +information. Your application must be registered with `Microsoft Entra ID +`__ (formerly Microsoft Azure Active Directory) and must have the +required authorization permissions to access the Azure App Configuration +provider. + +To use python-oracledb to access the configuration information from Azure App +Configuration, you must install certain Microsoft Azure modules, see +:ref:`azuremodules`. + +Configuration information is stored as key-value pairs in Azure App +Configuration. You must add the connect descriptor as a key under a prefix +based on the requirements of your application. Optionally, you can add the +database user name, password, and python-oracledb specific properties as keys. +The database password can be stored securely as a secret using `Azure Key Vault +`__. In +Azure App Configuration, you can add the following keys under a prefix: + +- connect_descriptor (required) +- user (optional) +- password (optional) +- pyo(optional) + +The key ending with: + +- ``connect_descriptor`` stores the :ref:`connect descriptor ` + as the value. +- ``user`` stores the database user name as the value. +- ``password`` stores the reference to the Azure Key Vault and Secret as + the value. +- ``pyo`` stores the values of the python-oracledb specific properties. + +See `Oracle Net Service Administrator’s Guide `__ for +more information. + +.. _azureappconfigexample: + +The following table lists the sample configuration information defined in Azure +App Configuration as key-value pairs. Note that the key-value pairs are defined +under the same prefix ``test/`` as an example. + +.. list-table-with-summary:: + :header-rows: 1 + :class: wy-table-responsive + :align: center + :widths: 30 70 + :name: _azure_app_configuration_keys_and_values + :summary: The first row displays the name of the key defined in Azure App Configuration. The second row displays the value of the key defined in Azure App Configuration. + + * - Azure App Configuration Key + - Value + * - test/connect_descriptor + - (description=(retry_count=20)(retry_delay=3)(address=(protocol=tcps)(port=1521)(host=adb.region.oraclecloud.com))(connect_data=(service_name=cdb1_pdb1))) + * - test/user + - scott + * - test/password + - {"uri":"https://mykeyvault.vault.azure.net/secrets/passwordsalescrm"} + * - test/pyo + - {"stmtcachesize":30, "min":2, "max":10} + +.. 
_useazureconfigprovider: + +Using python-oracledb with Azure App Configuration Provider ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +To use python-oracledb with an Azure App Configuration provider, you must: + +1. :ref:`Import the + oracledb.plugins.azure_config_provider ` plugin in + your code. + +2. :ref:`Use an Azure App Configuration connection string URL + ` in the ``dsn`` parameter of connection and pool creation + methods. + +An example using a :ref:`standalone connection ` is +shown below. + +.. code-block:: python + + import oracledb.plugins.azure_config_provider + + configazureurl = "config-azure://aznetnamingappconfig.azconfig.io/?key=test/&azure_client_id=123-456&azure_client_secret=MYSECRET&azure_tenant_id=789-123" + + oracledb.connect(dsn=configazureurl) + +An example using a :ref:`connection pool ` is shown below. + +.. code-block:: python -* :ref:`File Configuration Provider ` + import oracledb.plugins.azure_config_provider + + configazureurl = "config-azure://aznetnamingappconfig.azconfig.io/?key=test/&azure_client_id=123-456&azure_client_secret=MYSECRET&azure_tenant_id=789-123" + + oracledb.create_pool(dsn=configazureurl) + +.. _importconfigazureplugin: + +Importing ``oracledb.plugins.azure_config_provider`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You must import a :ref:`oracledb.plugins.azure_config_provider +` plugin provided by python-oracledb to access the +configuration information stored in Azure App Configuration such as database +connect descriptor, user name, password, and python-oracledb specific +attributes. + +Importing this plugin defines and +:meth:`registers ` a built-in :ref:`connection +hook function ` that handles :ref:`connection strings prefixed +with config-azure `. This function is internally invoked when +the ``dsn`` parameter is prefixed with ``config-azure`` in calls to +:meth:`oracledb.connect()`, :meth:`oracledb.create_pool()`, +:meth:`oracledb.connect_async()`, or :meth:`oracledb.create_pool_async()`. This +hook function parses the connection string, and extracts the following details: + +- The URL of the Azure App Configuration endpoint +- The key prefix to identify the connection +- The Azure App Configuration label name +- Azure App Configuration authentication details + +Using the above details, the hook function accesses the configuration +information stored in Azure App Configuration. The hook function sets the +connection information from Azure App Configuration in its ``connect_params`` +parameter which is a :ref:`ConnectParams ` object. This object is +used by python-oracledb to establish a connection to Oracle Database. + +.. _connstringazure: + +Defining a Connection String URL for Azure App Configuration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You must define a connection string URL in a specific format in the ``dsn`` +property of :meth:`oracledb.connect()`, :meth:`oracledb.create_pool()`, +:meth:`oracledb.connect_async()`, or :meth:`oracledb.create_pool_async()` to +access the information stored in Azure App Configuration. The syntax of the +Azure App Configuration connection string URL is:: + + config-azure://[?key=&label=&=&=…] + +The parameters of the connection string are detailed in the table below. + +.. 
list-table-with-summary:: Connection String Parameters for Azure App Configuration + :header-rows: 1 + :class: wy-table-responsive + :align: center + :widths: 15 25 15 + :name: _connection_string_for_azure_app + :summary: The first row displays the name of the connection string parameter. The second row displays whether the connection string parameter is required or optional. The third row displays the description of the connection string parameter. + + * - Parameter + - Description + - Required or Optional + * - config-azure + - Indicates that the configuration provider is Azure App Configuration. + - Required + * - + - The URL of the Azure App configuration endpoint. + - Required + * - key= + - A key prefix to identify the connection. You can organize configuration information under a prefix as per application requirements. + - Required + * - label= + - The Azure App Configuration label name. + - Optional + * - = + - The authentication method and corresponding authentication parameters to access the Azure App Configuration provider. Depending on the specified authentication method, you must also set the corresponding authentication parameters in the ``option=value`` syntax of the connection string. You can specify one of the following authentication methods: + + - **Default Azure Credential**: The authentication to Azure App Configuration is done as a service principal (using either a client secret or client certificate) or as a managed identity depending on which parameters are set. This authentication method also supports reading the parameters as environment variables. This is the default authentication method. To use this authentication method, you must set the option value to AZURE_DEFAULT. Note that this value is also used when no authentication value is set. + + There are no required parameters for this option value. The optional parameters include AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_CLIENT_CERTIFICATE_PATH, AZURE_TENANT_ID, and AZURE_MANAGED_IDENTITY_CLIENT_ID. + + - **Service Principal with Client Secret**: The authentication to Azure App Configuration is done using the client secret. To use this method, you must set the option value to AZURE_SERVICE_PRINCIPAL. + + The required parameters for this option include AZURE_SERVICE_PRINCIPAL, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, and AZURE_TENANT_ID. There are no optional parameters for this option value. + + - **Service Principal with Client Certificate**: The authentication to Azure App Configuration is done using the client certificate. To use this method, you must set the option value to AZURE_SERVICE_PRINCIPAL. + + The required parameters for this option are AZURE_SERVICE_PRINCIPAL, AZURE_CLIENT_ID, AZURE_CLIENT_CERTIFICATE_PATH, and AZURE_TENANT_ID. There are no optional parameters for this option value. + + - **Managed Identity**: The authentication to Azure App Configuration is done using managed identity or managed user identity credentials. To use this method, you must set the option value to AZURE_MANAGED_IDENTITY. + + If you want to use a user-assigned managed identity for authentication, then you must specify the required parameter AZURE_MANAGED_IDENTITY_CLIENT_ID. There are no optional parameters for this option value. + + - Optional + +Note that the Azure service principal with client certificate overrides Azure +service principal with client secret. See `Authentication Parameters for Azure +App Configuration Store `__ for more information. 
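+
+The authentication method can also be selected explicitly by adding the
+``<option>=<value>`` settings from the table above to the connection string.
+The sketches below are illustrative only: the endpoint, key prefix, and
+credential values are placeholders, and the ``authentication`` option name
+matches the parameter read by the ``azure_config_provider`` plugin shown later
+in this patch.
+
+.. code-block:: python
+
+    # service principal with a client secret (placeholder values)
+    configazureurl_sp = "config-azure://aznetnamingappconfig.azconfig.io/?key=test/&authentication=AZURE_SERVICE_PRINCIPAL&azure_client_id=123-456&azure_client_secret=MYSECRET&azure_tenant_id=789-123"
+
+    # user-assigned managed identity (placeholder client id)
+    configazureurl_mi = "config-azure://aznetnamingappconfig.azconfig.io/?key=test/&authentication=AZURE_MANAGED_IDENTITY&azure_managed_identity_client_id=123-456"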
+ +An example of a connection string for Azure App Configuration provider is shown +below: + +.. code-block:: python + + configazureurl = "config-azure://aznetnamingappconfig.azconfig.io/?key=test/&azure_client_id=123-456&azure_client_secret=MYSECRET&azure_tenant_id=789-123" .. _fileconfigprovider: @@ -558,7 +1027,7 @@ the file are listed in the table below. :name: _file_configuration_provider :summary: The first column displays the name of the property. The second column displays its description. The third column displays whether the sub-object is required or optional. - * - Sub-object + * - Property - Description - Required or Optional * - ``user`` @@ -568,10 +1037,10 @@ the file are listed in the table below. - The password of the database user, or a dictionary containing the key "type" and password type-specific properties. - Optional * - ``connect_descriptor`` - - The database :ref:`connection string `. The file must contain this property. + - The database :ref:`connection string `. - Required * - ``pyo`` - - Python-oracledb specific settings. + - Python-oracledb specific properties. - Optional See the `Oracle Net Service Administrator’s Guide ` and also by using +:ref:`plugins `. + +.. _subclassconn: + +Subclassing Connections +======================= + +Subclassing enables applications to add "hooks" for connection and statement +execution. This can be used to alter or log connection and execution +parameters, and to extend python-oracledb functionality. + +The example below demonstrates subclassing a connection to log SQL execution +to a file. This example also shows how connection credentials can be embedded +in the custom subclass, so application code does not need to supply them. + +.. code-block:: python + + class Connection(oracledb.Connection): + log_file_name = "log.txt" + + def __init__(self): + connect_string = "hr/hr_password@dbhost.example.com/orclpdb" + self._log("Connect to the database") + return super(Connection, self).__init__(connect_string) + + def _log(self, message): + with open(self.log_file_name, "a") as f: + print(message, file=f) + + def execute(self, sql, parameters): + self._log(sql) + cursor = self.cursor() + try: + return cursor.execute(sql, parameters) + except oracledb.Error as e: + error_obj, = e.args + self._log(error_obj.message) + raise + + connection = Connection() + connection.execute(""" + select department_name + from departments + where department_id = :id""", dict(id=270)) + +The messages logged in ``log.txt`` are:: + + Connect to the database + + select department_name + from departments + where department_id = :id + +If an error occurs, perhaps due to a missing table, the log file would contain +instead:: + + Connect to the database + + select department_name + from departments + where department_id = :id + ORA-00942: table or view does not exist + +In production applications, be careful not to log sensitive information. + +See `Subclassing.py +`__ for an example. + +.. _plugins: + +Python-oracledb Plugins +======================= + +Plugins simplify extending python-oracledb functionality and the +distribution of modules. The plugin mechanism lets these plugins use large +Python modules without requiring python-oracledb users to install these +modules. You can use plugins to extend python-oracledb with +your own `namespace package `__. 
Python-oracledb +provides two plugins ``oracledb.plugins.oci_config_provider`` and +``oracledb.plugins.azure_config_provider`` which allow you to access the +configuration information stored in OCI Object Storage and Azure App +Configuration respectively, and connect to Oracle Database. See +:ref:`ociobjstorage` and :ref:`azureappconfig` for more information. Note +that the namespace ``oracledb.plugins.ldap_support`` is reserved for future +use by the python-oracledb project. + +.. _customplugins: + +Building Custom Plugins +----------------------- + +If you want to use the :ref:`plugin mechanism ` for your own +packages, you can create a `namespace package `__. + +The following example creates a plugin that uses a :ref:`connection hook +function ` to do special processing of connection strings +prefixed with "myprefix://". + +The example uses the following files: + +- A ``README`` file which contains:: + + My sample connection plugin + +- A ``pyproject.toml`` file which contains:: + + [build-system] + requires = ["setuptools"] + build-backend = "setuptools.build_meta" + +- A ``setup.cfg`` file which contains:: + + [metadata] + name = myplugin + version = 1.0.0 + description = Sample connection plugin for python-oracleb + long_description = file: README + long_description_content_type = text/markdown + author = Your Name + author_email = youremail@example.com + license = Apache Software License + + [options] + zip_safe = False + package_dir = + =src + + [options.packages.find] + where = src + +- The plugin code file ``src/oracledb/plugins/myplugin.py`` which contains: + + .. code-block:: python + + import oracledb + + def myhookfunc(protocol, arg, params): + print(f"In myhookfunc: protocol={protocol} arg={arg}") + params.parse_connect_string(arg) + + oracledb.register_protocol("myprefix", myhookfunc) + +To use the plugin, perform the following steps: + +1. Build the sample package:: + + python -m pip install build + python -m build + +2. Install the sample package:: + + python -m pip install dist/myplugin-1.0.0-py3-none-any.whl + +3. To show the plugin being used, create an application file containing: + + .. code-block:: python + + import oracledb + import oracledb.plugins.myplugin + + cs = 'myprefix://localhost/orclpdb1' + + cp = oracledb.ConnectParams() + cp.parse_connect_string(cs) + + print(f"host={cp.host}, port={cp.port}, service name={cp.service_name}") + + Running this will print:: + + In myhookfunc: protocol=myprefix arg=localhost/orclpdb1 + host=localhost, port=1521, service name=orclpdb1 + +You can distribute the created package either internally or on a package +repository. diff --git a/doc/src/user_guide/installation.rst b/doc/src/user_guide/installation.rst index 9db86272..511232c8 100644 --- a/doc/src/user_guide/installation.rst +++ b/doc/src/user_guide/installation.rst @@ -1023,3 +1023,48 @@ Python versions. version. For example, when using Python 3.12 on macOS, install:: python -m pip install oracledb-2.5.0-cp312-cp312-macosx_10_13_universal2.whl + +.. _configprovidermodules: + +Installing Configuration Provider Modules for python-oracledb +============================================================= + +To use python-oracledb with :ref:`centralized configuration providers +`, you must install the necessary module for your +preferred configuration provider as detailed below. + +.. 
_ocimodules: + +Install Modules for the OCI Object Storage Configuration Provider +----------------------------------------------------------------- + +For python-oracledb to work with Oracle Cloud Infrastructure (OCI) Object +Storage configuration provider, you must install the `OCI `__ module using:: + + python -m pip install oci + +See :ref:`ociobjstorage` for information on using this configuration provider +with python-oracledb. + +.. _azuremodules: + +Install Modules for the Azure App Configuration Provider +-------------------------------------------------------- + +For python-oracledb to work with Azure App Configuration Provider, you must +install the `Azure App Configuration `__, `Azure Core `__, +and `Azure Identity `__ modules +using:: + + python -m pip install azure-appconfiguration azure-core azure-identity + +If your password is stored in the Azure Key vault, then you additionally need +to install the `Azure Key Vault Secrets `__ module. This can be done using:: + + python -m pip install azure-keyvault-secrets + +See :ref:`azureappconfig` for information on using this configuration provider +with python-oracledb. diff --git a/doc/src/user_guide/tracing.rst b/doc/src/user_guide/tracing.rst index 8fe68e95..5329cd52 100644 --- a/doc/src/user_guide/tracing.rst +++ b/doc/src/user_guide/tracing.rst @@ -120,74 +120,6 @@ be shown in the DBOP_NAME column of the V$SQL_MONITOR view: WHERE sid = SYS_CONTEXT('USERENV', 'SID')"""): print(row) -.. _subclassconn: - -Subclassing Connections ------------------------ - -Subclassing enables applications to add "hooks" for connection and statement -execution. This can be used to alter or log connection and execution -parameters, and to extend python-oracledb functionality. - -The example below demonstrates subclassing a connection to log SQL execution -to a file. This example also shows how connection credentials can be embedded -in the custom subclass, so application code does not need to supply them. - -.. code-block:: python - - class Connection(oracledb.Connection): - log_file_name = "log.txt" - - def __init__(self): - connect_string = "hr/hr_password@dbhost.example.com/orclpdb" - self._log("Connect to the database") - return super(Connection, self).__init__(connect_string) - - def _log(self, message): - with open(self.log_file_name, "a") as f: - print(message, file=f) - - def execute(self, sql, parameters): - self._log(sql) - cursor = self.cursor() - try: - return cursor.execute(sql, parameters) - except oracledb.Error as e: - error_obj, = e.args - self._log(error_obj.message) - raise - - connection = Connection() - connection.execute(""" - select department_name - from departments - where department_id = :id""", dict(id=270)) - -The messages logged in ``log.txt`` are:: - - Connect to the database - - select department_name - from departments - where department_id = :id - -If an error occurs, perhaps due to a missing table, the log file would contain -instead:: - - Connect to the database - - select department_name - from departments - where department_id = :id - ORA-00942: table or view does not exist - -In production applications, be careful not to log sensitive information. - -See `Subclassing.py -`__ for an example. - - .. _jdwp: Debugging PL/SQL with the Java Debug Wire Protocol @@ -217,7 +149,6 @@ blog post `Debugging PL/SQL with Visual Studio Code (and more) `_. - .. 
_lowlevelsqltrace: Low Level SQL Tracing diff --git a/setup.cfg b/setup.cfg index dbf00933..34e0650f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -44,7 +44,9 @@ python_requires = >=3.8 setup_requires = cython>=3.0.10 install_requires = cryptography>=3.2.1 test_suite = tests -packages = find: +packages = + oracledb + oracledb.plugins package_dir = =src diff --git a/src/oracledb/plugins/azure_config_provider.py b/src/oracledb/plugins/azure_config_provider.py new file mode 100644 index 00000000..99f8e7bb --- /dev/null +++ b/src/oracledb/plugins/azure_config_provider.py @@ -0,0 +1,216 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# azure_config_provider.py +# +# Python file contains the hook method config_azure_hook() that fetches config +# store from Azure App Configuration. +# ----------------------------------------------------------------------------- + +import json +import re + +import oracledb + +from urllib.parse import urlparse, parse_qs +from azure.appconfiguration import AzureAppConfigurationClient +from azure.keyvault.secrets import SecretClient +from azure.core.exceptions import ResourceNotFoundError +from azure.identity import ( + ClientSecretCredential, + CertificateCredential, + ManagedIdentityCredential, + ChainedTokenCredential, + EnvironmentCredential, +) + + +def _get_credential(parameters): + """ + Returns the appropriate credential given the input supplied by the original + connect string. 
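+
+    Depending on the "authentication" option (AZURE_DEFAULT when omitted),
+    client secret, client certificate, and managed identity credentials are
+    built from the azure_* options. A single matching credential is returned
+    as is; multiple credentials are chained together with an
+    EnvironmentCredential, and an error is raised if none can be built.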
+ """ + + tokens = [] + auth = parameters.get("authentication") + if auth is not None: + auth = auth.upper() + if auth == "AZURE_DEFAULT": + auth = None + + if auth is None or auth == "AZURE_SERVICE_PRINCIPAL": + if "azure_client_secret" in parameters: + tokens.append( + ClientSecretCredential( + _get_required_parameter(parameters, "azure_tenant_id"), + _get_required_parameter(parameters, "azure_client_id"), + _get_required_parameter(parameters, "azure_client_secret"), + ) + ) + if "azure_client_certificate_path" in parameters: + tokens.append( + CertificateCredential( + _get_required_parameter(parameters, "azure_tenant_id"), + _get_required_parameter(parameters, "azure_client_id"), + _get_required_parameter( + parameters, "azure_client_certificate_path" + ), + ) + ) + if auth is None or auth == "AZURE_MANAGED_IDENTITY": + client_id = parameters.get("azure_managed_identity_client_id") + if client_id is not None: + tokens.append(ManagedIdentityCredential(client_id=client_id)) + + if len(tokens) == 0: + message = "Authentication options not available in Connection String" + raise Exception(message) + elif len(tokens) == 1: + return tokens[0] + tokens.append(EnvironmentCredential()) + return ChainedTokenCredential(*tokens) + + +def _get_required_parameter(parameters, name): + try: + return parameters[name] + except KeyError: + message = f'Parameter named "{name}" missing from connection string' + raise Exception(message) from None + + +def _get_setting(client, key, sub_key, label, required=True): + """ + Returns the configuration setting given the client, key and label. + """ + try: + if key.endswith("/"): + actual_key = f"{key}{sub_key}" + else: + actual_key = f"{key}/{sub_key}" + obj = client.get_configuration_setting(key=actual_key, label=label) + except ResourceNotFoundError: + if required: + message = f"Missing required configuration key: {actual_key}" + raise Exception(message) + return None + return obj.value + + +def _parse_parameters(protocol_arg: str) -> dict: + """ + Parse the parameters from the protocol argument string. + """ + pos = protocol_arg.find("?") + parsed_url = urlparse(protocol_arg[pos + 1 :]) + parsed_values = parse_qs(parsed_url.path) + parameters = { + key.lower(): value[0] for key, value in parsed_values.items() + } + parameters["appconfigname"] = protocol_arg[:pos] + return parameters + + +def password_type_azure_vault_hook(args): + uri = _get_required_parameter(args, "uri") + credential = args.get("credential") + + if credential is None: + # if credential not present, this might be coming + # from oci config provider, so create credential + # for azure key vault. + auth = args.get("authentication") + if auth is None: + raise Exception( + "Azure Vault authentication details are not provided." + ) + credential = _get_credential(auth) + + pattern = re.compile( + r"(?Phttps://[A-Za-z0-9._-]+)/" + r"secrets/(?P[A-Za-z][A-Za-z0-9-]*)$" + ) + match = pattern.match(uri) + if match is None: + raise Exception("Invalid Azure Vault details") + vault_url = match.group("vault_url") + secret_key = match.group("secretKey") + secret_client = SecretClient(vault_url, credential) + return secret_client.get_secret(secret_key).value + + +def _process_config(parameters, connect_params): + """ + Processes the configuration stored in the Azure App configuration store. 
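+
+    The connect_descriptor setting is required; the user, password, and pyo
+    settings are optional and are looked up under the given key prefix and
+    optional label. A password must be a JSON document referencing Azure Key
+    Vault. The assembled configuration is applied to connect_params using
+    set_from_config().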
+ """ + + credential = _get_credential(parameters) + client = AzureAppConfigurationClient( + "https://" + _get_required_parameter(parameters, "appconfigname"), + credential, + ) + key = _get_required_parameter(parameters, "key") + label = parameters.get("label") + + # get the common parameters + config = {} + config["connect_descriptor"] = _get_setting( + client, key, "connect_descriptor", label + ) + config["user"] = _get_setting(client, key, "user", label, required=False) + pwd = _get_setting(client, key, "password", label, required=False) + + if pwd is not None: + try: + pwd = json.loads(pwd) + pwd["type"] = "azure-vault" + pwd["credential"] = credential + except json.JSONDecodeError: + message = ( + "Password is expected to be JSON" + " containing Azure Vault details." + ) + raise Exception(message) + + config["password"] = pwd + # get the python-oracledb specific parameters + settings = _get_setting(client, key, "pyo", label, required=False) + if settings is not None: + config["pyo"] = json.loads(settings) + + # set the configuration + connect_params.set_from_config(config) + + +def config_azure_hook(protocol, protocol_arg, connect_params): + """ + Hook for handling parameters stored in an Azure configuration store. + """ + parameters = _parse_parameters(protocol_arg) + _process_config(parameters, connect_params) + + +oracledb.register_password_type("azure-vault", password_type_azure_vault_hook) +oracledb.register_protocol("config-azure", config_azure_hook) diff --git a/src/oracledb/plugins/oci_config_provider.py b/src/oracledb/plugins/oci_config_provider.py new file mode 100644 index 00000000..89767ab3 --- /dev/null +++ b/src/oracledb/plugins/oci_config_provider.py @@ -0,0 +1,203 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# oci_config_provider.py +# +# Python file contains the hook method config_oci_hook() that fetches config +# store from OCI Object Storage. 
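+#
+# The hook is registered for connection strings that use the
+# "config-ociobject" protocol; a password hook is also registered for
+# passwords of type "oci-vault" so that secrets can be fetched from OCI Vault.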
+# ----------------------------------------------------------------------------- + +import re +import json +import oci +import oracledb + +from urllib.parse import urlparse, parse_qs + +oci_from_file = oci.config.from_file +oci_client_error = oci.exceptions.ClientError +oci_object_storage_client = oci.object_storage.ObjectStorageClient +oci_secrets_client = oci.secrets.SecretsClient + + +""" +Pattern to parse OCI Object Connect String +""" +cloud_net_naming_pattern_oci = re.compile( + r"(?P[^/]+)/n/(?P[^/]+)/b/(?P[^/]+)/o/(?P[^/]+)(/c/(?P[^/]+))?" +) + + +def _get_config(parameters, connect_params): + config = {} + + credential = _get_credential(parameters) + client_oci = oci_object_storage_client(credential) + get_object_request = { + "object_name": _get_required_parameter(parameters, "filename"), + "bucket_name": _get_required_parameter(parameters, "bucketname"), + "namespace_name": _get_required_parameter(parameters, "namespace"), + } + + get_object_response = client_oci.get_object(**get_object_request) + resp = _stream_to_string(get_object_response.data) + settings = json.loads(resp) + user_alias = parameters.get("alias") + if user_alias: + settings = settings[user_alias] + + # Connect Descriptor + config["connect_descriptor"] = _get_required_parameter( + settings, "connect_descriptor" + ) + + if connect_params.user is None: + config["user"] = settings.get("user") + if "password" in settings: + pwd = settings["password"] + if settings["password"]["type"] == "oci-vault": + pwd["credential"] = credential + + # password should be stored in JSON and not plain text. + config["password"] = pwd + + # pyo parameters settings + config["pyo"] = settings.get("pyo", None) + + # parse connect string and set requested parameters + connect_params.set_from_config(config) + + +def _get_credential(parameters): + """ + Returns the appropriate credential given the input supplied by the original + connect string. + """ + auth = parameters.get("authentication") + if auth is not None: + auth = auth.upper() + + if auth is None or auth == "OCI_DEFAULT": + # Default Authentication + # default path ~/.oci/config + return oci_from_file() + if "tenancy_user" in parameters and "oci_user" in parameters: + with open(parameters["oci_key_file"], "r") as file_content: + public_key = file_content.read() + _retrieve_region(parameters.get("objservername")) + provider = oci.signer.Signer( + tenancy=parameters["oci_tenancy"], + user=parameters["oci_user"], + fingerprint=parameters["oci_fingerprint"], + private_key_file_location=parameters["oci_key_file"], + private_key_content=public_key, + pass_phrase=None, + ) + else: + signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() + rps = oci.auth.signers.get_resource_principals_signer() + if parameters[auth].upper() == "OCI_INSTANCE_PRINCIPAL": + provider = signer().build() + elif parameters[auth].upper() == "OCI_RESOURCE_PRINCIPAL": + provider = rps.builder().build() + else: + msg = "Authentication options not available in Connection String" + raise Exception(msg) + return provider + + +def _get_required_parameter(parameters, name): + try: + return parameters[name] + except KeyError: + message = f'Parameter named "{name}" missing from connect string' + raise Exception(message) from None + + +def _parse_parameters(protocol_arg: str) -> dict: + """ + Parse the parameters from the protocol argument string. 
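+
+    The query portion (after "?") is parsed into a dictionary with lower-cased
+    option names, and the object storage path before it is matched against the
+    expected URL pattern to extract the objservername, namespace, bucketname,
+    filename, and optional alias entries.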
+ """ + pos = protocol_arg.find("?") + parsed_url = urlparse(protocol_arg[pos + 1 :]) + parsed_values = parse_qs(parsed_url.path) + parameters = { + key.lower(): value[0] for key, value in parsed_values.items() + } + + match = cloud_net_naming_pattern_oci.match(protocol_arg[:pos]) + if match: + parameters["objservername"] = match.group("objservername") + parameters["namespace"] = match.group("namespace") + parameters["bucketname"] = match.group("bucketname") + parameters["filename"] = match.group("filename") + if match.group("alias"): + parameters["alias"] = match.group("alias") + return parameters + + +def password_type_oci_vault_hook(args): + secret_id = args.get("uri") + credential = args.get("credential") + + # if credentials are not present, create credentials with given + # authentication details. + if credential is None: + auth = _get_required_parameter(args, "auth") + if auth is None: + raise Exception( + "OCI Key Vault authentication details are not provided." + ) + credential = _get_credential(auth) + + secret_client_oci = oci_secrets_client(credential) + get_secret_bundle_request = {"secret_id": secret_id} + get_secret_bundle_response = secret_client_oci.get_secret_bundle( + **get_secret_bundle_request + ) + return get_secret_bundle_response.data.secret_bundle_content.content + + +def _retrieve_region(objservername): + arr = objservername.split(".") + return arr[1].upper().replace("-", "_") + + +def _stream_to_string(stream): + return b"".join(stream).decode() + + +def config_oci_hook( + protocol: str, protocol_arg: str, connect_params: oracledb.ConnectParams +): + """ + Hook for handling parameters stored in an OCI Object store. + """ + parameters = _parse_parameters(protocol_arg) + _get_config(parameters, connect_params) + + +oracledb.register_password_type("oci-vault", password_type_oci_vault_hook) +oracledb.register_protocol("config-ociobject", config_oci_hook) From 8392f23bbeae8bb9c9b3b7451f68ace1df5a5df9 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:43:39 -0700 Subject: [PATCH 009/178] Added support for sparse vectors. 
--- doc/src/api_manual/fetch_info.rst | 11 + doc/src/api_manual/module.rst | 13 + doc/src/api_manual/sparse_vector.rst | 30 + doc/src/index.rst | 1 + doc/src/release_notes.rst | 1 + doc/src/user_guide/vector_data_type.rst | 110 ++++ src/oracledb/__init__.py | 5 + src/oracledb/base_impl.pxd | 16 +- src/oracledb/base_impl.pyx | 1 + src/oracledb/constants.py | 1 + src/oracledb/fetch_info.py | 10 + src/oracledb/impl/base/connection.pyx | 2 + src/oracledb/impl/base/constants.pxi | 2 + src/oracledb/impl/base/metadata.pyx | 2 +- src/oracledb/impl/base/utils.pyx | 19 + src/oracledb/impl/base/vector.pyx | 198 ++++-- src/oracledb/impl/thick/odpi.pxd | 3 + src/oracledb/impl/thick/utils.pyx | 47 +- src/oracledb/impl/thin/capabilities.pyx | 3 +- src/oracledb/impl/thin/constants.pxi | 1 + src/oracledb/sparse_vector.py | 104 +++ src/oracledb/thick_impl.pyx | 3 + tests/create_schema.py | 6 +- tests/sql/create_schema_23_7.sql | 45 ++ tests/test_6400_vector_var.py | 13 +- tests/test_7700_sparse_vector.py | 798 ++++++++++++++++++++++++ tests/test_env.py | 4 +- 27 files changed, 1368 insertions(+), 81 deletions(-) create mode 100644 doc/src/api_manual/sparse_vector.rst create mode 100644 src/oracledb/sparse_vector.py create mode 100644 tests/sql/create_schema_23_7.sql create mode 100644 tests/test_7700_sparse_vector.py diff --git a/doc/src/api_manual/fetch_info.rst b/doc/src/api_manual/fetch_info.rst index 3cf75fd6..69b45c3b 100644 --- a/doc/src/api_manual/fetch_info.rst +++ b/doc/src/api_manual/fetch_info.rst @@ -142,3 +142,14 @@ FetchInfo Attributes the value returned is *None*. .. versionadded:: 2.2.0 + +.. attribute:: FetchInfo.vector_is_sparse + + This read-only attribute returns a boolean that indicates whether the + vector is sparse or not. + + If the column contains vectors that are SPARSE, the value returned is + True. If the column contains vectors that are DENSE, the value returned is + False. If the column is not a VECTOR column, the value returned is ``None``. + + .. versionadded:: 3.0.0 diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index afcca1e7..13c6ae8b 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -2509,6 +2509,19 @@ Oracledb Methods The ``connection_id_prefix`` parameter was added. +.. function:: SparseVector(num_dimensions, indices, values) + + Creates and returns a :ref:`SparseVector object `. + + The ``num_dimensions`` parameter is the number of dimensions contained in + the vector. + + The ``indices`` parameter is the indices (zero-based) of non-zero values + in the vector. + + The ``values`` parameter is the non-zero values stored in the vector. + + .. versionadded:: 3.0.0 .. function:: register_password_type(password_type, hook_function) diff --git a/doc/src/api_manual/sparse_vector.rst b/doc/src/api_manual/sparse_vector.rst new file mode 100644 index 00000000..c597372c --- /dev/null +++ b/doc/src/api_manual/sparse_vector.rst @@ -0,0 +1,30 @@ +.. _sparsevectorsobj: + +************************* +API: SparseVector Objects +************************* + +A SparseVector Object stores information about a sparse vector. This object +can be created with :meth:`oracledb.SparseVector()`. + +See :ref:`sparsevectors` for more information. + +.. versionadded:: 3.0.0 + +SparseVector Attributes +======================= + +.. attribute:: SparseVector.indices + + This read-only attribute is an array that returns the indices (zero-based) + of non-zero values in the vector. + +.. 
attribute:: SparseVector.num_dimensions + + This read-only attribute is an integer that returns the number of + dimensions of the vector. + +.. attribute:: SparseVector.values + + This read-only attribute is an array that returns the non-zero values + stored in the vector. diff --git a/doc/src/index.rst b/doc/src/index.rst index 2021def1..2ac38246 100644 --- a/doc/src/index.rst +++ b/doc/src/index.rst @@ -64,6 +64,7 @@ API Manual api_manual/subscription.rst api_manual/lob.rst api_manual/dbobject_type.rst + api_manual/sparse_vector.rst api_manual/aq.rst api_manual/soda.rst api_manual/async_connection.rst diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index c38bce62..e502bd02 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -55,6 +55,7 @@ Thick Mode Changes Common Changes ++++++++++++++ +#) Added support for Oracle Database 23ai SPARSE vectors. #) Added support for :ref:`naming and caching connection pools ` during creation, and retrieving them later from the python-oracledb pool cache with :meth:`oracledb.get_pool()`. diff --git a/doc/src/user_guide/vector_data_type.rst b/doc/src/user_guide/vector_data_type.rst index 394f19cf..8ac26b69 100644 --- a/doc/src/user_guide/vector_data_type.rst +++ b/doc/src/user_guide/vector_data_type.rst @@ -212,6 +212,116 @@ If you are using python-oracledb Thick mode with older versions of Oracle Client libraries than 23ai, see this :ref:`section `. +.. _sparsevectors: + +Using SPARSE Vectors +==================== + +A Sparse vector is a vector which has zero value for most of its dimensions. +This vector only physically stores the non-zero values. A sparse vector is +supported when you are using Oracle Database 23.7 or later. + +Sparse vectors can store the total number of dimensions, an array of indices, +and an array of values. The storage formats that can be used with sparse +vectors are float32, float64, and int8. Note that the binary storage format +cannot be used with sparse vectors. You can define a column for a sparse +vector using the following format:: + + VECTOR(number_of_dimensions, dimension_storage_format, sparse) + +For example, to create a table with three columns for sparse vectors: + +.. code-block:: sql + + CREATE TABLE vector_sparse_table ( + float32sparsecol vector(25, float32, sparse), + float64sparsecol vector(30, float64, sparse), + int8sparsecol vector(35, int8, sparse) + ) + +In this example the: + +- The float32sparsecol column can store sparse vector data of 25 dimensions + where each dimension value is a 32-bit floating-point number. + +- The float64sparsecol column can store sparse vector data of 30 dimensions + where each dimension value is a 64-bit floating-point number. + +- The int8sparsecol column can store sparse vector data of 35 dimensions where + each dimension value is a 8-bit signed integer. + +.. _insertsparsevectors: + +Inserting SPARSE Vectors +------------------------ + +With python-oracledb, sparse vector data can be inserted using +:ref:`SparseVector objects `. You can specify the number of +dimensions, an array of indices, and an array of values as the data for a +sparse vector. For example, the string representation is:: + + [25, [5,8,11], [25.25, 6.125, 8.25]] + +In this example, the sparse vector has 25 dimensions. Only indices 5, 8, and +11 have values 25.25, 6.125, and 8.25 respectively. All of the other values +are zero. + +The SparseVector objects are used as bind values when inserting sparse vector +columns. For example: + +.. 
code-block:: python + + import array + + # 32-bit float sparse vector + float32_val = oracledb.SparseVector( + 25, [6, 10, 18], array.array('f', [26.25, 129.625, 579.875]) + ) + + # 64-bit float sparse vector + float64_val = oracledb.SparseVector( + 30, [9, 16, 24], array.array('d', [19.125, 78.5, 977.375]) + ) + + # 8-bit signed integer sparse vector + int8_val = oracledb.SparseVector( + 35, [10, 20, 30], array.array('b', [26, 125, -37]) + ) + + cursor.execute( + "insert into vector_sparse_table (:1, :2, :3)", + [float32_val, float64_val, int8_val] + ) + +.. _fetchsparsevectors: + +Fetching Sparse Vectors +----------------------- + +With python-oracledb, sparse vector columns are fetched in the same format +accepted by Oracle Database by using the str() function. For example: + +.. code-block:: python + + cursor.execute("select * from vec_sparse") + for float32_val, float64_val, int8_val in cursor: + print("float32:", str(float32_val)) + print("float64:", str(float64_val)) + print("int8:", str(int8_val)) + +This prints the following output:: + + float32: [25, [6, 10, 18], [26.25, 129.625, 579.875]] + float64: [30, [9, 16, 24], [19.125, 78.5, 977.375]] + int8: [35, [10, 20, 30], [26, 125, -37]] + +The :ref:`FetchInfo ` object that is returned as part of the +fetched metadata contains attributes :attr:`FetchInfo.vector_dimensions`, +:attr:`FetchInfo.vector_format`, and :attr:`FetchInfo.vector_is_sparse` which +return the number of dimensions of the vector column, the format of each +dimension value in the vector column, and a boolean which determines whether +the vector is sparse or not. + .. _vector_thick_mode_old_client: Using python-oracledb Thick Mode with Older Versions of Oracle Client Libraries diff --git a/src/oracledb/__init__.py b/src/oracledb/__init__.py index f8c5dd58..45cca983 100644 --- a/src/oracledb/__init__.py +++ b/src/oracledb/__init__.py @@ -310,6 +310,10 @@ future as __future__, # noqa: F401 ) +from .sparse_vector import ( + SparseVector as SparseVector, +) + from . 
import config_providers IntervalYM = collections.namedtuple("IntervalYM", ["years", "months"]) @@ -345,6 +349,7 @@ class JsonId(bytes): lob, # noqa pool, # noqa pool_params, # noqa + sparse_vector, # noqa soda, # noqa subscr, # noqa sys, # noqa diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index 95eee8e2..5cb1889e 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -195,12 +195,14 @@ cdef type PY_TYPE_MESSAGE cdef type PY_TYPE_MESSAGE_QUERY cdef type PY_TYPE_MESSAGE_ROW cdef type PY_TYPE_MESSAGE_TABLE +cdef type PY_TYPE_SPARSE_VECTOR cdef type PY_TYPE_TIMEDELTA cdef type PY_TYPE_VAR cdef str DRIVER_NAME cdef str DRIVER_VERSION cdef str DRIVER_INSTALLATION_URL +cdef str ARRAY_TYPE_CODE_UINT32 cdef const char* ENCODING_UTF8 cdef const char* ENCODING_UTF16 @@ -403,12 +405,17 @@ cdef class OsonEncoder(GrowableBuffer): cdef class VectorDecoder(Buffer): + cdef array.array _decode_values(self, uint32_t num_elements, + uint8_t vector_format) cdef object decode(self, bytes data) cdef class VectorEncoder(GrowableBuffer): - cdef int encode(self, array.array value) except -1 + cdef int _encode_values(self, array.array value, uint32_t num_elements, + uint8_t vector_format) except -1 + cdef uint8_t _get_vector_format(self, array.array value) + cdef int encode(self, object value) except -1 cdef class OracleMetadata: @@ -870,6 +877,13 @@ cdef class PipelineOpResultImpl: cdef int _capture_err(self, Exception exc) except -1 +cdef class SparseVectorImpl: + cdef: + readonly uint32_t num_dimensions + readonly array.array indices + readonly array.array values + + cdef struct OracleDate: int16_t year uint8_t month diff --git a/src/oracledb/base_impl.pyx b/src/oracledb/base_impl.pyx index 22c10ec1..69a872ee 100644 --- a/src/oracledb/base_impl.pyx +++ b/src/oracledb/base_impl.pyx @@ -77,6 +77,7 @@ cdef type PY_TYPE_MESSAGE cdef type PY_TYPE_MESSAGE_QUERY cdef type PY_TYPE_MESSAGE_ROW cdef type PY_TYPE_MESSAGE_TABLE +cdef type PY_TYPE_SPARSE_VECTOR cdef type PY_TYPE_TIMEDELTA = datetime.timedelta cdef type PY_TYPE_VAR cdef type PY_TYPE_FETCHINFO diff --git a/src/oracledb/constants.py b/src/oracledb/constants.py index fff7a542..b9a2d194 100644 --- a/src/oracledb/constants.py +++ b/src/oracledb/constants.py @@ -135,3 +135,4 @@ # vector metadata flags VECTOR_META_FLAG_FLEXIBLE_DIM = 0x01 +VECTOR_META_FLAG_SPARSE_VECTOR = 0x02 diff --git a/src/oracledb/fetch_info.py b/src/oracledb/fetch_info.py index c02d28cb..c693267b 100644 --- a/src/oracledb/fetch_info.py +++ b/src/oracledb/fetch_info.py @@ -249,3 +249,13 @@ def vector_format(self) -> [oracledb.VectorFormat, None]: and self._impl.vector_format != 0 ): return oracledb.VectorFormat(self._impl.vector_format) + + @property + def vector_is_sparse(self) -> Union[bool, None]: + """ + Returns a boolean indicating if the vector is sparse or not. If the + column is not a vector column, the value returned is None. 
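+
+        The value is derived from the vector metadata flags
+        (VECTOR_META_FLAG_SPARSE_VECTOR).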
+ """ + if self._impl.dbtype is DB_TYPE_VECTOR: + flags = self._impl.vector_flags + return bool(flags & constants.VECTOR_META_FLAG_SPARSE_VECTOR) diff --git a/src/oracledb/impl/base/connection.pyx b/src/oracledb/impl/base/connection.pyx index 3c53e3ae..180bbe74 100644 --- a/src/oracledb/impl/base/connection.pyx +++ b/src/oracledb/impl/base/connection.pyx @@ -151,6 +151,8 @@ cdef class BaseConnImpl: if len(value) == 0: errors._raise_err(errors.ERR_INVALID_VECTOR) return value + elif isinstance(value, PY_TYPE_SPARSE_VECTOR): + return value elif db_type_num == DB_TYPE_NUM_INTERVAL_YM: if isinstance(value, PY_TYPE_INTERVAL_YM): return value diff --git a/src/oracledb/impl/base/constants.pxi b/src/oracledb/impl/base/constants.pxi index cf5b7aea..96bedc11 100644 --- a/src/oracledb/impl/base/constants.pxi +++ b/src/oracledb/impl/base/constants.pxi @@ -78,11 +78,13 @@ cdef enum: TNS_VECTOR_MAGIC_BYTE = 0xDB TNS_VECTOR_VERSION_BASE = 0 TNS_VECTOR_VERSION_WITH_BINARY = 1 + TNS_VECTOR_VERSION_WITH_SPARSE = 2 # VECTOR flags cdef enum: TNS_VECTOR_FLAG_NORM = 0x0002 TNS_VECTOR_FLAG_NORM_RESERVED = 0x0010 + TNS_VECTOR_FLAG_SPARSE = 0x0020 # general constants cdef enum: diff --git a/src/oracledb/impl/base/metadata.pyx b/src/oracledb/impl/base/metadata.pyx index be441f29..206f5ea8 100644 --- a/src/oracledb/impl/base/metadata.pyx +++ b/src/oracledb/impl/base/metadata.pyx @@ -158,7 +158,7 @@ cdef class OracleMetadata: metadata.dbtype = value.type elif isinstance(value, (PY_TYPE_CURSOR, PY_TYPE_ASYNC_CURSOR)): metadata.dbtype = DB_TYPE_CURSOR - elif isinstance(value, array.array): + elif isinstance(value, (array.array, PY_TYPE_SPARSE_VECTOR)): metadata.dbtype = DB_TYPE_VECTOR elif isinstance(value, PY_TYPE_INTERVAL_YM): metadata.dbtype = DB_TYPE_INTERVAL_YM diff --git a/src/oracledb/impl/base/utils.pyx b/src/oracledb/impl/base/utils.pyx index fb7e9ca7..a66d55a0 100644 --- a/src/oracledb/impl/base/utils.pyx +++ b/src/oracledb/impl/base/utils.pyx @@ -189,6 +189,23 @@ cdef int _set_str_param(dict args, str name, object target, bint check_network_c setattr(target, name, in_val) +def get_array_type_code_uint32(): + """ + Returns the type code to use for array.array that will store uint32_t. + """ + cdef: + array.array temp_array + str type_code + global ARRAY_TYPE_CODE_UINT32 + if ARRAY_TYPE_CODE_UINT32 is None: + for type_code in ("I", "L"): + temp_array = array.array(type_code) + if temp_array.itemsize == 4: + ARRAY_TYPE_CODE_UINT32 = type_code + break + return ARRAY_TYPE_CODE_UINT32 + + def init_base_impl(package): """ Initializes globals after the package has been completely initialized. 
This @@ -217,6 +234,7 @@ def init_base_impl(package): PY_TYPE_MESSAGE_ROW, \ PY_TYPE_MESSAGE_TABLE, \ PY_TYPE_POOL_PARAMS, \ + PY_TYPE_SPARSE_VECTOR, \ PY_TYPE_VAR errors = package.errors @@ -241,6 +259,7 @@ def init_base_impl(package): PY_TYPE_MESSAGE_ROW = package.MessageRow PY_TYPE_MESSAGE_TABLE = package.MessageTable PY_TYPE_POOL_PARAMS = package.PoolParams + PY_TYPE_SPARSE_VECTOR = package.SparseVector PY_TYPE_VAR = package.Var diff --git a/src/oracledb/impl/base/vector.pyx b/src/oracledb/impl/base/vector.pyx index b4e54cda..449c0ade 100644 --- a/src/oracledb/impl/base/vector.pyx +++ b/src/oracledb/impl/base/vector.pyx @@ -34,40 +34,40 @@ cdef array.array double_template = array.array('d') cdef array.array int8_template = array.array('b') cdef array.array uint8_template = array.array('B') +@cython.final +cdef class SparseVectorImpl: + + @classmethod + def from_values(cls, num_dimensions, indices, values): + """ + Creates an implementation from its component values. + """ + cdef SparseVectorImpl impl = cls.__new__(cls) + impl.num_dimensions = num_dimensions + impl.indices = indices + impl.values = values + return impl + + @cython.final cdef class VectorDecoder(Buffer): - cdef object decode(self, bytes data): + cdef array.array _decode_values(self, uint32_t num_elements, + uint8_t vector_format): """ - Returns a Python object corresponding to the encoded VECTOR bytes. + Returns an array containing the decoded values. """ cdef: - uint8_t magic_byte, version, vector_format - uint8_t * uint8_buf = NULL + uint8_t *uint8_buf = NULL double *double_buf = NULL + uint8_t element_size = 0 int8_t *int8_buf = NULL - uint32_t num_elements, i float *float_buf = NULL OracleDataBuffer buffer array.array result - uint16_t flags - object value + uint32_t i - # populate the buffer with the data - self._populate_from_bytes(data) - - # parse header - self.read_ub1(&magic_byte) - if magic_byte != TNS_VECTOR_MAGIC_BYTE: - errors._raise_err(errors.ERR_UNEXPECTED_DATA, - data=bytes([magic_byte])) - self.read_ub1(&version) - if version > TNS_VECTOR_VERSION_WITH_BINARY: - errors._raise_err(errors.ERR_VECTOR_VERSION_NOT_SUPPORTED, - version=version) - self.read_uint16be(&flags) - self.read_ub1(&vector_format) - self.read_uint32be(&num_elements) + # set up buffers based on vector storage format if vector_format == VECTOR_FORMAT_FLOAT32: result = array.clone(float_template, num_elements, False) float_buf = result.data.as_floats @@ -84,9 +84,6 @@ cdef class VectorDecoder(Buffer): else: errors._raise_err(errors.ERR_VECTOR_FORMAT_NOT_SUPPORTED, vector_format=vector_format) - if flags & TNS_VECTOR_FLAG_NORM_RESERVED \ - or flags & TNS_VECTOR_FLAG_NORM: - self.skip_raw_bytes(8) # parse data for i in range(num_elements): @@ -100,48 +97,125 @@ cdef class VectorDecoder(Buffer): self.read_sb1(&int8_buf[i]) else: self.read_ub1(&uint8_buf[i]) + return result + cdef object decode(self, bytes data): + """ + Returns a Python object corresponding to the encoded VECTOR bytes. 
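+
+        Dense vector images are decoded to array.array values; images with the
+        sparse flag set are decoded to SparseVector objects built from the
+        encoded indices and non-zero values.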
+ """ + cdef: + uint8_t magic_byte, version, vector_format + uint16_t flags, num_sparse_elements + SparseVectorImpl sparse_impl + array.array uint32_template + uint32_t* sparse_indices + uint32_t num_elements, i + + # populate the buffer with the data + self._populate_from_bytes(data) + + # parse header + self.read_ub1(&magic_byte) + if magic_byte != TNS_VECTOR_MAGIC_BYTE: + errors._raise_err(errors.ERR_UNEXPECTED_DATA, + data=bytes([magic_byte])) + self.read_ub1(&version) + if version > TNS_VECTOR_VERSION_WITH_SPARSE: + errors._raise_err(errors.ERR_VECTOR_VERSION_NOT_SUPPORTED, + version=version) + self.read_uint16be(&flags) + self.read_ub1(&vector_format) + self.read_uint32be(&num_elements) + if flags & TNS_VECTOR_FLAG_NORM_RESERVED \ + or flags & TNS_VECTOR_FLAG_NORM: + self.skip_raw_bytes(8) + + # for sparse vectors, only non-zero elements are found in the image + if flags & TNS_VECTOR_FLAG_SPARSE: + sparse_impl = SparseVectorImpl.__new__(SparseVectorImpl) + sparse_impl.num_dimensions = num_elements + self.read_uint16be(&num_sparse_elements) + num_elements = num_sparse_elements + uint32_template = array.array(ARRAY_TYPE_CODE_UINT32) + sparse_impl.indices = array.clone(uint32_template, + num_sparse_elements, False) + sparse_indices = sparse_impl.indices.data.as_voidptr + for i in range(num_sparse_elements): + self.read_uint32be(&sparse_indices[i]) + sparse_impl.values = self._decode_values(num_sparse_elements, + vector_format) + return PY_TYPE_SPARSE_VECTOR._from_impl(sparse_impl) + + # all other vectors have just the values + return self._decode_values(num_elements, vector_format) + @cython.final cdef class VectorEncoder(GrowableBuffer): - cdef int encode(self, array.array value) except -1: + cdef int _encode_values(self, array.array value, uint32_t num_elements, + uint8_t vector_format) except -1: """ - Encodes the given value to the internal VECTOR format. + Encode the values into the image using the given vector storage format. """ cdef: - uint16_t flags = TNS_VECTOR_FLAG_NORM_RESERVED - uint8_t vector_format, vector_version - double *double_ptr = NULL - uint8_t *uint8_ptr = NULL - uint32_t num_elements, i - float *float_ptr = NULL - int8_t *int8_ptr = NULL - object element + double *double_ptr = value.data.as_doubles + uint8_t *uint8_ptr = value.data.as_uchars + float *float_ptr = value.data.as_floats + int8_t *int8_ptr = value.data.as_schars + uint32_t i + if vector_format == VECTOR_FORMAT_INT8: + self.write_raw( int8_ptr, num_elements) + elif vector_format == VECTOR_FORMAT_BINARY: + self.write_raw( uint8_ptr, num_elements // 8) + else: + for i in range(num_elements): + if vector_format == VECTOR_FORMAT_FLOAT32: + self.write_binary_float(float_ptr[i], write_length=False) + elif vector_format == VECTOR_FORMAT_FLOAT64: + self.write_binary_double(double_ptr[i], write_length=False) - # determine the type of vector to write + cdef uint8_t _get_vector_format(self, array.array value): + """ + Returns the vector storage format used by the array. 
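+
+        Typecode 'd' maps to FLOAT64, 'f' to FLOAT32, and 'b' to INT8; any
+        other typecode is treated as the BINARY format.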
+ """ if value.typecode == 'd': - vector_format = VECTOR_FORMAT_FLOAT64 - double_ptr = value.data.as_doubles + return VECTOR_FORMAT_FLOAT64 elif value.typecode == 'f': - vector_format = VECTOR_FORMAT_FLOAT32 - float_ptr = value.data.as_floats + return VECTOR_FORMAT_FLOAT32 elif value.typecode == 'b': - vector_format = VECTOR_FORMAT_INT8 - int8_ptr = value.data.as_schars - else: - vector_format = VECTOR_FORMAT_BINARY - uint8_ptr = value.data.as_uchars + return VECTOR_FORMAT_INT8 + return VECTOR_FORMAT_BINARY + + cdef int encode(self, object value) except -1: + """ + Encodes the given value to the internal VECTOR format. + """ + cdef: + uint16_t flags = TNS_VECTOR_FLAG_NORM_RESERVED + uint8_t vector_format, vector_version + SparseVectorImpl sparse_impl = None + uint16_t num_sparse_elements, i + uint32_t* sparse_indices + uint32_t num_elements - # determine vector version and number of elements - if vector_format == VECTOR_FORMAT_BINARY: - num_elements = ( len(value)) * 8 - vector_version = TNS_VECTOR_VERSION_WITH_BINARY + # determine metadatda about the vector to write + if isinstance(value, PY_TYPE_SPARSE_VECTOR): + sparse_impl = value._impl + num_elements = sparse_impl.num_dimensions + vector_format = self._get_vector_format(sparse_impl.values) + vector_version = TNS_VECTOR_VERSION_WITH_SPARSE + flags |= TNS_VECTOR_FLAG_SPARSE | TNS_VECTOR_FLAG_NORM else: - num_elements = len(value) - vector_version = TNS_VECTOR_VERSION_BASE - flags |= TNS_VECTOR_FLAG_NORM + vector_format = self._get_vector_format(value) + if vector_format == VECTOR_FORMAT_BINARY: + num_elements = ( len(value)) * 8 + vector_version = TNS_VECTOR_VERSION_WITH_BINARY + else: + num_elements = len(value) + vector_version = TNS_VECTOR_VERSION_BASE + flags |= TNS_VECTOR_FLAG_NORM # write header self.write_uint8(TNS_VECTOR_MAGIC_BYTE) @@ -151,14 +225,14 @@ cdef class VectorEncoder(GrowableBuffer): self.write_uint32be(num_elements) self._reserve_space(8) # reserve space for norm - # write elements - if vector_format == VECTOR_FORMAT_INT8: - self.write_raw( int8_ptr, num_elements) - elif vector_format == VECTOR_FORMAT_BINARY: - self.write_raw( uint8_ptr, num_elements // 8) + # write data + if sparse_impl is None: + self._encode_values(value, num_elements, vector_format) else: - for i in range(num_elements): - if vector_format == VECTOR_FORMAT_FLOAT32: - self.write_binary_float(float_ptr[i], write_length=False) - elif vector_format == VECTOR_FORMAT_FLOAT64: - self.write_binary_double(double_ptr[i], write_length=False) + sparse_indices = sparse_impl.indices.data.as_voidptr + num_sparse_elements = len(sparse_impl.indices) + self.write_uint16be(num_sparse_elements) + for i in range(num_sparse_elements): + self.write_uint32be(sparse_indices[i]) + self._encode_values(sparse_impl.values, num_sparse_elements, + vector_format) diff --git a/src/oracledb/impl/thick/odpi.pxd b/src/oracledb/impl/thick/odpi.pxd index d37bb4b0..81d016d8 100644 --- a/src/oracledb/impl/thick/odpi.pxd +++ b/src/oracledb/impl/thick/odpi.pxd @@ -575,6 +575,9 @@ cdef extern from "impl/thick/odpi/embed/dpi.c": uint32_t numDimensions uint8_t dimensionSize dpiVectorDimensionBuffer dimensions + uint32_t numSparseValues; + uint32_t *sparseIndices; + ctypedef struct dpiVersionInfo: int versionNum diff --git a/src/oracledb/impl/thick/utils.pyx b/src/oracledb/impl/thick/utils.pyx index 8b0c800a..a030ba93 100644 --- a/src/oracledb/impl/thick/utils.pyx +++ b/src/oracledb/impl/thick/utils.pyx @@ -107,6 +107,7 @@ cdef int _convert_from_python(object value, OracleMetadata metadata, 
StringBuffer buf) except -1: cdef: uint32_t oracle_type = metadata.dbtype.num + SparseVectorImpl sparse_impl ThickDbObjectImpl obj_impl dpiVectorInfo vector_info dpiTimestamp *timestamp @@ -199,7 +200,17 @@ cdef int _convert_from_python(object value, OracleMetadata metadata, if dpiJson_setValue(dbvalue.asJson, &json_buf._top_node) < 0: _raise_from_odpi() elif oracle_type == DPI_ORACLE_TYPE_VECTOR: - vector_info.numDimensions = len(value) + if isinstance(value, PY_TYPE_SPARSE_VECTOR): + sparse_impl = value._impl + vector_info.numDimensions = sparse_impl.num_dimensions + vector_info.numSparseValues = len(sparse_impl.indices) + vector_info.sparseIndices = \ + sparse_impl.indices.data.as_voidptr + value = sparse_impl.values + else: + vector_info.numDimensions = len(value) + vector_info.numSparseValues = 0 + vector_info.sparseIndices = NULL if value.typecode == 'd': vector_info.format = DPI_VECTOR_FORMAT_FLOAT64 elif value.typecode == 'f': @@ -385,24 +396,40 @@ cdef object _convert_vector_to_python(dpiVector *vector): Converts a vector to a Python array. """ cdef: + array.array result, indices_template, sparse_indices + uint32_t num_elements, num_bytes + SparseVectorImpl sparse_impl dpiVectorInfo vector_info - array.array result - uint32_t num_bytes if dpiVector_getValue(vector, &vector_info) < 0: _raise_from_odpi() + if vector_info.numSparseValues > 0: + num_elements = vector_info.numSparseValues + else: + num_elements = vector_info.numDimensions if vector_info.format == DPI_VECTOR_FORMAT_FLOAT32: - result = array.clone(float_template, vector_info.numDimensions, False) - num_bytes = vector_info.numDimensions * vector_info.dimensionSize + result = array.clone(float_template, num_elements, False) + num_bytes = num_elements * vector_info.dimensionSize elif vector_info.format == DPI_VECTOR_FORMAT_FLOAT64: - result = array.clone(double_template, vector_info.numDimensions, False) - num_bytes = vector_info.numDimensions * vector_info.dimensionSize + result = array.clone(double_template, num_elements, False) + num_bytes = num_elements * vector_info.dimensionSize elif vector_info.format == DPI_VECTOR_FORMAT_INT8: - result = array.clone(int8_template, vector_info.numDimensions, False) - num_bytes = vector_info.numDimensions + result = array.clone(int8_template, num_elements, False) + num_bytes = num_elements elif vector_info.format == DPI_VECTOR_FORMAT_BINARY: - num_bytes = vector_info.numDimensions // 8 + num_bytes = num_elements // 8 result = array.clone(uint8_template, num_bytes, False) memcpy(result.data.as_voidptr, vector_info.dimensions.asPtr, num_bytes) + if vector_info.numSparseValues > 0: + sparse_impl = SparseVectorImpl.__new__(SparseVectorImpl) + sparse_impl.num_dimensions = vector_info.numDimensions + indices_template = array.array(ARRAY_TYPE_CODE_UINT32) + sparse_indices = array.clone(indices_template, + vector_info.numSparseValues, False) + memcpy(sparse_indices.data.as_voidptr, vector_info.sparseIndices, + vector_info.numSparseValues * sizeof(uint32_t)) + sparse_impl.indices = sparse_indices + sparse_impl.values = result + return PY_TYPE_SPARSE_VECTOR._from_impl(sparse_impl) return result diff --git a/src/oracledb/impl/thin/capabilities.pyx b/src/oracledb/impl/thin/capabilities.pyx index 04c965cd..8cfdaffa 100644 --- a/src/oracledb/impl/thin/capabilities.pyx +++ b/src/oracledb/impl/thin/capabilities.pyx @@ -130,7 +130,8 @@ cdef class Capabilities: TNS_CCAP_TOKEN_SUPPORTED | TNS_CCAP_PIPELINING_SUPPORT | \ TNS_CCAP_PIPELINING_BREAK self.compile_caps[TNS_CCAP_VECTOR_FEATURES] = \ - 
TNS_CCAP_VECTOR_FEATURE_BINARY + TNS_CCAP_VECTOR_FEATURE_BINARY | \ + TNS_CCAP_VECTOR_FEATURE_SPARSE @cython.boundscheck(False) cdef void _init_runtime_caps(self): diff --git a/src/oracledb/impl/thin/constants.pxi b/src/oracledb/impl/thin/constants.pxi index cf34b8ad..086ee9f4 100644 --- a/src/oracledb/impl/thin/constants.pxi +++ b/src/oracledb/impl/thin/constants.pxi @@ -404,6 +404,7 @@ cdef enum: TNS_CCAP_PIPELINING_SUPPORT = 0x04 TNS_CCAP_PIPELINING_BREAK = 0x10 TNS_CCAP_VECTOR_FEATURE_BINARY = 0x01 + TNS_CCAP_VECTOR_FEATURE_SPARSE = 0x02 # runtime capability indices cdef enum: diff --git a/src/oracledb/sparse_vector.py b/src/oracledb/sparse_vector.py new file mode 100644 index 00000000..5cbb2b6a --- /dev/null +++ b/src/oracledb/sparse_vector.py @@ -0,0 +1,104 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# sparse_vector.py +# +# Contains the SparseVector class which stores information about a sparse +# vector. Sparse vectors are available in Oracle Database 23.6 and higher. +# ----------------------------------------------------------------------------- + +import array +from typing import Union + +from .base_impl import get_array_type_code_uint32, SparseVectorImpl +from . import __name__ as MODULE_NAME + +ARRAY_TYPE_CODE_UINT32 = get_array_type_code_uint32() + + +class SparseVector: + """ + Provides information about sparse vectors. 
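+
+    A minimal usage sketch (the dimension count and element values here
+    are arbitrary)::
+
+        vec = oracledb.SparseVector(16, [1, 3, 5], [1.5, 0.25, 0.5])
+        vec.num_dimensions      # 16
+        vec.indices             # array('I', [1, 3, 5])
+        vec.values              # array('d', [1.5, 0.25, 0.5])
+        str(vec)                # '[16, [1, 3, 5], [1.5, 0.25, 0.5]]'
+
+    Lists passed for indices and values are converted to uint32 and
+    float64 arrays respectively, and both must have the same length.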
+ """ + + __module__ = MODULE_NAME + + def __init__( + self, + num_dimensions: int, + indices: Union[list, array.array], + values: Union[list, array.array], + ): + if ( + not isinstance(indices, array.array) + or indices.typecode != ARRAY_TYPE_CODE_UINT32 + ): + indices = array.array(ARRAY_TYPE_CODE_UINT32, indices) + if not isinstance(values, array.array): + values = array.array("d", values) + if len(indices) != len(values): + raise TypeError("indices and values must be of the same length!") + self._impl = SparseVectorImpl.from_values( + num_dimensions, indices, values + ) + + def __repr__(self): + return ( + f"{MODULE_NAME}.{self.__class__.__name__}({self.num_dimensions}, " + f"{self.indices}, {self.values})" + ) + + def __str__(self): + return ( + f"[{self.num_dimensions}, {list(self.indices)}, " + f"{list(self.values)}]" + ) + + @classmethod + def _from_impl(cls, impl): + vector = cls.__new__(cls) + vector._impl = impl + return vector + + @property + def indices(self) -> array.array: + """ + Returns the indices (zero-based) of non-zero values in the vector. + """ + return self._impl.indices + + @property + def num_dimensions(self) -> int: + """ + Returns the number of dimensions contained in the vector. + """ + return self._impl.num_dimensions + + @property + def values(self) -> array.array: + """ + Returns the non-zero values stored in the vector. + """ + return self._impl.values diff --git a/src/oracledb/thick_impl.pyx b/src/oracledb/thick_impl.pyx index 340949b9..dc71cd37 100644 --- a/src/oracledb/thick_impl.pyx +++ b/src/oracledb/thick_impl.pyx @@ -44,6 +44,7 @@ import sys cydatetime.import_datetime() from .base_impl cimport ( + ARRAY_TYPE_CODE_UINT32, BaseConnImpl, BaseCursorImpl, BaseDbObjectImpl, @@ -83,11 +84,13 @@ from .base_impl cimport ( PY_TYPE_MESSAGE_QUERY, PY_TYPE_MESSAGE_ROW, PY_TYPE_MESSAGE_TABLE, + PY_TYPE_SPARSE_VECTOR, PY_TYPE_TIMEDELTA, PoolParamsImpl, PY_TYPE_NUM_FLOAT, PY_TYPE_NUM_INT, PY_TYPE_NUM_DECIMAL, + SparseVectorImpl, VectorDecoder, VectorEncoder, ) diff --git a/tests/create_schema.py b/tests/create_schema.py index 17d28b6a..2fbc6e37 100644 --- a/tests/create_schema.py +++ b/tests/create_schema.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2023, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -61,6 +61,10 @@ test_env.run_sql_script( conn, "create_schema_23_5", main_user=test_env.get_main_user() ) +if test_env.get_server_version() >= (23, 7): + test_env.run_sql_script( + conn, "create_schema_23_7", main_user=test_env.get_main_user() + ) if test_env.is_on_oracle_cloud(conn): test_env.run_sql_script( conn, "create_schema_cloud", main_user=test_env.get_main_user() diff --git a/tests/sql/create_schema_23_7.sql b/tests/sql/create_schema_23_7.sql new file mode 100644 index 00000000..d5e6d5f1 --- /dev/null +++ b/tests/sql/create_schema_23_7.sql @@ -0,0 +1,45 @@ +/*----------------------------------------------------------------------------- + * Copyright (c) 2024, 2025, Oracle and/or its affiliates. + * + * This software is dual-licensed to you under the Universal Permissive License + * (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License + * 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. 
You may choose + * either license.* + * + * If you elect to accept the software under the Apache License, Version 2.0, + * the following applies: + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *---------------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------------- + * create_schema_23_7.sql + * + * Performs the actual work of creating and populating the schemas with the + * database objects used by the python-oracledb test suite that require Oracle + * Database 23.7 or higher. It is executed by the Python script + * create_schema.py. + *---------------------------------------------------------------------------*/ + +create table &main_user..TestSparseVectors ( + IntCol number(9) not null, + SparseVectorFlexAllCol vector(*, *, sparse), + SparseVectorFlexTypeCol vector(2, *, sparse), + SparseVectorFlex8Col vector(*, int8, sparse), + SparseVectorFlex32Col vector(*, float32, sparse), + SparseVectorFlex64Col vector(*, float64, sparse), + SparseVector8Col vector(16, int8, sparse), + SparseVector32Col vector(16, float32, sparse), + SparseVector64Col vector(16, float64, sparse) +) +/ diff --git a/tests/test_6400_vector_var.py b/tests/test_6400_vector_var.py index 2a106afd..4670db81 100644 --- a/tests/test_6400_vector_var.py +++ b/tests/test_6400_vector_var.py @@ -306,46 +306,53 @@ def test_6412(self): "type_code", "vector_dimensions", "vector_format", + "vector_is_sparse", ] expected_values = [ - ["INTCOL", oracledb.DB_TYPE_NUMBER, None, None], - ["VECTORFLEXALLCOL", oracledb.DB_TYPE_VECTOR, None, None], - ["VECTORFLEXTYPECOL", oracledb.DB_TYPE_VECTOR, 2, None], + ["INTCOL", oracledb.DB_TYPE_NUMBER, None, None, None], + ["VECTORFLEXALLCOL", oracledb.DB_TYPE_VECTOR, None, None, False], + ["VECTORFLEXTYPECOL", oracledb.DB_TYPE_VECTOR, 2, None, False], [ "VECTORFLEX8COL", oracledb.DB_TYPE_VECTOR, None, oracledb.VECTOR_FORMAT_INT8, + False, ], [ "VECTORFLEX32COL", oracledb.DB_TYPE_VECTOR, None, oracledb.VECTOR_FORMAT_FLOAT32, + False, ], [ "VECTORFLEX64COL", oracledb.DB_TYPE_VECTOR, None, oracledb.VECTOR_FORMAT_FLOAT64, + False, ], [ "VECTOR8COL", oracledb.DB_TYPE_VECTOR, 16, oracledb.VECTOR_FORMAT_INT8, + False, ], [ "VECTOR32COL", oracledb.DB_TYPE_VECTOR, 16, oracledb.VECTOR_FORMAT_FLOAT32, + False, ], [ "VECTOR64COL", oracledb.DB_TYPE_VECTOR, 16, oracledb.VECTOR_FORMAT_FLOAT64, + False, ], ] self.cursor.execute("select * from TestVectors") diff --git a/tests/test_7700_sparse_vector.py b/tests/test_7700_sparse_vector.py new file mode 100644 index 00000000..9f008713 --- /dev/null +++ b/tests/test_7700_sparse_vector.py @@ -0,0 +1,798 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. 
You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +""" +7700 - Module for testing the VECTOR database type with storage type SPARSE +available in Oracle Database 23.7 and higher. +""" + +import array +import json +import unittest +import oracledb +import test_env + + +@unittest.skipUnless( + test_env.get_client_version() >= (23, 7), "unsupported client" +) +@unittest.skipUnless( + test_env.get_server_version() >= (23, 7), "unsupported server" +) +class TestCase(test_env.BaseTestCase): + def __test_insert_and_fetch(self, vector, column_name, expected_typecode): + """ + Test inserting sparse and fetching from a dense vector column. + """ + self.cursor.execute("delete from TestVectors") + self.cursor.execute( + f""" + insert into TestVectors (IntCol, {column_name}) + values(1, :vector) + """, + vector=vector, + ) + self.conn.commit() + self.cursor.execute(f"select {column_name} from TestVectors") + (fetched_value,) = self.cursor.fetchone() + dense_values = [0 for _ in range(vector.num_dimensions)] + for i, index in enumerate(vector.indices): + if expected_typecode == "b": + dense_values[index] = int(vector.values[i]) + else: + dense_values[index] = vector.values[i] + expected_value = array.array(expected_typecode, dense_values) + self.assertEqual(fetched_value, expected_value) + self.assertEqual(fetched_value.typecode, expected_typecode) + + def __test_insert_and_fetch_sparse( + self, vector, column_name, expected_typecode + ): + """ + Test inserting and fetching from a sparse vector column. + """ + self.cursor.execute("delete from TestSparseVectors") + self.cursor.execute( + f""" + insert into TestSparseVectors (IntCol, {column_name}) + values(1, :vector) + """, + vector=vector, + ) + self.conn.commit() + self.cursor.execute(f"select {column_name} from TestSparseVectors") + (fetched_value,) = self.cursor.fetchone() + expected_value = vector.values + if fetched_value.values.typecode == "b": + expected_value = array.array("b", [int(i) for i in vector.values]) + expected_indices = vector.indices + expected_num_dimensions = vector.num_dimensions + self.assertEqual(fetched_value.values, expected_value) + self.assertEqual(fetched_value.indices, expected_indices) + self.assertEqual(fetched_value.num_dimensions, expected_num_dimensions) + + def __fetch_with_vector( + self, + vector, + column_name, + dimensions, + vector_format, + expected_typecode, + ): + """ + Test fetching a vector with vector() function. 
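+
+        The value is inserted into the given sparse column and then
+        fetched twice with the vector() SQL function: converted to DENSE
+        (expected to arrive as an array.array with the given typecode) and
+        to SPARSE (expected to arrive as an oracledb.SparseVector whose
+        values use that typecode).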
+ """ + self.cursor.execute("delete from TestSparseVectors") + self.cursor.execute( + f""" + insert into TestSparseVectors (IntCol, {column_name}) + values(1, :vector) + """, + vector=vector, + ) + self.cursor.execute( + f""" + select + vector({column_name}, {dimensions}, {vector_format}, DENSE) + from TestSparseVectors + """ + ) + (fetched_value,) = self.cursor.fetchone() + self.assertIsInstance(fetched_value, array.array) + self.assertEqual(fetched_value.typecode, expected_typecode) + + self.cursor.execute( + f""" + select + vector({column_name}, {dimensions}, {vector_format}, SPARSE) + from TestSparseVectors + """ + ) + (fetched_value,) = self.cursor.fetchone() + self.assertIsInstance(fetched_value, oracledb.SparseVector) + self.assertEqual(fetched_value.values.typecode, expected_typecode) + + def test_7700(self): + "7700 - test binding in a sparse vector with oracledb.SparseVector" + vector = oracledb.SparseVector(3, [1], [9]) + self.cursor.execute("select :1 from dual", [vector]) + (fetched_value,) = self.cursor.fetchone() + self.assertIsInstance(fetched_value, oracledb.SparseVector) + self.assertEqual(fetched_value.num_dimensions, vector.num_dimensions) + self.assertEqual(fetched_value.indices, vector.indices) + self.assertEqual(fetched_value.values, vector.values) + + def test_7701(self): + "7701 - test binding in a sparse vector of type float32" + vector = oracledb.SparseVector(3, [1], array.array("f", [0.5])) + self.cursor.execute("select :1 from dual", [vector]) + (fetched_value,) = self.cursor.fetchone() + self.assertEqual(fetched_value.values, vector.values) + self.assertEqual(fetched_value.indices, vector.indices) + self.assertEqual(fetched_value.num_dimensions, vector.num_dimensions) + self.assertEqual(fetched_value.values.typecode, "f") + + def test_7702(self): + "7702 - test binding in a sparse vector of type float64" + vector = oracledb.SparseVector(3, [1], array.array("d", [0.25])) + self.cursor.execute("select :1 from dual", [vector]) + (fetched_value,) = self.cursor.fetchone() + self.assertEqual(fetched_value.values, vector.values) + self.assertEqual(fetched_value.indices, vector.indices) + self.assertEqual(fetched_value.num_dimensions, vector.num_dimensions) + self.assertEqual(fetched_value.values.typecode, "d") + self.assertIsInstance(fetched_value, oracledb.SparseVector) + + def test_7703(self): + "7703 - test binding in a sparse vector of type int8" + vector = oracledb.SparseVector(3, [1], array.array("b", [3])) + self.cursor.execute("select :1 from dual", [vector]) + (fetched_value,) = self.cursor.fetchone() + self.assertEqual(fetched_value.values, vector.values) + self.assertEqual(fetched_value.indices, vector.indices) + self.assertEqual(fetched_value.num_dimensions, vector.num_dimensions) + self.assertEqual(fetched_value.values.typecode, "b") + self.assertIsInstance(fetched_value, oracledb.SparseVector) + + def test_7704(self): + "7704 - insert a float32 sparse vector into a float32 column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("f", [1.5, 0.25, 0.5]) + ) + self.__test_insert_and_fetch(value, "Vector32Col", "f") + self.__test_insert_and_fetch_sparse(value, "SparseVector32Col", "f") + + def test_7705(self): + "7705 - insert a float32 vector into a float64 column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("d", [1.5, 0.25, 0.5]) + ) + self.__test_insert_and_fetch(value, "Vector64Col", "d") + self.__test_insert_and_fetch_sparse(value, "SparseVector64Col", "d") + + def test_7706(self): + "7706 - insert a float32 vector into a 
flexible format column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("f", [1.5, 0.25, 0.5]) + ) + self.__test_insert_and_fetch(value, "VectorFlexAllCol", "f") + self.__test_insert_and_fetch_sparse( + value, "SparseVectorFlexAllCol", "f" + ) + + def test_7707(self): + "7707 - insert a float64 vector into a float64 column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("d", [1.5, 0.25, 0.5]) + ) + self.__test_insert_and_fetch(value, "Vector64Col", "d") + self.__test_insert_and_fetch_sparse(value, "SparseVector64Col", "d") + + def test_7708(self): + "7708 - insert float64 vector into a float32 column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("f", [1.5, 0.25, 0.5]) + ) + self.__test_insert_and_fetch(value, "Vector32Col", "f") + self.__test_insert_and_fetch_sparse(value, "SparseVector32Col", "f") + + def test_7709(self): + "7709 - insert float64 vector into a flexible type column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("d", [1.5, 0.25, 0.5]) + ) + self.__test_insert_and_fetch(value, "VectorFlexAllCol", "f") + self.__test_insert_and_fetch_sparse( + value, "SparseVectorFlexAllCol", "d" + ) + + def test_7710(self): + "7710 - insert a vector with an invalid size" + self.cursor.execute("delete from TestVectors") + self.cursor.execute("delete from TestSparseVectors") + statements = [ + """ + insert into TestVectors (IntCol, Vector64Col) + values(1, :1) + """, + """ + insert into TestSparseVectors (IntCol, SparseVector64Col) + values(2, :1) + """, + ] + for statement in statements: + for num_elems in [4, 20]: + vector = oracledb.SparseVector( + num_elems, [2, 3], array.array("f", [6.54, 9.6]) + ) + with self.assertRaisesFullCode("ORA-51803"): + self.cursor.execute(statement, [vector]) + + def test_7711(self): + "7711 - verify fetch info for vectors" + attr_names = [ + "name", + "type_code", + "vector_dimensions", + "vector_format", + "vector_is_sparse", + ] + expected_values = [ + ["INTCOL", oracledb.DB_TYPE_NUMBER, None, None, None], + [ + "SPARSEVECTORFLEXALLCOL", + oracledb.DB_TYPE_VECTOR, + None, + None, + True, + ], + [ + "SPARSEVECTORFLEXTYPECOL", + oracledb.DB_TYPE_VECTOR, + 2, + None, + True, + ], + [ + "SPARSEVECTORFLEX8COL", + oracledb.DB_TYPE_VECTOR, + None, + oracledb.VECTOR_FORMAT_INT8, + True, + ], + [ + "SPARSEVECTORFLEX32COL", + oracledb.DB_TYPE_VECTOR, + None, + oracledb.VECTOR_FORMAT_FLOAT32, + True, + ], + [ + "SPARSEVECTORFLEX64COL", + oracledb.DB_TYPE_VECTOR, + None, + oracledb.VECTOR_FORMAT_FLOAT64, + True, + ], + [ + "SPARSEVECTOR8COL", + oracledb.DB_TYPE_VECTOR, + 16, + oracledb.VECTOR_FORMAT_INT8, + True, + ], + [ + "SPARSEVECTOR32COL", + oracledb.DB_TYPE_VECTOR, + 16, + oracledb.VECTOR_FORMAT_FLOAT32, + True, + ], + [ + "SPARSEVECTOR64COL", + oracledb.DB_TYPE_VECTOR, + 16, + oracledb.VECTOR_FORMAT_FLOAT64, + True, + ], + ] + self.cursor.execute("select * from TestSparseVectors") + values = [ + [getattr(i, n) for n in attr_names] + for i in self.cursor.description + ] + self.assertEqual(values, expected_values) + self.assertIs( + self.cursor.description[6].vector_format, + oracledb.VectorFormat.INT8, + ) + + def test_7712(self): + "7712 - insert an int8 vector into an int8 column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("f", [1, 0, 5]) + ) + self.__test_insert_and_fetch(value, "Vector8Col", "b") + self.__test_insert_and_fetch_sparse(value, "SparseVector8Col", "b") + + def test_7713(self): + "7713 - insert an int8 vector into a float32 column" + value = oracledb.SparseVector( + 16, 
[1, 3, 5], array.array("f", [1, 0, 5]) + ) + self.__test_insert_and_fetch(value, "Vector32Col", "f") + self.__test_insert_and_fetch_sparse(value, "SparseVector32Col", "f") + + def test_7714(self): + "7714 - insert an int8 vector into a float64 column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("b", [1, 0, 5]) + ) + self.__test_insert_and_fetch(value, "Vector64Col", "d") + self.__test_insert_and_fetch_sparse(value, "SparseVector64Col", "d") + + def test_7715(self): + "7715 - insert an int8 vector into a flexible column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("b", [1, 0, 5]) + ) + self.__test_insert_and_fetch(value, "VectorFlexAllCol", "f") + self.__test_insert_and_fetch_sparse( + value, "SparseVectorFlexAllCol", "b" + ) + + def test_7716(self): + "7716 - insert a float32 vector into an int8 column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("f", [1, 0, 5]) + ) + self.__test_insert_and_fetch(value, "Vector8Col", "b") + self.__test_insert_and_fetch_sparse(value, "SparseVector8Col", "b") + + def test_7717(self): + "7717 - insert a float64 vector into an int8 column" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("d", [1, 0, 5]) + ) + self.__test_insert_and_fetch(value, "Vector8Col", "b") + self.__test_insert_and_fetch_sparse(value, "SparseVector8Col", "b") + + def test_7718(self): + "7718 - test dml returning vector type" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("f", [1, 0, 5]) + ) + out_var = self.cursor.var(oracledb.DB_TYPE_VECTOR) + self.cursor.execute("delete from TestSparseVectors") + self.cursor.execute( + """ + insert into TestSparseVectors (IntCol, SparseVectorFlex32Col) + values (1, :value) + returning SparseVectorFlex32Col into :out_value + """, + [value, out_var], + ) + self.conn.commit() + vector = out_var.getvalue()[0] + self.assertEqual(vector.values, value.values) + self.assertEqual(vector.indices, value.indices) + self.assertEqual(vector.num_dimensions, value.num_dimensions) + + def test_7719(self): + "7719 - test handling of NULL vector value" + self.cursor.execute("delete from TestSparseVectors") + self.cursor.execute( + "insert into TestSparseVectors (IntCol) values (1)" + ) + self.conn.commit() + self.cursor.execute( + "select SparseVectorFlexTypeCol from TestSparseVectors" + ) + (fetched_value,) = self.cursor.fetchone() + self.assertIsNone(fetched_value) + + def test_7720(self): + "7720 - insert a float32 vector into an int8 column (negative)" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("f", [-130, 400, 5]) + ) + with self.assertRaisesFullCode("ORA-51806"): + self.__test_insert_and_fetch(value, "Vector8Col", "b") + with self.assertRaisesFullCode("ORA-51806"): + self.__test_insert_and_fetch_sparse(value, "SparseVector8Col", "b") + + def test_7721(self): + "7721 - insert a float32 vector with 65,533 dimensions" + value = oracledb.SparseVector( + 65533, [1, 3, 5], array.array("f", [1, 0, 5]) + ) + self.__test_insert_and_fetch(value, "VectorFlexAllCol", "f") + self.__test_insert_and_fetch_sparse( + value, "SparseVectorFlexAllCol", "f" + ) + + def test_7722(self): + "7722 - insert vectors with different dimensions" + for dim in [30, 70, 255, 256, 65534, 65535]: + for typ in ["f", "d", "b"]: + with self.subTest(dim=dim, typ=typ): + element_value = 3 if typ == "b" else 1.5 + value = oracledb.SparseVector( + dim, [1, 3, 5], array.array(typ, [element_value] * 3) + ) + self.__test_insert_and_fetch( + value, "VectorFlexAllCol", "f" + ) + 
self.__test_insert_and_fetch_sparse( + value, "SparseVectorFlexAllCol", typ + ) + + def test_7723(self): + "7723 - insert and fetch VECTOR data using strings" + values = [16, [1, 3, 5], [1, 0, 5]] + vector = oracledb.SparseVector(*values) + self.cursor.execute("delete from TestSparseVectors") + self.cursor.execute( + """ + insert into TestSparseVectors (IntCol, SparseVectorFlexAllCol) + values(1, :value) + """, + value=str(vector), + ) + + def type_handler(cursor, metadata): + if metadata.name == "SPARSEVECTORFLEXALLCOL": + return cursor.var( + oracledb.DB_TYPE_LONG, arraysize=cursor.arraysize + ) + + self.cursor.outputtypehandler = type_handler + + self.cursor.execute( + "select SparseVectorFlexAllCol from TestSparseVectors" + ) + (fetched_value,) = self.cursor.fetchone() + self.assertEqual(json.loads(fetched_value), values) + + def test_7724(self): + "7724 - insert vectors with flexible dimensions and conversion" + for dim in [30, 255, 256, 257, 32768, 65535]: + for source_type in ["f", "d", "b"]: + for target_type in ["f", "d", "b"]: + with self.subTest( + dim=dim, + source_type=source_type, + target_type=target_type, + ): + if target_type == "f": + target_col = "VectorFlex32Col" + elif target_type == "d": + target_col = "VectorFlex64Col" + else: + target_col = "VectorFlex8Col" + element_value = 4 if source_type == "b" else 2.25 + value = oracledb.SparseVector( + dim, + [1, 3, 7, 9], + array.array(source_type, [element_value] * 4), + ) + self.__test_insert_and_fetch( + value, target_col, target_type + ) + self.__test_insert_and_fetch_sparse( + value, f"Sparse{target_col}", target_type + ) + + def test_7725(self): + "7725 - test binding a vector with inf values (negative)" + value = oracledb.SparseVector( + 16, + [1, 3, 5], + array.array("d", [float("inf"), float("-inf"), float("-inf")]), + ) + with self.assertRaisesFullCode("ORA-51805", "ORA-51831"): + self.cursor.execute("select :1 from dual", [value]) + + def test_7726(self): + "7726 - test setting a sparse vector to a vector variable" + value = oracledb.SparseVector( + 16, [1, 3, 5], array.array("f", [1, 0, 5]) + ) + var = self.cursor.var(oracledb.DB_TYPE_VECTOR) + var.setvalue(0, value) + vector = var.getvalue() + self.assertEqual(vector.values, value.values) + self.assertEqual(vector.indices, value.indices) + self.assertEqual(vector.num_dimensions, value.num_dimensions) + + def test_7727(self): + "7727 - fetch JSON value with an embedded vector" + self.cursor.execute("delete from TestSparseVectors") + vector = oracledb.SparseVector( + 16, [1, 3, 5], array.array("d", [1.5, 0.25, 0.5]) + ) + self.cursor.execute( + """ + insert into TestSparseVectors (IntCol, SparseVector64Col) + values (1, :1) + """, + [vector], + ) + self.cursor.execute( + """ + select json_object( + 'id': 7732, + 'vector' : vector(SparseVector64Col, 16, float64, sparse) + returning json + ) from TestSparseVectors + """ + ) + (result,) = self.cursor.fetchone() + fetched_vector = result["vector"] + self.assertIsInstance(fetched_vector, oracledb.SparseVector) + self.assertEqual(fetched_vector.indices, vector.indices) + self.assertEqual(fetched_vector.values, vector.values) + self.assertEqual(fetched_vector.num_dimensions, vector.num_dimensions) + + def test_7728(self): + "7728 - executemany() without setinputsizes()" + self.cursor.execute("delete from TestSparseVectors") + vector = oracledb.SparseVector( + 16, [1, 3, 5], array.array("f", [1, 0, 5]) + ) + values = [vector, [0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0]] + self.cursor.executemany( + """ + insert into 
TestSparseVectors (IntCol, SparseVector32Col) + values (:1, :2) + """, + list(enumerate(values)), + ) + self.cursor.execute( + "select SparseVector32Col from TestSparseVectors order by IntCol" + ) + (fetched_vector1,), (fetched_vector2,) = self.cursor.fetchall() + self.assertEqual(fetched_vector1.values, vector.values) + self.assertEqual(fetched_vector1.indices, vector.indices) + self.assertEqual(fetched_vector1.num_dimensions, vector.num_dimensions) + self.assertEqual( + fetched_vector2.values, array.array("f", [2.0, 1.0, 4.0]) + ) + self.assertEqual(fetched_vector2.indices, array.array("I", [3, 9, 11])) + self.assertEqual(fetched_vector2.num_dimensions, 16) + + def test_7729(self): + "7729 - executemany() with setinputsizes()" + self.cursor.execute("delete from TestSparseVectors") + vector = oracledb.SparseVector( + 16, [1, 3, 5], array.array("d", [1, 0, 5]) + ) + values = [[144, 0, 1000], vector] + self.cursor.setinputsizes(None, oracledb.DB_TYPE_VECTOR) + self.cursor.executemany( + """ + insert into TestSparseVectors (IntCol, SparseVectorFlex64Col) + values (:1, :2) + """, + list(enumerate(values)), + ) + self.cursor.execute( + """ + select SparseVectorFlex64Col + from TestSparseVectors order by IntCol + """ + ) + (fetched_vector1,), (fetched_vector2,) = self.cursor.fetchall() + self.assertEqual( + fetched_vector1.values, array.array("d", [144.0, 1000.0]) + ) + self.assertEqual(fetched_vector1.indices, array.array("I", [0, 2])) + self.assertEqual(fetched_vector1.num_dimensions, 3) + self.assertEqual(fetched_vector2.values, vector.values) + self.assertEqual(fetched_vector2.indices, vector.indices) + self.assertEqual(fetched_vector2.num_dimensions, vector.num_dimensions) + + def test_7730(self): + "7730 - vector with zero dimensions" + self.cursor.setinputsizes(oracledb.DB_TYPE_VECTOR) + vector = oracledb.SparseVector(4, [], []) + with self.assertRaisesFullCode("ORA-51803", "ORA-21560"): + self.cursor.execute("select :1", [vector]) + + def test_7731(self): + "7731 - test inserting a vector as a string and fetching it" + self.cursor.execute("delete from TestSparseVectors") + self.cursor.execute( + """ + insert into TestSparseVectors (IntCol, SparseVectorFlexAllCol) + values (1, '[4, [1, 3], [1.0, 2.0]]') + """ + ) + self.cursor.execute( + "select SparseVectorFlexAllCol from TestSparseVectors" + ) + vector = self.cursor.fetchone()[0] + self.assertEqual(vector.values, array.array("f", [1, 2])) + self.assertEqual(vector.num_dimensions, 4) + self.assertEqual(vector.indices, array.array("I", [1, 3])) + + def test_7732(self): + "7732 - SparseVector() with invalid values" + # pass strings instead of number or list/array.array + with self.assertRaises(TypeError): + oracledb.SparseVector("10", [1, 2], [1.5, 3.5]) + with self.assertRaises(TypeError): + oracledb.SparseVector(10, "[1, 2]", [1.5, 3.5]) + with self.assertRaises(TypeError): + oracledb.SparseVector(10, [1, 2], "[1.5, 3.5]") + + # insert matrix + with self.assertRaises(TypeError): + oracledb.SparseVector(10, [[1, 2]], [1.5, 3.5]) + with self.assertRaises(TypeError): + oracledb.SparseVector(10, [1, 2], [[1.5, 3.5]]) + # use num_dimensions as a list + with self.assertRaises(TypeError): + oracledb.SparseVector([10], [1, 2], [1.5, 3.5]) + # use num_dimensions as a float + value = oracledb.SparseVector(10.4, [1, 2], [1.5, 3.5]) + self.assertEqual(value.num_dimensions, 10) + + # negative index + with self.assertRaises(OverflowError): + oracledb.SparseVector(10, [-1], [1.5]) + # negative num_dimensions + with self.assertRaises(OverflowError): + 
oracledb.SparseVector(-10, [1], [3.5]) + # use float index + with self.assertRaises(TypeError): + oracledb.SparseVector(10, [2.4], [3.5]) + + def test_7733(self): + "7733 - SparseVector() with indices and values of different length" + with self.assertRaises(TypeError): + oracledb.SparseVector(10, [1], [1.5, 3.5]) + with self.assertRaises(TypeError): + oracledb.SparseVector(10, [1, 2, 3, 4], [6.75]) + + def test_7734(self): + "7734 - declare and insert an empty SparseVector" + value = oracledb.SparseVector(0, [], []) + self.assertEqual(value.values, array.array("d")) + self.assertEqual(value.indices, array.array("I")) + self.assertEqual(value.num_dimensions, 0) + with self.assertRaisesFullCode("ORA-51803", "ORA-21560"): + self.__test_insert_and_fetch(value, "VectorFlexAllCol", "d") + with self.assertRaisesFullCode("ORA-51803", "ORA-21560"): + self.__test_insert_and_fetch_sparse( + value, "SparseVectorFlexAllCol", "d" + ) + + def test_7735(self): + "7735 - select with vector()" + dense_vector = array.array( + "f", [1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 0] + ) + sparse_vector = oracledb.SparseVector(16, [1], array.array("f", [9])) + column_names = [ + "SparseVector8Col", + "SparseVector32Col", + "SparseVector64Col", + "SparseVectorFlex8Col", + "SparseVectorFlex32Col", + "SparseVectorFlex64Col", + "SparseVectorFlexAllCol", + ] + for vector in [dense_vector, sparse_vector]: + for column_name in column_names: + with self.subTest(vector=vector, column_name=column_name): + self.__fetch_with_vector( + vector, column_name, 16, "INT8", "b" + ) + self.__fetch_with_vector( + vector, column_name, 16, "FLOAT32", "f" + ) + self.__fetch_with_vector( + vector, column_name, 16, "FLOAT64", "d" + ) + + # fixed dimension columns + dense_vector = array.array("f", [1, 2]) + sparse_vector = oracledb.SparseVector(2, [1], array.array("f", [1])) + for vector in [dense_vector, sparse_vector]: + for column_name in column_names[3:]: + with self.subTest(vector=vector, column_name=column_name): + self.__fetch_with_vector( + vector, column_name, 2, "INT8", "b" + ) + self.__fetch_with_vector( + vector, column_name, 2, "FLOAT32", "f" + ) + self.__fetch_with_vector( + vector, column_name, 2, "FLOAT64", "d" + ) + + def test_7736(self): + "7736 - test from_vector() with returning and vector storage format" + self.cursor.execute("delete from TestSparseVectors") + values = [16, [1, 2, 15], [2, 45.5, 73.25]] + vector = oracledb.SparseVector(*values) + dense_vector = [0, 2, 45.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73.25] + column_name = "SparseVector64Col" + self.cursor.execute( + f""" + insert into TestSparseVectors (IntCol, {column_name}) + values(1, :vector) + """, + vector=vector, + ) + self.cursor.execute( + f""" + select from_vector({column_name} returning clob format sparse) + from TestSparseVectors + """ + ) + (lob,) = self.cursor.fetchone() + self.assertEqual(json.loads(lob.read()), values) + self.cursor.execute( + f""" + select from_vector({column_name} returning clob format dense) + from TestSparseVectors + """ + ) + (lob,) = self.cursor.fetchone() + self.assertEqual(json.loads(lob.read()), dense_vector) + self.cursor.execute( + f""" + select from_vector({column_name} returning clob) + from TestSparseVectors + """ + ) + (lob,) = self.cursor.fetchone() + self.assertEqual(json.loads(lob.read()), values) + self.cursor.execute( + f""" + select from_vector({column_name} returning varchar2 format sparse) + from TestSparseVectors + """ + ) + self.assertEqual(json.loads(self.cursor.fetchone()[0]), values) + 
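+        # "format dense" serializes all num_dimensions entries as a flat
+        # JSON array, while "format sparse" (and the default when no
+        # format is given for a sparse column) keeps the
+        # [num_dimensions, [indices], [values]] form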
self.cursor.execute( + f""" + select from_vector({column_name} returning varchar2 format dense) + from TestSparseVectors + """ + ) + self.assertEqual(json.loads(self.cursor.fetchone()[0]), dense_vector) + self.cursor.execute( + f""" + select from_vector({column_name} returning varchar2) + from TestSparseVectors + """ + ) + self.assertEqual(json.loads(self.cursor.fetchone()[0]), values) + + +if __name__ == "__main__": + test_env.run_test_cases() diff --git a/tests/test_env.py b/tests/test_env.py index 2a7c3958..d6377752 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -207,7 +207,7 @@ def get_client_version(): value = PARAMETERS.get(name) if value is None: if get_is_thin(): - value = (23, 5) + value = (23, 7) else: oracledb.init_oracle_client() value = oracledb.clientversion()[:2] From 28df38954b7c4c0f070994d12ed964e654188e63 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:44:04 -0700 Subject: [PATCH 010/178] Added support for using the TLS SNI extension to reduce the number of TLS renegotiations that are needed to connect to the database. --- doc/src/api_manual/connect_params.rst | 16 ++++- doc/src/api_manual/module.rst | 71 +++++++++++++++++++---- doc/src/api_manual/pool_params.rst | 6 +- doc/src/release_notes.rst | 3 + src/oracledb/base_impl.pxd | 1 + src/oracledb/connect_params.py | 21 ++++++- src/oracledb/connection.py | 10 ++++ src/oracledb/impl/base/connect_params.pyx | 4 ++ src/oracledb/impl/base/parsers.pyx | 3 +- src/oracledb/impl/thin/transport.pyx | 30 +++++++++- src/oracledb/pool.py | 10 ++++ src/oracledb/pool_params.py | 12 +++- tests/test_4500_connect_params.py | 27 ++++++++- tests/test_4700_pool_params.py | 1 + utils/fields.cfg | 8 +++ 15 files changed, 205 insertions(+), 18 deletions(-) diff --git a/doc/src/api_manual/connect_params.rst b/doc/src/api_manual/connect_params.rst index bfe80edd..a082bbb8 100644 --- a/doc/src/api_manual/connect_params.rst +++ b/doc/src/api_manual/connect_params.rst @@ -62,11 +62,15 @@ ConnectParams Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, handle=None) + driver_name=oracledb.defaults.driver_name, use_sni=None, handle=None) Sets the values for one or more of the parameters of a ConnectParams object. + .. versionchanged:: 3.0.0 + + The ``use_sni`` parameter was added. + .. versionchanged:: 2.5.0 The ``program``, ``machine``, ``terminal``, ``osuser``, and @@ -509,6 +513,16 @@ ConnectParams Attributes The default value of this attribute was changed from *60.0* seconds to *20.0* seconds. +.. attribute:: ConnectParams.use_sni + + This read-only attribute is a boolean which indicates whether to use the + TLS Server Name Indicator (SNI) extension to bypass the second TLS + negotiation that would otherwise be required. + + This attribute is supported in both python-oracledb Thin and Thick modes. + + .. versionadded:: 3.0.0 + .. 
attribute:: ConnectParams.terminal This read-only attribute is a string that specifies the terminal diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index 13c6ae8b..70be0137 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -52,7 +52,7 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) Constructor for creating a connection to the database. Returns a :ref:`Connection Object `. All parameters are optional and can be @@ -362,6 +362,12 @@ Oracledb Methods are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. + The ``use_sni`` parameter is expected to be a boolean which indicates + whether to use the TLS Server Name Indicator (SNI) extension to bypass the + second TLS neogiation that would otherwise be required. This parameter is + used in both python-oracledb Thin and Thick modes. The default value is + False. + The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle Database. This value is only used in the python-oracledb Thin mode. The @@ -397,8 +403,8 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias`` parameter was added. The ``pool`` parameter was - deprecated. Use :meth:`ConnectionPool.acquire()` instead. + The ``pool_alias`` and ``use_sni`` parameters were added. The ``pool`` + parameter was deprecated. Use :meth:`ConnectionPool.acquire()` instead. .. versionchanged:: 2.5.0 @@ -443,7 +449,7 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) Constructor for creating a connection to the database. Returns an :ref:`AsyncConnection Object `. All parameters are optional @@ -689,6 +695,12 @@ Oracledb Methods are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. + The ``use_sni`` parameter is expected to be a boolean which indicates + whether to use the TLS Server Name Indicator (SNI) extension to bypass the + second TLS neogiation that would otherwise be required. This parameter is + used in both python-oracledb Thin and Thick modes. The default value is + False. + The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle Database. This value is only used in the python-oracledb Thin mode. The @@ -718,8 +730,9 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias`` parameter was added. The ``pool`` parameter was - deprecated. Use :meth:`AsyncConnectionPool.acquire()` instead. + The ``pool_alias`` and ``use_sni`` parameters were added. The ``pool`` + parameter was deprecated. Use :meth:`AsyncConnectionPool.acquire()` + instead. .. 
versionchanged:: 2.5.0 @@ -762,7 +775,7 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) Contains all the parameters that can be used to establish a connection to the database. @@ -1026,6 +1039,12 @@ Oracledb Methods are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. + The ``use_sni`` parameter is expected to be a boolean which indicates + whether to use the TLS Server Name Indicator (SNI) extension to bypass the + second TLS neogiation that would otherwise be required. This parameter is + used in both python-oracledb Thin and Thick modes. The default value is + False. + The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle Database. This value is only used in the python-oracledb Thin mode. The @@ -1056,6 +1075,10 @@ Oracledb Methods python-oracledb Thick mode. It should be used with extreme caution. The default value is *0*. + .. versionchanged:: 3.0.0 + + The ``use_sni`` parameter was added. + .. versionchanged:: 2.5.0 The ``program``, ``machine``, ``terminal``, ``osuser``, and @@ -1115,7 +1138,7 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) Creates a connection pool with the supplied parameters and returns the :ref:`ConnectionPool object ` for the pool. See :ref:`Connection @@ -1497,6 +1520,12 @@ Oracledb Methods are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. + The ``use_sni`` parameter is expected to be a boolean which indicates + whether to use the TLS Server Name Indicator (SNI) extension to bypass the + second TLS neogiation that would otherwise be required. This parameter is + used in both python-oracledb Thin and Thick modes. The default value is + False. + The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle Database. This value is only used in the python-oracledb Thin mode. The @@ -1540,6 +1569,10 @@ Oracledb Methods ``driver_name`` parameters were added. Support for ``edition`` and ``appcontext`` was added to python-oracledb Thin mode. + .. versionchanged:: 2.5.0 + + The ``use_sni`` parameter was added. + .. versionchanged:: 2.3.0 The default value of the ``retry_delay`` parameter was changed from *0* @@ -1582,7 +1615,7 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) Creates a connection pool with the supplied parameters and returns the :ref:`AsyncConnectionPool object ` for the pool. @@ -1884,6 +1917,12 @@ Oracledb Methods are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. 
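+
+    As a rough sketch (the host, service name, and credentials below are
+    hypothetical), these TLS-related settings, including the ``use_sni``
+    parameter described next, might be combined as follows::
+
+        import getpass
+        import oracledb
+
+        pool = oracledb.create_pool(
+            user="app_user",
+            password=getpass.getpass("Password: "),
+            dsn="tcps://dbhost.example.com:1522/orclpdb1",
+            ssl_server_dn_match=True,
+            use_sni=True,
+            min=1,
+            max=4,
+            increment=1,
+        )
+
+    In a full connect descriptor the same setting appears as
+    ``(USE_SNI=ON)`` inside the DESCRIPTION section.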
+ The ``use_sni`` parameter is expected to be a boolean which indicates + whether to use the TLS Server Name Indicator (SNI) extension to bypass the + second TLS neogiation that would otherwise be required. This parameter is + used in both python-oracledb Thin and Thick modes. The default value is + False. + The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle Database. This value is only used in the python-oracledb Thin mode. The @@ -1913,7 +1952,7 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias`` parameter was added. + The ``pool_alias`` and ``use_sni`` parameters were added. .. versionchanged:: 2.5.0 @@ -2129,7 +2168,7 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) Creates and returns a :ref:`PoolParams Object `. The object can be passed to :meth:`oracledb.create_pool()`. @@ -2454,6 +2493,12 @@ Oracledb Methods are using python-oracledb Thick mode, Oracle Client 23ai is additionally required. + The ``use_sni`` parameter is expected to be a boolean which indicates + whether to use the TLS Server Name Indicator (SNI) extension to bypass the + second TLS neogiation that would otherwise be required. This parameter is + used in both python-oracledb Thin and Thick modes. The default value is + False. + The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle Database. This value is only used in the python-oracledb Thin mode. The @@ -2484,6 +2529,10 @@ Oracledb Methods python-oracledb Thick mode. It should be used with extreme caution. The default value is *0*. + .. versionchanged:: 3.0.0 + + The ``use_sni`` parameter was added. + .. versionchanged:: 2.5.0 The ``program``, ``machine``, ``terminal``, ``osuser``, and diff --git a/doc/src/api_manual/pool_params.rst b/doc/src/api_manual/pool_params.rst index eb05cb12..50eb272d 100644 --- a/doc/src/api_manual/pool_params.rst +++ b/doc/src/api_manual/pool_params.rst @@ -51,10 +51,14 @@ PoolParams Methods use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, handle=None) + driver_name=oracledb.defaults.driver_name, use_sni=None, handle=None) Sets one or more of the parameters. + .. versionchanged:: 3.0.0 + + The ``use_sni`` parameter was added. + .. versionchanged:: 2.5.0 The ``program``, ``machine``, ``terminal``, ``osuser``, and diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index e502bd02..f341bd2b 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -22,6 +22,9 @@ Thin Mode Changes #) Perform TLS server matching in python-oracledb instead of the Python SSL library to allow alternate names to be checked (`issue 415 `__). +#) Added parameter :data:`ConnectParams.use_sni` to specify that the TLS SNI + extension should be used to reduce the number of TLS neegotiations that are + needed to connect to the database. #) Host names are now resolved to IP addresses in python-oracledb instead of the Python libraries. 
Address list load balancing and failover settings will be used when establishing connections. diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index 5cb1889e..082982c5 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -496,6 +496,7 @@ cdef class Description(ConnectParamsNode): public uint32_t purity public bint ssl_server_dn_match public bint use_tcp_fast_open + public bint use_sni public str ssl_server_cert_dn public object ssl_version public str wallet_location diff --git a/src/oracledb/connect_params.py b/src/oracledb/connect_params.py index 19e2df6b..0d38ddd2 100644 --- a/src/oracledb/connect_params.py +++ b/src/oracledb/connect_params.py @@ -103,6 +103,7 @@ def __init__( terminal: str = oracledb.defaults.terminal, osuser: str = oracledb.defaults.osuser, driver_name: str = oracledb.defaults.driver_name, + use_sni: bool = False, handle: int = 0, ): """ @@ -294,6 +295,10 @@ def __init__( - driver_name: the driver name used by the client to connect to the Oracle Database (default: oracledb.defaults.driver_name) + - use_sni: boolean indicating whether to use the TLS SNI extension to + bypass the second TLS neogiation that would otherwise be required + (default: False) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) @@ -346,7 +351,8 @@ def __repr__(self): + f"machine={self.machine!r}, " + f"terminal={self.terminal!r}, " + f"osuser={self.osuser!r}, " - + f"driver_name={self.driver_name!r}" + + f"driver_name={self.driver_name!r}, " + + f"use_sni={self.use_sni!r}" + ")" ) @@ -729,6 +735,15 @@ def user(self) -> str: """ return self._impl.user + @property + @_flatten_value + def use_sni(self) -> Union[list, bool]: + """ + Boolean indicating whether to use the TLS SNI extension to bypass the + second TLS neogiation that would otherwise be required. + """ + return [d.use_sni for d in self._impl.description_list.children] + @property @_flatten_value def use_tcp_fast_open(self) -> Union[list, bool]: @@ -848,6 +863,7 @@ def set( terminal: str = None, osuser: str = None, driver_name: str = None, + use_sni: bool = None, handle: int = None, ): """ @@ -1027,6 +1043,9 @@ def set( - driver_name: the driver name used by the client to connect to the Oracle Database + - use_sni: boolean indicating whether to use the TLS SNI extension to + bypass the second TLS neogiation that would otherwise be required + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index 8e4937f7..41016717 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -1271,6 +1271,7 @@ def connect( terminal: str = oracledb.defaults.terminal, osuser: str = oracledb.defaults.osuser, driver_name: str = oracledb.defaults.driver_name, + use_sni: bool = False, handle: int = 0, ) -> Connection: """ @@ -1481,6 +1482,10 @@ def connect( - driver_name: the driver name used by the client to connect to the Oracle Database (default: oracledb.defaults.driver_name) + - use_sni: boolean indicating whether to use the TLS SNI extension to + bypass the second TLS neogiation that would otherwise be required + (default: False) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. 
It should be used with extreme caution (default: 0) @@ -2036,6 +2041,7 @@ def connect_async( terminal: str = oracledb.defaults.terminal, osuser: str = oracledb.defaults.osuser, driver_name: str = oracledb.defaults.driver_name, + use_sni: bool = False, handle: int = 0, ) -> AsyncConnection: """ @@ -2246,6 +2252,10 @@ def connect_async( - driver_name: the driver name used by the client to connect to the Oracle Database (default: oracledb.defaults.driver_name) + - use_sni: boolean indicating whether to use the TLS SNI extension to + bypass the second TLS neogiation that would otherwise be required + (default: False) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) diff --git a/src/oracledb/impl/base/connect_params.pyx b/src/oracledb/impl/base/connect_params.pyx index 6d7036cd..0d17fdc9 100644 --- a/src/oracledb/impl/base/connect_params.pyx +++ b/src/oracledb/impl/base/connect_params.pyx @@ -792,6 +792,8 @@ cdef class Description(ConnectParamsNode): if self.tcp_connect_timeout != DEFAULT_TCP_CONNECT_TIMEOUT: temp = self._build_duration_str(self.tcp_connect_timeout) parts.append(f"(TRANSPORT_CONNECT_TIMEOUT={temp})") + if self.use_sni: + parts.append("(USE_SNI=ON)") if self.sdu != DEFAULT_SDU: parts.append(f"(SDU={self.sdu})") @@ -875,6 +877,7 @@ cdef class Description(ConnectParamsNode): description.use_tcp_fast_open = self.use_tcp_fast_open description.ssl_server_cert_dn = self.ssl_server_cert_dn description.ssl_version = self.ssl_version + description.use_sni = self.use_sni description.wallet_location = self.wallet_location return description @@ -914,6 +917,7 @@ cdef class Description(ConnectParamsNode): _set_bool_param(args, "source_route", &self.source_route) _set_uint_param(args, "retry_count", &self.retry_count) _set_uint_param(args, "retry_delay", &self.retry_delay) + _set_bool_param(args, "use_sni", &self.use_sni) _set_uint_param(args, "sdu", &self.sdu) self.sdu = min(max(self.sdu, 512), 2097152) # sanitize SDU _set_duration_param(args, "tcp_connect_timeout", diff --git a/src/oracledb/impl/base/parsers.pyx b/src/oracledb/impl/base/parsers.pyx index e7ba295c..6ddeba4e 100644 --- a/src/oracledb/impl/base/parsers.pyx +++ b/src/oracledb/impl/base/parsers.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2024, Oracle and/or its affiliates. +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -65,6 +65,7 @@ COMMON_PARAM_NAMES = set([ "ssl_server_cert_dn", "ssl_server_dn_match", "transport_connect_timeout", + "use_sni", "wallet_location", ]) diff --git a/src/oracledb/impl/thin/transport.pyx b/src/oracledb/impl/thin/transport.pyx index cc7c624d..2d84c6fc 100644 --- a/src/oracledb/impl/thin/transport.pyx +++ b/src/oracledb/impl/thin/transport.pyx @@ -36,6 +36,7 @@ cdef class Transport: cdef: object _transport object _ssl_context + str _ssl_sni_data uint32_t _transport_num ssize_t _max_packet_size uint32_t _op_num @@ -43,6 +44,20 @@ cdef class Transport: bint _full_packet_size bint _is_async + cdef str _calc_sni_data(self, Description description): + """ + Calculates the string used for the special SNI handling that allows one + of the TLS negotiations to be bypassed. 
+ """ + cdef str server_type_part = "" + if description.server_type is not None: + server_type_part = f".T1.{description.server_type[:1]}" + return ( + f"S{len(description.service_name)}.{description.service_name}" + f"{server_type_part}" + f".V3.{TNS_VERSION_DESIRED}" + ) + cdef str _get_debugging_header(self, str operation): """ Returns the header line used for debugging packets. @@ -160,6 +175,12 @@ cdef class Transport: # established self._ssl_context.check_hostname = False + # calculate the SNI data to send to the server, if applicable + if description.use_sni: + self._ssl_sni_data = self._calc_sni_data(description) + else: + self._ssl_sni_data = None + cdef Packet extract_packet(self, bytes data=None): """ Extracts a packet from the data, if possible (after first appending it @@ -236,7 +257,11 @@ cdef class Transport: """ Negotiate TLS on the socket. """ - self._transport = self._ssl_context.wrap_socket(sock) + if DEBUG_PACKETS: + self._print_output(self._get_debugging_header("Negotiate TLS")) + self._transport = self._ssl_context.wrap_socket( + sock, server_hostname=self._ssl_sni_data + ) if description.ssl_server_dn_match: check_server_dn(self._transport, description.ssl_server_cert_dn, address.host) @@ -256,11 +281,14 @@ cdef class Transport: """ Negotiate TLS on the socket asynchronously. """ + if DEBUG_PACKETS: + self._print_output(self._get_debugging_header("Negotiate TLS")) orig_transport = self._transport loop = protocol._read_buf._loop self._transport = await loop.start_tls( self._transport, protocol, self._ssl_context, + server_hostname=self._ssl_sni_data ) if description.ssl_server_dn_match: sock = self._transport.get_extra_info("ssl_object") diff --git a/src/oracledb/pool.py b/src/oracledb/pool.py index 9728442f..d024fc3c 100644 --- a/src/oracledb/pool.py +++ b/src/oracledb/pool.py @@ -667,6 +667,7 @@ def create_pool( terminal: str = oracledb.defaults.terminal, osuser: str = oracledb.defaults.osuser, driver_name: str = oracledb.defaults.driver_name, + use_sni: bool = False, handle: int = 0, ) -> ConnectionPool: """ @@ -934,6 +935,10 @@ def create_pool( - driver_name: the driver name used by the client to connect to the Oracle Database (default: oracledb.defaults.driver_name) + - use_sni: boolean indicating whether to use the TLS SNI extension to + bypass the second TLS neogiation that would otherwise be required + (default: False) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) @@ -1172,6 +1177,7 @@ def create_pool_async( terminal: str = oracledb.defaults.terminal, osuser: str = oracledb.defaults.osuser, driver_name: str = oracledb.defaults.driver_name, + use_sni: bool = False, handle: int = 0, ) -> AsyncConnectionPool: """ @@ -1440,6 +1446,10 @@ def create_pool_async( - driver_name: the driver name used by the client to connect to the Oracle Database (default: oracledb.defaults.driver_name) + - use_sni: boolean indicating whether to use the TLS SNI extension to + bypass the second TLS neogiation that would otherwise be required + (default: False) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. 
It should be used with extreme caution (default: 0) diff --git a/src/oracledb/pool_params.py b/src/oracledb/pool_params.py index 5c353d11..9050112e 100644 --- a/src/oracledb/pool_params.py +++ b/src/oracledb/pool_params.py @@ -116,6 +116,7 @@ def __init__( terminal: str = oracledb.defaults.terminal, osuser: str = oracledb.defaults.osuser, driver_name: str = oracledb.defaults.driver_name, + use_sni: bool = False, handle: int = 0, ): """ @@ -363,6 +364,10 @@ def __init__( - driver_name: the driver name used by the client to connect to the Oracle Database (default: oracledb.defaults.driver_name) + - use_sni: boolean indicating whether to use the TLS SNI extension to + bypass the second TLS neogiation that would otherwise be required + (default: False) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) @@ -429,7 +434,8 @@ def __repr__(self): + f"machine={self.machine!r}, " + f"terminal={self.terminal!r}, " + f"osuser={self.osuser!r}, " - + f"driver_name={self.driver_name!r}" + + f"driver_name={self.driver_name!r}, " + + f"use_sni={self.use_sni!r}" + ")" ) @@ -626,6 +632,7 @@ def set( terminal: str = None, osuser: str = None, driver_name: str = None, + use_sni: bool = None, handle: int = None, ): """ @@ -856,6 +863,9 @@ def set( - driver_name: the driver name used by the client to connect to the Oracle Database + - use_sni: boolean indicating whether to use the TLS SNI extension to + bypass the second TLS neogiation that would otherwise be required + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution diff --git a/tests/test_4500_connect_params.py b/tests/test_4500_connect_params.py index 008a751c..ed6de051 100644 --- a/tests/test_4500_connect_params.py +++ b/tests/test_4500_connect_params.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2021, 2024, Oracle and/or its affiliates. +# Copyright (c) 2021, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -686,6 +686,7 @@ def test_4539(self): ("terminal", "my_terminal"), ("osuser", "me"), ("driver_name", "custom_driver"), + ("use_sni", True), ] params = oracledb.ConnectParams(**dict(values)) parts = [f"{name}={value!r}" for name, value in values] @@ -737,6 +738,7 @@ def test_4539(self): ("terminal", "modified_terminal"), ("osuser", "modified_osuser"), ("driver_name", "modified_driver_name"), + ("use_sni", False), ] params.set(**dict(new_values)) parts = [f"{name}={value!r}" for name, value in new_values] @@ -1184,6 +1186,29 @@ def test_4568(self): self.assertEqual(params.user, user) self.assertEqual(params.stmtcachesize, stmtcachesize) + def test_4569(self): + "4569 - test USE_SNI in connect string" + options = [("on", True), ("off", False)] + service_name = "service_4569" + host = "host_4569" + port = 4569 + for str_val, val in options: + easy_connect = f"{host}:{port}/{service_name}?use_sni={str_val}" + descriptor_part = f"(USE_SNI={str_val.upper()})" if val else "" + connect_descriptor = ( + f"(DESCRIPTION={descriptor_part}" + f"(ADDRESS=(PROTOCOL=tcp)(HOST={host})" + f"(PORT={port}))(CONNECT_DATA=(SERVICE_NAME={service_name})))" + ) + for connect_string in (easy_connect, connect_descriptor): + params = oracledb.ConnectParams() + params.parse_connect_string(connect_string) + self.assertEqual(params.host, host) + self.assertEqual(params.port, port) + self.assertEqual(params.service_name, service_name) + self.assertEqual(params.use_sni, val) + self.assertEqual(params.get_connect_string(), connect_descriptor) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_4700_pool_params.py b/tests/test_4700_pool_params.py index 2e062152..b0ec5054 100644 --- a/tests/test_4700_pool_params.py +++ b/tests/test_4700_pool_params.py @@ -129,6 +129,7 @@ def test_4701(self): ("terminal", "my_terminal"), ("osuser", "me"), ("driver_name", "custom_driver"), + ("use_sni", True), ] params = oracledb.PoolParams(**dict(values)) parts = [f"{name}={value!r}" for name, value in values] diff --git a/utils/fields.cfg b/utils/fields.cfg index f6736cb7..63f07edb 100644 --- a/utils/fields.cfg +++ b/utils/fields.cfg @@ -488,6 +488,14 @@ default: oracledb.defaults.driver_name description = the driver name used by the client to connect to the Oracle Database +[use_sni] +type = bool +default = False +source = description +description = + boolean indicating whether to use the TLS SNI extension to bypass the + second TLS neogiation that would otherwise be required + [handle] type = int default = 0 From 02a06bfe7a506363a76b8e20b7a6dc242202a9fd Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:46:03 -0700 Subject: [PATCH 011/178] All connect strings are parsed in the driver. 
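For illustration only, a minimal sketch of enabling the new ``use_sni`` option added by the preceding patch, either as a keyword argument or directly in an Easy Connect string; the host, port, service name, and credentials are placeholders for a TLS-enabled database service.

.. code-block:: python

    import getpass

    import oracledb

    user_pwd = getpass.getpass("Password: ")

    # keyword argument form: SNI data is sent during the TLS handshake so
    # that the second TLS negotiation can be skipped
    connection = oracledb.connect(
        user="hr",
        password=user_pwd,
        dsn="tcps://dbhost.example.com:2484/orclpdb",
        use_sni=True,
    )

    # equivalent Easy Connect form; a full connect descriptor would instead
    # carry (USE_SNI=ON) in its DESCRIPTION section
    connection = oracledb.connect(
        user="hr",
        password=user_pwd,
        dsn="tcps://dbhost.example.com:2484/orclpdb?use_sni=on",
    )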
--- doc/src/release_notes.rst | 7 ++ doc/src/user_guide/connection_handling.rst | 85 ++++++++++++---------- src/oracledb/base_impl.pxd | 5 +- src/oracledb/connection.py | 6 +- src/oracledb/impl/base/connect_params.pyx | 38 ++++++++-- src/oracledb/impl/base/parsers.pyx | 35 ++++++++- src/oracledb/impl/thick/connection.pyx | 29 +++++--- src/oracledb/impl/thick/pool.pyx | 28 +++---- src/oracledb/impl/thin/messages.pyx | 9 ++- src/oracledb/pool.py | 4 +- tests/test_4500_connect_params.py | 18 +++++ utils/templates/connection.py | 6 +- utils/templates/pool.py | 4 +- 13 files changed, 187 insertions(+), 87 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index f341bd2b..00383ffe 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -68,6 +68,13 @@ Common Changes #) Added :meth:`oracledb.register_password_type()` to allow users to register a function that will be called when a password is supplied as a dictionary containing the key "type". +#) All connect strings are now parsed by the driver. Previously, only thin + mode parsed all connect strings and thick mode passed the connect string + unchanged to the Oracle Client library to parse. Parameters unrecognized by + the driver in Easy Connect strings are now ignored. Parameters unrecognized + by the driver in the ``CONNECT_DATA`` section of a full connect descriptor + are passed through unchanged. All other parameters in other sections of a + full connect deescriptor that are unrecognized by the driver are ignored. #) Added attributes :attr:`DbObjectAttribute.precision`, :attr:`DbObjectAttribute.scale`, and :attr:`DbObjectAttribute.max_size` that provide additional metadata about diff --git a/doc/src/user_guide/connection_handling.rst b/doc/src/user_guide/connection_handling.rst index 04be5043..80c5ac77 100644 --- a/doc/src/user_guide/connection_handling.rst +++ b/doc/src/user_guide/connection_handling.rst @@ -129,19 +129,16 @@ If you like to encapsulate values, parameters can be passed using a conn = oracledb.connect(user="my_user", password="my_password", params=params) Some values such as the database host name can be specified as ``connect()`` -parameters, as part of the connect string, and in the ``params`` object. If a -``dsn`` is passed, the python-oracledb :ref:`Thick ` mode will -use the ``dsn`` string to connect. Otherwise, a connection string is internally -constructed from the individual parameters and ``params`` object values, with -the individual parameters having precedence. In python-oracledb's default Thin -mode, a connection string is internally used that contains all relevant values -specified. The precedence in Thin mode is that values in any ``dsn`` parameter -override values passed as individual parameters, which themselves override -values set in the ``params`` object. Similar precedence rules also apply to -other values. +parameters, as part of the connect string, and in the ``params`` object. If a +``dsn`` is passed, a connection string is internally constructed from the +individual parameters and ``params`` object values, with the individual +parameters having precedence. The precedence is that values in any ``dsn`` +parameter override values passed as individual parameters, which themselves +override values set in the ``params`` object. Similar precedence rules also +apply to other values. 
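A small sketch of the precedence rules described above, assuming a reachable database; the host, service names, and credentials are placeholders.

.. code-block:: python

    import getpass

    import oracledb

    user_pwd = getpass.getpass("Password: ")

    # values set in the params object have the lowest precedence
    params = oracledb.ConnectParams(service_name="fallback_service", port=1522)

    # values parsed from the dsn override individual arguments, which in turn
    # override the values in the params object, so this connects to
    # dbhost.example.com:1521/orclpdb
    connection = oracledb.connect(
        user="hr",
        password=user_pwd,
        dsn="dbhost.example.com:1521/orclpdb",
        params=params,
    )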
A single, combined connection string can be passed to ``connect()`` but this -may cause complications if the password contains '@' or '/' characters: +may cause complications if the password contains "@" or "/" characters: .. code-block:: python @@ -329,26 +326,37 @@ If the database is using a non-default port, it must be specified: The Easy Connect syntax supports Oracle Database service names. It cannot be used with the older System Identifiers (SID). -The latest `Easy Connect `__ syntax allows the use of multiple hosts or ports, along with optional entries for the wallet location, the distinguished name of the database server, and allows some network configuration options such as the connection timeout and keep-alive values to -be set. This means that a :ref:`sqlnet.ora ` file is not needed -for some common connection scenarios. See the technical brief `Oracle Database -Easy Connect Plus `__ for more information. +be set:: -In python-oracledb Thin mode, any unknown Easy Connect options are ignored and -are not passed to the database. See :ref:`Connection String Differences -` for more information. +.. code-block:: python -In python-oracledb Thick mode, it is the Oracle Client libraries that parse the -Easy Connect string. Check the Easy Connect Naming method in `Oracle Net -Service Administrator's Guide -`__ for the syntax to use in your -version of the Oracle Client libraries. + connection = oracledb.connect(user="hr", password=userpwd, + dsn="dbhost.example.com/orclpdb?expire_time=2") + +This means that a :ref:`sqlnet.ora ` file is not needed for common +connection scenarios. See the technical brief `Oracle Database Easy Connect +Plus `__ for additional information. + +Python-oracledb specific settings can also be passed as Easy Connect arguments. +For example to set the statement cache size used by connections:: + +.. code-block:: python + + connection = oracledb.connect(user="hr", password=userpwd, + dsn="dbhost.example.com/orclpdb?pyo.stmtcachesize=50") + +See :ref:`defineconnparams` and :ref:`definepoolparams` for the settings that +can be passed as arguments. + +Any Easy Connect parameters that are unknown to python-oracledb are ignored and +not passed to the database. See :ref:`Connection String Differences +` for more information. .. _conndescriptor: @@ -386,6 +394,9 @@ This prints:: (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=dbhost.example.com)(PORT=1521)))(CONNECT_DATA=(SERVICE_NAME=orclpdb))(SECURITY=(SSL_SERVER_DN_MATCH=True))) +The ``CONNECT_DATA`` parameters of a full connect descriptor that are +unrecognized by python-oracledb are passed to the database unchanged. + .. _netservice: TNS Aliases for Connection Strings @@ -1216,12 +1227,9 @@ Note :meth:`ConnectParams.set()` has no effect after Some values such as the database host name can be specified as :func:`oracledb.connect()`, parameters, as part of the connect string, and in -the ``params`` object. If a ``dsn`` is passed, the python-oracledb :ref:`Thick -` mode will use the ``dsn`` string to connect. Otherwise, a -connection string is internally constructed from the individual parameters and -``params`` object values, with the individual parameters having precedence. In -python-oracledb's default Thin mode, a connection string is internally used -that contains all relevant values specified. The precedence in Thin mode is +the ``params`` object. 
If a ``dsn`` is passed, a connection string is +internally constructed from the individual parameters and ``params`` object +values, with the individual parameters having precedence. The precedence is that values in any ``dsn`` parameter override values passed as individual parameters, which themselves override values set in the ``params`` object. Similar precedence rules also apply to other values. @@ -2454,15 +2462,12 @@ individually using the ``set()`` method: Some values such as the database host name, can be specified as :func:`oracledb.create_pool()` parameters, as part of the connect string, and -in the ``params`` object. If a ``dsn`` is passed, the python-oracledb -:ref:`Thick ` mode will use the ``dsn`` string to connect. -Otherwise, a connection string is internally constructed from the individual -parameters and ``params`` object values, with the individual parameters having -precedence. In python-oracledb's default Thin mode, a connection string is -internally used that contains all relevant values specified. The precedence in -Thin mode is that values in any ``dsn`` parameter override values passed as -individual parameters, which themselves override values set in the ``params`` -object. Similar precedence rules also apply to other values. +in the ``params`` object. If a ``dsn`` is passed, a connection string is +internally constructed from the individual parameters and ``params`` object +values, with the individual parameters having precedence. The precedence is +that values in any ``dsn`` parameter override values passed as individual +parameters, which themselves override values set in the ``params`` object. +Similar precedence rules also apply to other values. .. _definepoolparams: diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index 082982c5..1af58eac 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -500,9 +500,11 @@ cdef class Description(ConnectParamsNode): public str ssl_server_cert_dn public object ssl_version public str wallet_location + dict extra_connect_data_args str connection_id cdef str _build_duration_str(self, double value) + cdef str _value_repr(self, object value) cdef str build_connect_string(self, str cid=*) cdef int set_server_type(self, str value) except -1 @@ -556,6 +558,7 @@ cdef class ConnectParamsImpl: cdef int _check_credentials(self) except -1 cdef int _copy(self, ConnectParamsImpl other_params) except -1 + cdef str _get_connect_string(self) cdef bytes _get_new_password(self) cdef bytearray _get_obfuscator(self, str secret_value) cdef bytes _get_password(self) diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index 41016717..7a39ae8c 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -549,7 +549,7 @@ def __init__( errors._raise_err(errors.ERR_INVALID_CONNECT_PARAMS) else: params_impl = params._impl.copy() - dsn = params_impl.process_args(dsn, kwargs, thin) + dsn = params_impl.process_args(dsn, kwargs) # see if connection is being acquired from a pool if pool is None: @@ -1545,7 +1545,7 @@ async def _connect(self, dsn, pool, params, kwargs): errors._raise_err(errors.ERR_INVALID_CONNECT_PARAMS) else: params_impl = params._impl.copy() - dsn = params_impl.process_args(dsn, kwargs, thin=True) + dsn = params_impl.process_args(dsn, kwargs) # see if connection is being acquired from a pool if pool is None: diff --git a/src/oracledb/impl/base/connect_params.pyx b/src/oracledb/impl/base/connect_params.pyx index 0d17fdc9..f50af01b 100644 --- a/src/oracledb/impl/base/connect_params.pyx +++ b/src/oracledb/impl/base/connect_params.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2022, 2024, Oracle and/or its affiliates. +# Copyright (c) 2022, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -188,6 +188,12 @@ cdef class ConnectParamsImpl: self.osuser = other_params.osuser self.driver_name = other_params.driver_name + cdef str _get_connect_string(self): + """ + Returns the connect string to use for the stored components. + """ + return self.description_list.build_connect_string() + cdef bytes _get_new_password(self): """ Returns the new password, after removing the obfuscation. @@ -434,7 +440,7 @@ cdef class ConnectParamsImpl: """ Returns a connect string generated from the parameters. """ - return self.description_list.build_connect_string() + return self._get_connect_string() def get_full_user(self): """ @@ -511,7 +517,7 @@ cdef class ConnectParamsImpl: else: self.user = user - def process_args(self, str dsn, dict kwargs, bint thin): + def process_args(self, str dsn, dict kwargs): """ Processes the arguments to connect() and create_pool(). @@ -519,8 +525,7 @@ cdef class ConnectParamsImpl: - if no user was specified in the keyword arguments and a dsn is specified, it is parsed to determine the user, password and connect string and the user and password are stored - - in thin mode, the connect string is then parsed into its - components and stored + - the connect string is then parsed into its components and stored - if no dsn was specified, one is built from the components - the connect string is returned """ @@ -529,10 +534,10 @@ cdef class ConnectParamsImpl: if self.user is None and not self.externalauth and dsn is not None: user, password, dsn = self.parse_dsn_with_credentials(dsn) self.set(dict(user=user, password=password)) - if dsn is not None and thin: + if dsn is not None: self.parse_connect_string(dsn) - if dsn is None: - dsn = self.get_connect_string() + else: + dsn = self._get_connect_string() return dsn @@ -767,6 +772,17 @@ cdef class Description(ConnectParamsNode): return f"{value_minutes}min" return f"{value_int}" + cdef str _value_repr(self, object value): + """ + Returns the representation to use for a value. Strings are returned as + is but dictionaries are returned as key/value pairs in the format + expected by the listener. 
+ """ + if isinstance(value, str): + return value + return "".join(f"({k.upper()}={self._value_repr(v)})" + for k, v in value.items()) + cdef str build_connect_string(self, str cid=None): """ Build a connect string from the components. @@ -829,6 +845,9 @@ cdef class Description(ConnectParamsNode): temp_parts.append(f"(POOL_PURITY=SELF)") elif self.purity == PURITY_NEW: temp_parts.append(f"(POOL_PURITY=NEW)") + if self.extra_connect_data_args is not None: + temp_parts.extend(f"({k.upper()}={self._value_repr(v)})" + for k, v in self.extra_connect_data_args.items()) if self.connection_id is not None: temp_parts.append(f"(CONNECTION_ID={self.connection_id})") if temp_parts: @@ -904,6 +923,9 @@ cdef class Description(ConnectParamsNode): _set_str_param(args, "pool_boundary", self) _set_str_param(args, "connection_id_prefix", self) _set_bool_param(args, "use_tcp_fast_open", &self.use_tcp_fast_open) + extra_args = args.get("extra_connect_data_args") + if extra_args is not None: + self.extra_connect_data_args = extra_args def set_from_description_args(self, dict args): """ diff --git a/src/oracledb/impl/base/parsers.pyx b/src/oracledb/impl/base/parsers.pyx index 6ddeba4e..1c567621 100644 --- a/src/oracledb/impl/base/parsers.pyx +++ b/src/oracledb/impl/base/parsers.pyx @@ -50,8 +50,21 @@ CONTAINER_PARAM_NAMES = set([ "security", ]) -# a set of parameter names supported in EasyConnect strings that are common -# to all drivers +# CONNECT_DATA parameter names that are supported by the driver; all other +# simple key/value pairs are passed unchanged to the database +CONNECT_DATA_PARAM_NAMES = set([ + "cclass", + "connection_id_prefix", + "pool_boundary", + "purity", + "server_type", + "service_name", + "sid", + "use_tcp_fast_open", +]) + +# a set of parameter names supported by the driver in EasyConnect strings that +# are common to all drivers COMMON_PARAM_NAMES = set([ "expire_time", "https_proxy", @@ -593,6 +606,22 @@ cdef class ConnectStringParser(BaseParser): value = self.data_as_str[service_name_end_pos + 1:self.temp_pos] self.description.set_server_type(value) + cdef dict _set_connect_data(self, dict args): + """ + Sets the connect data value. + """ + cdef: + dict extras, result = {} + object value + str key + for key, value in args.items(): + if key in CONNECT_DATA_PARAM_NAMES: + result[key] = value + else: + extras = result.setdefault("extra_connect_data_args", {}) + extras[key] = value + return result + cdef int _set_descriptor_arg( self, dict args, str name, object value ) except -1: @@ -614,6 +643,8 @@ cdef class ConnectStringParser(BaseParser): if not isinstance(addresses, list): addresses = [addresses] value = [dict(address=a) for a in addresses] + [value] + elif name == "connect_data": + value = self._set_connect_data(value) args[name] = value elif isinstance(orig_value, list): args[name].append(value) diff --git a/src/oracledb/impl/thick/connection.pyx b/src/oracledb/impl/thick/connection.pyx index 2adc5ca3..825c3d80 100644 --- a/src/oracledb/impl/thick/connection.pyx +++ b/src/oracledb/impl/thick/connection.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -33,7 +33,7 @@ ctypedef int (*dpiConnSetTextAttrFunc)(dpiConn*, const char*, uint32_t) nogil cdef class ConnectionParams: cdef: - bytes dsn + bytes connect_string bytes username bytes password bytes cclass @@ -44,7 +44,7 @@ cdef class ConnectionParams: bytes private_key bytes driver_name - const char *dsn_ptr + const char *connect_string_ptr const char *username_ptr const char *password_ptr const char *cclass_ptr @@ -55,7 +55,7 @@ cdef class ConnectionParams: const char *private_key_ptr const char *driver_name_ptr - uint32_t dsn_len + uint32_t connect_string_len uint32_t username_len uint32_t password_len uint32_t cclass_len @@ -348,7 +348,7 @@ cdef class ThickConnImpl(BaseConnImpl): def connect(self, ConnectParamsImpl user_params, ThickPoolImpl pool_impl): cdef: - str full_user, cclass, token, private_key + str full_user, cclass, token, private_key, connect_string bytes password_bytes, new_password_bytes dpiCommonCreateParams common_params dpiConnCreateParams conn_params @@ -382,10 +382,13 @@ cdef class ThickConnImpl(BaseConnImpl): params.password = password_bytes params.password_ptr = params.password params.password_len = len(params.password) - if self.dsn is not None: - params.dsn = self.dsn.encode() - params.dsn_ptr = params.dsn - params.dsn_len = len(params.dsn) + if pool_impl is None: + connect_string = user_params._get_connect_string() + if connect_string is not None: + params.connect_string = connect_string.encode() + params.connect_string_ptr = params.connect_string + params.connect_string_len = \ + len(params.connect_string) if pool_impl is None \ or user_params._default_description.cclass is not None: cclass = user_params._default_description.cclass @@ -494,9 +497,11 @@ cdef class ThickConnImpl(BaseConnImpl): with nogil: status = dpiConn_create(driver_info.context, params.username_ptr, params.username_len, params.password_ptr, - params.password_len, params.dsn_ptr, - params.dsn_len, &common_params, - &conn_params, &self._handle) + params.password_len, + params.connect_string_ptr, + params.connect_string_len, + &common_params, &conn_params, + &self._handle) dpiContext_getError(driver_info.context, &error_info) if status < 0: _raise_from_info(&error_info) diff --git a/src/oracledb/impl/thick/pool.pyx b/src/oracledb/impl/thick/pool.pyx index cec14498..f03aeba7 100644 --- a/src/oracledb/impl/thick/pool.pyx +++ b/src/oracledb/impl/thick/pool.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -43,21 +43,21 @@ cdef class ThickPoolImpl(BasePoolImpl): def __init__(self, str dsn, PoolParamsImpl params): cdef: + uint32_t password_len = 0, user_len = 0, connect_string_len = 0 + bytes token_bytes, private_key_bytes, connect_string_bytes bytes session_callback_bytes, name_bytes, driver_name_bytes - bytes edition_bytes, user_bytes, password_bytes, dsn_bytes - uint32_t password_len = 0, user_len = 0, dsn_len = 0 + bytes edition_bytes, user_bytes, password_bytes + const char *connect_string_ptr = NULL + str token, private_key, connect_string dpiCommonCreateParams common_params dpiPoolCreateParams create_params const char *password_ptr = NULL const char *user_ptr = NULL - const char *dsn_ptr = NULL - bytes token_bytes, private_key_bytes uint32_t token_len = 0, private_key_len = 0 const char *token_ptr = NULL const char *private_key_ptr = NULL dpiAccessToken access_token dpiErrorInfo error_info - str token, private_key int status # save parameters @@ -132,7 +132,7 @@ cdef class ThickPoolImpl(BasePoolImpl): common_params.sodaMetadataCache = params.soda_metadata_cache create_params.externalAuth = params.externalauth - # prepare user, password and DSN for use + # prepare user, password and connect string for use if self.username is not None: user_bytes = params.get_full_user().encode() user_ptr = user_bytes @@ -141,16 +141,18 @@ cdef class ThickPoolImpl(BasePoolImpl): if password_bytes is not None: password_ptr = password_bytes password_len = len(password_bytes) - if self.dsn is not None: - dsn_bytes = self.dsn.encode() - dsn_ptr = dsn_bytes - dsn_len = len(dsn_bytes) + connect_string = params._get_connect_string() + if connect_string is not None: + connect_string_bytes = connect_string.encode() + connect_string_ptr = connect_string_bytes + connect_string_len = len(connect_string_bytes) # create pool with nogil: status = dpiPool_create(driver_info.context, user_ptr, user_len, - password_ptr, password_len, dsn_ptr, - dsn_len, &common_params, &create_params, + password_ptr, password_len, + connect_string_ptr, connect_string_len, + &common_params, &create_params, &self._handle) dpiContext_getError(driver_info.context, &error_info) if status < 0: diff --git a/src/oracledb/impl/thin/messages.pyx b/src/oracledb/impl/thin/messages.pyx index 177ce0f2..bd26f7a4 100644 --- a/src/oracledb/impl/thin/messages.pyx +++ b/src/oracledb/impl/thin/messages.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -1419,6 +1419,7 @@ cdef class AuthMessage(Message): str driver_name str edition list appcontext + str connect_string cdef int _encrypt_passwords(self) except -1: """ @@ -1628,6 +1629,7 @@ cdef class AuthMessage(Message): self.driver_name = f"{DRIVER_NAME} thn : {DRIVER_VERSION}" self.edition = params.edition self.appcontext = params.appcontext + self.connect_string = params._get_connect_string() # if drcp is used, use purity = NEW as the default purity for # standalone connections and purity = SELF for connections that belong @@ -1733,6 +1735,8 @@ cdef class AuthMessage(Message): num_pairs += 1 if self.appcontext is not None: num_pairs += len(self.appcontext) * 3 + if self.connect_string is not None: + num_pairs += 1 # write basic data to packet self._write_function_code(buf) @@ -1806,6 +1810,9 @@ cdef class AuthMessage(Message): self._write_key_value(buf, "AUTH_APPCTX_NSPACE\0", entry[0]) self._write_key_value(buf, "AUTH_APPCTX_ATTR\0", entry[1]) self._write_key_value(buf, "AUTH_APPCTX_VALUE\0", entry[2]) + if self.connect_string is not None: + self._write_key_value(buf, "AUTH_CONNECT_STRING", + self.connect_string) @cython.final diff --git a/src/oracledb/pool.py b/src/oracledb/pool.py index d024fc3c..395b7c8a 100644 --- a/src/oracledb/pool.py +++ b/src/oracledb/pool.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -92,7 +92,7 @@ def __init__( params_impl = params._impl.copy() with driver_mode.get_manager() as mode_mgr: thin = mode_mgr.thin - dsn = params_impl.process_args(dsn, kwargs, thin) + dsn = params_impl.process_args(dsn, kwargs) self._set_connection_type(params_impl.connectiontype) self._cache_name = cache_name if cache_name is not None: diff --git a/tests/test_4500_connect_params.py b/tests/test_4500_connect_params.py index ed6de051..fa3c484f 100644 --- a/tests/test_4500_connect_params.py +++ b/tests/test_4500_connect_params.py @@ -1209,6 +1209,24 @@ def test_4569(self): self.assertEqual(params.use_sni, val) self.assertEqual(params.get_connect_string(), connect_descriptor) + def test_4570(self): + "4570 - test passing through unrecognized parameters in CONNECT_DATA" + options = [ + "(SIMPLE_KEY=SIMPLE_VALUE)", + "(COMPLEX_KEY=(SUB_VALUE_A=5)(SUB_VALUE_B=6))", + "(COMPLEX_KEY=(SUB_VALUE_A=5)(SUB_VALUE_B=(SUB_SUB_A=6)))", + ] + for option in options: + with self.subTest(option=option): + connect_string = ( + "(DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST=host4570)" + "(PORT=1521))(CONNECT_DATA=(SERVICE_NAME=service4570)" + f"{option}))" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(connect_string) + self.assertEqual(params.get_connect_string(), connect_string) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/utils/templates/connection.py b/utils/templates/connection.py index 313e478c..27992ab3 100644 --- a/utils/templates/connection.py +++ b/utils/templates/connection.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. 
+# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -547,7 +547,7 @@ def __init__( errors._raise_err(errors.ERR_INVALID_CONNECT_PARAMS) else: params_impl = params._impl.copy() - dsn = params_impl.process_args(dsn, kwargs, thin) + dsn = params_impl.process_args(dsn, kwargs) # see if connection is being acquired from a pool if pool is None: @@ -1311,7 +1311,7 @@ async def _connect(self, dsn, pool, params, kwargs): errors._raise_err(errors.ERR_INVALID_CONNECT_PARAMS) else: params_impl = params._impl.copy() - dsn = params_impl.process_args(dsn, kwargs, thin=True) + dsn = params_impl.process_args(dsn, kwargs) # see if connection is being acquired from a pool if pool is None: diff --git a/utils/templates/pool.py b/utils/templates/pool.py index 65d0a6f4..c43956e6 100644 --- a/utils/templates/pool.py +++ b/utils/templates/pool.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -90,7 +90,7 @@ def __init__( params_impl = params._impl.copy() with driver_mode.get_manager() as mode_mgr: thin = mode_mgr.thin - dsn = params_impl.process_args(dsn, kwargs, thin) + dsn = params_impl.process_args(dsn, kwargs) self._set_connection_type(params_impl.connectiontype) self._cache_name = cache_name if cache_name is not None: From 3c0974e7903c6ad0019d0f1068dde4ee29851c58 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:56:17 -0700 Subject: [PATCH 012/178] Implemented support for max_lifetime_session in thin mode (#410). --- doc/src/api_manual/pool_params.rst | 2 - doc/src/release_notes.rst | 2 + doc/src/user_guide/connection_handling.rst | 21 +++-- src/oracledb/impl/thin/connection.pyx | 5 +- src/oracledb/impl/thin/pool.pyx | 82 +++++++++++++------- tests/ext/test_ext_1000_pool_shrink.py | 44 +++++++++-- tests/ext/test_ext_1900_pool_shrink_async.py | 38 ++++++++- tests/test_2400_pool.py | 3 +- tests/test_5500_pool_async.py | 3 +- 9 files changed, 147 insertions(+), 53 deletions(-) diff --git a/doc/src/api_manual/pool_params.rst b/doc/src/api_manual/pool_params.rst index 50eb272d..596658b7 100644 --- a/doc/src/api_manual/pool_params.rst +++ b/doc/src/api_manual/pool_params.rst @@ -136,8 +136,6 @@ PoolParams Attributes attribute is *0*, then the connections may remain in the pool indefinitely. The default value is *0* seconds. - This attribute is only supported in python-oracledb Thick mode. - .. attribute:: PoolParams.max_sessions_per_shard This read-only attribute is an integer that determines the maximum number diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 00383ffe..15f5c2eb 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -19,6 +19,8 @@ Thin Mode Changes #) Added namespace package :ref:`oracledb.plugins ` for plugins that can be used to extend the capability of python-oracledb. +#) Added support for property :attr:`ConnectionPool.max_lifetime_session` + (`issue 410 `__). #) Perform TLS server matching in python-oracledb instead of the Python SSL library to allow alternate names to be checked (`issue 415 `__). 
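As a sketch of the behaviour noted in these release notes, the pool ``timeout`` and the newly supported ``max_lifetime_session`` settings can be combined in Thin mode; the connection details below are placeholders.

.. code-block:: python

    import getpass

    import oracledb

    user_pwd = getpass.getpass("Password: ")

    # idle connections become candidates for closure after 300 seconds; any
    # connection created more than 1800 seconds ago is closed the next time
    # it is idle in the pool, even if it was recently used
    pool = oracledb.create_pool(
        user="hr",
        password=user_pwd,
        dsn="dbhost.example.com/orclpdb",
        min=2,
        max=10,
        increment=1,
        timeout=300,
        max_lifetime_session=1800,
    )

    with pool.acquire() as connection:
        with connection.cursor() as cursor:
            for (value,) in cursor.execute("select user from dual"):
                print(value)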
diff --git a/doc/src/user_guide/connection_handling.rst b/doc/src/user_guide/connection_handling.rst index 80c5ac77..8ecacbe0 100644 --- a/doc/src/user_guide/connection_handling.rst +++ b/doc/src/user_guide/connection_handling.rst @@ -2079,17 +2079,16 @@ application) and are unused for longer than the pool creation attribute seconds signifying an infinite time and meaning idle connections will never be closed. -In python-oracledb Thick mode, the pool creation parameter -``max_lifetime_session`` also allows pools to shrink. This parameter bounds -the total length of time that a connection can exist starting from the time the -pool created it. If a connection was created ``max_lifetime_session`` or -longer seconds ago, then it will be closed when it is idle in the pool. In the -case when ``timeout`` and ``max_lifetime_session`` are both set, the connection -will be terminated if either the idle timeout happens or the max lifetime -setting is exceeded. Note that when using python-oracledb in Thick mode with -Oracle Client libraries prior to 21c, pool shrinkage is only initiated when the -pool is accessed so pools in fully dormant applications will not shrink until -the application is next used. +The pool creation parameter ``max_lifetime_session`` also allows pools to +shrink. This parameter bounds the total length of time that a connection can +exist starting from the time the pool created it. If a connection was created +``max_lifetime_session`` or longer seconds ago, then it will be closed when it +is idle in the pool. In the case when ``timeout`` and ``max_lifetime_session`` +are both set, the connection will be terminated if either the idle timeout +happens or the max lifetime setting is exceeded. Note that when using +python-oracledb in Thick mode with Oracle Client libraries prior to 21c, pool +shrinkage is only initiated when the pool is accessed so pools in fully dormant +applications will not shrink until the application is next used. For pools created with :ref:`external authentication `, with :ref:`homogeneous ` set to False, or when using :ref:`drcp`, diff --git a/src/oracledb/impl/thin/connection.pyx b/src/oracledb/impl/thin/connection.pyx index f837345f..a3d896d0 100644 --- a/src/oracledb/impl/thin/connection.pyx +++ b/src/oracledb/impl/thin/connection.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -61,7 +61,8 @@ cdef class BaseThinConnImpl(BaseConnImpl): str _service_name bint _drcp_enabled bint _drcp_establish_session - double _time_in_pool + double _time_created + double _time_returned list _temp_lobs_to_close uint32_t _temp_lobs_total_size uint32_t _call_timeout diff --git a/src/oracledb/impl/thin/pool.pyx b/src/oracledb/impl/thin/pool.pyx index 9a708fe6..f34c758e 100644 --- a/src/oracledb/impl/thin/pool.pyx +++ b/src/oracledb/impl/thin/pool.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -70,6 +70,7 @@ cdef class BaseThinPoolImpl(BasePoolImpl): self.set_wait_timeout(params.wait_timeout) self.set_timeout(params.timeout) self._stmt_cache_size = params.stmtcachesize + self._max_lifetime_session = params.max_lifetime_session self._ping_interval = params.ping_interval self._ping_timeout = params.ping_timeout self._free_new_conn_impls = [] @@ -154,6 +155,7 @@ cdef class BaseThinPoolImpl(BasePoolImpl): if conn_impl._protocol._transport is not None: self._conn_impls_to_drop.append(conn_impl) self._notify_bg_task() + self._ensure_min_connections() cdef int _drop_conn_impls_helper(self, list conn_impls_to_drop) except -1: """ @@ -167,6 +169,16 @@ cdef class BaseThinPoolImpl(BasePoolImpl): except: pass + cdef int _ensure_min_connections(self) except -1: + """ + Ensure that the minimum number of connections in the pool is + maintained. + """ + if self._open_count < self.min: + self._num_to_create = max(self._num_to_create, + self.min - self._open_count) + self._notify_bg_task() + cdef PooledConnRequest _get_next_request(self): """ Get the next request to process. @@ -246,13 +258,15 @@ cdef class BaseThinPoolImpl(BasePoolImpl): """ Called before the connection is connected. The connection class and pool attributes are updated and the TLS session is stored on the - transport for reuse. + transport for reuse. The timestamps are also retained for later use. """ if params is not None: conn_impl._cclass = params._default_description.cclass else: conn_impl._cclass = self.connect_params._default_description.cclass conn_impl._pool = self + conn_impl._time_created = time.monotonic() + conn_impl._time_returned = conn_impl._time_created def _process_timeout(self): """ @@ -280,25 +294,35 @@ cdef class BaseThinPoolImpl(BasePoolImpl): bint is_open = conn_impl._protocol._transport is not None BaseThinDbObjectTypeCache type_cache PooledConnRequest request + double tstamp int cache_num self._busy_conn_impls.remove(conn_impl) if conn_impl._dbobject_type_cache_num > 0: cache_num = conn_impl._dbobject_type_cache_num type_cache = get_dbobject_type_cache(cache_num) type_cache._clear_cursors() + if not is_open: + self._open_count -= 1 + self._ensure_min_connections() if conn_impl._is_pool_extra: conn_impl._is_pool_extra = False if is_open and self._open_count >= self.max: if self._free_new_conn_impls and self._open_count == self.max: self._drop_conn_impl(self._free_new_conn_impls.pop(0)) else: + self._open_count -= 1 self._drop_conn_impl(conn_impl) is_open = False - if not is_open: - self._open_count -= 1 - else: + if is_open: conn_impl.warning = None - conn_impl._time_in_pool = time.monotonic() + conn_impl._time_returned = time.monotonic() + if self._max_lifetime_session != 0: + tstamp = conn_impl._time_created + self._max_lifetime_session + if conn_impl._time_returned > tstamp: + self._open_count -= 1 + self._drop_conn_impl(conn_impl) + is_open = False + if is_open: for request in self._requests: if request.in_progress or request.wants_new \ or request.conn_impl is not None \ @@ -349,7 +373,7 @@ cdef class BaseThinPoolImpl(BasePoolImpl): current_time = time.monotonic() while conn_impls_to_check and self._open_count > self.min: conn_impl = conn_impls_to_check[0] - if current_time - conn_impl._time_in_pool < self._timeout: + if current_time - conn_impl._time_returned < self._timeout: break conn_impls_to_check.pop(0) self._drop_conn_impl(conn_impl) 
@@ -535,7 +559,6 @@ cdef class ThinPoolImpl(BaseThinPoolImpl): conn_impl = ThinConnImpl(self.dsn, self.connect_params) self._pre_connect(conn_impl, params) conn_impl.connect(self.connect_params) - conn_impl._time_in_pool = time.monotonic() return conn_impl def _notify_bg_task(self): @@ -729,7 +752,6 @@ cdef class AsyncThinPoolImpl(BaseThinPoolImpl): conn_impl = AsyncThinConnImpl(self.dsn, self.connect_params) self._pre_connect(conn_impl, params) await conn_impl.connect(self.connect_params) - conn_impl._time_in_pool = time.monotonic() return conn_impl def _notify_bg_task(self): @@ -856,7 +878,7 @@ cdef class PooledConnRequest: """ cdef: ReadBuffer buf = conn_impl._protocol._read_buf - double elapsed_time + double elapsed_time, min_create_time bint has_data_ready if not buf._transport._is_async: while buf._pending_error_num == 0: @@ -865,20 +887,27 @@ cdef class PooledConnRequest: break buf.check_control_packet() if buf._pending_error_num != 0: - self.pool_impl._drop_conn_impl(conn_impl) self.pool_impl._open_count -= 1 - else: - self.conn_impl = conn_impl - if self.pool_impl._ping_interval == 0: + self.pool_impl._drop_conn_impl(conn_impl) + return 0 + elif self.pool_impl._max_lifetime_session > 0: + min_create_time = \ + time.monotonic() - self.pool_impl._max_lifetime_session + if conn_impl._time_created < min_create_time: + self.pool_impl._open_count -= 1 + self.pool_impl._drop_conn_impl(conn_impl) + return 0 + self.conn_impl = conn_impl + if self.pool_impl._ping_interval == 0: + self.requires_ping = True + elif self.pool_impl._ping_interval > 0: + elapsed_time = time.monotonic() - conn_impl._time_returned + if elapsed_time > self.pool_impl._ping_interval: self.requires_ping = True - elif self.pool_impl._ping_interval > 0: - elapsed_time = time.monotonic() - conn_impl._time_in_pool - if elapsed_time > self.pool_impl._ping_interval: - self.requires_ping = True - if self.requires_ping: - self.pool_impl._add_request(self) - else: - self.completed = True + if self.requires_ping: + self.pool_impl._add_request(self) + else: + self.completed = True def fulfill(self): """ @@ -890,8 +919,8 @@ cdef class PooledConnRequest: cdef: BaseThinPoolImpl pool = self.pool_impl BaseThinConnImpl conn_impl + ssize_t ix object exc - ssize_t i # if an exception was raised in the background thread, raise it now if self.exception is not None: @@ -910,13 +939,14 @@ cdef class PooledConnRequest: # connection is not required); in addition, ensure that the connection # class matches if not self.wants_new and pool._free_used_conn_impls: - for i, conn_impl in enumerate(reversed(pool._free_used_conn_impls)): + ix = len(pool._free_used_conn_impls) - 1 + for conn_impl in reversed(pool._free_used_conn_impls): if self.cclass is None or conn_impl._cclass == self.cclass: - i = len(pool._free_used_conn_impls) - i - 1 - pool._free_used_conn_impls.pop(i) + pool._free_used_conn_impls.pop(ix) self._check_connection(conn_impl) if self.completed or self.requires_ping: return self.completed + ix -= 1 # check for an available new connection (only permitted if the # connection class matches) diff --git a/tests/ext/test_ext_1000_pool_shrink.py b/tests/ext/test_ext_1000_pool_shrink.py index 3d4649c1..710d923b 100644 --- a/tests/ext/test_ext_1000_pool_shrink.py +++ b/tests/ext/test_ext_1000_pool_shrink.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2024, Oracle and/or its affiliates. +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -62,9 +62,9 @@ def test_ext_1001(self): conn = pool.acquire() self.assertEqual(pool.opened, 3) - @unittest.skipIf(not test_env.get_is_thin(), "doesn't occur in thick mode") + @unittest.skipUnless(test_env.get_is_thin(), "doesn't occur in thick mode") def test_ext_1002(self): - "E1002 - test pool shrinks to min on pool inactivity" + "E1002 - test pool timeout shrinks to min on pool inactivity" pool = test_env.get_pool(min=3, max=10, increment=2, timeout=4) conns = [pool.acquire() for i in range(6)] self.assertEqual(pool.opened, 6) @@ -73,9 +73,9 @@ def test_ext_1002(self): time.sleep(6) self.assertEqual(pool.opened, 3) - @unittest.skipIf(not test_env.get_is_thin(), "doesn't occur in thick mode") + @unittest.skipUnless(test_env.get_is_thin(), "doesn't occur in thick mode") def test_ext_1003(self): - "E1003 - test pool eliminates extra connections on inactivity" + "E1003 - test pool timeout eliminates extra connections on inactivity" pool = test_env.get_pool(min=4, max=10, increment=4, timeout=3) conns = [pool.acquire() for i in range(5)] self.assertEqual(pool.opened, 5) @@ -85,6 +85,40 @@ def test_ext_1003(self): self.assertEqual(pool.opened, 5) del conns + @unittest.skipUnless(test_env.get_is_thin(), "doesn't occur in thick mode") + def test_ext_1004(self): + "E1004 - test pool max_lifetime_session on release" + pool = test_env.get_pool( + min=4, max=10, increment=4, max_lifetime_session=3 + ) + conns = [pool.acquire() for i in range(5)] + self.assertEqual(pool.opened, 5) + time.sleep(2) + self.assertEqual(pool.opened, 8) + time.sleep(2) + for conn in conns: + conn.close() + time.sleep(2) + self.assertEqual(pool.opened, 4) + + @unittest.skipUnless(test_env.get_is_thin(), "doesn't occur in thick mode") + def test_ext_1005(self): + "E1005 - test pool max_lifetime_session on acquire" + pool = test_env.get_pool( + min=4, max=10, increment=4, max_lifetime_session=4 + ) + conns = [pool.acquire() for i in range(5)] + self.assertEqual(pool.opened, 5) + time.sleep(2) + self.assertEqual(pool.opened, 8) + for conn in conns: + conn.close() + time.sleep(4) + with pool.acquire(): + pass + time.sleep(2) + self.assertEqual(pool.opened, 4) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/ext/test_ext_1900_pool_shrink_async.py b/tests/ext/test_ext_1900_pool_shrink_async.py index af5e19e0..1cf2cfcd 100644 --- a/tests/ext/test_ext_1900_pool_shrink_async.py +++ b/tests/ext/test_ext_1900_pool_shrink_async.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2024, Oracle and/or its affiliates. +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -69,7 +69,7 @@ async def test_ext_1901(self): self.assertEqual(pool.opened, 3) async def test_ext_1902(self): - "E1902 - test pool shrinks to min on pool inactivity" + "E1902 - test pool timeout shrinks to min on pool inactivity" pool = test_env.get_pool_async(min=3, max=10, increment=2, timeout=4) conns = [await pool.acquire() for i in range(6)] self.assertEqual(pool.opened, 6) @@ -79,7 +79,7 @@ async def test_ext_1902(self): self.assertEqual(pool.opened, 3) async def test_ext_1903(self): - "E1902 - test pool eliminates extra connections on inactivity" + "E1902 - test pool timeout eliminates extra connections on inactivity" pool = test_env.get_pool_async(min=4, max=10, increment=4, timeout=3) conns = [await pool.acquire() for i in range(5)] self.assertEqual(pool.opened, 5) @@ -89,6 +89,38 @@ async def test_ext_1903(self): self.assertEqual(pool.opened, 5) del conns + async def test_ext_1904(self): + "E1904 - test pool max_lifetime_session on release" + pool = test_env.get_pool_async( + min=4, max=10, increment=4, max_lifetime_session=3 + ) + conns = [await pool.acquire() for i in range(5)] + self.assertEqual(pool.opened, 5) + await asyncio.sleep(2) + self.assertEqual(pool.opened, 8) + await asyncio.sleep(2) + for conn in conns: + await conn.close() + await asyncio.sleep(2) + self.assertEqual(pool.opened, 4) + + async def test_ext_1905(self): + "E1905 - test pool max_lifetime_session on acquire" + pool = test_env.get_pool_async( + min=4, max=10, increment=4, max_lifetime_session=4 + ) + conns = [await pool.acquire() for i in range(5)] + self.assertEqual(pool.opened, 5) + await asyncio.sleep(2) + self.assertEqual(pool.opened, 8) + for conn in conns: + await conn.close() + await asyncio.sleep(4) + async with pool.acquire(): + pass + await asyncio.sleep(2) + self.assertEqual(pool.opened, 4) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_2400_pool.py b/tests/test_2400_pool.py index 93f78d6a..e6685140 100644 --- a/tests/test_2400_pool.py +++ b/tests/test_2400_pool.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -724,7 +724,6 @@ def test_2424(self): with self.assertRaisesFullCode("DPY-4011"): cursor.execute("select user from dual") conn.close() - self.assertEqual(pool.opened, 0) # if a free connection is available, it can be used; otherwise a new # connection will be created diff --git a/tests/test_5500_pool_async.py b/tests/test_5500_pool_async.py index 65c5b7cc..2a510d3d 100644 --- a/tests/test_5500_pool_async.py +++ b/tests/test_5500_pool_async.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2023, 2024, Oracle and/or its affiliates. +# Copyright (c) 2023, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -329,7 +329,6 @@ async def test_5514(self): with self.assertRaisesFullCode("DPY-4011"): await cursor.execute("select user from dual") await conn.close() - self.assertEqual(pool.opened, 0) # if a free connection is available, it can be used; otherwise a # new connection will be created From 94c1573a5e73214e4915a5a1e2c4efdcb3abe670 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:57:00 -0700 Subject: [PATCH 013/178] Fix wait mode. --- .../Python-and-Oracle-Database-The-New-Wave-of-Scripting.html | 4 ++-- samples/tutorial/solutions/connect_pool2.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/tutorial/Python-and-Oracle-Database-The-New-Wave-of-Scripting.html b/samples/tutorial/Python-and-Oracle-Database-The-New-Wave-of-Scripting.html index c5f65436..42c78154 100644 --- a/samples/tutorial/Python-and-Oracle-Database-The-New-Wave-of-Scripting.html +++ b/samples/tutorial/Python-and-Oracle-Database-The-New-Wave-of-Scripting.html @@ -637,7 +637,7 @@

2.2 Connection pool experiments

Try changing getmode to
-oracledb.POOL_GETMODE_WAIT. When numberOfThreads
+oracledb.POOL_GETMODE_NOWAIT. When numberOfThreads
exceeds the maximum size of the pool, the acquire() call will now generate an
error such as "ORA-24459: OCISessionGet() timed out waiting for pool to create
new connections".
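A minimal standalone sketch of the behaviour described above is shown below.
It is illustrative only: the credentials and DSN are placeholders, and the
exact error raised when the pool is exhausted ("ORA-24459" in Thick mode, a
DPY-level timeout in Thin mode) depends on the configuration in use::

    import oracledb

    # Placeholder credentials; substitute values for your environment.
    pool = oracledb.create_pool(
        user="pythonhol",
        password="welcome",
        dsn="localhost/orclpdb1",
        min=2,
        max=5,
        increment=1,
        getmode=oracledb.POOL_GETMODE_NOWAIT,
    )

    # Hold every available connection (the pool grows up to "max"), then try
    # to acquire one more. With POOL_GETMODE_NOWAIT the pool does not block,
    # so the extra acquire() raises an error instead of waiting.
    held = [pool.acquire() for _ in range(5)]
    try:
        extra = pool.acquire()
    except oracledb.Error as exc:
        print("Pool exhausted:", exc)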

@@ -2726,7 +2726,7 @@

Resources


License

-Copyright © 2017, 2023, Oracle and/or its affiliates.
+Copyright © 2017, 2025, Oracle and/or its affiliates.

This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License diff --git a/samples/tutorial/solutions/connect_pool2.py b/samples/tutorial/solutions/connect_pool2.py index 425a4bba..0532691a 100644 --- a/samples/tutorial/solutions/connect_pool2.py +++ b/samples/tutorial/solutions/connect_pool2.py @@ -3,7 +3,7 @@ # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- -# Copyright (c) 2017, 2023, Oracle and/or its affiliates. +# Copyright (c) 2017, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -38,7 +38,7 @@ min=2, max=5, increment=1, - getmode=oracledb.POOL_GETMODE_WAIT, + getmode=oracledb.POOL_GETMODE_NOWAIT, cclass="PYTHONDEMO", purity=oracledb.PURITY_SELF, ) # try PURITY_NEW From fe435a705678773feaad47b8c8760af9f5665cf7 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:57:16 -0700 Subject: [PATCH 014/178] Improve error message when NNE is required by the server. --- doc/src/release_notes.rst | 5 ++ doc/src/user_guide/troubleshooting.rst | 95 +++++++++++++++----------- src/oracledb/impl/thin/constants.pxi | 1 + src/oracledb/impl/thin/messages.pyx | 14 ++-- 4 files changed, 71 insertions(+), 44 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 15f5c2eb..ef4acf39 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -47,6 +47,11 @@ Thin Mode Changes #) Error ``DPY-3001: bequeath is only supported in python-oracledb thick mode`` is now raised when attempting to connect to the database without a connect string. +#) Error ``DPY-3001: Native Network Encryption and Data Integrity is only + supported in python-oracledb thick mode`` is now the secondary error + message returned when Oracle Net NNE or checksumming is required by the + database. Previously, the error ``DPY-4011: the database or network closed + the connection`` was raised. #) Optimization: the connect descriptor sent to the database does not include the RETRY_DELAY parameter unless the RETRY_COUNT parameter is also specified. diff --git a/doc/src/user_guide/troubleshooting.rst b/doc/src/user_guide/troubleshooting.rst index f67e46bd..4bb54a3b 100644 --- a/doc/src/user_guide/troubleshooting.rst +++ b/doc/src/user_guide/troubleshooting.rst @@ -253,6 +253,54 @@ DPY Error Messages The python-oracledb Thin mode code and python-oracledb Thick mode code generates error messages with the prefix ``DPY``. +.. 
_dpy3001: + +DPY-3001 +++++++++ + +**Message:** ``DPY-3001: Native Network Encryption and Data Integrity is only +supported in python-oracledb thick mode`` + +**Action:** To verify if NNE or checksumming are enabled, you can use the +following query:: + + SELECT network_service_banner FROM v$session_connect_info; + +If NNE is enabled, then this query prints output that includes the +available encryption service, the crypto-checksumming service, and the +algorithms in use, such as:: + + NETWORK_SERVICE_BANNER + ------------------------------------------------------------------------------------- + TCP/IP NT Protocol Adapter for Linux: Version 19.0.0.0.0 - Production + Encryption service for Linux: Version 19.0.1.0.0 - Production + AES256 Encryption service adapter for Linux: Version 19.0.1.0.0 - Production + Crypto-checksumming service for Linux: Version 19.0.1.0.0 - Production + SHA256 Crypto-checksumming service adapter for Linux: Version 19.0.1.0.0 - Production + +If NNE is not enabled, then the query will only print the available encryption +and crypto-checksumming services in the output. For example:: + + NETWORK_SERVICE_BANNER + ------------------------------------------------------------------------------------- + TCP/IP NT Protocol Adapter for Linux: Version 19.0.0.0.0 - Production + Encryption service for Linux: Version 19.0.1.0.0 - Production + +If NNE or checksumming are enabled, you can resolve this error by either: + +- Changing the architecture to use Transport Layer Security (TLS), which is + supported in python-oracledb Thin and Thick modes. See `Configuring + Transport Layer Security Encryption + `__. +- Or :ref:`enabling python-oracledb Thick mode `. + +.. seealso:: + + `Oracle Database Security Guide `__ for more information about Oracle Data Network + Encryption and Integrity, and for information about configuring TLS + network encryption. + DPY-3010 ++++++++ @@ -330,48 +378,15 @@ DPY-4011 **Cause:** If this occurs when using an already opened connection, additional messages may indicate a reason. -If the error occurs when creating a connection or connection pool, the common -cause is that Oracle Database has Native Network Encryption (NNE) enabled. NNE -is only supported in python-oracledb Thick mode. - -**Action:** To verify if NNE is enabled, you can use the following query:: - - SELECT network_service_banner FROM v$session_connect_info; - -If NNE is enabled, then this query prints output that includes the -available encryption service, the crypto-checksumming service, and the -algorithms in use, such as:: - - NETWORK_SERVICE_BANNER - ------------------------------------------------------------------------------------- - TCP/IP NT Protocol Adapter for Linux: Version 19.0.0.0.0 - Production - Encryption service for Linux: Version 19.0.1.0.0 - Production - AES256 Encryption service adapter for Linux: Version 19.0.1.0.0 - Production - Crypto-checksumming service for Linux: Version 19.0.1.0.0 - Production - SHA256 Crypto-checksumming service adapter for Linux: Version 19.0.1.0.0 - Production - -If NNE is not enabled, then the query will only print the available encryption -and crypto-checksumming services in the output. 
For example:: - - NETWORK_SERVICE_BANNER - ------------------------------------------------------------------------------------- - TCP/IP NT Protocol Adapter for Linux: Version 19.0.0.0.0 - Production - Encryption service for Linux: Version 19.0.1.0.0 - Production +If the error occurs when creating a connection or connection pool with +python-oracledb 2 or earlier, the common cause is that Oracle Database has +Native Network Encryption (NNE) enabled. NNE and Oracle Net checksumming are +only supported in python-oracledb Thick mode. -If NNE is enabled, you can resolve this error by either: +**Action:** Review if NNE or checksumming are enabled. See +:ref:`DPY-3001 ` for solutions. -- Changing the architecture to use Transport Layer Security (TLS), which is - supported in python-oracledb Thin and Thick modes. See `Configuring - Transport Layer Security Encryption - `__. -- Or :ref:`enabling python-oracledb Thick mode `. - -.. seealso:: - - `Oracle Database Security Guide `__ for more information about Oracle Data Network - Encryption and Integrity, and for information about configuring TLS - network encryption. +If additional messages indicate a reason, follow their guidance. .. _oraerr: diff --git a/src/oracledb/impl/thin/constants.pxi b/src/oracledb/impl/thin/constants.pxi index 086ee9f4..c83e15bf 100644 --- a/src/oracledb/impl/thin/constants.pxi +++ b/src/oracledb/impl/thin/constants.pxi @@ -268,6 +268,7 @@ cdef enum: cdef enum: TNS_GSO_DONT_CARE = 0x0001 TNS_GSO_CAN_RECV_ATTENTION = 0x0400 + TNS_NSI_NA_REQUIRED = 0x10 TNS_NSI_DISABLE_NA = 0x04 TNS_NSI_SUPPORT_SECURITY_RENEG = 0x80 diff --git a/src/oracledb/impl/thin/messages.pyx b/src/oracledb/impl/thin/messages.pyx index bd26f7a4..60234e0e 100644 --- a/src/oracledb/impl/thin/messages.pyx +++ b/src/oracledb/impl/thin/messages.pyx @@ -1856,7 +1856,8 @@ cdef class ConnectMessage(Message): cdef: uint16_t protocol_version, protocol_options const char_type *redirect_data - uint32_t flags = 0 + uint32_t flags2 = 0 + uint8_t flags1 bytes db_uuid if buf._current_packet.packet_type == TNS_PACKET_TYPE_REDIRECT: if not self.read_redirect_data_len: @@ -1875,13 +1876,18 @@ cdef class ConnectMessage(Message): if protocol_version < TNS_VERSION_MIN_ACCEPTED: errors._raise_err(errors.ERR_SERVER_VERSION_NOT_SUPPORTED) buf.read_uint16be(&protocol_options) - buf.skip_raw_bytes(20) + buf.skip_raw_bytes(10) + buf.read_ub1(&flags1) + if flags1 & TNS_NSI_NA_REQUIRED: + feature = "Native Network Encryption and Data Integrity" + errors._raise_not_supported(feature) + buf.skip_raw_bytes(9) buf.read_uint32be(&buf._caps.sdu) if protocol_version >= TNS_VERSION_MIN_OOB_CHECK: buf.skip_raw_bytes(5) - buf.read_uint32be(&flags) + buf.read_uint32be(&flags2) buf._caps._adjust_for_protocol(protocol_version, protocol_options, - flags) + flags2) buf._transport._full_packet_size = True elif buf._current_packet.packet_type == TNS_PACKET_TYPE_REFUSE: response = self.error_info.message From 0ad6125ceff0bfcd32b122c7a46408104e2b46b7 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:57:59 -0700 Subject: [PATCH 015/178] All Oracle errors that result in the connection no longer being usable will be raised as "DPY-4011: the database or network closed the connection", with the underlying reason being included in the error message. 
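As a hedged sketch of how an application might react to this consolidated
error (the credentials below are placeholders and the retry logic is left as
a comment)::

    import oracledb

    conn = oracledb.connect(
        user="hr", password="your_password", dsn="localhost/orclpdb1"
    )
    try:
        with conn.cursor() as cursor:
            cursor.execute("select user from dual")
    except oracledb.DatabaseError as exc:
        (error,) = exc.args
        if error.full_code == "DPY-4011":
            # The underlying reason (for example an ORA-03113 or ORA-00028
            # error) is appended to the message and can still be logged.
            print("Connection is no longer usable:", error.message)
            # reconnect and retry here
        else:
            raise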
--- doc/src/release_notes.rst | 3 +++ src/oracledb/errors.py | 27 ++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index ef4acf39..6d97c88b 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -89,6 +89,9 @@ Common Changes #) Fixed bug where some :ref:`DbObject ` attributes for database objects defined using ANSI names (including FLOAT and REAL) may have shown as integers. +#) All Oracle errors that result in the connection no longer being usable will + be raised as ``DPY-4011: the database or network closed the connection`` + with the underlying reason being included in the error message. #) Error ``DPY-2056: registered handler for protocol "{protocol}" failed for arg "{arg}"`` is now raised when an exception occurs when calling the registered handler for a protocol. diff --git a/src/oracledb/errors.py b/src/oracledb/errors.py index 3c32202f..7f2f590c 100644 --- a/src/oracledb/errors.py +++ b/src/oracledb/errors.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -373,11 +373,33 @@ def _raise_not_supported(feature: str) -> None: # Oracle error number cross reference ERR_ORACLE_ERROR_XREF = { + 22: ERR_CONNECTION_CLOSED, 28: ERR_CONNECTION_CLOSED, + 31: ERR_CONNECTION_CLOSED, + 45: ERR_CONNECTION_CLOSED, + 378: ERR_CONNECTION_CLOSED, 600: ERR_CONNECTION_CLOSED, + 602: ERR_CONNECTION_CLOSED, + 603: ERR_CONNECTION_CLOSED, + 609: ERR_CONNECTION_CLOSED, 1005: ERR_NO_CREDENTIALS, + 1012: ERR_CONNECTION_CLOSED, + 1041: ERR_CONNECTION_CLOSED, + 1043: ERR_CONNECTION_CLOSED, + 1089: ERR_CONNECTION_CLOSED, + 1092: ERR_CONNECTION_CLOSED, 1740: ERR_MISSING_ENDING_DOUBLE_QUOTE, 1756: ERR_MISSING_ENDING_SINGLE_QUOTE, + 2396: ERR_CONNECTION_CLOSED, + 3113: ERR_CONNECTION_CLOSED, + 3114: ERR_CONNECTION_CLOSED, + 3122: ERR_CONNECTION_CLOSED, + 3135: ERR_CONNECTION_CLOSED, + 12153: ERR_CONNECTION_CLOSED, + 12537: ERR_CONNECTION_CLOSED, + 12547: ERR_CONNECTION_CLOSED, + 12570: ERR_CONNECTION_CLOSED, + 12583: ERR_CONNECTION_CLOSED, 22165: ( ERR_INVALID_COLL_INDEX_SET, r"index \[(?P\d+)\] must be in the range of " @@ -391,7 +413,10 @@ def _raise_not_supported(feature: str) -> None: 24496: ERR_POOL_NO_CONNECTION_AVAILABLE, 24338: ERR_INVALID_REF_CURSOR, 24344: WRN_COMPILATION_ERROR, + 27146: ERR_CONNECTION_CLOSED, + 28511: ERR_CONNECTION_CLOSED, 38902: ERR_TOO_MANY_BATCH_ERRORS, + 56600: ERR_CONNECTION_CLOSED, } # ODPI-C error number cross reference From e0f480e1e28049ab8945bb66e2aaea4a33475f2d Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 21:59:23 -0700 Subject: [PATCH 016/178] Fixed hang when using asyncio and a connection is unexpectedly closed by the database. --- doc/src/release_notes.rst | 2 ++ src/oracledb/impl/thin/protocol.pyx | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 6d97c88b..ec0ed4ea 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -35,6 +35,8 @@ Thin Mode Changes (`issue 426 `__). #) Fixed hang when attempting to use pipelining against a database that doesn't support the end of response flag. 
+#) Fixed hang when using asyncio and a connection is unexpectedly closed by + the database. #) Error ``DPY-6002: The distinguished name (DN) on the server certificate does not match the expected value: "{expected_dn}"`` now shows the expected value. diff --git a/src/oracledb/impl/thin/protocol.pyx b/src/oracledb/impl/thin/protocol.pyx index dff3c90e..9a65bbc0 100644 --- a/src/oracledb/impl/thin/protocol.pyx +++ b/src/oracledb/impl/thin/protocol.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -761,7 +761,7 @@ cdef class BaseAsyncProtocol(BaseProtocol): except: if not self._in_connect \ and self._write_buf._packet_sent \ - and self._read_buf._transport is not None: + and self._transport is not None: self._send_marker(self._write_buf, TNS_MARKER_TYPE_BREAK) await self._reset() raise @@ -888,6 +888,10 @@ cdef class BaseAsyncProtocol(BaseProtocol): """ if not self._in_connect: self._transport = None + if self._read_buf._waiter is not None \ + and not self._read_buf._waiter.done(): + error = errors._create_err(errors.ERR_CONNECTION_CLOSED) + self._read_buf._waiter.set_exception(error.exc_type(error)) def data_received(self, data): """ From fd76c9c5bd2c62db5756023f086e734992012186 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 22:00:27 -0700 Subject: [PATCH 017/178] Added support for getting the value of `ltxid` in thin mode. --- doc/src/api_manual/async_connection.rst | 3 +- doc/src/api_manual/connection.rst | 4 +- doc/src/release_notes.rst | 2 + doc/src/user_guide/appendix_a.rst | 2 +- doc/src/user_guide/ha.rst | 42 +++--- samples/transaction_guard.py | 7 +- src/oracledb/impl/thin/capabilities.pyx | 5 +- src/oracledb/impl/thin/constants.pxi | 3 +- src/oracledb/impl/thin/messages.pyx | 2 +- tests/ext/test_ext_2300_tg.py | 152 ++++++++++++++++++++++ tests/ext/test_ext_2400_tg_async.py | 166 ++++++++++++++++++++++++ 11 files changed, 359 insertions(+), 29 deletions(-) create mode 100644 tests/ext/test_ext_2300_tg.py create mode 100644 tests/ext/test_ext_2400_tg_async.py diff --git a/doc/src/api_manual/async_connection.rst b/doc/src/api_manual/async_connection.rst index 51c0faf8..b73b3e22 100644 --- a/doc/src/api_manual/async_connection.rst +++ b/doc/src/api_manual/async_connection.rst @@ -523,8 +523,7 @@ AsyncConnection Attributes .. note: - This attribute is only available when Oracle Database 12.1 or later is - in use + This attribute is only available with Oracle Database 12.1 or later. .. attribute:: AsyncConnection.max_identifier_length diff --git a/doc/src/api_manual/connection.rst b/doc/src/api_manual/connection.rst index ec02e85c..a685dcea 100644 --- a/doc/src/api_manual/connection.rst +++ b/doc/src/api_manual/connection.rst @@ -774,8 +774,8 @@ Connection Attributes .. note: This attribute is an extension to the DB API definition. It is only - available when Oracle Database 12.1 or higher is in use on both the - server and the client. + available with Oracle Database 12.1 or higher. In python-oracledb Thick + mode, it also requires Oracle Client libraries 12.1 or higer. .. 
attribute:: Connection.max_identifier_length diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index ec0ed4ea..8c16c41e 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -33,6 +33,8 @@ Thin Mode Changes #) The thread that closes connection pools on interpreter shutdown is now only started when the first pool is created and not at module import (`issue 426 `__). +#) Added support for Transaction Guard by adding support to get the value of + :attr:`Connection.ltxid`. #) Fixed hang when attempting to use pipelining against a database that doesn't support the end of response flag. #) Fixed hang when using asyncio and a connection is unexpectedly closed by diff --git a/doc/src/user_guide/appendix_a.rst b/doc/src/user_guide/appendix_a.rst index 46b0f59b..734a9ab8 100644 --- a/doc/src/user_guide/appendix_a.rst +++ b/doc/src/user_guide/appendix_a.rst @@ -287,7 +287,7 @@ see :ref:`driverdiff` and :ref:`compatibility`. - Yes - no callback - Yes - no callback * - Transaction Guard (TG) (see :ref:`tg`) - - No + - Yes - Yes - Yes * - Data Guard (DG) and Active Data Guard (ADG) diff --git a/doc/src/user_guide/ha.rst b/doc/src/user_guide/ha.rst index 5823de52..42474b7c 100644 --- a/doc/src/user_guide/ha.rst +++ b/doc/src/user_guide/ha.rst @@ -161,13 +161,9 @@ Python-oracledb supports `Transaction Guard `__ which enables Python application to verify the success or failure of the last transaction in the -event of an unplanned outage. This feature is available when both client and -database are 12.1 or higher. - -.. note:: - - The Transaction Guard feature is only supported in the python-oracledb - Thick mode. See :ref:`enablingthick`. +event of an unplanned outage. This feature requires Oracle Database 12.1 or +higher. When using python-oracledb Thick mode, Oracle Client 12.1 or higher is +additionally required. Using Transaction Guard helps to: @@ -184,7 +180,10 @@ logical transaction id (``ltxid``) from the connection and then call a procedure to determine the outcome of the commit for this logical transaction id. -Follow the steps below to use the Transaction Guard feature in Python: +The steps below show how to use Transaction Guard in python-oracledb in a +single-instance database. Refer to Oracle documentation if you are using `RAC +`__ or standby +databases. 1. Grant execute privileges to the database users who will be checking the outcome of the commit. Log in as SYSDBA and run the following command: @@ -193,8 +192,9 @@ Follow the steps below to use the Transaction Guard feature in Python: GRANT EXECUTE ON DBMS_APP_CONT TO ; -2. Create a new service by executing the following PL/SQL block as SYSDBA. - Replace the ````, ```` and +2. Create a new service by calling `DBMS_SERVICE.CREATE_SERVICE + `__ + as SYSDBA. Replace the ````, ```` and ```` values with suitable values. It is important that the ``COMMIT_OUTCOME`` parameter be set to true for Transaction Guard to function properly. @@ -210,12 +210,14 @@ Follow the steps below to use the Transaction Guard feature in Python: END; / -3. Start the service by executing the following PL/SQL block as SYSDBA: +3. Start the service by calling `DBMS_SERVICE.START_SERVICE + `__ + as SYSDBA: .. code-block:: sql BEGIN - DBMS_SERVICE.start_service(''); + DBMS_SERVICE.START_SERVICE(''); END; / @@ -231,12 +233,18 @@ query: In the Python application code: -* Use the connection attribute :attr:`~Connection.ltxid` to determine the +* Connect to the appropriately enabled database service. 
If the connection is + TAF, AC or TAC enabled, then do not proceed with TG. +* Check :attr:`oracledb._Error.isrecoverable` to confirm the error is + recoverable. If not, do not proceed with TG. +* Use the connection attribute :attr:`Connection.ltxid` to find the logical transaction id. -* Call the ``DBMS_APP_CONT.GET_LTXID_OUTCOME`` PL/SQL procedure with the - logical transaction id acquired from the connection attribute. This returns - a boolean value indicating if the last transaction was committed and whether - the last call was completed successfully or not. +* Call the `DBMS_APP_CONT.GET_LTXID_OUTCOME + `__ + PL/SQL procedure with the logical transaction id. This returns a boolean + value indicating if the last transaction was committed and whether the last + call was completed successfully or not. +* Take any necessary action to re-do uncommitted work. See the `Transaction Guard Sample 0: - buf.skip_raw_bytes(num_bytes) + self.conn_impl._ltxid = buf.read_bytes() elif opcode == TNS_SERVER_PIGGYBACK_QUERY_CACHE_INVALIDATION \ or opcode == TNS_SERVER_PIGGYBACK_TRACE_EVENT: pass diff --git a/tests/ext/test_ext_2300_tg.py b/tests/ext/test_ext_2300_tg.py new file mode 100644 index 00000000..917c2752 --- /dev/null +++ b/tests/ext/test_ext_2300_tg.py @@ -0,0 +1,152 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +""" +E2300 - Module for testing Transaction Guard (TG). No special setup is required +but the test suite makes use of debugging packages that are not intended for +normal use. It also creates and drops a service. 
+""" + +import oracledb +import test_env + + +class TestCase(test_env.BaseTestCase): + service_name = "oracledb-test-tg" + requires_connection = False + + @classmethod + def setUpClass(cls): + cls.admin_conn = test_env.get_admin_connection() + user = test_env.get_main_user() + with cls.admin_conn.cursor() as cursor: + cursor.execute( + f""" + declare + params dbms_service.svc_parameter_array; + begin + params('COMMIT_OUTCOME') := 'true'; + params('RETENTION_TIMEOUT') := 604800; + dbms_service.create_service('{cls.service_name}', + '{cls.service_name}', params); + dbms_service.start_service('{cls.service_name}'); + end; + """ + ) + cursor.execute(f"grant execute on dbms_tg_dbg to {user}") + cursor.execute(f"grant execute on dbms_app_cont to {user}") + + @classmethod + def tearDownClass(cls): + user = test_env.get_main_user() + with cls.admin_conn.cursor() as cursor: + cursor.execute(f"revoke execute on dbms_tg_dbg from {user}") + cursor.execute(f"revoke execute on dbms_app_cont from {user}") + cursor.callproc("dbms_service.stop_service", [cls.service_name]) + cursor.callproc("dbms_service.delete_service", [cls.service_name]) + + def test_ext_2300(self): + "E2300 - test standalone connection" + params = test_env.get_connect_params().copy() + params.parse_connect_string(test_env.get_connect_string()) + params.set(service_name=self.service_name) + for arg_name in ("pre_commit", "post_commit"): + with self.subTest(arg_name=arg_name): + conn = oracledb.connect(params=params) + cursor = conn.cursor() + cursor.execute("truncate table TestTempTable") + cursor.execute( + """ + insert into TestTempTable (IntCol, StringCol1) + values (:1, :2) + """, + [2300, "String for test 2300"], + ) + full_arg_name = f"dbms_tg_dbg.tg_failpoint_{arg_name}" + cursor.execute( + f""" + begin + dbms_tg_dbg.set_failpoint({full_arg_name}); + end; + """ + ) + ltxid = conn.ltxid + with self.assertRaisesFullCode("DPY-4011"): + conn.commit() + conn = oracledb.connect(params=params) + cursor = conn.cursor() + committed_var = cursor.var(bool) + completed_var = cursor.var(bool) + cursor.callproc( + "dbms_app_cont.get_ltxid_outcome", + [ltxid, committed_var, completed_var], + ) + expected_value = arg_name == "post_commit" + self.assertEqual(committed_var.getvalue(), expected_value) + self.assertEqual(completed_var.getvalue(), expected_value) + + def test_ext_2301(self): + "E2301 - test pooled connection" + params = test_env.get_pool_params().copy() + params.parse_connect_string(test_env.get_connect_string()) + params.set(service_name=self.service_name, max=10) + pool = oracledb.create_pool(params=params) + for arg_name in ("pre_commit", "post_commit"): + with self.subTest(arg_name=arg_name): + conn = pool.acquire() + cursor = conn.cursor() + cursor.execute("truncate table TestTempTable") + cursor.execute( + """ + insert into TestTempTable (IntCol, StringCol1) + values (:1, :2) + """, + [2300, "String for test 2300"], + ) + full_arg_name = f"dbms_tg_dbg.tg_failpoint_{arg_name}" + cursor.execute( + f""" + begin + dbms_tg_dbg.set_failpoint({full_arg_name}); + end; + """ + ) + ltxid = conn.ltxid + with self.assertRaisesFullCode("DPY-4011"): + conn.commit() + conn = pool.acquire() + cursor = conn.cursor() + committed_var = cursor.var(bool) + completed_var = cursor.var(bool) + cursor.callproc( + "dbms_app_cont.get_ltxid_outcome", + [ltxid, committed_var, completed_var], + ) + expected_value = arg_name == "post_commit" + self.assertEqual(committed_var.getvalue(), expected_value) + self.assertEqual(completed_var.getvalue(), 
expected_value) + + +if __name__ == "__main__": + test_env.run_test_cases() diff --git a/tests/ext/test_ext_2400_tg_async.py b/tests/ext/test_ext_2400_tg_async.py new file mode 100644 index 00000000..e28aace4 --- /dev/null +++ b/tests/ext/test_ext_2400_tg_async.py @@ -0,0 +1,166 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +""" +E2400 - Module for testing Transaction Guard (TG) using asyncio. No special +setup is required but the test suite makes use of debugging packages that are +not intended for normal use. It also creates and drops a service. +""" + +import unittest + +import oracledb +import test_env + + +@unittest.skipUnless( + test_env.get_is_thin(), "asyncio not supported in thick mode" +) +class TestCase(test_env.BaseAsyncTestCase): + service_name = "oracledb-test-tg-async" + requires_connection = False + setup_completed = False + + async def __perform_setup(self): + """ + Perform setup, if needed. 
+ """ + if self.__class__.setup_completed: + return + user = test_env.get_main_user() + async with test_env.get_admin_connection(use_async=True) as conn: + cursor = conn.cursor() + await cursor.execute(f"grant execute on dbms_tg_dbg to {user}") + await cursor.execute(f"grant execute on dbms_app_cont to {user}") + await cursor.execute( + """ + select count(*) from dba_services + where name = :name + """, + name=self.service_name, + ) + (count,) = await cursor.fetchone() + if count > 0: + return + await cursor.execute( + f""" + declare + params dbms_service.svc_parameter_array; + begin + params('COMMIT_OUTCOME') := 'true'; + params('RETENTION_TIMEOUT') := 604800; + dbms_service.create_service('{self.service_name}', + '{self.service_name}', params); + dbms_service.start_service('{self.service_name}'); + end; + """ + ) + + async def test_ext_2400(self): + "E2400 - test standalone connection" + await self.__perform_setup() + params = test_env.get_connect_params().copy() + params.parse_connect_string(test_env.get_connect_string()) + params.set(service_name=self.service_name) + for arg_name in ("pre_commit", "post_commit"): + with self.subTest(arg_name=arg_name): + conn = await oracledb.connect_async(params=params) + cursor = conn.cursor() + await cursor.execute("truncate table TestTempTable") + await cursor.execute( + """ + insert into TestTempTable (IntCol, StringCol1) + values (:1, :2) + """, + [2400, "String for test 2400"], + ) + full_arg_name = f"dbms_tg_dbg.tg_failpoint_{arg_name}" + await cursor.execute( + f""" + begin + dbms_tg_dbg.set_failpoint({full_arg_name}); + end; + """ + ) + ltxid = conn.ltxid + with self.assertRaisesFullCode("DPY-4011"): + await conn.commit() + conn = await oracledb.connect_async(params=params) + cursor = conn.cursor() + committed_var = cursor.var(bool) + completed_var = cursor.var(bool) + await cursor.callproc( + "dbms_app_cont.get_ltxid_outcome", + [ltxid, committed_var, completed_var], + ) + expected_value = arg_name == "post_commit" + self.assertEqual(committed_var.getvalue(), expected_value) + self.assertEqual(completed_var.getvalue(), expected_value) + + async def test_ext_2401(self): + "E2401 - test pooled connection" + await self.__perform_setup() + params = test_env.get_pool_params().copy() + params.parse_connect_string(test_env.get_connect_string()) + params.set(service_name=self.service_name, max=10) + pool = oracledb.create_pool_async(params=params) + for arg_name in ("pre_commit", "post_commit"): + with self.subTest(arg_name=arg_name): + async with pool.acquire() as conn: + cursor = conn.cursor() + await cursor.execute("truncate table TestTempTable") + await cursor.execute( + """ + insert into TestTempTable (IntCol, StringCol1) + values (:1, :2) + """, + [2400, "String for test 2400"], + ) + full_arg_name = f"dbms_tg_dbg.tg_failpoint_{arg_name}" + await cursor.execute( + f""" + begin + dbms_tg_dbg.set_failpoint({full_arg_name}); + end; + """ + ) + ltxid = conn.ltxid + with self.assertRaisesFullCode("DPY-4011"): + await conn.commit() + async with pool.acquire() as conn: + cursor = conn.cursor() + committed_var = cursor.var(bool) + completed_var = cursor.var(bool) + await cursor.callproc( + "dbms_app_cont.get_ltxid_outcome", + [ltxid, committed_var, completed_var], + ) + expected_value = arg_name == "post_commit" + self.assertEqual(committed_var.getvalue(), expected_value) + self.assertEqual(completed_var.getvalue(), expected_value) + await pool.close() + + +if __name__ == "__main__": + test_env.run_test_cases() From 
7aebce21de6fb965da02f8b0c2234770bc0b7b7b Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 22:06:02 -0700 Subject: [PATCH 018/178] Doc updates. --- doc/src/api_manual/module.rst | 15 ++++++++------- doc/src/user_guide/bind.rst | 2 +- doc/src/user_guide/connection_handling.rst | 13 +++++++++---- doc/src/user_guide/tracing.rst | 10 ++++++---- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index 70be0137..423a910a 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -364,7 +364,7 @@ Oracledb Methods The ``use_sni`` parameter is expected to be a boolean which indicates whether to use the TLS Server Name Indicator (SNI) extension to bypass the - second TLS neogiation that would otherwise be required. This parameter is + second TLS negotiation that would otherwise be required. This parameter is used in both python-oracledb Thin and Thick modes. The default value is False. @@ -697,7 +697,7 @@ Oracledb Methods The ``use_sni`` parameter is expected to be a boolean which indicates whether to use the TLS Server Name Indicator (SNI) extension to bypass the - second TLS neogiation that would otherwise be required. This parameter is + second TLS negotiation that would otherwise be required. This parameter is used in both python-oracledb Thin and Thick modes. The default value is False. @@ -1041,7 +1041,7 @@ Oracledb Methods The ``use_sni`` parameter is expected to be a boolean which indicates whether to use the TLS Server Name Indicator (SNI) extension to bypass the - second TLS neogiation that would otherwise be required. This parameter is + second TLS negotiation that would otherwise be required. This parameter is used in both python-oracledb Thin and Thick modes. The default value is False. @@ -1522,7 +1522,7 @@ Oracledb Methods The ``use_sni`` parameter is expected to be a boolean which indicates whether to use the TLS Server Name Indicator (SNI) extension to bypass the - second TLS neogiation that would otherwise be required. This parameter is + second TLS negotiation that would otherwise be required. This parameter is used in both python-oracledb Thin and Thick modes. The default value is False. @@ -1919,7 +1919,7 @@ Oracledb Methods The ``use_sni`` parameter is expected to be a boolean which indicates whether to use the TLS Server Name Indicator (SNI) extension to bypass the - second TLS neogiation that would otherwise be required. This parameter is + second TLS negotiation that would otherwise be required. This parameter is used in both python-oracledb Thin and Thick modes. The default value is False. @@ -2123,7 +2123,8 @@ Oracledb Methods ``is_thin_mode()`` will never change for the lifetime of the process. The attribute :attr:`Connection.thin` can be used to check a connection's - mode. + mode. The attribute :attr:`ConnectionPool.thin` can be used to check a + pool's mode. .. note:: @@ -2495,7 +2496,7 @@ Oracledb Methods The ``use_sni`` parameter is expected to be a boolean which indicates whether to use the TLS Server Name Indicator (SNI) extension to bypass the - second TLS neogiation that would otherwise be required. This parameter is + second TLS negotiation that would otherwise be required. This parameter is used in both python-oracledb Thin and Thick modes. The default value is False. 
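A small usage sketch for the ``use_sni`` parameter documented above; the DSN
is a placeholder and assumes a TCPS (TLS) listener endpoint, with any wallet
or certificate configuration omitted::

    import oracledb

    conn = oracledb.connect(
        user="hr",
        password="your_password",
        dsn="tcps://dbhost.example.com:2484/orclpdb1",
        # Send the TLS SNI extension so the second TLS negotiation can be
        # skipped where the server supports it.
        use_sni=True,
    )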
diff --git a/doc/src/user_guide/bind.rst b/doc/src/user_guide/bind.rst index 806b00b5..cc757f0e 100644 --- a/doc/src/user_guide/bind.rst +++ b/doc/src/user_guide/bind.rst @@ -29,7 +29,7 @@ more than once with different data values. If you do not use bind variables, Oracle must reparse and cache multiple statements. When using bind variables, Oracle Database may be able to reuse the statement execution plan and context. -.. note:: +.. warning:: Never concatenate or interpolate user data into SQL statements: diff --git a/doc/src/user_guide/connection_handling.rst b/doc/src/user_guide/connection_handling.rst index 8ecacbe0..ec980e84 100644 --- a/doc/src/user_guide/connection_handling.rst +++ b/doc/src/user_guide/connection_handling.rst @@ -2091,10 +2091,11 @@ shrinkage is only initiated when the pool is accessed so pools in fully dormant applications will not shrink until the application is next used. For pools created with :ref:`external authentication `, with -:ref:`homogeneous ` set to False, or when using :ref:`drcp`, -then the number of connections opened at pool creation is zero even if a larger -value is specified for ``min``. Also, in these cases the pool increment unit -is always 1 regardless of the value of ``increment``. +:ref:`homogeneous ` set to False, or when using :ref:`drcp` (in +python-oracledb Thick mode), then the number of connections opened at pool +creation is zero even if a larger value is specified for ``min``. Also, in +these cases the pool increment unit is always 1 regardless of the value of +``increment``. .. _poolhealth: @@ -2774,6 +2775,10 @@ state can optionally be specified. See the Oracle Database documentation on dblatest&id=GUID-661BB906-74D2-4C5D-9C7E-2798F76501B3>`__ for more information on purity and connection classes. +Note that when using DRCP with a python-oracledb local :ref:`connection pool +` in Thick mode, the local connection pool ``min`` value is +ignored and the pool will be created with zero connections. + **Requesting a Pooled Server** To request a DRCP pooled server, you can: diff --git a/doc/src/user_guide/tracing.rst b/doc/src/user_guide/tracing.rst index 5329cd52..8be95e36 100644 --- a/doc/src/user_guide/tracing.rst +++ b/doc/src/user_guide/tracing.rst @@ -248,11 +248,13 @@ Finding the python-oracledb Mode The boolean attributes :attr:`Connection.thin` and :attr:`ConnectionPool.thin` can be used to show the current mode of a python-oracledb connection or pool, -respectively. The python-oracledb version can be shown with -:data:`oracledb.__version__`. +respectively. The method :meth:`oracledb.is_thin_mode()` can also be used, but +review its usage notes about when its return value may change. -The information can also be seen in the Oracle Database data dictionary table -V$SESSION_CONNECT_INFO: +The python-oracledb version can be shown with :data:`oracledb.__version__`. + +Version and mode information can also be seen in the Oracle Database data +dictionary table V$SESSION_CONNECT_INFO: .. code-block:: python From 382e64d594f7fcd4cd0b3e076b460083917a3c99 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 22:08:09 -0700 Subject: [PATCH 019/178] Fixed bug that caused oracledb._Error.isrecoverable to always be False. 
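A condensed sketch of the Transaction Guard flow that this fix supports,
mirroring the documentation and the E2300/E2400 tests added earlier in this
series. The pool credentials and service name are placeholders, and error
handling is reduced to the essentials::

    import oracledb

    pool = oracledb.create_pool(
        user="hr",
        password="your_password",
        dsn="dbhost.example.com/oracledb-test-tg",  # COMMIT_OUTCOME service
        min=1,
        max=4,
    )

    conn = pool.acquire()
    ltxid = conn.ltxid  # logical transaction id (Oracle Database 12.1+)
    try:
        conn.commit()
    except oracledb.DatabaseError as exc:
        (error,) = exc.args
        if not error.isrecoverable:
            raise
        # Reconnect and ask the database whether the commit actually occurred.
        conn = pool.acquire()
        with conn.cursor() as cursor:
            committed = cursor.var(bool)
            completed = cursor.var(bool)
            cursor.callproc(
                "dbms_app_cont.get_ltxid_outcome",
                [ltxid, committed, completed],
            )
        if not committed.getvalue():
            pass  # re-do the uncommitted work here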
--- doc/src/api_manual/module.rst | 6 ++++-- doc/src/release_notes.rst | 7 +++++-- src/oracledb/impl/thin/messages.pyx | 31 ++++++++++++++++++++++++++++- tests/test_1700_error.py | 17 +++++++++++++++- tests/test_6800_error_async.py | 17 +++++++++++++++- tests/test_env.py | 6 +++--- 6 files changed, 74 insertions(+), 10 deletions(-) diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index 423a910a..cac21d9e 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -4295,8 +4295,10 @@ See :ref:`exception` for usage information. .. attribute:: _Error.isrecoverable Boolean attribute representing whether the error is recoverable or not. - This is False in all cases unless both Oracle Database 12.1 (or later) and - Oracle Client 12.1 (or later) are being used. + This requires Oracle Database 12.1 (or later). If python-oracledb Thick + mode is used, then Oracle Client 12.1 (or later) is also required. + + See :ref:`tg` for more information. .. _oracledbplugins: diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 8c16c41e..e831a4cb 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -33,8 +33,8 @@ Thin Mode Changes #) The thread that closes connection pools on interpreter shutdown is now only started when the first pool is created and not at module import (`issue 426 `__). -#) Added support for Transaction Guard by adding support to get the value of - :attr:`Connection.ltxid`. +#) Added support for Transaction Guard by adding support to get the values of + :attr:`Connection.ltxid` and :attr:`oracledb._Error.isrecoverable`. #) Fixed hang when attempting to use pipelining against a database that doesn't support the end of response flag. #) Fixed hang when using asyncio and a connection is unexpectedly closed by @@ -66,6 +66,9 @@ Thin Mode Changes Thick Mode Changes ++++++++++++++++++ +#) Fixed bug that caused :attr:`oracledb._Error.isrecoverable` to always be + `False`. + Common Changes ++++++++++++++ diff --git a/src/oracledb/impl/thin/messages.pyx b/src/oracledb/impl/thin/messages.pyx index b8b6aae3..90af950c 100644 --- a/src/oracledb/impl/thin/messages.pyx +++ b/src/oracledb/impl/thin/messages.pyx @@ -65,10 +65,39 @@ cdef class Message: connection" error is detected, the connection is forced closed immediately. 
""" + cdef bint is_recoverable = False if self.error_occurred: + if self.error_info.num in ( + 28, # session has been terminated + 31, # session marked for kill + 376, # file %s cannot be read at this time + 603, # ORACLE server session terminated + 1012, # not logged on + 1033, # ORACLE initialization or shutdown in progress + 1034, # the Oracle instance is not available for use + 1089, # immediate shutdown or close in progress + 1090, # shutdown in progress + 1092, # ORACLE instance terminated + 1115, # IO error reading block from file %s (block # %s) + 2396, # exceeded maximum idle time + 3113, # end-of-file on communication channel + 3114, # not connected to ORACLE + 3135, # connection lost contact + 12153, # TNS:not connected + 12514, # Service %s is not registered with the listener + 12537, # TNS:connection closed + 12547, # TNS:lost contact + 12570, # TNS:packet reader failure + 12571, # TNS:packet writer failure + 12583, # TNS:no reader + 12757, # instance does not currently know of requested service + 16456, # missing or invalid value + ): + is_recoverable = True error = errors._Error(self.error_info.message, code=self.error_info.num, - offset=self.error_info.pos) + offset=self.error_info.pos, + isrecoverable=is_recoverable) if error.is_session_dead: self.conn_impl._protocol._force_close() raise error.exc_type(error) diff --git a/tests/test_1700_error.py b/tests/test_1700_error.py index 65412f12..84c440f1 100644 --- a/tests/test_1700_error.py +++ b/tests/test_1700_error.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -58,6 +58,7 @@ def test_1701(self): self.assertEqual(error_obj.code, 20101) self.assertEqual(error_obj.offset, 0) self.assertIsInstance(error_obj.isrecoverable, bool) + self.assertFalse(error_obj.isrecoverable) new_error_obj = pickle.loads(pickle.dumps(error_obj)) self.assertIsInstance(new_error_obj, oracledb._Error) self.assertEqual(new_error_obj.message, error_obj.message) @@ -198,6 +199,20 @@ def test_1708(self): self.assertEqual(error_obj.full_code, f"ORA-{code}") self.assertTrue("Help:" not in error_obj.message) + @unittest.skipIf(test_env.get_is_drcp(), "not supported with DRCP") + def test_1709(self): + "1709 - error from killed connection is deemed recoverable" + admin_conn = test_env.get_admin_connection() + conn = test_env.get_connection() + sid, serial = self.get_sid_serial(conn) + with admin_conn.cursor() as admin_cursor: + sql = f"alter system kill session '{sid},{serial}'" + admin_cursor.execute(sql) + with self.assertRaisesFullCode("DPY-4011") as cm: + with conn.cursor() as cursor: + cursor.execute("select user from dual") + self.assertTrue(cm.error_obj.isrecoverable) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_6800_error_async.py b/tests/test_6800_error_async.py index dea188cc..dda09e96 100644 --- a/tests/test_6800_error_async.py +++ b/tests/test_6800_error_async.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2023, 2024, Oracle and/or its affiliates. +# Copyright (c) 2023, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -61,6 +61,7 @@ async def test_6801(self): self.assertEqual(error_obj.code, 20101) self.assertEqual(error_obj.offset, 0) self.assertIsInstance(error_obj.isrecoverable, bool) + self.assertFalse(error_obj.isrecoverable) new_error_obj = pickle.loads(pickle.dumps(error_obj)) self.assertIsInstance(new_error_obj, oracledb._Error) self.assertEqual(new_error_obj.message, error_obj.message) @@ -206,6 +207,20 @@ async def test_6808(self): self.assertEqual(result.warning.full_code, "DPY-7000") await self.cursor.execute(f"drop procedure {proc_name}") + @unittest.skipIf(test_env.get_is_drcp(), "not supported with DRCP") + async def test_6809(self): + "6809 - error from killed connection is deemed recoverable" + admin_conn = await test_env.get_admin_connection_async() + conn = await test_env.get_connection_async() + sid, serial = await self.get_sid_serial(conn) + with admin_conn.cursor() as admin_cursor: + sql = f"alter system kill session '{sid},{serial}'" + await admin_cursor.execute(sql) + with self.assertRaisesFullCode("DPY-4011") as cm: + with conn.cursor() as cursor: + await cursor.execute("select user from dual") + self.assertTrue(cm.error_obj.isrecoverable) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_env.py b/tests/test_env.py index d6377752..04dc1b07 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -552,11 +552,11 @@ def __exit__(self, exc_type, exc_value, tb): if not issubclass(exc_type, oracledb.Error): return False if issubclass(exc_type, oracledb.Error): - error_obj = exc_value.args[0] - if error_obj.full_code not in self.full_codes: + self.error_obj = exc_value.args[0] + if self.error_obj.full_code not in self.full_codes: message = ( f"{self.message_fragment} should have been raised but " - f'"{error_obj.full_code}" was raised instead.' + f'"{self.error_obj.full_code}" was raised instead.' ) raise AssertionError(message) return True From 9fcce6e1ffb96b0cb334a99a07898de2e5e489e8 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 22:09:55 -0700 Subject: [PATCH 020/178] Doc updates. --- doc/src/api_manual/async_connection_pool.rst | 26 +++++----- doc/src/api_manual/connection_pool.rst | 16 +++--- doc/src/api_manual/module.rst | 51 ++++++++++++-------- doc/src/api_manual/pool_params.rst | 20 +++++--- doc/src/user_guide/connection_handling.rst | 39 ++++++++------- 5 files changed, 86 insertions(+), 66 deletions(-) diff --git a/doc/src/api_manual/async_connection_pool.rst b/doc/src/api_manual/async_connection_pool.rst index 96cd0eff..db819d1f 100644 --- a/doc/src/api_manual/async_connection_pool.rst +++ b/doc/src/api_manual/async_connection_pool.rst @@ -119,14 +119,14 @@ AsyncConnectionPool Attributes .. attribute:: AsyncConnectionPool.max_lifetime_session - This read-write attribute returns the maximum length of time (in seconds) - that a pooled connection may exist. Connections that are in use will not be - closed. They become candidates for termination only when they are released - back to the pool and have existed for longer than max_lifetime_session - seconds. Note that termination only occurs when the pool is accessed. A - value of *0* means that there is no maximum length of time that a pooled - connection may exist. This attribute is only available in Oracle Database - 12.1 or later. 
+ This read-write attribute is the maximum length of time (in seconds) that a + pooled connection may exist since first being created. A value of *0* means + there is no limit. Connections become candidates for termination when they + are acquired or released back to the pool, and have existed for longer than + ``max_lifetime_session`` seconds. Connections that are in active use will + not be closed. In python-oracledb Thick mode, Oracle Client libraries 12.1 + or later must be used and, prior to Oracle Client 21, cleanup only occurs + when the pool is accessed. .. attribute:: AsyncConnectionPool.max_sessions_per_shard @@ -189,10 +189,12 @@ AsyncConnectionPool Attributes .. attribute:: AsyncConnectionPool.timeout - This read-write attribute specifies the time (in seconds) after which idle - connections will be terminated in order to maintain an optimum number of - open connections. A value of *0* means that no idle connections are - terminated. + This read-only attribute is an integer that specifies the length of time + (in seconds) that a connection may remain idle in the pool before it is + terminated. This applies only when the pool has more than ``min`` + connections open, allowing it to shrink to the specified minimum size. The + default value is *0* seconds. A value of *0* means that there is no maximum + time. .. attribute:: AsyncConnectionPool.username diff --git a/doc/src/api_manual/connection_pool.rst b/doc/src/api_manual/connection_pool.rst index 214feb27..dc8c39d4 100644 --- a/doc/src/api_manual/connection_pool.rst +++ b/doc/src/api_manual/connection_pool.rst @@ -216,14 +216,14 @@ ConnectionPool Attributes .. attribute:: ConnectionPool.max_lifetime_session - This read-write attribute returns the maximum length of time (in seconds) - that a pooled connection may exist. Connections that are in use will not be - closed. They become candidates for termination only when they are released - back to the pool and have existed for longer than max_lifetime_session - seconds. Note that termination only occurs when the pool is accessed. A - value of *0* means that there is no maximum length of time that a pooled - connection may exist. This attribute is only available in Oracle Database - 12.1 or later. + This read-write attribute is the maximum length of time (in seconds) that a + pooled connection may exist since first being created. A value of *0* means + there is no limit. Connections become candidates for termination when they + are acquired or released back to the pool, and have existed for longer than + ``max_lifetime_session`` seconds. Connections that are in active use will + not be closed. In python-oracledb Thick mode, Oracle Client libraries 12.1 + or later must be used and, prior to Oracle Client 21, cleanup only occurs + when the pool is accessed. .. attribute:: ConnectionPool.max_sessions_per_shard diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index cac21d9e..143fc7bd 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -1229,11 +1229,10 @@ Oracledb Methods users). The default value is *True*. The ``timeout`` parameter is the length of time (in seconds) that a - connection may remain idle in the pool before it is terminated. This + connection may remain idle in the pool before it is terminated. This applies only when the pool has more than ``min`` connections open, allowing - it to shrink to the specified minimum size. If the value of this parameter - is 0, then the connections are never terminated. 
The default value is *0* - seconds. + it to shrink to the specified minimum size. The default value is *0* + seconds. A value of *0* means there is no limit. The ``wait_timeout`` parameter is the length of time (in milliseconds) that a caller should wait when acquiring a connection from the pool with @@ -1241,9 +1240,13 @@ Oracledb Methods value is *0* milliseconds. The ``max_lifetime_session`` parameter is the length of time (in seconds) - that connections can remain in the pool. If the value of this parameter is - 0, then the connections may remain in the pool indefinitely. The default - value is *0* seconds. + that a pooled connection may exist since first being created. The default + value is *0*. A value of *0* means that there is no limit. Connections + become candidates for termination when they are acquired or released back + to the pool and have existed for longer than ``max_lifetime_session`` + seconds. In python-oracledb Thick mode, Oracle Client libraries 12.1 or + later must be used and, prior to Oracle Client 21, cleanup only occurs when + the pool is accessed. The ``session_callback`` parameter is a callable that is invoked when a connection is returned from the pool for the first time, or when the @@ -1687,11 +1690,10 @@ Oracledb Methods users). The default value is *True*. The ``timeout`` parameter is the length of time (in seconds) that a - connection may remain idle in the pool before it is terminated. This + connection may remain idle in the pool before it is terminated. This applies only when the pool has more than ``min`` connections open, allowing - it to shrink to the specified minimum size. If the value of this parameter - is 0, then the connections are never terminated. The default value is *0* - seconds. + it to shrink to the specified minimum size. The default value is *0* + seconds. A value of *0* means there is no limit. The ``wait_timeout`` parameter is the length of time (in milliseconds) that a caller should wait when acquiring a connection from the pool with @@ -1699,9 +1701,13 @@ Oracledb Methods value is *0* milliseconds. The ``max_lifetime_session`` parameter is the length of time (in seconds) - that connections can remain in the pool. If the value of this parameter is - 0, then the connections may remain in the pool indefinitely. The default - value is *0* seconds. + that a pooled connection may exist since first being created. The default + value is *0*. A value of *0* means that there is no limit. Connections + become candidates for termination when they are acquired or released back + to the pool and have existed for longer than ``max_lifetime_session`` + seconds. In python-oracledb Thick mode, Oracle Client libraries 12.1 or + later must be used and, prior to Oracle Client 21, cleanup only occurs when + the pool is accessed. The ``session_callback`` parameter is a callable that is invoked when a connection is returned from the pool for the first time, or when the @@ -2202,11 +2208,10 @@ Oracledb Methods The default value is *True*. The ``timeout`` parameter is the length of time (in seconds) that a - connection may remain idle in the pool before it is terminated. This + connection may remain idle in the pool before it is terminated. This applies only when the pool has more than ``min`` connections open, allowing - it to shrink to the specified minimim size. If the value of this parameter - is 0, then the connections are never terminated. The default value is *0* - seconds. + it to shrink to the specified minimum size. 
The default value is *0* + seconds. A value of *0* means there is no limit. The ``wait_timeout`` parameter is the length of time (in milliseconds) that a caller should wait when acquiring a connection from the pool with @@ -2214,9 +2219,13 @@ Oracledb Methods value is *0* milliseconds. The ``max_lifetime_session`` parameter is the length of time (in seconds) - that connections can remain in the pool. If the value of this parameter is - 0, then the connections may remain in the pool indefinitely. The default - value is *0* seconds. + that a pooled connection may exist since first being created. The default + value is *0*. A value of *0* means that there is no limit. Connections + become candidates for termination when they are acquired or released back + to the pool and have existed for longer than ``max_lifetime_session`` + seconds. In python-oracledb Thick mode, Oracle Client libraries 12.1 or + later must be used and, prior to Oracle Client 21, cleanup only occurs when + the pool is accessed. The ``session_callback`` parameter is a callable that is invoked when a connection is returned from the pool for the first time, or when the diff --git a/doc/src/api_manual/pool_params.rst b/doc/src/api_manual/pool_params.rst index 596658b7..296bc3b4 100644 --- a/doc/src/api_manual/pool_params.rst +++ b/doc/src/api_manual/pool_params.rst @@ -131,10 +131,14 @@ PoolParams Attributes .. attribute:: PoolParams.max_lifetime_session - This read-only attribute is an integer that determines the length of time - (in seconds) that connections can remain in the pool. If the value of this - attribute is *0*, then the connections may remain in the pool indefinitely. - The default value is *0* seconds. + This read-only attribute is the maximum length of time (in seconds) that a + pooled connection may exist since first being created. A value of *0* means + there is no limit. Connections become candidates for termination when they + are acquired or released back to the pool, and have existed for longer than + ``max_lifetime_session`` seconds. Connections that are in active use will + not be closed. In python-oracledb Thick mode, Oracle Client libraries 12.1 + or later must be used and, prior to Oracle Client 21, cleanup only occurs + when the pool is accessed. .. attribute:: PoolParams.max_sessions_per_shard @@ -189,10 +193,12 @@ PoolParams Attributes This read-only attribute is an integer that specifies the length of time (in seconds) that a connection may remain idle in the pool before it is - terminated. If the value of this attribute is *0*, then the connections - are never terminated. The default value is *0* seconds. + terminated. This applies only when the pool has more than ``min`` + connections open, allowing it to shrink to the specified minimum size. The + default value is *0* seconds. A value of *0* means that there is no maximum + time. - This attribute is only supported in python-oracledb Thick mode. + This attribute is supported in both python-oracledb Thin and Thick modes. .. attribute:: PoolParams.wait_timeout diff --git a/doc/src/user_guide/connection_handling.rst b/doc/src/user_guide/connection_handling.rst index ec980e84..e3edf20c 100644 --- a/doc/src/user_guide/connection_handling.rst +++ b/doc/src/user_guide/connection_handling.rst @@ -2069,26 +2069,29 @@ regardless of how big ``increment`` is. The pool will then continue to re-establish connections in a background thread. 
A connection pool can shrink back to its minimum size ``min`` when connections -opened by the pool are not used by the application. This frees up database -resources while allowing pools to retain connections for active users. If -connections are idle in the pool (i.e. not currently acquired by the -application) and are unused for longer than the pool creation attribute -``timeout`` value, then they will be closed. The check occurs every -``timeout`` interval and hence in the worst case it may take twice the -``timeout`` time to close the idle connections. The default ``timeout`` is 0 -seconds signifying an infinite time and meaning idle connections will never be -closed. +opened by the pool are not used by the application. This frees up database +resources while allowing pools to retain open connections for active users. If +there are more than ``min`` connections open, and connections are idle in the +pool (i.e. not currently acquired by the application) and unused for longer +than the pool creation attribute ``timeout`` value, then they will be closed. +The check occurs every ``timeout`` interval and hence in the worst case it may +take twice the ``timeout`` time to close the idle connections. The default +``timeout`` is *0* seconds signifying an infinite time and meaning idle +connections will never be closed. The pool creation parameter ``max_lifetime_session`` also allows pools to -shrink. This parameter bounds the total length of time that a connection can -exist starting from the time the pool created it. If a connection was created -``max_lifetime_session`` or longer seconds ago, then it will be closed when it -is idle in the pool. In the case when ``timeout`` and ``max_lifetime_session`` -are both set, the connection will be terminated if either the idle timeout -happens or the max lifetime setting is exceeded. Note that when using -python-oracledb in Thick mode with Oracle Client libraries prior to 21c, pool -shrinkage is only initiated when the pool is accessed so pools in fully dormant -applications will not shrink until the application is next used. +shrink. This parameter bounds the total length of time that a connection can +exist starting from the time that it was created in the pool. It is mostly used +for defensive programming to mitigate against unforeseeable problems that may +occur with connections. If a connection was created ``max_lifetime_session`` or +longer seconds ago, then it will be a candidate for being closed. In the case +when ``timeout`` and ``max_lifetime_session`` are both set, the connection will +be terminated if either the idle timeout happens or the maximum lifetime +setting is exceeded. Note that when using python-oracledb in Thick mode with +Oracle Client libraries prior to 21c, pool shrinkage is only initiated when the +pool is accessed so pools in fully dormant applications will not shrink until +the application is next used. In Thick mode, Oracle Client libraries 12.1, or +later, are needed to use ``max_lifetime_session``. For pools created with :ref:`external authentication `, with :ref:`homogeneous ` set to False, or when using :ref:`drcp` (in From 6f53c303ef994d6a567623ce56d2cd7109ffea75 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 12 Feb 2025 22:10:11 -0700 Subject: [PATCH 021/178] Simplify code. 
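The pool shrinkage controls described in the documentation changes above can be
combined. A minimal sketch, assuming placeholder credentials and a placeholder
connect string, might look like this (``timeout`` closes idle connections once
the pool has more than ``min`` connections open, while ``max_lifetime_session``
caps the total age of any pooled connection)::

    import oracledb

    # Placeholder credentials and connect string for illustration only.
    pool = oracledb.create_pool(
        user="hr",
        password="my_password",
        dsn="dbhost.example.com/orclpdb",
        min=2,
        max=10,
        increment=1,
        timeout=60,                 # close connections idle for 60 seconds,
                                    # but never shrink below ``min``
        max_lifetime_session=3600,  # connections older than one hour become
                                    # candidates for termination when acquired
                                    # or released
    )

    with pool.acquire() as connection:
        with connection.cursor() as cursor:
            cursor.execute("select sysdate from dual")
            print(cursor.fetchone())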
--- src/oracledb/impl/thin/var.pyx | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/oracledb/impl/thin/var.pyx b/src/oracledb/impl/thin/var.pyx index df762048..8ed07f73 100644 --- a/src/oracledb/impl/thin/var.pyx +++ b/src/oracledb/impl/thin/var.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -73,10 +73,7 @@ cdef class ThinVarImpl(BaseVarImpl): if value is not None \ and not isinstance(value, (PY_TYPE_LOB, PY_TYPE_ASYNC_LOB)): - lob = conn.createlob(metadata.dbtype) - if value: - lob.write(value) - self._values[idx] = lob + self._values[idx] = conn.createlob(metadata.dbtype, value) # bind by name if name is not None: From 737389a89c7eeb73fff822e789a6841734be1c30 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 09:56:24 -0700 Subject: [PATCH 022/178] Update copyrights. --- LICENSE.txt | 2 +- doc/src/conf.py | 2 +- doc/src/license.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index a2c0651a..38fd1483 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2016, 2024 Oracle and/or its affiliates. +Copyright (c) 2016, 2025 Oracle and/or its affiliates. This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License diff --git a/doc/src/conf.py b/doc/src/conf.py index 1520c93b..4b36b77b 100644 --- a/doc/src/conf.py +++ b/doc/src/conf.py @@ -36,7 +36,7 @@ # General substitutions. project = "python-oracledb" copyright = ( - "2016, 2024, Oracle and/or its affiliates. All rights reserved. " + "2016, 2025, Oracle and/or its affiliates. All rights reserved. " "Portions Copyright © 2007-2015, Anthony Tuininga. All rights reserved. " "Portions Copyright © 2001-2007, Computronix (Canada) Ltd., " "Edmonton, Alberta, Canada. All rights reserved" diff --git a/doc/src/license.rst b/doc/src/license.rst index 48d2f928..b81cfd3d 100644 --- a/doc/src/license.rst +++ b/doc/src/license.rst @@ -10,7 +10,7 @@ License .. centered:: **LICENSE AGREEMENT FOR python-oracledb** -Copyright |copy| 2016, 2024 Oracle and/or its affiliates. +Copyright |copy| 2016, 2025 Oracle and/or its affiliates. This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License From 6838bc7c8fcd3e75bb92de2804e1f0428ab9d9cc Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 09:56:55 -0700 Subject: [PATCH 023/178] Add instance_name parameter. 
--- doc/src/api_manual/connect_params.rst | 17 ++- doc/src/api_manual/module.rst | 135 +++++++++++++--------- doc/src/api_manual/pool_params.rst | 22 ++-- doc/src/release_notes.rst | 3 + src/oracledb/base_impl.pxd | 1 + src/oracledb/connect_params.py | 17 ++- src/oracledb/connection.py | 6 + src/oracledb/impl/base/connect_params.pyx | 4 + src/oracledb/impl/base/parsers.pyx | 30 +++++ src/oracledb/pool.py | 6 + src/oracledb/pool_params.py | 9 +- tests/test_4500_connect_params.py | 23 ++++ tests/test_4700_pool_params.py | 3 +- utils/fields.cfg | 6 + utils/templates/connect_params.py | 2 +- utils/templates/pool_params.py | 2 +- 16 files changed, 209 insertions(+), 77 deletions(-) diff --git a/doc/src/api_manual/connect_params.rst b/doc/src/api_manual/connect_params.rst index a082bbb8..ef037d78 100644 --- a/doc/src/api_manual/connect_params.rst +++ b/doc/src/api_manual/connect_params.rst @@ -51,9 +51,9 @@ ConnectParams Methods .. method:: ConnectParams.set(user=None, proxy_user=None, password=None, \ newpassword=None, wallet_password=None, access_token=None, host=None, \ port=None, protocol=None, https_proxy=None, https_proxy_port=None, \ - service_name=None, sid=None, server_type=None, cclass=None, \ - purity=None, expire_time=None, retry_count=None, retry_delay=None, \ - tcp_connect_timeout=None, ssl_server_dn_match=None, \ + service_name=None, instance_name=None, sid=None, server_type=None, \ + cclass=None, purity=None, expire_time=None, retry_count=None, \ + retry_delay=None, tcp_connect_timeout=None, ssl_server_dn_match=None, \ ssl_server_cert_dn=None, wallet_location=None, events=None, \ externalauth=None, mode=None, disable_oob=None, stmtcachesize=None, \ edition=None, tag=None, matchanytag=None, config_dir=None, \ @@ -69,7 +69,7 @@ ConnectParams Methods .. versionchanged:: 3.0.0 - The ``use_sni`` parameter was added. + The ``use_sni`` and ``instance_name`` parameters were added. .. versionchanged:: 2.5.0 @@ -260,6 +260,15 @@ ConnectParams Attributes This attribute is supported in both python-oracledb Thin and Thick modes. +.. attribute:: ConnectParams.instance_name + + This read-only attribute is a string that returns the instance name of the + database. + + This attribute is supported in both python-oracledb Thin and Thick modes. + + .. versionadded:: 3.0.0 + .. 
attribute:: ConnectParams.machine This read-only attribute is a string that specifies the machine name of diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index 143fc7bd..6fb49361 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -39,15 +39,15 @@ Oracledb Methods params=None, user=None, proxy_user=None, password=None, \ newpassword=None, wallet_password=None, access_token=None, host=None, \ port=1521, protocol="tcp", https_proxy=None, https_proxy_port=0, \ - service_name=None, sid=None, server_type=None, cclass=None, \ - purity=oracledb.PURITY_DEFAULT, expire_time=0, retry_count=0, \ - retry_delay=1, tcp_connect_timeout=20.0, ssl_server_dn_match=True, \ - ssl_server_cert_dn=None, wallet_location=None, events=False, \ - externalauth=False, mode=oracledb.AUTH_MODE_DEFAULT, \ - disable_oob=False, stmtcachesize=oracledb.defaults.stmtcachesize, \ - edition=None, tag=None, matchanytag=False, \ - config_dir=oracledb.defaults.config_dir, appcontext=[], \ - shardingkey=[], supershardingkey=[], debug_jdwp=None, \ + service_name=None, instance_name=None, sid=None, server_type=None, \ + cclass=None, purity=oracledb.PURITY_DEFAULT, expire_time=0, \ + retry_count=0, retry_delay=1, tcp_connect_timeout=20.0, \ + ssl_server_dn_match=True, ssl_server_cert_dn=None, \ + wallet_location=None, events=False, externalauth=False, \ + mode=oracledb.AUTH_MODE_DEFAULT, disable_oob=False, \ + stmtcachesize=oracledb.defaults.stmtcachesize, edition=None, \ + tag=None, matchanytag=False, config_dir=oracledb.defaults.config_dir, \ + appcontext=[], shardingkey=[], supershardingkey=[], debug_jdwp=None, \ connection_id_prefix=None, ssl_context=None, sdu=8192, \ pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ @@ -170,6 +170,10 @@ Oracledb Methods the service name of the database. This value is used in both the python-oracledb Thin and Thick modes. + The ``instance_name`` parameter is expected to be a string which indicates + the instance name of the database. This value is used in both the + python-oracledb Thin and Thick modes. + The ``sid`` parameter is expected to be a string which indicates the SID of the database. It is recommended to use ``service_name`` instead. This value is used in both the python-oracledb Thin and Thick modes. @@ -403,8 +407,9 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias`` and ``use_sni`` parameters were added. The ``pool`` - parameter was deprecated. Use :meth:`ConnectionPool.acquire()` instead. + The ``pool_alias``, ``instance_name`` and ``use_sni`` parameters were + added. The ``pool`` parameter was deprecated. Use + :meth:`ConnectionPool.acquire()` instead. .. 
versionchanged:: 2.5.0 @@ -435,10 +440,10 @@ Oracledb Methods conn_class=None, params=None, user=None, proxy_user=None, \ password=None, newpassword=None, wallet_password=None, \ access_token=None, host=None, port=1521, protocol="tcp", \ - https_proxy=None, https_proxy_port=0, service_name=None, sid=None, \ - server_type=None, cclass=None, purity=oracledb.PURITY_DEFAULT, \ - expire_time=0, retry_count=0, retry_delay=1, \ - tcp_connect_timeout=20.0, ssl_server_dn_match=True, \ + https_proxy=None, https_proxy_port=0, service_name=None, \ + instance_name=None, sid=None, server_type=None, cclass=None, \ + purity=oracledb.PURITY_DEFAULT, expire_time=0, retry_count=0, \ + retry_delay=1, tcp_connect_timeout=20.0, ssl_server_dn_match=True, \ ssl_server_cert_dn=None, wallet_location=None, events=False, \ externalauth=False, mode=oracledb.AUTH_MODE_DEFAULT, \ disable_oob=False, stmtcachesize=oracledb.defaults.stmtcachesize, \ @@ -550,6 +555,9 @@ Oracledb Methods The ``service_name`` parameter is expected to be a string which indicates the service name of the database. + The ``instance_name`` parameter is expected to be a string which indicates + the instance name of the database. + The ``sid`` parameter is expected to be a string which indicates the SID of the database. It is recommended to use ``service_name`` instead. @@ -730,9 +738,9 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias`` and ``use_sni`` parameters were added. The ``pool`` - parameter was deprecated. Use :meth:`AsyncConnectionPool.acquire()` - instead. + The ``pool_alias``, ``instance_name`` and ``use_sni`` parameters were + added. The ``pool`` parameter was deprecated. Use + :meth:`AsyncConnectionPool.acquire()` instead. .. versionchanged:: 2.5.0 @@ -762,15 +770,15 @@ Oracledb Methods .. function:: ConnectParams(user=None, proxy_user=None, password=None, \ newpassword=None, wallet_password=None, access_token=None, host=None, \ port=1521, protocol="tcp", https_proxy=None, https_proxy_port=0, \ - service_name=None, sid=None, server_type=None, cclass=None, \ - purity=oracledb.PURITY_DEFAULT, expire_time=0, retry_count=0, \ - retry_delay=1, tcp_connect_timeout=20.0, ssl_server_dn_match=True, \ - ssl_server_cert_dn=None, wallet_location=None, events=False, \ - externalauth=False, mode=oracledb.AUTH_MODE_DEFAULT, \ - disable_oob=False, stmtcachesize=oracledb.defaults.stmtcachesize, \ - edition=None, tag=None, matchanytag=False, \ - config_dir=oracledb.defaults.config_dir, appcontext=[], \ - shardingkey=[], supershardingkey=[], debug_jdwp=None, \ + service_name=None, instance_name=None, sid=None, server_type=None, \ + cclass=None, purity=oracledb.PURITY_DEFAULT, expire_time=0, \ + retry_count=0, retry_delay=1, tcp_connect_timeout=20.0, \ + ssl_server_dn_match=True, ssl_server_cert_dn=None, \ + wallet_location=None, events=False, externalauth=False, \ + mode=oracledb.AUTH_MODE_DEFAULT, disable_oob=False, \ + stmtcachesize=oracledb.defaults.stmtcachesize, edition=None, \ + tag=None, matchanytag=False, config_dir=oracledb.defaults.config_dir, \ + appcontext=[], shardingkey=[], supershardingkey=[], debug_jdwp=None, \ connection_id_prefix=None, ssl_context=None, sdu=8192, \ pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ @@ -849,6 +857,10 @@ Oracledb Methods the service name of the database. This value is used in both the python-oracledb Thin and Thick modes. 
+ The ``instance_name`` parameter is expected to be a string which indicates + the instance name of the database. This value is used in both the + python-oracledb Thin and Thick modes. + The ``sid`` parameter is expected to be a string which indicates the SID of the database. It is recommended to use ``service_name`` instead. This value is used in both the python-oracledb Thin and Thick modes. @@ -1077,7 +1089,7 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``use_sni`` parameter was added. + The ``use_sni`` and ``instance_name`` parameters were added. .. versionchanged:: 2.5.0 @@ -1125,15 +1137,15 @@ Oracledb Methods ping_timeout=5000, user=None, proxy_user=None, password=None, \ newpassword=None, wallet_password=None, access_token=None, host=None, \ port=1521, protocol="tcp", https_proxy=None, https_proxy_port=0, \ - service_name=None, sid=None, server_type=None, cclass=None, \ - purity=oracledb.PURITY_DEFAULT, expire_time=0, retry_count=0, \ - retry_delay=1, tcp_connect_timeout=20.0, ssl_server_dn_match=True, \ - ssl_server_cert_dn=None, wallet_location=None, events=False, \ - externalauth=False, mode=oracledb.AUTH_MODE_DEFAULT, \ - disable_oob=False, stmtcachesize=oracledb.defaults.stmtcachesize, \ - edition=None, tag=None, matchanytag=False, \ - config_dir=oracledb.defaults.config_dir, appcontext=[], \ - shardingkey=[], supershardingkey=[], debug_jdwp=None, \ + service_name=None, instance_name=None, sid=None, server_type=None, \ + cclass=None, purity=oracledb.PURITY_DEFAULT, expire_time=0, \ + retry_count=0, retry_delay=1, tcp_connect_timeout=20.0, \ + ssl_server_dn_match=True, ssl_server_cert_dn=None, \ + wallet_location=None, events=False, externalauth=False, \ + mode=oracledb.AUTH_MODE_DEFAULT, disable_oob=False, \ + stmtcachesize=oracledb.defaults.stmtcachesize, edition=None, \ + tag=None, matchanytag=False, config_dir=oracledb.defaults.config_dir, \ + appcontext=[], shardingkey=[], supershardingkey=[], debug_jdwp=None, \ connection_id_prefix=None, ssl_context=None, sdu=8192, \ pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ @@ -1334,6 +1346,10 @@ Oracledb Methods the service name of the database. This value is used in both the python-oracledb Thin and Thick modes. + The ``instance_name`` parameter is expected to be a string which indicates + the instance name of the database. This value is used in both the + python-oracledb Thin and Thick modes. + The ``sid`` parameter is expected to be a string which indicates the SID of the database. It is recommended to use ``service_name`` instead. This value is used in both the python-oracledb Thin and Thick modes. @@ -1564,7 +1580,8 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias`` parameter was added. + The ``pool_alias``, ``instance_name`` and ``use_sni`` parameters were + added. .. versionchanged:: 2.5.0 @@ -1572,10 +1589,6 @@ Oracledb Methods ``driver_name`` parameters were added. Support for ``edition`` and ``appcontext`` was added to python-oracledb Thin mode. - .. versionchanged:: 2.5.0 - - The ``use_sni`` parameter was added. - .. 
versionchanged:: 2.3.0 The default value of the ``retry_delay`` parameter was changed from *0* @@ -1605,15 +1618,15 @@ Oracledb Methods ping_timeout=5000, user=None, proxy_user=None, password=None, \ newpassword=None, wallet_password=None, access_token=None, host=None, \ port=1521, protocol="tcp", https_proxy=None, https_proxy_port=0, \ - service_name=None, sid=None, server_type=None, cclass=None, \ - purity=oracledb.PURITY_DEFAULT, expire_time=0, retry_count=0, \ - retry_delay=1, tcp_connect_timeout=20.0, ssl_server_dn_match=True, \ - ssl_server_cert_dn=None, wallet_location=None, events=False, \ - externalauth=False, mode=oracledb.AUTH_MODE_DEFAULT, \ - disable_oob=False, stmtcachesize=oracledb.defaults.stmtcachesize, \ - edition=None, tag=None, matchanytag=False, \ - config_dir=oracledb.defaults.config_dir, appcontext=[], \ - shardingkey=[], supershardingkey=[], debug_jdwp=None, \ + service_name=None, instance_name=None, sid=None, server_type=None, \ + cclass=None, purity=oracledb.PURITY_DEFAULT, expire_time=0, \ + retry_count=0, retry_delay=1, tcp_connect_timeout=20.0, \ + ssl_server_dn_match=True, ssl_server_cert_dn=None, \ + wallet_location=None, events=False, externalauth=False, \ + mode=oracledb.AUTH_MODE_DEFAULT, disable_oob=False, \ + stmtcachesize=oracledb.defaults.stmtcachesize, edition=None, \ + tag=None, matchanytag=False, config_dir=oracledb.defaults.config_dir, \ + appcontext=[], shardingkey=[], supershardingkey=[], debug_jdwp=None, \ connection_id_prefix=None, ssl_context=None, sdu=8192, \ pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ @@ -1780,6 +1793,9 @@ Oracledb Methods The ``service_name`` parameter is expected to be a string which indicates the service name of the database. + The ``instance_name`` parameter is expected to be a string which indicates + the instance name of the database. + The ``sid`` parameter is expected to be a string which indicates the SID of the database. It is recommended to use ``service_name`` instead. @@ -1958,7 +1974,8 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias`` and ``use_sni`` parameters were added. + The ``pool_alias``, ``instance_name`` and ``use_sni`` parameters were + added. .. versionchanged:: 2.5.0 @@ -2161,10 +2178,10 @@ Oracledb Methods ping_interval=60, ping_timeout=5000, user=None, proxy_user=Nonde, \ password=None, newpassword=None, wallet_password=None, \ access_token=None, host=None, port=1521, protocol="tcp", \ - https_proxy=None, https_proxy_port=0, service_name=None, sid=None, \ - server_type=None, cclass=None, purity=oracledb.PURITY_DEFAULT, \ - expire_time=0, retry_count=0, retry_delay=1, \ - tcp_connect_timeout=20.0, ssl_server_dn_match=True, \ + https_proxy=None, https_proxy_port=0, service_name=None, \ + instance_name=None, sid=None, server_type=None, cclass=None, \ + purity=oracledb.PURITY_DEFAULT, expire_time=0, retry_count=0, \ + retry_delay=1, tcp_connect_timeout=20.0, ssl_server_dn_match=True, \ ssl_server_cert_dn=None, wallet_location=None, events=False, \ externalauth=False, mode=oracledb.AUTH_MODE_DEFAULT, \ disable_oob=False, stmtcachesize=oracledb.defaults.stmtcachesize, \ @@ -2319,6 +2336,10 @@ Oracledb Methods the service name of the database. This value is used in both the python-oracledb Thin and Thick modes. + The ``instance_name`` parameter is expected to be a string which indicates + the instance name of the database. This value is used in both the + python-oracledb Thin and Thick modes. 
+ The ``sid`` parameter is expected to be a string which indicates the SID of the database. It is recommended to use ``service_name`` instead. This value is used in both the python-oracledb Thin and Thick modes. @@ -2541,7 +2562,7 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``use_sni`` parameter was added. + The ``use_sni`` and ``instance_name`` parameters were added. .. versionchanged:: 2.5.0 diff --git a/doc/src/api_manual/pool_params.rst b/doc/src/api_manual/pool_params.rst index 296bc3b4..9ca30588 100644 --- a/doc/src/api_manual/pool_params.rst +++ b/doc/src/api_manual/pool_params.rst @@ -39,16 +39,16 @@ PoolParams Methods ping_interval=None, ping_timeout=None, user=None, proxy_user=None, \ password=None, newpassword=None, wallet_password=None, \ access_token=None, host=None, port=None, protocol=None, \ - https_proxy=None, https_proxy_port=None, service_name=None, sid=None, \ - server_type=None, cclass=None, purity=None, expire_time=None, \ - retry_count=None, retry_delay=None, tcp_connect_timeout=None, \ - ssl_server_dn_match=None, ssl_server_cert_dn=None, \ - wallet_location=None, events=None, externalauth=None, mode=None, \ - disable_oob=None, stmtcachesize=None, edition=None, tag=None, \ - matchanytag=None, config_dir=None, appcontext=[], shardingkey=[], \ - supershardingkey=[], debug_jdwp=None, connection_id_prefix=None, \ - ssl_context=None, sdu=None, pool_boundary=None, \ - use_tcp_fast_open=False, ssl_version=None, \ + https_proxy=None, https_proxy_port=None, service_name=None, \ + instance_name=None, sid=None, server_type=None, cclass=None, \ + purity=None, expire_time=None, retry_count=None, retry_delay=None, \ + tcp_connect_timeout=None, ssl_server_dn_match=None, \ + ssl_server_cert_dn=None, wallet_location=None, events=None, \ + externalauth=None, mode=None, disable_oob=None, stmtcachesize=None, \ + edition=None, tag=None, matchanytag=None, config_dir=None, \ + appcontext=[], shardingkey=[], supershardingkey=[], debug_jdwp=None, \ + connection_id_prefix=None, ssl_context=None, sdu=None, \ + pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, use_sni=None, handle=None) @@ -57,7 +57,7 @@ PoolParams Methods .. versionchanged:: 3.0.0 - The ``use_sni`` parameter was added. + The ``use_sni`` and ``instance_name`` parameters were added. .. versionchanged:: 2.5.0 diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index e831a4cb..8691a32b 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -27,6 +27,9 @@ Thin Mode Changes #) Added parameter :data:`ConnectParams.use_sni` to specify that the TLS SNI extension should be used to reduce the number of TLS neegotiations that are needed to connect to the database. +#) Added parameter :data:`ConnectParams.instance_name` to specify the instance + name to use when connecting to the database. Added support for setting the + instance name in :ref:`Easy Connect strings `. #) Host names are now resolved to IP addresses in python-oracledb instead of the Python libraries. Address list load balancing and failover settings will be used when establishing connections. 
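As a rough sketch of how the new ``instance_name`` support might be exercised
(the host, port, service, and instance names below are placeholders), an Easy
Connect string of the form ``host:port/service_name/instance_name`` parses into
a descriptor containing ``INSTANCE_NAME``, and the parameter can also be passed
directly::

    import oracledb

    # Placeholder host, service, and instance names.
    params = oracledb.ConnectParams()
    params.parse_connect_string("dbhost.example.com:1521/orclpdb/orclinst1")
    print(params.service_name)   # orclpdb
    print(params.instance_name)  # orclinst1

    # The same setting supplied directly when building parameters.
    params = oracledb.ConnectParams(
        host="dbhost.example.com",
        port=1521,
        service_name="orclpdb",
        instance_name="orclinst1",
    )
    print(params.get_connect_string())  # includes (INSTANCE_NAME=orclinst1)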
diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index 1af58eac..4ff641c5 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -488,6 +488,7 @@ cdef class Description(ConnectParamsNode): public uint32_t sdu public double tcp_connect_timeout public str service_name + public str instance_name public str server_type public str sid public str cclass diff --git a/src/oracledb/connect_params.py b/src/oracledb/connect_params.py index 0d38ddd2..44d822cf 100644 --- a/src/oracledb/connect_params.py +++ b/src/oracledb/connect_params.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2021, 2024, Oracle and/or its affiliates. +# Copyright (c) 2021, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -68,6 +68,7 @@ def __init__( https_proxy: str = None, https_proxy_port: int = 0, service_name: str = None, + instance_name: str = None, sid: str = None, server_type: str = None, cclass: str = None, @@ -154,6 +155,8 @@ def __init__( - service_name: the service name of the database (default: None) + - instance_name: the instance name of the database (default: None) + - sid: the system identifier (SID) of the database. Note using a service_name instead is recommended (default: None) @@ -317,6 +320,7 @@ def __repr__(self): + f"https_proxy={self.https_proxy!r}, " + f"https_proxy_port={self.https_proxy_port!r}, " + f"service_name={self.service_name!r}, " + + f"instance_name={self.instance_name!r}, " + f"sid={self.sid!r}, " + f"server_type={self.server_type!r}, " + f"cclass={self.cclass!r}, " @@ -493,6 +497,14 @@ def https_proxy_port(self) -> Union[list, int]: """ return [a.https_proxy_port for a in self._impl._get_addresses()] + @property + @_flatten_value + def instance_name(self) -> Union[list, str]: + """ + The instance name of the database. + """ + return [d.instance_name for d in self._impl.description_list.children] + @property def machine(self) -> str: """ @@ -828,6 +840,7 @@ def set( https_proxy: str = None, https_proxy_port: int = None, service_name: str = None, + instance_name: str = None, sid: str = None, server_type: str = None, cclass: str = None, @@ -911,6 +924,8 @@ def set( - service_name: the service name of the database + - instance_name: the instance name of the database + - sid: the system identifier (SID) of the database. Note using a service_name instead is recommended diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index 7a39ae8c..8978a1c4 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -1236,6 +1236,7 @@ def connect( https_proxy: str = None, https_proxy_port: int = 0, service_name: str = None, + instance_name: str = None, sid: str = None, server_type: str = None, cclass: str = None, @@ -1347,6 +1348,8 @@ def connect( - service_name: the service name of the database (default: None) + - instance_name: the instance name of the database (default: None) + - sid: the system identifier (SID) of the database. 
Note using a service_name instead is recommended (default: None) @@ -2006,6 +2009,7 @@ def connect_async( https_proxy: str = None, https_proxy_port: int = 0, service_name: str = None, + instance_name: str = None, sid: str = None, server_type: str = None, cclass: str = None, @@ -2117,6 +2121,8 @@ def connect_async( - service_name: the service name of the database (default: None) + - instance_name: the instance name of the database (default: None) + - sid: the system identifier (SID) of the database. Note using a service_name instead is recommended (default: None) diff --git a/src/oracledb/impl/base/connect_params.pyx b/src/oracledb/impl/base/connect_params.pyx index f50af01b..0ebfd5a2 100644 --- a/src/oracledb/impl/base/connect_params.pyx +++ b/src/oracledb/impl/base/connect_params.pyx @@ -828,6 +828,8 @@ cdef class Description(ConnectParamsNode): temp_parts = [] if self.service_name is not None: temp_parts.append(f"(SERVICE_NAME={self.service_name})") + if self.instance_name is not None: + temp_parts.append(f"(INSTANCE_NAME={self.instance_name})") elif self.sid is not None: temp_parts.append(f"(SID={self.sid})") if self.server_type is not None: @@ -886,6 +888,7 @@ cdef class Description(ConnectParamsNode): description.sdu = self.sdu description.tcp_connect_timeout = self.tcp_connect_timeout description.service_name = self.service_name + description.instance_name = self.instance_name description.server_type = self.server_type description.sid = self.sid description.cclass = self.cclass @@ -914,6 +917,7 @@ cdef class Description(ConnectParamsNode): node in a connect descriptor. """ _set_str_param(args, "service_name", self) + _set_str_param(args, "instance_name", self) _set_str_param(args, "sid", self) server_type = args.get("server_type") if server_type is not None: diff --git a/src/oracledb/impl/base/parsers.pyx b/src/oracledb/impl/base/parsers.pyx index 1c567621..0dda8432 100644 --- a/src/oracledb/impl/base/parsers.pyx +++ b/src/oracledb/impl/base/parsers.pyx @@ -55,6 +55,7 @@ CONTAINER_PARAM_NAMES = set([ CONNECT_DATA_PARAM_NAMES = set([ "cclass", "connection_id_prefix", + "instance_name", "pool_boundary", "purity", "server_type", @@ -363,6 +364,7 @@ cdef class ConnectStringParser(BaseParser): self.template_address.set_protocol(protocol) self._parse_easy_connect_hosts() self._parse_easy_connect_service_name() + self._parse_easy_connect_instance_name() if self.description_list is not None: self._parse_easy_connect_parameters() @@ -606,6 +608,34 @@ cdef class ConnectStringParser(BaseParser): value = self.data_as_str[service_name_end_pos + 1:self.temp_pos] self.description.set_server_type(value) + cdef str _parse_easy_connect_instance_name(self): + """ + Parses the instance name from an easy connect string. This is expected + to be a slash followed by a series of alphanumeric characters. If such + a string is found, it is returned. 
+ """ + cdef: + ssize_t instance_name_end_pos = 0 + bint found_instance_name = False + bint found_slash = False + Py_UCS4 ch + str value + self.temp_pos = self.pos + while self.temp_pos < self.num_chars: + ch = self.get_current_char() + if not found_slash and ch == '/': + found_slash = True + elif found_slash and self._is_host_or_service_name_char(ch): + found_instance_name = True + instance_name_end_pos = self.temp_pos + 1 + else: + break + self.temp_pos += 1 + if found_instance_name: + self.description.instance_name = \ + self.data_as_str[self.pos + 1:instance_name_end_pos] + self.pos = self.temp_pos + cdef dict _set_connect_data(self, dict args): """ Sets the connect data value. diff --git a/src/oracledb/pool.py b/src/oracledb/pool.py index 395b7c8a..b86c2d6e 100644 --- a/src/oracledb/pool.py +++ b/src/oracledb/pool.py @@ -632,6 +632,7 @@ def create_pool( https_proxy: str = None, https_proxy_port: int = 0, service_name: str = None, + instance_name: str = None, sid: str = None, server_type: str = None, cclass: str = None, @@ -800,6 +801,8 @@ def create_pool( - service_name: the service name of the database (default: None) + - instance_name: the instance name of the database (default: None) + - sid: the system identifier (SID) of the database. Note using a service_name instead is recommended (default: None) @@ -1142,6 +1145,7 @@ def create_pool_async( https_proxy: str = None, https_proxy_port: int = 0, service_name: str = None, + instance_name: str = None, sid: str = None, server_type: str = None, cclass: str = None, @@ -1311,6 +1315,8 @@ def create_pool_async( - service_name: the service name of the database (default: None) + - instance_name: the instance name of the database (default: None) + - sid: the system identifier (SID) of the database. Note using a service_name instead is recommended (default: None) diff --git a/src/oracledb/pool_params.py b/src/oracledb/pool_params.py index 9050112e..36b63be6 100644 --- a/src/oracledb/pool_params.py +++ b/src/oracledb/pool_params.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2022, 2024, Oracle and/or its affiliates. +# Copyright (c) 2022, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -81,6 +81,7 @@ def __init__( https_proxy: str = None, https_proxy_port: int = 0, service_name: str = None, + instance_name: str = None, sid: str = None, server_type: str = None, cclass: str = None, @@ -223,6 +224,8 @@ def __init__( - service_name: the service name of the database (default: None) + - instance_name: the instance name of the database (default: None) + - sid: the system identifier (SID) of the database. 
Note using a service_name instead is recommended (default: None) @@ -400,6 +403,7 @@ def __repr__(self): + f"https_proxy={self.https_proxy!r}, " + f"https_proxy_port={self.https_proxy_port!r}, " + f"service_name={self.service_name!r}, " + + f"instance_name={self.instance_name!r}, " + f"sid={self.sid!r}, " + f"server_type={self.server_type!r}, " + f"cclass={self.cclass!r}, " @@ -597,6 +601,7 @@ def set( https_proxy: str = None, https_proxy_port: int = None, service_name: str = None, + instance_name: str = None, sid: str = None, server_type: str = None, cclass: str = None, @@ -731,6 +736,8 @@ def set( - service_name: the service name of the database + - instance_name: the instance name of the database + - sid: the system identifier (SID) of the database. Note using a service_name instead is recommended diff --git a/tests/test_4500_connect_params.py b/tests/test_4500_connect_params.py index fa3c484f..01311ba7 100644 --- a/tests/test_4500_connect_params.py +++ b/tests/test_4500_connect_params.py @@ -651,6 +651,7 @@ def test_4539(self): ("https_proxy", "proxy_a"), ("https_proxy_port", 4528), ("service_name", "my_service_name1"), + ("instance_name", "my_instance_name"), ("sid", "my_sid1"), ("server_type", "dedicated"), ("cclass", "cclass_1"), @@ -703,6 +704,7 @@ def test_4539(self): ("https_proxy", "proxy_b"), ("https_proxy_port", 4529), ("service_name", "my_service_name_new"), + ("instance_name", "my_instance_name_new"), ("sid", "my_sid_new"), ("server_type", "pooled"), ("cclass", "cclass_new"), @@ -1227,6 +1229,27 @@ def test_4570(self): params.parse_connect_string(connect_string) self.assertEqual(params.get_connect_string(), connect_string) + def test_4571(self): + "4571 - test INSTANCE_NAME in connect string" + service_name = "service_4571" + instance_name = "instance_4571" + host = "host_4571" + port = 4571 + easy_connect = f"{host}:{port}/{service_name}/{instance_name}" + connect_descriptor = ( + f"(DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST={host})(PORT={port}))" + f"(CONNECT_DATA=(SERVICE_NAME={service_name})" + f"(INSTANCE_NAME={instance_name})))" + ) + for connect_string in (easy_connect, connect_descriptor): + params = oracledb.ConnectParams() + params.parse_connect_string(connect_string) + self.assertEqual(params.service_name, service_name) + self.assertEqual(params.instance_name, instance_name) + self.assertEqual(params.host, host) + self.assertEqual(params.port, port) + self.assertEqual(params.get_connect_string(), connect_descriptor) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_4700_pool_params.py b/tests/test_4700_pool_params.py index b0ec5054..dd510239 100644 --- a/tests/test_4700_pool_params.py +++ b/tests/test_4700_pool_params.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2022, 2024, Oracle and/or its affiliates. +# Copyright (c) 2022, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -94,6 +94,7 @@ def test_4701(self): ("https_proxy", "proxy_4701"), ("https_proxy_port", 4701), ("service_name", "my_service_name1"), + ("instance_name", "my_instance_name"), ("sid", "my_sid1"), ("server_type", "dedicated"), ("cclass", "cclass_1"), diff --git a/utils/fields.cfg b/utils/fields.cfg index 63f07edb..fff9fb9c 100644 --- a/utils/fields.cfg +++ b/utils/fields.cfg @@ -233,6 +233,12 @@ source = description description = the service name of the database +[instance_name] +type = str +source = description +description = + the instance name of the database + [sid] type = str source = description diff --git a/utils/templates/connect_params.py b/utils/templates/connect_params.py index 14842959..941b1c2f 100644 --- a/utils/templates/connect_params.py +++ b/utils/templates/connect_params.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2021, 2024, Oracle and/or its affiliates. +# Copyright (c) 2021, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License diff --git a/utils/templates/pool_params.py b/utils/templates/pool_params.py index 9a1a4e9d..6df381ff 100644 --- a/utils/templates/pool_params.py +++ b/utils/templates/pool_params.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2022, 2024, Oracle and/or its affiliates. +# Copyright (c) 2022, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License From 9b4510e3b5c202670221ad7857e141439a6ecc1e Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 09:58:11 -0700 Subject: [PATCH 024/178] Correct handling of user and password in a configuration. 
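A minimal sketch of the behaviour this change targets, using a hypothetical
configuration dictionary (the connect descriptor, user name, and base64-encoded
password below are placeholders): the user and password from a configuration
are only applied when they have not already been set on the parameters and
external authentication is not in use, and a plaintext password in the
configuration is rejected::

    import base64
    import oracledb

    # Hypothetical configuration; all values are placeholders.
    config = {
        "connect_descriptor": "dbhost.example.com/orclpdb",
        "user": "hr",
        "password": {
            "type": "base64",
            "value": base64.b64encode(b"my_password").decode(),
        },
    }

    params = oracledb.ConnectParams()
    params.set_from_config(config)
    print(params.user)          # hr
    print(params.host)          # dbhost.example.com
    print(params.service_name)  # orclpdb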
--- src/oracledb/impl/base/connect_params.pyx | 5 +-- tests/test_4500_connect_params.py | 40 ++++++++++++++--------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/oracledb/impl/base/connect_params.pyx b/src/oracledb/impl/base/connect_params.pyx index 0ebfd5a2..1948c21f 100644 --- a/src/oracledb/impl/base/connect_params.pyx +++ b/src/oracledb/impl/base/connect_params.pyx @@ -125,10 +125,11 @@ cdef class ConnectParamsImpl: if connect_string is None: errors._raise_err(errors.ERR_MISSING_CONNECT_DESCRIPTOR) self.parse_connect_string(connect_string) - if self.user is None and self._password is None: + if self.user is None and self._password is None \ + and not self.externalauth: user = config.get("user") password = config.get("password") - if not isinstance(password, dict): + if password is not None and not isinstance(password, dict): errors._raise_err(errors.ERR_PLAINTEXT_PASSWORD_IN_CONFIG) if user is not None or password is not None: self.set(dict(user=user, password=password)) diff --git a/tests/test_4500_connect_params.py b/tests/test_4500_connect_params.py index 01311ba7..a14aaafd 100644 --- a/tests/test_4500_connect_params.py +++ b/tests/test_4500_connect_params.py @@ -1116,24 +1116,32 @@ def test_4564(self): def test_4565(self): "4565 - test set_from_config() with no user and password set" - host = "host_4565" - service_name = "service_4565" - connect_string = f"{host}/{service_name}" user = "user_4565" password = test_env.get_random_string() - config = dict( - connect_descriptor=connect_string, - user=user, - password=dict( - type="base64", - value=base64.b64encode(password.encode()).decode(), - ), - ) - params = oracledb.ConnectParams() - params.set_from_config(config) - self.assertEqual(params.host, host) - self.assertEqual(params.service_name, service_name) - self.assertEqual(params.user, user) + options = [ + ("a", user, password), + ("b", user, None), + ("c", None, None), + ] + for option, user, password in options: + with self.subTest(option=option): + host = f"host_4565{option}" + service_name = f"service_4565{option}" + connect_string = f"{host}/{service_name}" + config = dict(connect_descriptor=connect_string) + if user is not None: + config["user"] = user + if password is not None: + config["password"] = dict( + type="base64", + value=base64.b64encode(password.encode()).decode(), + ) + params = oracledb.ConnectParams() + params.set_from_config(config) + self.assertEqual(params.host, host) + self.assertEqual(params.service_name, service_name) + if user is not None: + self.assertEqual(params.user, user) def test_4566(self): "4566 - test set_from_config() with user and password already set" From 998fca91dd6f33f2e5150089784781da60c3cbcd Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 10:00:43 -0700 Subject: [PATCH 025/178] Further work on handling connection string parsing for lesser known parameters. 
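A rough sketch of the intended pass-through behaviour, using hypothetical
connect strings: parameters such as ``ENABLE`` and ``RECV_BUF_SIZE`` in an Easy
Connect string, and unrecognized keys in the ``DESCRIPTION``, ``CONNECT_DATA``
or ``SECURITY`` sections of a connect descriptor, are retained when the connect
string is regenerated::

    import oracledb

    # Hypothetical Easy Connect string with pass-through parameters.
    params = oracledb.ConnectParams()
    params.parse_connect_string(
        "dbhost.example.com/orclpdb?enable=broken&recv_buf_size=1024"
    )
    # The regenerated descriptor retains (ENABLE=broken)(RECV_BUF_SIZE=1024).
    print(params.get_connect_string())

    # Hypothetical descriptor with an unrecognized SECURITY parameter.
    params = oracledb.ConnectParams()
    params.parse_connect_string(
        "(DESCRIPTION="
        "(ADDRESS=(PROTOCOL=tcps)(HOST=dbhost.example.com)(PORT=1522))"
        "(CONNECT_DATA=(SERVICE_NAME=orclpdb))"
        "(SECURITY=(SSL_SERVER_DN_MATCH=ON)(MY_CUSTOM_KEY=my_value)))"
    )
    # (MY_CUSTOM_KEY=my_value) is preserved in the regenerated string.
    print(params.get_connect_string())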
--- doc/src/release_notes.rst | 14 +- src/oracledb/base_impl.pxd | 3 + src/oracledb/impl/base/connect_params.pyx | 51 ++- src/oracledb/impl/base/parsers.pyx | 84 +++- tests/test_4500_connect_params.py | 465 +++++++++++++++++++++- 5 files changed, 575 insertions(+), 42 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 8691a32b..523ab256 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -85,13 +85,15 @@ Common Changes #) Added :meth:`oracledb.register_password_type()` to allow users to register a function that will be called when a password is supplied as a dictionary containing the key "type". -#) All connect strings are now parsed by the driver. Previously, only thin - mode parsed all connect strings and thick mode passed the connect string +#) All connect strings are now parsed by the driver. Previously, only Thin + mode parsed all connect strings and Thick mode passed the connect string unchanged to the Oracle Client library to parse. Parameters unrecognized by - the driver in Easy Connect strings are now ignored. Parameters unrecognized - by the driver in the ``CONNECT_DATA`` section of a full connect descriptor - are passed through unchanged. All other parameters in other sections of a - full connect deescriptor that are unrecognized by the driver are ignored. + the driver in :ref:`Easy Connect strings ` are now ignored. + Parameters unrecognized by the driver in the ``DESCRIPTION``, + ``CONNECT_DATA`` and ``SECURITY`` sections of a + :ref:`full connect descriptor ` are passed through + unchanged. All other parameters in other sections of a full connect + descriptor that are unrecognized by the driver are ignored. #) Added attributes :attr:`DbObjectAttribute.precision`, :attr:`DbObjectAttribute.scale`, and :attr:`DbObjectAttribute.max_size` that provide additional metadata about diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index 4ff641c5..b7f4c28e 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -456,6 +456,7 @@ cdef class ConnectParamsNode: list active_children cdef int _copy(self, ConnectParamsNode source) except -1 + cdef list _get_initial_connect_string_parts(self) cdef int _set_active_children(self, list children) except -1 @@ -502,6 +503,8 @@ cdef class Description(ConnectParamsNode): public object ssl_version public str wallet_location dict extra_connect_data_args + dict extra_security_args + dict extra_args str connection_id cdef str _build_duration_str(self, double value) diff --git a/src/oracledb/impl/base/connect_params.pyx b/src/oracledb/impl/base/connect_params.pyx index 1948c21f..6628424f 100644 --- a/src/oracledb/impl/base/connect_params.pyx +++ b/src/oracledb/impl/base/connect_params.pyx @@ -561,6 +561,20 @@ cdef class ConnectParamsNode: self.load_balance = source.load_balance self.source_route = source.source_route + cdef list _get_initial_connect_string_parts(self): + """ + Returns a list of the initial connect strings parts used for container + nodes. + """ + cdef list parts = [] + if not self.failover: + parts.append("(FAILOVER=OFF)") + if self.load_balance: + parts.append("(LOAD_BALANCE=ON)") + if self.source_route: + parts.append("(SOURCE_ROUTE=ON)") + return parts + cdef int _set_active_children(self, list children) except -1: """ Set the active children to process when connecting to the database. @@ -731,8 +745,12 @@ cdef class AddressList(ConnectParamsNode): """ Build a connect string from the components. 
""" - cdef Address a - parts = [a.build_connect_string() for a in self.children] + cdef: + Address address + list parts + parts = self._get_initial_connect_string_parts() + for address in self.children: + parts.append(address.build_connect_string()) if len(parts) == 1: return parts[0] return f'(ADDRESS_LIST={"".join(parts)})' @@ -795,11 +813,7 @@ cdef class Description(ConnectParamsNode): str temp # build top-level description parts - parts = [] - if self.load_balance: - parts.append("(LOAD_BALANCE=ON)") - if self.source_route: - parts.append("(SOURCE_ROUTE=ON)") + parts = self._get_initial_connect_string_parts() if self.retry_count != 0: parts.append(f"(RETRY_COUNT={self.retry_count})") if self.retry_delay != 0: @@ -813,6 +827,9 @@ cdef class Description(ConnectParamsNode): parts.append("(USE_SNI=ON)") if self.sdu != DEFAULT_SDU: parts.append(f"(SDU={self.sdu})") + if self.extra_args is not None: + parts.extend(f"({k.upper()}={self._value_repr(v)})" + for k, v in self.extra_args.items()) # add address lists, but if the address list contains only a single # entry and that entry does not have a host, the other parts aren't @@ -872,6 +889,9 @@ cdef class Description(ConnectParamsNode): if self.wallet_location is not None: temp = f"(MY_WALLET_DIRECTORY={self.wallet_location})" temp_parts.append(temp) + if self.extra_security_args is not None: + temp_parts.extend(f"({k.upper()}={self._value_repr(v)})" + for k, v in self.extra_security_args.items()) parts.append(f'(SECURITY={"".join(temp_parts)})') return f'(DESCRIPTION={"".join(parts)})' @@ -902,6 +922,9 @@ cdef class Description(ConnectParamsNode): description.ssl_version = self.ssl_version description.use_sni = self.use_sni description.wallet_location = self.wallet_location + description.extra_args = self.extra_args + description.extra_connect_data_args = self.extra_connect_data_args + description.extra_security_args = self.extra_security_args return description def set_from_args(self, dict args): @@ -949,6 +972,9 @@ cdef class Description(ConnectParamsNode): self.sdu = min(max(self.sdu, 512), 2097152) # sanitize SDU _set_duration_param(args, "tcp_connect_timeout", &self.tcp_connect_timeout) + extra_args = args.get("extra_args") + if extra_args is not None: + self.extra_args = extra_args def set_from_security_args(self, dict args): """ @@ -959,6 +985,9 @@ cdef class Description(ConnectParamsNode): _set_str_param(args, "ssl_server_cert_dn", self) _set_ssl_version_param(args, "ssl_version", self) _set_str_param(args, "wallet_location", self) + extra_args = args.get("extra_security_args") + if extra_args is not None: + self.extra_security_args = extra_args cdef int set_server_type(self, str value) except -1: """ @@ -985,12 +1014,14 @@ cdef class DescriptionList(ConnectParamsNode): Build a connect string from the components. 
""" cdef: - Description d + Description description list parts - parts = [d.build_connect_string() for d in self.children] + parts = self._get_initial_connect_string_parts() + for description in self.children: + parts.append(description.build_connect_string()) if len(parts) == 1: return parts[0] - return f'(DESCIPTION_LIST={"".join(parts)})' + return f'(DESCRIPTION_LIST={"".join(parts)})' cdef list get_addresses(self): """ diff --git a/src/oracledb/impl/base/parsers.pyx b/src/oracledb/impl/base/parsers.pyx index 0dda8432..a9dcdb0b 100644 --- a/src/oracledb/impl/base/parsers.pyx +++ b/src/oracledb/impl/base/parsers.pyx @@ -50,8 +50,34 @@ CONTAINER_PARAM_NAMES = set([ "security", ]) +# DESCRIPTION parameter names that are supported by the driver; all other +# key/value pairs are passed unchanged to the database +DESCRIPTION_PARAM_NAMES = set([ + "address", + "address_list", + "connect_data", + "expire_time", + "failover", + "load_balance", + "source_route", + "retry_count", + "retry_delay", + "sdu", + "tcp_connect_timeout", + "use_sni", + "security", +]) + +# extra DESCRIPTION parameter names that are passed through when detected in an +# easy connect string +EXTRA_DESCRIPTION_PARAM_NAMES = set([ + "enable", + "recv_buf_size", + "send_buf_size" +]) + # CONNECT_DATA parameter names that are supported by the driver; all other -# simple key/value pairs are passed unchanged to the database +# key/value pairs are passed unchanged to the database CONNECT_DATA_PARAM_NAMES = set([ "cclass", "connection_id_prefix", @@ -64,18 +90,30 @@ CONNECT_DATA_PARAM_NAMES = set([ "use_tcp_fast_open", ]) +# SECURITY parameter names that are supported by the driver; all other +# key/value pairs are passed unchanged to the database +SECURITY_PARAM_NAMES = set([ + "ssl_server_cert_dn", + "ssl_server_dn_match", + "ssl_version", + "wallet_location", +]) + # a set of parameter names supported by the driver in EasyConnect strings that # are common to all drivers COMMON_PARAM_NAMES = set([ "expire_time", + "failover", "https_proxy", "https_proxy_port", + "load_balance", "pool_boundary", "pool_connection_class", "pool_purity", "retry_count", "retry_delay", "sdu", + "source_route", "ssl_server_cert_dn", "ssl_server_dn_match", "transport_connect_timeout", @@ -154,7 +192,9 @@ cdef class BaseParser: cdef Py_UCS4 ch while self.temp_pos < self.num_chars: ch = self.get_current_char() - if not cpython.Py_UNICODE_ISALPHA(ch) and ch != '_' and ch != '.': + if not cpython.Py_UNICODE_ISALPHA(ch) \ + and not cpython.Py_UNICODE_ISDIGIT(ch) \ + and ch != '_' and ch != '.': break self.temp_pos += 1 @@ -444,9 +484,9 @@ cdef class ConnectStringParser(BaseParser): """ cdef: ssize_t start_pos, end_pos = 0 + str name, value Py_UCS4 ch = 0 bint keep - str name # get parameter name self.skip_spaces() @@ -459,7 +499,8 @@ cdef class ConnectStringParser(BaseParser): name = name[len(EXTENDED_PARAM_PREFIX):] keep = name in EXTENDED_PARAM_NAMES else: - keep = name in COMMON_PARAM_NAMES + keep = name in COMMON_PARAM_NAMES \ + or name in EXTRA_DESCRIPTION_PARAM_NAMES name = ALTERNATIVE_PARAM_NAMES.get(name, name) # look for the equals sign @@ -495,7 +536,11 @@ cdef class ConnectStringParser(BaseParser): if end_pos > start_pos and keep: if self.parameters is None: self.parameters = {} - self.parameters[name] = self.data_as_str[start_pos:end_pos] + value = self.data_as_str[start_pos:end_pos] + if name in EXTRA_DESCRIPTION_PARAM_NAMES: + self.parameters.setdefault("extra_args", {})[name] = value + else: + self.parameters[name] = value self.skip_spaces() 
self.pos = self.temp_pos @@ -636,19 +681,23 @@ cdef class ConnectStringParser(BaseParser): self.data_as_str[self.pos + 1:instance_name_end_pos] self.pos = self.temp_pos - cdef dict _set_connect_data(self, dict args): + cdef dict _process_args_with_extras(self, dict args, set allowed_names, + str extras_name): """ - Sets the connect data value. + Processes arguments which contain a set of known attributes and any + number of unknown attributes. The known attributes are left untouched + whereas the unknown ones are put in a separate dictionary with the + given name. """ cdef: dict extras, result = {} object value str key for key, value in args.items(): - if key in CONNECT_DATA_PARAM_NAMES: + if key in allowed_names: result[key] = value else: - extras = result.setdefault("extra_connect_data_args", {}) + extras = result.setdefault(extras_name, {}) extras[key] = value return result @@ -663,6 +712,21 @@ cdef class ConnectStringParser(BaseParser): being added and addresses already exist, those addresses are first added to the address list before the new value is added. """ + # process unrecognized parameters, if applicable + if name == "description": + value = self._process_args_with_extras( + value, DESCRIPTION_PARAM_NAMES, "extra_args" + ) + elif name == "connect_data": + value = self._process_args_with_extras( + value, CONNECT_DATA_PARAM_NAMES, "extra_connect_data_args" + ) + elif name == "security": + value = self._process_args_with_extras( + value, SECURITY_PARAM_NAMES, "extra_security_args" + ) + + # add value to arguments, creating a list if encountered multiple times orig_value = args.get(name) if orig_value is None: if name == "address" and "address_list" in args: @@ -673,8 +737,6 @@ cdef class ConnectStringParser(BaseParser): if not isinstance(addresses, list): addresses = [addresses] value = [dict(address=a) for a in addresses] + [value] - elif name == "connect_data": - value = self._set_connect_data(value) args[name] = value elif isinstance(orig_value, list): args[name].append(value) diff --git a/tests/test_4500_connect_params.py b/tests/test_4500_connect_params.py index a14aaafd..80f33134 100644 --- a/tests/test_4500_connect_params.py +++ b/tests/test_4500_connect_params.py @@ -331,19 +331,21 @@ def test_4520(self): def test_4521(self): "4521 - test connect string with an address list" params = oracledb.ConnectParams() - connect_string = """ - (DESCRIPTION=(LOAD_BALANCE=ON)(RETRY_COUNT=5)(RETRY_DELAY=2) - (ADDRESS_LIST=(LOAD_BALANCE=ON) - (ADDRESS=(PROTOCOL=tcp)(PORT=1521)(HOST=my_host25)) - (ADDRESS=(PROTOCOL=tcps)(PORT=222)(HOST=my_host26))) - (CONNECT_DATA=(SERVICE_NAME=my_service_name25)))""" + connect_string = ( + "(DESCRIPTION=(LOAD_BALANCE=ON)(RETRY_COUNT=5)(RETRY_DELAY=2)" + "(ADDRESS_LIST=(LOAD_BALANCE=ON)" + "(ADDRESS=(PROTOCOL=tcp)(HOST=my_host25)(PORT=4521))" + "(ADDRESS=(PROTOCOL=tcp)(HOST=my_host26)(PORT=4522)))" + "(CONNECT_DATA=(SERVICE_NAME=my_service_name25)))" + ) params.parse_connect_string(connect_string) self.assertEqual(params.host, ["my_host25", "my_host26"]) - self.assertEqual(params.port, [1521, 222]) - self.assertEqual(params.protocol, ["tcp", "tcps"]) + self.assertEqual(params.port, [4521, 4522]) + self.assertEqual(params.protocol, ["tcp", "tcp"]) self.assertEqual(params.service_name, "my_service_name25") self.assertEqual(params.retry_count, 5) self.assertEqual(params.retry_delay, 2) + self.assertEqual(params.get_connect_string(), connect_string) def test_4522(self): "4522 - test connect string with multiple address lists" @@ -477,9 +479,9 @@ def 
test_4527(self): tcp_timeout_val = f"(TRANSPORT_CONNECT_TIMEOUT={out_val})" connect_string = ( f"(DESCRIPTION={tcp_timeout_val}" - + "(ADDRESS=(PROTOCOL=tcp)" - + f"(HOST={host})(PORT=1521))(CONNECT_DATA=" - + f"(SERVICE_NAME={service_name})))" + "(ADDRESS=(PROTOCOL=tcp)" + f"(HOST={host})(PORT=1521))(CONNECT_DATA=" + f"(SERVICE_NAME={service_name})))" ) self.assertEqual(params.get_connect_string(), connect_string) @@ -559,10 +561,10 @@ def test_4531(self): source_route_clause = "(SOURCE_ROUTE=ON)" if has_section else "" connect_string = ( f"(DESCRIPTION={source_route_clause}" - + "(ADDRESS_LIST=" - + "(ADDRESS=(PROTOCOL=tcp)(HOST=host1)(PORT=1521))" - + "(ADDRESS=(PROTOCOL=tcp)(HOST=host2)(PORT=1522)))" - + "(CONNECT_DATA=(SERVICE_NAME=my_service_35)))" + f"(ADDRESS_LIST={source_route_clause}" + "(ADDRESS=(PROTOCOL=tcp)(HOST=host1)(PORT=1521))" + "(ADDRESS=(PROTOCOL=tcp)(HOST=host2)(PORT=1522)))" + "(CONNECT_DATA=(SERVICE_NAME=my_service_35)))" ) self.assertEqual(params.get_connect_string(), connect_string) @@ -1258,6 +1260,439 @@ def test_4571(self): self.assertEqual(params.port, port) self.assertEqual(params.get_connect_string(), connect_descriptor) + def test_4572(self): + "4572 - test passing through unrecognized parameters in SECURITY" + options = [ + "(SIMPLE_KEY=SIMPLE_VALUE)", + "(COMPLEX_KEY=(SUB_VALUE_A=23)(SUB_VALUE_B=27))", + "(COMPLEX_KEY=(SUB_VALUE_A=A)(SUB_VALUE_B=(SUB_SUB_A=B)))", + ] + for option in options: + with self.subTest(option=option): + connect_string = ( + "(DESCRIPTION=(ADDRESS=(PROTOCOL=tcps)(HOST=host4572)" + "(PORT=1521))(CONNECT_DATA=(SERVICE_NAME=service4572))" + f"(SECURITY=(SSL_SERVER_DN_MATCH=ON){option}))" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(connect_string) + self.assertEqual(params.get_connect_string(), connect_string) + + def test_4573(self): + "4573 - test passing through unrecognized parameters in DESCRIPTION" + options = [ + "(SIMPLE_KEY=SIMPLE_VALUE)", + "(COMPLEX_KEY=(SUB_VALUE_1=1)(SUB_VALUE_B=2))", + "(COMPLEX_KEY=(SUB_VALUE_2=S)(SUB_VALUE_B=(SUB_SUB_A=T)))", + ] + for option in options: + with self.subTest(option=option): + connect_string = ( + "(DESCRIPTION_LIST=" + f"(DESCRIPTION={option}(ADDRESS=(PROTOCOL=tcp)" + "(HOST=host4573a)(PORT=1521))" + "(CONNECT_DATA=(SERVICE_NAME=service4573)))" + f"(DESCRIPTION={option}(ADDRESS=(PROTOCOL=tcp)" + "(HOST=host4573b)(PORT=1521))" + "(CONNECT_DATA=(SERVICE_NAME=service4573))))" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(connect_string) + self.assertEqual(params.get_connect_string(), connect_string) + + def test_4574(self): + "4574 - test passing through specific unsupported parameters" + easy_connect = ( + "host_4574/service_4574?" 
+ "enable=broken&recv_buf_size=1024&send_buf_size=2048" + ) + connect_descriptor = ( + "(DESCRIPTION=(ENABLE=broken)(RECV_BUF_SIZE=1024)" + "(SEND_BUF_SIZE=2048)(ADDRESS=(PROTOCOL=tcp)(HOST=host_4574)" + "(PORT=1521))(CONNECT_DATA=(SERVICE_NAME=service_4574)))" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(easy_connect) + self.assertEqual(params.get_connect_string(), connect_descriptor) + + def test_4575(self): + "4575 - test syntax rule for keywords" + for value, ok in [ + ("(SIMPLE_KEY=SIMPLE_VALUE)", True), + ("(KEY_CONTAINS SPACE=SIMPLE_VALUE)", False), + ("(∆KEY✓🚀=SIMPLE_VALUE)", False), + ("(§∞ホスト🔑=SIMPLE_VALUE)", False), + ("(^MY_KEY_NAME=SIMPLE_VALUE)", False), + ("(KEY_CONTAINS TAB=SIMPLE_VALUE)", False), + ("(KEY_CONTAINS_QUOTES_''=SIMPLE_VALUE)", False), + ("(KEY_CONTAINS'\r'=SIMPLE_VALUE)", False), + ("(KEY_CONTAINS'\n'=SIMPLE_VALUE)", False), + ]: + with self.subTest(value=value): + connect_string = ( + "(DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST=host4573)" + + "(PORT=1521))(CONNECT_DATA=(SERVICE_NAME=service4573)" + + f"{value}))" + ) + if ok: + params = oracledb.ConnectParams() + params.parse_connect_string(connect_string) + self.assertEqual( + params.get_connect_string(), connect_string + ) + else: + with self.assertRaisesFullCode("DPY-4017"): + params.parse_connect_string(connect_string) + + def test_4576(self): + "4576 - test syntax rule for keywords in easy connect string" + for value, ok in [ + ("simple_key=simple_value", True), + ("key_contains space=simple_value", False), + ("∆key✓🚀=simple_value", False), + ("^my_key_name=simple_value", False), + ("key_contains tab=simple_value", False), + ("key_contains_quotes_''=simple_value", False), + ("key_contains'r'=simple_value", False), + ("key_contains'\n'=simple_value", False), + ]: + with self.subTest(value=value): + easy_connect = f"""host4574:1589/service4574?{value}""" + connect_string_exp = ( + "(DESCRIPTION=" + + "(ADDRESS=(PROTOCOL=tcp)(HOST=host4574)(PORT=1589))" + + "(CONNECT_DATA=(SERVICE_NAME=service4574)))" + ) + if ok: + params = oracledb.ConnectParams() + params.parse_connect_string(easy_connect) + self.assertEqual(params.host, "host4574") + self.assertEqual(params.port, 1589) + self.assertEqual(params.service_name, "service4574") + self.assertEqual( + params.get_connect_string(), connect_string_exp + ) + else: + with self.assertRaisesFullCode("DPY-4018"): + params.parse_connect_string(easy_connect) + + def test_4577(self): + "4577 - test for DESCRIPTION_LIST with FAILOVER" + connect_string = ( + "(DESCRIPTION_LIST=(FAILOVER=OFF)(LOAD_BALANCE=ON)" + "(DESCRIPTION=(LOAD_BALANCE=ON)(RETRY_COUNT=1)(RETRY_DELAY=1)" + "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=my_host30)(PORT=5001))" + "(ADDRESS=(PROTOCOL=tcp)(HOST=my_host31)(PORT=1521)))" + "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=my_host32)(PORT=5002))" + "(ADDRESS=(PROTOCOL=tcp)(HOST=my_host32)(PORT=5003)))" + "(CONNECT_DATA=(SERVICE_NAME=my_service_name27)))" + "(DESCRIPTION=(LOAD_BALANCE=ON)(RETRY_COUNT=2)(RETRY_DELAY=3)" + "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=my_host34)(PORT=5002))" + "(ADDRESS=(PROTOCOL=tcp)(HOST=my_host35)(PORT=5001)))" + "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=my_host36)(PORT=5002))" + "(ADDRESS=(PROTOCOL=tcps)(HOST=my_host37)(PORT=1521)))" + "(SECURITY=(SSL_SERVER_DN_MATCH=ON))))" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(connect_string) + self.assertEqual(params.get_connect_string(), connect_string) + + def test_4578(self): + "4578 - test for descriptor parameters in 
connect descriptor" + options = [ + ("(FAILOVER=on)", ""), + ("(FAILOVER=off)", "(FAILOVER=OFF)"), + ("(FAILOVER=true)", ""), + ("(FAILOVER=false)", "(FAILOVER=OFF)"), + ("(FAILOVER=yes)", ""), + ("(FAILOVER=no)", "(FAILOVER=OFF)"), + ("(FAILOVER=unsupported_value)", "(FAILOVER=OFF)"), + ("(FAILOVER=1700)", "(FAILOVER=OFF)"), + ("(ENABLE=broken)", "(ENABLE=broken)"), + ("(LOAD_BALANCE=on)", "(LOAD_BALANCE=ON)"), + ("(LOAD_BALANCE=off)", ""), + ("(LOAD_BALANCE=true)", "(LOAD_BALANCE=ON)"), + ("(LOAD_BALANCE=false)", ""), + ("(LOAD_BALANCE=yes)", "(LOAD_BALANCE=ON)"), + ("(LOAD_BALANCE=no)", ""), + ("(LOAD_BALANCE=unsupported_value)", ""), + ("(LOAD_BALANCE=1700)", ""), + ("(RECV_BUF_SIZE=87300)", "(RECV_BUF_SIZE=87300)"), + ("(RECV_BUF_SIZE=11784)", "(RECV_BUF_SIZE=11784)"), + ("(SEND_BUF_SIZE=87300)", "(SEND_BUF_SIZE=87300)"), + ("(SEND_BUF_SIZE=11784)", "(SEND_BUF_SIZE=11784)"), + ("(RECV_TIMEOUT=10)", "(RECV_TIMEOUT=10)"), + ("(RECV_TIMEOUT=10ms)", "(RECV_TIMEOUT=10ms)"), + ("(RECV_TIMEOUT=10 ms)", "(RECV_TIMEOUT=10 ms)"), + ("(RECV_TIMEOUT=10 hr)", "(RECV_TIMEOUT=10 hr)"), + ("(RECV_TIMEOUT=10 min)", "(RECV_TIMEOUT=10 min)"), + ("(RECV_TIMEOUT=10 sec)", "(RECV_TIMEOUT=10 sec)"), + ("(COMPRESSION=on)", "(COMPRESSION=on)"), + ("(COMPRESSION=off)", "(COMPRESSION=off)"), + ( + "(COMPRESSION=on)(COMPRESSION_LEVELS=(LEVEL=low))", + "(COMPRESSION=on)(COMPRESSION_LEVELS=(LEVEL=low))", + ), + ( + "(COMPRESSION=on)(COMPRESSION_LEVELS=(LEVEL=high))", + "(COMPRESSION=on)(COMPRESSION_LEVELS=(LEVEL=high))", + ), + ( + "(COMPRESSION=on)(COMPRESSION_LEVELS=(LEVEL=wrong))", + "(COMPRESSION=on)(COMPRESSION_LEVELS=(LEVEL=wrong))", + ), + ] + + service_name = "service_4576" + host1 = "host_4576_1" + host2 = "host_4576_2" + port1 = 45761 + port2 = 45762 + for str_val, exp_val in options: + with self.subTest(str_val=str_val): + descriptor_part = str_val + descriptor_part_exp = exp_val + connect_descriptor = ( + f"(DESCRIPTION={descriptor_part}(ADDRESS_LIST=" + f"(ADDRESS=(PROTOCOL=tcp)(HOST={host1})(PORT={port1}))" + f"(ADDRESS=(PROTOCOL=tcp)(HOST={host2})(PORT={port2})))" + f"(CONNECT_DATA=(SERVICE_NAME={service_name})))" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(connect_descriptor) + + connect_descriptor_exp = ( + f"(DESCRIPTION={descriptor_part_exp}(ADDRESS_LIST=" + f"(ADDRESS=(PROTOCOL=tcp)(HOST={host1})(PORT={port1}))" + f"(ADDRESS=(PROTOCOL=tcp)(HOST={host2})(PORT={port2})))" + f"(CONNECT_DATA=(SERVICE_NAME={service_name})))" + ) + + self.assertEqual(params.host, [host1, host2]) + self.assertEqual(params.port, [port1, port2]) + self.assertEqual(params.service_name, service_name) + self.assertEqual( + params.get_connect_string(), connect_descriptor_exp + ) + + def test_4579(self): + "4579 - test for connect data parameters in connect descriptor" + options = [ + "(COLOCATION_TAG=ColocationTag4577)", + "(COLOCATION_TAG=ColocationTag_4577)", + "(FAILOVER_MODE=(BACKUP=bhost)(TYPE=session)(METHOD=basic))", + "(FAILOVER_MODE=(BACKUP=bhost)(TYPE=select)(METHOD=preconnect))", + "(FAILOVER_MODE=(TYPE=select)(METHOD=basic)(RETRIES=2)(DELAY=15))", + "(HS=ok)", + "(TUNNEL_SERVICE_NAME=south)", + ] + + service_name = "service_4577" + host = "host_4577" + port = 4577 + for str_val in options: + with self.subTest(str_val=str_val): + connect_data_part = str_val + connect_descriptor = ( + f"(DESCRIPTION=" + f"(ADDRESS=(PROTOCOL=tcp)(HOST={host})(PORT={port}))" + f"(CONNECT_DATA=(SERVICE_NAME={service_name})" + f"{connect_data_part}))" + ) + params = oracledb.ConnectParams() + 
params.parse_connect_string(connect_descriptor) + self.assertEqual(params.host, host) + self.assertEqual(params.port, port) + self.assertEqual(params.service_name, service_name) + self.assertEqual( + params.get_connect_string(), connect_descriptor + ) + + def test_4580(self): + "4580 - test for security parameters in connect descriptor" + + security_options = { + # IGNORE_ANO_ENCRYPTION_FOR_TCPS variations + "(SECURITY=(IGNORE_ANO_ENCRYPTION_FOR_TCPS=TRUE))": ( + "(SECURITY=(SSL_SERVER_DN_MATCH=ON)" + "(IGNORE_ANO_ENCRYPTION_FOR_TCPS=TRUE))" + ), + "(SECURITY=(IGNORE_ANO_ENCRYPTION_FOR_TCPS=FALSE))": ( + "(SECURITY=(SSL_SERVER_DN_MATCH=ON)" + "(IGNORE_ANO_ENCRYPTION_FOR_TCPS=FALSE))" + ), + "(SECURITY=(SSL_SERVER_DN_MATCH=false)" + "(IGNORE_ANO_ENCRYPTION_FOR_TCPS=FALSE))": ( + "(SECURITY=(IGNORE_ANO_ENCRYPTION_FOR_TCPS=FALSE))" + ), + # KERBEROS5_CC_NAME and KERBEROS5_PRINCIPAL variations + "(SECURITY=(KERBEROS5_CC_NAME=/tmp/krbuser2/krb.cc)" + "(KERBEROS5_PRINCIPAL=krbprinc2@example.com))": ( + "(SECURITY=(SSL_SERVER_DN_MATCH=ON)" + "(KERBEROS5_CC_NAME=/tmp/krbuser2/krb.cc)" + "(KERBEROS5_PRINCIPAL=krbprinc2@example.com))" + ), + # SSL_SERVER_CERT_DN and SSL_SERVER_DN_MATCH variations + "(SECURITY=(SSL_SERVER_DN_MATCH=on)" + "(SSL_SERVER_CERT_DN=CN=unknown19a)" + "(MY_WALLET_DIRECTORY=/tmp/wallet_loc19a))": ( + "(SECURITY=(SSL_SERVER_DN_MATCH=ON)" + "(SSL_SERVER_CERT_DN=CN=unknown19a)" + "(MY_WALLET_DIRECTORY=/tmp/wallet_loc19a))" + ), + "(SECURITY=(SSL_SERVER_DN_MATCH=false)" + "(SSL_SERVER_CERT_DN=CN=unknown19a)" + "(MY_WALLET_DIRECTORY=/tmp/wallet_loc19a))": ( + "(SECURITY=(SSL_SERVER_CERT_DN=CN=unknown19a)" + "(MY_WALLET_DIRECTORY=/tmp/wallet_loc19a))" + ), + "(SECURITY=(SSL_SERVER_DN_MATCH=wrong)" + "(SSL_SERVER_CERT_DN=CN=unknown19a)" + "(MY_WALLET_DIRECTORY=/tmp/wallet_loc19a))": ( + "(SECURITY=(SSL_SERVER_CERT_DN=CN=unknown19a)" + "(MY_WALLET_DIRECTORY=/tmp/wallet_loc19a))" + ), + } + + service_name = "service_4578" + host = "host_4578" + port = 4578 + for str_val, exp_val in security_options.items(): + with self.subTest(str_val=str_val): + security_part = str_val + security_part_exp = exp_val + connect_descriptor = ( + f"(DESCRIPTION=" + f"(ADDRESS=(PROTOCOL=tcps)(HOST={host})(PORT={port}))" + f"(CONNECT_DATA=(SERVICE_NAME={service_name}))" + f"{security_part})" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(connect_descriptor) + connect_descriptor_exp = ( + f"(DESCRIPTION=" + f"(ADDRESS=(PROTOCOL=tcps)(HOST={host})(PORT={port}))" + f"(CONNECT_DATA=(SERVICE_NAME={service_name}))" + f"{security_part_exp})" + ) + self.assertEqual(params.host, host) + self.assertEqual(params.port, port) + self.assertEqual(params.service_name, service_name) + self.assertEqual( + params.get_connect_string(), connect_descriptor_exp + ) + + def test_4581(self): + "4581 - test for parameters supported in easy connect descriptor" + options = [ + ("retry_count=3&retry_delay=6", "(RETRY_COUNT=3)(RETRY_DELAY=6)"), + ("enable=broken", "(ENABLE=broken)"), + ("failover=on", ""), + ("failover=off", "(FAILOVER=OFF)"), + ("failover=true", ""), + ("failover=false", "(FAILOVER=OFF)"), + ("failover=yes", ""), + ("failover=no", "(FAILOVER=OFF)"), + ("failover=unsupported_value", "(FAILOVER=OFF)"), + ("failover=1700", "(FAILOVER=OFF)"), + ("load_balance=on", "(LOAD_BALANCE=ON)"), + ("load_balance=off", ""), + ("load_balance=true", "(LOAD_BALANCE=ON)"), + ("load_balance=false", ""), + ("load_balance=yes", "(LOAD_BALANCE=ON)"), + ("load_balance=no", ""), + ("load_balance=unsupported_value", ""), + 
("load_balance=1700", ""), + ("recv_buf_size=87300", "(RECV_BUF_SIZE=87300)"), + ("send_buf_size=11786", "(SEND_BUF_SIZE=11786)"), + ("sdu=16384", "(SDU=16384)"), + ("retry_count=6", "(RETRY_COUNT=6)(RETRY_DELAY=1)"), + ("source_route=on", "(SOURCE_ROUTE=ON)"), + ("source_route=true", "(SOURCE_ROUTE=ON)"), + ("source_route=yes", "(SOURCE_ROUTE=ON)"), + ("source_route=off", ""), + ("source_route=false", ""), + ("source_route=no", ""), + ("source_route=wrong", ""), + ( + "transport_connect_timeout=100", + "(TRANSPORT_CONNECT_TIMEOUT=100)", + ), + ( + "transport_connect_timeout=500ms", + "(TRANSPORT_CONNECT_TIMEOUT=500ms)", + ), + ] + + service_name = "service_4579" + host = "host_4579" + port = 4579 + for str_val, exp_str in options: + with self.subTest(str_val=str_val): + descriptor_part = exp_str + easy_connect = f"""{host}:{port}/{service_name}?{str_val}""" + connect_descriptor_exp = ( + f"(DESCRIPTION={descriptor_part}" + f"(ADDRESS=(PROTOCOL=tcp)(HOST={host})(PORT={port}))" + f"(CONNECT_DATA=(SERVICE_NAME={service_name})))" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(easy_connect) + self.assertEqual(params.host, host) + self.assertEqual(params.port, port) + self.assertEqual(params.service_name, service_name) + self.assertEqual( + params.get_connect_string(), connect_descriptor_exp + ) + + def test_4582(self): + "4582 - test for security parameters in easy connect descriptor" + service_name = "service_4580" + srvc_str = ( + "ssl_server_dn_match=true" + "&ssl_server_cert_dn='cn=sales,cn=OracleContext," + "dc=us,dc=example,dc=com'" + "&wallet_location='/tmp/oracle'" + ) + host = "host_4580" + port = 4580 + easy_connect = f"tcps://{host}:{port}/{service_name}?{srvc_str}" + connect_descriptor_exp = ( + f"(DESCRIPTION=" + f"(ADDRESS=(PROTOCOL=tcps)(HOST={host})" + f"(PORT={port}))" + f"(CONNECT_DATA=(SERVICE_NAME={service_name}))" + "(SECURITY=(SSL_SERVER_DN_MATCH=ON)" + "(SSL_SERVER_CERT_DN='cn=sales,cn=OracleContext," + "dc=us,dc=example,dc=com')" + "(MY_WALLET_DIRECTORY='/tmp/oracle')))" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(easy_connect) + self.assertEqual(params.host, host) + self.assertEqual(params.port, port) + self.assertEqual(params.service_name, service_name) + self.assertEqual(params.get_connect_string(), connect_descriptor_exp) + + def test_4583(self): + "4583 - test for TYPE_OF_SERVICE, RDB_DATABASE, GLOBAL_NAME parameters" + connect_string = ( + "(DESCRIPTION_LIST=" + "(DESCRIPTION=(TYPE_OF_SERVICE=rdb_database)" + "(ADDRESS=(PROTOCOL=tcp)(HOST=my_host94_1)(PORT=5002))" + "(CONNECT_DATA=" + "(SERVICE_NAME=generic)" + "(RDB_DATABASE=[.mf]mf_personal.rdb)" + "(GLOBAL_NAME=alpha5)))" + "(DESCRIPTION=(TYPE_OF_SERVICE=oracle11_database)" + "(ADDRESS=(PROTOCOL=tcp)(HOST=my_host94_2)(PORT=5003))" + "(CONNECT_DATA=" + "(SERVICE_NAME=sales.us.example.com))))" + ) + params = oracledb.ConnectParams() + params.parse_connect_string(connect_string) + self.assertEqual(params.get_connect_string(), connect_string) + if __name__ == "__main__": test_env.run_test_cases() From 065bbf9925b59a0995ebad6e52b55b93ab7f95e0 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 11:01:18 -0700 Subject: [PATCH 026/178] TG example improvements. 
--- samples/transaction_guard.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/samples/transaction_guard.py b/samples/transaction_guard.py index 60aa1acf..a9f8fdf5 100644 --- a/samples/transaction_guard.py +++ b/samples/transaction_guard.py @@ -77,15 +77,28 @@ cursor = connection.cursor() cursor.execute("delete from TestTempTable where IntCol = 1") cursor.execute("insert into TestTempTable values (1, null)") -input( - "Please kill %s session now. Press ENTER when complete." - % sample_env.get_main_user() -) + +try: + sql = """select unique + 'alter system kill session '''||sid||','||serial#||''';' + from v$session_connect_info + where sid = sys_context('USERENV', 'SID')""" + (killsql,) = connection.cursor().execute(sql).fetchone() + print(f"Execute this SQL statement as a DBA user in SQL*Plus:\n {killsql}") +except Exception: + print( + "As a DBA user in SQL*Plus, use ALTER SYSTEM KILL SESSION " + f"to terminate the {sample_env.get_main_user()} session now." + ) + +input("Press ENTER when complete.") + try: connection.commit() # this should fail - sys.exit("Session was not killed. Terminating.") + sys.exit("Session was not killed. Sample cannot continue.") except oracledb.DatabaseError as e: (error_obj,) = e.args + print("Session is recoverable:", error_obj.isrecoverable) if not error_obj.isrecoverable: sys.exit("Session is not recoverable. Terminating.") ltxid = connection.ltxid From cb29895b10faa200636ebf1cf27225eca7d88b96 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 11:01:57 -0700 Subject: [PATCH 027/178] Dockerfile for creating Python-oracledb application development environment container. --- doc/src/release_notes.rst | 2 + samples/containers/app_dev/Dockerfile | 318 ++++++++++++++++++ samples/containers/app_dev/README.md | 236 +++++++++++++ .../containers/app_dev/container_build.env | 80 +++++ samples/containers/app_dev/container_build.sh | 56 +++ .../otherfiles/autodeployscript.sh.template | 98 ++++++ .../otherfiles/pyorcldb_generate_cert.sh | 41 +++ .../otherfiles/pyorcldb_wsgi.conf.template | 44 +++ .../containers/app_dev/sample_app/README.md | 4 + .../containers/app_dev/sample_app/customer.py | 228 +++++++++++++ .../app_dev/sample_app/pyorcldb_app.wsgi | 5 + .../app_dev/sample_app/requirements.txt | 2 + .../app_dev/sample_app/static/logo.png | Bin 0 -> 2330 bytes .../sample_app/static/styles/styles.css | 41 +++ .../sample_app/templates/customer_list.html | 61 ++++ .../sample_app/templates/error_handler.html | 22 ++ .../samples_and_db}/Dockerfile | 4 +- .../samples_and_db}/README.md | 0 .../samples_and_db}/setup.py | 0 19 files changed, 1240 insertions(+), 2 deletions(-) create mode 100644 samples/containers/app_dev/Dockerfile create mode 100644 samples/containers/app_dev/README.md create mode 100644 samples/containers/app_dev/container_build.env create mode 100755 samples/containers/app_dev/container_build.sh create mode 100755 samples/containers/app_dev/otherfiles/autodeployscript.sh.template create mode 100755 samples/containers/app_dev/otherfiles/pyorcldb_generate_cert.sh create mode 100644 samples/containers/app_dev/otherfiles/pyorcldb_wsgi.conf.template create mode 100644 samples/containers/app_dev/sample_app/README.md create mode 100644 samples/containers/app_dev/sample_app/customer.py create mode 100644 samples/containers/app_dev/sample_app/pyorcldb_app.wsgi create mode 100644 samples/containers/app_dev/sample_app/requirements.txt create mode 100644 samples/containers/app_dev/sample_app/static/logo.png 
create mode 100644 samples/containers/app_dev/sample_app/static/styles/styles.css create mode 100644 samples/containers/app_dev/sample_app/templates/customer_list.html create mode 100644 samples/containers/app_dev/sample_app/templates/error_handler.html rename samples/{sample_container => containers/samples_and_db}/Dockerfile (95%) rename samples/{sample_container => containers/samples_and_db}/README.md (100%) rename samples/{sample_container => containers/samples_and_db}/setup.py (100%) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 523ab256..03ac3821 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -107,6 +107,8 @@ Common Changes #) Error ``DPY-2056: registered handler for protocol "{protocol}" failed for arg "{arg}"`` is now raised when an exception occurs when calling the registered handler for a protocol. +#) Added a sample Dockerfile that can be used to create a container for + developing and deploying python-oracledb applications. #) Internal change: improve handling of metadata. #) Internal build tool change: bumped minimum Cython version to 3.0.10 to avoid bug in earlier versions. diff --git a/samples/containers/app_dev/Dockerfile b/samples/containers/app_dev/Dockerfile new file mode 100644 index 00000000..9e2bf1b7 --- /dev/null +++ b/samples/containers/app_dev/Dockerfile @@ -0,0 +1,318 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025 Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- +# +# NAME +# +# Dockerfile +# +# PURPOSE +# +# Sample Dockerfile that creates a container image for Python application +# development and application deployment. The container has python-oracledb +# installed, the Apache web server with WSGI, and (optionally) Oracle Instant +# Client pre-configured. +# +# The container is for testing and development purposes only. 
+# +# NOTE +# +# Read README.md and then use container_build.sh to build the container from +# this Dockerfile +# + +# Variable declaration for base OS +# -------------------------------- +ARG OPY_OS_VERSION=8 + +# Base OS container image details +# ------------------------------- +FROM ghcr.io/oracle/oraclelinux:${OPY_OS_VERSION} AS baseos + +# OS Username +ARG OPY_USERNAME + +# OS Username's Groupname +ARG OPY_GROUPNAME + +# Python Version details +ARG OPY_PYTHON_VERSION + +# Instant Client Details +ARG OPY_INSTANT_CLIENT_VERSION + +# User base directory +ARG OPY_BASEDIR=/opt + +# OS User home directory +ARG OPY_OS_USER_HOMEDIR=/home/${OPY_USERNAME} + +# Virtual environment directory name +ARG OPY_PYTHON_VIRTUAL_DIRNAME=pyorcldb_env + +# OS Version +ARG OPY_OS_VERSION=8 + +RUN dnf upgrade && \ + # Install latest Instant Client 23ai + ( ( [ ${OPY_INSTANT_CLIENT_VERSION} == 23 ] && \ + dnf install oracle-instantclient-release-23ai-el${OPY_OS_VERSION} && \ + dnf install oracle-instantclient-basic \ + oracle-instantclient-sqlplus ) || \ + # Install latest Instant Client 21c + ( [ ${OPY_INSTANT_CLIENT_VERSION} == 21 ] && \ + dnf install oracle-instantclient-release-el${OPY_OS_VERSION} && \ + dnf install oracle-instantclient-basic \ + oracle-instantclient-sqlplus ) || \ + ( [ ${OPY_INSTANT_CLIENT_VERSION} == None ] && \ + echo "Not installing Instant Client" ) || \ + # Install specific Instant Client 19c release update + ( \ + # Install Instant Client 19c on Oracle Linux 9 + ( ( [ ${OPY_OS_VERSION} == 9 ] && \ + dnf install oracle-instantclient-release-el${OPY_OS_VERSION} && \ + dnf install \ + oracle-instantclient${OPY_INSTANT_CLIENT_VERSION}-basic \ + oracle-instantclient${OPY_INSTANT_CLIENT_VERSION}-sqlplus \ + ) || \ + # Install Instant Client 19c on Oracle Linux 8 + ( dnf install oracle-release-el${OPY_OS_VERSION} && \ + dnf install \ + oracle-instantclient${OPY_INSTANT_CLIENT_VERSION}-basic \ + oracle-instantclient${OPY_INSTANT_CLIENT_VERSION}-sqlplus \ + ) ) ) || \ + ( echo "Not installing Instant Client" ) \ + ) && \ + # Install sudo, openssl and Python + # Installing ncurses to include 'clear' command + dnf install sudo openssl apr apr-util ncurses && \ + echo "Python Version - ${OPY_PYTHON_VERSION}" && \ + dnf install python${OPY_PYTHON_VERSION} python${OPY_PYTHON_VERSION}-pip \ + python${OPY_PYTHON_VERSION}-devel && \ + ( ( [ ${OPY_OS_VERSION} == 9 ] && \ + update-alternatives --install /usr/bin/python python \ + /usr/bin/python${OPY_PYTHON_VERSION} 1 ) || \ + ( echo "Not required to set Python default" ) \ + ) && \ + # Add OS user + useradd -U -d ${OPY_OS_USER_HOMEDIR} ${OPY_USERNAME} && \ + # Enable sudo access for appuser + echo "%appuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers && \ + # Create required directories + mkdir ${OPY_BASEDIR}/downloads && \ + chown ${OPY_USERNAME}:${OPY_GROUPNAME} ${OPY_BASEDIR}/downloads && \ + mkdir ${OPY_BASEDIR}/wallet && \ + chown ${OPY_USERNAME}:${OPY_GROUPNAME} ${OPY_BASEDIR}/wallet && \ + mkdir ${OPY_BASEDIR}/apache && \ + chown ${OPY_USERNAME}:${OPY_GROUPNAME} ${OPY_BASEDIR}/apache && \ + mkdir ${OPY_BASEDIR}/${OPY_PYTHON_VIRTUAL_DIRNAME} && \ + chown ${OPY_USERNAME}:${OPY_GROUPNAME} \ + ${OPY_BASEDIR}/${OPY_PYTHON_VIRTUAL_DIRNAME} && \ + mkdir ${OPY_BASEDIR}/app && \ + chown ${OPY_USERNAME}:${OPY_GROUPNAME} ${OPY_BASEDIR}/app && \ + mkdir ${OPY_BASEDIR}/cert && \ + chown ${OPY_USERNAME}:${OPY_GROUPNAME} ${OPY_BASEDIR}/cert && \ + # Clean up cache + dnf clean all + +FROM baseos AS pythonenv + +# OS Username +ARG OPY_USERNAME + +# OS 
Username's Groupname +ARG OPY_GROUPNAME + +# Python Version details +ARG OPY_PYTHON_VERSION + +# Virtual environment directory name +ARG OPY_PYTHON_VIRTUAL_DIRNAME=pyorcldb_env + +# User base directory +ARG OPY_BASEDIR=/opt + +# OS User home directory +ARG OPY_OS_USER_HOMEDIR=/home/${OPY_USERNAME} + +# Switch User +USER ${OPY_USERNAME} + +# Copy Auto deploy script template +COPY --chown=${OPY_USERNAME}:${OPY_GROUPNAME} \ + otherfiles/autodeployscript.sh.template \ + ${OPY_OS_USER_HOMEDIR}/autodeployscript.sh + +# Configure virtual environment +RUN cd ${OPY_BASEDIR} && \ + python${OPY_PYTHON_VERSION} -m venv ${OPY_PYTHON_VIRTUAL_DIRNAME} && \ + # Source the Python virtual environment + source ${OPY_BASEDIR}/${OPY_PYTHON_VIRTUAL_DIRNAME}/bin/activate && \ + # Upgrade pip utility + python -m pip install --upgrade pip && \ + # Install python-oracledb driver for Python + # Install wheel module (pre-requisite for WSGI module) + python -m pip install oracledb wheel && \ + # Configure PATH and set the Python virtual environment user profile + echo "export PATH=${OPY_BASEDIR}/apache/bin:\$PATH" >> \ + ${OPY_OS_USER_HOMEDIR}/.bashrc && \ + echo "source ${OPY_BASEDIR}/${OPY_PYTHON_VIRTUAL_DIRNAME}/bin/activate" >> \ + ${OPY_OS_USER_HOMEDIR}/.bashrc && \ + echo "if [ ! -f ${OPY_BASEDIR}/cert/certificate.pem ]; then" >> \ + ${OPY_OS_USER_HOMEDIR}/.bashrc && \ + echo " ~/pyorcldb_generate_cert.sh" >> ${OPY_OS_USER_HOMEDIR}/.bashrc && \ + echo "fi" >> ${OPY_OS_USER_HOMEDIR}/.bashrc && \ + echo "~/autodeployscript.sh" >> ${OPY_OS_USER_HOMEDIR}/.bashrc && \ + echo "" >> ${OPY_OS_USER_HOMEDIR}/.bashrc && \ + sed -i "s/OPY_OSUSERBASEDIR/\\${OPY_BASEDIR}/g" \ + ${OPY_OS_USER_HOMEDIR}/autodeployscript.sh + +# Intermediate image for building Apache Server +# --------------------------------------------- +FROM pythonenv AS apachebuildenv + +# Variables declaration for ApacheBuildEnv +# ---------------------------------------- + +# Apache Server Configuration - Version +ARG OPY_APACHE_SERVER_VERSION + +# Python Version details +ARG OPY_PYTHON_VERSION +ARG OPY_PYTHON_VERSION_WITHOUTPERIOD + +# OS Username +ARG OPY_USERNAME + +# OS Username's Groupname +ARG OPY_GROUPNAME + +# Virtual environment directory name +ARG OPY_PYTHON_VIRTUAL_DIRNAME=pyorcldb_env + +# User base directory +ARG OPY_BASEDIR=/opt + +# Apache HTTP Server Listen port number +ARG OPY_APACHE_LISTEN_PORT + +# Switch username +USER ${OPY_USERNAME} + +# Copy Apache WSGI configuration file template into downloads directory +COPY --chown=${OPY_USERNAME}:${OPY_GROUPNAME} \ + otherfiles/pyorcldb_wsgi.conf.template ${OPY_BASEDIR}/downloads + +# Install Apache Server +RUN sudo dnf install \ + # Install prerequisite packages for Apache installation + gcc make expat-devel pcre-devel apr-devel apr-util-devel \ + redhat-rpm-config openssl-devel perl && \ + # Download Apache server source + cd ${OPY_BASEDIR}/downloads && \ + curl -o ${OPY_APACHE_SERVER_VERSION}.tar.gz \ + "https://archive.apache.org/dist/httpd/${OPY_APACHE_SERVER_VERSION}.tar.gz" && \ + tar xzvf ${OPY_APACHE_SERVER_VERSION}.tar.gz && \ + cd ${OPY_APACHE_SERVER_VERSION} && \ + # Prepare and install Apache server + ./configure --enable-ssl --prefix=${OPY_BASEDIR}/apache && \ + make && \ + make install && \ + # Set path of the Perl in Apache's apxs utility. This is required for + # building wsgi_mod. 
+ sed -i "s/\/replace\/with\/path\/to\/perl\/interpreter/\/usr\/bin\/perl/g" \ + ${OPY_BASEDIR}/apache/bin/apxs && \ + export PATH=${OPY_BASEDIR}/apache/bin:$PATH && \ + source ${OPY_BASEDIR}/${OPY_PYTHON_VIRTUAL_DIRNAME}/bin/activate &&\ + # Install WSGI module + python -m pip install mod_wsgi && \ + # Copy the mod_wsgi module from python virtual environment to Apache home + cp ${OPY_BASEDIR}/${OPY_PYTHON_VIRTUAL_DIRNAME}/lib/python${OPY_PYTHON_VERSION}/site-packages/mod_wsgi/server/mod_wsgi-py${OPY_PYTHON_VERSION_WITHOUTPERIOD}.cpython-${OPY_PYTHON_VERSION_WITHOUTPERIOD}-x86_64-linux-gnu.so \ + ${OPY_BASEDIR}/apache/modules/mod_wsgi.so && \ + # Configure httpd.conf + # -------------------- + # 1. Removed listening on port 80 + # 2. Enabled SSL module + # 3. Set default characterset to UTF-8 + cd ${OPY_BASEDIR}/apache/conf && \ + sed -i "s/Listen 80/# Listen 80/g" httpd.conf && \ + sed -i "s/#LoadModule ssl_module/LoadModule ssl_module/g" httpd.conf && \ + echo "AddDefaultCharset UTF-8" >> httpd.conf && \ + echo "Include conf/extra/pyorcldb_wsgi.conf" >> httpd.conf && \ + # Populate pyorcldb_wsgi.conf file into Apache's conf/extra directory + cd ${OPY_BASEDIR}/apache/conf/extra && \ + cp ${OPY_BASEDIR}/downloads/pyorcldb_wsgi.conf.template \ + ./pyorcldb_wsgi.conf && \ + sed -i "s/OPY_BASEDIR/\\${OPY_BASEDIR}/g" pyorcldb_wsgi.conf && \ + sed -i "s/OPY_PYTHONVIRTUALDIRNAME/${OPY_PYTHON_VIRTUAL_DIRNAME}/g" \ + pyorcldb_wsgi.conf && \ + sed -i "s/OPY_APACHE_LISTEN_PORT/${OPY_APACHE_LISTEN_PORT}/g" \ + pyorcldb_wsgi.conf && \ + rm ${OPY_BASEDIR}/downloads/pyorcldb_wsgi.conf.template && \ + # Clean all cached packages from DNF cache + sudo dnf clean all && \ + # Clean all downloads directory + cd ${OPY_BASEDIR}/downloads && \ + rm -rf * + +# Python Development Image +# ------------------------ +FROM pythonenv AS developmentimage + +# OS Username +ARG OPY_USERNAME + +# OS Username's Groupname +ARG OPY_GROUPNAME + +# OS User home directory +ARG OPY_OS_USER_HOMEDIR=/home/${OPY_USERNAME} + +# User base directory +ARG OPY_BASEDIR=/opt + +# Image metadata details +ARG OPY_IMAGE_VERSION +ARG OPY_IMAGE_RELEASE_DATE +ARG OPY_IMAGE_RELEASE_STATUS + +# Image metadata +# Uncomment the below lines to provide the label information +# LABEL version= +# LABEL release-date= +# LABEL maintainer= +# LABEL release-status= + +# Switch to OS user from root +# --------------------------- +USER ${OPY_USERNAME} + +WORKDIR ${OPY_OS_USER_HOMEDIR} + +# Get Apache Server +COPY --from=apachebuildenv --chown=${OPY_USERNAME}:${OPY_GROUPNAME} \ + ${OPY_BASEDIR}/apache ${OPY_BASEDIR}/apache + +# Copy SSL certificate generation script +COPY --chown=${OPY_USERNAME}:${OPY_GROUPNAME} \ + otherfiles/pyorcldb_generate_cert.sh ${OPY_OS_USER_HOMEDIR}/ diff --git a/samples/containers/app_dev/README.md b/samples/containers/app_dev/README.md new file mode 100644 index 00000000..f05d3257 --- /dev/null +++ b/samples/containers/app_dev/README.md @@ -0,0 +1,236 @@ +# Oracle python-oracledb environment for Python application developers + +This Dockerfile creates a sample Oracle Linux container with the Oracle Python +driver python-oracledb, the Apache web server with WSGI, and (optionally) +Oracle Instant Client. You need to have access to an existing database. + +A sample Flask application can optionally be deployed. + +This image can be used for development and deployment of Python applications +for demonstrations and testing. + +It has been tested on macOS using podman and docker. 
+ +## Build Instructions + +- Edit `container_build.env` and set your desired values. + +- Build the container image using the `container_build.sh` script: + + ``` + ./container_build.sh + ``` + + By default, Apache has SSL enabled and is listening on port 8443. + +## Usage for Application Devlopment + +- Run a container: + + ``` + podman run -it -p 8443:8443 --name my_python_dev pyorcldbdev + ``` + + You can now create and run your own Python applications using the `python` + binary. + + See lower for how to deploy applications such as the sample Flask application + in the container. + +- If you want to use the Apache HTTP server, it is configured with the WSGI + module. The listening port is set to 8443. (Note port 80 is disabled). + + The Python home for the WSGI module is `/opt/pyorcldb_env`. Refer to the + Apache HTTP server configuration file + `/opt/apache/conf/extra/pyorcldb_wsgi.conf` for more information. + + To start Apache in the container: + + ``` + $ apachectl start + ``` + + To stop Apache: + + ``` + $ apachectl stop + ``` + +- An Oracle Database wallet and/or `tnsnames.ora` file can be copied into, or + mounted in, `/opt/wallet` to connect to Oracle Database or Oracle Autonomous + Database. + +- By default, the Python virtual environment file `/opt/pyorcldb_env` is + sourced in the bash shell. The virtual environment can be enabled or + disabled. + + To enable the Python virtual environment: + + ``` + $ source /opt/pyorcldb_env/bin/activate + ``` + + To disable the Python virtual environment: + + ``` + $ deactivate + ``` + +## Container Environment + +### Default Environment + +- Oracle Linux 8 +- Python 3.12 +- Apache HTTP server 2.4 + +### Optional packages + +- Oracle Instant Client (Basic and SQL*Plus packages) + + By default, Oracle Instant Client is not installed. If you require it, select + the version in + `python-oracledb/samples/containers/app_dev/container_build.env` before + building the container image. + + Supported versions: 23ai, 21c and 19c + + Recommended version: 23ai + + +### Python modules pre-installed + +- Oracle Database driver for Python - oracledb (latest available version) +- Pre-requisites for oracledb - cffi, cryptography, pycparser + +### Default Apache HTTP configuration + +- Server name - pyorcldbdemo +- Protocol - HTTPS +- Port # - 8443 +- WSGI Python home - `/opt/env/pyorcldb_env` +- SSL certificate - `/opt/cert/certificate.pem` (For demo and testing purposes) +- SSL certificate key - `/opt/cert/privatekey.pem` (For demo and testing purposes) +- Document root - `/opt/app` +- Error log - `/opt/apache/logs/pyorcldb_error.log` +- Custom log - `/opt/apache/logs/appstack_app_access.log` + +### Directories + +- Python application home - `/opt/app` +- Python virtual environment - `/opt/pyorcldb_env` +- Apache HTTP server home - `/opt/apache` +- Oracle Database wallet home - `/opt/wallet` +- SSL certificate and key - `/opt/cert` (For demo and testing purposes) + +### The default SSL certificate and key + +The SSL certificate and key generated in `/opt/cert` can be used for demo and +testing purposes only. This is strictly not for production use. Configure or +replace them with your own certificate and key. + +## Deploying the Sample Flask Application + +The GitHub directory +[samples/containers/app_dev/sample_app](https://github.com/oracle/python-oracledb/tree/main/samples/containers/app_dev/sample_app) +contains a sample Flask web application that queries a small customer +database. 
This application can be enabled by copying or mounting it into the +container. + +The sample application uses database credentials set via environment variables: + +1. `PYO_SAMPLES_MAIN_USER` - Oracle Database username +2. `PYO_SAMPLES_MAIN_PASSWORD` - Oracle Database password +3. `PYO_SAMPLES_CONNECT_STRING` - Oracle Database TNS Alias or Easy Connect + string +4. `TNS_ADMIN` - The location of the tnsnames.ora file or wallet files +5. `PYO_SAMPLES_WALLET_PASSWORD` - Oracle Database wallet password +6. `PYO_SAMPLES_WALLET_LOCATION` - Oracle Database wallet location. This + variable is optional and the sample application will use TNS_ADMIN path as + wallet location when this variable is not set. + +Here is an example to deploy the sample application without a database wallet: + +``` +podman run -it -p 8443:8443 --env DEPLOY_APP="TRUE" --env APP_NAME="customer" \ + --env PYO_SAMPLES_MAIN_USER="user" --env PYO_SAMPLES_MAIN_PASSWORD="passwd" \ + --env PYO_SAMPLES_CONNECT_STRING="myhostname:1521/mydbservicename" \ + -v ./sample_app:/opt/app --name my_app1 pyorcldbdev +``` + +Here is an example to deploy the sample application with a database wallet, for +example to connect to Oracle Autonomous Database Serverless (ADB-S): + +``` +podman run -it -p 8443:8443 --env DEPLOY_SAMPLE_APP="TRUE" \ + --env APP_NAME="customer" --env PYO_SAMPLES_MAIN_USER="myuserid" \ + --env PYO_SAMPLES_MAIN_PASSWORD="mypassword" \ + --env PYO_SAMPLES_CONNECT_STRING="mydb1" \ + --env PYO_SAMPLES_WALLET_PASSWORD="mywalletpassword" \ + --env TNS_ADMIN=/opt/wallet -v ./sample_app:/opt/app \ + -v /disk/path/mywallet:/opt/wallet --name my_app1 \ + pyorcldbdev +``` + +The application will be installed in the container in `/opt/app`. + +The deployed sample application can be accessed using the host browser with the +URL: + +``` +https://localhost:8443/app/customer +``` + +Because the SSL certificate is self-signed, you may have to accept accessing +the URL in the browser. + +Review log files such as `/opt/apache/logs/pyorcldb_error.log` if you have +problems accessing the application. + +## Deploying your own Flask Application + +Your own Python Flask application can be easily deployed with Apache. The +following setup is required: + +- In your application directory, run `pip freeze > requirements.txt` to + generate a `requirements.txt` file for installing Python libraries required + for the application. + + Note that the application does not need to bundle the Python virtual + environment: this container will create one. + +- Add a WSGI file `pyorcldb_app.wsgi` in the root directory of the application + and configure the application. For example: + + ``` + from mymodule1 import app as application + + def func1(): + return application + ``` + + In the above code, `app` is the Flask object created in the Python module + `mymodule1.py`. That Flask object is renamed to `application` to comply with + the naming conventions expected by the WSGI server. + +- Set the module name in the environment variable `APP_NAME` and set + `DEPLOY_APP` to `TRUE` while creating the container. + +Here is a sample command to deploy a Flask application while creating the +container: + +``` +podman run -it -p 8443:8443 --env DEPLOY_APP="TRUE" --env APP_NAME="mymodule1" \ + -v /mydisk/myapplication:/opt/app --name myapp2 pyorcldbdev +``` + +Pass in any other environment as required by your application, for example the +database connection details. + +This command will deploy your application into the container `myapp2` under +directory `/opt/app`. 
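
For reference, a minimal `mymodule1.py` that the `pyorcldb_app.wsgi` file shown above
could import might look like the following sketch. The module name, route, and query are
illustrative only; the database credentials are read from the same environment variables
used by the sample application.

```
# mymodule1.py - illustrative Flask module only; adapt it to your application
import os

import oracledb
from flask import Flask

app = Flask(__name__)


@app.route("/")
def index():
    # The PYO_SAMPLES_* variables are assumed to be set when the container runs
    with oracledb.connect(
        user=os.environ.get("PYO_SAMPLES_MAIN_USER"),
        password=os.environ.get("PYO_SAMPLES_MAIN_PASSWORD"),
        dsn=os.environ.get("PYO_SAMPLES_CONNECT_STRING"),
    ) as connection:
        with connection.cursor() as cursor:
            cursor.execute("select sysdate from dual")
            (now,) = cursor.fetchone()
    return f"Database time is {now}"
```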
The auto-deployment will start the Apache HTTP server on +port 8443. + +The URL of the deployed Flask application or web service will be in this +pattern `https://localhost:8443/app/`. The URL for the above +sample `podman run` command is `https://localhost:8443/app/mymodule1`. diff --git a/samples/containers/app_dev/container_build.env b/samples/containers/app_dev/container_build.env new file mode 100644 index 00000000..c707cc14 --- /dev/null +++ b/samples/containers/app_dev/container_build.env @@ -0,0 +1,80 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# NAME +# +# container_build.env +# +# PURPOSE +# +# This environment file sets up the required attributes for a Python +# application container image. +# +# NOTE +# +# The build script container_build.sh will source it while building the +# container image. 
+# + +# Use podman by default to build the container +export CONTAINER_TOOL=podman +#export CONTAINER_TOOL=docker + +# Python version +export OPY_PYTHON_VERSION=3.12 + +# Python version without dots +export OPY_PYTHON_VERSION_WITHOUTPERIOD=312 + +# Container OS user name to use +export OPY_USERNAME=appuser + +# Container OS group name to use +export OPY_GROUPNAME=appuser + +# Apache HTTP server version +# You should use the latest version from https://archive.apache.org/dist/httpd/ +export OPY_APACHE_SERVER_VERSION=httpd-2.4.62 + +# Apache HTTP server listen port +export OPY_APACHE_LISTEN_PORT=8443 + +# Oracle Instant Client +# For Instant Client 19c, you should use the latest Release Update from +# https://yum.oracle.com/repo/OracleLinux/OL8/oracle/instantclient/x86_64/ +export OPY_INSTANT_CLIENT_VERSION=None # don't install Instant Client +#export OPY_INSTANT_CLIENT_VERSION=19.25 # Latest 19c RU available for OL8 +#export OPY_INSTANT_CLIENT_VERSION=21 # don't specify the RU for 21; the latest will be used +#export OPY_INSTANT_CLIENT_VERSION=23 # don't specify the RU for 23; the latest will be used + +# Update the image label details +# ------------------------------ +# Container image version +export OPY_IMAGE_VERSION= + +# Container release date +export OPY_IMAGE_RELEASE_DATE= + +# Container release status +export OPY_IMAGE_RELEASE_STATUS= diff --git a/samples/containers/app_dev/container_build.sh b/samples/containers/app_dev/container_build.sh new file mode 100755 index 00000000..81811b98 --- /dev/null +++ b/samples/containers/app_dev/container_build.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- +# +# NAME +# +# container_build.sh +# +# PURPOSE +# +# Top level script to build a container image for Python application +# developers. +# +# USAGE +# +# ./container_build.sh +# + +# Sourcing environment file to setup the build environment +source container_build.env + +# Pulling base image Oracle Linux container image +$CONTAINER_TOOL pull ghcr.io/oracle/oraclelinux:$OPY_OS_VERSION + +# Building Container image +$CONTAINER_TOOL build --tag pyorcldbdev \ + --build-arg OPY_PYTHON_VERSION \ + --build-arg OPY_PYTHON_VERSION_WITHOUTPERIOD \ + --build-arg OPY_USERNAME \ + --build-arg OPY_GROUPNAME \ + --build-arg OPY_IMAGE_RELEASE_DATE \ + --build-arg OPY_APACHE_SERVER_VERSION \ + --build-arg OPY_INSTANT_CLIENT_VERSION \ + --build-arg OPY_APACHE_LISTEN_PORT . 
--no-cache diff --git a/samples/containers/app_dev/otherfiles/autodeployscript.sh.template b/samples/containers/app_dev/otherfiles/autodeployscript.sh.template new file mode 100755 index 00000000..8d86e746 --- /dev/null +++ b/samples/containers/app_dev/otherfiles/autodeployscript.sh.template @@ -0,0 +1,98 @@ +#!/bin/bash + +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- +# +# NAME +# +# autodeployscript.sh +# +# PURPOSE +# +# Deploy a user application during container creation. + +apache_conf_file="OPY_OSUSERBASEDIR/apache/conf/extra/pyorcldb_wsgi.conf" +requirements_txt="OPY_OSUSERBASEDIR/app/requirements.txt" +search_txt="# Add your WSGI file details below in the next line" +auto_deploy="${DEPLOY_APP:-FALSE}" +appname="${APP_NAME:-No_App_Name}" + +( [ ${auto_deploy} != "TRUE" ] && echo "No need to deploy application" && exit 0) + +( [ ${auto_deploy} == "TRUE" ] && [ ${appname} == "No_App_Name" ] && + echo "Application name is not found" && \ + echo "Application name is required to deploy your application" && \ + echo "Please set it in environment variable APP_NAME" && exit -1 ) + +( [ ${auto_deploy} == "TRUE" ] && + sed -i "s/OPY_APPNAME/${appname}/g" $apache_conf_file && + sed -i "s/No_App_Name/${appname}/g" $apache_conf_file ) + +search_txt1="# DocumentRoot" +replace_txt1="DocumentRoot" + +search_txt2="# WSGIScriptAlias \\/app" +replace_txt2="WSGIScriptAlias \\/app" + +search_txt3="# /tmp/null && \ + [ $? -eq 0 ] && echo "Application deployment was successfully completed! Starting Apache HTTP server.." ) || + [ "TRUE" == "TRUE" ] ) + +exit 0 diff --git a/samples/containers/app_dev/otherfiles/pyorcldb_generate_cert.sh b/samples/containers/app_dev/otherfiles/pyorcldb_generate_cert.sh new file mode 100755 index 00000000..cb05891c --- /dev/null +++ b/samples/containers/app_dev/otherfiles/pyorcldb_generate_cert.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. 
+# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- +# +# NAME +# +# pyorcldb_generate_cert.sh +# +# PURPOSE +# +# Sample generation of an SSL certificate and key for testing purposes +# +# USAGE +# ./pyorcldb_generate_cert.sh +# +cd /opt/cert +openssl req -newkey rsa:4096 -x509 -sha512 -days 365 -nodes -out \ + certificate.pem -keyout privatekey.pem \ + -subj "/C=OC/ST=DEMO/L=DEMO/O=DEMO/CN=DEMO" 2>/dev/null diff --git a/samples/containers/app_dev/otherfiles/pyorcldb_wsgi.conf.template b/samples/containers/app_dev/otherfiles/pyorcldb_wsgi.conf.template new file mode 100644 index 00000000..a9d861d9 --- /dev/null +++ b/samples/containers/app_dev/otherfiles/pyorcldb_wsgi.conf.template @@ -0,0 +1,44 @@ +# Sample WSGI configuration for testing purposes + +Define opy_basedir OPY_BASEDIR +Define opy_port OPY_APACHE_LISTEN_PORT +Define opy_pythonvirtualdirname OPY_PYTHONVIRTUALDIRNAME +Define opy_appdir app +Define opy_appname OPY_APPNAME + +ServerName pyorcldbdemo + +Listen 0.0.0.0:${opy_port} + + + LoadModule wsgi_module modules/mod_wsgi.so + + +WSGIPythonHome ${opy_basedir}/${opy_pythonvirtualdirname} + + + +# Alias /static/ ${opy_basedir}/${opy_appdir}/static/ +# +# Require all granted +# + + ErrorLog ${opy_basedir}/apache/logs/pyorcldb_error.log + CustomLog ${opy_basedir}/apache/logs/appstack_app_access.log "%h %l %u %t \"%r\" %>s %b" + + SSLEngine on + SSLCertificateFile ${opy_basedir}/cert/certificate.pem + SSLCertificateKeyFile ${opy_basedir}/cert/privatekey.pem + + # Deployed application + # DocumentRoot ${opy_basedir}/${opy_appdir} + # WSGIDaemonProcess ${opy_appname} python-path=${opy_basedir}/${opy_appdir} + # WSGIScriptAlias /app ${opy_basedir}/${opy_appdir}/pyorcldb_app.wsgi + # + # WSGIProcessGroup ${opy_appname} + # Order allow,deny + # Allow from all + # Require all granted + # + + diff --git a/samples/containers/app_dev/sample_app/README.md b/samples/containers/app_dev/sample_app/README.md new file mode 100644 index 00000000..0bb48f14 --- /dev/null +++ b/samples/containers/app_dev/sample_app/README.md @@ -0,0 +1,4 @@ +This directory contains a sample application that can be installed in the +container. See the upper level README for details. + +The sample is a Flask web app that queries a small customer database. diff --git a/samples/containers/app_dev/sample_app/customer.py b/samples/containers/app_dev/sample_app/customer.py new file mode 100644 index 00000000..d17ccb10 --- /dev/null +++ b/samples/containers/app_dev/sample_app/customer.py @@ -0,0 +1,228 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025 Oracle and/or its affiliates. 
+# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# customer.py +# ----------------------------------------------------------------------------- + +import os + +from flask import Flask, render_template, request, redirect, url_for +import oracledb + +app = Flask(__name__, template_folder="templates") +app.config["CONNECTION_POOL"] = None +app.config["CONNECTION_STATUS"] = None +drop_table_23ai = "drop table if exists customer_info" +drop_table = """begin + execute immediate 'drop table customer_info'; + exception + when others then + if sqlcode != -942 then + raise; + end if; + end;""" +stmts = [ + """create table customer_info ( + id number generated always as + identity(start with 1001 increment by 1), + name varchar2(30), + dob date, + city varchar2(30), + zipcode number)""", + """insert into customer_info (name, dob, city, zipcode) + values ('Allen', '01-Sep-1980', 'Belmont', 56009)""", + """insert into customer_info (name, dob, city, zipcode) + values ('Bob', '12-Aug-2009', 'San Jose', 56012)""", + """insert into customer_info (name, dob, city, zipcode) + values ('Christina', '30-Jul-1994', 'San Carlos', 56023)""", +] + + +# init_session(): a 'session callback' to efficiently set any initial state +# that each connection should have. +def init_session(connection, requestedTag_ignored): + with connection.cursor() as cursor: + cursor.execute( + """ + alter session set + time_zone = 'UTC' + nls_date_format = 'DD-MON-YYYY' + """ + ) + + +def create_pool(): + pool_min = 2 + pool_max = 2 + pool_inc = 0 + + tns_admin = os.environ.get("TNS_ADMIN") + wallet_password = os.environ.get("PYO_SAMPLES_WALLET_PASSWORD") + wallet_location = os.environ.get("PYO_SAMPLES_WALLET_LOCATION") + if ( + wallet_password is not None + and wallet_location is None + and tns_admin is not None + ): + wallet_location = tns_admin + + if wallet_password is not None: + try: + db_pool = oracledb.create_pool( + user=os.environ.get("PYO_SAMPLES_MAIN_USER"), + password=os.environ.get("PYO_SAMPLES_MAIN_PASSWORD"), + wallet_password=wallet_password, + wallet_location=wallet_location, + config_dir=tns_admin, + dsn=os.environ.get("PYO_SAMPLES_CONNECT_STRING"), + min=pool_min, + max=pool_max, + increment=pool_inc, + session_callback=init_session, + ) + error_msg = "Success!" 
+ except oracledb.DatabaseError as e: + db_pool = None + (error,) = e.args + error_msg = error.message + else: + try: + db_pool = oracledb.create_pool( + user=os.environ.get("PYO_SAMPLES_MAIN_USER"), + password=os.environ.get("PYO_SAMPLES_MAIN_PASSWORD"), + dsn=os.environ.get("PYO_SAMPLES_CONNECT_STRING"), + config_dir=tns_admin, + min=pool_min, + max=pool_max, + increment=pool_inc, + session_callback=init_session, + ) + error_msg = "Success!" + except oracledb.DatabaseError as e: + db_pool = None + (error,) = e.args + error_msg = error.message + + return (db_pool, error_msg) + + +# Create customer details table and populate records +# Note: The customer_info table will be re-created everytime during this +# application startup. +def create_schema(): + pool = app.config["CONNECTION_POOL"] + dbstatus = app.config["CONNECTION_STATUS"] + + if dbstatus != "Success!": + return + + try: + connection = pool.acquire() + except oracledb.DatabaseError: + return + + with connection.cursor() as cursor: + dbversion = connection.version + dbversion = dbversion.split(".")[0] + if int(dbversion) >= 23: + cursor.execute(drop_table_23ai) + else: + cursor.execute(drop_table) + for stmt in stmts: + cursor.execute(stmt) + connection.commit() + + +@app.route("/entry", methods=["POST", "GET"]) +def entry(): + stmt = """insert into customer_info (name, dob, city, zipcode) values + (:1, :2, :3, :4)""" + + if request.method == "GET": + msg = "Customer details added successfully!" + return render_template("customer_entry_redirect.html", form_data=msg) + + if request.method == "POST": + data = [ + request.form["cus_name"], + request.form["cus_dob"], + request.form["cus_city"], + request.form["cus_zipcode"], + ] + + try: + pool = app.config["CONNECTION_POOL"] + with pool.acquire() as connection: + connection.autocommit = True + with connection.cursor() as cursor: + cursor.execute(stmt, data) + except oracledb.DatabaseError as e: + (error,) = e.args + dbstatus = error.message + return render_template("error_handler.html", form_data=dbstatus) + + return redirect(url_for("customer")) + + +@app.route("/customer", methods=["POST", "GET"]) +def customer(): + stmt = "select * from customer_info" + pool = app.config["CONNECTION_POOL"] + dbstatus = app.config["CONNECTION_STATUS"] + + if dbstatus != "Success!": + return render_template("error_handle.html", form_data=dbstatus) + + try: + connection = pool.acquire() + except oracledb.DatabaseError as e: + (error,) = e.args + dbstatus = error.message + return render_template("error_handler.html", form_data=dbstatus) + + with connection.cursor() as cursor: + cursor.execute(stmt) + data = cursor.fetchall() + return render_template("customer_list.html", form_data=data) + + +# MAIN + +# Create the connection pool +( + app.config["CONNECTION_POOL"], + app.config["CONNECTION_STATUS"], +) = create_pool() + +# Warning: the CUSTOMER_INFO table used by this application will be re-created +# during application startup so any previous changes (addition of customers) +# will vanish. 
+ +create_schema() +if __name__ == "__main__": + app.run( + host="0.0.0.0", + port=8443, + ) diff --git a/samples/containers/app_dev/sample_app/pyorcldb_app.wsgi b/samples/containers/app_dev/sample_app/pyorcldb_app.wsgi new file mode 100644 index 00000000..2c392273 --- /dev/null +++ b/samples/containers/app_dev/sample_app/pyorcldb_app.wsgi @@ -0,0 +1,5 @@ +from customer import app as application + + +def func1(): + return application diff --git a/samples/containers/app_dev/sample_app/requirements.txt b/samples/containers/app_dev/sample_app/requirements.txt new file mode 100644 index 00000000..d9ccdf77 --- /dev/null +++ b/samples/containers/app_dev/sample_app/requirements.txt @@ -0,0 +1,2 @@ +Flask +oracledb diff --git a/samples/containers/app_dev/sample_app/static/logo.png b/samples/containers/app_dev/sample_app/static/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..c8d6ea8a45bd362b30b10938ce69a99f73773f61 GIT binary patch literal 2330 zcmcIm`8N~{8=WyjLuf2hRD(gX_hNcwglLAb4JLb%NQ3NamJu4u*s_HeV=D>~8Izr4 zr?FMGPO`AH;H8Sq?2d%TXsols^o7vt)#h+MQ z+dF)>=pk1Kx+CatBv0WSP(yv0m}|yg9ZW5M{AK)R7~T0z0KPT{7U`3Bz*O7}z#&ke zf=1};`jL)4z^&ZA=%n=}ki;9r!3Gn886)^v6IdoQI;X&snLk6jN~alUEjwfSKwyhf z+Xq(=O^o=C!M|nO+1q;keVs@@6E4G|F;syMMAW+U_ypaX-W&^l8*y+c1u9?w9g1v4 z%CTkErL$x5yDzKy(6CV1JS|Au$&uBctybC}+k8`vPP-QwLVgcTos{_anBBl`Ml?2W zp+NPHfu4@oP^Q4V!I)+2YM+%pYk>u7RhNE$rsHGtZs(>@00algtsjXrx|0G##HfY4 zeG3(To(vF04d0J0RnM=E%;cf-4A#q1#=WE7hCEScg!iuj`|p;(J5PaUkc@onqCBS_ z!Cz?{0hS3_G8R_1v=IYWPLz^F${4)BSz;i!;K+7C$!C&DBxWn@m1lQUCES}NIO2a4 zH>@15Qr-M!NtVjZ|0;;oBKNU!vwYWd^m>%A%;!z12KVp37aC^-KPl*V4d4aStwu}^ zEnBUbFKpBw0j^!`nF1qU6~7*xTCs=f^Q`+dj6}*E(|o=A&Ox*lp%;fAJXo)V$jZx2 zdz$f(E->p)*DcqzH@VHclLX&*I0-W+B$`7EZYqGv#fZ47bQc7189_3Zt%>w<|kIas8fuU%{5N>V}s;?2xE&%=e?c z&`P?Q#bv}zo)-?|Z%hLhg3;#HrBVBI4jFXmyOP={kbD~d55cBqK$CE!?$?$}8Ipbg zlhv*4w1*|E5|%RZ-a;ZA8-|hWtCK_SX_}~1n=20^5rdMfa%B;(hJPv{_%bITXk+h$ zAU$heEL#bQ#$`X4`F27(m!YbJP|THG0_>m75^(8?YZ(>&n&q5LVVa=oxe8r2^{@k* zQoFpCAg7IqU<~P|SN8C%o*dh-NGfW|BuwzROY8b9)!xx@QKg$|wNG1R_*V`vDXJLk zNuo21`$SG9Umq{!@{n^WoeWyDdm`clI)M7K%~wuesXHe#km&ndTx~oj?%ry?d?04$ z**~9v=pGIyYff%y;XdclBED#^^30VhH%Qa~mPS$LUz#-f}%s$^Xv)okQ$QSUzGS6N|5XCynu97#E{HHFe3!46}|cL z)xq+`)VX?xg1P&y9D%ThYt5#YU)q&J=v`kEal`w>M>Ta(cbtt=vek>J+-Xa^Za#dF zt1rV2AkT$xWl zo%yq7)y2@?(R_Ly6rNdvRG5hFla=99dh<)DgMC%paTTss8IMH#@n6%G>-qGjHvcDY=)Yi2T{W71=FE@@-_}H z#34-+lGLs2_qh$fxYIJb;I>;kCAYL=mNK?8YZ`WJ%zJkTkk>WJug32Gz1URyR$m7+ zvb@nvraj%!gvaZ>EQ2H4T0i|ErV2Z?c?4HFi63#mA7Mh@4-Av#t}J|pl8op3(sT6< z)CszlSHTE=-H7T0_n0jGv19`}zw6*&ArxHk0~~&{ykZYFOSZRnVFa9WU}ck!rUCn} zG1QowXJCFDx~bWP%|RX0V%UrIYvN_d1dnHl+-5M>xDK`S{uH_B0zQg`Z=rrWkFDbI zH=Hin#3f+{I$QYfG!1E#DWDUfhk- zXu>*ID2aC&kixQK>=T8kw46z(dD)yq#{T66txW#nIc~=Mfc=F+TZ1Fj0+Uz~$!J;%1$etS^=GpvCVCumk$vh5-5QqPf>Hq)z_5V^M9O4y@S#r!g V62Yvb(vz|S(8b`;6 + + + My Customer Sample Application - python-oracledb developer environment + + +

+Customer list
+
+Customer ID | Name | Date of birth | City | Zipcode
+{% for item in form_data %}
+{{item[0]}} | {{item[1]}} | {{item[2]}} | {{item[3]}} | {{item[4]}}
+{% endfor %}
+
+Add new customer
+
+(customer entry form)
+
+{% with messages = get_flashed_messages() %}
+{% if messages %}
+{% for message in messages %}
+  • {{message}}
+{% endfor %}
+ {% endif %} + {% endwith %} + + diff --git a/samples/containers/app_dev/sample_app/templates/error_handler.html b/samples/containers/app_dev/sample_app/templates/error_handler.html new file mode 100644 index 00000000..7f7ff390 --- /dev/null +++ b/samples/containers/app_dev/sample_app/templates/error_handler.html @@ -0,0 +1,22 @@ + + + + My Customer Sample Application - python-oracledb developer environment + + +
+Database Error
+
+Error Message
+{% for item in form_data %}{{item[0]}}{% endfor %}
+ + diff --git a/samples/sample_container/Dockerfile b/samples/containers/samples_and_db/Dockerfile similarity index 95% rename from samples/sample_container/Dockerfile rename to samples/containers/samples_and_db/Dockerfile index abae563c..0d612b8f 100644 --- a/samples/sample_container/Dockerfile +++ b/samples/containers/samples_and_db/Dockerfile @@ -85,8 +85,8 @@ WORKDIR /samples/ RUN curl -LO https://github.com/oracle/python-oracledb/archive/refs/heads/main.zip && \ unzip main.zip && \ - cp python-oracledb-main/samples/sample_container/setup.py . && \ - /bin/rm -rf python-oracledb-main/samples/sample_container/ python-oracledb-main/samples/notebooks/ && \ + cp python-oracledb-main/samples/containers/samples_and_db/setup.py . && \ + /bin/rm -rf python-oracledb-main/samples/containers/ python-oracledb-main/samples/notebooks/ && \ mv python-oracledb-main/samples/* . && \ /bin/rm -rf python-oracledb-main samples main.zip && \ cat create_schema.py >> /samples/setup.py && \ diff --git a/samples/sample_container/README.md b/samples/containers/samples_and_db/README.md similarity index 100% rename from samples/sample_container/README.md rename to samples/containers/samples_and_db/README.md diff --git a/samples/sample_container/setup.py b/samples/containers/samples_and_db/setup.py similarity index 100% rename from samples/sample_container/setup.py rename to samples/containers/samples_and_db/setup.py From 5fd1065e5ef339e074717b8b0f57491444dc944d Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 11:03:02 -0700 Subject: [PATCH 028/178] Fix conversion from BINARY_DOUBLE/BINARY_FLOAT to string. --- src/oracledb/impl/base/converters.pyx | 10 +++++++++- tests/test_3600_outputtypehandler.py | 20 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/oracledb/impl/base/converters.pyx b/src/oracledb/impl/base/converters.pyx index 0e8fda0e..48c4b6be 100644 --- a/src/oracledb/impl/base/converters.pyx +++ b/src/oracledb/impl/base/converters.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2024, Oracle and/or its affiliates. +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -180,6 +180,14 @@ cdef object convert_oracle_data_to_python(OracleMetadata from_metadata, elif ora_type_num == ORA_TYPE_NUM_NUMBER: return convert_number_to_python_str(&data.buffer) + # Oracle BINARY_DOUBLE + elif ora_type_num == ORA_TYPE_NUM_BINARY_DOUBLE: + return str(data.buffer.as_double) + + # Oracle BINARY_FLOAT + elif ora_type_num == ORA_TYPE_NUM_BINARY_FLOAT: + return str(data.buffer.as_float) + # Oracle DATE, TIMESTAMP (WITH (LOCAL) TIME ZONE) elif ora_type_num in ( ORA_TYPE_NUM_DATE, diff --git a/tests/test_3600_outputtypehandler.py b/tests/test_3600_outputtypehandler.py index 5d89eae5..2622c30c 100644 --- a/tests/test_3600_outputtypehandler.py +++ b/tests/test_3600_outputtypehandler.py @@ -707,6 +707,26 @@ def type_handler_2(cursor, metadata): self.cursor.execute(sql, [0]) self.assertEqual(self.cursor.fetchall(), []) + def test_3677(self): + "3677 - output type handler: from BINARY_DOUBLE to VARCHAR" + str_value = "36.75" if test_env.get_is_thin() else "3.675E+001" + self.__test_type_handler( + oracledb.DB_TYPE_BINARY_DOUBLE, + oracledb.DB_TYPE_VARCHAR, + 36.75, + str_value, + ) + + def test_3678(self): + "3678 - output type handler: from BINARY_FLOAT to VARCHAR" + str_value = "16.25" if test_env.get_is_thin() else "1.625E+001" + self.__test_type_handler( + oracledb.DB_TYPE_BINARY_FLOAT, + oracledb.DB_TYPE_VARCHAR, + 16.25, + str_value, + ) + if __name__ == "__main__": test_env.run_test_cases() From d93397ff33ed98c0fa6bc32233212e0fcf3ca1f4 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 11:03:56 -0700 Subject: [PATCH 029/178] Force thin mode to be enabled when creating connections or pools with asyncio, thus improving the error message and also ensuring that thick and thin modes to not run concurrently. --- doc/src/release_notes.rst | 4 ++++ src/oracledb/connection.py | 1 + src/oracledb/pool.py | 1 + utils/templates/connection.py | 1 + utils/templates/pool.py | 1 + 5 files changed, 8 insertions(+) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 03ac3821..6573f474 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -104,6 +104,10 @@ Common Changes #) All Oracle errors that result in the connection no longer being usable will be raised as ``DPY-4011: the database or network closed the connection`` with the underlying reason being included in the error message. +#) Error ``DPY-2053: python-oracledb thin mode cannot be used because thick + mode has already been enabled`` is now raised when attempting to use + asyncio in thick mode + (`issue 448 `__). #) Error ``DPY-2056: registered handler for protocol "{protocol}" failed for arg "{arg}"`` is now raised when an exception occurs when calling the registered handler for a protocol. 
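As a rough illustration of the DPY-2053 behaviour noted above, the following sketch (placeholder credentials and connect string, not part of this patch) shows that initializing thick mode and then creating an asyncio connection now fails immediately:

.. code-block:: python

    import asyncio

    import oracledb

    oracledb.init_oracle_client()  # the process is now committed to thick mode

    async def main():
        try:
            # connect_async() forces thin mode, which conflicts with the
            # thick mode enabled above, so DPY-2053 is raised instead of a
            # less specific error
            await oracledb.connect_async(
                user="scott", password="tiger", dsn="localhost/orclpdb1"
            )
        except oracledb.Error as e:
            print(e)  # DPY-2053: python-oracledb thin mode cannot be used ...

    asyncio.run(main())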
diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index 8978a1c4..04ed7ce0 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -1984,6 +1984,7 @@ def connect_async( # build connection class and call the implementation connect to # actually establish the connection + oracledb.enable_thin_mode() return conn_class(dsn, pool, params, kwargs) return connect_async diff --git a/src/oracledb/pool.py b/src/oracledb/pool.py index b86c2d6e..6175d9bc 100644 --- a/src/oracledb/pool.py +++ b/src/oracledb/pool.py @@ -1105,6 +1105,7 @@ def create_pool_async( params=params, **kwargs, ) + oracledb.enable_thin_mode() if not issubclass(pool_class, AsyncConnectionPool): errors._raise_err(errors.ERR_INVALID_POOL_CLASS) return pool_class(dsn, params=params, cache_name=pool_alias, **kwargs) diff --git a/utils/templates/connection.py b/utils/templates/connection.py index 27992ab3..c9415158 100644 --- a/utils/templates/connection.py +++ b/utils/templates/connection.py @@ -1747,6 +1747,7 @@ def connect_async( # build connection class and call the implementation connect to # actually establish the connection + oracledb.enable_thin_mode() return conn_class(dsn, pool, params, kwargs) return connect_async diff --git a/utils/templates/pool.py b/utils/templates/pool.py index c43956e6..281ff65d 100644 --- a/utils/templates/pool.py +++ b/utils/templates/pool.py @@ -799,6 +799,7 @@ def create_pool_async( params=params, **kwargs, ) + oracledb.enable_thin_mode() if not issubclass(pool_class, AsyncConnectionPool): errors._raise_err(errors.ERR_INVALID_POOL_CLASS) return pool_class(dsn, params=params, cache_name=pool_alias, **kwargs) From 97bc753c9d24516b8da82a623a69fdbc6b9ee05c Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 11:04:43 -0700 Subject: [PATCH 030/178] Refactor to allow for other user callouts in the future. --- src/oracledb/base_impl.pxd | 1 + src/oracledb/impl/base/connect_params.pyx | 13 +++++++++++++ src/oracledb/impl/base/parsers.pyx | 8 +------- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index b7f4c28e..0ab0098a 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -568,6 +568,7 @@ cdef class ConnectParamsImpl: cdef bytes _get_password(self) cdef str _get_private_key(self) cdef str _get_token(self) + cdef object _get_public_instance(self) cdef object _get_token_expires(self, str token) cdef str _get_wallet_password(self) cdef int _parse_connect_string(self, str connect_string) except -1 diff --git a/src/oracledb/impl/base/connect_params.pyx b/src/oracledb/impl/base/connect_params.pyx index 6628424f..c83304ce 100644 --- a/src/oracledb/impl/base/connect_params.pyx +++ b/src/oracledb/impl/base/connect_params.pyx @@ -257,6 +257,19 @@ cdef class ConnectParamsImpl: errors._raise_err(errors.ERR_EXPIRED_ACCESS_TOKEN) return self._xor_bytes(self._token, self._token_obfuscator).decode() + cdef object _get_public_instance(self): + """ + Returns the public instance to use when making calls out to user + defined code. + """ + cdef object inst + if isinstance(self, PoolParamsImpl): + inst = PY_TYPE_POOL_PARAMS.__new__(PY_TYPE_POOL_PARAMS) + else: + inst = PY_TYPE_CONNECT_PARAMS.__new__(PY_TYPE_CONNECT_PARAMS) + inst._impl = self + return inst + cdef object _get_token_expires(self, str token): """ Gets the expiry date from the token. 
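The public ConnectParams or PoolParams instance built by _get_public_instance() is what a hook registered with oracledb.register_protocol() receives as its params argument; the sketch below (a hypothetical "ldap" hook with a placeholder lookup result) illustrates the call signature used by the parser in the following hunk:

.. code-block:: python

    import oracledb

    def ldap_hook(protocol, arg, params):
        # "arg" is the remainder of the connect string after "ldap://"; a
        # real hook would resolve it and then populate the ConnectParams
        # (or PoolParams) object it was given
        resolved = "dbhost.example.com:1521/orclpdb1"  # placeholder result
        params.parse_connect_string(resolved)

    oracledb.register_protocol("ldap", ldap_hook)

    # connect strings beginning with "ldap://" now invoke ldap_hook(); an
    # exception raised inside the hook is reported as DPY-2056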
diff --git a/src/oracledb/impl/base/parsers.pyx b/src/oracledb/impl/base/parsers.pyx index a9dcdb0b..3416963c 100644 --- a/src/oracledb/impl/base/parsers.pyx +++ b/src/oracledb/impl/base/parsers.pyx @@ -383,14 +383,8 @@ cdef class ConnectStringParser(BaseParser): if protocol is not None: fn = REGISTERED_PROTOCOLS.get(protocol) if fn is not None: - if isinstance(self.params_impl, PoolParamsImpl): - params = PY_TYPE_POOL_PARAMS.__new__(PY_TYPE_POOL_PARAMS) - else: - params = PY_TYPE_CONNECT_PARAMS.__new__( - PY_TYPE_CONNECT_PARAMS - ) - params._impl = self.params_impl arg = self.data_as_str[self.temp_pos:] + params = self.params_impl._get_public_instance() try: fn(protocol, arg, params) except Exception as e: From 9cfc4447ea74de7804fe3afa43424be16795a3f4 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 12:26:58 -0700 Subject: [PATCH 031/178] Ensure that the service is started if it already exists. --- tests/ext/test_ext_2400_tg_async.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/ext/test_ext_2400_tg_async.py b/tests/ext/test_ext_2400_tg_async.py index e28aace4..3a3e25a9 100644 --- a/tests/ext/test_ext_2400_tg_async.py +++ b/tests/ext/test_ext_2400_tg_async.py @@ -62,6 +62,13 @@ async def __perform_setup(self): ) (count,) = await cursor.fetchone() if count > 0: + try: + await cursor.callproc( + "dbms_service.start_service", [self.service_name] + ) + except Exception as e: + if not str(e).startswith("ORA-44305:"): + raise return await cursor.execute( f""" From 54c638a421b1aff4f8054eae4a67e71a40c83979 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 13:58:22 -0700 Subject: [PATCH 032/178] Added support for importing plugins based on an environment variable. --- tests/test_env.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/tests/test_env.py b/tests/test_env.py index 04dc1b07..0630c715 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -42,6 +42,7 @@ # PYO_TEST_DRIVER_MODE: python-oracledb mode (thick or thin) to use # PYO_TEST_EXTERNAL_USER: user for testing external authentication # PYO_TEST_EDITION_NAME: name of edition for editioning tests +# PYO_TEST_PLUGINS: list of plugins to import before running tests # # PYO_TEST_CONNECT_STRING can be set to an Easy Connect string, or a # Net Service Name from a tnsnames.ora file or external naming service, @@ -65,6 +66,7 @@ # ----------------------------------------------------------------------------- import getpass +import importlib import os import secrets import sys @@ -85,11 +87,29 @@ PARAMETERS = {} +def _initialize(): + """ + Performs initialization of the test environment. This ensures the desired + mode is set, imports any required plugins and establishes a test + connection to ensure that the supplied credentials are correct. 
+ """ + if not get_is_thin(): + oracledb.init_oracle_client() + plugin_names = os.environ.get("PYO_TEST_PLUGINS") + if plugin_names is not None: + for name in plugin_names.split(","): + module_name = f"oracledb.plugins.{name}" + print("importing module", module_name) + importlib.import_module(module_name) + get_connection() + + def get_value(name, label, default_value=None, password=False): try: return PARAMETERS[name] except KeyError: pass + requires_initialization = len(PARAMETERS) == 0 env_name = "PYO_TEST_" + name value = os.environ.get(env_name) if value is None: @@ -103,12 +123,12 @@ def get_value(name, label, default_value=None, password=False): if not value: value = default_value PARAMETERS[name] = value + if requires_initialization: + _initialize() return value def get_admin_connection(use_async=False): - if not get_is_thin() and oracledb.is_thin_mode(): - oracledb.init_oracle_client() admin_user = get_value("ADMIN_USER", "Administrative user", "admin") admin_password = get_value( "ADMIN_PASSWORD", f"Password for {admin_user}", password=True @@ -209,7 +229,6 @@ def get_client_version(): if get_is_thin(): value = (23, 7) else: - oracledb.init_oracle_client() value = oracledb.clientversion()[:2] PARAMETERS[name] = value return value @@ -228,8 +247,6 @@ def get_connect_params(): def get_connection(dsn=None, use_async=False, **kwargs): - if not get_is_thin() and oracledb.is_thin_mode(): - oracledb.init_oracle_client() if dsn is None: dsn = get_connect_string() method = oracledb.connect_async if use_async else oracledb.connect @@ -445,6 +462,7 @@ def run_sql_script(conn, script_name, **kwargs): def run_test_cases(): + get_is_thin() unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) @@ -664,8 +682,6 @@ def is_on_oracle_cloud(self, connection=None): return is_on_oracle_cloud(connection) def setUp(self): - if not get_is_thin() and oracledb.is_thin_mode(): - oracledb.init_oracle_client() if self.requires_connection: self.conn = get_connection() self.cursor = self.conn.cursor() From 7e8b89cd6285d2075049993bd1b374ab631e9d52 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 13:58:53 -0700 Subject: [PATCH 033/178] Small doc updates. --- doc/src/index.rst | 4 ++- doc/src/user_guide/appendix_a.rst | 9 +++---- doc/src/user_guide/appendix_c.rst | 1 + doc/src/user_guide/installation.rst | 6 ++--- doc/src/user_guide/introduction.rst | 3 ++- doc/src/user_guide/sql_execution.rst | 37 ++++++++++++++++++++++------ 6 files changed, 41 insertions(+), 19 deletions(-) diff --git a/doc/src/index.rst b/doc/src/index.rst index 2ac38246..a6f418ca 100644 --- a/doc/src/index.rst +++ b/doc/src/index.rst @@ -2,7 +2,9 @@ Welcome to python-oracledb's documentation ========================================== The python-oracledb driver is an open source Python module that enables access -to Oracle Database. Python-oracledb is the new name for the cx_Oracle driver. +to Oracle Database. Python-oracledb is the renamed, major version successor to +cx_Oracle 8.3. The cx_Oracle driver is obsolete and should not be used for new +development. You can use assistive technology products, such as screen readers, while you work with the python-oracledb documentation. You can also use the keyboard diff --git a/doc/src/user_guide/appendix_a.rst b/doc/src/user_guide/appendix_a.rst index 734a9ab8..5715200a 100644 --- a/doc/src/user_guide/appendix_a.rst +++ b/doc/src/user_guide/appendix_a.rst @@ -9,8 +9,9 @@ Oracle Database. This mode does not need Oracle Client libraries. 
However, some additional functionality is available when python-oracledb uses them. Python-oracledb is said to be in 'Thick' mode when Oracle Client libraries are used. Both modes have comprehensive functionality supporting the Python -Database API v2.0 Specification. See :ref:`initialization` for how to enable -Thick mode. +Database API v2.0 Specification `PEP 249 +`__. See :ref:`initialization` for how to +enable Thick mode. The following table summarizes the Oracle Database features supported by python-oracledb Thin and Thick modes, and by cx_Oracle 8.3. For more details @@ -26,10 +27,6 @@ see :ref:`driverdiff` and :ref:`compatibility`. - python-oracledb Thin Mode - python-oracledb Thick Mode - cx_Oracle 8.3 - * - Python Database API Support (see `PEP 249 `__) - - Yes - a couple of features are not feasible. Many extensions. - - Yes - a couple of features are not feasible. Many extensions. - - Yes - a couple of features are not feasible. Many extensions. * - Oracle Client version - Not applicable - Release 11.2 and later diff --git a/doc/src/user_guide/appendix_c.rst b/doc/src/user_guide/appendix_c.rst index 0ef6fb27..108eb7c3 100644 --- a/doc/src/user_guide/appendix_c.rst +++ b/doc/src/user_guide/appendix_c.rst @@ -7,6 +7,7 @@ Appendix C: The python-oracledb and cx_Oracle Drivers The python-oracledb driver is the renamed, major version successor to cx_Oracle 8.3. As a major release, the python-oracledb driver has :ref:`new features ` and some :ref:`deprecations`. Also see :ref:`upgrading83`. +The cx_Oracle driver is obsolete and should not be used for new development. .. _compatibility: diff --git a/doc/src/user_guide/installation.rst b/doc/src/user_guide/installation.rst index 511232c8..621636fd 100644 --- a/doc/src/user_guide/installation.rst +++ b/doc/src/user_guide/installation.rst @@ -7,9 +7,9 @@ Installing python-oracledb The python-oracledb driver allows Python 3 applications to connect to Oracle Database. -Python-oracledb is the new name for the Python `cx_Oracle driver -`__. If you are upgrading from -cx_Oracle, see :ref:`upgrading83`. +The python-oracledb driver is the renamed, major version successor to cx_Oracle +8.3. For upgrade information, see :ref:`upgrading83`. The cx_Oracle driver is +obsolete and should not be used for new development. .. figure:: /images/python-oracledb-thin-arch.png :alt: architecture of the python-oracledb driver diff --git a/doc/src/user_guide/introduction.rst b/doc/src/user_guide/introduction.rst index 863e5557..bced3cc6 100644 --- a/doc/src/user_guide/introduction.rst +++ b/doc/src/user_guide/introduction.rst @@ -26,7 +26,8 @@ Changes in python-oracledb releases can be found in the :ref:`release notes `. The python-oracledb driver is the renamed, major version successor to cx_Oracle -8.3. For upgrade information, see :ref:`upgrading83`. +8.3. For upgrade information, see :ref:`upgrading83`. The cx_Oracle driver is +obsolete and should not be used for new development. 
Getting Started =============== diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst index afedb952..44560fd5 100644 --- a/doc/src/user_guide/sql_execution.rst +++ b/doc/src/user_guide/sql_execution.rst @@ -180,7 +180,7 @@ To extract the column names from a query you can use code like: with connection.cursor() as cursor: cursor.execute("select * from locations") - columns = [col[0] for col in cursor.description] + columns = [col.name for col in cursor.description] print(columns) for r in cursor: print(r) @@ -192,6 +192,22 @@ This will print:: (1100, '93091 Calle della Testa', '10934', 'Venice', None, 'IT') . . . +**Changing Column Names to Lowercase** + +To change all column names to lowercase you could do: + +.. code-block:: python + + cursor.execute("select * from locations where location_id = 1000") + + columns = [col.name.lower() for col in cursor.description] + print(columns) + +The output is:: + + ['location_id', 'street_address', 'postal_code', 'city', 'state_province', + 'country_id'] + .. _defaultfetchtypes: Fetch Data Types @@ -452,13 +468,15 @@ the database. The :meth:`Cursor.rowfactory` method is called with the tuple fetched from the database before it is returned to the application. The method can convert the tuple to a different value. +**Fetching Rows as Dictionaries** + For example, to fetch each row of a query as a dictionary: .. code-block:: python cursor.execute("select * from locations where location_id = 1000") - columns = [col[0] for col in cursor.description] + columns = [col.name for col in cursor.description] cursor.rowfactory = lambda *args: dict(zip(columns, args)) data = cursor.fetchone() print(data) @@ -484,8 +502,11 @@ only one of the similarly named columns will be included in the dictionary: dogs.color from cats, dogs +**Example with an Output Type Handler, Outconverter, and Row Factory** + An example showing an :ref:`output type handler `, an -:ref:`outconverter `, and a row factory is: +:ref:`outconverter `, and a :ref:`row factory ` +is: .. code-block:: python @@ -505,17 +526,17 @@ An example showing an :ref:`output type handler `, an cursor.execute("select 123 as col1, 'abc' as col2 from dual") - columns = [col[0] for col in cursor.description] + columns = [col.name.lower() for col in cursor.description] cursor.rowfactory = lambda *args: dict(zip(columns, args)) for r in cursor.fetchall(): print(r) The database converts the number to a string before it is returned to -python-oracledb. The outconverter appends "was a string" to this value. -Finally the row factory changes the complete row to a dictionary. The output -is:: +python-oracledb. The outconverter appends "was a string" to this value. The +column names are converted to lowercase. Finally, the row factory changes the +complete row to a dictionary. The output is:: - {'COL1': '123 was a string', 'COL2': 'abc'} + {'col1': '123 was a string', 'col2': 'abc'} .. _numberprecision: From e4832f2cbedf4cc877a7906cd80ac4652999914e Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 14:00:18 -0700 Subject: [PATCH 034/178] Fixed bug when using asyncio and calling a stored procedure with data that exceeds 32767 bytes in length (#441). 
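A minimal sketch of the pattern this fixes, assuming placeholder credentials and the pkg_TestLobs.GetSize procedure added to the test schema by this patch:

.. code-block:: python

    import asyncio

    import oracledb

    async def main():
        conn = await oracledb.connect_async(
            user="scott", password="tiger", dsn="localhost/orclpdb1"
        )
        cursor = conn.cursor()
        data = "x" * 40_000  # exceeds the 32767-byte PL/SQL limit
        size_var = cursor.var(int)
        # a bind value this large is converted to a temporary LOB; with
        # asyncio that conversion is a coroutine which must be awaited
        # before the call is sent to the database
        await cursor.callproc("pkg_TestLobs.GetSize", [data, size_var])
        print(size_var.getvalue())  # 40000
        await conn.close()

    asyncio.run(main())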
--- doc/src/release_notes.rst | 3 ++ src/oracledb/impl/thin/cursor.pyx | 23 ++++++++++++-- src/oracledb/impl/thin/var.pyx | 11 +++++-- tests/sql/create_schema.sql | 38 +++++++++++++++++++++++- tests/test_4100_cursor_callproc.py | 16 +++++++++- tests/test_6200_cursor_callproc_async.py | 16 +++++++++- 6 files changed, 99 insertions(+), 8 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 6573f474..c6ea1dbc 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -42,6 +42,9 @@ Thin Mode Changes doesn't support the end of response flag. #) Fixed hang when using asyncio and a connection is unexpectedly closed by the database. +#) Fixed bug when using :ref:`asyncio ` and calling a + stored procedure with data that exceeds 32767 bytes in length + (`issue 441 `__). #) Error ``DPY-6002: The distinguished name (DN) on the server certificate does not match the expected value: "{expected_dn}"`` now shows the expected value. diff --git a/src/oracledb/impl/thin/cursor.pyx b/src/oracledb/impl/thin/cursor.pyx index 904a902f..16595850 100644 --- a/src/oracledb/impl/thin/cursor.pyx +++ b/src/oracledb/impl/thin/cursor.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -269,13 +269,30 @@ cdef class AsyncThinCursorImpl(BaseThinCursorImpl): message = self._create_message(FetchMessage, cursor) await self._conn_impl._protocol._process_single_message(message) + async def _preprocess_execute_async(self, object conn): + """ + Performs the necessary steps required before actually executing the + statement associated with the cursor. 
+ """ + cdef: + ThinVarImpl var_impl + BindInfo bind_info + ssize_t idx + self._preprocess_execute(conn) + for bind_info in self._statement._bind_info_list: + var_impl = bind_info._bind_var_impl + if var_impl._coroutine_indexes is not None: + for idx in var_impl._coroutine_indexes: + var_impl._values[idx] = await var_impl._values[idx] + var_impl._coroutine_indexes = None + async def execute(self, cursor): cdef: object conn = cursor.connection BaseAsyncProtocol protocol MessageWithData message protocol = self._conn_impl._protocol - self._preprocess_execute(conn) + await self._preprocess_execute_async(conn) message = self._create_message(ExecuteMessage, cursor) message.num_execs = 1 await protocol._process_single_message(message) @@ -294,7 +311,7 @@ cdef class AsyncThinCursorImpl(BaseThinCursorImpl): # set up message to send protocol = self._conn_impl._protocol - self._preprocess_execute(cursor.connection) + await self._preprocess_execute_async(cursor.connection) message = self._create_message(ExecuteMessage, cursor) message.num_execs = num_execs message.batcherrors = batcherrors diff --git a/src/oracledb/impl/thin/var.pyx b/src/oracledb/impl/thin/var.pyx index 8ed07f73..ab08882f 100644 --- a/src/oracledb/impl/thin/var.pyx +++ b/src/oracledb/impl/thin/var.pyx @@ -32,6 +32,7 @@ cdef class ThinVarImpl(BaseVarImpl): cdef: object _last_raw_value + list _coroutine_indexes cdef int _bind(self, object conn, BaseCursorImpl cursor_impl, uint32_t num_execs, object name, uint32_t pos) except -1: @@ -44,10 +45,11 @@ cdef class ThinVarImpl(BaseVarImpl): ssize_t idx, num_binds, num_vars BindInfo bind_info str normalized_name - object value, lob + bint is_async + object value # for PL/SQL blocks, if the size of a string or bytes object exceeds - # 32,767 bytes it must be converted to a BLOB/CLOB; and out converter + # 32,767 bytes it must be converted to a BLOB/CLOB; an out converter # needs to be established as well to return the string in the way that # the user expects to get it if stmt._is_plsql and metadata.max_size > 32767: @@ -67,6 +69,7 @@ cdef class ThinVarImpl(BaseVarImpl): self.outconverter = converter # for variables containing LOBs, create temporary LOBs, if needed + is_async = thin_cursor_impl._conn_impl._protocol._transport._is_async if metadata.dbtype._ora_type_num == ORA_TYPE_NUM_CLOB \ or metadata.dbtype._ora_type_num == ORA_TYPE_NUM_BLOB: for idx, value in enumerate(self._values): @@ -74,6 +77,10 @@ cdef class ThinVarImpl(BaseVarImpl): and not isinstance(value, (PY_TYPE_LOB, PY_TYPE_ASYNC_LOB)): self._values[idx] = conn.createlob(metadata.dbtype, value) + if is_async: + if self._coroutine_indexes is None: + self._coroutine_indexes = [] + self._coroutine_indexes.append(idx) # bind by name if name is not None: diff --git a/tests/sql/create_schema.sql b/tests/sql/create_schema.sql index ede9aa9f..e9486286 100644 --- a/tests/sql/create_schema.sql +++ b/tests/sql/create_schema.sql @@ -1,5 +1,5 @@ /*----------------------------------------------------------------------------- - * Copyright (c) 2020, 2024, Oracle and/or its affiliates. + * Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
* * This software is dual-licensed to you under the Universal Permissive License * (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -1417,3 +1417,39 @@ create or replace package body &main_user..pkg_SessionCallback as end; / + +create or replace package &main_user..pkg_TestLOBs as + + procedure GetSize( + a_BLOB blob, + a_Size out number + ); + + procedure GetSize( + a_CLOB clob, + a_Size out number + ); + +end; +/ + +create or replace package body &main_user..pkg_TestLOBs as + + procedure GetSize( + a_BLOB blob, + a_Size out number + ) is + begin + a_Size := dbms_lob.getlength(a_BLOB); + end; + + procedure GetSize( + a_CLOB clob, + a_Size out number + ) is + begin + a_Size := dbms_lob.getlength(a_CLOB); + end; + +end; +/ diff --git a/tests/test_4100_cursor_callproc.py b/tests/test_4100_cursor_callproc.py index 62a11006..ed1fe1eb 100644 --- a/tests/test_4100_cursor_callproc.py +++ b/tests/test_4100_cursor_callproc.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -444,6 +444,20 @@ def test_4124(self): with self.assertRaisesFullCode("ORA-06550"): self.cursor.callproc("func_Test2", ("hello", 5, True)) + def test_4125(self): + "4125 - test calling a procedure with a string > 32767 characters" + data = "4125" * 16000 + size_var = self.cursor.var(int) + self.cursor.callproc("pkg_TestLobs.GetSize", [data, size_var]) + self.assertEqual(size_var.getvalue(), len(data)) + + def test_4126(self): + "4125 - test calling a procedure with raw data > 32767 bytes" + data = b"4126" * 16250 + size_var = self.cursor.var(int) + self.cursor.callproc("pkg_TestLobs.GetSize", [data, size_var]) + self.assertEqual(size_var.getvalue(), len(data)) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_6200_cursor_callproc_async.py b/tests/test_6200_cursor_callproc_async.py index 560b4787..025f3884 100644 --- a/tests/test_6200_cursor_callproc_async.py +++ b/tests/test_6200_cursor_callproc_async.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2023, 2024, Oracle and/or its affiliates. +# Copyright (c) 2023, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -119,6 +119,20 @@ async def test_6208(self): "func_Test", oracledb.NUMBER, [], kwargs ) + async def test_6209(self): + "6209 - test calling a procedure with a string > 32767 characters" + data = "6209" * 16000 + size_var = self.cursor.var(int) + await self.cursor.callproc("pkg_TestLobs.GetSize", [data, size_var]) + self.assertEqual(size_var.getvalue(), len(data)) + + async def test_6210(self): + "6210 - test calling a procedure with raw data > 32767 bytes" + data = b"6210" * 16250 + size_var = self.cursor.var(int) + await self.cursor.callproc("pkg_TestLobs.GetSize", [data, size_var]) + self.assertEqual(size_var.getvalue(), len(data)) + if __name__ == "__main__": test_env.run_test_cases() From 950220a03a29f48a2fad5f3d6249f9fdc1749e27 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 14:01:10 -0700 Subject: [PATCH 035/178] Use Optional[] for optional arguments and avoid specifying defaults since in many cases such are inaccurate. --- src/oracledb/connect_params.py | 202 +++++++++--------- src/oracledb/connection.py | 321 ++++++++++++++-------------- src/oracledb/pool.py | 342 +++++++++++++++--------------- src/oracledb/pool_params.py | 258 +++++++++++----------- utils/build_from_template.py | 10 +- utils/templates/connect_params.py | 2 +- utils/templates/connection.py | 121 ++++++----- utils/templates/pool.py | 86 ++++---- utils/templates/pool_params.py | 2 +- 9 files changed, 684 insertions(+), 660 deletions(-) diff --git a/src/oracledb/connect_params.py b/src/oracledb/connect_params.py index 44d822cf..805b5abd 100644 --- a/src/oracledb/connect_params.py +++ b/src/oracledb/connect_params.py @@ -35,7 +35,7 @@ import functools import ssl -from typing import Union, Callable, Any +from typing import Union, Callable, Any, Optional import oracledb @@ -56,56 +56,56 @@ class ConnectParams: def __init__( self, *, - user: str = None, - proxy_user: str = None, - password: str = None, - newpassword: str = None, - wallet_password: str = None, - access_token: Union[str, tuple, Callable] = None, - host: str = None, - port: int = 1521, - protocol: str = "tcp", - https_proxy: str = None, - https_proxy_port: int = 0, - service_name: str = None, - instance_name: str = None, - sid: str = None, - server_type: str = None, - cclass: str = None, - purity: oracledb.Purity = oracledb.PURITY_DEFAULT, - expire_time: int = 0, - retry_count: int = 0, - retry_delay: int = 1, - tcp_connect_timeout: float = 20.0, - ssl_server_dn_match: bool = True, - ssl_server_cert_dn: str = None, - wallet_location: str = None, - events: bool = False, - externalauth: bool = False, - mode: oracledb.AuthMode = oracledb.AUTH_MODE_DEFAULT, - disable_oob: bool = False, - stmtcachesize: int = oracledb.defaults.stmtcachesize, - edition: str = None, - tag: str = None, - matchanytag: bool = False, - config_dir: str = oracledb.defaults.config_dir, - appcontext: list = None, - shardingkey: list = None, - supershardingkey: list = None, - debug_jdwp: str = None, - connection_id_prefix: str = None, - ssl_context: Any = None, - sdu: int = 8192, - pool_boundary: str = None, - use_tcp_fast_open: bool = False, - ssl_version: ssl.TLSVersion = None, - program: str = oracledb.defaults.program, - machine: str = oracledb.defaults.machine, - terminal: str = oracledb.defaults.terminal, - osuser: str = oracledb.defaults.osuser, - driver_name: str = oracledb.defaults.driver_name, - 
use_sni: bool = False, - handle: int = 0, + user: Optional[str] = None, + proxy_user: Optional[str] = None, + password: Optional[str] = None, + newpassword: Optional[str] = None, + wallet_password: Optional[str] = None, + access_token: Optional[Union[str, tuple, Callable]] = None, + host: Optional[str] = None, + port: Optional[int] = None, + protocol: Optional[str] = None, + https_proxy: Optional[str] = None, + https_proxy_port: Optional[int] = None, + service_name: Optional[str] = None, + instance_name: Optional[str] = None, + sid: Optional[str] = None, + server_type: Optional[str] = None, + cclass: Optional[str] = None, + purity: Optional[oracledb.Purity] = None, + expire_time: Optional[int] = None, + retry_count: Optional[int] = None, + retry_delay: Optional[int] = None, + tcp_connect_timeout: Optional[float] = None, + ssl_server_dn_match: Optional[bool] = None, + ssl_server_cert_dn: Optional[str] = None, + wallet_location: Optional[str] = None, + events: Optional[bool] = None, + externalauth: Optional[bool] = None, + mode: Optional[oracledb.AuthMode] = None, + disable_oob: Optional[bool] = None, + stmtcachesize: Optional[int] = None, + edition: Optional[str] = None, + tag: Optional[str] = None, + matchanytag: Optional[bool] = None, + config_dir: Optional[str] = None, + appcontext: Optional[list] = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, + debug_jdwp: Optional[str] = None, + connection_id_prefix: Optional[str] = None, + ssl_context: Optional[Any] = None, + sdu: Optional[int] = None, + pool_boundary: Optional[str] = None, + use_tcp_fast_open: Optional[bool] = None, + ssl_version: Optional[ssl.TLSVersion] = None, + program: Optional[str] = None, + machine: Optional[str] = None, + terminal: Optional[str] = None, + osuser: Optional[str] = None, + driver_name: Optional[str] = None, + use_sni: Optional[bool] = None, + handle: Optional[int] = None, ): """ All parameters are optional. 
A brief description of each parameter @@ -828,56 +828,56 @@ def parse_dsn_with_credentials(self, dsn: str) -> tuple: def set( self, *, - user: str = None, - proxy_user: str = None, - password: str = None, - newpassword: str = None, - wallet_password: str = None, - access_token: Union[str, tuple, Callable] = None, - host: str = None, - port: int = None, - protocol: str = None, - https_proxy: str = None, - https_proxy_port: int = None, - service_name: str = None, - instance_name: str = None, - sid: str = None, - server_type: str = None, - cclass: str = None, - purity: oracledb.Purity = None, - expire_time: int = None, - retry_count: int = None, - retry_delay: int = None, - tcp_connect_timeout: float = None, - ssl_server_dn_match: bool = None, - ssl_server_cert_dn: str = None, - wallet_location: str = None, - events: bool = None, - externalauth: bool = None, - mode: oracledb.AuthMode = None, - disable_oob: bool = None, - stmtcachesize: int = None, - edition: str = None, - tag: str = None, - matchanytag: bool = None, - config_dir: str = None, - appcontext: list = None, - shardingkey: list = None, - supershardingkey: list = None, - debug_jdwp: str = None, - connection_id_prefix: str = None, - ssl_context: Any = None, - sdu: int = None, - pool_boundary: str = None, - use_tcp_fast_open: bool = None, - ssl_version: ssl.TLSVersion = None, - program: str = None, - machine: str = None, - terminal: str = None, - osuser: str = None, - driver_name: str = None, - use_sni: bool = None, - handle: int = None, + user: Optional[str] = None, + proxy_user: Optional[str] = None, + password: Optional[str] = None, + newpassword: Optional[str] = None, + wallet_password: Optional[str] = None, + access_token: Optional[Union[str, tuple, Callable]] = None, + host: Optional[str] = None, + port: Optional[int] = None, + protocol: Optional[str] = None, + https_proxy: Optional[str] = None, + https_proxy_port: Optional[int] = None, + service_name: Optional[str] = None, + instance_name: Optional[str] = None, + sid: Optional[str] = None, + server_type: Optional[str] = None, + cclass: Optional[str] = None, + purity: Optional[oracledb.Purity] = None, + expire_time: Optional[int] = None, + retry_count: Optional[int] = None, + retry_delay: Optional[int] = None, + tcp_connect_timeout: Optional[float] = None, + ssl_server_dn_match: Optional[bool] = None, + ssl_server_cert_dn: Optional[str] = None, + wallet_location: Optional[str] = None, + events: Optional[bool] = None, + externalauth: Optional[bool] = None, + mode: Optional[oracledb.AuthMode] = None, + disable_oob: Optional[bool] = None, + stmtcachesize: Optional[int] = None, + edition: Optional[str] = None, + tag: Optional[str] = None, + matchanytag: Optional[bool] = None, + config_dir: Optional[str] = None, + appcontext: Optional[list] = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, + debug_jdwp: Optional[str] = None, + connection_id_prefix: Optional[str] = None, + ssl_context: Optional[Any] = None, + sdu: Optional[int] = None, + pool_boundary: Optional[str] = None, + use_tcp_fast_open: Optional[bool] = None, + ssl_version: Optional[ssl.TLSVersion] = None, + program: Optional[str] = None, + machine: Optional[str] = None, + terminal: Optional[str] = None, + osuser: Optional[str] = None, + driver_name: Optional[str] = None, + use_sni: Optional[bool] = None, + handle: Optional[int] = None, ): """ All parameters are optional. 
A brief description of each parameter diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index 04ed7ce0..4eb0e5a2 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -41,7 +41,7 @@ from . import __name__ as MODULE_NAME -from typing import Any, Callable, Type, Union +from typing import Any, Callable, Type, Union, Optional from . import constants, driver_mode, errors from . import base_impl, thick_impl, thin_impl from . import pool as pool_module @@ -505,10 +505,10 @@ class Connection(BaseConnection): def __init__( self, - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.ConnectionPool" = None, - params: ConnectParams = None, + pool: Optional["pool_module.ConnectionPool"] = None, + params: Optional[ConnectParams] = None, **kwargs, ) -> None: """ @@ -674,7 +674,7 @@ def commit(self) -> None: self._impl.commit() def createlob( - self, lob_type: DbType, data: Union[str, bytes] = None + self, lob_type: DbType, data: Optional[Union[str, bytes]] = None ) -> LOB: """ Create and return a new temporary LOB of the specified type. @@ -752,13 +752,13 @@ def maxBytesPerCharacter(self) -> int: def msgproperties( self, - payload: Union[bytes, str, DbObject] = None, - correlation: str = None, - delay: int = None, - exceptionq: str = None, - expiration: int = None, - priority: int = None, - recipients: list = None, + payload: Optional[Union[bytes, str, DbObject]] = None, + correlation: Optional[str] = None, + delay: Optional[int] = None, + exceptionq: Optional[str] = None, + expiration: Optional[int] = None, + priority: Optional[int] = None, + recipients: Optional[list] = None, ) -> MessageProperties: """ Create and return a message properties object. If the parameters are @@ -807,9 +807,9 @@ def proxy_user(self) -> Union[str, None]: def queue( self, name: str, - payload_type: Union[DbObjectType, str] = None, + payload_type: Optional[Union[DbObjectType, str]] = None, *, - payloadType: DbObjectType = None, + payloadType: Optional[DbObjectType] = None, ) -> Queue: """ Creates and returns a queue which is used to enqueue and dequeue @@ -862,7 +862,10 @@ def shutdown(self, mode: int = 0) -> None: self._impl.shutdown(mode) def startup( - self, force: bool = False, restrict: bool = False, pfile: str = None + self, + force: bool = False, + restrict: bool = False, + pfile: Optional[str] = None, ) -> None: """ Startup the database. 
This is equivalent to the SQL*Plus command @@ -880,19 +883,19 @@ def subscribe( self, namespace: int = constants.SUBSCR_NAMESPACE_DBCHANGE, protocol: int = constants.SUBSCR_PROTO_CALLBACK, - callback: Callable = None, + callback: Optional[Callable] = None, timeout: int = 0, operations: int = constants.OPCODE_ALLOPS, port: int = 0, qos: int = constants.SUBSCR_QOS_DEFAULT, - ip_address: str = None, + ip_address: Optional[str] = None, grouping_class: int = constants.SUBSCR_GROUPING_CLASS_NONE, grouping_value: int = 0, grouping_type: int = constants.SUBSCR_GROUPING_TYPE_SUMMARY, - name: str = None, + name: Optional[str] = None, client_initiated: bool = False, *, - ipAddress: str = None, + ipAddress: Optional[str] = None, groupingClass: int = constants.SUBSCR_GROUPING_CLASS_NONE, groupingValue: int = 0, groupingType: int = constants.SUBSCR_GROUPING_TYPE_SUMMARY, @@ -1062,7 +1065,9 @@ def tpc_begin( errors._raise_err(errors.ERR_INVALID_TPC_BEGIN_FLAGS) self._impl.tpc_begin(xid, flags, timeout) - def tpc_commit(self, xid: Xid = None, one_phase: bool = False) -> None: + def tpc_commit( + self, xid: Optional[Xid] = None, one_phase: bool = False + ) -> None: """ Prepare the global transaction for commit. Return a boolean indicating if a transaction was actually prepared in order to avoid the error @@ -1083,7 +1088,7 @@ def tpc_commit(self, xid: Xid = None, one_phase: bool = False) -> None: self._impl.tpc_commit(xid, one_phase) def tpc_end( - self, xid: Xid = None, flags: int = constants.TPC_END_NORMAL + self, xid: Optional[Xid] = None, flags: int = constants.TPC_END_NORMAL ) -> None: """ Ends (detaches from) a TPC (two-phase commit) transaction. @@ -1103,7 +1108,7 @@ def tpc_forget(self, xid: Xid) -> None: self._verify_xid(xid) self._impl.tpc_forget(xid) - def tpc_prepare(self, xid: Xid = None) -> bool: + def tpc_prepare(self, xid: Optional[Xid] = None) -> bool: """ Prepares a global transaction for commit. After calling this function, no further activity should take place on this connection until either @@ -1138,7 +1143,7 @@ def tpc_recover(self) -> list: cursor.rowfactory = Xid return cursor.fetchall() - def tpc_rollback(self, xid: Xid = None) -> None: + def tpc_rollback(self, xid: Optional[Xid] = None) -> None: """ When called with no arguments, rolls back the transaction previously started with tpc_begin(). 
@@ -1176,12 +1181,12 @@ class constructor does not check the validity of the supplied keyword @functools.wraps(f) def connect( - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.ConnectionPool" = None, - pool_alias: str = None, + pool: Optional["pool_module.ConnectionPool"] = None, + pool_alias: Optional[str] = None, conn_class: Type[Connection] = Connection, - params: ConnectParams = None, + params: Optional[ConnectParams] = None, **kwargs, ) -> Connection: f( @@ -1218,62 +1223,62 @@ def connect( @_connection_factory def connect( - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.ConnectionPool" = None, - pool_alias: str = None, + pool: Optional["pool_module.ConnectionPool"] = None, + pool_alias: Optional[str] = None, conn_class: Type[Connection] = Connection, - params: ConnectParams = None, - user: str = None, - proxy_user: str = None, - password: str = None, - newpassword: str = None, - wallet_password: str = None, - access_token: Union[str, tuple, Callable] = None, - host: str = None, - port: int = 1521, - protocol: str = "tcp", - https_proxy: str = None, - https_proxy_port: int = 0, - service_name: str = None, - instance_name: str = None, - sid: str = None, - server_type: str = None, - cclass: str = None, - purity: oracledb.Purity = oracledb.PURITY_DEFAULT, - expire_time: int = 0, - retry_count: int = 0, - retry_delay: int = 1, - tcp_connect_timeout: float = 20.0, - ssl_server_dn_match: bool = True, - ssl_server_cert_dn: str = None, - wallet_location: str = None, - events: bool = False, - externalauth: bool = False, - mode: oracledb.AuthMode = oracledb.AUTH_MODE_DEFAULT, - disable_oob: bool = False, - stmtcachesize: int = oracledb.defaults.stmtcachesize, - edition: str = None, - tag: str = None, - matchanytag: bool = False, - config_dir: str = oracledb.defaults.config_dir, - appcontext: list = None, - shardingkey: list = None, - supershardingkey: list = None, - debug_jdwp: str = None, - connection_id_prefix: str = None, - ssl_context: Any = None, - sdu: int = 8192, - pool_boundary: str = None, - use_tcp_fast_open: bool = False, - ssl_version: ssl.TLSVersion = None, - program: str = oracledb.defaults.program, - machine: str = oracledb.defaults.machine, - terminal: str = oracledb.defaults.terminal, - osuser: str = oracledb.defaults.osuser, - driver_name: str = oracledb.defaults.driver_name, - use_sni: bool = False, - handle: int = 0, + params: Optional[ConnectParams] = None, + user: Optional[str] = None, + proxy_user: Optional[str] = None, + password: Optional[str] = None, + newpassword: Optional[str] = None, + wallet_password: Optional[str] = None, + access_token: Optional[Union[str, tuple, Callable]] = None, + host: Optional[str] = None, + port: Optional[int] = None, + protocol: Optional[str] = None, + https_proxy: Optional[str] = None, + https_proxy_port: Optional[int] = None, + service_name: Optional[str] = None, + instance_name: Optional[str] = None, + sid: Optional[str] = None, + server_type: Optional[str] = None, + cclass: Optional[str] = None, + purity: Optional[oracledb.Purity] = None, + expire_time: Optional[int] = None, + retry_count: Optional[int] = None, + retry_delay: Optional[int] = None, + tcp_connect_timeout: Optional[float] = None, + ssl_server_dn_match: Optional[bool] = None, + ssl_server_cert_dn: Optional[str] = None, + wallet_location: Optional[str] = None, + events: Optional[bool] = None, + externalauth: Optional[bool] = None, + mode: Optional[oracledb.AuthMode] = None, + disable_oob: Optional[bool] = None, + stmtcachesize: 
Optional[int] = None, + edition: Optional[str] = None, + tag: Optional[str] = None, + matchanytag: Optional[bool] = None, + config_dir: Optional[str] = None, + appcontext: Optional[list] = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, + debug_jdwp: Optional[str] = None, + connection_id_prefix: Optional[str] = None, + ssl_context: Optional[Any] = None, + sdu: Optional[int] = None, + pool_boundary: Optional[str] = None, + use_tcp_fast_open: Optional[bool] = None, + ssl_version: Optional[ssl.TLSVersion] = None, + program: Optional[str] = None, + machine: Optional[str] = None, + terminal: Optional[str] = None, + osuser: Optional[str] = None, + driver_name: Optional[str] = None, + use_sni: Optional[bool] = None, + handle: Optional[int] = None, ) -> Connection: """ Factory function which creates a connection to the database and returns it. @@ -1605,8 +1610,8 @@ async def callfunc( self, name: str, return_type: Any, - parameters: Union[list, tuple] = None, - keyword_parameters: dict = None, + parameters: Optional[Union[list, tuple]] = None, + keyword_parameters: Optional[dict] = None, ) -> Any: """ Call a PL/SQL function with the given name. @@ -1622,8 +1627,8 @@ async def callfunc( async def callproc( self, name: str, - parameters: Union[list, tuple] = None, - keyword_parameters: dict = None, + parameters: Optional[Union[list, tuple]] = None, + keyword_parameters: Optional[dict] = None, ) -> list: """ Call a PL/SQL procedure with the given name. @@ -1659,7 +1664,7 @@ async def commit(self) -> None: await self._impl.commit() async def createlob( - self, lob_type: DbType, data: Union[str, bytes] = None + self, lob_type: DbType, data: Optional[Union[str, bytes]] = None ) -> AsyncLOB: """ Create and return a new temporary LOB of the specified type. @@ -1685,7 +1690,9 @@ def cursor(self, scrollable: bool = False) -> AsyncCursor: return AsyncCursor(self, scrollable) async def execute( - self, statement: str, parameters: Union[list, tuple, dict] = None + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, ) -> None: """ Execute a statement against the database. @@ -1713,9 +1720,9 @@ async def executemany( async def fetchall( self, statement: str, - parameters: Union[list, tuple, dict] = None, - arraysize: int = None, - rowfactory: Callable = None, + parameters: Optional[Union[list, tuple, dict]] = None, + arraysize: Optional[int] = None, + rowfactory: Optional[Callable] = None, ) -> list: """ Executes a query and returns all of the rows. After the rows are @@ -1732,9 +1739,9 @@ async def fetchall( async def fetchmany( self, statement: str, - parameters: Union[list, tuple, dict] = None, - num_rows: int = None, - rowfactory: Callable = None, + parameters: Optional[Union[list, tuple, dict]] = None, + num_rows: Optional[int] = None, + rowfactory: Optional[Callable] = None, ) -> list: """ Executes a query and returns up to the specified number of rows. 
After @@ -1753,8 +1760,8 @@ async def fetchmany( async def fetchone( self, statement: str, - parameters: Union[list, tuple, dict] = None, - rowfactory: Callable = None, + parameters: Optional[Union[list, tuple, dict]] = None, + rowfactory: Optional[Callable] = None, ) -> Any: """ Executes a query and returns the first row of the result set if one @@ -1839,7 +1846,7 @@ async def tpc_begin( await self._impl.tpc_begin(xid, flags, timeout) async def tpc_commit( - self, xid: Xid = None, one_phase: bool = False + self, xid: Optional[Xid] = None, one_phase: bool = False ) -> None: """ Prepare the global transaction for commit. Return a boolean indicating @@ -1861,7 +1868,7 @@ async def tpc_commit( await self._impl.tpc_commit(xid, one_phase) async def tpc_end( - self, xid: Xid = None, flags: int = constants.TPC_END_NORMAL + self, xid: Optional[Xid] = None, flags: int = constants.TPC_END_NORMAL ) -> None: """ Ends (detaches from) a TPC (two-phase commit) transaction. @@ -1881,7 +1888,7 @@ async def tpc_forget(self, xid: Xid) -> None: self._verify_xid(xid) await self._impl.tpc_forget(xid) - async def tpc_prepare(self, xid: Xid = None) -> bool: + async def tpc_prepare(self, xid: Optional[Xid] = None) -> bool: """ Prepares a global transaction for commit. After calling this function, no further activity should take place on this connection until either @@ -1916,7 +1923,7 @@ async def tpc_recover(self) -> list: cursor.rowfactory = Xid return await cursor.fetchall() - async def tpc_rollback(self, xid: Xid = None) -> None: + async def tpc_rollback(self, xid: Optional[Xid] = None) -> None: """ When called with no arguments, rolls back the transaction previously started with tpc_begin(). @@ -1940,12 +1947,12 @@ def _async_connection_factory(f): @functools.wraps(f) def connect_async( - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.AsyncConnectionPool" = None, - pool_alias: str = None, + pool: Optional["pool_module.AsyncConnectionPool"] = None, + pool_alias: Optional[str] = None, conn_class: Type[AsyncConnection] = AsyncConnection, - params: ConnectParams = None, + params: Optional[ConnectParams] = None, **kwargs, ) -> AsyncConnection: # check arguments @@ -1992,62 +1999,62 @@ def connect_async( @_async_connection_factory def connect_async( - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.AsyncConnectionPool" = None, - pool_alias: str = None, + pool: Optional["pool_module.AsyncConnectionPool"] = None, + pool_alias: Optional[str] = None, conn_class: Type[AsyncConnection] = AsyncConnection, - params: ConnectParams = None, - user: str = None, - proxy_user: str = None, - password: str = None, - newpassword: str = None, - wallet_password: str = None, - access_token: Union[str, tuple, Callable] = None, - host: str = None, - port: int = 1521, - protocol: str = "tcp", - https_proxy: str = None, - https_proxy_port: int = 0, - service_name: str = None, - instance_name: str = None, - sid: str = None, - server_type: str = None, - cclass: str = None, - purity: oracledb.Purity = oracledb.PURITY_DEFAULT, - expire_time: int = 0, - retry_count: int = 0, - retry_delay: int = 1, - tcp_connect_timeout: float = 20.0, - ssl_server_dn_match: bool = True, - ssl_server_cert_dn: str = None, - wallet_location: str = None, - events: bool = False, - externalauth: bool = False, - mode: oracledb.AuthMode = oracledb.AUTH_MODE_DEFAULT, - disable_oob: bool = False, - stmtcachesize: int = oracledb.defaults.stmtcachesize, - edition: str = None, - tag: str = None, - matchanytag: bool = False, - 
config_dir: str = oracledb.defaults.config_dir, - appcontext: list = None, - shardingkey: list = None, - supershardingkey: list = None, - debug_jdwp: str = None, - connection_id_prefix: str = None, - ssl_context: Any = None, - sdu: int = 8192, - pool_boundary: str = None, - use_tcp_fast_open: bool = False, - ssl_version: ssl.TLSVersion = None, - program: str = oracledb.defaults.program, - machine: str = oracledb.defaults.machine, - terminal: str = oracledb.defaults.terminal, - osuser: str = oracledb.defaults.osuser, - driver_name: str = oracledb.defaults.driver_name, - use_sni: bool = False, - handle: int = 0, + params: Optional[ConnectParams] = None, + user: Optional[str] = None, + proxy_user: Optional[str] = None, + password: Optional[str] = None, + newpassword: Optional[str] = None, + wallet_password: Optional[str] = None, + access_token: Optional[Union[str, tuple, Callable]] = None, + host: Optional[str] = None, + port: Optional[int] = None, + protocol: Optional[str] = None, + https_proxy: Optional[str] = None, + https_proxy_port: Optional[int] = None, + service_name: Optional[str] = None, + instance_name: Optional[str] = None, + sid: Optional[str] = None, + server_type: Optional[str] = None, + cclass: Optional[str] = None, + purity: Optional[oracledb.Purity] = None, + expire_time: Optional[int] = None, + retry_count: Optional[int] = None, + retry_delay: Optional[int] = None, + tcp_connect_timeout: Optional[float] = None, + ssl_server_dn_match: Optional[bool] = None, + ssl_server_cert_dn: Optional[str] = None, + wallet_location: Optional[str] = None, + events: Optional[bool] = None, + externalauth: Optional[bool] = None, + mode: Optional[oracledb.AuthMode] = None, + disable_oob: Optional[bool] = None, + stmtcachesize: Optional[int] = None, + edition: Optional[str] = None, + tag: Optional[str] = None, + matchanytag: Optional[bool] = None, + config_dir: Optional[str] = None, + appcontext: Optional[list] = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, + debug_jdwp: Optional[str] = None, + connection_id_prefix: Optional[str] = None, + ssl_context: Optional[Any] = None, + sdu: Optional[int] = None, + pool_boundary: Optional[str] = None, + use_tcp_fast_open: Optional[bool] = None, + ssl_version: Optional[ssl.TLSVersion] = None, + program: Optional[str] = None, + machine: Optional[str] = None, + terminal: Optional[str] = None, + osuser: Optional[str] = None, + driver_name: Optional[str] = None, + use_sni: Optional[bool] = None, + handle: Optional[int] = None, ) -> AsyncConnection: """ Factory function which creates a connection to the database and returns it. 
diff --git a/src/oracledb/pool.py b/src/oracledb/pool.py index 6175d9bc..efcab9b0 100644 --- a/src/oracledb/pool.py +++ b/src/oracledb/pool.py @@ -36,7 +36,7 @@ import functools import ssl import threading -from typing import Callable, Type, Union, Any +from typing import Callable, Type, Union, Any, Optional import oracledb @@ -53,10 +53,10 @@ class BaseConnectionPool: def __init__( self, - dsn: str = None, + dsn: Optional[str] = None, *, - params: PoolParams = None, - cache_name=None, + params: Optional[PoolParams] = None, + cache_name: Optional[str] = None, **kwargs, ) -> None: """ @@ -379,14 +379,14 @@ def _set_connection_type(self, conn_class): def acquire( self, - user: str = None, - password: str = None, - cclass: str = None, + user: Optional[str] = None, + password: Optional[str] = None, + cclass: Optional[str] = None, purity: int = oracledb.PURITY_DEFAULT, - tag: str = None, + tag: Optional[str] = None, matchanytag: bool = False, - shardingkey: list = None, - supershardingkey: list = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, ) -> "connection_module.Connection": """ Acquire a connection from the pool and return it. @@ -458,7 +458,9 @@ def drop(self, connection: "connection_module.Connection") -> None: connection._impl = None def release( - self, connection: "connection_module.Connection", tag: str = None + self, + connection: "connection_module.Connection", + tag: Optional[str] = None, ) -> None: """ Release the connection back to the pool now, rather than whenever @@ -491,17 +493,17 @@ def release( def reconfigure( self, - min: int = None, - max: int = None, - increment: int = None, - getmode: int = None, - timeout: int = None, - wait_timeout: int = None, - max_lifetime_session: int = None, - max_sessions_per_shard: int = None, - soda_metadata_cache: bool = None, - stmtcachesize: int = None, - ping_interval: int = None, + min: Optional[int] = None, + max: Optional[int] = None, + increment: Optional[int] = None, + getmode: Optional[int] = None, + timeout: Optional[int] = None, + wait_timeout: Optional[int] = None, + max_lifetime_session: Optional[int] = None, + max_sessions_per_shard: Optional[int] = None, + soda_metadata_cache: Optional[bool] = None, + stmtcachesize: Optional[int] = None, + ping_interval: Optional[int] = None, ) -> None: """ Reconfigures various parameters of a connection pool. 
The pool size @@ -578,11 +580,11 @@ def _pool_factory(f): @functools.wraps(f) def create_pool( - dsn: str = None, + dsn: Optional[str] = None, *, pool_class: Type[ConnectionPool] = ConnectionPool, - pool_alias: str = None, - params: PoolParams = None, + pool_alias: Optional[str] = None, + params: Optional[PoolParams] = None, **kwargs, ) -> ConnectionPool: f( @@ -601,75 +603,75 @@ def create_pool( @_pool_factory def create_pool( - dsn: str = None, + dsn: Optional[str] = None, *, pool_class: Type[ConnectionPool] = ConnectionPool, - pool_alias: str = None, - params: PoolParams = None, - min: int = 1, - max: int = 2, - increment: int = 1, - connectiontype: Type["oracledb.Connection"] = None, - getmode: oracledb.PoolGetMode = oracledb.POOL_GETMODE_WAIT, - homogeneous: bool = True, - timeout: int = 0, - wait_timeout: int = 0, - max_lifetime_session: int = 0, - session_callback: Callable = None, - max_sessions_per_shard: int = 0, - soda_metadata_cache: bool = False, - ping_interval: int = 60, - ping_timeout: int = 5000, - user: str = None, - proxy_user: str = None, - password: str = None, - newpassword: str = None, - wallet_password: str = None, - access_token: Union[str, tuple, Callable] = None, - host: str = None, - port: int = 1521, - protocol: str = "tcp", - https_proxy: str = None, - https_proxy_port: int = 0, - service_name: str = None, - instance_name: str = None, - sid: str = None, - server_type: str = None, - cclass: str = None, - purity: oracledb.Purity = oracledb.PURITY_DEFAULT, - expire_time: int = 0, - retry_count: int = 0, - retry_delay: int = 1, - tcp_connect_timeout: float = 20.0, - ssl_server_dn_match: bool = True, - ssl_server_cert_dn: str = None, - wallet_location: str = None, - events: bool = False, - externalauth: bool = False, - mode: oracledb.AuthMode = oracledb.AUTH_MODE_DEFAULT, - disable_oob: bool = False, - stmtcachesize: int = oracledb.defaults.stmtcachesize, - edition: str = None, - tag: str = None, - matchanytag: bool = False, - config_dir: str = oracledb.defaults.config_dir, - appcontext: list = None, - shardingkey: list = None, - supershardingkey: list = None, - debug_jdwp: str = None, - connection_id_prefix: str = None, - ssl_context: Any = None, - sdu: int = 8192, - pool_boundary: str = None, - use_tcp_fast_open: bool = False, - ssl_version: ssl.TLSVersion = None, - program: str = oracledb.defaults.program, - machine: str = oracledb.defaults.machine, - terminal: str = oracledb.defaults.terminal, - osuser: str = oracledb.defaults.osuser, - driver_name: str = oracledb.defaults.driver_name, - use_sni: bool = False, - handle: int = 0, + pool_alias: Optional[str] = None, + params: Optional[PoolParams] = None, + min: Optional[int] = None, + max: Optional[int] = None, + increment: Optional[int] = None, + connectiontype: Optional[Type["oracledb.Connection"]] = None, + getmode: Optional[oracledb.PoolGetMode] = None, + homogeneous: Optional[bool] = None, + timeout: Optional[int] = None, + wait_timeout: Optional[int] = None, + max_lifetime_session: Optional[int] = None, + session_callback: Optional[Callable] = None, + max_sessions_per_shard: Optional[int] = None, + soda_metadata_cache: Optional[bool] = None, + ping_interval: Optional[int] = None, + ping_timeout: Optional[int] = None, + user: Optional[str] = None, + proxy_user: Optional[str] = None, + password: Optional[str] = None, + newpassword: Optional[str] = None, + wallet_password: Optional[str] = None, + access_token: Optional[Union[str, tuple, Callable]] = None, + host: Optional[str] = None, + port: Optional[int] = 
None, + protocol: Optional[str] = None, + https_proxy: Optional[str] = None, + https_proxy_port: Optional[int] = None, + service_name: Optional[str] = None, + instance_name: Optional[str] = None, + sid: Optional[str] = None, + server_type: Optional[str] = None, + cclass: Optional[str] = None, + purity: Optional[oracledb.Purity] = None, + expire_time: Optional[int] = None, + retry_count: Optional[int] = None, + retry_delay: Optional[int] = None, + tcp_connect_timeout: Optional[float] = None, + ssl_server_dn_match: Optional[bool] = None, + ssl_server_cert_dn: Optional[str] = None, + wallet_location: Optional[str] = None, + events: Optional[bool] = None, + externalauth: Optional[bool] = None, + mode: Optional[oracledb.AuthMode] = None, + disable_oob: Optional[bool] = None, + stmtcachesize: Optional[int] = None, + edition: Optional[str] = None, + tag: Optional[str] = None, + matchanytag: Optional[bool] = None, + config_dir: Optional[str] = None, + appcontext: Optional[list] = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, + debug_jdwp: Optional[str] = None, + connection_id_prefix: Optional[str] = None, + ssl_context: Optional[Any] = None, + sdu: Optional[int] = None, + pool_boundary: Optional[str] = None, + use_tcp_fast_open: Optional[bool] = None, + ssl_version: Optional[ssl.TLSVersion] = None, + program: Optional[str] = None, + machine: Optional[str] = None, + terminal: Optional[str] = None, + osuser: Optional[str] = None, + driver_name: Optional[str] = None, + use_sni: Optional[bool] = None, + handle: Optional[int] = None, ) -> ConnectionPool: """ Creates a connection pool with the supplied parameters and returns it. @@ -965,14 +967,14 @@ def _set_connection_type(self, conn_class): def acquire( self, - user: str = None, - password: str = None, - cclass: str = None, + user: Optional[str] = None, + password: Optional[str] = None, + cclass: Optional[str] = None, purity: int = oracledb.PURITY_DEFAULT, - tag: str = None, + tag: Optional[str] = None, matchanytag: bool = False, - shardingkey: list = None, - supershardingkey: list = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, ) -> "connection_module.AsyncConnection": """ Acquire a connection from the pool and return it. 
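As a usage sketch for the create_pool() signature above, with placeholder credentials and connect string; any parameter left out is now sent as None and falls back to the usual pool defaults:

    import oracledb

    # Placeholder credentials and connect string for illustration only.
    pool = oracledb.create_pool(
        user="hr",
        password="hr_password",
        dsn="dbhost.example.com:1521/orclpdb1",
        min=1,
        max=4,
        increment=1,
    )

    conn = pool.acquire()
    with conn.cursor() as cursor:
        cursor.execute("select user from dual")
        print(cursor.fetchone())
    pool.release(conn)
    pool.close()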
@@ -1046,7 +1048,9 @@ async def drop(self, connection: "connection_module.Connection") -> None: connection._impl = None async def release( - self, connection: "connection_module.AsyncConnection", tag: str = None + self, + connection: "connection_module.AsyncConnection", + tag: Optional[str] = None, ) -> None: """ Release the connection back to the pool now, rather than whenever @@ -1091,11 +1095,11 @@ def _async_pool_factory(f): @functools.wraps(f) def create_pool_async( - dsn: str = None, + dsn: Optional[str] = None, *, pool_class: Type[ConnectionPool] = AsyncConnectionPool, - pool_alias: str = None, - params: PoolParams = None, + pool_alias: Optional[str] = None, + params: Optional[PoolParams] = None, **kwargs, ) -> AsyncConnectionPool: f( @@ -1115,75 +1119,75 @@ def create_pool_async( @_async_pool_factory def create_pool_async( - dsn: str = None, + dsn: Optional[str] = None, *, pool_class: Type[ConnectionPool] = AsyncConnectionPool, - pool_alias: str = None, - params: PoolParams = None, - min: int = 1, - max: int = 2, - increment: int = 1, - connectiontype: Type["oracledb.AsyncConnection"] = None, - getmode: oracledb.PoolGetMode = oracledb.POOL_GETMODE_WAIT, - homogeneous: bool = True, - timeout: int = 0, - wait_timeout: int = 0, - max_lifetime_session: int = 0, - session_callback: Callable = None, - max_sessions_per_shard: int = 0, - soda_metadata_cache: bool = False, - ping_interval: int = 60, - ping_timeout: int = 5000, - user: str = None, - proxy_user: str = None, - password: str = None, - newpassword: str = None, - wallet_password: str = None, - access_token: Union[str, tuple, Callable] = None, - host: str = None, - port: int = 1521, - protocol: str = "tcp", - https_proxy: str = None, - https_proxy_port: int = 0, - service_name: str = None, - instance_name: str = None, - sid: str = None, - server_type: str = None, - cclass: str = None, - purity: oracledb.Purity = oracledb.PURITY_DEFAULT, - expire_time: int = 0, - retry_count: int = 0, - retry_delay: int = 1, - tcp_connect_timeout: float = 20.0, - ssl_server_dn_match: bool = True, - ssl_server_cert_dn: str = None, - wallet_location: str = None, - events: bool = False, - externalauth: bool = False, - mode: oracledb.AuthMode = oracledb.AUTH_MODE_DEFAULT, - disable_oob: bool = False, - stmtcachesize: int = oracledb.defaults.stmtcachesize, - edition: str = None, - tag: str = None, - matchanytag: bool = False, - config_dir: str = oracledb.defaults.config_dir, - appcontext: list = None, - shardingkey: list = None, - supershardingkey: list = None, - debug_jdwp: str = None, - connection_id_prefix: str = None, - ssl_context: Any = None, - sdu: int = 8192, - pool_boundary: str = None, - use_tcp_fast_open: bool = False, - ssl_version: ssl.TLSVersion = None, - program: str = oracledb.defaults.program, - machine: str = oracledb.defaults.machine, - terminal: str = oracledb.defaults.terminal, - osuser: str = oracledb.defaults.osuser, - driver_name: str = oracledb.defaults.driver_name, - use_sni: bool = False, - handle: int = 0, + pool_alias: Optional[str] = None, + params: Optional[PoolParams] = None, + min: Optional[int] = None, + max: Optional[int] = None, + increment: Optional[int] = None, + connectiontype: Optional[Type["oracledb.AsyncConnection"]] = None, + getmode: Optional[oracledb.PoolGetMode] = None, + homogeneous: Optional[bool] = None, + timeout: Optional[int] = None, + wait_timeout: Optional[int] = None, + max_lifetime_session: Optional[int] = None, + session_callback: Optional[Callable] = None, + max_sessions_per_shard: Optional[int] 
= None, + soda_metadata_cache: Optional[bool] = None, + ping_interval: Optional[int] = None, + ping_timeout: Optional[int] = None, + user: Optional[str] = None, + proxy_user: Optional[str] = None, + password: Optional[str] = None, + newpassword: Optional[str] = None, + wallet_password: Optional[str] = None, + access_token: Optional[Union[str, tuple, Callable]] = None, + host: Optional[str] = None, + port: Optional[int] = None, + protocol: Optional[str] = None, + https_proxy: Optional[str] = None, + https_proxy_port: Optional[int] = None, + service_name: Optional[str] = None, + instance_name: Optional[str] = None, + sid: Optional[str] = None, + server_type: Optional[str] = None, + cclass: Optional[str] = None, + purity: Optional[oracledb.Purity] = None, + expire_time: Optional[int] = None, + retry_count: Optional[int] = None, + retry_delay: Optional[int] = None, + tcp_connect_timeout: Optional[float] = None, + ssl_server_dn_match: Optional[bool] = None, + ssl_server_cert_dn: Optional[str] = None, + wallet_location: Optional[str] = None, + events: Optional[bool] = None, + externalauth: Optional[bool] = None, + mode: Optional[oracledb.AuthMode] = None, + disable_oob: Optional[bool] = None, + stmtcachesize: Optional[int] = None, + edition: Optional[str] = None, + tag: Optional[str] = None, + matchanytag: Optional[bool] = None, + config_dir: Optional[str] = None, + appcontext: Optional[list] = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, + debug_jdwp: Optional[str] = None, + connection_id_prefix: Optional[str] = None, + ssl_context: Optional[Any] = None, + sdu: Optional[int] = None, + pool_boundary: Optional[str] = None, + use_tcp_fast_open: Optional[bool] = None, + ssl_version: Optional[ssl.TLSVersion] = None, + program: Optional[str] = None, + machine: Optional[str] = None, + terminal: Optional[str] = None, + osuser: Optional[str] = None, + driver_name: Optional[str] = None, + use_sni: Optional[bool] = None, + handle: Optional[int] = None, ) -> AsyncConnectionPool: """ Creates a connection pool with the supplied parameters and returns it. 
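A corresponding asyncio sketch for create_pool_async() and AsyncConnectionPool, again using placeholder connection details:

    import asyncio

    import oracledb


    async def main():
        # Placeholder credentials and connect string for illustration only.
        pool = oracledb.create_pool_async(
            user="hr",
            password="hr_password",
            dsn="dbhost.example.com:1521/orclpdb1",
            min=1,
            max=4,
        )
        conn = await pool.acquire()
        with conn.cursor() as cursor:
            await cursor.execute("select sysdate from dual")
            print(await cursor.fetchone())
        await pool.release(conn)
        await pool.close()


    asyncio.run(main())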
diff --git a/src/oracledb/pool_params.py b/src/oracledb/pool_params.py index 36b63be6..10788b6a 100644 --- a/src/oracledb/pool_params.py +++ b/src/oracledb/pool_params.py @@ -34,7 +34,7 @@ # ----------------------------------------------------------------------------- import ssl -from typing import Callable, Type, Union, Any +from typing import Callable, Type, Union, Any, Optional import oracledb @@ -55,70 +55,70 @@ class PoolParams(ConnectParams): def __init__( self, *, - min: int = 1, - max: int = 2, - increment: int = 1, - connectiontype: Type["oracledb.Connection"] = None, - getmode: oracledb.PoolGetMode = oracledb.POOL_GETMODE_WAIT, - homogeneous: bool = True, - timeout: int = 0, - wait_timeout: int = 0, - max_lifetime_session: int = 0, - session_callback: Callable = None, - max_sessions_per_shard: int = 0, - soda_metadata_cache: bool = False, - ping_interval: int = 60, - ping_timeout: int = 5000, - user: str = None, - proxy_user: str = None, - password: str = None, - newpassword: str = None, - wallet_password: str = None, - access_token: Union[str, tuple, Callable] = None, - host: str = None, - port: int = 1521, - protocol: str = "tcp", - https_proxy: str = None, - https_proxy_port: int = 0, - service_name: str = None, - instance_name: str = None, - sid: str = None, - server_type: str = None, - cclass: str = None, - purity: oracledb.Purity = oracledb.PURITY_DEFAULT, - expire_time: int = 0, - retry_count: int = 0, - retry_delay: int = 1, - tcp_connect_timeout: float = 20.0, - ssl_server_dn_match: bool = True, - ssl_server_cert_dn: str = None, - wallet_location: str = None, - events: bool = False, - externalauth: bool = False, - mode: oracledb.AuthMode = oracledb.AUTH_MODE_DEFAULT, - disable_oob: bool = False, - stmtcachesize: int = oracledb.defaults.stmtcachesize, - edition: str = None, - tag: str = None, - matchanytag: bool = False, - config_dir: str = oracledb.defaults.config_dir, - appcontext: list = None, - shardingkey: list = None, - supershardingkey: list = None, - debug_jdwp: str = None, - connection_id_prefix: str = None, - ssl_context: Any = None, - sdu: int = 8192, - pool_boundary: str = None, - use_tcp_fast_open: bool = False, - ssl_version: ssl.TLSVersion = None, - program: str = oracledb.defaults.program, - machine: str = oracledb.defaults.machine, - terminal: str = oracledb.defaults.terminal, - osuser: str = oracledb.defaults.osuser, - driver_name: str = oracledb.defaults.driver_name, - use_sni: bool = False, - handle: int = 0, + min: Optional[int] = None, + max: Optional[int] = None, + increment: Optional[int] = None, + connectiontype: Optional[Type["oracledb.Connection"]] = None, + getmode: Optional[oracledb.PoolGetMode] = None, + homogeneous: Optional[bool] = None, + timeout: Optional[int] = None, + wait_timeout: Optional[int] = None, + max_lifetime_session: Optional[int] = None, + session_callback: Optional[Callable] = None, + max_sessions_per_shard: Optional[int] = None, + soda_metadata_cache: Optional[bool] = None, + ping_interval: Optional[int] = None, + ping_timeout: Optional[int] = None, + user: Optional[str] = None, + proxy_user: Optional[str] = None, + password: Optional[str] = None, + newpassword: Optional[str] = None, + wallet_password: Optional[str] = None, + access_token: Optional[Union[str, tuple, Callable]] = None, + host: Optional[str] = None, + port: Optional[int] = None, + protocol: Optional[str] = None, + https_proxy: Optional[str] = None, + https_proxy_port: Optional[int] = None, + service_name: Optional[str] = None, + instance_name: Optional[str] = 
None, + sid: Optional[str] = None, + server_type: Optional[str] = None, + cclass: Optional[str] = None, + purity: Optional[oracledb.Purity] = None, + expire_time: Optional[int] = None, + retry_count: Optional[int] = None, + retry_delay: Optional[int] = None, + tcp_connect_timeout: Optional[float] = None, + ssl_server_dn_match: Optional[bool] = None, + ssl_server_cert_dn: Optional[str] = None, + wallet_location: Optional[str] = None, + events: Optional[bool] = None, + externalauth: Optional[bool] = None, + mode: Optional[oracledb.AuthMode] = None, + disable_oob: Optional[bool] = None, + stmtcachesize: Optional[int] = None, + edition: Optional[str] = None, + tag: Optional[str] = None, + matchanytag: Optional[bool] = None, + config_dir: Optional[str] = None, + appcontext: Optional[list] = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, + debug_jdwp: Optional[str] = None, + connection_id_prefix: Optional[str] = None, + ssl_context: Optional[Any] = None, + sdu: Optional[int] = None, + pool_boundary: Optional[str] = None, + use_tcp_fast_open: Optional[bool] = None, + ssl_version: Optional[ssl.TLSVersion] = None, + program: Optional[str] = None, + machine: Optional[str] = None, + terminal: Optional[str] = None, + osuser: Optional[str] = None, + driver_name: Optional[str] = None, + use_sni: Optional[bool] = None, + handle: Optional[int] = None, ): """ All parameters are optional. A brief description of each parameter @@ -575,70 +575,70 @@ def copy(self) -> "PoolParams": def set( self, *, - min: int = None, - max: int = None, - increment: int = None, - connectiontype: Type["oracledb.Connection"] = None, - getmode: oracledb.PoolGetMode = None, - homogeneous: bool = None, - timeout: int = None, - wait_timeout: int = None, - max_lifetime_session: int = None, - session_callback: Callable = None, - max_sessions_per_shard: int = None, - soda_metadata_cache: bool = None, - ping_interval: int = None, - ping_timeout: int = None, - user: str = None, - proxy_user: str = None, - password: str = None, - newpassword: str = None, - wallet_password: str = None, - access_token: Union[str, tuple, Callable] = None, - host: str = None, - port: int = None, - protocol: str = None, - https_proxy: str = None, - https_proxy_port: int = None, - service_name: str = None, - instance_name: str = None, - sid: str = None, - server_type: str = None, - cclass: str = None, - purity: oracledb.Purity = None, - expire_time: int = None, - retry_count: int = None, - retry_delay: int = None, - tcp_connect_timeout: float = None, - ssl_server_dn_match: bool = None, - ssl_server_cert_dn: str = None, - wallet_location: str = None, - events: bool = None, - externalauth: bool = None, - mode: oracledb.AuthMode = None, - disable_oob: bool = None, - stmtcachesize: int = None, - edition: str = None, - tag: str = None, - matchanytag: bool = None, - config_dir: str = None, - appcontext: list = None, - shardingkey: list = None, - supershardingkey: list = None, - debug_jdwp: str = None, - connection_id_prefix: str = None, - ssl_context: Any = None, - sdu: int = None, - pool_boundary: str = None, - use_tcp_fast_open: bool = None, - ssl_version: ssl.TLSVersion = None, - program: str = None, - machine: str = None, - terminal: str = None, - osuser: str = None, - driver_name: str = None, - use_sni: bool = None, - handle: int = None, + min: Optional[int] = None, + max: Optional[int] = None, + increment: Optional[int] = None, + connectiontype: Optional[Type["oracledb.Connection"]] = None, + getmode: 
Optional[oracledb.PoolGetMode] = None, + homogeneous: Optional[bool] = None, + timeout: Optional[int] = None, + wait_timeout: Optional[int] = None, + max_lifetime_session: Optional[int] = None, + session_callback: Optional[Callable] = None, + max_sessions_per_shard: Optional[int] = None, + soda_metadata_cache: Optional[bool] = None, + ping_interval: Optional[int] = None, + ping_timeout: Optional[int] = None, + user: Optional[str] = None, + proxy_user: Optional[str] = None, + password: Optional[str] = None, + newpassword: Optional[str] = None, + wallet_password: Optional[str] = None, + access_token: Optional[Union[str, tuple, Callable]] = None, + host: Optional[str] = None, + port: Optional[int] = None, + protocol: Optional[str] = None, + https_proxy: Optional[str] = None, + https_proxy_port: Optional[int] = None, + service_name: Optional[str] = None, + instance_name: Optional[str] = None, + sid: Optional[str] = None, + server_type: Optional[str] = None, + cclass: Optional[str] = None, + purity: Optional[oracledb.Purity] = None, + expire_time: Optional[int] = None, + retry_count: Optional[int] = None, + retry_delay: Optional[int] = None, + tcp_connect_timeout: Optional[float] = None, + ssl_server_dn_match: Optional[bool] = None, + ssl_server_cert_dn: Optional[str] = None, + wallet_location: Optional[str] = None, + events: Optional[bool] = None, + externalauth: Optional[bool] = None, + mode: Optional[oracledb.AuthMode] = None, + disable_oob: Optional[bool] = None, + stmtcachesize: Optional[int] = None, + edition: Optional[str] = None, + tag: Optional[str] = None, + matchanytag: Optional[bool] = None, + config_dir: Optional[str] = None, + appcontext: Optional[list] = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, + debug_jdwp: Optional[str] = None, + connection_id_prefix: Optional[str] = None, + ssl_context: Optional[Any] = None, + sdu: Optional[int] = None, + pool_boundary: Optional[str] = None, + use_tcp_fast_open: Optional[bool] = None, + ssl_version: Optional[ssl.TLSVersion] = None, + program: Optional[str] = None, + machine: Optional[str] = None, + terminal: Optional[str] = None, + osuser: Optional[str] = None, + driver_name: Optional[str] = None, + use_sni: Optional[bool] = None, + handle: Optional[int] = None, ): """ All parameters are optional. A brief description of each parameter diff --git a/utils/build_from_template.py b/utils/build_from_template.py index 4ccb1335..49a84440 100644 --- a/utils/build_from_template.py +++ b/utils/build_from_template.py @@ -187,7 +187,7 @@ def args_with_defaults_content(indent): Generates the content for the args_with_defaults template tag. """ args_joiner = "\n" + indent - args = [f"{f.name}: {f.typ} = {f.default}," for f in fields] + args = [f"{f.name}: Optional[{f.typ}] = None," for f in fields] return args_joiner.join(args) @@ -217,7 +217,7 @@ def async_args_with_defaults_content(indent): Generates the content for the async_args_with_defaults template tag. 
""" args_joiner = "\n" + indent - args = [f"{f.name}: {f.async_typ} = {f.default}," for f in fields] + args = [f"{f.name}: Optional[{f.async_typ}] = None," for f in fields] return args_joiner.join(args) @@ -242,7 +242,7 @@ def params_constructor_args_content(indent): """ args_joiner = f"\n{indent}" args = ["self,", "*,"] + [ - f"{f.name}: {f.typ} = {f.default}," for f in fields + f"{f.name}: Optional[{f.typ}] = None," for f in fields ] return args_joiner.join(args) @@ -334,7 +334,9 @@ def params_setter_args_content(indent): Generates the content for the params_setter_args template tag. """ args_joiner = f"\n{indent}" - args = ["self,", "*,"] + [f"{f.name}: {f.typ} = None," for f in fields] + args = ["self,", "*,"] + [ + f"{f.name}: Optional[{f.typ}] = None," for f in fields + ] return args_joiner.join(args) diff --git a/utils/templates/connect_params.py b/utils/templates/connect_params.py index 941b1c2f..8b206af8 100644 --- a/utils/templates/connect_params.py +++ b/utils/templates/connect_params.py @@ -33,7 +33,7 @@ import functools import ssl -from typing import Union, Callable, Any +from typing import Union, Callable, Any, Optional import oracledb diff --git a/utils/templates/connection.py b/utils/templates/connection.py index c9415158..e914006a 100644 --- a/utils/templates/connection.py +++ b/utils/templates/connection.py @@ -39,7 +39,7 @@ from . import __name__ as MODULE_NAME -from typing import Any, Callable, Type, Union +from typing import Any, Callable, Type, Union, Optional from . import constants, driver_mode, errors from . import base_impl, thick_impl, thin_impl from . import pool as pool_module @@ -503,10 +503,10 @@ class Connection(BaseConnection): def __init__( self, - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.ConnectionPool" = None, - params: ConnectParams = None, + pool: Optional["pool_module.ConnectionPool"] = None, + params: Optional[ConnectParams] = None, **kwargs, ) -> None: """ @@ -672,7 +672,7 @@ def commit(self) -> None: self._impl.commit() def createlob( - self, lob_type: DbType, data: Union[str, bytes] = None + self, lob_type: DbType, data: Optional[Union[str, bytes]] = None ) -> LOB: """ Create and return a new temporary LOB of the specified type. @@ -750,13 +750,13 @@ def maxBytesPerCharacter(self) -> int: def msgproperties( self, - payload: Union[bytes, str, DbObject] = None, - correlation: str = None, - delay: int = None, - exceptionq: str = None, - expiration: int = None, - priority: int = None, - recipients: list = None, + payload: Optional[Union[bytes, str, DbObject]] = None, + correlation: Optional[str] = None, + delay: Optional[int] = None, + exceptionq: Optional[str] = None, + expiration: Optional[int] = None, + priority: Optional[int] = None, + recipients: Optional[list] = None, ) -> MessageProperties: """ Create and return a message properties object. If the parameters are @@ -805,9 +805,9 @@ def proxy_user(self) -> Union[str, None]: def queue( self, name: str, - payload_type: Union[DbObjectType, str] = None, + payload_type: Optional[Union[DbObjectType, str]] = None, *, - payloadType: DbObjectType = None, + payloadType: Optional[DbObjectType] = None, ) -> Queue: """ Creates and returns a queue which is used to enqueue and dequeue @@ -860,7 +860,10 @@ def shutdown(self, mode: int = 0) -> None: self._impl.shutdown(mode) def startup( - self, force: bool = False, restrict: bool = False, pfile: str = None + self, + force: bool = False, + restrict: bool = False, + pfile: Optional[str] = None, ) -> None: """ Startup the database. 
This is equivalent to the SQL*Plus command @@ -878,19 +881,19 @@ def subscribe( self, namespace: int = constants.SUBSCR_NAMESPACE_DBCHANGE, protocol: int = constants.SUBSCR_PROTO_CALLBACK, - callback: Callable = None, + callback: Optional[Callable] = None, timeout: int = 0, operations: int = constants.OPCODE_ALLOPS, port: int = 0, qos: int = constants.SUBSCR_QOS_DEFAULT, - ip_address: str = None, + ip_address: Optional[str] = None, grouping_class: int = constants.SUBSCR_GROUPING_CLASS_NONE, grouping_value: int = 0, grouping_type: int = constants.SUBSCR_GROUPING_TYPE_SUMMARY, - name: str = None, + name: Optional[str] = None, client_initiated: bool = False, *, - ipAddress: str = None, + ipAddress: Optional[str] = None, groupingClass: int = constants.SUBSCR_GROUPING_CLASS_NONE, groupingValue: int = 0, groupingType: int = constants.SUBSCR_GROUPING_TYPE_SUMMARY, @@ -1060,7 +1063,9 @@ def tpc_begin( errors._raise_err(errors.ERR_INVALID_TPC_BEGIN_FLAGS) self._impl.tpc_begin(xid, flags, timeout) - def tpc_commit(self, xid: Xid = None, one_phase: bool = False) -> None: + def tpc_commit( + self, xid: Optional[Xid] = None, one_phase: bool = False + ) -> None: """ Prepare the global transaction for commit. Return a boolean indicating if a transaction was actually prepared in order to avoid the error @@ -1081,7 +1086,7 @@ def tpc_commit(self, xid: Xid = None, one_phase: bool = False) -> None: self._impl.tpc_commit(xid, one_phase) def tpc_end( - self, xid: Xid = None, flags: int = constants.TPC_END_NORMAL + self, xid: Optional[Xid] = None, flags: int = constants.TPC_END_NORMAL ) -> None: """ Ends (detaches from) a TPC (two-phase commit) transaction. @@ -1101,7 +1106,7 @@ def tpc_forget(self, xid: Xid) -> None: self._verify_xid(xid) self._impl.tpc_forget(xid) - def tpc_prepare(self, xid: Xid = None) -> bool: + def tpc_prepare(self, xid: Optional[Xid] = None) -> bool: """ Prepares a global transaction for commit. After calling this function, no further activity should take place on this connection until either @@ -1136,7 +1141,7 @@ def tpc_recover(self) -> list: cursor.rowfactory = Xid return cursor.fetchall() - def tpc_rollback(self, xid: Xid = None) -> None: + def tpc_rollback(self, xid: Optional[Xid] = None) -> None: """ When called with no arguments, rolls back the transaction previously started with tpc_begin(). 
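Since the two-phase commit methods above now accept an optional Xid, a short sketch of the usual flow may help. It assumes an existing Connection named conn and a suitable table; the transaction identifiers and the statement are made up for illustration:

    # Assumes an existing Connection object named "conn" and a suitable table.
    xid = conn.xid(
        format_id=1, global_transaction_id="tx-1", branch_qualifier="br-1"
    )

    conn.tpc_begin(xid)
    with conn.cursor() as cursor:
        cursor.execute("insert into SomeTable (IntCol) values (1)")

    # With no xid argument, the transaction started by tpc_begin() is used.
    if conn.tpc_prepare():
        conn.tpc_commit()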
@@ -1174,12 +1179,12 @@ class constructor does not check the validity of the supplied keyword @functools.wraps(f) def connect( - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.ConnectionPool" = None, - pool_alias: str = None, + pool: Optional["pool_module.ConnectionPool"] = None, + pool_alias: Optional[str] = None, conn_class: Type[Connection] = Connection, - params: ConnectParams = None, + params: Optional[ConnectParams] = None, **kwargs, ) -> Connection: f( @@ -1216,12 +1221,12 @@ def connect( @_connection_factory def connect( - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.ConnectionPool" = None, - pool_alias: str = None, + pool: Optional["pool_module.ConnectionPool"] = None, + pool_alias: Optional[str] = None, conn_class: Type[Connection] = Connection, - params: ConnectParams = None, + params: Optional[ConnectParams] = None, # {{ args_with_defaults }} ) -> Connection: """ @@ -1368,8 +1373,8 @@ async def callfunc( self, name: str, return_type: Any, - parameters: Union[list, tuple] = None, - keyword_parameters: dict = None, + parameters: Optional[Union[list, tuple]] = None, + keyword_parameters: Optional[dict] = None, ) -> Any: """ Call a PL/SQL function with the given name. @@ -1385,8 +1390,8 @@ async def callfunc( async def callproc( self, name: str, - parameters: Union[list, tuple] = None, - keyword_parameters: dict = None, + parameters: Optional[Union[list, tuple]] = None, + keyword_parameters: Optional[dict] = None, ) -> list: """ Call a PL/SQL procedure with the given name. @@ -1422,7 +1427,7 @@ async def commit(self) -> None: await self._impl.commit() async def createlob( - self, lob_type: DbType, data: Union[str, bytes] = None + self, lob_type: DbType, data: Optional[Union[str, bytes]] = None ) -> AsyncLOB: """ Create and return a new temporary LOB of the specified type. @@ -1448,7 +1453,9 @@ def cursor(self, scrollable: bool = False) -> AsyncCursor: return AsyncCursor(self, scrollable) async def execute( - self, statement: str, parameters: Union[list, tuple, dict] = None + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, ) -> None: """ Execute a statement against the database. @@ -1476,9 +1483,9 @@ async def executemany( async def fetchall( self, statement: str, - parameters: Union[list, tuple, dict] = None, - arraysize: int = None, - rowfactory: Callable = None, + parameters: Optional[Union[list, tuple, dict]] = None, + arraysize: Optional[int] = None, + rowfactory: Optional[Callable] = None, ) -> list: """ Executes a query and returns all of the rows. After the rows are @@ -1495,9 +1502,9 @@ async def fetchall( async def fetchmany( self, statement: str, - parameters: Union[list, tuple, dict] = None, - num_rows: int = None, - rowfactory: Callable = None, + parameters: Optional[Union[list, tuple, dict]] = None, + num_rows: Optional[int] = None, + rowfactory: Optional[Callable] = None, ) -> list: """ Executes a query and returns up to the specified number of rows. 
After @@ -1516,8 +1523,8 @@ async def fetchmany( async def fetchone( self, statement: str, - parameters: Union[list, tuple, dict] = None, - rowfactory: Callable = None, + parameters: Optional[Union[list, tuple, dict]] = None, + rowfactory: Optional[Callable] = None, ) -> Any: """ Executes a query and returns the first row of the result set if one @@ -1602,7 +1609,7 @@ async def tpc_begin( await self._impl.tpc_begin(xid, flags, timeout) async def tpc_commit( - self, xid: Xid = None, one_phase: bool = False + self, xid: Optional[Xid] = None, one_phase: bool = False ) -> None: """ Prepare the global transaction for commit. Return a boolean indicating @@ -1624,7 +1631,7 @@ async def tpc_commit( await self._impl.tpc_commit(xid, one_phase) async def tpc_end( - self, xid: Xid = None, flags: int = constants.TPC_END_NORMAL + self, xid: Optional[Xid] = None, flags: int = constants.TPC_END_NORMAL ) -> None: """ Ends (detaches from) a TPC (two-phase commit) transaction. @@ -1644,7 +1651,7 @@ async def tpc_forget(self, xid: Xid) -> None: self._verify_xid(xid) await self._impl.tpc_forget(xid) - async def tpc_prepare(self, xid: Xid = None) -> bool: + async def tpc_prepare(self, xid: Optional[Xid] = None) -> bool: """ Prepares a global transaction for commit. After calling this function, no further activity should take place on this connection until either @@ -1679,7 +1686,7 @@ async def tpc_recover(self) -> list: cursor.rowfactory = Xid return await cursor.fetchall() - async def tpc_rollback(self, xid: Xid = None) -> None: + async def tpc_rollback(self, xid: Optional[Xid] = None) -> None: """ When called with no arguments, rolls back the transaction previously started with tpc_begin(). @@ -1703,12 +1710,12 @@ def _async_connection_factory(f): @functools.wraps(f) def connect_async( - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.AsyncConnectionPool" = None, - pool_alias: str = None, + pool: Optional["pool_module.AsyncConnectionPool"] = None, + pool_alias: Optional[str] = None, conn_class: Type[AsyncConnection] = AsyncConnection, - params: ConnectParams = None, + params: Optional[ConnectParams] = None, **kwargs, ) -> AsyncConnection: # check arguments @@ -1755,12 +1762,12 @@ def connect_async( @_async_connection_factory def connect_async( - dsn: str = None, + dsn: Optional[str] = None, *, - pool: "pool_module.AsyncConnectionPool" = None, - pool_alias: str = None, + pool: Optional["pool_module.AsyncConnectionPool"] = None, + pool_alias: Optional[str] = None, conn_class: Type[AsyncConnection] = AsyncConnection, - params: ConnectParams = None, + params: Optional[ConnectParams] = None, # {{ args_with_defaults }} ) -> AsyncConnection: """ diff --git a/utils/templates/pool.py b/utils/templates/pool.py index 281ff65d..a1248f23 100644 --- a/utils/templates/pool.py +++ b/utils/templates/pool.py @@ -34,7 +34,7 @@ import functools import ssl import threading -from typing import Callable, Type, Union, Any +from typing import Callable, Type, Union, Any, Optional import oracledb @@ -51,10 +51,10 @@ class BaseConnectionPool: def __init__( self, - dsn: str = None, + dsn: Optional[str] = None, *, - params: PoolParams = None, - cache_name=None, + params: Optional[PoolParams] = None, + cache_name: Optional[str] = None, **kwargs, ) -> None: """ @@ -377,14 +377,14 @@ def _set_connection_type(self, conn_class): def acquire( self, - user: str = None, - password: str = None, - cclass: str = None, + user: Optional[str] = None, + password: Optional[str] = None, + cclass: Optional[str] = None, purity: int = 
oracledb.PURITY_DEFAULT, - tag: str = None, + tag: Optional[str] = None, matchanytag: bool = False, - shardingkey: list = None, - supershardingkey: list = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, ) -> "connection_module.Connection": """ Acquire a connection from the pool and return it. @@ -456,7 +456,9 @@ def drop(self, connection: "connection_module.Connection") -> None: connection._impl = None def release( - self, connection: "connection_module.Connection", tag: str = None + self, + connection: "connection_module.Connection", + tag: Optional[str] = None, ) -> None: """ Release the connection back to the pool now, rather than whenever @@ -489,17 +491,17 @@ def release( def reconfigure( self, - min: int = None, - max: int = None, - increment: int = None, - getmode: int = None, - timeout: int = None, - wait_timeout: int = None, - max_lifetime_session: int = None, - max_sessions_per_shard: int = None, - soda_metadata_cache: bool = None, - stmtcachesize: int = None, - ping_interval: int = None, + min: Optional[int] = None, + max: Optional[int] = None, + increment: Optional[int] = None, + getmode: Optional[int] = None, + timeout: Optional[int] = None, + wait_timeout: Optional[int] = None, + max_lifetime_session: Optional[int] = None, + max_sessions_per_shard: Optional[int] = None, + soda_metadata_cache: Optional[bool] = None, + stmtcachesize: Optional[int] = None, + ping_interval: Optional[int] = None, ) -> None: """ Reconfigures various parameters of a connection pool. The pool size @@ -576,11 +578,11 @@ def _pool_factory(f): @functools.wraps(f) def create_pool( - dsn: str = None, + dsn: Optional[str] = None, *, pool_class: Type[ConnectionPool] = ConnectionPool, - pool_alias: str = None, - params: PoolParams = None, + pool_alias: Optional[str] = None, + params: Optional[PoolParams] = None, **kwargs, ) -> ConnectionPool: f( @@ -599,11 +601,11 @@ def create_pool( @_pool_factory def create_pool( - dsn: str = None, + dsn: Optional[str] = None, *, pool_class: Type[ConnectionPool] = ConnectionPool, - pool_alias: str = None, - params: PoolParams = None, + pool_alias: Optional[str] = None, + params: Optional[PoolParams] = None, # {{ args_with_defaults }} ) -> ConnectionPool: """ @@ -659,14 +661,14 @@ def _set_connection_type(self, conn_class): def acquire( self, - user: str = None, - password: str = None, - cclass: str = None, + user: Optional[str] = None, + password: Optional[str] = None, + cclass: Optional[str] = None, purity: int = oracledb.PURITY_DEFAULT, - tag: str = None, + tag: Optional[str] = None, matchanytag: bool = False, - shardingkey: list = None, - supershardingkey: list = None, + shardingkey: Optional[list] = None, + supershardingkey: Optional[list] = None, ) -> "connection_module.AsyncConnection": """ Acquire a connection from the pool and return it. 
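A small sketch of reconfigure(), whose parameters above are all optional; only the values that are actually passed are changed, and an existing pool is assumed:

    # Assumes an existing ConnectionPool object named "pool".
    pool.reconfigure(min=2, max=10, increment=2)

    # Parameters that are not passed keep their current values.
    pool.reconfigure(ping_interval=30, stmtcachesize=40)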
@@ -740,7 +742,9 @@ async def drop(self, connection: "connection_module.Connection") -> None: connection._impl = None async def release( - self, connection: "connection_module.AsyncConnection", tag: str = None + self, + connection: "connection_module.AsyncConnection", + tag: Optional[str] = None, ) -> None: """ Release the connection back to the pool now, rather than whenever @@ -785,11 +789,11 @@ def _async_pool_factory(f): @functools.wraps(f) def create_pool_async( - dsn: str = None, + dsn: Optional[str] = None, *, pool_class: Type[ConnectionPool] = AsyncConnectionPool, - pool_alias: str = None, - params: PoolParams = None, + pool_alias: Optional[str] = None, + params: Optional[PoolParams] = None, **kwargs, ) -> AsyncConnectionPool: f( @@ -809,11 +813,11 @@ def create_pool_async( @_async_pool_factory def create_pool_async( - dsn: str = None, + dsn: Optional[str] = None, *, pool_class: Type[ConnectionPool] = AsyncConnectionPool, - pool_alias: str = None, - params: PoolParams = None, + pool_alias: Optional[str] = None, + params: Optional[PoolParams] = None, # {{ async_args_with_defaults }} ) -> AsyncConnectionPool: """ diff --git a/utils/templates/pool_params.py b/utils/templates/pool_params.py index 6df381ff..2de09a42 100644 --- a/utils/templates/pool_params.py +++ b/utils/templates/pool_params.py @@ -32,7 +32,7 @@ # ----------------------------------------------------------------------------- import ssl -from typing import Callable, Type, Union, Any +from typing import Callable, Type, Union, Any, Optional import oracledb From 6b2a7d58163c5aa99abacb648a30bf51e035d93e Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 14:03:28 -0700 Subject: [PATCH 036/178] Add support for passthrough of connect strings to the Oracle Client library parser (for backwards compatibility). --- doc/src/api_manual/connect_params.rst | 19 ++++- doc/src/api_manual/defaults.rst | 27 +++++++ doc/src/api_manual/module.rst | 93 ++++++++++++++++++----- doc/src/api_manual/pool_params.rst | 7 +- doc/src/release_notes.rst | 3 +- src/oracledb/base_impl.pxd | 2 + src/oracledb/connect_params.py | 31 +++++++- src/oracledb/connection.py | 20 ++++- src/oracledb/defaults.py | 14 +++- src/oracledb/impl/base/connect_params.pyx | 13 +++- src/oracledb/impl/base/defaults.pyx | 3 +- src/oracledb/impl/thick/connection.pyx | 5 +- src/oracledb/impl/thick/pool.pyx | 5 +- src/oracledb/pool.py | 18 ++++- src/oracledb/pool_params.py | 20 ++++- tests/test_4500_connect_params.py | 2 + tests/test_4700_pool_params.py | 1 + tests/test_env.py | 1 + utils/fields.cfg | 12 ++- utils/templates/connection.py | 4 +- utils/templates/pool.py | 2 +- 21 files changed, 260 insertions(+), 42 deletions(-) diff --git a/doc/src/api_manual/connect_params.rst b/doc/src/api_manual/connect_params.rst index ef037d78..40b2896d 100644 --- a/doc/src/api_manual/connect_params.rst +++ b/doc/src/api_manual/connect_params.rst @@ -62,14 +62,17 @@ ConnectParams Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, use_sni=None, handle=None) + driver_name=oracledb.defaults.driver_name, use_sni=None, \ + thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ + handle=None) Sets the values for one or more of the parameters of a ConnectParams object. .. 
versionchanged:: 3.0.0 - The ``use_sni`` and ``instance_name`` parameters were added. + The ``use_sni``, ``thick_mode_dsn_passthrough``, and ``instance_name`` + parameters were added. .. versionchanged:: 2.5.0 @@ -544,6 +547,18 @@ ConnectParams Attributes .. versionadded:: 2.5.0 +.. attribute:: ConnectParams.thick_mode_dsn_passthrough + + This read-only attribute is a boolean which indicates whether the connect + string should be passed unchanged to Oracle Client libraries for parsing or + if python-oracledb should parse the connect string itself when using Thick + mode. The default value is the value of + :attr:`defaults.thick_mode_dsn_passthrough`. + + This attribute is only supported in python-oracledb Thick mode. + + .. versionadded:: 3.0.0 + .. attribute:: ConnectParams.use_tcp_fast_open This read-only attribute is a boolean which indicates whether to use an diff --git a/doc/src/api_manual/defaults.rst b/doc/src/api_manual/defaults.rst index 11b44ae1..c429d027 100644 --- a/doc/src/api_manual/defaults.rst +++ b/doc/src/api_manual/defaults.rst @@ -162,3 +162,30 @@ Defaults Attributes This attribute is only used in python-oracledb Thin mode. .. versionadded:: 2.5.0 + +.. attribute:: defaults.thick_mode_dsn_passthrough + + The default value that determines whether :ref:`connection strings + ` passed to :meth:`oracledb.connect()` and + :meth:`oracledb.create_pool()` in python-oracledb Thick mode will be parsed + by Oracle Client libraries or by python-oracledb itself. + + When the value of this attribute is *True*, then connection strings passed + to these methods will be sent unchanged to the Oracle Client libraries. + + Setting this attribute to *False* makes Thick and Thin mode applications + behave similarly regarding connection string parameter handling and + locating any optional :ref:`tnsnames.ora files ` configuration + file, see :ref:`usingconfigfiles`. Connection strings used in connection + and pool creation methods in Thick mode are parsed by python-oracledb + itself and a generated connect descriptor is sent to the Oracle Client + libraries. The location of any optional :ref:`tnsnames.ora file + ` used to resolve a :ref:`TNS Alias ` is + determined by python-oracledb heuristics instead of by the Oracle Client + libraries. + + This attribute has an initial value of *True*. + + This attribute is ignored in python-oracledb Thin mode. + + .. versionadded:: 3.0.0 diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index 6fb49361..455a54d2 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -52,7 +52,9 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, \ + thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ + handle=0) Constructor for creating a connection to the database. Returns a :ref:`Connection Object `. All parameters are optional and can be @@ -397,6 +399,15 @@ Oracledb Methods is used in both the python-oracledb Thin and Thick modes. The default is the value of :attr:`defaults.driver_name`. 
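A usage sketch for the new default described above. It only has an effect in Thick mode, so init_oracle_client() is called first; the credentials and TNS alias are placeholders:

    import oracledb

    # Enable Thick mode (the client library location is system specific and
    # omitted here).
    oracledb.init_oracle_client()

    # Have python-oracledb parse connect strings and locate any tnsnames.ora
    # file itself, instead of passing them unchanged to the Oracle Client
    # libraries.
    oracledb.defaults.thick_mode_dsn_passthrough = False

    # Placeholder credentials and TNS alias for illustration only.
    conn = oracledb.connect(user="hr", password="hr_password", dsn="mydb_alias")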
+ The ``thick_mode_dsn_passthrough`` parameter is expected to be a boolean + which indicates whether the connect string should be passed unchanged to + the Oracle Client libraries for parsing when using python-oracledb Thick + mode. If this parameter is set to *False* in Thick mode, connect strings + are parsed by python-oracledb itself and a generated connect descriptor is + sent to the Oracle Client libraries. This value is only used in the + python-oracledb Thick mode. The default value is the value of + :attr:`defaults.thick_mode_dsn_passthrough`. + If the ``handle`` parameter is specified, it must be of type OCISvcCtx\* and is only of use when embedding Python in an application (like PowerBuilder) which has already made the connection. The connection thus @@ -407,9 +418,9 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias``, ``instance_name`` and ``use_sni`` parameters were - added. The ``pool`` parameter was deprecated. Use - :meth:`ConnectionPool.acquire()` instead. + The ``pool_alias``, ``instance_name``, ``use_sni``, and + ``thick_mode_dsn_passthrough`` parameters were added. The ``pool`` + parameter was deprecated. Use :meth:`ConnectionPool.acquire()` instead. .. versionchanged:: 2.5.0 @@ -454,7 +465,9 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, \ + thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ + handle=0) Constructor for creating a connection to the database. Returns an :ref:`AsyncConnection Object `. All parameters are optional @@ -734,13 +747,15 @@ Oracledb Methods is used in both the python-oracledb Thin and Thick modes. The default is the value of :attr:`defaults.driver_name`. - The ``handle`` parameter is ignored in the python-oracledb Thin mode. + The ``thick_mode_dsn_passthrough`` and ``handle`` parameters are ignored in + python-oracledb Thin mode. .. versionchanged:: 3.0.0 - The ``pool_alias``, ``instance_name`` and ``use_sni`` parameters were - added. The ``pool`` parameter was deprecated. Use - :meth:`AsyncConnectionPool.acquire()` instead. + The ``pool_alias``, ``instance_name``, ``use_sni``, and + ``thick_mode_dsn_passthrough`` parameters were added. The ``pool`` + parameter was deprecated. Use :meth:`AsyncConnectionPool.acquire()` + instead. .. versionchanged:: 2.5.0 @@ -783,7 +798,9 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, \ + thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ + handle=0) Contains all the parameters that can be used to establish a connection to the database. @@ -1082,6 +1099,15 @@ Oracledb Methods is used in both the python-oracledb Thin and Thick modes. The default is the value of :attr:`defaults.driver_name`. + The ``thick_mode_dsn_passthrough`` parameter is expected to be a boolean + which indicates whether the connect string should be passed unchanged to + the Oracle Client libraries for parsing when using python-oracledb Thick + mode. 
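The same behaviour can also be requested for an individual connection or pool by passing the new parameter directly, or through a ConnectParams object. The sketch below uses the synchronous connect() with placeholder values, but per this patch the parameter is also accepted by connect_async() and the pool creation functions (and ignored in Thin mode):

    import oracledb

    oracledb.init_oracle_client()

    # Per-call override of the new parameter (placeholder connection details).
    conn = oracledb.connect(
        user="hr",
        password="hr_password",
        dsn="dbhost.example.com:1521/orclpdb1",
        thick_mode_dsn_passthrough=False,
    )

    # Equivalent form using a ConnectParams object.
    params = oracledb.ConnectParams(
        user="hr",
        password="hr_password",
        thick_mode_dsn_passthrough=False,
    )
    conn2 = oracledb.connect(
        dsn="dbhost.example.com:1521/orclpdb1", params=params
    )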
If this parameter is set to *False* in Thick mode, connect strings + are parsed by python-oracledb itself and a generated connect descriptor is + sent to the Oracle Client libraries. This value is only used in the + python-oracledb Thick mode. The default value is the value of + :attr:`defaults.thick_mode_dsn_passthrough`. + The ``handle`` parameter is expected to be an integer which represents a pointer to a valid service context handle. This value is only used in the python-oracledb Thick mode. It should be used with extreme caution. The @@ -1089,7 +1115,8 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``use_sni`` and ``instance_name`` parameters were added. + The ``use_sni``, ``thick_mode_dsn_passthrough``, and ``instance_name`` + parameters were added. .. versionchanged:: 2.5.0 @@ -1150,7 +1177,9 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, \ + thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ + handle=0) Creates a connection pool with the supplied parameters and returns the :ref:`ConnectionPool object ` for the pool. See :ref:`Connection @@ -1570,6 +1599,15 @@ Oracledb Methods is used in both the python-oracledb Thin and Thick modes. The default is the value of :attr:`defaults.driver_name`. + The ``thick_mode_dsn_passthrough`` parameter is expected to be a boolean + which indicates whether the connect string should be passed unchanged to + the Oracle Client libraries for parsing when using python-oracledb Thick + mode. If this parameter is set to *False* in Thick mode, connect strings + are parsed by python-oracledb itself and a generated connect descriptor is + sent to the Oracle Client libraries. This value is only used in the + python-oracledb Thick mode. The default value is + :attr:`defaults.thick_mode_dsn_passthrough`. + If the ``handle`` parameter is specified, it must be of type OCISvcCtx\* and is only of use when embedding Python in an application (like PowerBuilder) which has already made the connection. The connection thus @@ -1580,8 +1618,8 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias``, ``instance_name`` and ``use_sni`` parameters were - added. + The ``pool_alias``, ``instance_name``, ``use_sni``, and + ``thick_mode_dsn_passthrough`` parameters were added. .. versionchanged:: 2.5.0 @@ -1631,7 +1669,9 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, \ + thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ + handle=0) Creates a connection pool with the supplied parameters and returns the :ref:`AsyncConnectionPool object ` for the pool. @@ -1970,12 +2010,13 @@ Oracledb Methods is used in both the python-oracledb Thin and Thick modes. The default is the value of :attr:`defaults.driver_name`. - The ``handle`` parameter is ignored in the python-oracledb Thin mode. + The ``handle`` and ``thick_mode_dsn_passthrough`` parameters are ignored in + python-oracledb Thin mode. .. 
versionchanged:: 3.0.0 - The ``pool_alias``, ``instance_name`` and ``use_sni`` parameters were - added. + The ``pool_alias``, ``instance_name``, ``use_sni``, and + ``thick_mode_dsn_passthrough`` parameters were added. .. versionchanged:: 2.5.0 @@ -2192,7 +2233,9 @@ Oracledb Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, use_sni=False, handle=0) + driver_name=oracledb.defaults.driver_name, use_sni=False, \ + thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ + handle=0) Creates and returns a :ref:`PoolParams Object `. The object can be passed to :meth:`oracledb.create_pool()`. @@ -2555,6 +2598,15 @@ Oracledb Methods is used in both the python-oracledb Thin and Thick modes. The default is the value of :attr:`defaults.driver_name`. + The ``thick_mode_dsn_passthrough`` parameter is expected to be a boolean + which indicates whether the connect string should be passed unchanged to + the Oracle Client libraries for parsing when using python-oracledb Thick + mode. If this parameter is set to *False* in Thick mode, connect strings + are parsed by python-oracledb itself and a generated connect descriptor is + sent to the Oracle Client libraries. This value is only used in the + python-oracledb Thick mode. The default value is + :attr:`defualts.thick_mode_dsn_passthrough`. + The ``handle`` parameter is expected to be an integer which represents a pointer to a valid service context handle. This value is only used in the python-oracledb Thick mode. It should be used with extreme caution. The @@ -2562,7 +2614,8 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``use_sni`` and ``instance_name`` parameters were added. + The ``use_sni``, ``thick_mode_dsn_passthrough``, and ``instance_name`` + parameters were added. .. versionchanged:: 2.5.0 diff --git a/doc/src/api_manual/pool_params.rst b/doc/src/api_manual/pool_params.rst index 9ca30588..11ca1dfa 100644 --- a/doc/src/api_manual/pool_params.rst +++ b/doc/src/api_manual/pool_params.rst @@ -51,13 +51,16 @@ PoolParams Methods pool_boundary=None, use_tcp_fast_open=False, ssl_version=None, \ program=oracledb.defaults.program, machine=oracledb.defaults.machine, \ terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ - driver_name=oracledb.defaults.driver_name, use_sni=None, handle=None) + driver_name=oracledb.defaults.driver_name, use_sni=None, \ + thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ + handle=None) Sets one or more of the parameters. .. versionchanged:: 3.0.0 - The ``use_sni`` and ``instance_name`` parameters were added. + The ``use_sni``, ``thick_mode_dsn_passthrough``, and + ``instance_name`` parameters were added. .. versionchanged:: 2.5.0 diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index c6ea1dbc..402c246e 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -88,7 +88,8 @@ Common Changes #) Added :meth:`oracledb.register_password_type()` to allow users to register a function that will be called when a password is supplied as a dictionary containing the key "type". -#) All connect strings are now parsed by the driver. Previously, only Thin +#) All connect strings are parsed by the driver if the new parameter + ``thick_mode_dsn_passthrough`` is set to *True*. 
Previously, only Thin mode parsed all connect strings and Thick mode passed the connect string unchanged to the Oracle Client library to parse. Parameters unrecognized by the driver in :ref:`Easy Connect strings ` are now ignored. diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index 0ab0098a..af7581df 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -241,6 +241,7 @@ cdef class DefaultsImpl: public str config_dir public bint fetch_lobs public bint fetch_decimals + public bint thick_mode_dsn_passthrough public uint32_t prefetchrows public uint32_t stmtcachesize public str program @@ -559,6 +560,7 @@ cdef class ConnectParamsImpl: public str terminal public str osuser public str driver_name + public bint thick_mode_dsn_passthrough cdef int _check_credentials(self) except -1 cdef int _copy(self, ConnectParamsImpl other_params) except -1 diff --git a/src/oracledb/connect_params.py b/src/oracledb/connect_params.py index 805b5abd..315b9161 100644 --- a/src/oracledb/connect_params.py +++ b/src/oracledb/connect_params.py @@ -105,6 +105,7 @@ def __init__( osuser: Optional[str] = None, driver_name: Optional[str] = None, use_sni: Optional[bool] = None, + thick_mode_dsn_passthrough: Optional[bool] = None, handle: Optional[int] = None, ): """ @@ -302,6 +303,14 @@ def __init__( bypass the second TLS neogiation that would otherwise be required (default: False) + - thick_mode_dsn_passthrough: boolean indicating whether to pass the + connect string to the Oracle Client libraries unchanged without + parsing by the driver. Setting this to False makes thick and thin + mode applications behave similarly regarding connection string + parameter handling and locating any optional tnsnames.ora + configuration file (default: + oracledb.defaults.thick_mode_dsn_passthrough) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) @@ -356,7 +365,8 @@ def __repr__(self): + f"terminal={self.terminal!r}, " + f"osuser={self.osuser!r}, " + f"driver_name={self.driver_name!r}, " - + f"use_sni={self.use_sni!r}" + + f"use_sni={self.use_sni!r}, " + + f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}" + ")" ) @@ -740,6 +750,17 @@ def terminal(self) -> str: """ return self._impl.terminal + @property + def thick_mode_dsn_passthrough(self) -> bool: + """ + Boolean indicating whether to pass the connect string to the Oracle + Client libraries unchanged without parsing by the driver. Setting this + to False makes thick and thin mode applications behave similarly + regarding connection string parameter handling and locating any + optional tnsnames.ora configuration file. + """ + return self._impl.thick_mode_dsn_passthrough + @property def user(self) -> str: """ @@ -877,6 +898,7 @@ def set( osuser: Optional[str] = None, driver_name: Optional[str] = None, use_sni: Optional[bool] = None, + thick_mode_dsn_passthrough: Optional[bool] = None, handle: Optional[int] = None, ): """ @@ -1061,6 +1083,13 @@ def set( - use_sni: boolean indicating whether to use the TLS SNI extension to bypass the second TLS neogiation that would otherwise be required + - thick_mode_dsn_passthrough: boolean indicating whether to pass the + connect string to the Oracle Client libraries unchanged without + parsing by the driver. 
Setting this to False makes thick and thin + mode applications behave similarly regarding connection string + parameter handling and locating any optional tnsnames.ora + configuration file + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index 4eb0e5a2..bc179dc4 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -549,7 +549,7 @@ def __init__( errors._raise_err(errors.ERR_INVALID_CONNECT_PARAMS) else: params_impl = params._impl.copy() - dsn = params_impl.process_args(dsn, kwargs) + dsn = params_impl.process_args(dsn, kwargs, thin) # see if connection is being acquired from a pool if pool is None: @@ -1278,6 +1278,7 @@ def connect( osuser: Optional[str] = None, driver_name: Optional[str] = None, use_sni: Optional[bool] = None, + thick_mode_dsn_passthrough: Optional[bool] = None, handle: Optional[int] = None, ) -> Connection: """ @@ -1494,6 +1495,13 @@ def connect( bypass the second TLS neogiation that would otherwise be required (default: False) + - thick_mode_dsn_passthrough: boolean indicating whether to pass the + connect string to the Oracle Client libraries unchanged without parsing + by the driver. Setting this to False makes thick and thin mode + applications behave similarly regarding connection string parameter + handling and locating any optional tnsnames.ora configuration file + (default: oracledb.defaults.thick_mode_dsn_passthrough) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) @@ -1553,7 +1561,7 @@ async def _connect(self, dsn, pool, params, kwargs): errors._raise_err(errors.ERR_INVALID_CONNECT_PARAMS) else: params_impl = params._impl.copy() - dsn = params_impl.process_args(dsn, kwargs) + dsn = params_impl.process_args(dsn, kwargs, thin=True) # see if connection is being acquired from a pool if pool is None: @@ -2054,6 +2062,7 @@ def connect_async( osuser: Optional[str] = None, driver_name: Optional[str] = None, use_sni: Optional[bool] = None, + thick_mode_dsn_passthrough: Optional[bool] = None, handle: Optional[int] = None, ) -> AsyncConnection: """ @@ -2270,6 +2279,13 @@ def connect_async( bypass the second TLS neogiation that would otherwise be required (default: False) + - thick_mode_dsn_passthrough: boolean indicating whether to pass the + connect string to the Oracle Client libraries unchanged without parsing + by the driver. Setting this to False makes thick and thin mode + applications behave similarly regarding connection string parameter + handling and locating any optional tnsnames.ora configuration file + (default: oracledb.defaults.thick_mode_dsn_passthrough) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) diff --git a/src/oracledb/defaults.py b/src/oracledb/defaults.py index 2985b80d..c186d809 100644 --- a/src/oracledb/defaults.py +++ b/src/oracledb/defaults.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2021, 2024, Oracle and/or its affiliates. +# Copyright (c) 2021, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -175,5 +175,17 @@ def driver_name(self) -> str: def driver_name(self, value: str): self._impl.driver_name = value + @property + def thick_mode_dsn_passthrough(self) -> str: + """ + Specifies whether to pass connect strings to the Oracle Client + libraries unchanged when using thick mode. + """ + return self._impl.thick_mode_dsn_passthrough + + @thick_mode_dsn_passthrough.setter + def thick_mode_dsn_passthrough(self, value: str): + self._impl.thick_mode_dsn_passthrough = value + defaults = Defaults() diff --git a/src/oracledb/impl/base/connect_params.pyx b/src/oracledb/impl/base/connect_params.pyx index c83304ce..b34b7387 100644 --- a/src/oracledb/impl/base/connect_params.pyx +++ b/src/oracledb/impl/base/connect_params.pyx @@ -64,6 +64,7 @@ cdef class ConnectParamsImpl: self.machine = C_DEFAULTS.machine self.osuser = C_DEFAULTS.osuser self.driver_name = C_DEFAULTS.driver_name + self.thick_mode_dsn_passthrough = C_DEFAULTS.thick_mode_dsn_passthrough def set(self, dict args): """ @@ -102,6 +103,8 @@ cdef class ConnectParamsImpl: _set_str_param(args, "machine", self, check_network_character_set=True) _set_str_param(args, "osuser", self, check_network_character_set=True) _set_str_param(args, "driver_name", self) + _set_bool_param(args, "thick_mode_dsn_passthrough", + &self.thick_mode_dsn_passthrough) self._set_access_token_param(args.get("access_token")) # set parameters found on Description instances @@ -188,6 +191,8 @@ cdef class ConnectParamsImpl: self.machine = other_params.machine self.osuser = other_params.osuser self.driver_name = other_params.driver_name + self.thick_mode_dsn_passthrough = \ + other_params.thick_mode_dsn_passthrough cdef str _get_connect_string(self): """ @@ -531,7 +536,7 @@ cdef class ConnectParamsImpl: else: self.user = user - def process_args(self, str dsn, dict kwargs): + def process_args(self, str dsn, dict kwargs, bint thin): """ Processes the arguments to connect() and create_pool(). @@ -548,10 +553,10 @@ cdef class ConnectParamsImpl: if self.user is None and not self.externalauth and dsn is not None: user, password, dsn = self.parse_dsn_with_credentials(dsn) self.set(dict(user=user, password=password)) - if dsn is not None: - self.parse_connect_string(dsn) - else: + if dsn is None: dsn = self._get_connect_string() + elif thin or not self.thick_mode_dsn_passthrough: + self.parse_connect_string(dsn) return dsn diff --git a/src/oracledb/impl/base/defaults.pyx b/src/oracledb/impl/base/defaults.pyx index 2ff685d1..751b9efe 100644 --- a/src/oracledb/impl/base/defaults.pyx +++ b/src/oracledb/impl/base/defaults.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2024, Oracle and/or its affiliates. +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -45,6 +45,7 @@ cdef class DefaultsImpl: except: self.osuser = "" self.driver_name = None + self.thick_mode_dsn_passthrough = True cdef DefaultsImpl C_DEFAULTS = DefaultsImpl() DEFAULTS = C_DEFAULTS diff --git a/src/oracledb/impl/thick/connection.pyx b/src/oracledb/impl/thick/connection.pyx index 825c3d80..a49edb83 100644 --- a/src/oracledb/impl/thick/connection.pyx +++ b/src/oracledb/impl/thick/connection.pyx @@ -383,7 +383,10 @@ cdef class ThickConnImpl(BaseConnImpl): params.password_ptr = params.password params.password_len = len(params.password) if pool_impl is None: - connect_string = user_params._get_connect_string() + if user_params.thick_mode_dsn_passthrough: + connect_string = self.dsn + else: + connect_string = user_params._get_connect_string() if connect_string is not None: params.connect_string = connect_string.encode() params.connect_string_ptr = params.connect_string diff --git a/src/oracledb/impl/thick/pool.pyx b/src/oracledb/impl/thick/pool.pyx index f03aeba7..9fbd87a5 100644 --- a/src/oracledb/impl/thick/pool.pyx +++ b/src/oracledb/impl/thick/pool.pyx @@ -141,7 +141,10 @@ cdef class ThickPoolImpl(BasePoolImpl): if password_bytes is not None: password_ptr = password_bytes password_len = len(password_bytes) - connect_string = params._get_connect_string() + if params.thick_mode_dsn_passthrough: + connect_string = self.dsn + else: + connect_string = params._get_connect_string() if connect_string is not None: connect_string_bytes = connect_string.encode() connect_string_ptr = connect_string_bytes diff --git a/src/oracledb/pool.py b/src/oracledb/pool.py index efcab9b0..b01ba65d 100644 --- a/src/oracledb/pool.py +++ b/src/oracledb/pool.py @@ -92,7 +92,7 @@ def __init__( params_impl = params._impl.copy() with driver_mode.get_manager() as mode_mgr: thin = mode_mgr.thin - dsn = params_impl.process_args(dsn, kwargs) + dsn = params_impl.process_args(dsn, kwargs, thin) self._set_connection_type(params_impl.connectiontype) self._cache_name = cache_name if cache_name is not None: @@ -671,6 +671,7 @@ def create_pool( osuser: Optional[str] = None, driver_name: Optional[str] = None, use_sni: Optional[bool] = None, + thick_mode_dsn_passthrough: Optional[bool] = None, handle: Optional[int] = None, ) -> ConnectionPool: """ @@ -944,6 +945,13 @@ def create_pool( bypass the second TLS neogiation that would otherwise be required (default: False) + - thick_mode_dsn_passthrough: boolean indicating whether to pass the + connect string to the Oracle Client libraries unchanged without parsing + by the driver. Setting this to False makes thick and thin mode + applications behave similarly regarding connection string parameter + handling and locating any optional tnsnames.ora configuration file + (default: oracledb.defaults.thick_mode_dsn_passthrough) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. 
It should be used with extreme caution (default: 0) @@ -1187,6 +1195,7 @@ def create_pool_async( osuser: Optional[str] = None, driver_name: Optional[str] = None, use_sni: Optional[bool] = None, + thick_mode_dsn_passthrough: Optional[bool] = None, handle: Optional[int] = None, ) -> AsyncConnectionPool: """ @@ -1461,6 +1470,13 @@ def create_pool_async( bypass the second TLS neogiation that would otherwise be required (default: False) + - thick_mode_dsn_passthrough: boolean indicating whether to pass the + connect string to the Oracle Client libraries unchanged without parsing + by the driver. Setting this to False makes thick and thin mode + applications behave similarly regarding connection string parameter + handling and locating any optional tnsnames.ora configuration file + (default: oracledb.defaults.thick_mode_dsn_passthrough) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) diff --git a/src/oracledb/pool_params.py b/src/oracledb/pool_params.py index 10788b6a..f5b67b0a 100644 --- a/src/oracledb/pool_params.py +++ b/src/oracledb/pool_params.py @@ -118,6 +118,7 @@ def __init__( osuser: Optional[str] = None, driver_name: Optional[str] = None, use_sni: Optional[bool] = None, + thick_mode_dsn_passthrough: Optional[bool] = None, handle: Optional[int] = None, ): """ @@ -371,6 +372,14 @@ def __init__( bypass the second TLS neogiation that would otherwise be required (default: False) + - thick_mode_dsn_passthrough: boolean indicating whether to pass the + connect string to the Oracle Client libraries unchanged without + parsing by the driver. Setting this to False makes thick and thin + mode applications behave similarly regarding connection string + parameter handling and locating any optional tnsnames.ora + configuration file (default: + oracledb.defaults.thick_mode_dsn_passthrough) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) @@ -439,7 +448,8 @@ def __repr__(self): + f"terminal={self.terminal!r}, " + f"osuser={self.osuser!r}, " + f"driver_name={self.driver_name!r}, " - + f"use_sni={self.use_sni!r}" + + f"use_sni={self.use_sni!r}, " + + f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}" + ")" ) @@ -638,6 +648,7 @@ def set( osuser: Optional[str] = None, driver_name: Optional[str] = None, use_sni: Optional[bool] = None, + thick_mode_dsn_passthrough: Optional[bool] = None, handle: Optional[int] = None, ): """ @@ -873,6 +884,13 @@ def set( - use_sni: boolean indicating whether to use the TLS SNI extension to bypass the second TLS neogiation that would otherwise be required + - thick_mode_dsn_passthrough: boolean indicating whether to pass the + connect string to the Oracle Client libraries unchanged without + parsing by the driver. Setting this to False makes thick and thin + mode applications behave similarly regarding connection string + parameter handling and locating any optional tnsnames.ora + configuration file + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. 
It should be used with extreme caution diff --git a/tests/test_4500_connect_params.py b/tests/test_4500_connect_params.py index 80f33134..0edda59a 100644 --- a/tests/test_4500_connect_params.py +++ b/tests/test_4500_connect_params.py @@ -690,6 +690,7 @@ def test_4539(self): ("osuser", "me"), ("driver_name", "custom_driver"), ("use_sni", True), + ("thick_mode_dsn_passthrough", True), ] params = oracledb.ConnectParams(**dict(values)) parts = [f"{name}={value!r}" for name, value in values] @@ -743,6 +744,7 @@ def test_4539(self): ("osuser", "modified_osuser"), ("driver_name", "modified_driver_name"), ("use_sni", False), + ("thick_mode_dsn_passthrough", False), ] params.set(**dict(new_values)) parts = [f"{name}={value!r}" for name, value in new_values] diff --git a/tests/test_4700_pool_params.py b/tests/test_4700_pool_params.py index dd510239..f3b7a643 100644 --- a/tests/test_4700_pool_params.py +++ b/tests/test_4700_pool_params.py @@ -131,6 +131,7 @@ def test_4701(self): ("osuser", "me"), ("driver_name", "custom_driver"), ("use_sni", True), + ("thick_mode_dsn_passthrough", True), ] params = oracledb.PoolParams(**dict(values)) parts = [f"{name}={value!r}" for name, value in values] diff --git a/tests/test_env.py b/tests/test_env.py index 0630c715..660d814b 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -95,6 +95,7 @@ def _initialize(): """ if not get_is_thin(): oracledb.init_oracle_client() + oracledb.defaults.thick_mode_dsn_passthrough = False plugin_names = os.environ.get("PYO_TEST_PLUGINS") if plugin_names is not None: for name in plugin_names.split(","): diff --git a/utils/fields.cfg b/utils/fields.cfg index fff9fb9c..d308a402 100644 --- a/utils/fields.cfg +++ b/utils/fields.cfg @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2022, 2024, Oracle and/or its affiliates. +# Copyright (c) 2022, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -502,6 +502,16 @@ description = boolean indicating whether to use the TLS SNI extension to bypass the second TLS neogiation that would otherwise be required +[thick_mode_dsn_passthrough] +type = bool +default = oracledb.defaults.thick_mode_dsn_passthrough +description = + boolean indicating whether to pass the connect string to the Oracle Client + libraries unchanged without parsing by the driver. 
Setting this to False + makes thick and thin mode applications behave similarly regarding + connection string parameter handling and locating any optional tnsnames.ora + configuration file + [handle] type = int default = 0 diff --git a/utils/templates/connection.py b/utils/templates/connection.py index e914006a..eaabc2f3 100644 --- a/utils/templates/connection.py +++ b/utils/templates/connection.py @@ -547,7 +547,7 @@ def __init__( errors._raise_err(errors.ERR_INVALID_CONNECT_PARAMS) else: params_impl = params._impl.copy() - dsn = params_impl.process_args(dsn, kwargs) + dsn = params_impl.process_args(dsn, kwargs, thin) # see if connection is being acquired from a pool if pool is None: @@ -1316,7 +1316,7 @@ async def _connect(self, dsn, pool, params, kwargs): errors._raise_err(errors.ERR_INVALID_CONNECT_PARAMS) else: params_impl = params._impl.copy() - dsn = params_impl.process_args(dsn, kwargs) + dsn = params_impl.process_args(dsn, kwargs, thin=True) # see if connection is being acquired from a pool if pool is None: diff --git a/utils/templates/pool.py b/utils/templates/pool.py index a1248f23..dcaa0a51 100644 --- a/utils/templates/pool.py +++ b/utils/templates/pool.py @@ -90,7 +90,7 @@ def __init__( params_impl = params._impl.copy() with driver_mode.get_manager() as mode_mgr: thin = mode_mgr.thin - dsn = params_impl.process_args(dsn, kwargs) + dsn = params_impl.process_args(dsn, kwargs, thin) self._set_connection_type(params_impl.connectiontype) self._cache_name = cache_name if cache_name is not None: From 92fb4489dc3248a2be1a5c77b6e5919ab74bcd23 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 14:04:03 -0700 Subject: [PATCH 037/178] Improve calculation of default configuration directory. --- doc/src/release_notes.rst | 9 +++++++++ src/oracledb/impl/base/defaults.pyx | 4 ++++ src/oracledb/impl/thick/utils.pyx | 6 +++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 402c246e..cebbbbb9 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -72,6 +72,12 @@ Thin Mode Changes Thick Mode Changes ++++++++++++++++++ +#) The value of :attr:`defaults.config_dir` is now calculated from the + location of the Oracle Client shared library on some platforms. If a value + is supplied to the ``config_dir`` parameter of + :meth:`oracledb.init_oracle_client()`, then the value of + :attr:`defaults.config_dir` is set to that value after the call completes + successfully. #) Fixed bug that caused :attr:`oracledb._Error.isrecoverable` to always be `False`. @@ -88,6 +94,9 @@ Common Changes #) Added :meth:`oracledb.register_password_type()` to allow users to register a function that will be called when a password is supplied as a dictionary containing the key "type". +#) Set the default value of :attr:`defaults.config_dir` to + ``$ORACLE_HOME/network/admin`` if the environment variable ``ORACLE_HOME`` + is set. #) All connect strings are parsed by the driver if the new parameter ``thick_mode_dsn_passthrough`` is set to *True*. 
Previously, only Thin mode parsed all connect strings and Thick mode passed the connect string diff --git a/src/oracledb/impl/base/defaults.pyx b/src/oracledb/impl/base/defaults.pyx index 751b9efe..1acd65ff 100644 --- a/src/oracledb/impl/base/defaults.pyx +++ b/src/oracledb/impl/base/defaults.pyx @@ -33,6 +33,10 @@ cdef class DefaultsImpl: def __init__(self): self.arraysize = 100 self.config_dir = os.environ.get("TNS_ADMIN") + if self.config_dir is None: + oracle_home = os.environ.get("ORACLE_HOME") + if oracle_home is not None: + self.config_dir = os.path.join(oracle_home, "network", "admin") self.fetch_lobs = True self.fetch_decimals = False self.prefetchrows = 2 diff --git a/src/oracledb/impl/thick/utils.pyx b/src/oracledb/impl/thick/utils.pyx index a030ba93..639a4887 100644 --- a/src/oracledb/impl/thick/utils.pyx +++ b/src/oracledb/impl/thick/utils.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -531,6 +531,8 @@ def init_oracle_client(lib_dir=None, config_dir=None, error_url=None, params.useJsonId = True if config_dir is None: config_dir = C_DEFAULTS.config_dir + else: + C_DEFAULTS.config_dir = config_dir if lib_dir is not None: if isinstance(lib_dir, bytes): lib_dir_bytes = lib_dir @@ -558,6 +560,8 @@ def init_oracle_client(lib_dir=None, config_dir=None, error_url=None, ¶ms, &driver_info.context, &error_info) < 0: _raise_from_info(&error_info) + if config_dir is None and params.oracleClientConfigDir != NULL: + C_DEFAULTS.config_dir = params.oracleClientConfigDir.decode() if dpiContext_getClientVersion(driver_info.context, &driver_info.client_version_info) < 0: _raise_from_odpi() From 65cbf6e7f2e347f04aae04193d8c48c3d1635612 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 14:05:13 -0700 Subject: [PATCH 038/178] Improved support for planned database maintenance by internally sending explicit request boundaries when using python-oracledb connection pools. --- doc/src/release_notes.rst | 2 + src/oracledb/base_impl.pxd | 1 + src/oracledb/impl/thin/capabilities.pyx | 3 +- src/oracledb/impl/thin/connection.pyx | 1 + src/oracledb/impl/thin/constants.pxi | 8 + src/oracledb/impl/thin/messages.pyx | 445 +++++++++++++----------- src/oracledb/impl/thin/pool.pyx | 16 +- src/oracledb/impl/thin/protocol.pyx | 12 +- 8 files changed, 277 insertions(+), 211 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index cebbbbb9..522e8db2 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -27,6 +27,8 @@ Thin Mode Changes #) Added parameter :data:`ConnectParams.use_sni` to specify that the TLS SNI extension should be used to reduce the number of TLS neegotiations that are needed to connect to the database. +#) Improved support for planned database maintenance by internally sending + explicit request boundaries when using python-oracledb connection pools. #) Added parameter :data:`ConnectParams.instance_name` to specify the instance name to use when connecting to the database. Added support for setting the instance name in :ref:`Easy Connect strings `. 
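The request boundaries described in the release note above are handled internally by the pool: the changes below mark a connection as being in a request when it is acquired and send the matching end boundary when it is released back to the pool, so no application changes are required. A minimal sketch of ordinary pool usage that benefits from this behaviour is shown here; the credentials and connect string are placeholders and are not taken from this patch::

    import oracledb

    # Placeholder credentials and connect string -- substitute real values.
    pool = oracledb.create_pool(
        user="app_user",
        password="app_password",
        dsn="dbhost.example.com/orclpdb1",
        min=1,
        max=4,
    )

    # Each acquire/release cycle is treated as one "request": the begin
    # boundary is written as a piggyback on the first round trip after
    # acquire(), and the end boundary is sent when the connection goes
    # back to the pool at the end of the "with" block.
    with pool.acquire() as connection:
        with connection.cursor() as cursor:
            cursor.execute("select user from dual")
            print(cursor.fetchone()[0])

The boundary markers themselves are sent as session-state piggybacks on existing round trips (see the ``_write_session_state_piggyback`` addition later in this patch), so they add no extra network traffic to the application's normal workload.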
diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index af7581df..8160209f 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -616,6 +616,7 @@ cdef class BaseConnImpl: readonly bint supports_bool ssize_t _oson_max_fname_size bint _allow_bind_str_to_lob + bint _in_request cdef object _check_value(self, OracleMetadata type_info, object value, bint* is_ok) diff --git a/src/oracledb/impl/thin/capabilities.pyx b/src/oracledb/impl/thin/capabilities.pyx index d79491ac..452a647f 100644 --- a/src/oracledb/impl/thin/capabilities.pyx +++ b/src/oracledb/impl/thin/capabilities.pyx @@ -126,7 +126,8 @@ cdef class Capabilities: self.compile_caps[TNS_CCAP_TTC2] = TNS_CCAP_ZLNP self.compile_caps[TNS_CCAP_OCI2] = TNS_CCAP_DRCP self.compile_caps[TNS_CCAP_CLIENT_FN] = TNS_CCAP_CLIENT_FN_MAX - self.compile_caps[TNS_CCAP_TTC4] = TNS_CCAP_INBAND_NOTIFICATION + self.compile_caps[TNS_CCAP_TTC4] = TNS_CCAP_INBAND_NOTIFICATION | \ + TNS_CCAP_EXPLICIT_BOUNDARY self.compile_caps[TNS_CCAP_TTC5] = TNS_CCAP_VECTOR_SUPPORT | \ TNS_CCAP_TOKEN_SUPPORTED | TNS_CCAP_PIPELINING_SUPPORT | \ TNS_CCAP_PIPELINING_BREAK diff --git a/src/oracledb/impl/thin/connection.pyx b/src/oracledb/impl/thin/connection.pyx index a3d896d0..0650424a 100644 --- a/src/oracledb/impl/thin/connection.pyx +++ b/src/oracledb/impl/thin/connection.pyx @@ -73,6 +73,7 @@ cdef class BaseThinConnImpl(BaseConnImpl): bint _is_pool_extra bytes _transaction_context uint8_t pipeline_mode + uint8_t _session_state_desired def __init__(self, str dsn, ConnectParamsImpl params): if not HAS_CRYPTOGRAPHY: diff --git a/src/oracledb/impl/thin/constants.pxi b/src/oracledb/impl/thin/constants.pxi index 2a433505..26c8d50f 100644 --- a/src/oracledb/impl/thin/constants.pxi +++ b/src/oracledb/impl/thin/constants.pxi @@ -296,6 +296,7 @@ cdef enum: TNS_FUNC_REEXECUTE_AND_FETCH = 78 TNS_FUNC_SESSION_GET = 162 TNS_FUNC_SESSION_RELEASE = 163 + TNS_FUNC_SESSION_STATE = 176 TNS_FUNC_SET_SCHEMA = 152 TNS_FUNC_TPC_TXN_SWITCH = 103 TNS_FUNC_TPC_TXN_CHANGE_STATE = 104 @@ -399,6 +400,7 @@ cdef enum: TNS_CCAP_DRCP = 0x10 TNS_CCAP_ZLNP = 0x04 TNS_CCAP_INBAND_NOTIFICATION = 0x04 + TNS_CCAP_EXPLICIT_BOUNDARY = 0x40 TNS_CCAP_END_OF_RESPONSE = 0x20 TNS_CCAP_CLIENT_FN_MAX = 12 TNS_CCAP_VECTOR_SUPPORT = 0x08 @@ -468,6 +470,12 @@ cdef enum: TNS_PIPELINE_MODE_CONTINUE_ON_ERROR = 1 TNS_PIPELINE_MODE_ABORT_ON_ERROR = 2 +# session state flags +cdef enum: + TNS_SESSION_STATE_REQUEST_BEGIN = 0x04 + TNS_SESSION_STATE_REQUEST_END = 0x08 + TNS_SESSION_STATE_EXPLICIT_BOUNDARY = 0x40 + # other constants cdef enum: TNS_ESCAPE_CHAR = 253 diff --git a/src/oracledb/impl/thin/messages.pyx b/src/oracledb/impl/thin/messages.pyx index 90af950c..d4b469eb 100644 --- a/src/oracledb/impl/thin/messages.pyx +++ b/src/oracledb/impl/thin/messages.pyx @@ -328,7 +328,210 @@ cdef class Message: self.warning = errors._Error(message, code=error_num, iswarning=True) + cdef int _write_begin_pipeline_piggyback(self, WriteBuffer buf) except -1: + """ + Writes the piggyback to the server that informs the server that a + pipeline is beginning. + """ + buf._data_flags |= TNS_DATA_FLAGS_BEGIN_PIPELINE + self._write_piggyback_code(buf, TNS_FUNC_PIPELINE_BEGIN) + buf.write_ub2(0) # error set ID + buf.write_uint8(0) # error set mode + buf.write_uint8(self.conn_impl.pipeline_mode) + + cdef int _write_close_cursors_piggyback(self, WriteBuffer buf) except -1: + """ + Writes the piggyback that informs the server of the cursors that can be + closed. 
+ """ + self._write_piggyback_code(buf, TNS_FUNC_CLOSE_CURSORS) + buf.write_uint8(1) # pointer + self.conn_impl._statement_cache.write_cursors_to_close(buf) + + cdef int _write_current_schema_piggyback(self, WriteBuffer buf) except -1: + """ + Writes the piggyback that informs the server that a new current schema + is desired. + """ + cdef bytes schema_bytes + self._write_piggyback_code(buf, TNS_FUNC_SET_SCHEMA) + buf.write_uint8(1) # pointer + schema_bytes = self.conn_impl._current_schema.encode() + buf.write_ub4(len(schema_bytes)) + buf.write_bytes_with_length(schema_bytes) + + cdef int _write_close_temp_lobs_piggyback(self, + WriteBuffer buf) except -1: + """ + Writes the piggyback that informs the server of the temporary LOBs that + can be closed. + """ + cdef: + list lobs_to_close = self.conn_impl._temp_lobs_to_close + uint64_t total_size = 0 + self._write_piggyback_code(buf, TNS_FUNC_LOB_OP) + op_code = TNS_LOB_OP_FREE_TEMP | TNS_LOB_OP_ARRAY + + # temp lob data + buf.write_uint8(1) # pointer + buf.write_ub4(self.conn_impl._temp_lobs_total_size) + buf.write_uint8(0) # dest lob locator + buf.write_ub4(0) + buf.write_ub4(0) # source lob locator + buf.write_ub4(0) + buf.write_uint8(0) # source lob offset + buf.write_uint8(0) # dest lob offset + buf.write_uint8(0) # charset + buf.write_ub4(op_code) + buf.write_uint8(0) # scn + buf.write_ub4(0) # losbscn + buf.write_ub8(0) # lobscnl + buf.write_ub8(0) + buf.write_uint8(0) + + # array lob fields + buf.write_uint8(0) + buf.write_ub4(0) + buf.write_uint8(0) + buf.write_ub4(0) + buf.write_uint8(0) + buf.write_ub4(0) + for i in range(len(lobs_to_close)): + buf.write_bytes(lobs_to_close[i]) + + # reset values + self.conn_impl._temp_lobs_to_close = None + self.conn_impl._temp_lobs_total_size = 0 + + cdef int _write_end_to_end_piggyback(self, WriteBuffer buf) except -1: + """ + Writes the piggyback that informs the server of end-to-end attributes + that are being changed. 
+ """ + cdef: + bytes action_bytes, client_identifier_bytes, client_info_bytes + BaseThinConnImpl conn_impl = self.conn_impl + bytes module_bytes, dbop_bytes + uint32_t flags = 0 + + # determine which flags to send + if conn_impl._action_modified: + flags |= TNS_END_TO_END_ACTION + if conn_impl._client_identifier_modified: + flags |= TNS_END_TO_END_CLIENT_IDENTIFIER + if conn_impl._client_info_modified: + flags |= TNS_END_TO_END_CLIENT_INFO + if conn_impl._module_modified: + flags |= TNS_END_TO_END_MODULE + if conn_impl._dbop_modified: + flags |= TNS_END_TO_END_DBOP + + # write initial packet data + self._write_piggyback_code(buf, TNS_FUNC_SET_END_TO_END_ATTR) + buf.write_uint8(0) # pointer (cidnam) + buf.write_uint8(0) # pointer (cidser) + buf.write_ub4(flags) + + # write client identifier header info + if conn_impl._client_identifier_modified: + buf.write_uint8(1) # pointer (client identifier) + if conn_impl._client_identifier is None: + buf.write_ub4(0) + else: + client_identifier_bytes = conn_impl._client_identifier.encode() + buf.write_ub4(len(client_identifier_bytes)) + else: + buf.write_uint8(0) # pointer (client identifier) + buf.write_ub4(0) # length of client identifier + + # write module header info + if conn_impl._module_modified: + buf.write_uint8(1) # pointer (module) + if conn_impl._module is None: + buf.write_ub4(0) + else: + module_bytes = conn_impl._module.encode() + buf.write_ub4(len(module_bytes)) + else: + buf.write_uint8(0) # pointer (module) + buf.write_ub4(0) # length of module + + # write action header info + if conn_impl._action_modified: + buf.write_uint8(1) # pointer (action) + if conn_impl._action is None: + buf.write_ub4(0) + else: + action_bytes = conn_impl._action.encode() + buf.write_ub4(len(action_bytes)) + else: + buf.write_uint8(0) # pointer (action) + buf.write_ub4(0) # length of action + + # write unsupported bits + buf.write_uint8(0) # pointer (cideci) + buf.write_ub4(0) # length (cideci) + buf.write_uint8(0) # cidcct + buf.write_ub4(0) # cidecs + + # write client info header info + if conn_impl._client_info_modified: + buf.write_uint8(1) # pointer (client info) + if conn_impl._client_info is None: + buf.write_ub4(0) + else: + client_info_bytes = conn_impl._client_info.encode() + buf.write_ub4(len(client_info_bytes)) + else: + buf.write_uint8(0) # pointer (client info) + buf.write_ub4(0) # length of client info + + # write more unsupported bits + buf.write_uint8(0) # pointer (cidkstk) + buf.write_ub4(0) # length (cidkstk) + buf.write_uint8(0) # pointer (cidktgt) + buf.write_ub4(0) # length (cidktgt) + + # write dbop header info + if conn_impl._dbop_modified: + buf.write_uint8(1) # pointer (dbop) + if conn_impl._dbop is None: + buf.write_ub4(0) + else: + dbop_bytes = conn_impl._dbop.encode() + buf.write_ub4(len(dbop_bytes)) + else: + buf.write_uint8(0) # pointer (dbop) + buf.write_ub4(0) # length of dbop + + # write strings + if conn_impl._client_identifier_modified \ + and conn_impl._client_identifier is not None: + buf.write_bytes_with_length(client_identifier_bytes) + if conn_impl._module_modified and conn_impl._module is not None: + buf.write_bytes_with_length(module_bytes) + if conn_impl._action_modified and conn_impl._action is not None: + buf.write_bytes_with_length(action_bytes) + if conn_impl._client_info_modified \ + and conn_impl._client_info is not None: + buf.write_bytes_with_length(client_info_bytes) + if conn_impl._dbop_modified and conn_impl._dbop is not None: + buf.write_bytes_with_length(dbop_bytes) + + # reset flags and values + 
conn_impl._action_modified = False + conn_impl._action = None + conn_impl._client_identifier_modified = False + conn_impl._client_identifier = None + conn_impl._client_info_modified = False + conn_impl._client_info = None + conn_impl._dbop_modified = False + conn_impl._dbop = None + conn_impl._module_modified = False + conn_impl._module = None + cdef int _write_function_code(self, WriteBuffer buf) except -1: + self._write_piggybacks(buf) buf.write_uint8(self.message_type) buf.write_uint8(self.function_code) buf.write_seq_num() @@ -340,12 +543,50 @@ cdef class Message: cdef int _write_piggyback_code(self, WriteBuffer buf, uint8_t code) except -1: + """ + Writes the header for piggybacks for the specified function code. + """ buf.write_uint8(TNS_MSG_TYPE_PIGGYBACK) buf.write_uint8(code) buf.write_seq_num() if buf._caps.ttc_field_version >= TNS_CCAP_FIELD_VERSION_23_1_EXT_1: buf.write_ub8(self.token_num) + cdef int _write_piggybacks(self, WriteBuffer buf) except -1: + """ + Writes all of the piggybacks to the server. + """ + if self.conn_impl.pipeline_mode != 0: + self._write_begin_pipeline_piggyback(buf) + self.conn_impl.pipeline_mode = 0 + if self.conn_impl._current_schema_modified: + self._write_current_schema_piggyback(buf) + if self.conn_impl._statement_cache is not None \ + and self.conn_impl._statement_cache._num_cursors_to_close > 0 \ + and not self.conn_impl._drcp_establish_session: + self._write_close_cursors_piggyback(buf) + if self.conn_impl._action_modified \ + or self.conn_impl._client_identifier_modified \ + or self.conn_impl._client_info_modified \ + or self.conn_impl._dbop_modified \ + or self.conn_impl._module_modified: + self._write_end_to_end_piggyback(buf) + if self.conn_impl._temp_lobs_total_size > 0: + self._write_close_temp_lobs_piggyback(buf) + if self.conn_impl._session_state_desired != 0: + self._write_session_state_piggyback(buf) + + cdef int _write_session_state_piggyback(self, WriteBuffer buf) except -1: + """ + Write the session state piggyback. This is used to let the database + know when the client is beginning and ending a request. The database + uses this information to optimise its resources. 
+ """ + cdef uint8_t state = self.conn_impl._session_state_desired + self._write_piggyback_code(buf, TNS_FUNC_SESSION_STATE) + buf.write_ub8(state | TNS_SESSION_STATE_EXPLICIT_BOUNDARY) + self.conn_impl._session_state_desired = 0 + cdef int postprocess(self) except -1: pass @@ -1002,13 +1243,6 @@ cdef class MessageWithData(Message): if buf._caps.ttc_field_version >= TNS_CCAP_FIELD_VERSION_12_2: buf.write_ub4(0) # oaccolid - cdef int _write_begin_pipeline_piggyback(self, WriteBuffer buf) except -1: - buf._data_flags |= TNS_DATA_FLAGS_BEGIN_PIPELINE - self._write_piggyback_code(buf, TNS_FUNC_PIPELINE_BEGIN) - buf.write_ub2(0) # error set ID - buf.write_uint8(0) # error set mode - buf.write_uint8(self.conn_impl.pipeline_mode) - cdef int _write_bind_params_column(self, WriteBuffer buf, OracleMetadata metadata, object value) except -1: @@ -1138,199 +1372,6 @@ cdef class MessageWithData(Message): self._write_bind_params_column(buf, metadata, var_impl._values[pos + offset]) - cdef int _write_close_cursors_piggyback(self, WriteBuffer buf) except -1: - self._write_piggyback_code(buf, TNS_FUNC_CLOSE_CURSORS) - buf.write_uint8(1) # pointer - self.conn_impl._statement_cache.write_cursors_to_close(buf) - - cdef int _write_current_schema_piggyback(self, WriteBuffer buf) except -1: - cdef bytes schema_bytes - self._write_piggyback_code(buf, TNS_FUNC_SET_SCHEMA) - buf.write_uint8(1) # pointer - schema_bytes = self.conn_impl._current_schema.encode() - buf.write_ub4(len(schema_bytes)) - buf.write_bytes_with_length(schema_bytes) - - cdef int _write_close_temp_lobs_piggyback(self, - WriteBuffer buf) except -1: - cdef: - list lobs_to_close = self.conn_impl._temp_lobs_to_close - uint64_t total_size = 0 - self._write_piggyback_code(buf, TNS_FUNC_LOB_OP) - op_code = TNS_LOB_OP_FREE_TEMP | TNS_LOB_OP_ARRAY - - # temp lob data - buf.write_uint8(1) # pointer - buf.write_ub4(self.conn_impl._temp_lobs_total_size) - buf.write_uint8(0) # dest lob locator - buf.write_ub4(0) - buf.write_ub4(0) # source lob locator - buf.write_ub4(0) - buf.write_uint8(0) # source lob offset - buf.write_uint8(0) # dest lob offset - buf.write_uint8(0) # charset - buf.write_ub4(op_code) - buf.write_uint8(0) # scn - buf.write_ub4(0) # losbscn - buf.write_ub8(0) # lobscnl - buf.write_ub8(0) - buf.write_uint8(0) - - # array lob fields - buf.write_uint8(0) - buf.write_ub4(0) - buf.write_uint8(0) - buf.write_ub4(0) - buf.write_uint8(0) - buf.write_ub4(0) - for i in range(len(lobs_to_close)): - buf.write_bytes(lobs_to_close[i]) - - # reset values - self.conn_impl._temp_lobs_to_close = None - self.conn_impl._temp_lobs_total_size = 0 - - cdef int _write_end_to_end_piggyback(self, WriteBuffer buf) except -1: - cdef: - bytes action_bytes, client_identifier_bytes, client_info_bytes - BaseThinConnImpl conn_impl = self.conn_impl - bytes module_bytes, dbop_bytes - uint32_t flags = 0 - - # determine which flags to send - if conn_impl._action_modified: - flags |= TNS_END_TO_END_ACTION - if conn_impl._client_identifier_modified: - flags |= TNS_END_TO_END_CLIENT_IDENTIFIER - if conn_impl._client_info_modified: - flags |= TNS_END_TO_END_CLIENT_INFO - if conn_impl._module_modified: - flags |= TNS_END_TO_END_MODULE - if conn_impl._dbop_modified: - flags |= TNS_END_TO_END_DBOP - - # write initial packet data - self._write_piggyback_code(buf, TNS_FUNC_SET_END_TO_END_ATTR) - buf.write_uint8(0) # pointer (cidnam) - buf.write_uint8(0) # pointer (cidser) - buf.write_ub4(flags) - - # write client identifier header info - if conn_impl._client_identifier_modified: - 
buf.write_uint8(1) # pointer (client identifier) - if conn_impl._client_identifier is None: - buf.write_ub4(0) - else: - client_identifier_bytes = conn_impl._client_identifier.encode() - buf.write_ub4(len(client_identifier_bytes)) - else: - buf.write_uint8(0) # pointer (client identifier) - buf.write_ub4(0) # length of client identifier - - # write module header info - if conn_impl._module_modified: - buf.write_uint8(1) # pointer (module) - if conn_impl._module is None: - buf.write_ub4(0) - else: - module_bytes = conn_impl._module.encode() - buf.write_ub4(len(module_bytes)) - else: - buf.write_uint8(0) # pointer (module) - buf.write_ub4(0) # length of module - - # write action header info - if conn_impl._action_modified: - buf.write_uint8(1) # pointer (action) - if conn_impl._action is None: - buf.write_ub4(0) - else: - action_bytes = conn_impl._action.encode() - buf.write_ub4(len(action_bytes)) - else: - buf.write_uint8(0) # pointer (action) - buf.write_ub4(0) # length of action - - # write unsupported bits - buf.write_uint8(0) # pointer (cideci) - buf.write_ub4(0) # length (cideci) - buf.write_uint8(0) # cidcct - buf.write_ub4(0) # cidecs - - # write client info header info - if conn_impl._client_info_modified: - buf.write_uint8(1) # pointer (client info) - if conn_impl._client_info is None: - buf.write_ub4(0) - else: - client_info_bytes = conn_impl._client_info.encode() - buf.write_ub4(len(client_info_bytes)) - else: - buf.write_uint8(0) # pointer (client info) - buf.write_ub4(0) # length of client info - - # write more unsupported bits - buf.write_uint8(0) # pointer (cidkstk) - buf.write_ub4(0) # length (cidkstk) - buf.write_uint8(0) # pointer (cidktgt) - buf.write_ub4(0) # length (cidktgt) - - # write dbop header info - if conn_impl._dbop_modified: - buf.write_uint8(1) # pointer (dbop) - if conn_impl._dbop is None: - buf.write_ub4(0) - else: - dbop_bytes = conn_impl._dbop.encode() - buf.write_ub4(len(dbop_bytes)) - else: - buf.write_uint8(0) # pointer (dbop) - buf.write_ub4(0) # length of dbop - - # write strings - if conn_impl._client_identifier_modified \ - and conn_impl._client_identifier is not None: - buf.write_bytes_with_length(client_identifier_bytes) - if conn_impl._module_modified and conn_impl._module is not None: - buf.write_bytes_with_length(module_bytes) - if conn_impl._action_modified and conn_impl._action is not None: - buf.write_bytes_with_length(action_bytes) - if conn_impl._client_info_modified \ - and conn_impl._client_info is not None: - buf.write_bytes_with_length(client_info_bytes) - if conn_impl._dbop_modified and conn_impl._dbop is not None: - buf.write_bytes_with_length(dbop_bytes) - - # reset flags and values - conn_impl._action_modified = False - conn_impl._action = None - conn_impl._client_identifier_modified = False - conn_impl._client_identifier = None - conn_impl._client_info_modified = False - conn_impl._client_info = None - conn_impl._dbop_modified = False - conn_impl._dbop = None - conn_impl._module_modified = False - conn_impl._module = None - - cdef int _write_piggybacks(self, WriteBuffer buf) except -1: - if self.conn_impl.pipeline_mode != 0: - self._write_begin_pipeline_piggyback(buf) - self.conn_impl.pipeline_mode = 0 - if self.conn_impl._current_schema_modified: - self._write_current_schema_piggyback(buf) - if self.conn_impl._statement_cache._num_cursors_to_close > 0 \ - and not self.conn_impl._drcp_establish_session: - self._write_close_cursors_piggyback(buf) - if self.conn_impl._action_modified \ - or self.conn_impl._client_identifier_modified 
\ - or self.conn_impl._client_info_modified \ - or self.conn_impl._dbop_modified \ - or self.conn_impl._module_modified: - self._write_end_to_end_piggyback(buf) - if self.conn_impl._temp_lobs_total_size > 0: - self._write_close_temp_lobs_piggyback(buf) - cdef int postprocess(self) except -1: """ Run any variable out converter functions on all non-null values that @@ -2088,9 +2129,6 @@ cdef class ExecuteMessage(MessageWithData): if self.conn_impl.autocommit and not self.parse_only: options |= TNS_EXEC_OPTION_COMMIT - # write piggybacks, if needed - self._write_piggybacks(buf) - # write body of message self._write_function_code(buf) buf.write_ub4(options) # execute options @@ -2208,7 +2246,6 @@ cdef class ExecuteMessage(MessageWithData): exec_flags_2 |= TNS_EXEC_OPTION_COMMIT_REEXECUTE num_iters = self.num_execs - self._write_piggybacks(buf) self._write_function_code(buf) buf.write_ub4(stmt._cursor_id) buf.write_ub4(num_iters) diff --git a/src/oracledb/impl/thin/pool.pyx b/src/oracledb/impl/thin/pool.pyx index f34c758e..1ebfcb54 100644 --- a/src/oracledb/impl/thin/pool.pyx +++ b/src/oracledb/impl/thin/pool.pyx @@ -194,6 +194,16 @@ cdef class BaseThinPoolImpl(BasePoolImpl): return request break + cdef BaseThinConnImpl _post_acquire(self, BaseThinConnImpl conn_impl): + """ + Called after an acquire has succeeded. The connection is added to the + list of busy connections and is marked as being in a request. + """ + self._busy_conn_impls.append(conn_impl) + conn_impl._session_state_desired = TNS_SESSION_STATE_REQUEST_BEGIN + conn_impl._in_request = True + return conn_impl + cdef int _post_create_conn_impl(self, BaseThinConnImpl conn_impl) except -1: """ @@ -636,8 +646,7 @@ cdef class ThinPoolImpl(BaseThinPoolImpl): request.waiting = False if not request.completed: errors._raise_err(errors.ERR_POOL_NO_CONNECTION_AVAILABLE) - self._busy_conn_impls.append(request.conn_impl) - return request.conn_impl + return self._post_acquire(request.conn_impl) def close(self, bint force): """ @@ -828,8 +837,7 @@ cdef class AsyncThinPoolImpl(BaseThinPoolImpl): ) except asyncio.TimeoutError: errors._raise_err(errors.ERR_POOL_NO_CONNECTION_AVAILABLE) - self._busy_conn_impls.append(request.conn_impl) - return request.conn_impl + return self._post_acquire(request.conn_impl) async def close(self, bint force): """ diff --git a/src/oracledb/impl/thin/protocol.pyx b/src/oracledb/impl/thin/protocol.pyx index 9a65bbc0..d9cb428f 100644 --- a/src/oracledb/impl/thin/protocol.pyx +++ b/src/oracledb/impl/thin/protocol.pyx @@ -167,9 +167,17 @@ cdef class Protocol(BaseProtocol): self._force_close() # rollback any open transaction and release the DRCP session, if - # applicable + # applicable; end the request, if one was started (and that + # information made it to the database) if self._transport is not None: - if self._txn_in_progress: + if conn_impl._in_request \ + and conn_impl._session_state_desired != 0: + conn_impl._in_request = False + if self._txn_in_progress or conn_impl._in_request: + if conn_impl._in_request: + conn_impl._session_state_desired = \ + TNS_SESSION_STATE_REQUEST_END + conn_impl._in_request = False if conn_impl._transaction_context is not None: message = conn_impl._create_tpc_rollback_message() else: From 084ecf0a6ce5e8edf5a93a0740ddd3c7b0ea5955 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Thu, 13 Feb 2025 14:05:52 -0700 Subject: [PATCH 039/178] Remove unneded code. 
--- src/oracledb/impl/thin/connection.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/oracledb/impl/thin/connection.pyx b/src/oracledb/impl/thin/connection.pyx index 0650424a..e1fc0360 100644 --- a/src/oracledb/impl/thin/connection.pyx +++ b/src/oracledb/impl/thin/connection.pyx @@ -523,7 +523,6 @@ cdef class ThinConnImpl(BaseThinConnImpl): if message.state != TNS_TPC_TXN_STATE_ABORTED: errors._raise_err(errors.ERR_UNKNOWN_TRANSACTION_STATE, state=message.state) - self._transaction_context = None cdef class AsyncThinConnImpl(BaseThinConnImpl): @@ -1095,4 +1094,3 @@ cdef class AsyncThinConnImpl(BaseThinConnImpl): if message.state != TNS_TPC_TXN_STATE_ABORTED: errors._raise_err(errors.ERR_UNKNOWN_TRANSACTION_STATE, state=message.state) - self._transaction_context = None From 4d487c53c0561bf350f92d9afc38a2727a9b7c03 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Fri, 14 Feb 2025 09:19:04 -0700 Subject: [PATCH 040/178] Add AQ support in thin mode for single enqueue and dequeue of RAW and Oracle object payload types (#437). --- doc/src/release_notes.rst | 2 + doc/src/user_guide/appendix_a.rst | 2 +- doc/src/user_guide/aq.rst | 60 +++- samples/multi_consumer_aq.py | 7 +- samples/object_aq.py | 7 +- samples/raw_aq.py | 7 +- src/oracledb/base_impl.pxd | 4 + src/oracledb/errors.py | 2 + src/oracledb/impl/base/buffer.pyx | 22 +- src/oracledb/impl/base/connection.pyx | 5 +- src/oracledb/impl/thin/capabilities.pyx | 1 + src/oracledb/impl/thin/connection.pyx | 9 + src/oracledb/impl/thin/constants.pxi | 63 ++++ src/oracledb/impl/thin/messages.pyx | 345 +++++++++++++++++++ src/oracledb/impl/thin/packet.pyx | 22 +- src/oracledb/impl/thin/queue.pyx | 420 ++++++++++++++++++++++++ src/oracledb/thin_impl.pyx | 7 + tests/test_2700_aq.py | 40 ++- tests/test_7800_aq_raw.py | 400 ++++++++++++++++++++++ 19 files changed, 1393 insertions(+), 32 deletions(-) create mode 100644 src/oracledb/impl/thin/queue.pyx create mode 100644 tests/test_7800_aq_raw.py diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 522e8db2..d24e4834 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -17,6 +17,8 @@ oracledb 3.0.0 (TBD) Thin Mode Changes +++++++++++++++++ +#) Added :ref:`Oracle Advanced Queuing ` support for single + enqueue and dequeue of RAW and Oracle object payload types. #) Added namespace package :ref:`oracledb.plugins ` for plugins that can be used to extend the capability of python-oracledb. #) Added support for property :attr:`ConnectionPool.max_lifetime_session` diff --git a/doc/src/user_guide/appendix_a.rst b/doc/src/user_guide/appendix_a.rst index 5715200a..117b8f77 100644 --- a/doc/src/user_guide/appendix_a.rst +++ b/doc/src/user_guide/appendix_a.rst @@ -248,7 +248,7 @@ see :ref:`driverdiff` and :ref:`compatibility`. - Yes - Yes * - Oracle Transactional Event Queues and Advanced Queuing (AQ) (see :ref:`aqusermanual`) - - No + - Yes - RAW and named Oracle object payloads - Yes - Yes * - Call timeouts (see :attr:`Connection.call_timeout`) diff --git a/doc/src/user_guide/aq.rst b/doc/src/user_guide/aq.rst index 72c87c06..5d06e7fc 100644 --- a/doc/src/user_guide/aq.rst +++ b/doc/src/user_guide/aq.rst @@ -15,10 +15,6 @@ receiving of various payloads, such as RAW values, JSON, JMS, and objects. Transactional Event Queues use a highly optimized implementation of Advanced Queuing. They were previously called AQ Sharded Queues. -.. note:: - - TxEventQ and AQ Classic queues are only supported in python-oracledb Thick - mode. See :ref:`enablingthick`. 
Python-oracledb API calls are the same for Transactional Event Queues and Classic Queues, however there are differences in support for some payload @@ -31,11 +27,18 @@ types. - The JSON payload requires Oracle Client libraries 21c (or later) and Oracle Database 21c (or later). -There are examples of AQ Classic Queues in the `GitHub examples +JSON and JMS payloads, array message queuing and dequeuing operations, and +:ref:`Recipient Lists ` are only supported in python-oracledb +:ref:`Thick mode `. + +There are examples of AQ Classic Queues in the `GitHub samples `__ directory. **Transactional Event Queue Support** +Transactional Event Queues are only supported in python-oracledb :ref:`Thick +mode `. + - RAW and named Oracle object payloads are supported for single and array message enqueuing and dequeuing when using Oracle Client 19c (or later) and connected to Oracle Database 19c (or later). @@ -55,7 +58,15 @@ Creating a Queue Before being used in applications, queues need to be created in the database. -**Using RAW Payloads** +To experiment with queueing, you can grant yourself privileges, for example in +SQL*Plus as a DBA user: + +.. code-block:: sql + + grant aq_administrator_role, aq_user_role to &&username; + grant execute on dbms_aq to &&username; + +**Creating RAW Payload Queues** To use SQL*Plus to create a Classic Queue for the RAW payload which is suitable for sending string or bytes messages: @@ -79,7 +90,7 @@ To create a Transactional Event Queue for RAW payloads: end; / -**Using JSON Payloads** +**Creating JSON Payload Queues** Queues can also be created for JSON payloads. For example, to create a Classic Queue in SQL*Plus: @@ -99,7 +110,7 @@ Enqueuing Messages To send messages in Python, you connect and get a :ref:`queue `. The queue can then be used for enqueuing, dequeuing, or for both. -**Using RAW Payloads** +**Enqueuing RAW Payloads** You can connect to the database and get the queue that was created with RAW payload type by using: @@ -123,13 +134,14 @@ messages: connection.commit() Since the queue is a RAW queue, strings are internally encoded to bytes using -``message.encode()`` before being enqueued. +`encode() `__ +before being enqueued. -The use of :meth:`~Connection.commit()` means that messages are sent only when -any database transaction related to them is committed. This behavior can be -altered, see :ref:`aqoptions`. +The use of :meth:`~Connection.commit()` allows messages to be sent only when +any database transaction related to them is committed. This default behavior +can be altered, see :ref:`aqoptions`. -**Using JSON Payloads** +**Enqueuing JSON Payloads** You can connect to the database and get the queue that was created with JSON payload type by using: @@ -162,9 +174,11 @@ Dequeuing Messages ================== Dequeuing is performed similarly. To dequeue a message call the method -:meth:`~Queue.deqone()` as shown in the examples below. +:meth:`~Queue.deqone()` as shown in the examples below. This returns a +:ref:`MessageProperties ` object containing the message payload +and related attributes. -**Using RAW Payloads** +**Dequeuing RAW Payloads** .. code-block:: python @@ -174,9 +188,21 @@ Dequeuing is performed similarly. To dequeue a message call the method print(message.payload.decode()) Note that if the message is expected to be a string, the bytes must be decoded -by the application using ``message.payload.decode()``, as shown. +by the application using `decode() +`__, as shown. 
+ +If there are no messages in the queue, :meth:`~Queue.deqone()` will wait for +one to be enqueued. This default behavior can be altered, see +:ref:`aqoptions`. + +Various :ref:`message properties ` can be accessed. For example +to show the :attr:`~MessageProperties.msgid` of a dequeued message: + +.. code-block:: python + + print(message.msgid.hex()) -**Using JSON Payloads** +**Dequeuing JSON Payloads** .. code-block:: python diff --git a/samples/multi_consumer_aq.py b/samples/multi_consumer_aq.py index c0744d0d..a5c59605 100644 --- a/samples/multi_consumer_aq.py +++ b/samples/multi_consumer_aq.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2023, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # Portions Copyright 2007-2015, Anthony Tuininga. All rights reserved. # @@ -37,8 +37,9 @@ import oracledb import sample_env -# this script is currently only supported in python-oracledb thick mode -oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) +# determine whether to use python-oracledb thin mode or thick mode +if not sample_env.get_is_thin(): + oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) QUEUE_NAME = "DEMO_RAW_QUEUE_MULTI" PAYLOAD_DATA = [ diff --git a/samples/object_aq.py b/samples/object_aq.py index eaf0803c..233ba012 100644 --- a/samples/object_aq.py +++ b/samples/object_aq.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2016, 2023, Oracle and/or its affiliates. +# Copyright (c) 2016, 2025, Oracle and/or its affiliates. # # Portions Copyright 2007-2015, Anthony Tuininga. All rights reserved. # @@ -39,8 +39,9 @@ import oracledb import sample_env -# this script is currently only supported in python-oracledb thick mode -oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) +# determine whether to use python-oracledb thin mode or thick mode +if not sample_env.get_is_thin(): + oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) BOOK_TYPE_NAME = "UDT_BOOK" QUEUE_NAME = "DEMO_BOOK_QUEUE" diff --git a/samples/raw_aq.py b/samples/raw_aq.py index 526a00e7..9defb7ab 100644 --- a/samples/raw_aq.py +++ b/samples/raw_aq.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2019, 2023, Oracle and/or its affiliates. +# Copyright (c) 2019, 2025, Oracle and/or its affiliates. # # Portions Copyright 2007-2015, Anthony Tuininga. All rights reserved. 
# @@ -37,8 +37,9 @@ import oracledb import sample_env -# this script is currently only supported in python-oracledb thick mode -oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) +# determine whether to use python-oracledb thin mode or thick mode +if not sample_env.get_is_thin(): + oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) QUEUE_NAME = "DEMO_RAW_QUEUE" PAYLOAD_DATA = [ diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index 8160209f..1f89c082 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -317,6 +317,7 @@ cdef class Buffer: bint write_length=*) except -1 cdef int write_oracle_number(self, bytes num_bytes) except -1 cdef int write_raw(self, const char_type *data, ssize_t length) except -1 + cdef int write_sb4(self, int32_t value) except -1 cdef int write_str(self, str value) except -1 cdef int write_uint8(self, uint8_t value) except -1 cdef int write_uint16be(self, uint16_t value) except -1 @@ -955,10 +956,13 @@ cdef object convert_oracle_data_to_python(OracleMetadata from_metadata, OracleData* data, const char* encoding_errors, bint from_dbobject) +cdef object convert_date_to_python(OracleDataBuffer *buffer) cdef uint16_t decode_uint16be(const char_type *buf) cdef uint32_t decode_uint32be(const char_type *buf) cdef uint16_t decode_uint16le(const char_type *buf) cdef uint64_t decode_uint64be(const char_type *buf) +cdef int decode_date(const uint8_t* ptr, ssize_t num_bytes, + OracleDataBuffer *buffer) cdef void encode_uint16be(char_type *buf, uint16_t value) cdef void encode_uint16le(char_type *buf, uint16_t value) cdef void encode_uint32be(char_type *buf, uint32_t value) diff --git a/src/oracledb/errors.py b/src/oracledb/errors.py index 7f2f590c..9de4c810 100644 --- a/src/oracledb/errors.py +++ b/src/oracledb/errors.py @@ -358,6 +358,7 @@ def _raise_not_supported(feature: str) -> None: ERR_UNKNOWN_SERVER_PIGGYBACK = 5009 ERR_UNKNOWN_TRANSACTION_STATE = 5010 ERR_UNEXPECTED_PIPELINE_FAILURE = 5011 +ERR_NOT_IMPLEMENTED = 5012 # error numbers that result in OperationalError ERR_LISTENER_REFUSED_CONNECTION = 6000 @@ -713,6 +714,7 @@ def _raise_not_supported(feature: str) -> None: ERR_NO_STATEMENT_PREPARED: "statement must be prepared first", ERR_NOT_A_QUERY: "the executed statement does not return rows", ERR_NOT_CONNECTED: "not connected to database", + ERR_NOT_IMPLEMENTED: "not implemented", ERR_NUMBER_STRING_OF_ZERO_LENGTH: "invalid number: zero length string", ERR_NUMBER_STRING_TOO_LONG: "invalid number: string too long", ERR_NUMBER_WITH_EMPTY_EXPONENT: "invalid number: empty exponent", diff --git a/src/oracledb/impl/base/buffer.pyx b/src/oracledb/impl/base/buffer.pyx index ea8e025e..86d53477 100644 --- a/src/oracledb/impl/base/buffer.pyx +++ b/src/oracledb/impl/base/buffer.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -791,6 +791,26 @@ cdef class Buffer: length -= bytes_to_write data += bytes_to_write + cdef int write_sb4(self, int32_t value) except -1: + """ + Writes a 32-bit signed integer to the buffer in universal format. 
+ """ + cdef uint8_t sign = 0 + if value < 0: + value = -value + sign = 0x80 + if value == 0: + self.write_uint8(0) + elif value <= UINT8_MAX: + self.write_uint8(1 | sign) + self.write_uint8( value) + elif value <= UINT16_MAX: + self.write_uint8(2 | sign) + self.write_uint16be( value) + else: + self.write_uint8(4 | sign) + self.write_uint32be(value) + cdef int write_str(self, str value) except -1: """ Writes a string to the buffer as UTF-8 encoded bytes. diff --git a/src/oracledb/impl/base/connection.pyx b/src/oracledb/impl/base/connection.pyx index 180bbe74..a9cdb07a 100644 --- a/src/oracledb/impl/base/connection.pyx +++ b/src/oracledb/impl/base/connection.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -225,6 +225,9 @@ cdef class BaseConnImpl: cursor_impl.prefetchrows = C_DEFAULTS.prefetchrows return cursor_impl + def create_msg_props_impl(self): + errors._raise_not_supported("creating a message property object") + def create_queue_impl(self): errors._raise_not_supported("creating a queue") diff --git a/src/oracledb/impl/thin/capabilities.pyx b/src/oracledb/impl/thin/capabilities.pyx index 452a647f..b3590061 100644 --- a/src/oracledb/impl/thin/capabilities.pyx +++ b/src/oracledb/impl/thin/capabilities.pyx @@ -104,6 +104,7 @@ cdef class Capabilities: TNS_CCAP_CTB_IMPLICIT_POOL self.compile_caps[TNS_CCAP_FIELD_VERSION] = self.ttc_field_version self.compile_caps[TNS_CCAP_SERVER_DEFINE_CONV] = 1 + self.compile_caps[TNS_CCAP_DEQUEUE_WITH_SELECTOR] = 1 self.compile_caps[TNS_CCAP_TTC1] = \ TNS_CCAP_FAST_BVEC | TNS_CCAP_END_OF_CALL_STATUS | \ TNS_CCAP_IND_RCD diff --git a/src/oracledb/impl/thin/connection.pyx b/src/oracledb/impl/thin/connection.pyx index e1fc0360..90f10306 100644 --- a/src/oracledb/impl/thin/connection.pyx +++ b/src/oracledb/impl/thin/connection.pyx @@ -438,6 +438,15 @@ cdef class ThinConnImpl(BaseThinConnImpl): self._force_close() raise + def create_msg_props_impl(self): + cdef ThinMsgPropsImpl impl + impl = ThinMsgPropsImpl() + impl._conn_impl = self + return impl + + def create_queue_impl(self): + return ThinQueueImpl.__new__(ThinQueueImpl) + def create_temp_lob_impl(self, DbType dbtype): cdef ThinLobImpl lob_impl = self._create_lob_impl(dbtype) lob_impl.create_temp() diff --git a/src/oracledb/impl/thin/constants.pxi b/src/oracledb/impl/thin/constants.pxi index 26c8d50f..fc8446eb 100644 --- a/src/oracledb/impl/thin/constants.pxi +++ b/src/oracledb/impl/thin/constants.pxi @@ -58,6 +58,57 @@ cdef enum: TNS_MARKER_TYPE_RESET = 2 TNS_MARKER_TYPE_INTERRUPT = 3 +# AQ delivery modes +cdef enum: + TNS_AQ_MSG_BUFFERED = 2 + TNS_AQ_MSG_PERSISTENT = 1 + TNS_AQ_MSG_PERSISTENT_OR_BUFFERED = 3 + +# AQ dequeue modes +cdef enum: + TNS_AQ_DEQ_BROWSE = 1 + TNS_AQ_DEQ_LOCKED = 2 + TNS_AQ_DEQ_REMOVE = 3 + TNS_AQ_DEQ_REMOVE_NODATA = 4 + +# AQ dequeue navigation modes +cdef enum: + TNS_AQ_DEQ_FIRST_MSG = 1 + TNS_AQ_DEQ_NEXT_MSG = 3 + TNS_AQ_DEQ_NEXT_TRANSACTION = 2 + +# AQ dequeue visibility modes +cdef enum: + TNS_AQ_DEQ_IMMEDIATE = 1 + TNS_AQ_DEQ_ON_COMMIT = 2 + +# AQ dequeue wait modes +cdef enum: + TNS_AQ_DEQ_NO_WAIT = 0 + TNS_AQ_DEQ_WAIT_FOREVER = -1 + +# AQ enqueue visibility modes +cdef enum: + TNS_AQ_ENQ_IMMEDIATE = 1 + TNS_AQ_ENQ_ON_COMMIT = 2 + +# AQ message 
states +cdef enum: + TNS_AQ_MSG_EXPIRED = 3 + TNS_AQ_MSG_PROCESSED = 2 + TNS_AQ_MSG_READY = 0 + TNS_AQ_MSG_WAITING = 1 + +# AQ other constants +cdef enum: + TNS_AQ_MSG_NO_DELAY = 0 + TNS_AQ_MSG_NO_EXPIRATION = -1 + +# AQ flags +cdef enum: + TNS_KPD_AQ_BUFMSG = 0x02 + TNS_KPD_AQ_EITHER = 0x10 + # errors cdef enum: TNS_ERR_INCONSISTENT_DATA_TYPES = 932 @@ -69,6 +120,7 @@ cdef enum: TNS_ERR_SESSION_SHUTDOWN = 12572 TNS_ERR_ARRAY_DML_ERRORS = 24381 TNS_ERR_EXCEEDED_IDLE_TIME = 2396 + TNS_ERR_NO_MESSAGES_FOUND = 25228 # message types cdef enum: @@ -286,6 +338,8 @@ cdef enum: TNS_FUNC_EXECUTE = 94 TNS_FUNC_FETCH = 5 TNS_FUNC_LOB_OP = 96 + TNS_FUNC_AQ_ENQ = 121 + TNS_FUNC_AQ_DEQ = 122 TNS_FUNC_LOGOFF = 9 TNS_FUNC_PING = 147 TNS_FUNC_PIPELINE_BEGIN = 199 @@ -329,6 +383,7 @@ cdef enum: TNS_CCAP_FEATURE_BACKPORT = 5 TNS_CCAP_FIELD_VERSION = 7 TNS_CCAP_SERVER_DEFINE_CONV = 8 + TNS_CCAP_DEQUEUE_WITH_SELECTOR = 9 TNS_CCAP_TTC1 = 15 TNS_CCAP_OCI1 = 16 TNS_CCAP_TDS_VERSION = 17 @@ -470,6 +525,13 @@ cdef enum: TNS_PIPELINE_MODE_CONTINUE_ON_ERROR = 1 TNS_PIPELINE_MODE_ABORT_ON_ERROR = 2 +# AQ extension keywords +cdef enum: + TNS_AQ_EXT_KEYWORD_AGENT_NAME = 64 + TNS_AQ_EXT_KEYWORD_AGENT_ADDRESS = 65 + TNS_AQ_EXT_KEYWORD_AGENT_PROTOCOL = 66 + TNS_AQ_EXT_KEYWORD_ORIGINAL_MSGID = 69 + # session state flags cdef enum: TNS_SESSION_STATE_REQUEST_BEGIN = 0x04 @@ -487,6 +549,7 @@ cdef enum: TNS_SERVER_CONVERTS_CHARS = 0x01 TNS_JSON_MAX_LENGTH = 32 * 1024 * 1024 TNS_VECTOR_MAX_LENGTH = 1 * 1024 * 1024 + TNS_AQ_MESSAGE_ID_LENGTH = 16 # base 64 encoding alphabet cdef bytes TNS_BASE64_ALPHABET = \ diff --git a/src/oracledb/impl/thin/messages.pyx b/src/oracledb/impl/thin/messages.pyx index d4b469eb..547f8b49 100644 --- a/src/oracledb/impl/thin/messages.pyx +++ b/src/oracledb/impl/thin/messages.pyx @@ -2331,6 +2331,351 @@ cdef class FetchMessage(MessageWithData): buf.write_ub4(self.cursor_impl._fetch_array_size) +@cython.final +cdef class DeqMessage(Message): + cdef: + ThinQueueImpl queue_impl + ThinDeqOptionsImpl deq_options_impl + ThinMsgPropsImpl props_impl + bint no_msg_found + + cdef int _initialize_hook(self) except -1: + """ + Perform initialization + """ + self.function_code = TNS_FUNC_AQ_DEQ + + cdef int _process_error_info(self, ReadBuffer buf) except -1: + """ + Process error information from the buffer. If the error that indicates + that no messages were received is detected, the error is cleared and + the flag set so that the dequeue can handle that case. + """ + Message._process_error_info(self, buf) + if self.error_info.num == TNS_ERR_NO_MESSAGES_FOUND: + self.error_info.num = 0 + self.error_occurred = False + self.no_msg_found = True + + cdef int _process_return_parameters(self, ReadBuffer buf) except -1: + """ + Process the return parameters of the AQ Dequeue request. 
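DeqMessage._process_error_info() above converts the server error that signals an empty queue into the no_msg_found flag, so an application sees deqone() return None rather than an exception. A short usage sketch, assuming an open connection "conn" and the TEST_RAW_QUEUE queue exercised by the tests later in this patch:

    import oracledb

    queue = conn.queue("TEST_RAW_QUEUE")
    queue.deqoptions.wait = oracledb.DEQ_NO_WAIT   # do not block when nothing is queued
    props = queue.deqone()
    if props is None:
        print("no messages available")             # the server error was absorbed by DeqMessage
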
+ """ + cdef: + uint32_t num_bytes, num_extensions, i + ssize_t temp_num_bytes + const char_type *ptr + uint16_t temp16, keyword + bytes temp + OracleData data + uint32_t imageLength + ThinDbObjectImpl obj_impl + ThinDbObjectTypeImpl type_impl + buf.read_ub4(&num_bytes) + if num_bytes > 0: + buf.read_sb4(&self.props_impl.priority) # priority + buf.read_sb4(&self.props_impl.delay) # delay + buf.read_sb4(&self.props_impl.expiration) # expiration + # correlation id + buf.read_ub4(&num_bytes) + if num_bytes > 0: + buf.read_raw_bytes_and_length(&ptr, &temp_num_bytes) + self.props_impl.correlation = ptr[:temp_num_bytes].decode() + buf.read_sb4(&self.props_impl.num_attempts) + # exception queue name + buf.read_ub4(&num_bytes) + if num_bytes > 0: + buf.read_raw_bytes_and_length(&ptr, &temp_num_bytes) + self.props_impl.exceptionq = ptr[:temp_num_bytes].decode() + buf.read_sb4(&self.props_impl.state) + buf.read_ub4(&num_bytes) # enqueue time + if num_bytes > 0: + buf.read_raw_bytes_and_length(&ptr, &temp_num_bytes) + decode_date(ptr, temp_num_bytes, &data.buffer) + self.props_impl.enq_time = convert_date_to_python(&data.buffer) + buf.read_ub4(&num_bytes) # transaction id + if num_bytes > 0: + ptr = buf._get_raw(num_bytes) + self.props_impl.enq_txn_id = ptr[:num_bytes] + buf.read_ub4(&num_extensions) # number of extensions + if num_extensions > 0: + buf.skip_ub1() + for i in range(num_extensions): + temp = None + temp16 = 0 + buf.read_ub4(&num_bytes) # text value length + if num_bytes > 0: + buf.read_raw_bytes_and_length(&ptr, &temp_num_bytes) + temp = ptr[:temp_num_bytes] + temp16 = temp_num_bytes + buf.read_ub4(&num_bytes) # binary value length + if num_bytes > 0: + buf.read_raw_bytes_and_length(&ptr, &temp_num_bytes) + temp = ptr[:temp_num_bytes] + buf.read_ub2(&keyword) # extension keyword + if (keyword == TNS_AQ_EXT_KEYWORD_AGENT_NAME and + temp is not None and temp16 > 0): + self.props_impl.sender_agent_name = temp + if (keyword == TNS_AQ_EXT_KEYWORD_AGENT_ADDRESS and + temp is not None and temp16 > 0): + self.props_impl.sender_agent_address = temp + if (keyword == TNS_AQ_EXT_KEYWORD_AGENT_PROTOCOL and + temp is not None): + self.props_impl.sender_agent_protocol = temp + if (keyword == TNS_AQ_EXT_KEYWORD_ORIGINAL_MSGID and + temp is not None): + self.props_impl.original_msg_id = temp + buf.read_ub4(&num_bytes) # user properties + if num_bytes > 0: + errors._raise_err(errors.ERR_NOT_IMPLEMENTED) + buf.skip_ub4() # csn + buf.skip_ub4() # dsn + buf.skip_ub4() # flags + if buf._caps.ttc_field_version >= TNS_CCAP_FIELD_VERSION_21_1: + buf.skip_ub4() # shard number + buf.read_ub4(&num_bytes) # num recipients + if num_bytes > 0: + errors._raise_err(errors.ERR_NOT_IMPLEMENTED) + if (not self.queue_impl.is_json and + self.queue_impl.payload_type is not None): + type_impl = self.queue_impl.payload_type + obj_impl = buf.read_dbobject(type_impl) + if obj_impl is None: + obj_impl = type_impl.create_new_object() + self.props_impl.payload = PY_TYPE_DB_OBJECT._from_impl(obj_impl) + elif self.queue_impl.payload_type is None: + buf.read_ub4(&num_bytes) # TOID len + if num_bytes > 0: + buf.skip_raw_bytes(num_bytes) + buf.read_ub4(&num_bytes) # OID len + if num_bytes > 0: + buf.skip_raw_bytes(num_bytes) + buf.read_ub4(&num_bytes) # snapshot + if num_bytes > 0: + buf.skip_raw_bytes(num_bytes) + buf.skip_ub2() # version no + buf.read_ub4(&imageLength) # image len + buf.skip_ub2() # flags + if imageLength > 0: + self.props_impl.payload = buf.read_bytes()[4:imageLength] + else: + self.props_impl.payload = b'' + ptr = 
buf._get_raw(TNS_AQ_MESSAGE_ID_LENGTH) + self.props_impl.msgid =ptr[:TNS_AQ_MESSAGE_ID_LENGTH] + + cdef int _write_message(self, WriteBuffer buf) except -1: + """ + Write message to the network buffers. + """ + cdef: + bytes queue_name_bytes + bytes consumer_name_bytes + bytes correlation_bytes + bytes condition_bytes + uint16_t delivery_mode + int deq_flags + self._write_function_code(buf) + queue_name_bytes = self.queue_impl.name.encode() + buf.write_uint8(1) # queue name (pointer) + buf.write_ub4(len(queue_name_bytes)) # queue name length + buf.write_uint8(1) # message properties + buf.write_uint8(1) # msg props length + buf.write_uint8(1) # recipient list + buf.write_uint8(1) # recipient list length + if self.deq_options_impl.consumer_name: + consumer_name_bytes = self.deq_options_impl.consumer_name.encode() + buf.write_uint8(1) # consumer name + buf.write_ub4(len(consumer_name_bytes)) + else: + consumer_name_bytes = None + buf.write_uint8(0) # consumer name + buf.write_ub4(0) # consumer name length + buf.write_sb4(self.deq_options_impl.mode) # dequeue mode + buf.write_sb4(self.deq_options_impl.navigation) # navigation + buf.write_sb4(self.deq_options_impl.visibility) # visibility + buf.write_sb4(self.deq_options_impl.wait) # wait + if self.deq_options_impl.msgid: + buf.write_uint8(1) # select mesg id + buf.write_ub4(TNS_AQ_MESSAGE_ID_LENGTH) # mesg id len + else: + buf.write_uint8(0) # select mesg id + buf.write_ub4(0) # select mesg id length + if self.deq_options_impl.correlation: + correlation_bytes = self.deq_options_impl.correlation.encode() + buf.write_uint8(1) # correlation id + buf.write_ub4(len(correlation_bytes)) # correlation id len + else: + correlation_bytes = None + buf.write_uint8(0) # correlation id + buf.write_ub4(0) # correlation id len + buf.write_uint8(1) # toid of payload + buf.write_ub4(16) # toid length + buf.write_ub2(self.props_impl.version) # version of type + buf.write_uint8(1) # payload + buf.write_uint8(1) # return msg id + buf.write_ub4(16) # mesg id length + deq_flags = 0 + delivery_mode = self.deq_options_impl.delivery_mode + if (delivery_mode == TNS_AQ_MSG_BUFFERED): + deq_flags |= TNS_KPD_AQ_BUFMSG + elif (delivery_mode == TNS_AQ_MSG_PERSISTENT_OR_BUFFERED): + deq_flags |= TNS_KPD_AQ_EITHER + buf.write_ub4(deq_flags) # dequeue flags + if self.deq_options_impl.condition: + condition_bytes = self.deq_options_impl.condition.encode() + buf.write_uint8(1) # condition (pointer) + buf.write_ub4(len(condition_bytes)) # condition length + else: + condition_bytes = None + buf.write_uint8(0) # condition + buf.write_ub4(0) # condition length + buf.write_uint8(0) # extensions + buf.write_ub4(0) # number of extensions + buf.write_uint8(0) # JSON payload + buf.write_ub4(-1) # shard id + + buf.write_bytes_with_length(queue_name_bytes) + if consumer_name_bytes is not None: + buf.write_bytes_with_length(consumer_name_bytes) + if self.deq_options_impl.msgid: + buf.write_bytes(self.deq_options_impl.msgid) + if correlation_bytes is not None: + buf.write_bytes_with_length(correlation_bytes) + buf.write_bytes(self.queue_impl.payload_toid) + if condition_bytes is not None: + buf.write_bytes_with_length(condition_bytes) + + +@cython.final +cdef class EnqMessage(Message): + cdef: + ThinQueueImpl queue_impl + ThinEnqOptionsImpl enq_options_impl + ThinMsgPropsImpl props_impl + + cdef int _initialize_hook(self) except -1: + """ + perform initialization + """ + self.function_code = TNS_FUNC_AQ_ENQ + + cdef int _process_return_parameters(self, ReadBuffer buf) except -1: + """ + 
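DeqMessage._write_message() above sends the optional consumer name, correlation, message id and condition along with the dequeue request, which is what the new TNS_CCAP_DEQUEUE_WITH_SELECTOR capability enables. At the Python level these are ordinary deqoptions attributes; a sketch assuming "conn" and messages already enqueued with priorities and correlations set:

    import oracledb

    queue = conn.queue("TEST_RAW_QUEUE")
    queue.deqoptions.wait = oracledb.DEQ_NO_WAIT
    queue.deqoptions.condition = "priority = 9"      # selector on message properties
    # queue.deqoptions.correlation = "sample%"       # correlation also allows pattern matching
    matched = []
    while (props := queue.deqone()) is not None:
        matched.append(props.payload)
    conn.commit()
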
Process the return parameters for the AQ enqueue request. + """ + cdef const char_type *ptr = buf._get_raw(TNS_AQ_MESSAGE_ID_LENGTH) + self.props_impl.msgid = ptr[:TNS_AQ_MESSAGE_ID_LENGTH] + buf.skip_ub2() # extensions length + + cdef int _write_message(self, WriteBuffer buf) except -1: + """ + Write message to the network buffers. + """ + cdef: + bytes queue_name_bytes + bytes correlation_bytes + bytes exceptionq_bytes + int enq_flags + + self._write_function_code(buf) + queue_name_bytes = self.queue_impl.name.encode() + buf.write_uint8(1) # queue name (pointer) + buf.write_ub4(len(queue_name_bytes)) # queue name length + buf.write_ub4(self.props_impl.priority) + buf.write_ub4(self.props_impl.delay) + buf.write_sb4(self.props_impl.expiration) + if self.props_impl.correlation is None: + buf.write_ub4(0) # correlation + else: + correlation_bytes = self.props_impl.correlation.encode() + buf.write_ub4(len(correlation_bytes)) + buf.write_bytes_with_length(correlation_bytes) + buf.write_ub4(0) # number of attempts + if self.props_impl.exceptionq is None: + buf.write_ub4(0) # exception queue + else: + exceptionq_bytes = self.props_impl.exceptionq.encode() + buf.write_ub4(len(exceptionq_bytes)) + buf.write_bytes_with_length(exceptionq_bytes) + buf.write_ub4(self.props_impl.state) + buf.write_ub4(0) # enqueue time length + buf.write_ub4(0) # enqueue transaction id length + buf.write_ub4(4) # number of extensions + buf.write_uint8(0x0e) # unknown extra byte + buf.write_extension_values(None, None, TNS_AQ_EXT_KEYWORD_AGENT_NAME) + buf.write_extension_values(None, None, TNS_AQ_EXT_KEYWORD_AGENT_ADDRESS) + buf.write_extension_values(None, b'\x00', + TNS_AQ_EXT_KEYWORD_AGENT_PROTOCOL) + buf.write_extension_values(None, None, + TNS_AQ_EXT_KEYWORD_ORIGINAL_MSGID) + buf.write_ub4(0) # user property + buf.write_ub4(0) # cscn + buf.write_ub4(0) # dscn + buf.write_ub4(0) # flags + buf.write_ub4(0xffffffffl) # shard id + + if self.props_impl.recipients is None: + buf.write_uint8(0) # recipients (pointer) + buf.write_ub4(0) # number of key/value pairs + else: + buf.write_uint8(1) + buf.write_ub4(len(self.props_impl.recipients) * 3) + buf.write_ub4(self.enq_options_impl.visibility) + buf.write_uint8(0) # relative message id + buf.write_ub4(0) # relative message length + buf.write_ub4(0) # sequence deviation + buf.write_uint8(1) # TOID of payload (pointer) + buf.write_ub4(16) # TOID of payload length + buf.write_ub2(self.props_impl.version) + if self.queue_impl.is_json: + buf.write_uint8(0) # payload (pointer) + buf.write_uint8(0) # RAW payload (pointer) + buf.write_ub4(0) # RAW payload length + elif self.queue_impl.payload_type is not None: + buf.write_uint8(1) # payload (pointer) + buf.write_uint8(0) # RAW payload (pointer) + buf.write_ub4(0) # RAW payload (length) + else: + buf.write_uint8(0) # payload (pointer) + buf.write_uint8(1) # RAW payload (pointer) + buf.write_ub4(len(self.props_impl.payloadObject)) + buf.write_uint8(1) # return message id (pointer) + buf.write_ub4(TNS_AQ_MESSAGE_ID_LENGTH) # return message id length + enq_flags = 0 + if (self.enq_options_impl.delivery_mode == TNS_AQ_MSG_BUFFERED): + enq_flags |= TNS_KPD_AQ_BUFMSG + buf.write_ub4(enq_flags) # enqueue flags + buf.write_uint8(0) # extensions 1 (pointer) + buf.write_ub4(0) # number of extensions 1 + buf.write_uint8(0) # extensions 2 (pointer) + buf.write_ub4(0) # number of extensions 2 + buf.write_uint8(0) # source sequence number + buf.write_ub4(0) # source sequence length + buf.write_uint8(0) # max sequence number + buf.write_ub4(0) # 
max sequence length + buf.write_uint8(0) # output ack length + buf.write_uint8(0) # correlation (pointer) + buf.write_ub4(0) # correlation length + buf.write_uint8(0) # sender name (pointer) + buf.write_ub4(0) # sender name length + buf.write_uint8(0) # sender address (pointer) + buf.write_ub4(0) # sender address length + buf.write_uint8(0) # sender charset id (pointer) + buf.write_uint8(0) # sender ncharset id (pointer) + if self.queue_impl.is_json: + buf.write_uint8(1) # JSON payload (pointer) + else: + buf.write_uint8(0) # JSON payload (pointer) + + buf.write_bytes_with_length(queue_name_bytes) + buf.write_bytes(self.props_impl.toid) + if not self.queue_impl.is_json: + if self.queue_impl.payload_type is not None: + buf.write_dbobject(self.props_impl.payloadObject) + else: + buf.write_bytes(self.props_impl.payloadObject) + + @cython.final cdef class LobOpMessage(Message): cdef: diff --git a/src/oracledb/impl/thin/packet.pyx b/src/oracledb/impl/thin/packet.pyx index 2e0aeced..ba895b66 100644 --- a/src/oracledb/impl/thin/packet.pyx +++ b/src/oracledb/impl/thin/packet.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -879,6 +879,26 @@ cdef class WriteBuffer(Buffer): self.write_ub4(obj_impl.flags) # flags self.write_bytes_with_length(packed_data) + cdef int write_extension_values(self, str txt_value, bytes bytes_value, + uint16_t keyword) except -1: + """ + Writes extension's text value, binary value and keyword entry to the + buffer. + """ + cdef bytes txt_value_bytes + if txt_value is None: + self.write_uint8(0) + else: + txt_value_bytes = txt_value.encode() + self.write_ub4(len(txt_value_bytes)) + self.write_bytes_with_length(txt_value_bytes) + if bytes_value is None: + self.write_uint8(0) + else: + self.write_ub4(len(bytes_value)) + self.write_bytes_with_length(bytes_value) + self.write_ub2(keyword) + cdef int write_lob_with_length(self, BaseThinLobImpl lob_impl) except -1: """ Writes a LOB locator to the buffer. diff --git a/src/oracledb/impl/thin/queue.pyx b/src/oracledb/impl/thin/queue.pyx new file mode 100644 index 00000000..552d9953 --- /dev/null +++ b/src/oracledb/impl/thin/queue.pyx @@ -0,0 +1,420 @@ +#------------------------------------------------------------------------------ +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
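EnqMessage above serializes a single message from its ThinMsgPropsImpl and reads the 16-byte message id back in _process_return_parameters(). The corresponding public API call, sketched for a RAW queue and assuming an open connection "conn":

    import oracledb

    queue = conn.queue("TEST_RAW_QUEUE")
    queue.enqoptions.visibility = oracledb.ENQ_IMMEDIATE   # visible without an explicit commit
    props = conn.msgproperties(payload=b"sample raw data 1")
    queue.enqone(props)
    print(props.msgid)                                      # populated from the enqueue response
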
+#------------------------------------------------------------------------------ + +#------------------------------------------------------------------------------ +# queue.pyx +# +# Cython file defining the thin Queue implementation class (embedded in +# thin_impl.pyx). +#------------------------------------------------------------------------------ + +cdef class ThinQueueImpl(BaseQueueImpl): + + cdef: + ThinConnImpl _conn_impl + bytes payload_toid + + cdef Message _create_enq_message(self, ThinMsgPropsImpl props_impl): + """ + Create the message for enqueuing the provided payload. + """ + cdef EnqMessage message + message = self._conn_impl._create_message(EnqMessage) + message.queue_impl = self + message.enq_options_impl = self.enq_options_impl + message.props_impl = props_impl + return message + + def deq_one(self): + """ + Internal method for dequeuing a single message from a queue. + """ + cdef: + Protocol protocol = self._conn_impl._protocol + DeqMessage message + ThinMsgPropsImpl props_impl + props_impl = ThinMsgPropsImpl() + props_impl._initialize(self) + message = self._conn_impl._create_message(DeqMessage) + message.queue_impl = self + message.deq_options_impl = self.deq_options_impl + message.props_impl = props_impl + protocol._process_single_message(message) + if not message.no_msg_found: + return message.props_impl + + def enq_one(self, ThinMsgPropsImpl props_impl): + """ + Internal method for enqueuing a single message into a queue. + """ + cdef: + Protocol protocol = self._conn_impl._protocol + Message message + message = self._create_enq_message(props_impl) + protocol._process_single_message(message) + + def initialize(self, ThinConnImpl conn_impl, str name, + ThinDbObjectTypeImpl payload_type, bint is_json): + """ + Internal method for initializing the queue. + """ + self._conn_impl = conn_impl + self.is_json = is_json + self.deq_options_impl = ThinDeqOptionsImpl() + self.enq_options_impl = ThinEnqOptionsImpl() + self.payload_type = payload_type + if self.is_json: + errors._raise_not_supported("JSON payload in AQ") + elif self.payload_type is not None: + self.payload_toid = payload_type.oid + else: + self.payload_toid = bytes([0]*15+[0x17]) + self.name = name + + +cdef class ThinDeqOptionsImpl(BaseDeqOptionsImpl): + cdef: + str condition + str consumer_name + str correlation + uint16_t delivery_mode + uint32_t mode + bytes msgid + uint32_t navigation + str transformation + uint32_t visibility + uint32_t wait + + def __init__(self): + self.delivery_mode = TNS_AQ_MSG_PERSISTENT + self.mode = TNS_AQ_DEQ_REMOVE + self.navigation = TNS_AQ_DEQ_NEXT_MSG + self.visibility = TNS_AQ_DEQ_ON_COMMIT + self.wait = TNS_AQ_DEQ_WAIT_FOREVER + + def get_condition(self): + """ + Internal method for getting the condition. + """ + return self.condition + + def get_consumer_name(self): + """ + Internal method for getting the consumer name. + """ + return self.consumer_name + + def get_correlation(self): + """ + Internal method for getting the correlation. + """ + return self.correlation + + def get_message_id(self): + """ + Internal method for getting the message id. + """ + return self.msgid + + def get_mode(self): + """ + Internal method for getting the mode. + """ + return self.mode + + def get_navigation(self): + """ + Internal method for getting the navigation. + """ + return self.navigation + + def get_transformation(self): + """ + Internal method for getting the transformation. 
+ """ + return self.transformation + + def get_visibility(self): + """ + Internal method for getting the visibility. + """ + return self.visibility + + def get_wait(self): + """ + Internal method for getting the wait. + """ + return self.wait + + def set_condition(self, str value): + """ + Internal method for setting the condition. + """ + self.condition = value + + def set_consumer_name(self, str value): + """ + Internal method for setting the consumer name. + """ + self.consumer_name = value + + def set_correlation(self, str value): + """ + Internal method for setting the correlation. + """ + self.correlation = value + + def set_delivery_mode(self, uint16_t value): + """ + Internal method for setting the delivery mode. + """ + self.delivery_mode = value + + def set_mode(self, uint32_t value): + """ + Internal method for setting the mode. + """ + self.mode = value + + def set_message_id(self, bytes value): + """ + Internal method for setting the message id. + """ + self.msgid = value + + def set_navigation(self, uint32_t value): + """ + Internal method for setting the navigation. + """ + self.navigation = value + + def set_transformation(self, str value): + """ + Internal method for setting the transformation. + """ + self.transformation = value + + def set_visibility(self, uint32_t value): + """ + Internal method for setting the visibility. + """ + self.visibility = value + + def set_wait(self, uint32_t value): + """ + Internal method for setting the wait. + """ + self.wait = value + + +cdef class ThinEnqOptionsImpl(BaseEnqOptionsImpl): + cdef: + str transformation + uint32_t visibility + uint32_t delivery_mode + + def __init__(self): + self.visibility = TNS_AQ_ENQ_ON_COMMIT + self.delivery_mode = TNS_AQ_MSG_PERSISTENT + + def get_transformation(self): + """ + Internal method for getting the transformation. + """ + return self.transformation + + def get_visibility(self): + """ + Internal method for getting the visibility. + """ + return self.visibility + + def set_delivery_mode(self, uint16_t value): + """ + Internal method for setting the delivery mode. + """ + self.delivery_mode = value + + def set_transformation(self, str value): + """ + Internal method for setting the transformation. + """ + self.transformation = value + + def set_visibility(self, uint32_t value): + """ + Internal method for setting the visibility. + """ + self.visibility = value + + +cdef class ThinMsgPropsImpl(BaseMsgPropsImpl): + + cdef: + int32_t delay + str correlation + str exceptionq + int32_t expiration + int32_t priority + list recipients + int32_t num_attempts + uint32_t delivery_mode + cydatetime.datetime enq_time + bytes msgid + int32_t state + object payloadObject + bytes toid + int32_t version + ThinConnImpl _conn_impl + bytes enq_txn_id + bytes sender_agent_name + bytes sender_agent_address + unsigned char sender_agent_protocol + bytes original_msg_id + + def __init__(self): + self.delay = TNS_AQ_MSG_NO_DELAY + self.expiration = TNS_AQ_MSG_NO_EXPIRATION + self.recipients = [] + self.version = 1 + self.sender_agent_protocol = 0 + + cdef int _initialize(self, ThinQueueImpl queue_impl) except -1: + """ + Internal method to initialize the message properties. + """ + self._conn_impl = queue_impl._conn_impl + self.toid = queue_impl.payload_toid + + def get_num_attempts(self): + """ + Internal method for getting the number of attempts made. + """ + return self.num_attempts + + def get_correlation(self): + """ + Internal method for getting the correlation. 
+ """ + return self.correlation + + def get_delay(self): + """ + Internal method for getting the delay. + """ + return self.delay + + def get_delivery_mode(self): + """ + Internal method for getting the delivery mode. + """ + return self.delivery_mode + + def get_enq_time(self): + """ + Internal method for getting the enqueue time. + """ + return self.enq_time + + def get_exception_queue(self): + """ + Internal method for getting the exception queue. + """ + return self.exceptionq + + def get_expiration(self): + """ + Internal method for getting the message expiration. + """ + return self.expiration + + def get_message_id(self): + """ + Internal method for getting the message id. + """ + return self.msgid + + def get_priority(self): + """ + Internal method for getting the priority. + """ + return self.priority + + def get_state(self): + """ + Internal method for getting the message state. + """ + return self.state + + def set_correlation(self, str value): + """ + Internal method for setting the correlation. + """ + self.correlation = value + + def set_delay(self, int32_t value): + """ + Internal method for setting the delay. + """ + self.delay = value + + def set_exception_queue(self, str value): + """ + Internal method for setting the exception queue. + """ + self.exceptionq = value + + def set_expiration(self, int32_t value): + """ + Internal method for setting the message expiration. + """ + self.expiration = value + + def set_payload_bytes(self, bytes value): + """ + Internal method for setting the payload from bytes. + """ + self.payloadObject = value + self.toid = bytes([0]*15+[0x17]) + + def set_payload_object(self, ThinDbObjectImpl value): + """ + Internal method for setting the payload from an object. + """ + if not isinstance(value, ThinDbObjectImpl): + raise TypeError("Expected ThinDbObjectImpl instance.") + self.payloadObject = value + self.toid = value.toid[4:20] + + def set_priority(self, int32_t value): + """ + Internal method for setting the priority. + """ + self.priority = value + + def set_recipients(self, list value): + """ + Internal method for setting the recipients list. + """ + self.recipients = value diff --git a/src/oracledb/thin_impl.pyx b/src/oracledb/thin_impl.pyx index 8025cac9..13a29688 100644 --- a/src/oracledb/thin_impl.pyx +++ b/src/oracledb/thin_impl.pyx @@ -88,6 +88,10 @@ from .base_impl cimport ( BaseDbObjectAttrImpl, BaseDbObjectImpl, BaseDbObjectTypeImpl, + BaseDeqOptionsImpl, + BaseEnqOptionsImpl, + BaseMsgPropsImpl, + BaseQueueImpl, BaseLobImpl, BaseParser, BasePoolImpl, @@ -106,6 +110,7 @@ from .base_impl cimport ( Buffer, ConnectParamsImpl, convert_oracle_data_to_python, + convert_date_to_python, CS_FORM_IMPLICIT, CS_FORM_NCHAR, DbType, @@ -169,6 +174,7 @@ from .base_impl cimport ( TNS_NULL_LENGTH_INDICATOR, decode_uint16be, decode_uint32be, + decode_date, VectorDecoder, VectorEncoder, encode_uint16be, @@ -199,6 +205,7 @@ include "impl/thin/packet.pyx" include "impl/thin/data_types.pyx" include "impl/thin/messages.pyx" include "impl/thin/protocol.pyx" +include "impl/thin/queue.pyx" include "impl/thin/connection.pyx" include "impl/thin/statement.pyx" include "impl/thin/statement_cache.pyx" diff --git a/tests/test_2700_aq.py b/tests/test_2700_aq.py index 5aab25cd..b24c3df0 100644 --- a/tests/test_2700_aq.py +++ b/tests/test_2700_aq.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -35,7 +35,6 @@ import test_env -@unittest.skipIf(test_env.get_is_thin(), "thin mode doesn't support AQ yet") class TestCase(test_env.BaseTestCase): book_type_name = "UDT_BOOK" book_queue_name = "TEST_BOOK_QUEUE" @@ -253,6 +252,7 @@ def test_2708(self): self.assertIsNone(props) self.conn.commit() props = queue.deqone() + other_conn.commit() self.assertIsNotNone(props) def test_2709(self): @@ -375,6 +375,9 @@ def test_2713(self): props = queue.deqone() self.assertIsNone(props) + @unittest.skipIf( + test_env.get_is_thin(), "Thin mode doesn't support transformation yet" + ) def test_2714(self): "2714 - test dequeue transformation" queue = self.get_and_clear_queue( @@ -399,6 +402,9 @@ def test_2714(self): props = queue.deqone() self.assertEqual(props.payload.PRICE, expected_price) + @unittest.skipIf( + test_env.get_is_thin(), "Thin mode doesn't support transformation yet" + ) def test_2715(self): "2715 - test enqueue transformation" queue = self.get_and_clear_queue( @@ -463,6 +469,9 @@ def test_2718(self): props = queue.deqone() self.assertEqual(props.msgid, actual_msgid) + @unittest.skipIf( + test_env.get_is_thin(), "Thin mode doesn't support recipient list yet" + ) def test_2719(self): "2719 - verify use of recipients property" books_type = self.conn.gettype(self.book_type_name) @@ -486,6 +495,9 @@ def test_2719(self): props1 = queue.deqone() self.assertIsNone(props1) + @unittest.skipIf( + test_env.get_is_thin(), "thin mode doesn't support notification yet" + ) def test_2720(self): "2720 - verify attributes of AQ message which spawned notification" if self.is_on_oracle_cloud(self.conn): @@ -524,6 +536,10 @@ def notification_callback(message): self.assertTrue(condition.wait(5)) conn.unsubscribe(sub) + @unittest.skipIf( + test_env.get_is_thin(), + "thin mode doesn't support JSON payload for AQ yet", + ) def test_2721(self): "2721 - test enqueuing and dequeuing JSON payloads" queue = self.get_and_clear_queue(self.json_queue_name, "JSON") @@ -542,6 +558,10 @@ def test_2721(self): self.conn.commit() self.assertEqual(results, self.json_data) + @unittest.skipIf( + test_env.get_is_thin(), + "thin mode doesn't support JSON payload for AQ yet", + ) def test_2722(self): "2722 - test enqueuing to a JSON queue without a JSON payload" queue = self.get_and_clear_queue(self.json_queue_name, "JSON") @@ -616,6 +636,10 @@ def test_2728(self): with self.assertRaises(AttributeError): queue.deqoptions.deliverymode + @unittest.skipIf( + test_env.get_is_thin(), + "Thin mode doesn't support enqmany and deqmany yet", + ) def test_2729(self): "2729 - test correlation deqoption" queue = self.get_and_clear_queue( @@ -643,6 +667,10 @@ def test_2729(self): correlated_messages = queue.deqmany(num_messages + 1) self.assertEqual(len(correlated_messages), num_messages) + @unittest.skipIf( + test_env.get_is_thin(), + "Thin mode doesn't support enqmany and deqmany yet", + ) def test_2730(self): "2730 - test correlation deqoption with pattern-matching characters" queue = self.get_and_clear_queue( @@ -660,6 +688,10 @@ def test_2730(self): messages = queue.deqmany(5) self.assertEqual(len(messages), 2) + @unittest.skipIf( + test_env.get_is_thin(), + "Thin mode doesn't support enqmany and deqmany yet", + ) def test_2731(self): "2731 - test condition deqoption with priority" queue = self.get_and_clear_queue( @@ -687,6 +719,10 @@ def test_2731(self): data = book.TITLE, 
book.AUTHORS, book.PRICE self.assertEqual(data, self.book_data[ix]) + @unittest.skipIf( + test_env.get_is_thin(), + "Thin mode doesn't support enqmany and deqmany yet", + ) def test_2732(self): "2732 - test mode deqoption with DEQ_REMOVE_NODATA" queue = self.get_and_clear_queue( diff --git a/tests/test_7800_aq_raw.py b/tests/test_7800_aq_raw.py new file mode 100644 index 00000000..6bfab15a --- /dev/null +++ b/tests/test_7800_aq_raw.py @@ -0,0 +1,400 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +""" +7800 - Module for testing AQ with raw queues +""" + +import oracledb +import test_env + + +class TestCase(test_env.BaseTestCase): + raw_data = [ + b"sample raw data 1", + b"sample raw data 2", + b"sample raw data 3", + b"sample raw data 4", + b"sample raw data 5", + b"sample raw data 6", + ] + + def __verify_attr(self, obj, attrName, value): + setattr(obj, attrName, value) + self.assertEqual(getattr(obj, attrName), value) + + def test_7800(self): + "7800 - test dequeuing an empty RAW queue" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + props = queue.deqone() + self.assertIsNone(props) + + def test_7801(self): + "7801 - test enqueuing and dequeuing multiple RAW messages" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + props = self.conn.msgproperties() + for value in self.raw_data: + props.payload = value + queue.enqone(props) + self.conn.commit() + queue.deqoptions.navigation = oracledb.DEQ_FIRST_MSG + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + results = [] + while True: + props = queue.deqone() + if props is None: + break + value = props.payload + results.append(value) + self.conn.commit() + self.assertEqual(results, self.raw_data) + + def test_7802(self): + "7802 - test dequeuing with DEQ_REMOVE_NODATA in RAW queue" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[1] + props = self.conn.msgproperties(payload=value) + queue.enqone(props) + queue.deqoptions.navigation = oracledb.DEQ_FIRST_MSG + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + queue.deqoptions.mode = oracledb.DEQ_REMOVE_NODATA + props = queue.deqone() + self.assertIsNotNone(props) + self.assertEqual(props.payload, b"") + + def test_7803(self): + "7803 - test getting/setting dequeue options attributes" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + options = queue.deqoptions + self.__verify_attr(options, "condition", "TEST_CONDITION") + 
self.__verify_attr(options, "consumername", "TEST_CONSUMERNAME") + self.__verify_attr(options, "correlation", "TEST_CORRELATION") + self.__verify_attr(options, "mode", oracledb.DEQ_LOCKED) + self.__verify_attr( + options, "navigation", oracledb.DEQ_NEXT_TRANSACTION + ) + self.__verify_attr(options, "transformation", "TEST_TRANSFORMATION") + self.__verify_attr(options, "visibility", oracledb.ENQ_IMMEDIATE) + self.__verify_attr(options, "wait", 1287) + self.__verify_attr(options, "msgid", b"mID") + + def test_7804(self): + "7804 - test enqueue options attributes RAW queue" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + options = queue.enqoptions + self.__verify_attr(options, "visibility", oracledb.ENQ_IMMEDIATE) + + def test_7805(self): + "7805 - test errors for invalid values for enqueue" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + self.assertRaises(TypeError, queue.enqone, value) + + def test_7806(self): + "7806 - test getting/setting message properties attributes" + props = self.conn.msgproperties() + self.__verify_attr(props, "correlation", "TEST_CORRELATION") + self.__verify_attr(props, "delay", 60) + self.__verify_attr(props, "exceptionq", "TEST_EXCEPTIONQ") + self.__verify_attr(props, "expiration", 30) + self.assertEqual(props.attempts, 0) + self.__verify_attr(props, "priority", 1) + self.assertEqual(props.state, oracledb.MSG_READY) + self.assertEqual(props.deliverymode, 0) + + def test_7807(self): + "7807 - test enqueue visibility option - ENQ_ON_COMMIT" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + queue.enqoptions.visibility = oracledb.ENQ_ON_COMMIT + props = self.conn.msgproperties(payload=value) + queue.enqone(props) + + other_conn = test_env.get_connection() + queue = other_conn.queue("TEST_RAW_QUEUE") + queue.deqoptions.navigation = oracledb.DEQ_FIRST_MSG + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + props = queue.deqone() + self.assertIsNone(props) + self.conn.commit() + props = queue.deqone() + self.assertIsNotNone(props) + + def test_7808(self): + "7808 - test enqueue visibility option - ENQ_IMMEDIATE" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + queue.enqoptions.visibility = oracledb.ENQ_IMMEDIATE + props = self.conn.msgproperties(payload=value) + queue.enqone(props) + + other_conn = test_env.get_connection() + queue = other_conn.queue("TEST_RAW_QUEUE") + queue.deqoptions.navigation = oracledb.DEQ_FIRST_MSG + queue.deqoptions.visibility = oracledb.DEQ_ON_COMMIT + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + props = queue.deqone() + value = props.payload + results = value + other_conn.commit() + self.assertEqual(results, self.raw_data[0]) + + def test_7809(self): + "7809 - test enqueue/dequeue delivery modes identical - buffered" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + queue.enqoptions.deliverymode = oracledb.MSG_BUFFERED + queue.enqoptions.visibility = oracledb.ENQ_IMMEDIATE + props = self.conn.msgproperties(payload=value) + queue.enqone(props) + + other_conn = test_env.get_connection() + queue = other_conn.queue("TEST_RAW_QUEUE") + queue.deqoptions.deliverymode = oracledb.MSG_BUFFERED + queue.deqoptions.navigation = oracledb.DEQ_FIRST_MSG + queue.deqoptions.visibility = oracledb.DEQ_IMMEDIATE + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + props = queue.deqone() + value = props.payload + results = value + other_conn.commit() + self.assertEqual(results, self.raw_data[0]) + + def test_7810(self): + "7810 - test 
enqueue/dequeue delivery modes identical - persistent" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + queue.enqoptions.deliverymode = oracledb.MSG_PERSISTENT + queue.enqoptions.visibility = oracledb.ENQ_IMMEDIATE + props = self.conn.msgproperties(payload=value) + queue.enqone(props) + + other_conn = test_env.get_connection() + queue = other_conn.queue("TEST_RAW_QUEUE") + queue.deqoptions.deliverymode = oracledb.MSG_PERSISTENT + queue.deqoptions.navigation = oracledb.DEQ_FIRST_MSG + queue.deqoptions.visibility = oracledb.DEQ_IMMEDIATE + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + props = queue.deqone() + value = props.payload + results = value + other_conn.commit() + self.assertEqual(results, self.raw_data[0]) + + def test_7811(self): + "7811 - test enqueue/dequeue delivery modes the same" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + queue.enqoptions.deliverymode = oracledb.MSG_PERSISTENT_OR_BUFFERED + queue.enqoptions.visibility = oracledb.ENQ_IMMEDIATE + props = self.conn.msgproperties(payload=value) + queue.enqone(props) + + other_conn = test_env.get_connection() + queue = other_conn.queue("TEST_RAW_QUEUE") + queue.deqoptions.deliverymode = oracledb.MSG_PERSISTENT_OR_BUFFERED + queue.deqoptions.navigation = oracledb.DEQ_FIRST_MSG + queue.deqoptions.visibility = oracledb.DEQ_IMMEDIATE + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + props = queue.deqone() + value = props.payload + results = value + other_conn.commit() + self.assertEqual(results, self.raw_data[0]) + + def test_7812(self): + "7812 - test enqueue/dequeue delivery modes different" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + queue.enqoptions.deliverymode = oracledb.MSG_BUFFERED + queue.enqoptions.visibility = oracledb.ENQ_IMMEDIATE + props = self.conn.msgproperties(payload=value) + queue.enqone(props) + + other_conn = test_env.get_connection() + queue = other_conn.queue("TEST_RAW_QUEUE") + queue.deqoptions.deliverymode = oracledb.MSG_PERSISTENT + queue.deqoptions.navigation = oracledb.DEQ_FIRST_MSG + queue.deqoptions.visibility = oracledb.DEQ_IMMEDIATE + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + props = queue.deqone() + self.assertIsNone(props) + + def test_7813(self): + "7813 - test error for message with no payload" + queue = self.conn.queue("TEST_RAW_QUEUE") + props = self.conn.msgproperties() + with self.assertRaisesFullCode("DPY-2000"): + queue.enqone(props) + + def test_7814(self): + "7814 - verify that the msgid property is returned correctly" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + props = self.conn.msgproperties(payload=value) + self.assertIsNone(props.msgid) + queue.enqone(props) + self.cursor.execute("select msgid from RAW_QUEUE_TAB") + (actual_msgid,) = self.cursor.fetchone() + self.assertEqual(props.msgid, actual_msgid) + props = queue.deqone() + self.assertEqual(props.msgid, actual_msgid) + + def test_7815(self): + "7815 - test message props enqtime" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + self.cursor.execute("select current_timestamp from dual") + (start_date,) = self.cursor.fetchone() + start_date = start_date.replace(microsecond=0) + props = self.conn.msgproperties(payload=value) + queue.enqone(props) + props = queue.deqone() + self.cursor.execute("select current_timestamp from dual") + (end_date,) = self.cursor.fetchone() + end_date = end_date.replace(microsecond=0) + self.assertTrue(start_date <= props.enqtime 
<= end_date) + + def test_7816(self): + "7816 - test message props declared attributes" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + values = dict( + payload=value, + correlation="TEST_CORRELATION", + delay=0, + exceptionq="PYTHONTEST.TEST_EXCEPTIONQ", + expiration=15, + priority=1, + ) + props = self.conn.msgproperties(**values) + for attr_name in values: + self.assertEqual(getattr(props, attr_name), values[attr_name]) + queue.enqone(props) + self.conn.commit() + prop = queue.deqone() + for attr_name in values: + self.assertEqual(getattr(prop, attr_name), values[attr_name]) + + def test_7817(self): + "7817 - test getting queue attributes" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + self.assertEqual(queue.name, "TEST_RAW_QUEUE") + self.assertEqual(queue.connection, self.conn) + + def test_7818(self): + "7818 - test getting write-only attributes" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + for options in (queue.enqoptions, queue.deqoptions): + with self.assertRaises(AttributeError): + options.deliverymode + + def test_7819(self): + "7819 - test deqoption condition with priority" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + priorities = [5, 5, 5, 5, 10, 9, 9, 10, 9] + for priority in priorities: + value = self.raw_data[0] + props = self.conn.msgproperties(payload=value, priority=priority) + queue.enqone(props) + + queue.deqoptions.condition = "priority = 9" + results = [] + while True: + props = queue.deqone() + if props is None: + break + results.append(props.payload) + self.conn.commit() + self.assertEqual(len(results), 3) + + def test_7820(self): + "7820 - test deqoption correlation" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + correlations = [ + "sample", + "sample correlation", + "sample", + "sample", + "sample correlation", + ] + for correlation in correlations: + value = self.raw_data[0] + props = self.conn.msgproperties( + payload=value, correlation=correlation + ) + queue.enqone(props) + self.conn.commit() + queue.deqoptions.correlation = "sample correlation" + results = [] + while True: + props = queue.deqone() + if props is None: + break + results.append(props.payload) + self.conn.commit() + self.assertEqual(len(results), 2) + + def test_7821(self): + "7821 - test deqoption msgid" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + value = self.raw_data[0] + props = self.conn.msgproperties(payload=value) + queue.enqone(props) + queue.enqone(props) + self.conn.commit() + msgid = props.msgid + queue.enqone(props) + self.conn.commit() + queue.deqoptions.wait = oracledb.DEQ_NO_WAIT + queue.deqoptions.msgid = msgid + prop = queue.deqone() + self.conn.commit() + self.assertEqual(prop.msgid, msgid) + + def test_7822(self): + "7822 - test payload_type returns the correct value" + queue = self.conn.queue("TEST_RAW_QUEUE") + self.assertIsNone(queue.payload_type) + + def test_7823(self): + "7823 - test deprecated attributes (enqOptions, deqOptions)" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + self.assertEqual(queue.enqOptions, queue.enqoptions) + self.assertEqual(queue.deqOptions, queue.deqoptions) + + def test_7824(self): + "7824 - test deprecated AQ methods (enqOne, deqOne)" + value = b"Test 7823" + queue = self.get_and_clear_queue("TEST_RAW_QUEUE") + queue.enqOne(self.conn.msgproperties(value)) + props = queue.deqOne() + self.assertEqual(props.payload, value) + + +if __name__ == "__main__": + 
test_env.run_test_cases() From 82fc62b11c6eea52a751c38e472d158b7b287538 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Fri, 14 Feb 2025 09:21:05 -0700 Subject: [PATCH 041/178] Simplify generated code. --- src/oracledb/connect_params.py | 97 +++++++++++++------------ src/oracledb/pool_params.py | 125 ++++++++++++++++----------------- utils/build_from_template.py | 9 ++- 3 files changed, 114 insertions(+), 117 deletions(-) diff --git a/src/oracledb/connect_params.py b/src/oracledb/connect_params.py index 315b9161..548c70bc 100644 --- a/src/oracledb/connect_params.py +++ b/src/oracledb/connect_params.py @@ -319,55 +319,54 @@ def __init__( def __repr__(self): return ( - self.__class__.__qualname__ - + "(" - + f"user={self.user!r}, " - + f"proxy_user={self.proxy_user!r}, " - + f"host={self.host!r}, " - + f"port={self.port!r}, " - + f"protocol={self.protocol!r}, " - + f"https_proxy={self.https_proxy!r}, " - + f"https_proxy_port={self.https_proxy_port!r}, " - + f"service_name={self.service_name!r}, " - + f"instance_name={self.instance_name!r}, " - + f"sid={self.sid!r}, " - + f"server_type={self.server_type!r}, " - + f"cclass={self.cclass!r}, " - + f"purity={self.purity!r}, " - + f"expire_time={self.expire_time!r}, " - + f"retry_count={self.retry_count!r}, " - + f"retry_delay={self.retry_delay!r}, " - + f"tcp_connect_timeout={self.tcp_connect_timeout!r}, " - + f"ssl_server_dn_match={self.ssl_server_dn_match!r}, " - + f"ssl_server_cert_dn={self.ssl_server_cert_dn!r}, " - + f"wallet_location={self.wallet_location!r}, " - + f"events={self.events!r}, " - + f"externalauth={self.externalauth!r}, " - + f"mode={self.mode!r}, " - + f"disable_oob={self.disable_oob!r}, " - + f"stmtcachesize={self.stmtcachesize!r}, " - + f"edition={self.edition!r}, " - + f"tag={self.tag!r}, " - + f"matchanytag={self.matchanytag!r}, " - + f"config_dir={self.config_dir!r}, " - + f"appcontext={self.appcontext!r}, " - + f"shardingkey={self.shardingkey!r}, " - + f"supershardingkey={self.supershardingkey!r}, " - + f"debug_jdwp={self.debug_jdwp!r}, " - + f"connection_id_prefix={self.connection_id_prefix!r}, " - + f"ssl_context={self.ssl_context!r}, " - + f"sdu={self.sdu!r}, " - + f"pool_boundary={self.pool_boundary!r}, " - + f"use_tcp_fast_open={self.use_tcp_fast_open!r}, " - + f"ssl_version={self.ssl_version!r}, " - + f"program={self.program!r}, " - + f"machine={self.machine!r}, " - + f"terminal={self.terminal!r}, " - + f"osuser={self.osuser!r}, " - + f"driver_name={self.driver_name!r}, " - + f"use_sni={self.use_sni!r}, " - + f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}" - + ")" + self.__class__.__qualname__ + "(" + f"user={self.user!r}, " + f"proxy_user={self.proxy_user!r}, " + f"host={self.host!r}, " + f"port={self.port!r}, " + f"protocol={self.protocol!r}, " + f"https_proxy={self.https_proxy!r}, " + f"https_proxy_port={self.https_proxy_port!r}, " + f"service_name={self.service_name!r}, " + f"instance_name={self.instance_name!r}, " + f"sid={self.sid!r}, " + f"server_type={self.server_type!r}, " + f"cclass={self.cclass!r}, " + f"purity={self.purity!r}, " + f"expire_time={self.expire_time!r}, " + f"retry_count={self.retry_count!r}, " + f"retry_delay={self.retry_delay!r}, " + f"tcp_connect_timeout={self.tcp_connect_timeout!r}, " + f"ssl_server_dn_match={self.ssl_server_dn_match!r}, " + f"ssl_server_cert_dn={self.ssl_server_cert_dn!r}, " + f"wallet_location={self.wallet_location!r}, " + f"events={self.events!r}, " + f"externalauth={self.externalauth!r}, " + f"mode={self.mode!r}, " + 
f"disable_oob={self.disable_oob!r}, " + f"stmtcachesize={self.stmtcachesize!r}, " + f"edition={self.edition!r}, " + f"tag={self.tag!r}, " + f"matchanytag={self.matchanytag!r}, " + f"config_dir={self.config_dir!r}, " + f"appcontext={self.appcontext!r}, " + f"shardingkey={self.shardingkey!r}, " + f"supershardingkey={self.supershardingkey!r}, " + f"debug_jdwp={self.debug_jdwp!r}, " + f"connection_id_prefix={self.connection_id_prefix!r}, " + f"ssl_context={self.ssl_context!r}, " + f"sdu={self.sdu!r}, " + f"pool_boundary={self.pool_boundary!r}, " + f"use_tcp_fast_open={self.use_tcp_fast_open!r}, " + f"ssl_version={self.ssl_version!r}, " + f"program={self.program!r}, " + f"machine={self.machine!r}, " + f"terminal={self.terminal!r}, " + f"osuser={self.osuser!r}, " + f"driver_name={self.driver_name!r}, " + f"use_sni={self.use_sni!r}, " + f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}" + ")" ) def _flatten_value(f): diff --git a/src/oracledb/pool_params.py b/src/oracledb/pool_params.py index f5b67b0a..307f9370 100644 --- a/src/oracledb/pool_params.py +++ b/src/oracledb/pool_params.py @@ -388,69 +388,68 @@ def __init__( def __repr__(self): return ( - self.__class__.__qualname__ - + "(" - + f"min={self.min!r}, " - + f"max={self.max!r}, " - + f"increment={self.increment!r}, " - + f"connectiontype={self.connectiontype!r}, " - + f"getmode={self.getmode!r}, " - + f"homogeneous={self.homogeneous!r}, " - + f"timeout={self.timeout!r}, " - + f"wait_timeout={self.wait_timeout!r}, " - + f"max_lifetime_session={self.max_lifetime_session!r}, " - + f"session_callback={self.session_callback!r}, " - + f"max_sessions_per_shard={self.max_sessions_per_shard!r}, " - + f"soda_metadata_cache={self.soda_metadata_cache!r}, " - + f"ping_interval={self.ping_interval!r}, " - + f"ping_timeout={self.ping_timeout!r}, " - + f"user={self.user!r}, " - + f"proxy_user={self.proxy_user!r}, " - + f"host={self.host!r}, " - + f"port={self.port!r}, " - + f"protocol={self.protocol!r}, " - + f"https_proxy={self.https_proxy!r}, " - + f"https_proxy_port={self.https_proxy_port!r}, " - + f"service_name={self.service_name!r}, " - + f"instance_name={self.instance_name!r}, " - + f"sid={self.sid!r}, " - + f"server_type={self.server_type!r}, " - + f"cclass={self.cclass!r}, " - + f"purity={self.purity!r}, " - + f"expire_time={self.expire_time!r}, " - + f"retry_count={self.retry_count!r}, " - + f"retry_delay={self.retry_delay!r}, " - + f"tcp_connect_timeout={self.tcp_connect_timeout!r}, " - + f"ssl_server_dn_match={self.ssl_server_dn_match!r}, " - + f"ssl_server_cert_dn={self.ssl_server_cert_dn!r}, " - + f"wallet_location={self.wallet_location!r}, " - + f"events={self.events!r}, " - + f"externalauth={self.externalauth!r}, " - + f"mode={self.mode!r}, " - + f"disable_oob={self.disable_oob!r}, " - + f"stmtcachesize={self.stmtcachesize!r}, " - + f"edition={self.edition!r}, " - + f"tag={self.tag!r}, " - + f"matchanytag={self.matchanytag!r}, " - + f"config_dir={self.config_dir!r}, " - + f"appcontext={self.appcontext!r}, " - + f"shardingkey={self.shardingkey!r}, " - + f"supershardingkey={self.supershardingkey!r}, " - + f"debug_jdwp={self.debug_jdwp!r}, " - + f"connection_id_prefix={self.connection_id_prefix!r}, " - + f"ssl_context={self.ssl_context!r}, " - + f"sdu={self.sdu!r}, " - + f"pool_boundary={self.pool_boundary!r}, " - + f"use_tcp_fast_open={self.use_tcp_fast_open!r}, " - + f"ssl_version={self.ssl_version!r}, " - + f"program={self.program!r}, " - + f"machine={self.machine!r}, " - + f"terminal={self.terminal!r}, " - + 
f"osuser={self.osuser!r}, " - + f"driver_name={self.driver_name!r}, " - + f"use_sni={self.use_sni!r}, " - + f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}" - + ")" + self.__class__.__qualname__ + "(" + f"min={self.min!r}, " + f"max={self.max!r}, " + f"increment={self.increment!r}, " + f"connectiontype={self.connectiontype!r}, " + f"getmode={self.getmode!r}, " + f"homogeneous={self.homogeneous!r}, " + f"timeout={self.timeout!r}, " + f"wait_timeout={self.wait_timeout!r}, " + f"max_lifetime_session={self.max_lifetime_session!r}, " + f"session_callback={self.session_callback!r}, " + f"max_sessions_per_shard={self.max_sessions_per_shard!r}, " + f"soda_metadata_cache={self.soda_metadata_cache!r}, " + f"ping_interval={self.ping_interval!r}, " + f"ping_timeout={self.ping_timeout!r}, " + f"user={self.user!r}, " + f"proxy_user={self.proxy_user!r}, " + f"host={self.host!r}, " + f"port={self.port!r}, " + f"protocol={self.protocol!r}, " + f"https_proxy={self.https_proxy!r}, " + f"https_proxy_port={self.https_proxy_port!r}, " + f"service_name={self.service_name!r}, " + f"instance_name={self.instance_name!r}, " + f"sid={self.sid!r}, " + f"server_type={self.server_type!r}, " + f"cclass={self.cclass!r}, " + f"purity={self.purity!r}, " + f"expire_time={self.expire_time!r}, " + f"retry_count={self.retry_count!r}, " + f"retry_delay={self.retry_delay!r}, " + f"tcp_connect_timeout={self.tcp_connect_timeout!r}, " + f"ssl_server_dn_match={self.ssl_server_dn_match!r}, " + f"ssl_server_cert_dn={self.ssl_server_cert_dn!r}, " + f"wallet_location={self.wallet_location!r}, " + f"events={self.events!r}, " + f"externalauth={self.externalauth!r}, " + f"mode={self.mode!r}, " + f"disable_oob={self.disable_oob!r}, " + f"stmtcachesize={self.stmtcachesize!r}, " + f"edition={self.edition!r}, " + f"tag={self.tag!r}, " + f"matchanytag={self.matchanytag!r}, " + f"config_dir={self.config_dir!r}, " + f"appcontext={self.appcontext!r}, " + f"shardingkey={self.shardingkey!r}, " + f"supershardingkey={self.supershardingkey!r}, " + f"debug_jdwp={self.debug_jdwp!r}, " + f"connection_id_prefix={self.connection_id_prefix!r}, " + f"ssl_context={self.ssl_context!r}, " + f"sdu={self.sdu!r}, " + f"pool_boundary={self.pool_boundary!r}, " + f"use_tcp_fast_open={self.use_tcp_fast_open!r}, " + f"ssl_version={self.ssl_version!r}, " + f"program={self.program!r}, " + f"machine={self.machine!r}, " + f"terminal={self.terminal!r}, " + f"osuser={self.osuser!r}, " + f"driver_name={self.driver_name!r}, " + f"use_sni={self.use_sni!r}, " + f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}" + ")" ) @property diff --git a/utils/build_from_template.py b/utils/build_from_template.py index 49a84440..61f5f5ef 100644 --- a/utils/build_from_template.py +++ b/utils/build_from_template.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2022, 2023, Oracle and/or its affiliates. +# Copyright (c) 2022, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -312,7 +312,7 @@ def params_repr_content(indent): Generates the content for the params_repr template tag. 
""" parts = [ - f'\n{indent} + f"{field.name}={{self.{field.name}!r}}, "' + f'\n{indent} f"{field.name}={{self.{field.name}!r}}, "' for field in fields if not field.hidden ] @@ -321,10 +321,9 @@ def params_repr_content(indent): return ( func_def + f"\n{indent} return (" - + f"\n{indent} self.__class__.__qualname__" - + f'\n{indent} + "("' + + f'\n{indent} self.__class__.__qualname__ + "("' + "".join(parts) - + f'\n{indent} + ")"' + + f'\n{indent} ")"' + f"\n{indent} )" ) From 08e669a4e64fbfcf93eb6e9ae89c96fb04724a1c Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Mon, 17 Feb 2025 19:35:07 -0700 Subject: [PATCH 042/178] Added preliminary support for fetching data as Apache Arrow arrays with zero copy interchange with popular data frame libraries (#375). --- .gitignore | 4 + THIRD_PARTY_LICENSES.txt | 244 + doc/src/api_manual/connection.rst | 55 + doc/src/api_manual/dataframe.rst | 222 + doc/src/api_manual/defaults.rst | 4 + doc/src/index.rst | 1 + doc/src/release_notes.rst | 4 + doc/src/user_guide/sql_execution.rst | 363 +- doc/src/user_guide/tuning.rst | 11 + samples/dataframe_numpy.py | 71 + samples/dataframe_pandas.py | 90 + samples/dataframe_parquet_write.py | 87 + samples/dataframe_polars.py | 67 + samples/dataframe_pyarrow.py | 95 + samples/dataframe_torch.py | 67 + samples/sql/create_schema.sql | 38 + setup.cfg | 1 + setup.py | 26 +- src/oracledb/__init__.py | 6 +- src/oracledb/base_impl.pxd | 19 + src/oracledb/base_impl.pyx | 18 +- src/oracledb/connection.py | 37 + src/oracledb/errors.py | 9 + src/oracledb/impl/base/converters.pyx | 127 + src/oracledb/impl/base/cursor.pyx | 52 +- src/oracledb/impl/base/metadata.pyx | 36 +- src/oracledb/impl/base/utils.pyx | 2 + src/oracledb/impl/base/var.pyx | 24 +- src/oracledb/impl/thick/cursor.pyx | 15 +- src/oracledb/impl/thick/var.pyx | 80 +- src/oracledb/impl/thin/messages.pyx | 21 +- src/oracledb/interchange/__init__.py | 0 src/oracledb/interchange/buffer.py | 82 + src/oracledb/interchange/column.py | 205 + src/oracledb/interchange/dataframe.py | 151 + .../interchange/nanoarrow/nanoarrow.c | 3872 +++++++++++++++ .../interchange/nanoarrow/nanoarrow.h | 4279 +++++++++++++++++ src/oracledb/interchange/nanoarrow_bridge.pxd | 102 + src/oracledb/interchange/nanoarrow_bridge.pyx | 334 ++ src/oracledb/interchange/protocol.py | 282 ++ src/oracledb/thick_impl.pyx | 5 +- src/oracledb/thin_impl.pyx | 3 +- tests/sql/create_schema.sql | 13 + tests/test_8000_dataframe.py | 481 ++ utils/templates/connection.py | 37 + 45 files changed, 11710 insertions(+), 32 deletions(-) create mode 100644 doc/src/api_manual/dataframe.rst create mode 100644 samples/dataframe_numpy.py create mode 100644 samples/dataframe_pandas.py create mode 100644 samples/dataframe_parquet_write.py create mode 100644 samples/dataframe_polars.py create mode 100644 samples/dataframe_pyarrow.py create mode 100644 samples/dataframe_torch.py create mode 100644 src/oracledb/interchange/__init__.py create mode 100644 src/oracledb/interchange/buffer.py create mode 100644 src/oracledb/interchange/column.py create mode 100644 src/oracledb/interchange/dataframe.py create mode 100644 src/oracledb/interchange/nanoarrow/nanoarrow.c create mode 100644 src/oracledb/interchange/nanoarrow/nanoarrow.h create mode 100644 src/oracledb/interchange/nanoarrow_bridge.pxd create mode 100644 src/oracledb/interchange/nanoarrow_bridge.pyx create mode 100644 src/oracledb/interchange/protocol.py create mode 100644 tests/test_8000_dataframe.py diff --git a/.gitignore b/.gitignore index d672f61d..8c791a41 100644 --- 
a/.gitignore +++ b/.gitignore @@ -6,6 +6,10 @@ build/ dist/ doc/build src/oracledb/*.c +src/oracledb/interchange/*.c tests/ext/config.ini .ipynb_checkpoints/ +.venv*/ +.idea samples/sample.csv +samples/sample.parquet diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt index 5cd97ac0..1c1cf597 100644 --- a/THIRD_PARTY_LICENSES.txt +++ b/THIRD_PARTY_LICENSES.txt @@ -453,3 +453,247 @@ GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +___________________________________________________________________________________________ + +Apache Arrow nanoarrow +Copyright 2023 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +---------- + + + +Apache nanoarrow 0.6.0 + + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +---------- + +Copyright 2015-2023 Mikkel F. Jørgensen, dvide.com +Copyright (c) 2016 Mikkel Fahnøe Jørgensen, dvide.com +Copyright (c) 2005-2016 Paul Hsieh +Copyright (c) 2024 Mikkel Fahnøe Jørgensen, dvide.com + +---------- + +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. diff --git a/doc/src/api_manual/connection.rst index a685dcea..c4f5f409 100644 --- a/doc/src/api_manual/connection.rst +++ b/doc/src/api_manual/connection.rst @@ -128,6 +128,61 @@ Connection Methods .. versionadded:: 2.1.0 +.. method:: Connection.fetch_df_all(statement, parameters=None, \ + arraysize=None) + + Fetches all rows of the SQL query ``statement``, returning them in an + :ref:`OracleDataFrame ` object. An empty + OracleDataFrame is returned if there are no rows available. + + The ``parameters`` parameter can be a list of tuples, where each tuple item + maps to one :ref:`bind variable placeholder ` in ``statement``. It + can also be a list of dictionaries, where the keys match the bind variable + placeholder names in ``statement``. + + The ``arraysize`` parameter can be specified to tune performance of fetching + data across the network. It defaults to :attr:`defaults.arraysize`. + Internally, the ``fetch_df_all()``'s :attr:`Cursor.prefetchrows` size is + always set to the value of the explicit or default ``arraysize`` parameter + value. + + See :ref:`dataframeformat` for the supported data types and examples. + + .. note:: + + The data frame support in python-oracledb 3.0.0 is a pre-release and + may change in the next version. + + .. versionadded:: 3.0.0 + +..
method:: Connection.fetch_df_batches(statement, parameters=None, \ + size=None) + + This returns an iterator yielding the next ``size`` rows of the SQL query + ``statement`` in each iteration as an :ref:`OracleDataFrame + ` object. An empty OracleDataFrame is returned if there + are no rows available. + + The ``parameters`` parameter can be a list of tuples, where each tuple item + maps to one :ref:`bind variable placeholder ` in ``statement``. It + can also be a list of dictionaries, where the keys match the bind variable + placeholder names in ``statement``. + + The ``size`` parameter controls the number of records fetched in each + batch. It defaults to :attr:`defaults.arraysize`. Internally, the + ``fetch_df_batches()``'s :attr:`Cursor.arraysize`. and + :attr:`Cursor.prefetchrows` sizes are always set to the value of the + explicit or default ``size`` parameter value. + + See :ref:`dataframeformat` for the supported data types and examples. + + .. note:: + + The data frame support in python-oracledb 3.0.0 is a pre-release and + may change in the next version. + + .. versionadded:: 3.0.0 + .. method:: Connection.getSodaDatabase() Returns a :ref:`SodaDatabase ` object for Simple Oracle Document diff --git a/doc/src/api_manual/dataframe.rst b/doc/src/api_manual/dataframe.rst new file mode 100644 index 00000000..c1d7ec12 --- /dev/null +++ b/doc/src/api_manual/dataframe.rst @@ -0,0 +1,222 @@ +.. _oracledataframeobj: + +**************** +API: Data Frames +**************** + +Python-oracledb can fetch directly to the `Python DataFrame Interchange +Protocol `__ +format. + +See :ref:`dataframeformat` for more information, including the type mapping +from Oracle Database types to Arrow data types. + +.. note:: + + The data frame support in python-oracledb 3.0.0 is a pre-release and may + change in the next version. + +OracleDataFrame Objects +======================= + +OracleDataFrame objects are returned from the methods +:meth:`Connection.fetch_df_all()` and :meth:`Connection.fetch_df_batches()`. + +The OracleDataFrame object is an extension to the DB API. + +.. versionadded:: 3.0.0 + +.. _oracledataframemeth: + +OracleDataFrame Methods +----------------------- + +The object implements the Python DataFrame Interchange Protocol `DataFrame API +Interface `__ + +.. method:: OracleDataFrame.column_arrays() + + Returns a list of :ref:`OracleArrowArray ` objects, + each containing a select list column. + + This is an extension to the DataFrame Interchange Protocol. + +.. method:: OracleDataFrame.column_names() + + Returns a list of the column names in the data frame. + +.. method:: OracleDataFrame.get_chunks(n_chunks) + + Returns itself, since python-oracledb only uses one chunk. + +.. method:: OracleDataFrame.get_column(i) + + Returns an :ref:`OracleColumn ` object for the column + at the given index ``i``. + +.. method:: OracleDataFrame.get_column_by_name(name) + + Returns an :ref:`OracleColumn ` object for the column + with the given name ``name``. + +.. method:: OracleDataFrame.get_columns() + + Returns a list of :ref:`OracleColumn ` objects, one + object for each column in the data frame. + +.. method:: OracleDataFrame.num_chunks() + + Return the number of chunks the data frame consists of. + + This always returns 1. + +.. method:: OracleDataFrame.num_columns() + + Returns the number of columns in the data frame. + +.. method:: OracleDataFrame.num_rows() + + Returns the number of rows in the data frame. + +.. _oracledataframeattr: + +OracleDataFrame Attributes +-------------------------- + +.. 
attribute:: OracleDataFrame.metadata + + This read-only attribute returns the metadata for the data frame as a + dictionary with keys ``num_columns``, ``num_rows``, and ``num_chunks``, + showing the number of columns, rows, and chunks, respectively. The number + of chunks is always 1 in python-oracledb. + +.. _oraclearrowarrayobj: + +OracleArrowArray Objects +======================== + +OracleArrowArray objects are returned by +:meth:`OracleDataFrame.column_arrays()`. + +These are used for conversion to `PyArrow Tables +`__, see +:ref:`dataframeformat`. + +.. versionadded:: 3.0.0 + +.. _oraclecolumnobj: + +OracleColumn Objects +==================== + +OracleColumn objects are returned by :meth:`OracleDataFrame.get_column()`, +:meth:`OracleDataFrame.get_column_by_name()`, and +:meth:`OracleDataFrame.get_columns()`. + +.. versionadded:: 3.0.0 + +.. _oraclecolumnmeth: + +OracleColumn Methods +-------------------- + +.. method:: OracleColumn.get_buffers() + + Returns a dictionary containing the underlying buffers. + + The returned dictionary contains the ``data``, ``validity``, and ``offset`` + keys. + + The ``data`` attribute is a two-element tuple whose first element is a + buffer containing the data and whose second element is the data buffer's + associated dtype. + + The ``validity`` attribute is a two-element tuple whose first element + is a buffer containing mask values indicating missing data and whose + second element is the mask value buffer's associated dtype. The value of + this attribute is *None* if the null representation is not a bit or byte + mask. + + The ``offset`` attribute is a two-element tuple whose first element is a + buffer containing the offset values for variable-size binary data (for + example, variable-length strings) and whose second element is the offsets + buffer's associated dtype. The value of this attribute is *None* if the + data buffer does not have an associated offsets buffer. + +.. method:: OracleColumn.get_chunks(n_chunks) + + Returns itself, since python-oracledb only uses one chunk. + +.. method:: OracleColumn.num_chunks() + + Returns the number of chunks the column consists of. + + This always returns 1. + +.. method:: OracleColumn.size() + + Returns the number of rows in the column. + +.. _oraclecolumnattr: + +OracleColumn Attributes +----------------------- + +.. attribute:: OracleColumn.describe_null + + This read-only property returns the description of the null representation + that the column uses. + +.. attribute:: OracleColumn.dtype + + This read-only attribute returns the Dtype description as a tuple + containing the values for the attributes ``kind``, ``bit-width``, + ``format string``, and ``endianness``. + + The ``kind`` attribute specifies the type of the data. + + The ``bit-width`` attribute specifies the number of bits as an integer. + + The ``format string`` attribute specifies the data type description format + string in Apache Arrow C Data Interface format. + + The ``endianness`` attribute specifies the byte order of the data type. + Currently, only native endianness is supported. + +.. attribute:: OracleColumn.metadata + + This read-only attribute returns the metadata for the column as a + dictionary with string keys. + +.. attribute:: OracleColumn.null_count + + This read-only attribute returns the number of null row values, if known. + +.. attribute:: OracleColumn.offset + + This read-only attribute specifies the offset of the first row. + +..
_oraclecolumnbufferobj: + +OracleColumnBuffer Objects +========================== + +A buffer object backed by an ArrowArray consisting of a single chunk. + +This is an internal class used for conversion to third party data frames. + +.. versionadded:: 3.0.0 + +.. _oraclecolumnbufferattr: + +OracleColumnBuffer Attributes +----------------------------- + +.. attribute:: OracleColumnBuffer.bufsize + + This read-only property returns the buffer size in bytes. + +.. attribute:: OracleColumnBuffer.ptr + + This read-only attribute specifies the pointer to the start of the buffer + as an integer. diff --git a/doc/src/api_manual/defaults.rst b/doc/src/api_manual/defaults.rst index c429d027..120b11fc 100644 --- a/doc/src/api_manual/defaults.rst +++ b/doc/src/api_manual/defaults.rst @@ -128,6 +128,10 @@ Defaults Attributes The default value for :attr:`Cursor.prefetchrows`. This is a query tuning attribute, see :ref:`Tuning Fetch Performance `. + This attribute is ignored when using :meth:`Connection.fetch_df_all()` or + :meth:`Connection.fetch_df_batches()` since these methods always set the + internal prefetch size to the relevant arraysize or size value. + This attribute has an initial value of *2*. .. attribute:: defaults.program diff --git a/doc/src/index.rst b/doc/src/index.rst index a6f418ca..cd2c9c28 100644 --- a/doc/src/index.rst +++ b/doc/src/index.rst @@ -61,6 +61,7 @@ API Manual api_manual/connection_pool.rst api_manual/pool_params.rst api_manual/cursor.rst + api_manual/dataframe.rst api_manual/fetch_info.rst api_manual/variable.rst api_manual/subscription.rst diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index d24e4834..41842d15 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -88,6 +88,10 @@ Thick Mode Changes Common Changes ++++++++++++++ +#) Added new methods :meth:`Connection.fetch_df_all()` and + :meth:`Connection.fetch_df_batches()` to fetch data as DataFrames + compliant with the Python DataFrame Interchange protocol. See + :ref:`dataframeformat`. #) Added support for Oracle Database 23ai SPARSE vectors. #) Added support for :ref:`naming and caching connection pools ` during creation, and retrieving them later from the diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst index 44560fd5..d3b63fa7 100644 --- a/doc/src/user_guide/sql_execution.rst +++ b/doc/src/user_guide/sql_execution.rst @@ -5,12 +5,19 @@ Executing SQL ************* Executing SQL statements is the primary way in which a Python application -communicates with Oracle Database. Statements are executed using the methods -:meth:`Cursor.execute()` or :meth:`Cursor.executemany()`. Statements include -queries, Data Manipulation Language (DML), and Data Definition Language (DDL). -A few other `specialty statements -`__ can also be executed. +communicates with Oracle Database. Statements include queries, Data +Manipulation Language (DML), and Data Definition Language (DDL). A few other +`specialty statements `__ can also be +executed. Statements are executed using one of these methods +:meth:`Cursor.execute()`, :meth:`Cursor.executemany()`, +:meth:`Connection.fetch_df_all()`, :meth:`Connection.fetch_df_batches()`, +:meth:`AsyncCursor.execute()`, :meth:`AsyncCursor.executemany()`, +:meth:`AsyncConnection.execute()`, :meth:`AsyncConnection.executemany()`, or +:meth:`AsyncConnection.run_pipeline()`. + +This chapter discusses python-oracledb's synchronous methods. 
The asynchronous +methods and pipelining functionality are discussed in detail in :ref:`asyncio`. PL/SQL statements are discussed in :ref:`plsqlexecution`. Other chapters contain information on specific data types and features. See :ref:`batchstmnt`, @@ -18,8 +25,9 @@ contain information on specific data types and features. See :ref:`batchstmnt`, Python-oracledb can be used to execute individual statements, one at a time. Once a statement has finished execution, only then will the next statement -execute. If you try to execute statements concurrently, the statements are -queued and run consecutively in the order they are in the code. +execute. If you try to execute statements concurrently in a single connection, +the statements are queued and run consecutively in the order they are executed +in the application code. Python-oracledb does not read SQL*Plus ".sql" files. To read SQL files, use a technique like the one in ``run_sql_script()`` in `samples/sample_env.py @@ -30,7 +38,7 @@ SQL statements should not contain a trailing semicolon (";") or forward slash .. code-block:: python - cursor.execute("select * from MyTable;") + cursor.execute("select * from MyTable;") # fails due to semicolon This is correct: @@ -42,8 +50,8 @@ This is correct: SQL Queries =========== -Queries (statements beginning with SELECT or WITH) can only be executed using -the method :meth:`Cursor.execute()`. Rows can then be iterated over, or can be +Queries (statements beginning with SELECT or WITH) can be executed using the +method :meth:`Cursor.execute()`. Rows can then be iterated over, or can be fetched using one of the methods :meth:`Cursor.fetchone()`, :meth:`Cursor.fetchmany()` or :meth:`Cursor.fetchall()`. There is a :ref:`default type mapping ` to Python types that can be @@ -52,9 +60,10 @@ optionally :ref:`overridden `. .. IMPORTANT:: Interpolating or concatenating user data with SQL statements, for example - ``cursor.execute("SELECT * FROM mytab WHERE mycol = '" + myvar + "'")``, is a security risk - and impacts performance. Use :ref:`bind variables ` instead. For - example, ``cursor.execute("SELECT * FROM mytab WHERE mycol = :mybv", mybv=myvar)``. + ``cursor.execute("SELECT * FROM mytab WHERE mycol = '" + myvar + "'")`` is + a security risk and impacts performance. Use :ref:`bind variables ` + instead, for example ``cursor.execute("SELECT * FROM mytab WHERE mycol = + :mybv", mybv=myvar)``. .. _fetching: @@ -120,6 +129,8 @@ Rows can be fetched in various ways. The fetch methods return data as tuples. To return results as dictionaries, see :ref:`rowfactories`. +- Data can also be fetched in Arrow data format, see :ref:`dataframeformat`. + Closing Cursors --------------- @@ -547,7 +558,7 @@ Oracle Database uses decimal numbers and these cannot be converted seamlessly to binary number representations like Python floats. In addition, the range of Oracle numbers exceeds that of floating point numbers. Python has decimal objects which do not have these limitations. In python-oracledb you can set -``oracledb.defaults.fetch_decimals`` so that Decimals are returned to the +:attr:`defaults.fetch_decimals` so that Decimals are returned to the application, ensuring that numeric precision is not lost when fetching certain numbers. 
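As a minimal sketch of this setting (the connection credentials and service name below are placeholders), fetching a computed NUMBER with :attr:`defaults.fetch_decimals` enabled returns a ``decimal.Decimal``:

.. code-block:: python

    import oracledb

    oracledb.defaults.fetch_decimals = True

    connection = oracledb.connect(
        user="hr", password="hr_password", dsn="localhost/orclpdb"
    )
    with connection.cursor() as cursor:
        cursor.execute("select 7.1 * 3 from dual")
        (value,) = cursor.fetchone()
        print(type(value), value)  # <class 'decimal.Decimal'> 21.3

A complete runnable version ships as ``samples/return_numbers_as_decimals.py``.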
@@ -579,7 +590,7 @@ This displays ``7.1 * 3 = 21.3`` See `samples/return_numbers_as_decimals.py `__ -An equivalent, longer, older coding idiom to :attr:`Defaults.fetch_decimals` is +An equivalent, longer, older coding idiom to :attr:`defaults.fetch_decimals` is to use an :ref:`output type handler ` do the conversion. .. code-block:: python @@ -723,6 +734,326 @@ Performance-sensitive applications should consider using scalar types instead of objects. If you do use objects, avoid calling :meth:`Connection.gettype()` unnecessarily, and avoid objects with large numbers of attributes. +.. _dataframeformat: + +Fetching using the DataFrame Interchange Protocol +------------------------------------------------- + +Python-oracledb can fetch directly to the `Python DataFrame Interchange +Protocol `__ +format. This then allows zero-copy data interchanges between Python data frame +libraries. It is an efficient way to work with data using Python libraries such +as `Apache Arrow `__, `Pandas +`__, `Polars `__, `NumPy +`__, `PyTorch `__, or to write files +in `Apache Parquet `__ format. + +.. note:: + + The data frame support in python-oracledb 3.0.0 is a pre-release and may + change in the next version. + +The method :meth:`Connection.fetch_df_all()` fetches all rows from a query. +The method :meth:`Connection.fetch_df_batches()` implements an iterator for +fetching batches of rows. The methods return :ref:`OracleDataFrame +` objects, whose :ref:`methods ` +implement the Python DataFrame Interchange Protocol `DataFrame API Interface +`__. + +For example, to fetch all rows from a query and print some information about +the results: + +.. code-block:: python + + sql = "select * from departments" + # Adjust arraysize to tune the query fetch performance + odf = connection.fetch_df_all(statement=sql, arraysize=100) + + print(odf.column_names()) + print(f"{odf.num_columns()} columns") + print(f"{odf.num_rows()} rows") + +With Oracle Database's standard DEPARTMENTS table, this would display:: + + ['DEPARTMENT_ID', 'DEPARTMENT_NAME', 'MANAGER_ID', 'LOCATION_ID'] + 4 columns + 27 rows + +To do more extensive operations on an :ref:`OracleDataFrame +`, it can be converted to an appropriate library class, and +then methods of that library can be used. For example, it could be converted to +a `Pandas DataFrame `__, or to a `PyArrow table +`__ as shown +later. + +**Data Frame Type Mapping** + +Internally, python-oracledb's :ref:`OracleDataFrame ` +support makes use of `Apache nanoarrow `__ +libraries to build data frames. + +The following data type mapping occurs from Oracle Database types to the Arrow +types used in OracleDataFrame objects. Querying any other types from Oracle +Database will result in an exception. + +.. list-table-with-summary:: + :header-rows: 1 + :class: wy-table-responsive + :widths: 1 1 + :align: left + :summary: The first column is the Oracle Database type. The second column is the Arrow data type used in the OracleDataFrame object. + + * - Oracle Database Type + - Arrow Data Type + * - DB_TYPE_NUMBER + - DECIMAL128, INT64, or DOUBLE.
See notes below + * - DB_TYPE_CHAR + - STRING + * - DB_TYPE_VARCHAR + - STRING + * - DB_TYPE_BINARY_FLOAT + - FLOAT + * - DB_TYPE_BINARY_DOUBLE + - DOUBLE + * - DB_TYPE_BOOLEAN + - BOOLEAN + * - DB_TYPE_DATE + - TIMESTAMP + * - DB_TYPE_TIMESTAMP + - TIMESTAMP + * - DB_TYPE_TIMESTAMP_LTZ + - TIMESTAMP + * - DB_TYPE_TIMESTAMP_TZ + - TIMESTAMP + + +When converting Oracle Database NUMBERs, if :attr:`defaults.fetch_decimals` is +*True*, the Arrow data type is DECIMAL128. Note that Arrow's DECIMAL128 format only +supports a precision of up to 38 decimal digits. Otherwise, if the Oracle number data +type has a scale of 0 and a precision less than or equal to 18, the Arrow +data type is INT64. In all other cases, the Arrow data type is DOUBLE. + +The Arrow TIMESTAMP for Oracle Database DATEs will have a time unit of +"seconds". For Oracle Database TIMESTAMP types, the time unit depends on the +Oracle type's fractional precision: + +.. list-table-with-summary:: + :header-rows: 1 + :class: wy-table-responsive + :widths: 1 1 + :align: left + :summary: The first column is the Oracle Database TIMESTAMP-type fractional second precision. The second column is the resulting Arrow TIMESTAMP time unit. + + * - Oracle Database TIMESTAMP fractional second precision range + - Arrow TIMESTAMP time unit + * - 0 + - seconds + * - 1 - 3 + - milliseconds + * - 4 - 6 + - microseconds + * - 7 - 9 + - nanoseconds + +Arrow TIMESTAMPs will not have timezone data. + +**Inserting OracleDataFrames into Oracle Database** + +Inserting data that is currently in :ref:`OracleDataFrame ` format +into Oracle Database requires it to be converted first. For example, you could +convert it into a Pandas DataFrame for insert with the Pandas method +``to_sql()``. Alternatively, convert it into a Python list via the PyArrow +``Table.to_pylist()`` method and then use standard python-oracledb +functionality to execute a SQL INSERT statement; a short sketch of this +approach is shown after the Pandas example below. + +Creating PyArrow Tables ++++++++++++++++++++++++ + +An example that creates and uses a `PyArrow Table +`__ is: + +.. code-block:: python + + # Get an OracleDataFrame + # Adjust arraysize to tune the query fetch performance + sql = "select id, name from SampleQueryTab order by id" + odf = connection.fetch_df_all(statement=sql, arraysize=100) + + # Create a PyArrow table + pyarrow_table = pyarrow.Table.from_arrays( + arrays=odf.column_arrays(), names=odf.column_names() + ) + + print("\nNumber of rows and columns:") + (r, c) = pyarrow_table.shape + print(f"{r} rows, {c} columns") + +This makes use of :meth:`OracleDataFrame.column_arrays()` which returns a list +of :ref:`OracleArrowArray Objects `. + +See `samples/dataframe_pyarrow.py `__ for a runnable example. + +Creating Pandas DataFrames +++++++++++++++++++++++++++ + +An example that creates and uses a `Pandas DataFrame `__ is: + +.. code-block:: python + + import pandas + + # Get an OracleDataFrame + # Adjust arraysize to tune the query fetch performance + sql = "select * from mytable where id = :1" + myid = 12345 # the bind variable value + odf = connection.fetch_df_all(statement=sql, parameters=[myid], arraysize=1000) + + # Get a Pandas DataFrame from the data. + # This is a zero copy call + df = pandas.api.interchange.from_dataframe(odf) + + # Perform various Pandas operations on the DataFrame + print(df.T) # transpose + print(df.tail(3)) # last three rows + +Using python-oracledb to fetch the interchange format will be more efficient +than using the Pandas ``read_sql()`` method. + +See `samples/dataframe_pandas.py `__ for a runnable example.
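Returning to the earlier note on inserting OracleDataFrame data back into Oracle Database, the following is a minimal sketch of the ``Table.to_pylist()`` approach. The target table ``SampleCopyTab`` and its columns are hypothetical and are used only for illustration:

.. code-block:: python

    import pyarrow

    # Fetch the source rows as an OracleDataFrame
    odf = connection.fetch_df_all(
        statement="select id, name from SampleQueryTab order by id"
    )

    # Convert to a list of dictionaries keyed by column name
    pyarrow_table = pyarrow.Table.from_arrays(
        arrays=odf.column_arrays(), names=odf.column_names()
    )
    rows = pyarrow_table.to_pylist()

    # Insert using standard python-oracledb named binds
    with connection.cursor() as cursor:
        cursor.executemany(
            "insert into SampleCopyTab (id, name) values (:ID, :NAME)",
            rows,
        )
    connection.commit()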
+ +Creating Polars Series +++++++++++++++++++++++ + +An example that creates and uses a `Polars Series +`__ is: + +.. code-block:: python + + import pyarrow + import polars + + # Get an OracleDataFrame + # Adjust arraysize to tune the query fetch performance + sql = "select id from SampleQueryTab order by id" + odf = connection.fetch_df_all(statement=sql, arraysize=100) + + # Convert to a Polars Series + pyarrow_array = pyarrow.array(odf.get_column_by_name("ID")) + p = polars.from_arrow(pyarrow_array) + + # Perform various Polars operations on the Series + print(p.sum()) + print(p.log10()) + +See `samples/dataframe_polars.py `__ for a runnable example. + +Writing Apache Parquet Files +++++++++++++++++++++++++++++ + +To write output in `Apache Parquet `__ file +format, you can use data frames as an efficient intermediary. Use the +:meth:`Connection.fetch_df_batches()` iterator and convert to a `PyArrow Table +`__ that can +be written by the PyArrow library. + +.. code-block:: python + + import pyarrow + import pyarrow.parquet as pq + + FILE_NAME = "sample.parquet" + + # Tune the fetch batch size for your query + BATCH_SIZE = 10000 + + sql = "select * from mytable" + pqwriter = None + for odf in connection.fetch_df_batches(statement=sql, size=BATCH_SIZE): + + # Get a PyArrow table from the query results + pyarrow_table = pyarrow.Table.from_arrays( + arrays=odf.column_arrays(), names=odf.column_names() + ) + + if not pqwriter: + pqwriter = pq.ParquetWriter(FILE_NAME, pyarrow_table.schema) + + pqwriter.write_table(pyarrow_table) + + pqwriter.close() + +See `samples/dataframe_parquet_write.py `__ +for a runnable example. + +The DLPack Protocol ++++++++++++++++++++ + +The DataFrame format facilitates working with query results as +tensors. Conversion can be done using the standard `DLPack Protocol +`__ implemented by PyArrow. + +**Using NumPy Arrays** + +For example, to convert to `NumPy `__ ``ndarray`` format: + +.. code-block:: python + + import pyarrow + import numpy + + SQL = "select id from SampleQueryTab order by id" + + # Get an OracleDataFrame + # Adjust arraysize to tune the query fetch performance + odf = connection.fetch_df_all(statement=SQL, arraysize=100) + + # Convert to an ndarray via the Python DLPack specification + pyarrow_array = pyarrow.array(odf.get_column_by_name("ID")) + np = numpy.from_dlpack(pyarrow_array) + + # Perform various numpy operations on the ndarray + + print(numpy.sum(np)) + print(numpy.log10(np)) + + +See `samples/dataframe_numpy.py `__ for a runnable example. + +**Using Torch** + +An example of working with data as a `Torch tensor +`__ is: + +.. code-block:: python + + import pyarrow + import torch + + SQL = "select id from SampleQueryTab order by id" + + # Get an OracleDataFrame + # Adjust arraysize to tune the query fetch performance + odf = connection.fetch_df_all(statement=SQL, arraysize=100) + + # Convert to a Torch tensor via the Python DLPack specification + pyarrow_array = pyarrow.array(odf.get_column_by_name("ID")) + tt = torch.from_dlpack(pyarrow_array) + + # Perform various Torch operations on the tensor + + print(torch.sum(tt)) + print(torch.log10(tt)) + +See `samples/dataframe_torch.py `__ for a runnable example. + .. 
_rowlimit: Limiting Rows diff --git a/doc/src/user_guide/tuning.rst b/doc/src/user_guide/tuning.rst index f0b64c89..9202c753 100644 --- a/doc/src/user_guide/tuning.rst +++ b/doc/src/user_guide/tuning.rst @@ -311,6 +311,17 @@ The ``arraysize`` value can also be set before calling the procedure: Also see `Avoiding Premature Prefetching`_. +Tuning Fetching for DataFrames +------------------------------ + +When fetching :ref:`data frames ` with +:meth:`Connection.fetch_df_all()` or :meth:`Connection.fetch_df_batches()`, +tuning of data transfer across the network is controlled by the methods +``arraysize`` or ``size`` parameters, respectively. + +Any :attr:`defaults.prefetchrows` value is ignored since these methods always +set the internal prefetch size to the relevant ``arraysize`` or ``size`` value. + Database Round-trips ==================== diff --git a/samples/dataframe_numpy.py b/samples/dataframe_numpy.py new file mode 100644 index 00000000..8bc7a476 --- /dev/null +++ b/samples/dataframe_numpy.py @@ -0,0 +1,71 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# dataframe_numpy.py +# +# Shows how to use connection.fetch_df_all() to efficiently put data into a +# NumPy ndarray via the DLPack standard memory layout. 
+# ----------------------------------------------------------------------------- + +import pyarrow +import numpy + +import oracledb +import sample_env + +# determine whether to use python-oracledb thin mode or thick mode +if not sample_env.get_is_thin(): + oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) + +connection = oracledb.connect( + user=sample_env.get_main_user(), + password=sample_env.get_main_password(), + dsn=sample_env.get_connect_string(), + params=sample_env.get_connect_params(), +) + +SQL = "select id from SampleQueryTab order by id" + +# Get an OracleDataFrame +# Adjust arraysize to tune the query fetch performance +odf = connection.fetch_df_all(statement=SQL, arraysize=100) + +# Convert to an ndarray via the Python DLPack specification +pyarrow_array = pyarrow.array(odf.get_column_by_name("ID")) +np = numpy.from_dlpack(pyarrow_array) + +# If the array has nulls, an alternative is: +# np = pyarrow_array.to_numpy(zero_copy_only=False) + +print("Type:") +print(type(np)) # + +# Perform various numpy operations on the ndarray + +print("\nSum:") +print(numpy.sum(np)) + +print("\nLog10:") +print(numpy.log10(np)) diff --git a/samples/dataframe_pandas.py b/samples/dataframe_pandas.py new file mode 100644 index 00000000..f6165757 --- /dev/null +++ b/samples/dataframe_pandas.py @@ -0,0 +1,90 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# dataframe_pandas.py +# +# Shows how to use connection.fetch_df_all() and connection.fetch_df_batches() +# to create Pandas dataframes. +# ----------------------------------------------------------------------------- + +import pandas +import oracledb +import sample_env + +# determine whether to use python-oracledb thin mode or thick mode +if not sample_env.get_is_thin(): + oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) + +connection = oracledb.connect( + user=sample_env.get_main_user(), + password=sample_env.get_main_password(), + dsn=sample_env.get_connect_string(), + params=sample_env.get_connect_params(), +) + +SQL = "select id, name from SampleQueryTab order by id" + +# Get an OracleDataFrame. +# Adjust arraysize to tune the query fetch performance +odf = connection.fetch_df_all(statement=SQL, arraysize=100) + +# Get a Pandas DataFrame from the data. 
+# This is a zero copy call +df = pandas.api.interchange.from_dataframe(odf) + +# Perform various Pandas operations on the DataFrame + +print("Columns:") +print(df.columns) + +print("\nDataframe description:") +print(df.describe()) + +print("\nLast three rows:") +print(df.tail(3)) + +print("\nTransform:") +print(df.T) + +# ----------------------------------------------------------------------------- + +# An example of batch fetching +# +# Note that since this particular example ends up with all query rows being +# held in memory, it would be more efficient to use fetch_df_all() as shown +# above. + +print("\nFetching in batches:") +df = pandas.DataFrame() + +# Tune 'size' for your data set. Here it is small to show the batch fetch +# behavior on the sample table. +for odf in connection.fetch_df_batches(statement=SQL, size=10): + df_b = pandas.api.interchange.from_dataframe(odf) + print(f"Appending {df_b.shape[0]} rows") + df = pandas.concat([df, df_b], ignore_index=True) + +print("\nLast three rows:") +print(df.tail(3)) diff --git a/samples/dataframe_parquet_write.py b/samples/dataframe_parquet_write.py new file mode 100644 index 00000000..02a7d93f --- /dev/null +++ b/samples/dataframe_parquet_write.py @@ -0,0 +1,87 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# dataframe_parquet_write.py +# +# Shows how to use connection.fetch_df_batches() to write files in Parquet +# format. 
+# ----------------------------------------------------------------------------- + +import os + +import pyarrow +import pyarrow.parquet as pq + +import oracledb +import sample_env + +# determine whether to use python-oracledb thin mode or thick mode +if not sample_env.get_is_thin(): + oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) + +connection = oracledb.connect( + user=sample_env.get_main_user(), + password=sample_env.get_main_password(), + dsn=sample_env.get_connect_string(), + params=sample_env.get_connect_params(), +) + +PARQUET_FILE_NAME = "sample.parquet" + +if os.path.isfile(PARQUET_FILE_NAME): + os.remove(PARQUET_FILE_NAME) + +# Tune this for your query +FETCH_BATCH_SIZE = 10 + +SQL = "select id, name from SampleQueryTab order by id" +pqwriter = None + +for odf in connection.fetch_df_batches(statement=SQL, size=FETCH_BATCH_SIZE): + + pyarrow_table = pyarrow.Table.from_arrays( + arrays=odf.column_arrays(), names=odf.column_names() + ) + + if not pqwriter: + pqwriter = pq.ParquetWriter(PARQUET_FILE_NAME, pyarrow_table.schema) + + print(f"Writing a batch of {odf.num_rows()} rows") + pqwriter.write_table(pyarrow_table) + +pqwriter.close() + +# ----------------------------------------------------------------------------- +# Check the file was created + +print("\nParquet file metadata:") +print(pq.read_metadata(PARQUET_FILE_NAME)) + +# ----------------------------------------------------------------------------- +# Read the file + +print("\nParquet file data:") +t = pq.read_table(PARQUET_FILE_NAME, columns=["ID", "NAME"]) +print(t) diff --git a/samples/dataframe_polars.py b/samples/dataframe_polars.py new file mode 100644 index 00000000..aaa2859d --- /dev/null +++ b/samples/dataframe_polars.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# dataframe_polars.py +# +# Shows how to use connection.fetch_df_all() to efficiently put data into a +# Polars Series +# ----------------------------------------------------------------------------- + +import pyarrow +import polars + +import oracledb +import sample_env + +# determine whether to use python-oracledb thin mode or thick mode +if not sample_env.get_is_thin(): + oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) + +connection = oracledb.connect( + user=sample_env.get_main_user(), + password=sample_env.get_main_password(), + dsn=sample_env.get_connect_string(), + params=sample_env.get_connect_params(), +) + +SQL = "select id from SampleQueryTab order by id" + +# Get an OracleDataFrame +# Adjust arraysize to tune the query fetch performance +odf = connection.fetch_df_all(statement=SQL, arraysize=100) + +# Convert to a Polars Series +pyarrow_array = pyarrow.array(odf.get_column_by_name("ID")) +p = polars.from_arrow(pyarrow_array) + +print(type(p)) # + +# Perform various Polars operations on the Series + +print("\nSum:") +print(p.sum()) + +print("\nLog10:") +print(p.log10()) diff --git a/samples/dataframe_pyarrow.py b/samples/dataframe_pyarrow.py new file mode 100644 index 00000000..ec69d8d6 --- /dev/null +++ b/samples/dataframe_pyarrow.py @@ -0,0 +1,95 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# dataframe_pyarrow.py +# +# Shows how to use connection.fetch_df_all() to create PyArrow tables and +# arrays. 
+# ----------------------------------------------------------------------------- + +import pyarrow + +import oracledb +import sample_env + +# determine whether to use python-oracledb thin mode or thick mode +if not sample_env.get_is_thin(): + oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) + +connection = oracledb.connect( + user=sample_env.get_main_user(), + password=sample_env.get_main_password(), + dsn=sample_env.get_connect_string(), + params=sample_env.get_connect_params(), +) + +# ----------------------------------------------------------------------------- +# Creating a PyArrow table + +SQL1 = "select id, name from SampleQueryTab order by id" + +# Get an OracleDataFrame +# Adjust arraysize to tune the query fetch performance +odf = connection.fetch_df_all(statement=SQL1, arraysize=100) + +# Create a PyArrow table +pyarrow_table = pyarrow.Table.from_arrays( + arrays=odf.column_arrays(), names=odf.column_names() +) + +print("Type:") +print(type(pyarrow_table)) # + +# Perform various PyArrow operations + +print("\nColumn names:") +print(pyarrow_table.column_names) + +print("\nNumber of rows and columns:") +(r, c) = pyarrow_table.shape +print(f"{r} rows, {c} columns") + +# ----------------------------------------------------------------------------- +# Creating a PyArrow array + +SQL2 = "select id from SampleQueryTab order by id" + +# Get an OracleDataFrame +# Adjust arraysize to tune the query fetch performance +odf = connection.fetch_df_all(statement=SQL2, arraysize=100) + +# Create a PyArrow array +pyarrow_array = pyarrow.array(odf.get_column_by_name("ID")) + +print("Type:") +print(type(pyarrow_array)) # + +# Perform various PyArrow operations + +print("\nSum:") +print(pyarrow_array.sum()) + +print("\nFirst three elements:") +print(pyarrow_array.slice(0, 3)) diff --git a/samples/dataframe_torch.py b/samples/dataframe_torch.py new file mode 100644 index 00000000..e45d1940 --- /dev/null +++ b/samples/dataframe_torch.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# dataframe_torch.py +# +# Shows how to use connection.fetch_df_all() to efficiently put data into a +# Torch tensor via the DLPack standard memory layout. 
+# ----------------------------------------------------------------------------- + +import pyarrow +import torch + +import oracledb +import sample_env + +# determine whether to use python-oracledb thin mode or thick mode +if not sample_env.get_is_thin(): + oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client()) + +connection = oracledb.connect( + user=sample_env.get_main_user(), + password=sample_env.get_main_password(), + dsn=sample_env.get_connect_string(), + params=sample_env.get_connect_params(), +) + +SQL = "select id from SampleQueryTab order by id" + +# Get an OracleDataFrame +# Adjust arraysize to tune the query fetch performance +odf = connection.fetch_df_all(statement=SQL, arraysize=100) + +# Convert to a Torch tensor via the Python DLPack specification +pyarrow_array = pyarrow.array(odf.get_column_by_name("ID")) +tt = torch.from_dlpack(pyarrow_array) + +print(type(tt)) # + +# Perform various Torch operations on the tensor + +print("\nSum:") +print(torch.sum(tt)) + +print("\nLog10:") +print(torch.log10(tt)) diff --git a/samples/sql/create_schema.sql b/samples/sql/create_schema.sql index 7a7c816f..d160e984 100644 --- a/samples/sql/create_schema.sql +++ b/samples/sql/create_schema.sql @@ -391,6 +391,44 @@ insert into &main_user..SampleQueryTab values (6, 'Frankie') / insert into &main_user..SampleQueryTab values (7, 'Gerri') / +insert into &main_user..SampleQueryTab values (8, 'Harriet') +/ +insert into &main_user..SampleQueryTab values (9, 'Isabelle') +/ +insert into &main_user..SampleQueryTab values (10, 'Jarek') +/ +insert into &main_user..SampleQueryTab values (11, 'Krishna') +/ +insert into &main_user..SampleQueryTab values (12, 'Leo') +/ +insert into &main_user..SampleQueryTab values (13, 'Mia') +/ +insert into &main_user..SampleQueryTab values (14, 'Nathalie') +/ +insert into &main_user..SampleQueryTab values (15, 'Oscar') +/ +insert into &main_user..SampleQueryTab values (16, 'Pia') +/ +insert into &main_user..SampleQueryTab values (17, 'Quentin') +/ +insert into &main_user..SampleQueryTab values (18, 'Roger') +/ +insert into &main_user..SampleQueryTab values (19, 'Sally') +/ +insert into &main_user..SampleQueryTab values (20, 'Tully') +/ +insert into &main_user..SampleQueryTab values (21, 'Una') +/ +insert into &main_user..SampleQueryTab values (22, 'Valerie') +/ +insert into &main_user..SampleQueryTab values (23, 'William') +/ +insert into &main_user..SampleQueryTab values (24, 'Xavier') +/ +insert into &main_user..SampleQueryTab values (25, 'Yasmin') +/ +insert into &main_user..SampleQueryTab values (26, 'Zach') +/ commit / diff --git a/setup.cfg b/setup.cfg index 34e0650f..b79870ac 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,6 +47,7 @@ test_suite = tests packages = oracledb oracledb.plugins + oracledb.interchange package_dir = =src diff --git a/setup.py b/setup.py index b29bb2cc..9729f381 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2023, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -31,6 +31,14 @@ # base source directory source_dir = os.path.join("src", "oracledb") +# determine the nanoarrow bridge dependent source files (included) +base_dir = os.path.join(source_dir, "interchange") +nanoarrow_bridge_depends = [ + os.path.join(base_dir, "nanoarrow", "nanoarrow.c"), + os.path.join(base_dir, "nanoarrow", "nanoarrow.h"), +] +nanoarrow_bridge_pxd = os.path.join(base_dir, "nanoarrow_bridge.pxd") + # determine the base implementation dependent source files (included) impl_dir = os.path.join(source_dir, "impl", "base") base_depends = [ @@ -39,7 +47,7 @@ if n.endswith(".pyx") ] base_pxd = os.path.join(source_dir, "base_impl.pxd") -base_depends.append(base_pxd) +base_depends.extend([base_pxd, nanoarrow_bridge_pxd]) # determine the thick mode dependent source files (included) impl_dir = os.path.join(source_dir, "impl", "thick") @@ -91,21 +99,33 @@ Extension( "oracledb.base_impl", sources=["src/oracledb/base_impl.pyx"], + include_dirs=["src/oracledb/interchange/nanoarrow"], depends=base_depends, extra_compile_args=extra_compile_args, ), Extension( "oracledb.thin_impl", sources=["src/oracledb/thin_impl.pyx"], + include_dirs=["src/oracledb/interchange/nanoarrow"], depends=thin_depends, extra_compile_args=extra_compile_args, ), Extension( "oracledb.thick_impl", sources=["src/oracledb/thick_impl.pyx"], - include_dirs=["src/oracledb/impl/thick/odpi/include"], + include_dirs=[ + "src/oracledb/impl/thick/odpi/include", + "src/oracledb/interchange/nanoarrow", + ], depends=thick_depends, extra_compile_args=extra_compile_args, ), + Extension( + "oracledb.interchange.nanoarrow_bridge", + sources=["src/oracledb/interchange/nanoarrow_bridge.pyx"], + include_dirs=["src/oracledb/interchange/nanoarrow"], + depends=nanoarrow_bridge_depends, + extra_compile_args=extra_compile_args, + ), ] ) diff --git a/src/oracledb/__init__.py b/src/oracledb/__init__.py index 45cca983..1302e220 100644 --- a/src/oracledb/__init__.py +++ b/src/oracledb/__init__.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -314,6 +314,10 @@ SparseVector as SparseVector, ) +from .interchange.dataframe import ( + OracleDataFrame as OracleDataFrame, +) + from . 
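With the packaging changes above in place, a quick smoke check confirms that the new interchange subpackage and its compiled nanoarrow bridge built correctly and that the OracleDataFrame name is exported; this is an illustrative check only, not part of the patch.

import oracledb
import oracledb.interchange.nanoarrow_bridge  # compiled extension declared above

print(oracledb.OracleDataFrame)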
import config_providers IntervalYM = collections.namedtuple("IntervalYM", ["years", "months"]) diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index 1f89c082..42b5de16 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -33,10 +33,17 @@ from libc.stdint cimport int8_t, int16_t, int32_t, int64_t from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t +from libc.stdlib cimport abs from cpython cimport array ctypedef unsigned char char_type +from .interchange.nanoarrow_bridge cimport ( + ArrowTimeUnit, + ArrowType, + OracleArrowArray, +) + cdef enum: PY_TYPE_NUM_ARRAY = 13 PY_TYPE_NUM_BOOL = 4 @@ -438,9 +445,11 @@ cdef class OracleMetadata: readonly uint32_t vector_dimensions readonly uint8_t vector_format readonly uint8_t vector_flags + ArrowType _arrow_type uint8_t _py_type_num cdef int _finalize_init(self) except -1 + cdef int _set_arrow_type(self) except -1 cdef OracleMetadata copy(self) @staticmethod cdef OracleMetadata from_type(object typ) @@ -654,6 +663,7 @@ cdef class BaseCursorImpl: public type bind_style public dict bind_vars_by_name public object warning + public bint fetching_arrow uint32_t _buffer_rowcount uint32_t _buffer_index uint32_t _fetch_array_size @@ -691,6 +701,9 @@ cdef class BaseCursorImpl: cdef int _verify_var(self, object var) except -1 cdef int bind_many(self, object cursor, list parameters) except -1 cdef int bind_one(self, object cursor, object parameters) except -1 + cdef object _finish_building_arrow_arrays(self) + cdef int _create_arrow_arrays(self) except -1 + cdef class BaseVarImpl: @@ -709,6 +722,7 @@ cdef class BaseVarImpl: BaseConnImpl _conn_impl OracleMetadata _fetch_metadata list _values + OracleArrowArray _arrow_array bint _is_value_set cdef int _bind(self, object conn, BaseCursorImpl cursor, @@ -718,6 +732,7 @@ cdef class BaseVarImpl: cdef int _check_and_set_value(self, uint32_t pos, object value, bint* was_set) except -1 cdef DbType _check_fetch_conversion(self) + cdef int _create_arrow_array(self) except -1 cdef int _finalize_init(self) except -1 cdef DbType _get_adjusted_type(self, uint8_t ora_type_num) cdef list _get_array_value(self) @@ -951,6 +966,10 @@ cdef struct OracleData: OracleDataBuffer buffer +cdef int convert_oracle_data_to_arrow(OracleMetadata from_metadata, + OracleMetadata to_metadatda, + OracleData* data, + OracleArrowArray arrow_array) except -1 cdef object convert_oracle_data_to_python(OracleMetadata from_metadata, OracleMetadata to_metadatda, OracleData* data, diff --git a/src/oracledb/base_impl.pyx b/src/oracledb/base_impl.pyx index 69a872ee..f071fef1 100644 --- a/src/oracledb/base_impl.pyx +++ b/src/oracledb/base_impl.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -38,9 +38,24 @@ cimport cpython.datetime as cydatetime from libc.stdint cimport int8_t, int16_t, int32_t, int64_t from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t from libc.stdint cimport UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX +from libc.stdlib cimport atoi, atof from libc.string cimport memcpy from cpython cimport array +from .interchange.nanoarrow_bridge cimport ( + NANOARROW_TIME_UNIT_SECOND, + NANOARROW_TIME_UNIT_MILLI, + NANOARROW_TIME_UNIT_MICRO, + NANOARROW_TIME_UNIT_NANO, + NANOARROW_TYPE_BOOL, + NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DOUBLE, + NANOARROW_TYPE_FLOAT, + NANOARROW_TYPE_INT64, + NANOARROW_TYPE_STRING, + NANOARROW_TYPE_TIMESTAMP, +) + import array import base64 @@ -65,6 +80,7 @@ cdef type PY_TYPE_ASYNC_CURSOR cdef type PY_TYPE_ASYNC_LOB cdef type PY_TYPE_BOOL = bool cdef type PY_TYPE_CURSOR +cdef object PY_TYPE_DATAFRAME cdef type PY_TYPE_DATE = datetime.date cdef type PY_TYPE_DATETIME = datetime.datetime cdef type PY_TYPE_DECIMAL = decimal.Decimal diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index bc179dc4..e3aac76e 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -713,6 +713,43 @@ def encode_oson(self, value): self._verify_connected() return self._impl.encode_oson(value) + def fetch_df_all( + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, + arraysize: Optional[int] = None, + ): + """ + Fetch all data as OracleDataFrame. + """ + cursor = self.cursor() + cursor._impl.fetching_arrow = True + if arraysize is not None: + cursor.arraysize = arraysize + cursor.prefetchrows = cursor.arraysize + cursor.execute(statement, parameters) + return cursor._impl.fetch_df_all(cursor) + + def fetch_df_batches( + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, + size: Optional[int] = None, + ): + """ + Fetch data in batches. 
Each batch is an OracleDataFrame + """ + cursor = self.cursor() + cursor._impl.fetching_arrow = True + if size is not None: + cursor.arraysize = size + cursor.prefetchrows = cursor.arraysize + cursor.execute(statement, parameters) + if size is None: + yield cursor._impl.fetch_df_all(cursor) + else: + yield from cursor._impl.fetch_df_batches(cursor, batch_size=size) + def getSodaDatabase(self) -> SodaDatabase: """ Return a SODA database object for performing all operations on Simple diff --git a/src/oracledb/errors.py b/src/oracledb/errors.py index 9de4c810..ef0b2000 100644 --- a/src/oracledb/errors.py +++ b/src/oracledb/errors.py @@ -279,6 +279,7 @@ def _raise_not_supported(feature: str) -> None: ERR_PASSWORD_TYPE_HANDLER_FAILED = 2057 ERR_PLAINTEXT_PASSWORD_IN_CONFIG = 2058 ERR_MISSING_CONNECT_DESCRIPTOR = 2059 +ERR_ARROW_C_API_ERROR = 2060 # error numbers that result in NotSupportedError ERR_TIME_NOT_SUPPORTED = 3000 @@ -310,6 +311,7 @@ def _raise_not_supported(feature: str) -> None: ERR_CURSOR_DIFF_CONNECTION = 3027 ERR_UNSUPPORTED_PIPELINE_OPERATION = 3028 ERR_INVALID_NETWORK_NAME = 3029 +ERR_ARROW_UNSUPPORTED_DATA_TYPE = 3030 # error numbers that result in DatabaseError ERR_TNS_ENTRY_NOT_FOUND = 4000 @@ -853,4 +855,11 @@ def _raise_not_supported(feature: str) -> None: ERR_INVALID_NETWORK_NAME: ( '"{name}" includes characters that are not allowed' ), + ERR_ARROW_UNSUPPORTED_DATA_TYPE: ( + "conversion from Oracle Database type {db_type_name} to Apache " + "Arrow format is not supported" + ), + ERR_ARROW_C_API_ERROR: ( + "Arrow C Data Interface operation failed with error code {code}" + ), } diff --git a/src/oracledb/impl/base/converters.pyx b/src/oracledb/impl/base/converters.pyx index 48c4b6be..9e9416f8 100644 --- a/src/oracledb/impl/base/converters.pyx +++ b/src/oracledb/impl/base/converters.pyx @@ -68,6 +68,94 @@ cdef object convert_interval_ym_to_python(OracleDataBuffer *buffer): return PY_TYPE_INTERVAL_YM(value.years, value.months) +cdef int convert_number_to_arrow_decimal(OracleArrowArray arrow_array, + OracleDataBuffer *buffer) except -1: + """ + Converts a NUMBER value stored in the buffer to Arrow DECIMAL128. + """ + cdef: + char_type c + bint has_sign = 0 + char_type digits[39] # 38 digits + sign + OracleNumber *value = &buffer.as_number + uint8_t num_chars = 0, decimal_point_index = 0, allowed_max_chars = 0 + int64_t actual_scale = 0 + + if value.chars[0] == 45: # minus sign + has_sign = True + + if value.is_integer: + if has_sign: + allowed_max_chars = 39 + else: + allowed_max_chars = 38 + else: # decimal point + if has_sign: + allowed_max_chars = 40 + else: + allowed_max_chars = 39 + + # Arrow Decimal128 can only represent values with 38 decimal digits + if value.is_max_negative_value or value.num_chars > allowed_max_chars: + raise ValueError("Value cannot be represented as " + "Arrow Decimal128") + if value.is_integer: + arrow_array.append_decimal(value.chars, value.num_chars) + else: + for i in range(value.num_chars): + c = value.chars[i] + # count all characters except the decimal point + if c != 46: + digits[num_chars] = c + num_chars += 1 + else: + decimal_point_index = i + + # Append any trailing zeros. 
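The fetch_df_all() method added to connection.py above takes the same bind parameter styles as cursor.execute(). A minimal usage sketch, assuming the SampleQueryTab data created by the sample schema:

# fetch matching rows as a single OracleDataFrame
odf = connection.fetch_df_all(
    statement="select id, name from SampleQueryTab"
    " where id > :min_id order by id",
    parameters={"min_id": 5},
    arraysize=100,
)
print(odf.num_rows(), odf.num_columns())
print(odf.column_names())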
+ actual_scale = num_chars - decimal_point_index + for i in range(abs(arrow_array.scale) - actual_scale): + digits[num_chars] = b'0' + num_chars += 1 + arrow_array.append_decimal(digits, num_chars) + + + +cdef int convert_number_to_arrow_double(OracleArrowArray arrow_array, + OracleDataBuffer *buffer) except -1: + """ + Converts a NUMBER value stored in the buffer to Arrow DOUBLE. + """ + cdef OracleNumber *value = &buffer.as_number + if value.is_max_negative_value: + arrow_array.append_double(-1.0e126) + else: + arrow_array.append_double(atof(value.chars[:value.num_chars])) + + +cdef int convert_number_to_arrow_int64(OracleArrowArray arrow_array, + OracleDataBuffer *buffer) except -1: + """ + Converts a NUMBER value stored in the buffer to Arrow INT64. + """ + cdef OracleNumber *value = &buffer.as_number + arrow_array.append_int64(atoi(value.chars[:value.num_chars])) + + +cdef int convert_number_to_arrow_string(OracleArrowArray arrow_array, + OracleDataBuffer *buffer) except -1: + """ + Converts a NUMBER value stored in the buffer to Arrow string. + """ + cdef: + OracleNumber *value = &buffer.as_number + char* ptr + if value.is_max_negative_value: + ptr = "-1e126" + arrow_array.append_bytes(ptr, 6) + else: + arrow_array.append_bytes(value.chars, value.num_chars) + + cdef object convert_number_to_python_decimal(OracleDataBuffer *buffer): """ Converts a NUMBER value stored in the buffer to Python decimal.Decimal(). @@ -131,6 +219,45 @@ cdef object convert_str_to_python(OracleDataBuffer *buffer, uint8_t csfrm, return rb.ptr[:rb.num_bytes].decode(ENCODING_UTF16, encoding_errors) +cdef int convert_oracle_data_to_arrow(OracleMetadata from_metadata, + OracleMetadata to_metadata, + OracleData* data, + OracleArrowArray arrow_array) except -1: + """ + Converts the value stored in OracleData to Arrow format. + """ + cdef: + ArrowType arrow_type + uint32_t db_type_num + OracleRawBytes* rb + int64_t ts + + # NULL values + if data.is_null: + return arrow_array.append_null() + + arrow_type = to_metadata._arrow_type + db_type_num = from_metadata.dbtype.num + if arrow_type == NANOARROW_TYPE_INT64: + convert_number_to_arrow_int64(arrow_array, &data.buffer) + elif arrow_type == NANOARROW_TYPE_DOUBLE: + if db_type_num == DB_TYPE_NUM_NUMBER: + convert_number_to_arrow_double(arrow_array, &data.buffer) + else: + arrow_array.append_double(data.buffer.as_double) + elif arrow_type == NANOARROW_TYPE_FLOAT: + arrow_array.append_float(data.buffer.as_float) + elif arrow_type == NANOARROW_TYPE_STRING: + rb = &data.buffer.as_raw_bytes + arrow_array.append_bytes( rb.ptr, rb.num_bytes) + elif arrow_type == NANOARROW_TYPE_TIMESTAMP: + ts = int(convert_date_to_python(&data.buffer).timestamp() * + arrow_array.factor) + arrow_array.append_int64(ts) + elif arrow_type == NANOARROW_TYPE_DECIMAL128: + convert_number_to_arrow_decimal(arrow_array, &data.buffer) + + cdef object convert_oracle_data_to_python(OracleMetadata from_metadata, OracleMetadata to_metadata, OracleData* data, diff --git a/src/oracledb/impl/base/cursor.pyx b/src/oracledb/impl/base/cursor.pyx index 11f5f8bc..a4ae7c78 100644 --- a/src/oracledb/impl/base/cursor.pyx +++ b/src/oracledb/impl/base/cursor.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
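The decimal conversion above operates on the digit-string form of NUMBER: the decimal point is dropped and trailing zeros are appended so that the unscaled digit count matches the column's declared scale before append_decimal() is called. A plain-Python restatement of that idea, ignoring sign handling and the 38-digit Decimal128 limit, purely for illustration:

def number_to_unscaled_digits(text, scale):
    # "123.45" with scale 3 -> "123450": drop the decimal point and pad with
    # trailing zeros so the unscaled integer reflects the declared scale
    if "." not in text:
        return text
    whole, _, frac = text.partition(".")
    return whole + frac + "0" * (scale - len(frac))

print(number_to_unscaled_digits("123.45", 3))   # 123450
print(number_to_unscaled_digits("7", 0))        # 7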
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -234,6 +234,8 @@ cdef class BaseCursorImpl: # finalize variable and store in arrays var_impl._finalize_init() + if self.fetching_arrow: + var_impl._create_arrow_array() self.fetch_var_impls[pos] = var_impl return var_impl @@ -358,6 +360,12 @@ cdef class BaseCursorImpl: self.bind_vars_by_name = None self.bind_style = None + cdef int _create_arrow_arrays(self) except -1: + cdef BaseVarImpl var_impl + for var_impl in self.fetch_var_impls: + if var_impl._arrow_array is None: + var_impl._create_arrow_array() + def _prepare_for_execute(self, object cursor, str statement, object parameters, object keyword_parameters): """ @@ -502,6 +510,19 @@ cdef class BaseCursorImpl: self._bind_values(cursor, type_handler, parameters, num_rows, row_num, defer_type_assignment) + cdef object _finish_building_arrow_arrays(self): + """ + Flush all buffers and return an Oracle Data frame. + """ + cdef: + BaseVarImpl var_impl + list columns = [] + for var_impl in self.fetch_var_impls: + var_impl._arrow_array.finish_building() + columns.append(var_impl._arrow_array) + var_impl._arrow_array = None + return PY_TYPE_DATAFRAME(columns) + def close(self, bint in_del=False): """ Closes the cursor and makes it unusable for further operations. @@ -551,6 +572,35 @@ cdef class BaseCursorImpl: if self._buffer_rowcount > 0: return self._create_row() + def fetch_df_all(self, cursor): + """ + Internal method used for fetching all data as OracleDataFrame + """ + while self._more_rows_to_fetch: + self._fetch_rows(cursor) + return self._finish_building_arrow_arrays() + + def fetch_df_batches(self, cursor, int batch_size): + """ + Internal method used for fetching next batch as OracleDataFrame + cursor.arraysize = batchsize + """ + cdef: + BaseConnImpl conn_impl = self._get_conn_impl() + bint returned = False + + # Return the prefetched batch (thin mode) + if conn_impl.thin: + returned = True + yield self._finish_building_arrow_arrays() + + while self._more_rows_to_fetch: + self._create_arrow_arrays() + self._fetch_rows(cursor) + if not returned or self._buffer_rowcount > 0: + returned = True + yield self._finish_building_arrow_arrays() + def get_array_dml_row_counts(self): errors._raise_not_supported("getting a list of array DML row counts") diff --git a/src/oracledb/impl/base/metadata.pyx b/src/oracledb/impl/base/metadata.pyx index 206f5ea8..e4bda9ca 100644 --- a/src/oracledb/impl/base/metadata.pyx +++ b/src/oracledb/impl/base/metadata.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2024, Oracle and/or its affiliates. +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -54,6 +54,40 @@ cdef class OracleMetadata: else: self._py_type_num = PY_TYPE_NUM_FLOAT + cdef int _set_arrow_type(self) except -1: + """ + Determine the arrow type to use for the data. 
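From the caller's side, the batch variant above yields one OracleDataFrame per fetch, each holding up to `size` rows (the final batch may be smaller). A usage sketch, again assuming the sample table:

for batch in connection.fetch_df_batches(
    statement="select id, name from SampleQueryTab order by id", size=10
):
    print(batch.num_rows(), "rows in this batch")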
+ """ + cdef: + uint8_t py_type_num = self._py_type_num + uint32_t db_type_num = self.dbtype.num + if db_type_num == DB_TYPE_NUM_NUMBER: + if py_type_num == PY_TYPE_NUM_DECIMAL: + self._arrow_type = NANOARROW_TYPE_DECIMAL128 + elif py_type_num == PY_TYPE_NUM_STR: + self._arrow_type = NANOARROW_TYPE_STRING + elif py_type_num == PY_TYPE_NUM_INT and self.scale == 0 \ + and self.precision <= 18: + self._arrow_type = NANOARROW_TYPE_INT64 + else: + self._arrow_type = NANOARROW_TYPE_DOUBLE + elif db_type_num in (DB_TYPE_NUM_CHAR, DB_TYPE_NUM_VARCHAR): + self._arrow_type = NANOARROW_TYPE_STRING + elif db_type_num == DB_TYPE_NUM_BINARY_FLOAT: + self._arrow_type = NANOARROW_TYPE_FLOAT + elif db_type_num == DB_TYPE_NUM_BINARY_DOUBLE: + self._arrow_type = NANOARROW_TYPE_DOUBLE + elif db_type_num == DB_TYPE_NUM_BOOLEAN: + self._arrow_type = NANOARROW_TYPE_BOOL + elif db_type_num in (DB_TYPE_NUM_DATE, + DB_TYPE_NUM_TIMESTAMP, + DB_TYPE_NUM_TIMESTAMP_LTZ, + DB_TYPE_NUM_TIMESTAMP_TZ): + self._arrow_type = NANOARROW_TYPE_TIMESTAMP + else: + errors._raise_err(errors.ERR_ARROW_UNSUPPORTED_DATA_TYPE, + db_type_name=self.dbtype.name) + cdef OracleMetadata copy(self): """ Create a copy of the metadata and return it. diff --git a/src/oracledb/impl/base/utils.pyx b/src/oracledb/impl/base/utils.pyx index a66d55a0..04d19022 100644 --- a/src/oracledb/impl/base/utils.pyx +++ b/src/oracledb/impl/base/utils.pyx @@ -223,6 +223,7 @@ def init_base_impl(package): PY_TYPE_ASYNC_LOB, \ PY_TYPE_CONNECT_PARAMS, \ PY_TYPE_CURSOR, \ + PY_TYPE_DATAFRAME, \ PY_TYPE_DB_OBJECT, \ PY_TYPE_DB_OBJECT_TYPE, \ PY_TYPE_FETCHINFO, \ @@ -248,6 +249,7 @@ def init_base_impl(package): PY_TYPE_ASYNC_LOB = package.AsyncLOB PY_TYPE_CONNECT_PARAMS = package.ConnectParams PY_TYPE_CURSOR = package.Cursor + PY_TYPE_DATAFRAME = package.OracleDataFrame PY_TYPE_DB_OBJECT = package.DbObject PY_TYPE_DB_OBJECT_TYPE = package.DbObjectType PY_TYPE_FETCHINFO = package.FetchInfo diff --git a/src/oracledb/impl/base/var.pyx b/src/oracledb/impl/base/var.pyx index 9b96bf9f..0cf47f93 100644 --- a/src/oracledb/impl/base/var.pyx +++ b/src/oracledb/impl/base/var.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -247,6 +247,28 @@ cdef class BaseVarImpl: input_type=self._fetch_metadata.dbtype.name, output_type=self.metadata.dbtype.name) + cdef int _create_arrow_array(self) except -1: + """ + Creates an Arrow array based on the type information selected by the + user. + """ + cdef ArrowTimeUnit time_unit = NANOARROW_TIME_UNIT_SECOND + self.metadata._set_arrow_type() + if self.metadata._arrow_type == NANOARROW_TYPE_TIMESTAMP: + if self.metadata.scale > 0 and self.metadata.scale <= 3: + time_unit = NANOARROW_TIME_UNIT_MILLI + elif self.metadata.scale > 3 and self.metadata.scale <= 6: + time_unit = NANOARROW_TIME_UNIT_MICRO + elif self.metadata.scale > 6 and self.metadata.scale <= 9: + time_unit = NANOARROW_TIME_UNIT_NANO + self._arrow_array = OracleArrowArray( + arrow_type=self.metadata._arrow_type, + name=self.metadata.name, + precision=self.metadata.precision, + scale=self.metadata.scale, + time_unit=time_unit, + ) + cdef int _finalize_init(self) except -1: """ Internal method that finalizes initialization of the variable. 
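The mapping chosen by _set_arrow_type() and the time-unit selection in _create_arrow_array() can be summarised in plain Python. This is a simplified restatement for reference only; it omits the output-type-handler cases that force NUMBER columns to DECIMAL128 or STRING.

def arrow_type_for(db_type, precision=0, scale=0):
    # simplified: real NUMBER handling also depends on the Python fetch type
    if db_type == "DB_TYPE_NUMBER":
        if scale == 0 and 0 < precision <= 18:
            return "INT64"
        return "DOUBLE"
    if db_type in ("DB_TYPE_CHAR", "DB_TYPE_VARCHAR"):
        return "STRING"
    if db_type == "DB_TYPE_BINARY_FLOAT":
        return "FLOAT"
    if db_type == "DB_TYPE_BINARY_DOUBLE":
        return "DOUBLE"
    if db_type == "DB_TYPE_BOOLEAN":
        return "BOOL"
    if db_type == "DB_TYPE_DATE" or db_type.startswith("DB_TYPE_TIMESTAMP"):
        return "TIMESTAMP"
    # corresponds to ERR_ARROW_UNSUPPORTED_DATA_TYPE added above
    raise ValueError("unsupported for Arrow conversion")

def timestamp_unit(scale):
    # fractional-second precision drives the Arrow time unit
    if 0 < scale <= 3:
        return "milli"
    if 3 < scale <= 6:
        return "micro"
    if 6 < scale <= 9:
        return "nano"
    return "second"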
diff --git a/src/oracledb/impl/thick/cursor.pyx b/src/oracledb/impl/thick/cursor.pyx index 4353ea7d..38a0e1a3 100644 --- a/src/oracledb/impl/thick/cursor.pyx +++ b/src/oracledb/impl/thick/cursor.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -151,6 +151,8 @@ cdef class ThickCursorImpl(BaseCursorImpl): self._buffer_index = 0 self._buffer_rowcount = num_rows_in_buffer self._more_rows_to_fetch = more_rows_to_fetch + if self.fetching_arrow: + self._populate_arrow_arrays() cdef BaseConnImpl _get_conn_impl(self): """ @@ -250,6 +252,17 @@ cdef class ThickCursorImpl(BaseCursorImpl): if status < 0: _raise_from_odpi() + cdef int _populate_arrow_arrays(self) except -1: + """ + Populate Arrow arrays with fetched data. + """ + cdef: + ThickVarImpl var_impl + uint32_t i + for var_impl in self.fetch_var_impls: + for i in range(self._buffer_rowcount): + var_impl._transform_element_to_arrow(i) + def _set_oci_attr(self, uint32_t attr_num, uint32_t attr_type, object value): """ diff --git a/src/oracledb/impl/thick/var.pyx b/src/oracledb/impl/thick/var.pyx index 186952a0..25584df0 100644 --- a/src/oracledb/impl/thick/var.pyx +++ b/src/oracledb/impl/thick/var.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -341,6 +341,84 @@ cdef class ThickVarImpl(BaseVarImpl): cpython.PyList_SET_ITEM(return_value, i, element_value) return return_value + cdef int _transform_element_to_arrow(self, uint32_t pos): + """ + Transforms a single element from the value supplied by ODPI-C to its + equivalent Arrow format. 
+ """ + cdef: + dpiData *data = &self._data[pos] + uint32_t ora_type_num + OracleData ora_data + dpiBytes *as_bytes + ora_data.is_null = data.isNull + if not data.isNull: + ora_type_num = self._fetch_metadata.dbtype.num + if ora_type_num == DPI_ORACLE_TYPE_NATIVE_DOUBLE: + ora_data.buffer.as_double = data.value.asDouble + elif ora_type_num == DPI_ORACLE_TYPE_NATIVE_FLOAT: + ora_data.buffer.as_float = data.value.asFloat + elif ora_type_num == DPI_ORACLE_TYPE_BOOLEAN: + ora_data.buffer.as_bool = data.value.asBoolean + elif ora_type_num in ( + DPI_ORACLE_TYPE_CHAR, + DPI_ORACLE_TYPE_LONG_VARCHAR, + DPI_ORACLE_TYPE_LONG_RAW, + DPI_ORACLE_TYPE_RAW, + DPI_ORACLE_TYPE_VARCHAR, + ): + as_bytes = &data.value.asBytes; + ora_data.buffer.as_raw_bytes.ptr = \ + as_bytes.ptr; + ora_data.buffer.as_raw_bytes.num_bytes = as_bytes.length; + elif ora_type_num in ( + DPI_ORACLE_TYPE_DATE, + DPI_ORACLE_TYPE_TIMESTAMP, + DPI_ORACLE_TYPE_TIMESTAMP_LTZ, + DPI_ORACLE_TYPE_TIMESTAMP_TZ, + ): + ora_data.buffer.as_date.year = data.value.asTimestamp.year; + ora_data.buffer.as_date.month = data.value.asTimestamp.month; + ora_data.buffer.as_date.day = data.value.asTimestamp.day; + ora_data.buffer.as_date.hour = data.value.asTimestamp.hour; + ora_data.buffer.as_date.minute = data.value.asTimestamp.minute; + ora_data.buffer.as_date.second = data.value.asTimestamp.second; + ora_data.buffer.as_date.fsecond = \ + data.value.asTimestamp.fsecond // 1000; + ora_data.buffer.as_date.tz_hour_offset = \ + data.value.asTimestamp.tzHourOffset; + ora_data.buffer.as_date.tz_minute_offset = \ + data.value.asTimestamp.tzMinuteOffset; + elif ora_type_num == DPI_ORACLE_TYPE_INTERVAL_DS: + ora_data.buffer.as_interval_ds.days = \ + data.value.asIntervalDS.days; + ora_data.buffer.as_interval_ds.hours = \ + data.value.asIntervalDS.hours; + ora_data.buffer.as_interval_ds.minutes = \ + data.value.asIntervalDS.minutes; + ora_data.buffer.as_interval_ds.seconds = \ + data.value.asIntervalDS.seconds; + ora_data.buffer.as_interval_ds.fseconds = \ + data.value.asIntervalDS.fseconds; + elif ora_type_num == DPI_ORACLE_TYPE_INTERVAL_YM: + ora_data.buffer.as_interval_ym.years = \ + data.value.asIntervalYM.years; + ora_data.buffer.as_interval_ym.months = \ + data.value.asIntervalYM.months; + elif ora_type_num == DPI_ORACLE_TYPE_NUMBER: + as_bytes = &data.value.asBytes; + ora_data.buffer.as_number.is_max_negative_value = 0; + ora_data.buffer.as_number.is_integer = \ + memchr(as_bytes.ptr, b'.', as_bytes.length) == NULL; + memcpy(ora_data.buffer.as_number.chars, as_bytes.ptr, + as_bytes.length); + ora_data.buffer.as_number.num_chars = as_bytes.length; + else: + errors._raise_err(errors.ERR_DB_TYPE_NOT_SUPPORTED, + name=self._fetch_metadata.dbtype.name) + convert_oracle_data_to_arrow(self._fetch_metadata, self.metadata, + &ora_data, self._arrow_array) + cdef object _transform_element_to_python(self, uint32_t pos, dpiData *data): """ diff --git a/src/oracledb/impl/thin/messages.pyx b/src/oracledb/impl/thin/messages.pyx index 547f8b49..2475166f 100644 --- a/src/oracledb/impl/thin/messages.pyx +++ b/src/oracledb/impl/thin/messages.pyx @@ -754,6 +754,10 @@ cdef class MessageWithData(Message): var_impl._fetch_metadata) statement._last_output_type_handler = type_handler + # Create OracleArrowArray if fetching arrow is enabled + if cursor_impl.fetching_arrow: + cursor_impl._create_arrow_arrays() + # the list of output variables is equivalent to the fetch variables self.out_var_impls = cursor_impl.fetch_var_impls @@ -838,10 +842,15 @@ cdef class 
MessageWithData(Message): buf.read_oracle_data(metadata, &data, from_dbobject=False) if metadata.dbtype._csfrm == CS_FORM_NCHAR: buf._caps._check_ncharset_id() - column_value = convert_oracle_data_to_python( - metadata, var_impl.metadata, &data, var_impl._encoding_errors, - from_dbobject=False - ) + if self.cursor_impl.fetching_arrow: + convert_oracle_data_to_arrow( + metadata, var_impl.metadata, &data, var_impl._arrow_array + ) + else: + column_value = convert_oracle_data_to_python( + metadata, var_impl.metadata, &data, + var_impl._encoding_errors, from_dbobject=False + ) if not self.in_fetch: buf.read_sb4(&actual_num_bytes) if actual_num_bytes < 0 and ora_type_num == ORA_TYPE_NUM_BOOLEAN: @@ -2116,6 +2125,8 @@ cdef class ExecuteMessage(MessageWithData): self.cursor_impl._set_fetch_array_size(num_iters) if num_iters > 0 and not stmt._no_prefetch: options |= TNS_EXEC_OPTION_FETCH + if self.cursor_impl.fetching_arrow: + options |= TNS_EXEC_OPTION_NO_COMPRESSED_FETCH if not stmt._is_plsql and not self.parse_only: options |= TNS_EXEC_OPTION_NOT_PLSQL elif stmt._is_plsql and num_params > 0: @@ -2239,6 +2250,8 @@ cdef class ExecuteMessage(MessageWithData): and not info._is_return_bind] if self.function_code == TNS_FUNC_REEXECUTE_AND_FETCH: exec_flags_1 |= TNS_EXEC_OPTION_EXECUTE + if self.cursor_impl.fetching_arrow: + exec_flags_1 |= TNS_EXEC_OPTION_NO_COMPRESSED_FETCH num_iters = self.cursor_impl.prefetchrows self.cursor_impl._set_fetch_array_size(num_iters) else: diff --git a/src/oracledb/interchange/__init__.py b/src/oracledb/interchange/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/oracledb/interchange/buffer.py b/src/oracledb/interchange/buffer.py new file mode 100644 index 00000000..04461be0 --- /dev/null +++ b/src/oracledb/interchange/buffer.py @@ -0,0 +1,82 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# buffer.py +# +# Implements the Buffer class as documented in DataFrame API +# ----------------------------------------------------------------------------- + +from .protocol import ( + Buffer, + DlpackDeviceType, +) + + +class OracleColumnBuffer(Buffer): + """ + OracleColumnBuffer represents a contiguous memory buffer in the DataFrame + Interchange Protocol. 
It provides access to raw binary data that backs + various components of the data frame such as column values, validity masks + and offsets for variable-length data types. + """ + + def __init__(self, buffer_type, size_in_bytes, address) -> None: + self.buffer_type = buffer_type + self.size_in_bytes = size_in_bytes + self.address = address + + def __dlpack__(self): + """ + Represent this structure as a DLPack interface. + """ + raise NotImplementedError("__dlpack__") + + def __dlpack_device__(self) -> tuple[DlpackDeviceType, None]: + """ + Device type and device ID for where the data + in the buffer resides + """ + return (DlpackDeviceType.CPU, None) + + def __repr__(self) -> str: + device = self.__dlpack_device__()[0].name + return ( + f"OracleColumnBuffer(bufsize={self.bufsize}, " + f"ptr={self.ptr}, type={self.buffer_type}, device={device!r})" + ) + + @property + def bufsize(self) -> int: + """ + Returns the total size of buffer in bytes. + """ + return self.size_in_bytes + + @property + def ptr(self) -> int: + """ + Returns the memory address of the buffer. + """ + return self.address diff --git a/src/oracledb/interchange/column.py b/src/oracledb/interchange/column.py new file mode 100644 index 00000000..9bf24a59 --- /dev/null +++ b/src/oracledb/interchange/column.py @@ -0,0 +1,205 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# column.py +# +# Implements the Column class as documented in DataFrame API +# ----------------------------------------------------------------------------- + +from typing import Any, Iterable, Optional + +from .buffer import OracleColumnBuffer +from .protocol import ( + Column, + Dtype, + ColumnBuffers, + ColumnNullType, + DtypeKind, + Endianness, +) + +from .nanoarrow_bridge import ( + NANOARROW_TIME_UNIT_SECOND, + NANOARROW_TIME_UNIT_MILLI, + NANOARROW_TIME_UNIT_MICRO, + NANOARROW_TIME_UNIT_NANO, + NANOARROW_TYPE_DOUBLE, + NANOARROW_TYPE_FLOAT, + NANOARROW_TYPE_INT64, + NANOARROW_TYPE_STRING, + NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TYPE_DECIMAL128, +) + + +class OracleColumn(Column): + """ + OracleColumn represents a column in the DataFrame Interchange Protocol. It + provides a standardized way to expose a column's data, metadata and chunks, + allowing interoperability between data frame libraries. 
+ """ + + def __init__(self, ora_arrow_array: object): + self.ora_arrow_array = ora_arrow_array + self._buffer_info = ora_arrow_array.get_buffer_info() + + def __arrow_c_array__(self, requested_schema=None): + return self.ora_arrow_array.__arrow_c_array__( + requested_schema=requested_schema + ) + + def _data_buffer(self): + buffer = self._buffer_info.get("data") + if buffer is None: + return None + size_bytes, address = buffer + data_buffer = OracleColumnBuffer( + size_in_bytes=size_bytes, address=address, buffer_type="data" + ) + return data_buffer, self.dtype + + def _offsets_buffer(self): + buffer = self._buffer_info.get("offsets") + if buffer is None: + return None + size_bytes, address = buffer + offsets_buffer = OracleColumnBuffer( + size_in_bytes=size_bytes, address=address, buffer_type="offsets" + ) + dtype = (DtypeKind.INT, 32, "i", Endianness.NATIVE) + return offsets_buffer, dtype + + def _validity_buffer(self): + buffer = self._buffer_info.get("validity") + if buffer is None: + return None + size_bytes, address = buffer + validity_buffer = OracleColumnBuffer( + size_in_bytes=size_bytes, address=address, buffer_type="validity" + ) + dtype = (DtypeKind.BOOL, 1, "b", Endianness.NATIVE) + return validity_buffer, dtype + + @property + def describe_null(self) -> tuple[ColumnNullType, Optional[int]]: + """ + Returns a description of the null representation used by the column. + """ + if self.null_count == 0: + return ColumnNullType.NON_NULLABLE, None + else: + return ColumnNullType.USE_BITMASK, 0 + + @property + def dtype(self) -> Dtype: + """ + Returns the data type of the column. The returned dtype provides + information on the storage format and the type of data in the column. + """ + if self.ora_arrow_array.arrow_type == NANOARROW_TYPE_INT64: + return (DtypeKind.INT, 64, "l", Endianness.NATIVE) + elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_DOUBLE: + return (DtypeKind.FLOAT, 64, "g", Endianness.NATIVE) + elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_FLOAT: + return (DtypeKind.FLOAT, 64, "g", Endianness.NATIVE) + elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_STRING: + return (DtypeKind.STRING, 8, "u", Endianness.NATIVE) + elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_TIMESTAMP: + if self.ora_arrow_array.time_unit == NANOARROW_TIME_UNIT_MICRO: + return (DtypeKind.DATETIME, 64, "tsu:", Endianness.NATIVE) + elif self.ora_arrow_array.time_unit == NANOARROW_TIME_UNIT_SECOND: + return (DtypeKind.DATETIME, 64, "tss:", Endianness.NATIVE) + elif self.ora_arrow_array.time_unit == NANOARROW_TIME_UNIT_MILLI: + return (DtypeKind.DATETIME, 64, "tsm:", Endianness.NATIVE) + elif self.ora_arrow_array.time_unit == NANOARROW_TIME_UNIT_NANO: + return (DtypeKind.DATETIME, 64, "tsn:", Endianness.NATIVE) + elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_DECIMAL128: + array = self.ora_arrow_array + return ( + DtypeKind.DECIMAL, + 128, + f"d:{array.precision}.{array.scale}", + Endianness.NATIVE, + ) + + def get_buffers(self) -> ColumnBuffers: + """ + Returns a dictionary specifying the memory buffers backing the column. 
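A consumer can inspect a column's interchange dtype and null representation without touching the buffers. The printed values in the comments are indicative only, assuming an integer ID column fetched as INT64 with no NULLs and reusing the `odf` frame from the earlier sketches:

col = odf.get_column_by_name("ID")
kind, bit_width, fmt, byteorder = col.dtype
print(kind, bit_width, fmt)   # e.g. DtypeKind.INT 64 l
print(col.describe_null)      # (ColumnNullType.NON_NULLABLE, None) when null_count == 0
print(col.null_count, col.size())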
+ This currently consists of: + - "data": the main buffer storing column values + - "validity": a buffer containing null/missing values + - "offsets": a buffer for variable-length types like string + """ + return { + "data": self._data_buffer(), + "validity": self._validity_buffer(), + "offsets": self._offsets_buffer(), + } + + def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable[Column]: + """ + Return an iterator containing the column chunks. Currently this only + returns itself. + """ + yield self + + @property + def metadata(self) -> dict[str, Any]: + """ + Returns metadata about the column. + """ + return { + "name": self.ora_arrow_array.name, + "size": self.size(), + "num_chunks": self.num_chunks(), + } + + @property + def null_count(self) -> int: + """ + Returns the number of null elements. + """ + return self.ora_arrow_array.null_count + + def num_chunks(self) -> int: + """ + Returns the number of chunks used by the column. This method currently + always returns the value 1, implying that the column uses contiguous + memory. + """ + return 1 + + @property + def offset(self) -> int: + """ + Returns the offset of the first element. + """ + return self.ora_arrow_array.offset + + def size(self) -> int: + """ + Returns the number of elements in the column. + """ + return len(self.ora_arrow_array) diff --git a/src/oracledb/interchange/dataframe.py b/src/oracledb/interchange/dataframe.py new file mode 100644 index 00000000..f305ba8c --- /dev/null +++ b/src/oracledb/interchange/dataframe.py @@ -0,0 +1,151 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# dataframe.py +# +# Implement DataFrame class as documented in the standard +# https://data-apis.org/dataframe-protocol/latest/API.html +# ----------------------------------------------------------------------------- + +from typing import Any, Dict, Iterable, List, Optional + +from .column import OracleColumn + +from .protocol import DataFrame + + +class OracleDataFrame(DataFrame): + """ + OracleDataFrame is an implementation of the DataFrame Interchange Protocol. + It provides an interface for exchanging tabular data between different data + frame libraries (e.g. pandas, pyarrow, polars). 
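The bufsize/ptr pair exposed by OracleColumnBuffer is a raw address into memory owned by the underlying Arrow array, so it can be read directly, for example with ctypes. A sketch for an INT64 column with no NULL values (NULLs would additionally require consulting the validity buffer); the data frame must stay alive while the buffer is read:

import ctypes

col = odf.get_column_by_name("ID")
data_buffer, dtype = col.get_buffers()["data"]
count = col.size()                                    # number of elements in the column
values = (ctypes.c_int64 * count).from_address(data_buffer.ptr)
print(data_buffer.bufsize, list(values))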
+ """ + + def __init__( + self, + oracle_arrow_arrays: List, + allow_copy: bool = True, + ): + self._cols = [] + self._cols_map = {} + self._rows = None + self._arrays = oracle_arrow_arrays + for ora_arrow_array in oracle_arrow_arrays: + column = OracleColumn(ora_arrow_array=ora_arrow_array) + self._rows = column.size() + self._cols.append(column) + self._cols_map[ora_arrow_array.name] = column + self.allow_copy = allow_copy + + def __dataframe__( + self, + nan_as_null: bool = False, # noqa: FBT001 + allow_copy: bool = True, # noqa: FBT001 + ) -> DataFrame: + """ + Returns a data frame adhering to the DataFrame Interchange protocol. + """ + return self + + def get_chunks( + self, n_chunks: Optional[int] = None + ) -> Iterable[DataFrame]: + """ + Returns an iterator for each of the chunks in the data frame. Since + there is currently only one chunk, this simply returns itself. + """ + yield self + + def column_arrays(self) -> List: + """ + Returns a list of the Arrow arrays corresponding to each column in the + data frame. + """ + return self._arrays + + def column_names(self) -> List[str]: + """ + Returns a list of the names of the columns in the data frame. + """ + return list(self._cols_map.keys()) + + def get_column(self, i: int) -> OracleColumn: + """ + Returns a column from the data frame given its zero-based index. If the + index is out of range, an IndexError exception is raised. + """ + if i < 0 or i >= self.num_columns(): + raise IndexError( + f"Column index {i} is out of bounds for " + f"DataFrame with {self.num_columns()} columns" + ) + return self._cols[i] + + def get_column_by_name(self, name: str) -> OracleColumn: + """ + Returns a column from the data frame given the name of the column. If + the column name is not found, a KeyError exception is raised. + """ + if name not in self._cols_map: + raise KeyError(f"Column {name} not found in DataFrame") + return self._cols_map[name] + + def get_columns(self) -> List[OracleColumn]: + """ + Returns a list of all of the columns in the data frame. + """ + return self._cols + + @property + def metadata(self) -> Dict[str, Any]: + """ + Returns metadata for the data frame. Currently this returns + information about the number of columns (num_columns), number of rows + (num_rows) and number of chunks (num_chunks). + """ + return { + "num_columns": self.num_columns(), + "num_rows": self.num_rows(), + "num_chunks": self.num_chunks(), + } + + def num_chunks(self) -> int: + """ + Returns the number of chunks (contiguous memory blocks) in the data + frame. Currently this always returns 1. + """ + return 1 + + def num_columns(self) -> int: + """ + Returns the number of columns in the data frame. + """ + return len(self._cols) + + def num_rows(self) -> int: + """ + Returns the number of rows in the data frame. + """ + return self._rows diff --git a/src/oracledb/interchange/nanoarrow/nanoarrow.c b/src/oracledb/interchange/nanoarrow/nanoarrow.c new file mode 100644 index 00000000..8f265988 --- /dev/null +++ b/src/oracledb/interchange/nanoarrow/nanoarrow.c @@ -0,0 +1,3872 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
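Because OracleDataFrame implements __dataframe__() and its columns implement __arrow_c_array__(), it can be handed directly to libraries that consume the interchange protocol. A hedged sketch; pandas 1.5+ and pyarrow 11+ are assumptions of the example, not requirements of the driver:

import pandas
import pyarrow.interchange

odf = connection.fetch_df_all(
    statement="select id, name from SampleQueryTab order by id"
)

# build library-native objects from the interchange-protocol data frame
pdf = pandas.api.interchange.from_dataframe(odf)
tbl = pyarrow.interchange.from_dataframe(odf)
print(type(pdf), type(tbl))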
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include + +#include "nanoarrow.h" + +const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; } + +int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; } + +ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) { + if (error == NULL) { + return NANOARROW_OK; + } + + memset(error->message, 0, sizeof(error->message)); + + va_list args; + va_start(args, fmt); + int chars_needed = vsnprintf(error->message, sizeof(error->message), fmt, args); + va_end(args); + + if (chars_needed < 0) { + return EINVAL; + } else if (((size_t)chars_needed) >= sizeof(error->message)) { + return ERANGE; + } else { + return NANOARROW_OK; + } +} + +void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { + layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY; + layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL; + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[1] = storage_type; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[2] = NANOARROW_TYPE_UNINITIALIZED; + + layout->element_size_bits[0] = 1; + layout->element_size_bits[1] = 0; + layout->element_size_bits[2] = 0; + + layout->child_size_elements = 0; + + switch (storage_type) { + case NANOARROW_TYPE_UNINITIALIZED: + case NANOARROW_TYPE_NA: + case NANOARROW_TYPE_RUN_END_ENCODED: + layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED; + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; + layout->element_size_bits[0] = 0; + break; + + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_MAP: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + break; + + case NANOARROW_TYPE_LARGE_LIST: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; + layout->element_size_bits[1] = 64; + break; + + case NANOARROW_TYPE_STRUCT: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; + break; + + case NANOARROW_TYPE_BOOL: + layout->element_size_bits[1] = 1; + break; + + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT8: + layout->element_size_bits[1] = 8; + break; + + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_HALF_FLOAT: + layout->element_size_bits[1] = 16; + break; + + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_FLOAT: + layout->element_size_bits[1] = 32; + break; + case NANOARROW_TYPE_INTERVAL_MONTHS: + layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + break; + + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + layout->element_size_bits[1] = 64; + break; + + case NANOARROW_TYPE_DECIMAL128: + case 
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + layout->element_size_bits[1] = 128; + break; + + case NANOARROW_TYPE_DECIMAL256: + layout->element_size_bits[1] = 256; + break; + + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY; + break; + + case NANOARROW_TYPE_DENSE_UNION: + layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID; + layout->buffer_data_type[0] = NANOARROW_TYPE_INT8; + layout->element_size_bits[0] = 8; + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + break; + + case NANOARROW_TYPE_SPARSE_UNION: + layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID; + layout->buffer_data_type[0] = NANOARROW_TYPE_INT8; + layout->element_size_bits[0] = 8; + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; + break; + + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[2] = storage_type; + break; + + case NANOARROW_TYPE_LARGE_STRING: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; + layout->element_size_bits[1] = 64; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[2] = NANOARROW_TYPE_STRING; + break; + case NANOARROW_TYPE_LARGE_BINARY: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; + layout->element_size_bits[1] = 64; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY; + break; + + case NANOARROW_TYPE_BINARY_VIEW: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY_VIEW; + layout->element_size_bits[1] = 128; + break; + case NANOARROW_TYPE_STRING_VIEW: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW; + layout->element_size_bits[1] = 128; + + default: + break; + } +} + +void* ArrowMalloc(int64_t size) { return malloc(size); } + +void* ArrowRealloc(void* ptr, int64_t size) { return realloc(ptr, size); } + +void ArrowFree(void* ptr) { free(ptr); } + +static uint8_t* ArrowBufferAllocatorMallocReallocate( + struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size, + int64_t new_size) { + NANOARROW_UNUSED(allocator); + NANOARROW_UNUSED(old_size); + return (uint8_t*)ArrowRealloc(ptr, new_size); +} + +static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator* allocator, + uint8_t* ptr, int64_t size) { + NANOARROW_UNUSED(allocator); + NANOARROW_UNUSED(size); + if (ptr != NULL) { + ArrowFree(ptr); + } +} + +static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = { + &ArrowBufferAllocatorMallocReallocate, &ArrowBufferAllocatorMallocFree, NULL}; + +struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void) { + return ArrowBufferAllocatorMalloc; +} + +static uint8_t* ArrowBufferDeallocatorReallocate(struct ArrowBufferAllocator* allocator, + uint8_t* ptr, int64_t old_size, + int64_t new_size) { + NANOARROW_UNUSED(new_size); + + // Attempting to reallocate a buffer with a custom deallocator is + // a programming error. In debug mode, crash here. 
+#if defined(NANOARROW_DEBUG) + NANOARROW_PRINT_AND_DIE(ENOMEM, + "It is an error to reallocate a buffer whose allocator is " + "ArrowBufferDeallocator()"); +#endif + + // In release mode, ensure the the deallocator is called exactly + // once using the pointer it was given and return NULL, which + // will trigger the caller to return ENOMEM. + allocator->free(allocator, ptr, old_size); + *allocator = ArrowBufferAllocatorDefault(); + return NULL; +} + +struct ArrowBufferAllocator ArrowBufferDeallocator( + void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t size), + void* private_data) { + struct ArrowBufferAllocator allocator; + allocator.reallocate = &ArrowBufferDeallocatorReallocate; + allocator.free = custom_free; + allocator.private_data = private_data; + return allocator; +} + +static const int kInt32DecimalDigits = 9; + +static const uint64_t kUInt32PowersOfTen[] = { + 1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL, + 100000ULL, 1000000ULL, 10000000ULL, 100000000ULL, 1000000000ULL}; + +// Adapted from Arrow C++ to use 32-bit words for better C portability +// https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L524-L544 +static void ShiftAndAdd(struct ArrowStringView value, uint32_t* out, int64_t out_size) { + // We use strtoll for parsing, which needs input that is null-terminated + char chunk_string[16]; + + for (int64_t posn = 0; posn < value.size_bytes;) { + int64_t remaining = value.size_bytes - posn; + + int64_t group_size; + if (remaining > kInt32DecimalDigits) { + group_size = kInt32DecimalDigits; + } else { + group_size = remaining; + } + + const uint64_t multiple = kUInt32PowersOfTen[group_size]; + + memcpy(chunk_string, value.data + posn, group_size); + chunk_string[group_size] = '\0'; + uint32_t chunk = (uint32_t)strtoll(chunk_string, NULL, 10); + + for (int64_t i = 0; i < out_size; i++) { + uint64_t tmp = out[i]; + tmp *= multiple; + tmp += chunk; + out[i] = (uint32_t)(tmp & 0xFFFFFFFFULL); + chunk = (uint32_t)(tmp >> 32); + } + posn += group_size; + } +} + +ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, + struct ArrowStringView value) { + // Check for sign + int is_negative = value.data[0] == '-'; + int has_sign = is_negative || value.data[0] == '+'; + value.data += has_sign; + value.size_bytes -= has_sign; + + // Check all characters are digits that are not the negative sign + for (int64_t i = 0; i < value.size_bytes; i++) { + char c = value.data[i]; + if (c < '0' || c > '9') { + return EINVAL; + } + } + + // Skip over leading 0s + int64_t n_leading_zeroes = 0; + for (int64_t i = 0; i < value.size_bytes; i++) { + if (value.data[i] == '0') { + n_leading_zeroes++; + } else { + break; + } + } + + value.data += n_leading_zeroes; + value.size_bytes -= n_leading_zeroes; + + // Use 32-bit words for portability + uint32_t words32[8]; + int n_words32 = decimal->n_words * 2; + NANOARROW_DCHECK(n_words32 <= 8); + memset(words32, 0, sizeof(words32)); + + ShiftAndAdd(value, words32, n_words32); + + if (decimal->low_word_index == 0) { + memcpy(decimal->words, words32, sizeof(uint32_t) * n_words32); + } else { + uint64_t lo; + uint64_t hi; + + for (int i = 0; i < decimal->n_words; i++) { + lo = (uint64_t)words32[i * 2]; + hi = (uint64_t)words32[i * 2 + 1] << 32; + decimal->words[decimal->n_words - i - 1] = lo | hi; + } + } + + if (is_negative) { + ArrowDecimalNegate(decimal); + } + + return NANOARROW_OK; +} + +// Adapted from Arrow C++ for C +// 
https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L365 +ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal, + struct ArrowBuffer* buffer) { + NANOARROW_DCHECK(decimal->n_words == 2 || decimal->n_words == 4); + int is_negative = ArrowDecimalSign(decimal) < 0; + + uint64_t words_little_endian[4]; + if (decimal->low_word_index == 0) { + memcpy(words_little_endian, decimal->words, decimal->n_words * sizeof(uint64_t)); + } else { + for (int i = 0; i < decimal->n_words; i++) { + words_little_endian[i] = decimal->words[decimal->n_words - i - 1]; + } + } + + // We've already made a copy, so negate that if needed + if (is_negative) { + uint64_t carry = 1; + for (int i = 0; i < decimal->n_words; i++) { + uint64_t elem = words_little_endian[i]; + elem = ~elem + carry; + carry &= (elem == 0); + words_little_endian[i] = elem; + } + } + + // Find the most significant word that is non-zero + int most_significant_elem_idx = -1; + for (int i = decimal->n_words - 1; i >= 0; i--) { + if (words_little_endian[i] != 0) { + most_significant_elem_idx = i; + break; + } + } + + // If they are all zero, the output is just '0' + if (most_significant_elem_idx == -1) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(buffer, '0')); + return NANOARROW_OK; + } + + // Define segments such that each segment represents 9 digits with the + // least significant group of 9 digits first. For example, if the input represents + // 9876543210123456789, then segments will be [123456789, 876543210, 9]. + // We handle at most a signed 256 bit integer, whose maximum value occupies 77 + // characters. Thus, we need at most 9 segments. + const uint32_t k1e9 = 1000000000U; + int num_segments = 0; + uint32_t segments[9]; + memset(segments, 0, sizeof(segments)); + uint64_t* most_significant_elem = words_little_endian + most_significant_elem_idx; + + do { + // Compute remainder = words_little_endian % 1e9 and words_little_endian = + // words_little_endian / 1e9. + uint32_t remainder = 0; + uint64_t* elem = most_significant_elem; + + do { + // Compute dividend = (remainder << 32) | *elem (a virtual 96-bit integer); + // *elem = dividend / 1e9; + // remainder = dividend % 1e9. + uint32_t hi = (uint32_t)(*elem >> 32); + uint32_t lo = (uint32_t)(*elem & 0xFFFFFFFFULL); + uint64_t dividend_hi = ((uint64_t)(remainder) << 32) | hi; + uint64_t quotient_hi = dividend_hi / k1e9; + remainder = (uint32_t)(dividend_hi % k1e9); + uint64_t dividend_lo = ((uint64_t)(remainder) << 32) | lo; + uint64_t quotient_lo = dividend_lo / k1e9; + remainder = (uint32_t)(dividend_lo % k1e9); + + *elem = (quotient_hi << 32) | quotient_lo; + } while (elem-- != words_little_endian); + + segments[num_segments++] = remainder; + } while (*most_significant_elem != 0 || most_significant_elem-- != words_little_endian); + + // We know our output has no more than 9 digits per segment, plus a negative sign, + // plus any further digits between our output of 9 digits plus enough + // extra characters to ensure that snprintf() with n = 21 (maximum length of %lu + // including a the null terminator) is bounded properly. 
+ NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, num_segments * 9 + 1 + 21 - 9)); + if (is_negative) { + buffer->data[buffer->size_bytes++] = '-'; + } + + // The most significant segment should have no leading zeroes + int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, 21, "%lu", + (unsigned long)segments[num_segments - 1]); + + // Ensure that an encoding error from snprintf() does not result + // in an out-of-bounds access. + if (n_chars < 0) { + return ERANGE; + } + + buffer->size_bytes += n_chars; + + // Subsequent output needs to be left-padded with zeroes such that each segment + // takes up exactly 9 digits. + for (int i = num_segments - 2; i >= 0; i--) { + int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, 21, "%09lu", + (unsigned long)segments[i]); + buffer->size_bytes += n_chars; + NANOARROW_DCHECK(buffer->size_bytes <= buffer->capacity_bytes); + } + + return NANOARROW_OK; +} +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include + +#include "nanoarrow.h" + +static void ArrowSchemaReleaseInternal(struct ArrowSchema* schema) { + if (schema->format != NULL) ArrowFree((void*)schema->format); + if (schema->name != NULL) ArrowFree((void*)schema->name); + if (schema->metadata != NULL) ArrowFree((void*)schema->metadata); + + // This object owns the memory for all the children, but those + // children may have been generated elsewhere and might have + // their own release() callback. + if (schema->children != NULL) { + for (int64_t i = 0; i < schema->n_children; i++) { + if (schema->children[i] != NULL) { + if (schema->children[i]->release != NULL) { + ArrowSchemaRelease(schema->children[i]); + } + + ArrowFree(schema->children[i]); + } + } + + ArrowFree(schema->children); + } + + // This object owns the memory for the dictionary but it + // may have been generated somewhere else and have its own + // release() callback. 
+ if (schema->dictionary != NULL) { + if (schema->dictionary->release != NULL) { + ArrowSchemaRelease(schema->dictionary); + } + + ArrowFree(schema->dictionary); + } + + // private data not currently used + if (schema->private_data != NULL) { + ArrowFree(schema->private_data); + } + + schema->release = NULL; +} + +static const char* ArrowSchemaFormatTemplate(enum ArrowType type) { + switch (type) { + case NANOARROW_TYPE_UNINITIALIZED: + return NULL; + case NANOARROW_TYPE_NA: + return "n"; + case NANOARROW_TYPE_BOOL: + return "b"; + + case NANOARROW_TYPE_UINT8: + return "C"; + case NANOARROW_TYPE_INT8: + return "c"; + case NANOARROW_TYPE_UINT16: + return "S"; + case NANOARROW_TYPE_INT16: + return "s"; + case NANOARROW_TYPE_UINT32: + return "I"; + case NANOARROW_TYPE_INT32: + return "i"; + case NANOARROW_TYPE_UINT64: + return "L"; + case NANOARROW_TYPE_INT64: + return "l"; + + case NANOARROW_TYPE_HALF_FLOAT: + return "e"; + case NANOARROW_TYPE_FLOAT: + return "f"; + case NANOARROW_TYPE_DOUBLE: + return "g"; + + case NANOARROW_TYPE_STRING: + return "u"; + case NANOARROW_TYPE_LARGE_STRING: + return "U"; + case NANOARROW_TYPE_STRING_VIEW: + return "vu"; + case NANOARROW_TYPE_BINARY: + return "z"; + case NANOARROW_TYPE_BINARY_VIEW: + return "vz"; + case NANOARROW_TYPE_LARGE_BINARY: + return "Z"; + + case NANOARROW_TYPE_DATE32: + return "tdD"; + case NANOARROW_TYPE_DATE64: + return "tdm"; + case NANOARROW_TYPE_INTERVAL_MONTHS: + return "tiM"; + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + return "tiD"; + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + return "tin"; + + case NANOARROW_TYPE_LIST: + return "+l"; + case NANOARROW_TYPE_LARGE_LIST: + return "+L"; + case NANOARROW_TYPE_STRUCT: + return "+s"; + case NANOARROW_TYPE_MAP: + return "+m"; + case NANOARROW_TYPE_RUN_END_ENCODED: + return "+r"; + + default: + return NULL; + } +} + +static int ArrowSchemaInitChildrenIfNeeded(struct ArrowSchema* schema, + enum ArrowType type) { + switch (type) { + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); + ArrowSchemaInit(schema->children[0]); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "item")); + break; + case NANOARROW_TYPE_MAP: + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); + NANOARROW_RETURN_NOT_OK( + ArrowSchemaInitFromType(schema->children[0], NANOARROW_TYPE_STRUCT)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "entries")); + schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema->children[0], 2)); + ArrowSchemaInit(schema->children[0]->children[0]); + ArrowSchemaInit(schema->children[0]->children[1]); + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetName(schema->children[0]->children[0], "key")); + schema->children[0]->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetName(schema->children[0]->children[1], "value")); + break; + case NANOARROW_TYPE_RUN_END_ENCODED: + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 2)); + ArrowSchemaInit(schema->children[0]); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "run_ends")); + schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + ArrowSchemaInit(schema->children[1]); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[1], "values")); + default: + break; + } + + return NANOARROW_OK; +} + +void ArrowSchemaInit(struct ArrowSchema* schema) { + schema->format = 
NULL; + schema->name = NULL; + schema->metadata = NULL; + schema->flags = ARROW_FLAG_NULLABLE; + schema->n_children = 0; + schema->children = NULL; + schema->dictionary = NULL; + schema->private_data = NULL; + schema->release = &ArrowSchemaReleaseInternal; +} + +ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type) { + // We don't allocate the dictionary because it has to be nullptr + // for non-dictionary-encoded arrays. + + // Set the format to a valid format string for type + const char* template_format = ArrowSchemaFormatTemplate(type); + + // If type isn't recognized and not explicitly unset + if (template_format == NULL && type != NANOARROW_TYPE_UNINITIALIZED) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, template_format)); + + // For types with an umabiguous child structure, allocate children + return ArrowSchemaInitChildrenIfNeeded(schema, type); +} + +ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRUCT)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children)); + for (int64_t i = 0; i < n_children; i++) { + ArrowSchemaInit(schema->children[i]); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type) { + ArrowSchemaInit(schema); + + int result = ArrowSchemaSetType(schema, type); + if (result != NANOARROW_OK) { + ArrowSchemaRelease(schema); + return result; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, + enum ArrowType type, int32_t fixed_size) { + if (fixed_size <= 0) { + return EINVAL; + } + + char buffer[64]; + int n_chars; + switch (type) { + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + n_chars = snprintf(buffer, sizeof(buffer), "w:%" PRId32, fixed_size); + break; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + n_chars = snprintf(buffer, sizeof(buffer), "+w:%" PRId32, fixed_size); + break; + default: + return EINVAL; + } + + if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) { + return ERANGE; + } + + buffer[n_chars] = '\0'; + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, buffer)); + + if (type == NANOARROW_TYPE_FIXED_SIZE_LIST) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaInitChildrenIfNeeded(schema, type)); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, + int32_t decimal_precision, + int32_t decimal_scale) { + if (decimal_precision <= 0) { + return EINVAL; + } + + char buffer[64]; + int n_chars; + switch (type) { + case NANOARROW_TYPE_DECIMAL128: + n_chars = + snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale); + break; + case NANOARROW_TYPE_DECIMAL256: + n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,256", decimal_precision, + decimal_scale); + break; + default: + return EINVAL; + } + + if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) { + return ERANGE; + } + + buffer[n_chars] = '\0'; + return ArrowSchemaSetFormat(schema, buffer); +} + +ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema, + enum ArrowType run_end_type) { + switch (run_end_type) { + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_INT64: + break; + default: + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat( + schema, ArrowSchemaFormatTemplate(NANOARROW_TYPE_RUN_END_ENCODED))); + NANOARROW_RETURN_NOT_OK( + 
ArrowSchemaInitChildrenIfNeeded(schema, NANOARROW_TYPE_RUN_END_ENCODED)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema->children[0], run_end_type)); + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetType(schema->children[1], NANOARROW_TYPE_UNINITIALIZED)); + + return NANOARROW_OK; +} + +static const char* ArrowTimeUnitFormatString(enum ArrowTimeUnit time_unit) { + switch (time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + return "s"; + case NANOARROW_TIME_UNIT_MILLI: + return "m"; + case NANOARROW_TIME_UNIT_MICRO: + return "u"; + case NANOARROW_TIME_UNIT_NANO: + return "n"; + default: + return NULL; + } +} + +ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, + enum ArrowTimeUnit time_unit, + const char* timezone) { + const char* time_unit_str = ArrowTimeUnitFormatString(time_unit); + if (time_unit_str == NULL) { + return EINVAL; + } + + char buffer[128]; + int n_chars; + switch (type) { + case NANOARROW_TYPE_TIME32: + if (timezone != NULL) { + return EINVAL; + } + + switch (time_unit) { + case NANOARROW_TIME_UNIT_MICRO: + case NANOARROW_TIME_UNIT_NANO: + return EINVAL; + default: + break; + } + + n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str); + break; + case NANOARROW_TYPE_TIME64: + if (timezone != NULL) { + return EINVAL; + } + + switch (time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + case NANOARROW_TIME_UNIT_MILLI: + return EINVAL; + default: + break; + } + + n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str); + break; + case NANOARROW_TYPE_TIMESTAMP: + if (timezone == NULL) { + timezone = ""; + } + n_chars = snprintf(buffer, sizeof(buffer), "ts%s:%s", time_unit_str, timezone); + break; + case NANOARROW_TYPE_DURATION: + if (timezone != NULL) { + return EINVAL; + } + n_chars = snprintf(buffer, sizeof(buffer), "tD%s", time_unit_str); + break; + default: + return EINVAL; + } + + if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) { + return ERANGE; + } + + buffer[n_chars] = '\0'; + + return ArrowSchemaSetFormat(schema, buffer); +} + +ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, + int64_t n_children) { + if (n_children < 0 || n_children > 127) { + return EINVAL; + } + + // Max valid size would be +ud:0,1,...126 = 401 characters + null terminator + char format_out[512]; + int64_t format_out_size = 512; + memset(format_out, 0, format_out_size); + int n_chars; + char* format_cursor = format_out; + + switch (type) { + case NANOARROW_TYPE_SPARSE_UNION: + n_chars = snprintf(format_cursor, format_out_size, "+us:"); + format_cursor += n_chars; + format_out_size -= n_chars; + break; + case NANOARROW_TYPE_DENSE_UNION: + n_chars = snprintf(format_cursor, format_out_size, "+ud:"); + format_cursor += n_chars; + format_out_size -= n_chars; + break; + default: + return EINVAL; + } + + // Ensure that an encoding error from snprintf() does not result + // in an out-of-bounds access. + if (n_chars < 0) { + return ERANGE; + } + + if (n_children > 0) { + n_chars = snprintf(format_cursor, format_out_size, "0"); + format_cursor += n_chars; + format_out_size -= n_chars; + + for (int64_t i = 1; i < n_children; i++) { + n_chars = snprintf(format_cursor, format_out_size, ",%" PRId64, i); + format_cursor += n_chars; + format_out_size -= n_chars; + } + } + + // Ensure that an encoding error from snprintf() does not result + // in an out-of-bounds access. 
+ if (n_chars < 0) { + return ERANGE; + } + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, format_out)); + + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children)); + for (int64_t i = 0; i < n_children; i++) { + ArrowSchemaInit(schema->children[i]); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format) { + if (schema->format != NULL) { + ArrowFree((void*)schema->format); + } + + if (format != NULL) { + size_t format_size = strlen(format) + 1; + schema->format = (const char*)ArrowMalloc(format_size); + if (schema->format == NULL) { + return ENOMEM; + } + + memcpy((void*)schema->format, format, format_size); + } else { + schema->format = NULL; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name) { + if (schema->name != NULL) { + ArrowFree((void*)schema->name); + } + + if (name != NULL) { + size_t name_size = strlen(name) + 1; + schema->name = (const char*)ArrowMalloc(name_size); + if (schema->name == NULL) { + return ENOMEM; + } + + memcpy((void*)schema->name, name, name_size); + } else { + schema->name = NULL; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata) { + if (schema->metadata != NULL) { + ArrowFree((void*)schema->metadata); + } + + if (metadata != NULL) { + size_t metadata_size = ArrowMetadataSizeOf(metadata); + schema->metadata = (const char*)ArrowMalloc(metadata_size); + if (schema->metadata == NULL) { + return ENOMEM; + } + + memcpy((void*)schema->metadata, metadata, metadata_size); + } else { + schema->metadata = NULL; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, + int64_t n_children) { + if (schema->children != NULL) { + return EEXIST; + } + + if (n_children > 0) { + schema->children = + (struct ArrowSchema**)ArrowMalloc(n_children * sizeof(struct ArrowSchema*)); + + if (schema->children == NULL) { + return ENOMEM; + } + + schema->n_children = n_children; + + memset(schema->children, 0, n_children * sizeof(struct ArrowSchema*)); + + for (int64_t i = 0; i < n_children; i++) { + schema->children[i] = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema)); + + if (schema->children[i] == NULL) { + return ENOMEM; + } + + schema->children[i]->release = NULL; + } + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema) { + if (schema->dictionary != NULL) { + return EEXIST; + } + + schema->dictionary = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema)); + if (schema->dictionary == NULL) { + return ENOMEM; + } + + schema->dictionary->release = NULL; + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, + struct ArrowSchema* schema_out) { + ArrowSchemaInit(schema_out); + + int result = ArrowSchemaSetFormat(schema_out, schema->format); + if (result != NANOARROW_OK) { + ArrowSchemaRelease(schema_out); + return result; + } + + schema_out->flags = schema->flags; + + result = ArrowSchemaSetName(schema_out, schema->name); + if (result != NANOARROW_OK) { + ArrowSchemaRelease(schema_out); + return result; + } + + result = ArrowSchemaSetMetadata(schema_out, schema->metadata); + if (result != NANOARROW_OK) { + ArrowSchemaRelease(schema_out); + return result; + } + + result = ArrowSchemaAllocateChildren(schema_out, schema->n_children); + if (result != NANOARROW_OK) { + ArrowSchemaRelease(schema_out); + 
return result; + } + + for (int64_t i = 0; i < schema->n_children; i++) { + result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]); + if (result != NANOARROW_OK) { + ArrowSchemaRelease(schema_out); + return result; + } + } + + if (schema->dictionary != NULL) { + result = ArrowSchemaAllocateDictionary(schema_out); + if (result != NANOARROW_OK) { + ArrowSchemaRelease(schema_out); + return result; + } + + result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary); + if (result != NANOARROW_OK) { + ArrowSchemaRelease(schema_out); + return result; + } + } + + return NANOARROW_OK; +} + +static void ArrowSchemaViewSetPrimitive(struct ArrowSchemaView* schema_view, + enum ArrowType type) { + schema_view->type = type; + schema_view->storage_type = type; +} + +static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view, + const char* format, + const char** format_end_out, + struct ArrowError* error) { + *format_end_out = format; + + // needed for decimal parsing + const char* parse_start; + char* parse_end; + + switch (format[0]) { + case 'n': + schema_view->type = NANOARROW_TYPE_NA; + schema_view->storage_type = NANOARROW_TYPE_NA; + *format_end_out = format + 1; + return NANOARROW_OK; + case 'b': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BOOL); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'c': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT8); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'C': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT8); + *format_end_out = format + 1; + return NANOARROW_OK; + case 's': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT16); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'S': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT16); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'i': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'I': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT32); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'l': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'L': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT64); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'e': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_HALF_FLOAT); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'f': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_FLOAT); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'g': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DOUBLE); + *format_end_out = format + 1; + return NANOARROW_OK; + + // decimal + case 'd': + if (format[1] != ':' || format[2] == '\0') { + ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'"); + return EINVAL; + } + + parse_start = format + 2; + schema_view->decimal_precision = (int32_t)strtol(parse_start, &parse_end, 10); + if (parse_end == parse_start || parse_end[0] != ',') { + ArrowErrorSet(error, "Expected 'precision,scale[,bitwidth]' following 'd:'"); + return EINVAL; + } + + parse_start = parse_end + 1; + schema_view->decimal_scale = (int32_t)strtol(parse_start, &parse_end, 10); + if (parse_end == parse_start) { + ArrowErrorSet(error, "Expected 'scale[,bitwidth]' following 'd:precision,'"); + return EINVAL; + } else if 
(parse_end[0] != ',') { + schema_view->decimal_bitwidth = 128; + } else { + parse_start = parse_end + 1; + schema_view->decimal_bitwidth = (int32_t)strtol(parse_start, &parse_end, 10); + if (parse_start == parse_end) { + ArrowErrorSet(error, "Expected precision following 'd:precision,scale,'"); + return EINVAL; + } + } + + *format_end_out = parse_end; + + switch (schema_view->decimal_bitwidth) { + case 128: + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128); + return NANOARROW_OK; + case 256: + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL256); + return NANOARROW_OK; + default: + ArrowErrorSet(error, + "Expected decimal bitwidth of 128 or 256 but found %" PRId32, + schema_view->decimal_bitwidth); + return EINVAL; + } + + // validity + data + case 'w': + schema_view->type = NANOARROW_TYPE_FIXED_SIZE_BINARY; + schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_BINARY; + if (format[1] != ':' || format[2] == '\0') { + ArrowErrorSet(error, "Expected ':' following 'w'"); + return EINVAL; + } + + schema_view->fixed_size = (int32_t)strtol(format + 2, (char**)format_end_out, 10); + return NANOARROW_OK; + + // validity + offset + data + case 'z': + schema_view->type = NANOARROW_TYPE_BINARY; + schema_view->storage_type = NANOARROW_TYPE_BINARY; + *format_end_out = format + 1; + return NANOARROW_OK; + case 'u': + schema_view->type = NANOARROW_TYPE_STRING; + schema_view->storage_type = NANOARROW_TYPE_STRING; + *format_end_out = format + 1; + return NANOARROW_OK; + + // validity + large_offset + data + case 'Z': + schema_view->type = NANOARROW_TYPE_LARGE_BINARY; + schema_view->storage_type = NANOARROW_TYPE_LARGE_BINARY; + *format_end_out = format + 1; + return NANOARROW_OK; + case 'U': + schema_view->type = NANOARROW_TYPE_LARGE_STRING; + schema_view->storage_type = NANOARROW_TYPE_LARGE_STRING; + *format_end_out = format + 1; + return NANOARROW_OK; + + // nested types + case '+': + switch (format[1]) { + // list has validity + offset or offset + case 'l': + schema_view->storage_type = NANOARROW_TYPE_LIST; + schema_view->type = NANOARROW_TYPE_LIST; + *format_end_out = format + 2; + return NANOARROW_OK; + + // large list has validity + large_offset or large_offset + case 'L': + schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST; + schema_view->type = NANOARROW_TYPE_LARGE_LIST; + *format_end_out = format + 2; + return NANOARROW_OK; + + // run end encoded has no buffer at all + case 'r': + schema_view->storage_type = NANOARROW_TYPE_RUN_END_ENCODED; + schema_view->type = NANOARROW_TYPE_RUN_END_ENCODED; + *format_end_out = format + 2; + return NANOARROW_OK; + + // just validity buffer + case 'w': + if (format[2] != ':' || format[3] == '\0') { + ArrowErrorSet(error, "Expected ':' following '+w'"); + return EINVAL; + } + + schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_LIST; + schema_view->type = NANOARROW_TYPE_FIXED_SIZE_LIST; + schema_view->fixed_size = + (int32_t)strtol(format + 3, (char**)format_end_out, 10); + return NANOARROW_OK; + case 's': + schema_view->storage_type = NANOARROW_TYPE_STRUCT; + schema_view->type = NANOARROW_TYPE_STRUCT; + *format_end_out = format + 2; + return NANOARROW_OK; + case 'm': + schema_view->storage_type = NANOARROW_TYPE_MAP; + schema_view->type = NANOARROW_TYPE_MAP; + *format_end_out = format + 2; + return NANOARROW_OK; + + // unions + case 'u': + switch (format[2]) { + case 'd': + schema_view->storage_type = NANOARROW_TYPE_DENSE_UNION; + schema_view->type = NANOARROW_TYPE_DENSE_UNION; + break; + case 's': + 
schema_view->storage_type = NANOARROW_TYPE_SPARSE_UNION; + schema_view->type = NANOARROW_TYPE_SPARSE_UNION; + break; + default: + ArrowErrorSet(error, + "Expected union format string +us: or " + "+ud: but found '%s'", + format); + return EINVAL; + } + + if (format[3] == ':') { + schema_view->union_type_ids = format + 4; + int64_t n_type_ids = + _ArrowParseUnionTypeIds(schema_view->union_type_ids, NULL); + if (n_type_ids != schema_view->schema->n_children) { + ArrowErrorSet(error, + "Expected union type_ids parameter to be a comma-separated " + "list of %" PRId64 " values between 0 and 127 but found '%s'", + schema_view->schema->n_children, schema_view->union_type_ids); + return EINVAL; + } + *format_end_out = format + strlen(format); + return NANOARROW_OK; + } else { + ArrowErrorSet(error, + "Expected union format string +us: or +ud: " + "but found '%s'", + format); + return EINVAL; + } + + default: + ArrowErrorSet(error, "Expected nested type format string but found '%s'", + format); + return EINVAL; + } + + // date/time types + case 't': + switch (format[1]) { + // date + case 'd': + switch (format[2]) { + case 'D': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); + schema_view->type = NANOARROW_TYPE_DATE32; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'm': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DATE64; + *format_end_out = format + 3; + return NANOARROW_OK; + default: + ArrowErrorSet(error, "Expected 'D' or 'm' following 'td' but found '%s'", + format + 2); + return EINVAL; + } + + // time of day + case 't': + switch (format[2]) { + case 's': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); + schema_view->type = NANOARROW_TYPE_TIME32; + schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'm': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); + schema_view->type = NANOARROW_TYPE_TIME32; + schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'u': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIME64; + schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'n': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIME64; + schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; + *format_end_out = format + 3; + return NANOARROW_OK; + default: + ArrowErrorSet( + error, "Expected 's', 'm', 'u', or 'n' following 'tt' but found '%s'", + format + 2); + return EINVAL; + } + + // timestamp + case 's': + switch (format[2]) { + case 's': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIMESTAMP; + schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; + break; + case 'm': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIMESTAMP; + schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; + break; + case 'u': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIMESTAMP; + schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; + break; + case 'n': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIMESTAMP; + schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; + break; + default: + 
ArrowErrorSet( + error, "Expected 's', 'm', 'u', or 'n' following 'ts' but found '%s'", + format + 2); + return EINVAL; + } + + if (format[3] != ':') { + ArrowErrorSet(error, "Expected ':' following '%.3s' but found '%s'", format, + format + 3); + return EINVAL; + } + + schema_view->timezone = format + 4; + *format_end_out = format + strlen(format); + return NANOARROW_OK; + + // duration + case 'D': + switch (format[2]) { + case 's': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DURATION; + schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'm': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DURATION; + schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'u': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DURATION; + schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'n': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DURATION; + schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; + *format_end_out = format + 3; + return NANOARROW_OK; + default: + ArrowErrorSet(error, + "Expected 's', 'm', u', or 'n' following 'tD' but found '%s'", + format + 2); + return EINVAL; + } + + // interval + case 'i': + switch (format[2]) { + case 'M': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_MONTHS); + *format_end_out = format + 3; + return NANOARROW_OK; + case 'D': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_DAY_TIME); + *format_end_out = format + 3; + return NANOARROW_OK; + case 'n': + ArrowSchemaViewSetPrimitive(schema_view, + NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); + *format_end_out = format + 3; + return NANOARROW_OK; + default: + ArrowErrorSet(error, + "Expected 'M', 'D', or 'n' following 'ti' but found '%s'", + format + 2); + return EINVAL; + } + + default: + ArrowErrorSet( + error, "Expected 'd', 't', 's', 'D', or 'i' following 't' but found '%s'", + format + 1); + return EINVAL; + } + + // view types + case 'v': { + switch (format[1]) { + case 'u': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_STRING_VIEW); + *format_end_out = format + 2; + return NANOARROW_OK; + case 'z': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BINARY_VIEW); + *format_end_out = format + 2; + return NANOARROW_OK; + default: + ArrowErrorSet(error, "Expected 'u', or 'z' following 'v' but found '%s'", + format + 1); + return EINVAL; + } + } + + default: + ArrowErrorSet(error, "Unknown format: '%s'", format); + return EINVAL; + } +} + +static ArrowErrorCode ArrowSchemaViewValidateNChildren( + struct ArrowSchemaView* schema_view, int64_t n_children, struct ArrowError* error) { + if (n_children != -1 && schema_view->schema->n_children != n_children) { + ArrowErrorSet( + error, "Expected schema with %" PRId64 " children but found %" PRId64 " children", + n_children, schema_view->schema->n_children); + return EINVAL; + } + + // Don't do a full validation of children but do check that they won't + // segfault if inspected + struct ArrowSchema* child; + for (int64_t i = 0; i < schema_view->schema->n_children; i++) { + child = schema_view->schema->children[i]; + if (child == NULL) { + ArrowErrorSet( + error, "Expected valid schema at schema->children[%" PRId64 "] 
but found NULL", + i); + return EINVAL; + } else if (child->release == NULL) { + ArrowErrorSet(error, + "Expected valid schema at schema->children[%" PRId64 + "] but found a released schema", + i); + return EINVAL; + } + } + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowSchemaViewValidateUnion(struct ArrowSchemaView* schema_view, + struct ArrowError* error) { + return ArrowSchemaViewValidateNChildren(schema_view, -1, error); +} + +static ArrowErrorCode ArrowSchemaViewValidateMap(struct ArrowSchemaView* schema_view, + struct ArrowError* error) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewValidateNChildren(schema_view, 1, error)); + + if (schema_view->schema->children[0]->n_children != 2) { + ArrowErrorSet(error, + "Expected child of map type to have 2 children but found %" PRId64, + schema_view->schema->children[0]->n_children); + return EINVAL; + } + + if (strcmp(schema_view->schema->children[0]->format, "+s") != 0) { + ArrowErrorSet(error, "Expected format of child of map type to be '+s' but found '%s'", + schema_view->schema->children[0]->format); + return EINVAL; + } + + if (schema_view->schema->children[0]->flags & ARROW_FLAG_NULLABLE) { + ArrowErrorSet(error, + "Expected child of map type to be non-nullable but was nullable"); + return EINVAL; + } + + if (schema_view->schema->children[0]->children[0]->flags & ARROW_FLAG_NULLABLE) { + ArrowErrorSet(error, "Expected key of map type to be non-nullable but was nullable"); + return EINVAL; + } + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowSchemaViewValidateDictionary( + struct ArrowSchemaView* schema_view, struct ArrowError* error) { + // check for valid index type + switch (schema_view->storage_type) { + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_INT64: + break; + default: + ArrowErrorSet( + error, + "Expected dictionary schema index type to be an integral type but found '%s'", + schema_view->schema->format); + return EINVAL; + } + + struct ArrowSchemaView dictionary_schema_view; + return ArrowSchemaViewInit(&dictionary_schema_view, schema_view->schema->dictionary, + error); +} + +static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_view, + enum ArrowType type, + struct ArrowError* error) { + switch (type) { + case NANOARROW_TYPE_NA: + case NANOARROW_TYPE_BOOL: + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_HALF_FLOAT: + case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_DECIMAL256: + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_LARGE_BINARY: + case NANOARROW_TYPE_DATE32: + case NANOARROW_TYPE_DATE64: + case NANOARROW_TYPE_INTERVAL_MONTHS: + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + case NANOARROW_TYPE_TIMESTAMP: + case NANOARROW_TYPE_TIME32: + case NANOARROW_TYPE_TIME64: + case NANOARROW_TYPE_DURATION: + case NANOARROW_TYPE_BINARY_VIEW: + case NANOARROW_TYPE_STRING_VIEW: + return ArrowSchemaViewValidateNChildren(schema_view, 0, error); + + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + if (schema_view->fixed_size <= 0) { + 
ArrowErrorSet(error, "Expected size > 0 for fixed size binary but found size %d", + schema_view->fixed_size); + return EINVAL; + } + return ArrowSchemaViewValidateNChildren(schema_view, 0, error); + + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + return ArrowSchemaViewValidateNChildren(schema_view, 1, error); + + case NANOARROW_TYPE_RUN_END_ENCODED: + return ArrowSchemaViewValidateNChildren(schema_view, 2, error); + + case NANOARROW_TYPE_STRUCT: + return ArrowSchemaViewValidateNChildren(schema_view, -1, error); + + case NANOARROW_TYPE_SPARSE_UNION: + case NANOARROW_TYPE_DENSE_UNION: + return ArrowSchemaViewValidateUnion(schema_view, error); + + case NANOARROW_TYPE_MAP: + return ArrowSchemaViewValidateMap(schema_view, error); + + case NANOARROW_TYPE_DICTIONARY: + return ArrowSchemaViewValidateDictionary(schema_view, error); + + default: + ArrowErrorSet(error, "Expected a valid enum ArrowType value but found %d", + schema_view->type); + return EINVAL; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, + const struct ArrowSchema* schema, + struct ArrowError* error) { + if (schema == NULL) { + ArrowErrorSet(error, "Expected non-NULL schema"); + return EINVAL; + } + + if (schema->release == NULL) { + ArrowErrorSet(error, "Expected non-released schema"); + return EINVAL; + } + + schema_view->schema = schema; + + const char* format = schema->format; + if (format == NULL) { + ArrowErrorSet( + error, + "Error parsing schema->format: Expected a null-terminated string but found NULL"); + return EINVAL; + } + + size_t format_len = strlen(format); + if (format_len == 0) { + ArrowErrorSet(error, "Error parsing schema->format: Expected a string with size > 0"); + return EINVAL; + } + + const char* format_end_out; + int result = ArrowSchemaViewParse(schema_view, format, &format_end_out, error); + + if (result != NANOARROW_OK) { + if (error != NULL) { + char child_error[1024]; + memcpy(child_error, ArrowErrorMessage(error), 1024); + ArrowErrorSet(error, "Error parsing schema->format: %s", child_error); + } + + return result; + } + + if ((format + format_len) != format_end_out) { + ArrowErrorSet(error, "Error parsing schema->format '%s': parsed %d/%zu characters", + format, (int)(format_end_out - format), format_len); + return EINVAL; + } + + if (schema->dictionary != NULL) { + schema_view->type = NANOARROW_TYPE_DICTIONARY; + } + + NANOARROW_RETURN_NOT_OK( + ArrowSchemaViewValidate(schema_view, schema_view->storage_type, error)); + + if (schema_view->storage_type != schema_view->type) { + NANOARROW_RETURN_NOT_OK( + ArrowSchemaViewValidate(schema_view, schema_view->type, error)); + } + + int64_t unknown_flags = schema->flags & ~NANOARROW_FLAG_ALL_SUPPORTED; + if (unknown_flags != 0) { + ArrowErrorSet(error, "Unknown ArrowSchema flag"); + return EINVAL; + } + + if (schema->flags & ARROW_FLAG_DICTIONARY_ORDERED && + schema_view->type != NANOARROW_TYPE_DICTIONARY) { + ArrowErrorSet(error, + "ARROW_FLAG_DICTIONARY_ORDERED is only relevant for dictionaries"); + return EINVAL; + } + + if (schema->flags & ARROW_FLAG_MAP_KEYS_SORTED && + schema_view->type != NANOARROW_TYPE_MAP) { + ArrowErrorSet(error, "ARROW_FLAG_MAP_KEYS_SORTED is only relevant for a map type"); + return EINVAL; + } + + ArrowLayoutInit(&schema_view->layout, schema_view->storage_type); + if (schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) { + schema_view->layout.element_size_bits[1] = schema_view->fixed_size * 8; + } else if 
(schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_LIST) { + schema_view->layout.child_size_elements = schema_view->fixed_size; + } + + schema_view->extension_name = ArrowCharView(NULL); + schema_view->extension_metadata = ArrowCharView(NULL); + NANOARROW_RETURN_NOT_OK(ArrowMetadataGetValue(schema->metadata, + ArrowCharView("ARROW:extension:name"), + &schema_view->extension_name)); + NANOARROW_RETURN_NOT_OK(ArrowMetadataGetValue(schema->metadata, + ArrowCharView("ARROW:extension:metadata"), + &schema_view->extension_metadata)); + + return NANOARROW_OK; +} + +static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_view, + char* out, int64_t n) { + const char* type_string = ArrowTypeString(schema_view->type); + switch (schema_view->type) { + case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_DECIMAL256: + return snprintf(out, n, "%s(%" PRId32 ", %" PRId32 ")", type_string, + schema_view->decimal_precision, schema_view->decimal_scale); + case NANOARROW_TYPE_TIMESTAMP: + return snprintf(out, n, "%s('%s', '%s')", type_string, + ArrowTimeUnitString(schema_view->time_unit), schema_view->timezone); + case NANOARROW_TYPE_TIME32: + case NANOARROW_TYPE_TIME64: + case NANOARROW_TYPE_DURATION: + return snprintf(out, n, "%s('%s')", type_string, + ArrowTimeUnitString(schema_view->time_unit)); + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + return snprintf(out, n, "%s(%" PRId32 ")", type_string, schema_view->fixed_size); + case NANOARROW_TYPE_SPARSE_UNION: + case NANOARROW_TYPE_DENSE_UNION: + return snprintf(out, n, "%s([%s])", type_string, schema_view->union_type_ids); + default: + return snprintf(out, n, "%s", type_string); + } +} + +// Helper for bookkeeping to emulate sprintf()-like behaviour spread +// among multiple sprintf calls. +static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last, + int64_t* n_remaining, int64_t* n_chars) { + // In the unlikely snprintf() returning a negative value (encoding error), + // ensure the result won't cause an out-of-bounds access. + if (n_chars_last < 0) { + n_chars_last = 0; + } + + *n_chars += n_chars_last; + *n_remaining -= n_chars_last; + + // n_remaining is never less than 0 + if (*n_remaining < 0) { + *n_remaining = 0; + } + + // Can't do math on a NULL pointer + if (*out != NULL) { + *out += n_chars_last; + } +} + +int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, + char recursive) { + if (schema == NULL) { + return snprintf(out, n, "[invalid: pointer is null]"); + } + + if (schema->release == NULL) { + return snprintf(out, n, "[invalid: schema is released]"); + } + + struct ArrowSchemaView schema_view; + struct ArrowError error; + + if (ArrowSchemaViewInit(&schema_view, schema, &error) != NANOARROW_OK) { + return snprintf(out, n, "[invalid: %s]", ArrowErrorMessage(&error)); + } + + // Extension type and dictionary should include both the top-level type + // and the storage type. 
+ int is_extension = schema_view.extension_name.size_bytes > 0; + int is_dictionary = schema->dictionary != NULL; + int64_t n_chars = 0; + int64_t n_chars_last = 0; + + // Uncommon but not technically impossible that both are true + if (is_extension && is_dictionary) { + n_chars_last = snprintf( + out, n, "%.*s{dictionary(%s)<", (int)schema_view.extension_name.size_bytes, + schema_view.extension_name.data, ArrowTypeString(schema_view.storage_type)); + } else if (is_extension) { + n_chars_last = snprintf(out, n, "%.*s{", (int)schema_view.extension_name.size_bytes, + schema_view.extension_name.data); + } else if (is_dictionary) { + n_chars_last = + snprintf(out, n, "dictionary(%s)<", ArrowTypeString(schema_view.storage_type)); + } + + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + + if (!is_dictionary) { + n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out, n); + } else { + n_chars_last = ArrowSchemaToString(schema->dictionary, out, n, recursive); + } + + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + + if (recursive && schema->format[0] == '+') { + n_chars_last = snprintf(out, n, "<"); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + + for (int64_t i = 0; i < schema->n_children; i++) { + if (i > 0) { + n_chars_last = snprintf(out, n, ", "); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + } + + // ArrowSchemaToStringInternal() will validate the child and print the error, + // but we need the name first + if (schema->children[i] != NULL && schema->children[i]->release != NULL && + schema->children[i]->name != NULL) { + n_chars_last = snprintf(out, n, "%s: ", schema->children[i]->name); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + } + + n_chars_last = ArrowSchemaToString(schema->children[i], out, n, recursive); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + } + + n_chars_last = snprintf(out, n, ">"); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + } + + if (is_extension && is_dictionary) { + n_chars += snprintf(out, n, ">}"); + } else if (is_extension) { + n_chars += snprintf(out, n, "}"); + } else if (is_dictionary) { + n_chars += snprintf(out, n, ">"); + } + + // Ensure that we always return a positive result + if (n_chars > 0) { + return n_chars; + } else { + return 0; + } +} + +ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, + const char* metadata) { + reader->metadata = metadata; + + if (reader->metadata == NULL) { + reader->offset = 0; + reader->remaining_keys = 0; + } else { + memcpy(&reader->remaining_keys, reader->metadata, sizeof(int32_t)); + reader->offset = sizeof(int32_t); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, + struct ArrowStringView* key_out, + struct ArrowStringView* value_out) { + if (reader->remaining_keys <= 0) { + return EINVAL; + } + + int64_t pos = 0; + + int32_t key_size; + memcpy(&key_size, reader->metadata + reader->offset + pos, sizeof(int32_t)); + pos += sizeof(int32_t); + + key_out->data = reader->metadata + reader->offset + pos; + key_out->size_bytes = key_size; + pos += key_size; + + int32_t value_size; + memcpy(&value_size, reader->metadata + reader->offset + pos, sizeof(int32_t)); + pos += sizeof(int32_t); + + value_out->data = reader->metadata + reader->offset + pos; + value_out->size_bytes = value_size; + pos += value_size; + + reader->offset += pos; + reader->remaining_keys--; + return NANOARROW_OK; +} + +int64_t ArrowMetadataSizeOf(const char* metadata) { + 
if (metadata == NULL) { + return 0; + } + + struct ArrowMetadataReader reader; + struct ArrowStringView key; + struct ArrowStringView value; + if (ArrowMetadataReaderInit(&reader, metadata) != NANOARROW_OK) { + return 0; + } + + int64_t size = sizeof(int32_t); + while (ArrowMetadataReaderRead(&reader, &key, &value) == NANOARROW_OK) { + size += sizeof(int32_t) + key.size_bytes + sizeof(int32_t) + value.size_bytes; + } + + return size; +} + +static ArrowErrorCode ArrowMetadataGetValueInternal(const char* metadata, + struct ArrowStringView* key, + struct ArrowStringView* value_out) { + struct ArrowMetadataReader reader; + struct ArrowStringView existing_key; + struct ArrowStringView existing_value; + NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, metadata)); + + while (ArrowMetadataReaderRead(&reader, &existing_key, &existing_value) == + NANOARROW_OK) { + int key_equal = key->size_bytes == existing_key.size_bytes && + strncmp(key->data, existing_key.data, existing_key.size_bytes) == 0; + if (key_equal) { + value_out->data = existing_value.data; + value_out->size_bytes = existing_value.size_bytes; + break; + } + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key, + struct ArrowStringView* value_out) { + if (value_out == NULL) { + return EINVAL; + } + + return ArrowMetadataGetValueInternal(metadata, &key, value_out); +} + +char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key) { + struct ArrowStringView value = ArrowCharView(NULL); + if (ArrowMetadataGetValue(metadata, key, &value) != NANOARROW_OK) { + return 0; + } + + return value.data != NULL; +} + +ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, + const char* metadata) { + ArrowBufferInit(buffer); + return ArrowBufferAppend(buffer, metadata, ArrowMetadataSizeOf(metadata)); +} + +static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buffer, + struct ArrowStringView* key, + struct ArrowStringView* value) { + if (value == NULL) { + return NANOARROW_OK; + } + + if (buffer->capacity_bytes == 0) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(buffer, 0)); + } + + if (((size_t)buffer->capacity_bytes) < sizeof(int32_t)) { + return EINVAL; + } + + int32_t n_keys; + memcpy(&n_keys, buffer->data, sizeof(int32_t)); + + int32_t key_size = (int32_t)key->size_bytes; + int32_t value_size = (int32_t)value->size_bytes; + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve( + buffer, sizeof(int32_t) + key_size + sizeof(int32_t) + value_size)); + + ArrowBufferAppendUnsafe(buffer, &key_size, sizeof(int32_t)); + ArrowBufferAppendUnsafe(buffer, key->data, key_size); + ArrowBufferAppendUnsafe(buffer, &value_size, sizeof(int32_t)); + ArrowBufferAppendUnsafe(buffer, value->data, value_size); + + n_keys++; + memcpy(buffer->data, &n_keys, sizeof(int32_t)); + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowMetadataBuilderSetInternal(struct ArrowBuffer* buffer, + struct ArrowStringView* key, + struct ArrowStringView* value) { + // Inspect the current value to see if we can avoid copying the buffer + struct ArrowStringView current_value = ArrowCharView(NULL); + NANOARROW_RETURN_NOT_OK( + ArrowMetadataGetValueInternal((const char*)buffer->data, key, ¤t_value)); + + // The key should be removed but no key exists + if (value == NULL && current_value.data == NULL) { + return NANOARROW_OK; + } + + // The key/value can be appended because no key exists + if (value != NULL && current_value.data == NULL) { + return 
ArrowMetadataBuilderAppendInternal(buffer, key, value); + } + + struct ArrowMetadataReader reader; + struct ArrowStringView existing_key; + struct ArrowStringView existing_value; + NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, (const char*)buffer->data)); + + struct ArrowBuffer new_buffer; + NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&new_buffer, NULL)); + + while (reader.remaining_keys > 0) { + int result = ArrowMetadataReaderRead(&reader, &existing_key, &existing_value); + if (result != NANOARROW_OK) { + ArrowBufferReset(&new_buffer); + return result; + } + + if (key->size_bytes == existing_key.size_bytes && + strncmp((const char*)key->data, (const char*)existing_key.data, + existing_key.size_bytes) == 0) { + result = ArrowMetadataBuilderAppendInternal(&new_buffer, key, value); + value = NULL; + } else { + result = + ArrowMetadataBuilderAppendInternal(&new_buffer, &existing_key, &existing_value); + } + + if (result != NANOARROW_OK) { + ArrowBufferReset(&new_buffer); + return result; + } + } + + ArrowBufferReset(buffer); + ArrowBufferMove(&new_buffer, buffer); + return NANOARROW_OK; +} + +ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value) { + return ArrowMetadataBuilderAppendInternal(buffer, &key, &value); +} + +ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value) { + return ArrowMetadataBuilderSetInternal(buffer, &key, &value); +} + +ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, + struct ArrowStringView key) { + return ArrowMetadataBuilderSetInternal(buffer, &key, NULL); +} +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include + +#include "nanoarrow.h" + +static void ArrowArrayReleaseInternal(struct ArrowArray* array) { + // Release buffers held by this array + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + if (private_data != NULL) { + ArrowBitmapReset(&private_data->bitmap); + ArrowBufferReset(&private_data->buffers[0]); + ArrowBufferReset(&private_data->buffers[1]); + ArrowFree(private_data->buffer_data); + for (int32_t i = 0; i < private_data->n_variadic_buffers; ++i) { + ArrowBufferReset(&private_data->variadic_buffers[i]); + } + ArrowFree(private_data->variadic_buffers); + ArrowFree(private_data->variadic_buffer_sizes); + ArrowFree(private_data); + } + + // This object owns the memory for all the children, but those + // children may have been generated elsewhere and might have + // their own release() callback. 
+ if (array->children != NULL) { + for (int64_t i = 0; i < array->n_children; i++) { + if (array->children[i] != NULL) { + if (array->children[i]->release != NULL) { + ArrowArrayRelease(array->children[i]); + } + + ArrowFree(array->children[i]); + } + } + + ArrowFree(array->children); + } + + // This object owns the memory for the dictionary but it + // may have been generated somewhere else and have its own + // release() callback. + if (array->dictionary != NULL) { + if (array->dictionary->release != NULL) { + ArrowArrayRelease(array->dictionary); + } + + ArrowFree(array->dictionary); + } + + // Mark released + array->release = NULL; +} + +static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, + enum ArrowType storage_type) { + switch (storage_type) { + case NANOARROW_TYPE_UNINITIALIZED: + case NANOARROW_TYPE_NA: + case NANOARROW_TYPE_RUN_END_ENCODED: + array->n_buffers = 0; + break; + + case NANOARROW_TYPE_FIXED_SIZE_LIST: + case NANOARROW_TYPE_STRUCT: + case NANOARROW_TYPE_SPARSE_UNION: + array->n_buffers = 1; + break; + + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_MAP: + case NANOARROW_TYPE_BOOL: + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_HALF_FLOAT: + case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_DECIMAL256: + case NANOARROW_TYPE_INTERVAL_MONTHS: + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + case NANOARROW_TYPE_DENSE_UNION: + array->n_buffers = 2; + break; + case NANOARROW_TYPE_BINARY_VIEW: + case NANOARROW_TYPE_STRING_VIEW: + array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS + 1; + break; + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_LARGE_BINARY: + array->n_buffers = 3; + break; + + default: + return EINVAL; + + return NANOARROW_OK; + } + + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + private_data->storage_type = storage_type; + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, + enum ArrowType storage_type) { + array->length = 0; + array->null_count = 0; + array->offset = 0; + array->n_buffers = 0; + array->n_children = 0; + array->buffers = NULL; + array->children = NULL; + array->dictionary = NULL; + array->release = &ArrowArrayReleaseInternal; + array->private_data = NULL; + + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)ArrowMalloc(sizeof(struct ArrowArrayPrivateData)); + if (private_data == NULL) { + array->release = NULL; + return ENOMEM; + } + + ArrowBitmapInit(&private_data->bitmap); + ArrowBufferInit(&private_data->buffers[0]); + ArrowBufferInit(&private_data->buffers[1]); + private_data->buffer_data = + (const void**)ArrowMalloc(sizeof(void*) * NANOARROW_MAX_FIXED_BUFFERS); + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; ++i) { + private_data->buffer_data[i] = NULL; + } + private_data->n_variadic_buffers = 0; + private_data->variadic_buffers = NULL; + private_data->variadic_buffer_sizes = NULL; + + array->private_data = private_data; + array->buffers = (const void**)(private_data->buffer_data); + + // These are not technically "storage" in the sense 
that they do not appear + // in the ArrowSchemaView's storage_type member; however, allowing them here + // is helpful to maximize the number of types that can avoid going through + // ArrowArrayInitFromSchema(). + switch (storage_type) { + case NANOARROW_TYPE_DURATION: + case NANOARROW_TYPE_TIMESTAMP: + case NANOARROW_TYPE_TIME64: + case NANOARROW_TYPE_DATE64: + storage_type = NANOARROW_TYPE_INT64; + break; + case NANOARROW_TYPE_TIME32: + case NANOARROW_TYPE_DATE32: + storage_type = NANOARROW_TYPE_INT32; + break; + default: + break; + } + + int result = ArrowArraySetStorageType(array, storage_type); + if (result != NANOARROW_OK) { + ArrowArrayRelease(array); + return result; + } + + ArrowLayoutInit(&private_data->layout, storage_type); + // We can only know this not to be true when initializing based on a schema + // so assume this to be true. + private_data->union_type_id_is_child_index = 1; + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, + const struct ArrowArrayView* array_view, + struct ArrowError* error) { + NANOARROW_RETURN_NOT_OK_WITH_ERROR( + ArrowArrayInitFromType(array, array_view->storage_type), error); + int result; + + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + private_data->layout = array_view->layout; + + if (array_view->n_children > 0) { + result = ArrowArrayAllocateChildren(array, array_view->n_children); + if (result != NANOARROW_OK) { + ArrowArrayRelease(array); + return result; + } + + for (int64_t i = 0; i < array_view->n_children; i++) { + result = + ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error); + if (result != NANOARROW_OK) { + ArrowArrayRelease(array); + return result; + } + } + } + + if (array_view->dictionary != NULL) { + result = ArrowArrayAllocateDictionary(array); + if (result != NANOARROW_OK) { + ArrowArrayRelease(array); + return result; + } + + result = + ArrowArrayInitFromArrayView(array->dictionary, array_view->dictionary, error); + if (result != NANOARROW_OK) { + ArrowArrayRelease(array); + return result; + } + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, + const struct ArrowSchema* schema, + struct ArrowError* error) { + struct ArrowArrayView array_view; + NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema, error)); + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromArrayView(array, &array_view, error)); + if (array_view.storage_type == NANOARROW_TYPE_DENSE_UNION || + array_view.storage_type == NANOARROW_TYPE_SPARSE_UNION) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + // We can still build arrays if this isn't true; however, the append + // functions won't work. Instead, we store this value and error only + // when StartAppending is called. 
+ private_data->union_type_id_is_child_index = + _ArrowUnionTypeIdsWillEqualChildIndices(schema->format + 4, schema->n_children); + } + + ArrowArrayViewReset(&array_view); + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children) { + if (array->children != NULL) { + return EINVAL; + } + + if (n_children == 0) { + return NANOARROW_OK; + } + + array->children = + (struct ArrowArray**)ArrowMalloc(n_children * sizeof(struct ArrowArray*)); + if (array->children == NULL) { + return ENOMEM; + } + + memset(array->children, 0, n_children * sizeof(struct ArrowArray*)); + + for (int64_t i = 0; i < n_children; i++) { + array->children[i] = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray)); + if (array->children[i] == NULL) { + return ENOMEM; + } + array->children[i]->release = NULL; + } + + array->n_children = n_children; + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array) { + if (array->dictionary != NULL) { + return EINVAL; + } + + array->dictionary = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray)); + if (array->dictionary == NULL) { + return ENOMEM; + } + + array->dictionary->release = NULL; + return NANOARROW_OK; +} + +void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + ArrowBufferMove(&bitmap->buffer, &private_data->bitmap.buffer); + private_data->bitmap.size_bits = bitmap->size_bits; + bitmap->size_bits = 0; + private_data->buffer_data[0] = private_data->bitmap.buffer.data; + array->null_count = -1; +} + +ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, + struct ArrowBuffer* buffer) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + switch (i) { + case 0: + ArrowBufferMove(buffer, &private_data->bitmap.buffer); + private_data->buffer_data[i] = private_data->bitmap.buffer.data; + break; + case 1: + case 2: + ArrowBufferMove(buffer, &private_data->buffers[i - 1]); + private_data->buffer_data[i] = private_data->buffers[i - 1].data; + break; + default: + return EINVAL; + } + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_view, + struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + ArrowArrayViewInitFromType(array_view, private_data->storage_type); + array_view->layout = private_data->layout; + array_view->array = array; + array_view->length = array->length; + array_view->offset = array->offset; + array_view->null_count = array->null_count; + + array_view->buffer_views[0].data.as_uint8 = private_data->bitmap.buffer.data; + array_view->buffer_views[0].size_bytes = private_data->bitmap.buffer.size_bytes; + array_view->buffer_views[1].data.as_uint8 = private_data->buffers[0].data; + array_view->buffer_views[1].size_bytes = private_data->buffers[0].size_bytes; + array_view->buffer_views[2].data.as_uint8 = private_data->buffers[1].data; + array_view->buffer_views[2].size_bytes = private_data->buffers[1].size_bytes; + + int result = ArrowArrayViewAllocateChildren(array_view, array->n_children); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + + for (int64_t i = 0; i < array->n_children; i++) { + result = ArrowArrayViewInitFromArray(array_view->children[i], array->children[i]); + if (result 
!= NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + } + + if (array->dictionary != NULL) { + result = ArrowArrayViewAllocateDictionary(array_view); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + + result = ArrowArrayViewInitFromArray(array_view->dictionary, array->dictionary); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + } + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowArrayReserveInternal(struct ArrowArray* array, + struct ArrowArrayView* array_view) { + // Loop through buffers and reserve the extra space that we know about + for (int64_t i = 0; i < array->n_buffers; i++) { + // Don't reserve on a validity buffer that hasn't been allocated yet + if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY && + ArrowArrayBuffer(array, i)->data == NULL) { + continue; + } + + int64_t additional_size_bytes = + array_view->buffer_views[i].size_bytes - ArrowArrayBuffer(array, i)->size_bytes; + + if (additional_size_bytes > 0) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferReserve(ArrowArrayBuffer(array, i), additional_size_bytes)); + } + } + + // Recursively reserve children + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK( + ArrowArrayReserveInternal(array->children[i], array_view->children[i])); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, + int64_t additional_size_elements) { + struct ArrowArrayView array_view; + NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array)); + + // Calculate theoretical buffer sizes (recursively) + ArrowArrayViewSetLength(&array_view, array->length + additional_size_elements); + + // Walk the structure (recursively) + int result = ArrowArrayReserveInternal(array, &array_view); + ArrowArrayViewReset(&array_view); + if (result != NANOARROW_OK) { + return result; + } + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY || + private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) { + continue; + } + + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); + if (buffer->data == NULL) { + NANOARROW_RETURN_NOT_OK((ArrowBufferReserve(buffer, 1))); + } + } + + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->children[i])); + } + + if (array->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->dictionary)); + } + + return NANOARROW_OK; +} + +static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + const bool is_binary_view = private_data->storage_type == NANOARROW_TYPE_STRING_VIEW || + private_data->storage_type == NANOARROW_TYPE_BINARY_VIEW; + const int32_t nfixed_buf = is_binary_view ? 
2 : NANOARROW_MAX_FIXED_BUFFERS; + + for (int32_t i = 0; i < nfixed_buf; i++) { + private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data; + } + + if (is_binary_view) { + const int32_t nvirt_buf = private_data->n_variadic_buffers; + private_data->buffer_data = (const void**)ArrowRealloc( + private_data->buffer_data, sizeof(void*) * (nfixed_buf + nvirt_buf + 1)); + for (int32_t i = 0; i < nvirt_buf; i++) { + private_data->buffer_data[nfixed_buf + i] = private_data->variadic_buffers[i].data; + } + private_data->buffer_data[nfixed_buf + nvirt_buf] = + private_data->variadic_buffer_sizes; + array->buffers = (const void**)(private_data->buffer_data); + } + + for (int64_t i = 0; i < array->n_children; i++) { + ArrowArrayFlushInternalPointers(array->children[i]); + } + + if (array->dictionary != NULL) { + ArrowArrayFlushInternalPointers(array->dictionary); + } +} + +ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, + enum ArrowValidationLevel validation_level, + struct ArrowError* error) { + // Even if the data buffer is size zero, the pointer value needed to be non-null + // in some implementations (at least one version of Arrow C++ at the time this + // was added and C# as later discovered). Only do this fix if we can assume + // CPU data access. + if (validation_level >= NANOARROW_VALIDATION_LEVEL_DEFAULT) { + NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFinalizeBuffers(array), error); + } + + // Make sure the value we get with array->buffers[i] is set to the actual + // pointer (which may have changed from the original due to reallocation) + ArrowArrayFlushInternalPointers(array); + + if (validation_level == NANOARROW_VALIDATION_LEVEL_NONE) { + return NANOARROW_OK; + } + + // For validation, initialize an ArrowArrayView with our known buffer sizes + struct ArrowArrayView array_view; + NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayViewInitFromArray(&array_view, array), + error); + int result = ArrowArrayViewValidate(&array_view, validation_level, error); + ArrowArrayViewReset(&array_view); + return result; +} + +ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, + struct ArrowError* error) { + return ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_DEFAULT, error); +} + +void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view, + enum ArrowType storage_type) { + memset(array_view, 0, sizeof(struct ArrowArrayView)); + array_view->storage_type = storage_type; + ArrowLayoutInit(&array_view->layout, storage_type); +} + +ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view, + int64_t n_children) { + if (array_view->children != NULL) { + return EINVAL; + } + + if (n_children == 0) { + array_view->n_children = 0; + return NANOARROW_OK; + } + + array_view->children = + (struct ArrowArrayView**)ArrowMalloc(n_children * sizeof(struct ArrowArrayView*)); + if (array_view->children == NULL) { + return ENOMEM; + } + + for (int64_t i = 0; i < n_children; i++) { + array_view->children[i] = NULL; + } + + array_view->n_children = n_children; + + for (int64_t i = 0; i < n_children; i++) { + array_view->children[i] = + (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView)); + if (array_view->children[i] == NULL) { + return ENOMEM; + } + ArrowArrayViewInitFromType(array_view->children[i], NANOARROW_TYPE_UNINITIALIZED); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view) { + if (array_view->dictionary != NULL) { + return EINVAL; + } + + 
array_view->dictionary = + (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView)); + if (array_view->dictionary == NULL) { + return ENOMEM; + } + + ArrowArrayViewInitFromType(array_view->dictionary, NANOARROW_TYPE_UNINITIALIZED); + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, + const struct ArrowSchema* schema, + struct ArrowError* error) { + struct ArrowSchemaView schema_view; + int result = ArrowSchemaViewInit(&schema_view, schema, error); + if (result != NANOARROW_OK) { + return result; + } + + ArrowArrayViewInitFromType(array_view, schema_view.storage_type); + array_view->layout = schema_view.layout; + + result = ArrowArrayViewAllocateChildren(array_view, schema->n_children); + if (result != NANOARROW_OK) { + ArrowErrorSet(error, "ArrowArrayViewAllocateChildren() failed"); + ArrowArrayViewReset(array_view); + return result; + } + + for (int64_t i = 0; i < schema->n_children; i++) { + result = + ArrowArrayViewInitFromSchema(array_view->children[i], schema->children[i], error); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + } + + if (schema->dictionary != NULL) { + result = ArrowArrayViewAllocateDictionary(array_view); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + + result = + ArrowArrayViewInitFromSchema(array_view->dictionary, schema->dictionary, error); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + } + + if (array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION || + array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) { + array_view->union_type_id_map = (int8_t*)ArrowMalloc(256 * sizeof(int8_t)); + if (array_view->union_type_id_map == NULL) { + return ENOMEM; + } + + memset(array_view->union_type_id_map, -1, 256); + int32_t n_type_ids = _ArrowParseUnionTypeIds(schema_view.union_type_ids, + array_view->union_type_id_map + 128); + for (int8_t child_index = 0; child_index < n_type_ids; child_index++) { + int8_t type_id = array_view->union_type_id_map[128 + child_index]; + array_view->union_type_id_map[type_id] = child_index; + } + } + + return NANOARROW_OK; +} + +void ArrowArrayViewReset(struct ArrowArrayView* array_view) { + if (array_view->children != NULL) { + for (int64_t i = 0; i < array_view->n_children; i++) { + if (array_view->children[i] != NULL) { + ArrowArrayViewReset(array_view->children[i]); + ArrowFree(array_view->children[i]); + } + } + + ArrowFree(array_view->children); + } + + if (array_view->dictionary != NULL) { + ArrowArrayViewReset(array_view->dictionary); + ArrowFree(array_view->dictionary); + } + + if (array_view->union_type_id_map != NULL) { + ArrowFree(array_view->union_type_id_map); + } + + ArrowArrayViewInitFromType(array_view, NANOARROW_TYPE_UNINITIALIZED); +} + +void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) { + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8; + + switch (array_view->layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_VALIDITY: + array_view->buffer_views[i].size_bytes = _ArrowBytesForBits(length); + continue; + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + // Probably don't want/need to rely on the producer to have allocated an + // offsets buffer of length 1 for a zero-size array + array_view->buffer_views[i].size_bytes = + (length != 0) * element_size_bytes * (length + 1); + continue; + case NANOARROW_BUFFER_TYPE_DATA: + 
array_view->buffer_views[i].size_bytes = + _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * length) / + 8; + continue; + case NANOARROW_BUFFER_TYPE_TYPE_ID: + case NANOARROW_BUFFER_TYPE_UNION_OFFSET: + array_view->buffer_views[i].size_bytes = element_size_bytes * length; + continue; + case NANOARROW_BUFFER_TYPE_VARIADIC_DATA: + case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE: + case NANOARROW_BUFFER_TYPE_NONE: + array_view->buffer_views[i].size_bytes = 0; + continue; + } + } + + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRUCT: + case NANOARROW_TYPE_SPARSE_UNION: + for (int64_t i = 0; i < array_view->n_children; i++) { + ArrowArrayViewSetLength(array_view->children[i], length); + } + break; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + if (array_view->n_children >= 1) { + ArrowArrayViewSetLength(array_view->children[0], + length * array_view->layout.child_size_elements); + } + default: + break; + } +} + +// This version recursively extracts information from the array and stores it +// in the array view, performing any checks that require the original array. +static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view, + const struct ArrowArray* array, + struct ArrowError* error) { + array_view->array = array; + array_view->offset = array->offset; + array_view->length = array->length; + array_view->null_count = array->null_count; + array_view->variadic_buffer_sizes = NULL; + array_view->variadic_buffers = NULL; + array_view->n_variadic_buffers = 0; + + int64_t buffers_required = 0; + const int nfixed_buf = array_view->storage_type == NANOARROW_TYPE_STRING_VIEW || + array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW + ? NANOARROW_BINARY_VIEW_FIXED_BUFFERS + : NANOARROW_MAX_FIXED_BUFFERS; + for (int i = 0; i < nfixed_buf; i++) { + if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) { + break; + } + + buffers_required++; + + // Set buffer pointer + array_view->buffer_views[i].data.data = array->buffers[i]; + + // If non-null, set buffer size to unknown. 
+ if (array->buffers[i] == NULL) { + array_view->buffer_views[i].size_bytes = 0; + } else { + array_view->buffer_views[i].size_bytes = -1; + } + } + + if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW || + array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) { + const int64_t n_buffers = array->n_buffers; + const int32_t nfixed_buf = NANOARROW_BINARY_VIEW_FIXED_BUFFERS; + + const int32_t nvariadic_buf = (int32_t)(n_buffers - nfixed_buf - 1); + array_view->n_variadic_buffers = nvariadic_buf; + buffers_required += nvariadic_buf + 1; + array_view->variadic_buffers = array->buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS; + array_view->variadic_buffer_sizes = (int64_t*)array->buffers[n_buffers - 1]; + } + + if (buffers_required != array->n_buffers) { + ArrowErrorSet(error, + "Expected array with %" PRId64 " buffer(s) but found %" PRId64 + " buffer(s)", + buffers_required, array->n_buffers); + return EINVAL; + } + + // Check number of children + if (array_view->n_children != array->n_children) { + ArrowErrorSet(error, "Expected %" PRId64 " children but found %" PRId64 " children", + array_view->n_children, array->n_children); + return EINVAL; + } + + // Recurse for children + for (int64_t i = 0; i < array_view->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view->children[i], + array->children[i], error)); + } + + // Check dictionary + if (array->dictionary == NULL && array_view->dictionary != NULL) { + ArrowErrorSet(error, "Expected dictionary but found NULL"); + return EINVAL; + } + + if (array->dictionary != NULL && array_view->dictionary == NULL) { + ArrowErrorSet(error, "Expected NULL dictionary but found dictionary member"); + return EINVAL; + } + + if (array->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK( + ArrowArrayViewSetArrayInternal(array_view->dictionary, array->dictionary, error)); + } + + return NANOARROW_OK; +} + +static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view, + struct ArrowError* error) { + if (array_view->length < 0) { + ArrowErrorSet(error, "Expected length >= 0 but found length %" PRId64, + array_view->length); + return EINVAL; + } + + if (array_view->offset < 0) { + ArrowErrorSet(error, "Expected offset >= 0 but found offset %" PRId64, + array_view->offset); + return EINVAL; + } + + // Ensure that offset + length fits within an int64 before a possible overflow + if ((uint64_t)array_view->offset + (uint64_t)array_view->length > (uint64_t)INT64_MAX) { + ArrowErrorSet(error, "Offset + length is > INT64_MAX"); + return EINVAL; + } + + // Calculate buffer sizes that do not require buffer access. If marked as + // unknown, assign the buffer size; otherwise, validate it. + int64_t offset_plus_length = array_view->offset + array_view->length; + + // Only loop over the first two buffers because the size of the third buffer + // is always data dependent for all current Arrow types. 
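+  // (Worked example, not part of the vendored sources: for a
+  //  NANOARROW_TYPE_STRING view with offset 0 and length 3, the loop below
+  //  requires the validity buffer to hold at least _ArrowBytesForBits(3) = 1
+  //  byte (unless null_count is 0 and that buffer is empty) and the int32
+  //  offsets buffer to hold at least 4 * (3 + 1) = 16 bytes; the size of the
+  //  third (data) buffer can only be derived from the offsets themselves and
+  //  is therefore handled in ArrowArrayViewValidateDefault().)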
+ for (int i = 0; i < 2; i++) { + int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8; + // Initialize with a value that will cause an error if accidentally used uninitialized + // Need to suppress the clang-tidy warning because gcc warns for possible use + int64_t min_buffer_size_bytes = // NOLINT(clang-analyzer-deadcode.DeadStores) + array_view->buffer_views[i].size_bytes + 1; + + switch (array_view->layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_VALIDITY: + if (array_view->null_count == 0 && array_view->buffer_views[i].size_bytes == 0) { + continue; + } + + min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length); + break; + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + // Probably don't want/need to rely on the producer to have allocated an + // offsets buffer of length 1 for a zero-size array + min_buffer_size_bytes = + (offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1); + break; + case NANOARROW_BUFFER_TYPE_DATA: + min_buffer_size_bytes = + _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * + offset_plus_length) / + 8; + break; + case NANOARROW_BUFFER_TYPE_TYPE_ID: + case NANOARROW_BUFFER_TYPE_UNION_OFFSET: + min_buffer_size_bytes = element_size_bytes * offset_plus_length; + break; + case NANOARROW_BUFFER_TYPE_VARIADIC_DATA: + case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE: + case NANOARROW_BUFFER_TYPE_NONE: + continue; + } + + // Assign or validate buffer size + if (array_view->buffer_views[i].size_bytes == -1) { + array_view->buffer_views[i].size_bytes = min_buffer_size_bytes; + } else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes) { + ArrowErrorSet(error, + "Expected %s array buffer %d to have size >= %" PRId64 + " bytes but found " + "buffer with %" PRId64 " bytes", + ArrowTypeString(array_view->storage_type), i, min_buffer_size_bytes, + array_view->buffer_views[i].size_bytes); + return EINVAL; + } + } + + // For list, fixed-size list and map views, we can validate the number of children + switch (array_view->storage_type) { + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + case NANOARROW_TYPE_MAP: + if (array_view->n_children != 1) { + ArrowErrorSet(error, + "Expected 1 child of %s array but found %" PRId64 " child arrays", + ArrowTypeString(array_view->storage_type), array_view->n_children); + return EINVAL; + } + break; + case NANOARROW_TYPE_RUN_END_ENCODED: + if (array_view->n_children != 2) { + ArrowErrorSet( + error, "Expected 2 children for %s array but found %" PRId64 " child arrays", + ArrowTypeString(array_view->storage_type), array_view->n_children); + return EINVAL; + } + break; + default: + break; + } + + // For struct, the sparse union, and the fixed-size list views, we can validate child + // lengths. 
+ int64_t child_min_length; + switch (array_view->storage_type) { + case NANOARROW_TYPE_SPARSE_UNION: + case NANOARROW_TYPE_STRUCT: + child_min_length = (array_view->offset + array_view->length); + for (int64_t i = 0; i < array_view->n_children; i++) { + if (array_view->children[i]->length < child_min_length) { + ArrowErrorSet(error, + "Expected struct child %" PRId64 " to have length >= %" PRId64 + " but found child with " + "length %" PRId64, + i + 1, child_min_length, array_view->children[i]->length); + return EINVAL; + } + } + break; + + case NANOARROW_TYPE_FIXED_SIZE_LIST: + child_min_length = (array_view->offset + array_view->length) * + array_view->layout.child_size_elements; + if (array_view->children[0]->length < child_min_length) { + ArrowErrorSet(error, + "Expected child of fixed_size_list array to have length >= %" PRId64 + " but " + "found array with length %" PRId64, + child_min_length, array_view->children[0]->length); + return EINVAL; + } + break; + + case NANOARROW_TYPE_RUN_END_ENCODED: { + if (array_view->n_children != 2) { + ArrowErrorSet(error, + "Expected 2 children for run-end encoded array but found %" PRId64, + array_view->n_children); + return EINVAL; + } + struct ArrowArrayView* run_ends_view = array_view->children[0]; + struct ArrowArrayView* values_view = array_view->children[1]; + int64_t max_length; + switch (run_ends_view->storage_type) { + case NANOARROW_TYPE_INT16: + max_length = INT16_MAX; + break; + case NANOARROW_TYPE_INT32: + max_length = INT32_MAX; + break; + case NANOARROW_TYPE_INT64: + max_length = INT64_MAX; + break; + default: + ArrowErrorSet( + error, + "Run-end encoded array only supports INT16, INT32 or INT64 run-ends " + "but found run-ends type %s", + ArrowTypeString(run_ends_view->storage_type)); + return EINVAL; + } + + // There is already a check above that offset_plus_length < INT64_MAX + if (offset_plus_length > max_length) { + ArrowErrorSet(error, + "Offset + length of a run-end encoded array must fit in a value" + " of the run end type %s but is %" PRId64 " + %" PRId64, + ArrowTypeString(run_ends_view->storage_type), array_view->offset, + array_view->length); + return EINVAL; + } + + if (run_ends_view->length > values_view->length) { + ArrowErrorSet(error, + "Length of run_ends is greater than the length of values: %" PRId64 + " > %" PRId64, + run_ends_view->length, values_view->length); + return EINVAL; + } + + if (run_ends_view->length == 0 && values_view->length != 0) { + ArrowErrorSet(error, + "Run-end encoded array has zero length %" PRId64 + ", but values array has " + "non-zero length", + values_view->length); + return EINVAL; + } + + if (run_ends_view->null_count != 0) { + ArrowErrorSet(error, "Null count must be 0 for run ends array, but is %" PRId64, + run_ends_view->null_count); + return EINVAL; + } + break; + } + + default: + break; + } + + // Recurse for children + for (int64_t i = 0; i < array_view->n_children; i++) { + NANOARROW_RETURN_NOT_OK( + ArrowArrayViewValidateMinimal(array_view->children[i], error)); + } + + // Recurse for dictionary + if (array_view->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view->dictionary, error)); + } + + return NANOARROW_OK; +} + +static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view, + struct ArrowError* error) { + // Perform minimal validation. This will validate or assign + // buffer sizes as long as buffer access is not required. 
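+  // (Illustrative only, not part of the vendored sources: the usual consumer
+  //  entry point into this validation is ArrowArrayViewSetArray(), e.g.
+  //
+  //    struct ArrowArrayView view;
+  //    struct ArrowError error;
+  //    ArrowErrorInit(&error);
+  //    NANOARROW_RETURN_NOT_OK(
+  //        ArrowArrayViewInitFromSchema(&view, schema, &error));
+  //    NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArray(&view, array, &error));
+  //    for (int64_t i = 0; i < view.length; i++) {
+  //      int64_t value = ArrowArrayViewGetIntUnsafe(&view, i);
+  //      // ... use value ...
+  //    }
+  //    ArrowArrayViewReset(&view);
+  //
+  //  where schema/array are a received ArrowSchema*/ArrowArray* pair and
+  //  ArrowArrayViewGetIntUnsafe() is one of the element accessors declared
+  //  elsewhere in nanoarrow.h.)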
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error)); + + // Calculate buffer sizes or child lengths that require accessing the offsets + // buffer. Where appropriate, validate that the first offset is >= 0. + // If a buffer size is marked as unknown, assign it; otherwise, validate it. + int64_t offset_plus_length = array_view->offset + array_view->length; + + int64_t first_offset; + int64_t last_offset; + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + if (array_view->buffer_views[1].size_bytes != 0) { + first_offset = array_view->buffer_views[1].data.as_int32[array_view->offset]; + if (first_offset < 0) { + ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64, + first_offset); + return EINVAL; + } + + last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length]; + if (last_offset < 0) { + ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64, + last_offset); + return EINVAL; + } + + // If the data buffer size is unknown, assign it; otherwise, check it + if (array_view->buffer_views[2].size_bytes == -1) { + array_view->buffer_views[2].size_bytes = last_offset; + } else if (array_view->buffer_views[2].size_bytes < last_offset) { + ArrowErrorSet(error, + "Expected %s array buffer 2 to have size >= %" PRId64 + " bytes but found " + "buffer with %" PRId64 " bytes", + ArrowTypeString(array_view->storage_type), last_offset, + array_view->buffer_views[2].size_bytes); + return EINVAL; + } + } else if (array_view->buffer_views[2].size_bytes == -1) { + // If the data buffer size is unknown and there are no bytes in the offset buffer, + // set the data buffer size to 0. + array_view->buffer_views[2].size_bytes = 0; + } + break; + + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + if (array_view->buffer_views[1].size_bytes != 0) { + first_offset = array_view->buffer_views[1].data.as_int64[array_view->offset]; + if (first_offset < 0) { + ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64, + first_offset); + return EINVAL; + } + + last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length]; + if (last_offset < 0) { + ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64, + last_offset); + return EINVAL; + } + + // If the data buffer size is unknown, assign it; otherwise, check it + if (array_view->buffer_views[2].size_bytes == -1) { + array_view->buffer_views[2].size_bytes = last_offset; + } else if (array_view->buffer_views[2].size_bytes < last_offset) { + ArrowErrorSet(error, + "Expected %s array buffer 2 to have size >= %" PRId64 + " bytes but found " + "buffer with %" PRId64 " bytes", + ArrowTypeString(array_view->storage_type), last_offset, + array_view->buffer_views[2].size_bytes); + return EINVAL; + } + } else if (array_view->buffer_views[2].size_bytes == -1) { + // If the data buffer size is unknown and there are no bytes in the offset + // buffer, set the data buffer size to 0. 
+ array_view->buffer_views[2].size_bytes = 0; + } + break; + + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < array_view->n_children; i++) { + if (array_view->children[i]->length < offset_plus_length) { + ArrowErrorSet(error, + "Expected struct child %" PRId64 " to have length >= %" PRId64 + " but found child with " + "length %" PRId64, + i + 1, offset_plus_length, array_view->children[i]->length); + return EINVAL; + } + } + break; + + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_MAP: + if (array_view->buffer_views[1].size_bytes != 0) { + first_offset = array_view->buffer_views[1].data.as_int32[array_view->offset]; + if (first_offset < 0) { + ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64, + first_offset); + return EINVAL; + } + + last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length]; + if (last_offset < 0) { + ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64, + last_offset); + return EINVAL; + } + + if (array_view->children[0]->length < last_offset) { + ArrowErrorSet(error, + "Expected child of %s array to have length >= %" PRId64 + " but found array with " + "length %" PRId64, + ArrowTypeString(array_view->storage_type), last_offset, + array_view->children[0]->length); + return EINVAL; + } + } + break; + + case NANOARROW_TYPE_LARGE_LIST: + if (array_view->buffer_views[1].size_bytes != 0) { + first_offset = array_view->buffer_views[1].data.as_int64[array_view->offset]; + if (first_offset < 0) { + ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64, + first_offset); + return EINVAL; + } + + last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length]; + if (last_offset < 0) { + ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64, + last_offset); + return EINVAL; + } + + if (array_view->children[0]->length < last_offset) { + ArrowErrorSet(error, + "Expected child of large list array to have length >= %" PRId64 + " but found array " + "with length %" PRId64, + last_offset, array_view->children[0]->length); + return EINVAL; + } + } + break; + + case NANOARROW_TYPE_RUN_END_ENCODED: { + struct ArrowArrayView* run_ends_view = array_view->children[0]; + if (run_ends_view->length == 0) { + break; + } + + int64_t first_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0); + if (first_run_end < 1) { + ArrowErrorSet( + error, + "All run ends must be greater than 0 but the first run end is %" PRId64, + first_run_end); + return EINVAL; + } + + // offset + length < INT64_MAX is checked in ArrowArrayViewValidateMinimal() + int64_t last_run_end = + ArrowArrayViewGetIntUnsafe(run_ends_view, run_ends_view->length - 1); + if (last_run_end < offset_plus_length) { + ArrowErrorSet(error, + "Last run end is %" PRId64 " but it should be >= (%" PRId64 + " + %" PRId64 ")", + last_run_end, array_view->offset, array_view->length); + return EINVAL; + } + break; + } + default: + break; + } + + // Recurse for children + for (int64_t i = 0; i < array_view->n_children; i++) { + NANOARROW_RETURN_NOT_OK( + ArrowArrayViewValidateDefault(array_view->children[i], error)); + } + + // Recurse for dictionary + if (array_view->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view->dictionary, error)); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, + const struct ArrowArray* array, + struct ArrowError* error) { + // Extract information from the array into the array view + 
NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error)); + + // Run default validation. Because we've marked all non-NULL buffers as having unknown + // size, validation will also update the buffer sizes as it goes. + NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error)); + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view, + const struct ArrowArray* array, + struct ArrowError* error) { + // Extract information from the array into the array view + NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error)); + + // Run default validation. Because we've marked all non-NULL buffers as having unknown + // size, validation will also update the buffer sizes as it goes. + NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error)); + + return NANOARROW_OK; +} + +static int ArrowAssertIncreasingInt32(struct ArrowBufferView view, + struct ArrowError* error) { + if (view.size_bytes <= (int64_t)sizeof(int32_t)) { + return NANOARROW_OK; + } + + for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int32_t); i++) { + if (view.data.as_int32[i] < view.data.as_int32[i - 1]) { + ArrowErrorSet(error, "[%" PRId64 "] Expected element size >= 0", i); + return EINVAL; + } + } + + return NANOARROW_OK; +} + +static int ArrowAssertIncreasingInt64(struct ArrowBufferView view, + struct ArrowError* error) { + if (view.size_bytes <= (int64_t)sizeof(int64_t)) { + return NANOARROW_OK; + } + + for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int64_t); i++) { + if (view.data.as_int64[i] < view.data.as_int64[i - 1]) { + ArrowErrorSet(error, "[%" PRId64 "] Expected element size >= 0", i); + return EINVAL; + } + } + + return NANOARROW_OK; +} + +static int ArrowAssertRangeInt8(struct ArrowBufferView view, int8_t min_value, + int8_t max_value, struct ArrowError* error) { + for (int64_t i = 0; i < view.size_bytes; i++) { + if (view.data.as_int8[i] < min_value || view.data.as_int8[i] > max_value) { + ArrowErrorSet(error, + "[%" PRId64 "] Expected buffer value between %" PRId8 " and %" PRId8 + " but found value %" PRId8, + i, min_value, max_value, view.data.as_int8[i]); + return EINVAL; + } + } + + return NANOARROW_OK; +} + +static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* values, + int64_t n_values, struct ArrowError* error) { + for (int64_t i = 0; i < view.size_bytes; i++) { + int item_found = 0; + for (int64_t j = 0; j < n_values; j++) { + if (view.data.as_int8[i] == values[j]) { + item_found = 1; + break; + } + } + + if (!item_found) { + ArrowErrorSet(error, "[%" PRId64 "] Unexpected buffer value %" PRId8, i, + view.data.as_int8[i]); + return EINVAL; + } + } + + return NANOARROW_OK; +} + +static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view, + struct ArrowError* error) { + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + switch (array_view->layout.buffer_type[i]) { + // Only validate the portion of the buffer that is strictly required, + // which includes not validating the offset buffer of a zero-length array. 
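+      // (Illustrative, not part of the vendored sources: the buffer-level
+      //  checks below are opt-in, e.g.
+      //    ArrowArrayViewValidate(&view, NANOARROW_VALIDATION_LEVEL_FULL, &error);
+      //  after ArrowArrayViewSetArray(), which by itself only runs the
+      //  default validation level.)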
+ case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + if (array_view->length == 0) { + continue; + } + if (array_view->layout.element_size_bits[i] == 32) { + struct ArrowBufferView sliced_offsets; + sliced_offsets.data.as_int32 = + array_view->buffer_views[i].data.as_int32 + array_view->offset; + sliced_offsets.size_bytes = (array_view->length + 1) * sizeof(int32_t); + NANOARROW_RETURN_NOT_OK(ArrowAssertIncreasingInt32(sliced_offsets, error)); + } else { + struct ArrowBufferView sliced_offsets; + sliced_offsets.data.as_int64 = + array_view->buffer_views[i].data.as_int64 + array_view->offset; + sliced_offsets.size_bytes = (array_view->length + 1) * sizeof(int64_t); + NANOARROW_RETURN_NOT_OK(ArrowAssertIncreasingInt64(sliced_offsets, error)); + } + break; + default: + break; + } + } + + if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION || + array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION) { + struct ArrowBufferView sliced_type_ids; + sliced_type_ids.size_bytes = array_view->length * sizeof(int8_t); + if (array_view->length > 0) { + sliced_type_ids.data.as_int8 = + array_view->buffer_views[0].data.as_int8 + array_view->offset; + } else { + sliced_type_ids.data.as_int8 = NULL; + } + + if (array_view->union_type_id_map == NULL) { + // If the union_type_id map is NULL (e.g., when using ArrowArrayInitFromType() + + // ArrowArrayAllocateChildren() + ArrowArrayFinishBuilding()), we don't have enough + // information to validate this buffer. + ArrowErrorSet(error, + "Insufficient information provided for validation of union array"); + return EINVAL; + } else if (_ArrowParsedUnionTypeIdsWillEqualChildIndices( + array_view->union_type_id_map, array_view->n_children, + array_view->n_children)) { + NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8( + sliced_type_ids, 0, (int8_t)(array_view->n_children - 1), error)); + } else { + NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(sliced_type_ids, + array_view->union_type_id_map + 128, + array_view->n_children, error)); + } + } + + if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION && + array_view->union_type_id_map != NULL) { + // Check that offsets refer to child elements that actually exist + for (int64_t i = 0; i < array_view->length; i++) { + int8_t child_id = ArrowArrayViewUnionChildIndex(array_view, i); + int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i); + int64_t child_length = array_view->children[child_id]->length; + if (offset < 0 || offset > child_length) { + ArrowErrorSet(error, + "[%" PRId64 "] Expected union offset for child id %" PRId8 + " to be between 0 and %" PRId64 + " but " + "found offset value %" PRId64, + i, child_id, child_length, offset); + return EINVAL; + } + } + } + + if (array_view->storage_type == NANOARROW_TYPE_RUN_END_ENCODED) { + struct ArrowArrayView* run_ends_view = array_view->children[0]; + if (run_ends_view->length > 0) { + int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0); + for (int64_t i = 1; i < run_ends_view->length; i++) { + const int64_t run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, i); + if (run_end <= last_run_end) { + ArrowErrorSet( + error, + "Every run end must be strictly greater than the previous run end, " + "but run_ends[%" PRId64 " is %" PRId64 " and run_ends[%" PRId64 + "] is %" PRId64, + i, run_end, i - 1, last_run_end); + return EINVAL; + } + last_run_end = run_end; + } + } + } + + // Recurse for children + for (int64_t i = 0; i < array_view->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error)); + } + + // 
Dictionary valiation not implemented + if (array_view->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->dictionary, error)); + // TODO: validate the indices + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, + enum ArrowValidationLevel validation_level, + struct ArrowError* error) { + switch (validation_level) { + case NANOARROW_VALIDATION_LEVEL_NONE: + return NANOARROW_OK; + case NANOARROW_VALIDATION_LEVEL_MINIMAL: + return ArrowArrayViewValidateMinimal(array_view, error); + case NANOARROW_VALIDATION_LEVEL_DEFAULT: + return ArrowArrayViewValidateDefault(array_view, error); + case NANOARROW_VALIDATION_LEVEL_FULL: + NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error)); + return ArrowArrayViewValidateFull(array_view, error); + } + + ArrowErrorSet(error, "validation_level not recognized"); + return EINVAL; +} + +struct ArrowComparisonInternalState { + enum ArrowCompareLevel level; + int is_equal; + struct ArrowError* reason; +}; + +NANOARROW_CHECK_PRINTF_ATTRIBUTE static void ArrowComparePrependPath( + struct ArrowError* out, const char* fmt, ...) { + if (out == NULL) { + return; + } + + char prefix[128]; + prefix[0] = '\0'; + va_list args; + va_start(args, fmt); + int prefix_len = vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + + if (prefix_len <= 0) { + return; + } + + size_t out_len = strlen(out->message); + size_t out_len_to_move = sizeof(struct ArrowError) - prefix_len - 1; + if (out_len_to_move > out_len) { + out_len_to_move = out_len; + } + + memmove(out->message + prefix_len, out->message, out_len_to_move); + memcpy(out->message, prefix, prefix_len); + out->message[out_len + prefix_len] = '\0'; +} + +#define SET_NOT_EQUAL_AND_RETURN_IF_IMPL(cond_, state_, reason_) \ + do { \ + if (cond_) { \ + ArrowErrorSet(state_->reason, ": %s", reason_); \ + state_->is_equal = 0; \ + return; \ + } \ + } while (0) + +#define SET_NOT_EQUAL_AND_RETURN_IF(condition_, state_) \ + SET_NOT_EQUAL_AND_RETURN_IF_IMPL(condition_, state_, #condition_) + +static void ArrowArrayViewCompareBuffer(const struct ArrowArrayView* actual, + const struct ArrowArrayView* expected, int i, + struct ArrowComparisonInternalState* state) { + SET_NOT_EQUAL_AND_RETURN_IF( + actual->buffer_views[i].size_bytes != expected->buffer_views[i].size_bytes, state); + + int64_t buffer_size = actual->buffer_views[i].size_bytes; + if (buffer_size > 0) { + SET_NOT_EQUAL_AND_RETURN_IF( + memcmp(actual->buffer_views[i].data.data, expected->buffer_views[i].data.data, + buffer_size) != 0, + state); + } +} + +static void ArrowArrayViewCompareIdentical(const struct ArrowArrayView* actual, + const struct ArrowArrayView* expected, + struct ArrowComparisonInternalState* state) { + SET_NOT_EQUAL_AND_RETURN_IF(actual->storage_type != expected->storage_type, state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->n_children != expected->n_children, state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->dictionary == NULL && expected->dictionary != NULL, + state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->dictionary != NULL && expected->dictionary == NULL, + state); + + SET_NOT_EQUAL_AND_RETURN_IF(actual->length != expected->length, state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->offset != expected->offset, state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->null_count != expected->null_count, state); + + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + ArrowArrayViewCompareBuffer(actual, expected, i, state); + if (!state->is_equal) { + 
ArrowComparePrependPath(state->reason, ".buffers[%d]", i); + return; + } + } + + for (int64_t i = 0; i < actual->n_children; i++) { + ArrowArrayViewCompareIdentical(actual->children[i], expected->children[i], state); + if (!state->is_equal) { + ArrowComparePrependPath(state->reason, ".children[%" PRId64 "]", i); + return; + } + } + + if (actual->dictionary != NULL) { + ArrowArrayViewCompareIdentical(actual->dictionary, expected->dictionary, state); + if (!state->is_equal) { + ArrowComparePrependPath(state->reason, ".dictionary"); + return; + } + } +} + +// Top-level entry point to take care of creating, cleaning up, and +// propagating the ArrowComparisonInternalState to the caller +ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual, + const struct ArrowArrayView* expected, + enum ArrowCompareLevel level, int* out, + struct ArrowError* reason) { + struct ArrowComparisonInternalState state; + state.level = level; + state.is_equal = 1; + state.reason = reason; + + switch (level) { + case NANOARROW_COMPARE_IDENTICAL: + ArrowArrayViewCompareIdentical(actual, expected, &state); + break; + default: + return EINVAL; + } + + *out = state.is_equal; + if (!state.is_equal) { + ArrowComparePrependPath(state.reason, "root"); + } + + return NANOARROW_OK; +} + +#undef SET_NOT_EQUAL_AND_RETURN_IF +#undef SET_NOT_EQUAL_AND_RETURN_IF_IMPL +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
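+
+// (Illustrative only, not part of the vendored sources: the basic array
+//  stream implemented below wraps one schema plus a fixed number of arrays so
+//  they can be handed to a consumer through the Arrow C Stream interface.
+//  A minimal producer sketch, assuming `schema` and `array` were already
+//  built and validated:
+//
+//    struct ArrowArrayStream stream;
+//    NANOARROW_RETURN_NOT_OK(ArrowBasicArrayStreamInit(&stream, &schema, 1));
+//    ArrowBasicArrayStreamSetArray(&stream, 0, &array);
+//
+//  Init() and SetArray() move ownership of the schema and arrays into the
+//  stream; the consumer later releases everything through stream.release()
+//  or ArrowArrayStreamRelease().)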
+ +#include + +#include "nanoarrow.h" + +struct BasicArrayStreamPrivate { + struct ArrowSchema schema; + int64_t n_arrays; + struct ArrowArray* arrays; + int64_t arrays_i; +}; + +static int ArrowBasicArrayStreamGetSchema(struct ArrowArrayStream* array_stream, + struct ArrowSchema* schema) { + if (array_stream == NULL || array_stream->release == NULL) { + return EINVAL; + } + + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)array_stream->private_data; + return ArrowSchemaDeepCopy(&private_data->schema, schema); +} + +static int ArrowBasicArrayStreamGetNext(struct ArrowArrayStream* array_stream, + struct ArrowArray* array) { + if (array_stream == NULL || array_stream->release == NULL) { + return EINVAL; + } + + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)array_stream->private_data; + + if (private_data->arrays_i == private_data->n_arrays) { + array->release = NULL; + return NANOARROW_OK; + } + + ArrowArrayMove(&private_data->arrays[private_data->arrays_i++], array); + return NANOARROW_OK; +} + +static const char* ArrowBasicArrayStreamGetLastError( + struct ArrowArrayStream* array_stream) { + NANOARROW_UNUSED(array_stream); + return NULL; +} + +static void ArrowBasicArrayStreamRelease(struct ArrowArrayStream* array_stream) { + if (array_stream == NULL || array_stream->release == NULL) { + return; + } + + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)array_stream->private_data; + + if (private_data->schema.release != NULL) { + ArrowSchemaRelease(&private_data->schema); + } + + for (int64_t i = 0; i < private_data->n_arrays; i++) { + if (private_data->arrays[i].release != NULL) { + ArrowArrayRelease(&private_data->arrays[i]); + } + } + + if (private_data->arrays != NULL) { + ArrowFree(private_data->arrays); + } + + ArrowFree(private_data); + array_stream->release = NULL; +} + +ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, + struct ArrowSchema* schema, int64_t n_arrays) { + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)ArrowMalloc( + sizeof(struct BasicArrayStreamPrivate)); + if (private_data == NULL) { + return ENOMEM; + } + + ArrowSchemaMove(schema, &private_data->schema); + + private_data->n_arrays = n_arrays; + private_data->arrays = NULL; + private_data->arrays_i = 0; + + if (n_arrays > 0) { + private_data->arrays = + (struct ArrowArray*)ArrowMalloc(n_arrays * sizeof(struct ArrowArray)); + if (private_data->arrays == NULL) { + ArrowBasicArrayStreamRelease(array_stream); + return ENOMEM; + } + } + + for (int64_t i = 0; i < private_data->n_arrays; i++) { + private_data->arrays[i].release = NULL; + } + + array_stream->get_schema = &ArrowBasicArrayStreamGetSchema; + array_stream->get_next = &ArrowBasicArrayStreamGetNext; + array_stream->get_last_error = ArrowBasicArrayStreamGetLastError; + array_stream->release = ArrowBasicArrayStreamRelease; + array_stream->private_data = private_data; + return NANOARROW_OK; +} + +void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i, + struct ArrowArray* array) { + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)array_stream->private_data; + ArrowArrayMove(array, &private_data->arrays[i]); +} + +ArrowErrorCode ArrowBasicArrayStreamValidate(const struct ArrowArrayStream* array_stream, + struct ArrowError* error) { + struct BasicArrayStreamPrivate* private_data = + (struct 
BasicArrayStreamPrivate*)array_stream->private_data; + + struct ArrowArrayView array_view; + NANOARROW_RETURN_NOT_OK( + ArrowArrayViewInitFromSchema(&array_view, &private_data->schema, error)); + + for (int64_t i = 0; i < private_data->n_arrays; i++) { + if (private_data->arrays[i].release != NULL) { + int result = ArrowArrayViewSetArray(&array_view, &private_data->arrays[i], error); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(&array_view); + return result; + } + } + } + + ArrowArrayViewReset(&array_view); + return NANOARROW_OK; +} diff --git a/src/oracledb/interchange/nanoarrow/nanoarrow.h b/src/oracledb/interchange/nanoarrow/nanoarrow.h new file mode 100644 index 00000000..0738957c --- /dev/null +++ b/src/oracledb/interchange/nanoarrow/nanoarrow.h @@ -0,0 +1,4279 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_BUILD_ID_H_INCLUDED +#define NANOARROW_BUILD_ID_H_INCLUDED + +#define NANOARROW_VERSION_MAJOR 0 +#define NANOARROW_VERSION_MINOR 6 +#define NANOARROW_VERSION_PATCH 0 +#define NANOARROW_VERSION "0.6.0" + +#define NANOARROW_VERSION_INT \ + (NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \ + NANOARROW_VERSION_PATCH) + +#define NANOARROW_NAMESPACE PythonPkg + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_NANOARROW_TYPES_H_INCLUDED +#define NANOARROW_NANOARROW_TYPES_H_INCLUDED + +#include +#include + + + +#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE) +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Extra guard for versions of Arrow without the canonical guard +#ifndef ARROW_FLAG_DICTIONARY_ORDERED + +/// \defgroup nanoarrow-arrow-cdata Arrow C Data interface +/// +/// The Arrow C Data (https://arrow.apache.org/docs/format/CDataInterface.html) +/// and Arrow C Stream (https://arrow.apache.org/docs/format/CStreamInterface.html) +/// interfaces are part of the +/// Arrow Columnar Format specification +/// (https://arrow.apache.org/docs/format/Columnar.html). 
See the Arrow documentation for +/// documentation of these structures. +/// +/// @{ + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE + +#ifndef ARROW_C_STREAM_INTERFACE +#define ARROW_C_STREAM_INTERFACE + +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_STREAM_INTERFACE +#endif // ARROW_FLAG_DICTIONARY_ORDERED + +/// @} + +// Utility macros +#define _NANOARROW_CONCAT(x, y) x##y +#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y) + +#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) return NAME; \ + } while (0) + +#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \ + NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL) + +#define _NANOARROW_CHECK_UPPER_LIMIT(x_, max_) \ + NANOARROW_RETURN_NOT_OK((x_ <= max_) ? 
NANOARROW_OK : EINVAL) + +#if defined(NANOARROW_DEBUG) +#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) { \ + ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d(%s)\n* %s:%d", EXPR_STR, \ + NAME, strerror(NAME), __FILE__, __LINE__); \ + return NAME; \ + } \ + } while (0) +#else +#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) { \ + ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d", EXPR_STR, NAME); \ + return NAME; \ + } \ + } while (0) +#endif + +#if defined(NANOARROW_DEBUG) +// For checking ArrowErrorSet() calls for valid printf format strings/arguments +// If using mingw's c99-compliant printf, we need a different format-checking attribute +#if defined(__USE_MINGW_ANSI_STDIO) && defined(__MINGW_PRINTF_FORMAT) +#define NANOARROW_CHECK_PRINTF_ATTRIBUTE \ + __attribute__((format(__MINGW_PRINTF_FORMAT, 2, 3))) +#elif defined(__GNUC__) +#define NANOARROW_CHECK_PRINTF_ATTRIBUTE __attribute__((format(printf, 2, 3))) +#else +#define NANOARROW_CHECK_PRINTF_ATTRIBUTE +#endif + +// For checking calls to functions that return ArrowErrorCode +#if defined(__GNUC__) && (__GNUC__ >= 4) +#define NANOARROW_CHECK_RETURN_ATTRIBUTE __attribute__((warn_unused_result)) +#elif defined(_MSC_VER) && (_MSC_VER >= 1700) +#define NANOARROW_CHECK_RETURN_ATTRIBUTE _Check_return_ +#else +#define NANOARROW_CHECK_RETURN_ATTRIBUTE +#endif + +#else +#define NANOARROW_CHECK_RETURN_ATTRIBUTE +#define NANOARROW_CHECK_PRINTF_ATTRIBUTE +#endif + +#define NANOARROW_UNUSED(x) (void)(x) + +/// \brief Return code for success. +/// \ingroup nanoarrow-errors +#define NANOARROW_OK 0 + +/// \brief Represents an errno-compatible error code +/// \ingroup nanoarrow-errors +typedef int ArrowErrorCode; + +#if defined(NANOARROW_DEBUG) +#define ArrowErrorCode NANOARROW_CHECK_RETURN_ATTRIBUTE ArrowErrorCode +#endif + +/// \brief Flags supported by ArrowSchemaViewInit() +/// \ingroup nanoarrow-schema-view +#define NANOARROW_FLAG_ALL_SUPPORTED \ + (ARROW_FLAG_DICTIONARY_ORDERED | ARROW_FLAG_NULLABLE | ARROW_FLAG_MAP_KEYS_SORTED) + +/// \brief Error type containing a UTF-8 encoded message. +/// \ingroup nanoarrow-errors +struct ArrowError { + /// \brief A character buffer with space for an error message. + char message[1024]; +}; + +/// \brief Ensure an ArrowError is null-terminated by zeroing the first character. +/// \ingroup nanoarrow-errors +/// +/// If error is NULL, this function does nothing. +static inline void ArrowErrorInit(struct ArrowError* error) { + if (error != NULL) { + error->message[0] = '\0'; + } +} + +/// \brief Get the contents of an error +/// \ingroup nanoarrow-errors +/// +/// If error is NULL, returns "", or returns the contents of the error message +/// otherwise. +static inline const char* ArrowErrorMessage(struct ArrowError* error) { + if (error == NULL) { + return ""; + } else { + return error->message; + } +} + +/// \brief Set the contents of an error from an existing null-terminated string +/// \ingroup nanoarrow-errors +/// +/// If error is NULL, this function does nothing. 
+static inline void ArrowErrorSetString(struct ArrowError* error, const char* src) {
+  if (error == NULL) {
+    return;
+  }
+
+  int64_t src_len = strlen(src);
+  if (src_len >= ((int64_t)sizeof(error->message))) {
+    memcpy(error->message, src, sizeof(error->message) - 1);
+    error->message[sizeof(error->message) - 1] = '\0';
+  } else {
+    memcpy(error->message, src, src_len);
+    error->message[src_len] = '\0';
+  }
+}
+
+/// \brief Check the result of an expression and return it if not NANOARROW_OK
+/// \ingroup nanoarrow-errors
+#define NANOARROW_RETURN_NOT_OK(EXPR) \
+  _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR)
+
+/// \brief Check the result of an expression and return it if not NANOARROW_OK,
+/// adding an auto-generated message to an ArrowError.
+/// \ingroup nanoarrow-errors
+///
+/// This macro is used to ensure that functions that accept an ArrowError
+/// as input always set its message when returning an error code (e.g., when calling
+/// a nanoarrow function that does *not* accept ArrowError).
+#define NANOARROW_RETURN_NOT_OK_WITH_ERROR(EXPR, ERROR_EXPR) \
+  _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(                  \
+      _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, ERROR_EXPR, #EXPR)
+
+#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE)
+#define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR)                                 \
+  do {                                                                           \
+    fprintf(stderr, "%s failed with code %d\n* %s:%d\n", EXPR_STR, (int)(VALUE), \
+            __FILE__, (int)__LINE__);                                            \
+    abort();                                                                     \
+  } while (0)
+#endif
+
+#if defined(NANOARROW_DEBUG)
+#define _NANOARROW_ASSERT_OK_IMPL(NAME, EXPR, EXPR_STR) \
+  do {                                                  \
+    const int NAME = (EXPR);                            \
+    if (NAME) NANOARROW_PRINT_AND_DIE(NAME, EXPR_STR);  \
+  } while (0)
+
+/// \brief Assert that an expression's value is NANOARROW_OK
+/// \ingroup nanoarrow-errors
+///
+/// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is true),
+/// print a message to stderr and abort. If nanoarrow was built in release mode,
+/// this statement has no effect. You can customize fatal error behaviour
+/// by defining the NANOARROW_PRINT_AND_DIE macro before including nanoarrow.h.
+/// This macro is provided as a convenience for users and is not used internally.
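///
/// A sketch of how these macros typically compose (the helper name
/// make_int32_schema is hypothetical; the schema functions are declared later
/// in this header):
///
///     static ArrowErrorCode make_int32_schema(struct ArrowSchema* schema) {
///       NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT32));
///       NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema, "col"));
///       return NANOARROW_OK;
///     }
///
///     // ...and, where a failure indicates a programming error:
///     // NANOARROW_ASSERT_OK(make_int32_schema(&schema));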
+#define NANOARROW_ASSERT_OK(EXPR) \ + _NANOARROW_ASSERT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, #EXPR) + +#define _NANOARROW_DCHECK_IMPL(EXPR, EXPR_STR) \ + do { \ + if (!(EXPR)) NANOARROW_PRINT_AND_DIE(-1, EXPR_STR); \ + } while (0) + +#define NANOARROW_DCHECK(EXPR) _NANOARROW_DCHECK_IMPL(EXPR, #EXPR) +#else +#define NANOARROW_ASSERT_OK(EXPR) (void)(EXPR) +#define NANOARROW_DCHECK(EXPR) +#endif + +static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst) { + NANOARROW_DCHECK(src != NULL); + NANOARROW_DCHECK(dst != NULL); + + memcpy(dst, src, sizeof(struct ArrowSchema)); + src->release = NULL; +} + +static inline void ArrowSchemaRelease(struct ArrowSchema* schema) { + NANOARROW_DCHECK(schema != NULL); + schema->release(schema); + NANOARROW_DCHECK(schema->release == NULL); +} + +static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst) { + NANOARROW_DCHECK(src != NULL); + NANOARROW_DCHECK(dst != NULL); + + memcpy(dst, src, sizeof(struct ArrowArray)); + src->release = NULL; +} + +static inline void ArrowArrayRelease(struct ArrowArray* array) { + NANOARROW_DCHECK(array != NULL); + array->release(array); + NANOARROW_DCHECK(array->release == NULL); +} + +static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dst) { + NANOARROW_DCHECK(src != NULL); + NANOARROW_DCHECK(dst != NULL); + + memcpy(dst, src, sizeof(struct ArrowArrayStream)); + src->release = NULL; +} + +static inline const char* ArrowArrayStreamGetLastError( + struct ArrowArrayStream* array_stream) { + NANOARROW_DCHECK(array_stream != NULL); + + const char* value = array_stream->get_last_error(array_stream); + if (value == NULL) { + return ""; + } else { + return value; + } +} + +static inline ArrowErrorCode ArrowArrayStreamGetSchema( + struct ArrowArrayStream* array_stream, struct ArrowSchema* out, + struct ArrowError* error) { + NANOARROW_DCHECK(array_stream != NULL); + + int result = array_stream->get_schema(array_stream, out); + if (result != NANOARROW_OK && error != NULL) { + ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream)); + } + + return result; +} + +static inline ArrowErrorCode ArrowArrayStreamGetNext( + struct ArrowArrayStream* array_stream, struct ArrowArray* out, + struct ArrowError* error) { + NANOARROW_DCHECK(array_stream != NULL); + + int result = array_stream->get_next(array_stream, out); + if (result != NANOARROW_OK && error != NULL) { + ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream)); + } + + return result; +} + +static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream) { + NANOARROW_DCHECK(array_stream != NULL); + array_stream->release(array_stream); + NANOARROW_DCHECK(array_stream->release == NULL); +} + +static char _ArrowIsLittleEndian(void) { + uint32_t check = 1; + char first_byte; + memcpy(&first_byte, &check, sizeof(char)); + return first_byte; +} + +/// \brief Arrow type enumerator +/// \ingroup nanoarrow-utils +/// +/// These names are intended to map to the corresponding arrow::Type::type +/// enumerator; however, the numeric values are specifically not equal +/// (i.e., do not rely on numeric comparison). 
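///
/// For example, ArrowTypeString() (defined below) maps NANOARROW_TYPE_INT32 to
/// the string "int32" and returns NULL for values outside this enumeration.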
+enum ArrowType { + NANOARROW_TYPE_UNINITIALIZED = 0, + NANOARROW_TYPE_NA = 1, + NANOARROW_TYPE_BOOL, + NANOARROW_TYPE_UINT8, + NANOARROW_TYPE_INT8, + NANOARROW_TYPE_UINT16, + NANOARROW_TYPE_INT16, + NANOARROW_TYPE_UINT32, + NANOARROW_TYPE_INT32, + NANOARROW_TYPE_UINT64, + NANOARROW_TYPE_INT64, + NANOARROW_TYPE_HALF_FLOAT, + NANOARROW_TYPE_FLOAT, + NANOARROW_TYPE_DOUBLE, + NANOARROW_TYPE_STRING, + NANOARROW_TYPE_BINARY, + NANOARROW_TYPE_FIXED_SIZE_BINARY, + NANOARROW_TYPE_DATE32, + NANOARROW_TYPE_DATE64, + NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TYPE_TIME32, + NANOARROW_TYPE_TIME64, + NANOARROW_TYPE_INTERVAL_MONTHS, + NANOARROW_TYPE_INTERVAL_DAY_TIME, + NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL256, + NANOARROW_TYPE_LIST, + NANOARROW_TYPE_STRUCT, + NANOARROW_TYPE_SPARSE_UNION, + NANOARROW_TYPE_DENSE_UNION, + NANOARROW_TYPE_DICTIONARY, + NANOARROW_TYPE_MAP, + NANOARROW_TYPE_EXTENSION, + NANOARROW_TYPE_FIXED_SIZE_LIST, + NANOARROW_TYPE_DURATION, + NANOARROW_TYPE_LARGE_STRING, + NANOARROW_TYPE_LARGE_BINARY, + NANOARROW_TYPE_LARGE_LIST, + NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO, + NANOARROW_TYPE_RUN_END_ENCODED, + NANOARROW_TYPE_BINARY_VIEW, + NANOARROW_TYPE_STRING_VIEW +}; + +/// \brief Get a string value of an enum ArrowType value +/// \ingroup nanoarrow-utils +/// +/// Returns NULL for invalid values for type +static inline const char* ArrowTypeString(enum ArrowType type); + +static inline const char* ArrowTypeString(enum ArrowType type) { + switch (type) { + case NANOARROW_TYPE_NA: + return "na"; + case NANOARROW_TYPE_BOOL: + return "bool"; + case NANOARROW_TYPE_UINT8: + return "uint8"; + case NANOARROW_TYPE_INT8: + return "int8"; + case NANOARROW_TYPE_UINT16: + return "uint16"; + case NANOARROW_TYPE_INT16: + return "int16"; + case NANOARROW_TYPE_UINT32: + return "uint32"; + case NANOARROW_TYPE_INT32: + return "int32"; + case NANOARROW_TYPE_UINT64: + return "uint64"; + case NANOARROW_TYPE_INT64: + return "int64"; + case NANOARROW_TYPE_HALF_FLOAT: + return "half_float"; + case NANOARROW_TYPE_FLOAT: + return "float"; + case NANOARROW_TYPE_DOUBLE: + return "double"; + case NANOARROW_TYPE_STRING: + return "string"; + case NANOARROW_TYPE_BINARY: + return "binary"; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + return "fixed_size_binary"; + case NANOARROW_TYPE_DATE32: + return "date32"; + case NANOARROW_TYPE_DATE64: + return "date64"; + case NANOARROW_TYPE_TIMESTAMP: + return "timestamp"; + case NANOARROW_TYPE_TIME32: + return "time32"; + case NANOARROW_TYPE_TIME64: + return "time64"; + case NANOARROW_TYPE_INTERVAL_MONTHS: + return "interval_months"; + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + return "interval_day_time"; + case NANOARROW_TYPE_DECIMAL128: + return "decimal128"; + case NANOARROW_TYPE_DECIMAL256: + return "decimal256"; + case NANOARROW_TYPE_LIST: + return "list"; + case NANOARROW_TYPE_STRUCT: + return "struct"; + case NANOARROW_TYPE_SPARSE_UNION: + return "sparse_union"; + case NANOARROW_TYPE_DENSE_UNION: + return "dense_union"; + case NANOARROW_TYPE_DICTIONARY: + return "dictionary"; + case NANOARROW_TYPE_MAP: + return "map"; + case NANOARROW_TYPE_EXTENSION: + return "extension"; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + return "fixed_size_list"; + case NANOARROW_TYPE_DURATION: + return "duration"; + case NANOARROW_TYPE_LARGE_STRING: + return "large_string"; + case NANOARROW_TYPE_LARGE_BINARY: + return "large_binary"; + case NANOARROW_TYPE_LARGE_LIST: + return "large_list"; + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + return "interval_month_day_nano"; + case 
NANOARROW_TYPE_RUN_END_ENCODED: + return "run_end_encoded"; + case NANOARROW_TYPE_BINARY_VIEW: + return "binary_view"; + case NANOARROW_TYPE_STRING_VIEW: + return "string_view"; + default: + return NULL; + } +} + +/// \brief Arrow time unit enumerator +/// \ingroup nanoarrow-utils +/// +/// These names and values map to the corresponding arrow::TimeUnit::type +/// enumerator. +enum ArrowTimeUnit { + NANOARROW_TIME_UNIT_SECOND = 0, + NANOARROW_TIME_UNIT_MILLI = 1, + NANOARROW_TIME_UNIT_MICRO = 2, + NANOARROW_TIME_UNIT_NANO = 3 +}; + +/// \brief Validation level enumerator +/// \ingroup nanoarrow-array +enum ArrowValidationLevel { + /// \brief Do not validate buffer sizes or content. + NANOARROW_VALIDATION_LEVEL_NONE = 0, + + /// \brief Validate buffer sizes that depend on array length but do not validate buffer + /// sizes that depend on buffer data access. + NANOARROW_VALIDATION_LEVEL_MINIMAL = 1, + + /// \brief Validate all buffer sizes, including those that require buffer data access, + /// but do not perform any checks that are O(1) along the length of the buffers. + NANOARROW_VALIDATION_LEVEL_DEFAULT = 2, + + /// \brief Validate all buffer sizes and all buffer content. This is useful in the + /// context of untrusted input or input that may have been corrupted in transit. + NANOARROW_VALIDATION_LEVEL_FULL = 3 +}; + +/// \brief Comparison level enumerator +/// \ingroup nanoarrow-utils +enum ArrowCompareLevel { + /// \brief Consider arrays equal if buffers contain identical content + /// and have identical offset, null count, and length. Note that this is + /// a much stricter check than logical equality, which would take into + /// account potentially different content of null slots, arrays with a + /// non-zero offset, and other considerations. + NANOARROW_COMPARE_IDENTICAL, +}; + +/// \brief Get a string value of an enum ArrowTimeUnit value +/// \ingroup nanoarrow-utils +/// +/// Returns NULL for invalid values for time_unit +static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit); + +static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) { + switch (time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + return "s"; + case NANOARROW_TIME_UNIT_MILLI: + return "ms"; + case NANOARROW_TIME_UNIT_MICRO: + return "us"; + case NANOARROW_TIME_UNIT_NANO: + return "ns"; + default: + return NULL; + } +} + +/// \brief Functional types of buffers as described in the Arrow Columnar Specification +/// \ingroup nanoarrow-array-view +enum ArrowBufferType { + NANOARROW_BUFFER_TYPE_NONE, + NANOARROW_BUFFER_TYPE_VALIDITY, + NANOARROW_BUFFER_TYPE_TYPE_ID, + NANOARROW_BUFFER_TYPE_UNION_OFFSET, + NANOARROW_BUFFER_TYPE_DATA_OFFSET, + NANOARROW_BUFFER_TYPE_DATA, + NANOARROW_BUFFER_TYPE_VARIADIC_DATA, + NANOARROW_BUFFER_TYPE_VARIADIC_SIZE +}; + +/// \brief The maximum number of fixed buffers in an ArrowArrayView or ArrowLayout +/// \ingroup nanoarrow-array-view +#define NANOARROW_MAX_FIXED_BUFFERS 3 + +/// \brief An non-owning view of a string +/// \ingroup nanoarrow-utils +struct ArrowStringView { + /// \brief A pointer to the start of the string + /// + /// If size_bytes is 0, this value may be NULL. + const char* data; + + /// \brief The size of the string in bytes, + /// + /// (Not including the null terminator.) 
+ int64_t size_bytes; +}; + +/// \brief Return a view of a const C string +/// \ingroup nanoarrow-utils +static inline struct ArrowStringView ArrowCharView(const char* value); + +static inline struct ArrowStringView ArrowCharView(const char* value) { + struct ArrowStringView out; + + out.data = value; + if (value) { + out.size_bytes = (int64_t)strlen(value); + } else { + out.size_bytes = 0; + } + + return out; +} + +union ArrowBufferViewData { + const void* data; + const int8_t* as_int8; + const uint8_t* as_uint8; + const int16_t* as_int16; + const uint16_t* as_uint16; + const int32_t* as_int32; + const uint32_t* as_uint32; + const int64_t* as_int64; + const uint64_t* as_uint64; + const double* as_double; + const float* as_float; + const char* as_char; + const union ArrowBinaryView* as_binary_view; +}; + +/// \brief An non-owning view of a buffer +/// \ingroup nanoarrow-utils +struct ArrowBufferView { + /// \brief A pointer to the start of the buffer + /// + /// If size_bytes is 0, this value may be NULL. + union ArrowBufferViewData data; + + /// \brief The size of the buffer in bytes + int64_t size_bytes; +}; + +/// \brief Array buffer allocation and deallocation +/// \ingroup nanoarrow-buffer +/// +/// Container for allocate, reallocate, and free methods that can be used +/// to customize allocation and deallocation of buffers when constructing +/// an ArrowArray. +struct ArrowBufferAllocator { + /// \brief Reallocate a buffer or return NULL if it cannot be reallocated + uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t old_size, int64_t new_size); + + /// \brief Deallocate a buffer allocated by this allocator + void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size); + + /// \brief Opaque data specific to the allocator + void* private_data; +}; + +typedef void (*ArrowBufferDeallocatorCallback)(struct ArrowBufferAllocator* allocator, + uint8_t* ptr, int64_t size); + +/// \brief An owning mutable view of a buffer +/// \ingroup nanoarrow-buffer +struct ArrowBuffer { + /// \brief A pointer to the start of the buffer + /// + /// If capacity_bytes is 0, this value may be NULL. + uint8_t* data; + + /// \brief The size of the buffer in bytes + int64_t size_bytes; + + /// \brief The capacity of the buffer in bytes + int64_t capacity_bytes; + + /// \brief The allocator that will be used to reallocate and/or free the buffer + struct ArrowBufferAllocator allocator; +}; + +/// \brief An owning mutable view of a bitmap +/// \ingroup nanoarrow-bitmap +struct ArrowBitmap { + /// \brief An ArrowBuffer to hold the allocated memory + struct ArrowBuffer buffer; + + /// \brief The number of bits that have been appended to the bitmap + int64_t size_bits; +}; + +/// \brief A description of an arrangement of buffers +/// \ingroup nanoarrow-utils +/// +/// Contains the minimum amount of information required to +/// calculate the size of each buffer in an ArrowArray knowing only +/// the length and offset of the array. 
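///
/// As a rough illustration (expected values, not normative): after calling
/// ArrowLayoutInit(&layout, NANOARROW_TYPE_INT32) (declared later in this
/// header), the layout describes a validity bitmap buffer followed by a 32-bit
/// data buffer, with element_size_bits of 1 and 32 respectively and the third
/// buffer slot unused.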
+struct ArrowLayout { + /// \brief The function of each buffer + enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS]; + + /// \brief The data type of each buffer + enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS]; + + /// \brief The size of an element each buffer or 0 if this size is variable or unknown + int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS]; + + /// \brief The number of elements in the child array per element in this array for a + /// fixed-size list + int64_t child_size_elements; +}; + +/// \brief A non-owning view of an ArrowArray +/// \ingroup nanoarrow-array-view +/// +/// This data structure provides access to the values contained within +/// an ArrowArray with fields provided in a more readily-extractible +/// form. You can re-use an ArrowArrayView for multiple ArrowArrays +/// with the same storage type, use it to represent a hypothetical +/// ArrowArray that does not exist yet, or use it to validate the buffers +/// of a future ArrowArray. +struct ArrowArrayView { + /// \brief The underlying ArrowArray or NULL if it has not been set or + /// if the buffers in this ArrowArrayView are not backed by an ArrowArray. + const struct ArrowArray* array; + + /// \brief The number of elements from the physical start of the buffers. + int64_t offset; + + /// \brief The number of elements in this view. + int64_t length; + + /// \brief A cached null count or -1 to indicate that this value is unknown. + int64_t null_count; + + /// \brief The type used to store values in this array + /// + /// This type represents only the minimum required information to + /// extract values from the array buffers (e.g., for a Date32 array, + /// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded + /// arrays, this will be the index type. + enum ArrowType storage_type; + + /// \brief The buffer types, strides, and sizes of this Array's buffers + struct ArrowLayout layout; + + /// \brief This Array's buffers as ArrowBufferView objects + struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS]; + + /// \brief The number of children of this view + int64_t n_children; + + /// \brief Pointers to views of this array's children + struct ArrowArrayView** children; + + /// \brief Pointer to a view of this array's dictionary + struct ArrowArrayView* dictionary; + + /// \brief Union type id to child index mapping + /// + /// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer + /// such that child_index == union_type_id_map[type_id] and + /// type_id == union_type_id_map[128 + child_index]. This value may be + /// NULL in the case where child_id == type_id. + int8_t* union_type_id_map; + + /// \brief Number of variadic buffers + int32_t n_variadic_buffers; + + /// \brief Pointers to variadic buffers of binary/string_view arrays + const void** variadic_buffers; + + /// \brief Size of each variadic buffer + int64_t* variadic_buffer_sizes; +}; + +// Used as the private data member for ArrowArrays allocated here and accessed +// internally within inline ArrowArray* helpers. +struct ArrowArrayPrivateData { + // Holder for the validity buffer (or first buffer for union types, which are + // the only type whose first buffer is not a valdiity buffer) + struct ArrowBitmap bitmap; + + // Holder for additional buffers as required + struct ArrowBuffer buffers[NANOARROW_MAX_FIXED_BUFFERS - 1]; + + // The array of pointers to buffers. 
This must be updated after a sequence + // of appends to synchronize its values with the actual buffer addresses + // (which may have been reallocated during that time) + const void** buffer_data; + + // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown + enum ArrowType storage_type; + + // The buffer arrangement for the storage type + struct ArrowLayout layout; + + // Flag to indicate if there are non-sequence union type ids. + // In the future this could be replaced with a type id<->child mapping + // to support constructing unions in append mode where type_id != child_index + int8_t union_type_id_is_child_index; + + // Number of variadic buffers for binary view types + int32_t n_variadic_buffers; + + // Variadic buffers for binary view types + struct ArrowBuffer* variadic_buffers; + + // Size of each variadic buffer in bytes + int64_t* variadic_buffer_sizes; +}; + +/// \brief A representation of an interval. +/// \ingroup nanoarrow-utils +struct ArrowInterval { + /// \brief The type of interval being used + enum ArrowType type; + /// \brief The number of months represented by the interval + int32_t months; + /// \brief The number of days represented by the interval + int32_t days; + /// \brief The number of ms represented by the interval + int32_t ms; + /// \brief The number of ns represented by the interval + int64_t ns; +}; + +/// \brief Zero initialize an Interval with a given unit +/// \ingroup nanoarrow-utils +static inline void ArrowIntervalInit(struct ArrowInterval* interval, + enum ArrowType type) { + memset(interval, 0, sizeof(struct ArrowInterval)); + interval->type = type; +} + +/// \brief A representation of a fixed-precision decimal number +/// \ingroup nanoarrow-utils +/// +/// This structure should be initialized with ArrowDecimalInit() once and +/// values set using ArrowDecimalSetInt(), ArrowDecimalSetBytes128(), +/// or ArrowDecimalSetBytes256(). +struct ArrowDecimal { + /// \brief An array of 64-bit integers of n_words length defined in native-endian order + uint64_t words[4]; + + /// \brief The number of significant digits this decimal number can represent + int32_t precision; + + /// \brief The number of digits after the decimal point. This can be negative. + int32_t scale; + + /// \brief The number of words in the words array + int n_words; + + /// \brief Cached value used by the implementation + int high_word_index; + + /// \brief Cached value used by the implementation + int low_word_index; +}; + +/// \brief Initialize a decimal with a given set of type parameters +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwidth, + int32_t precision, int32_t scale) { + memset(decimal->words, 0, sizeof(decimal->words)); + decimal->precision = precision; + decimal->scale = scale; + decimal->n_words = (int)(bitwidth / 8 / sizeof(uint64_t)); + + if (_ArrowIsLittleEndian()) { + decimal->low_word_index = 0; + decimal->high_word_index = decimal->n_words - 1; + } else { + decimal->low_word_index = decimal->n_words - 1; + decimal->high_word_index = 0; + } +} + +/// \brief Get a signed integer value of a sufficiently small ArrowDecimal +/// +/// This does not check if the decimal's precision sufficiently small to fit +/// within the signed 64-bit integer range (A precision less than or equal +/// to 18 is sufficiently small). 
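///
/// A small round-trip sketch (values illustrative; the setters are defined
/// nearby in this header):
///
///     struct ArrowDecimal decimal;
///     ArrowDecimalInit(&decimal, 128, 10, 2);  // decimal128 with precision 10, scale 2
///     ArrowDecimalSetInt(&decimal, 12345);     // i.e., 123.45 at scale 2
///     int64_t value = ArrowDecimalGetIntUnsafe(&decimal);  // 12345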
+static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decimal) { + return (int64_t)decimal->words[decimal->low_word_index]; +} + +/// \brief Copy the bytes of this decimal into a sufficiently large buffer +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalGetBytes(const struct ArrowDecimal* decimal, + uint8_t* out) { + memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); +} + +/// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise +/// \ingroup nanoarrow-utils +static inline int64_t ArrowDecimalSign(const struct ArrowDecimal* decimal) { + return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); +} + +/// \brief Sets the integer value of this decimal +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) { + if (value < 0) { + memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t)); + } else { + memset(decimal->words, 0, decimal->n_words * sizeof(uint64_t)); + } + + decimal->words[decimal->low_word_index] = value; +} + +/// \brief Negate the value of this decimal in place +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) { + uint64_t carry = 1; + + if (decimal->low_word_index == 0) { + for (int i = 0; i < decimal->n_words; i++) { + uint64_t elem = decimal->words[i]; + elem = ~elem + carry; + carry &= (elem == 0); + decimal->words[i] = elem; + } + } else { + for (int i = decimal->low_word_index; i >= 0; i--) { + uint64_t elem = decimal->words[i]; + elem = ~elem + carry; + carry &= (elem == 0); + decimal->words[i] = elem; + } + } +} + +/// \brief Copy bytes from a buffer into this decimal +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, + const uint8_t* value) { + memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); +} + +#ifdef __cplusplus +} +#endif + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_H_INCLUDED +#define NANOARROW_H_INCLUDED + +#include +#include +#include + + + +// If using CMake, optionally pass -DNANOARROW_NAMESPACE=MyNamespace which will set this +// define in nanoarrow_config.h. If not, you can optionally #define NANOARROW_NAMESPACE +// MyNamespace here. + +// This section remaps the non-prefixed symbols to the prefixed symbols so that +// code written against this build can be used independent of the value of +// NANOARROW_NAMESPACE. 
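//
// For example (a sketch): building with -DNANOARROW_NAMESPACE=MyProject, or
// defining NANOARROW_NAMESPACE before this point, makes ArrowMalloc below
// expand to MyProjectArrowMalloc, and likewise for the other remapped symbols.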
+#ifdef NANOARROW_NAMESPACE +#define NANOARROW_CAT(A, B) A##B +#define NANOARROW_SYMBOL(A, B) NANOARROW_CAT(A, B) + +#define ArrowNanoarrowVersion NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersion) +#define ArrowNanoarrowVersionInt \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersionInt) +#define ArrowMalloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMalloc) +#define ArrowRealloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowRealloc) +#define ArrowFree NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowFree) +#define ArrowBufferAllocatorDefault \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferAllocatorDefault) +#define ArrowBufferDeallocator \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator) +#define ArrowErrorSet NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorSet) +#define ArrowLayoutInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowLayoutInit) +#define ArrowDecimalSetDigits NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalSetDigits) +#define ArrowDecimalAppendDigitsToBuffer \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalAppendDigitsToBuffer) +#define ArrowSchemaInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInit) +#define ArrowSchemaInitFromType \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType) +#define ArrowSchemaSetType NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetType) +#define ArrowSchemaSetTypeStruct \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeStruct) +#define ArrowSchemaSetTypeFixedSize \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize) +#define ArrowSchemaSetTypeDecimal \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal) +#define ArrowSchemaSetTypeRunEndEncoded \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeRunEndEncoded) +#define ArrowSchemaSetTypeDateTime \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime) +#define ArrowSchemaSetTypeUnion \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeUnion) +#define ArrowSchemaDeepCopy NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaDeepCopy) +#define ArrowSchemaSetFormat NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetFormat) +#define ArrowSchemaSetName NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetName) +#define ArrowSchemaSetMetadata \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetMetadata) +#define ArrowSchemaAllocateChildren \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateChildren) +#define ArrowSchemaAllocateDictionary \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateDictionary) +#define ArrowMetadataReaderInit \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderInit) +#define ArrowMetadataReaderRead \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderRead) +#define ArrowMetadataSizeOf NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataSizeOf) +#define ArrowMetadataHasKey NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataHasKey) +#define ArrowMetadataGetValue NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataGetValue) +#define ArrowMetadataBuilderInit \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderInit) +#define ArrowMetadataBuilderAppend \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderAppend) +#define ArrowMetadataBuilderSet \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderSet) +#define ArrowMetadataBuilderRemove \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderRemove) +#define ArrowSchemaViewInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaViewInit) 
+#define ArrowSchemaToString NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaToString) +#define ArrowArrayInitFromType \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromType) +#define ArrowArrayInitFromSchema \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema) +#define ArrowArrayInitFromArrayView \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView) +#define ArrowArrayInitFromArrayView \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView) +#define ArrowArrayAllocateChildren \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateChildren) +#define ArrowArrayAllocateDictionary \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateDictionary) +#define ArrowArraySetValidityBitmap \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetValidityBitmap) +#define ArrowArraySetBuffer NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetBuffer) +#define ArrowArrayReserve NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayReserve) +#define ArrowArrayFinishBuilding \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuilding) +#define ArrowArrayFinishBuildingDefault \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuildingDefault) +#define ArrowArrayViewInitFromType \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromType) +#define ArrowArrayViewInitFromSchema \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema) +#define ArrowArrayViewAllocateChildren \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren) +#define ArrowArrayViewAllocateDictionary \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateDictionary) +#define ArrowArrayViewSetLength \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength) +#define ArrowArrayViewSetArray \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray) +#define ArrowArrayViewSetArrayMinimal \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArrayMinimal) +#define ArrowArrayViewValidate \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate) +#define ArrowArrayViewCompare NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewCompare) +#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewReset) +#define ArrowBasicArrayStreamInit \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit) +#define ArrowBasicArrayStreamSetArray \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamSetArray) +#define ArrowBasicArrayStreamValidate \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamValidate) + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/// \defgroup nanoarrow Nanoarrow C library +/// +/// Except where noted, objects are not thread-safe and clients should +/// take care to serialize accesses to methods. +/// +/// Because this library is intended to be vendored, it provides full type +/// definitions and encourages clients to stack or statically allocate +/// where convenient. + +/// \defgroup nanoarrow-malloc Memory management +/// +/// Non-buffer members of a struct ArrowSchema and struct ArrowArray +/// must be allocated using ArrowMalloc() or ArrowRealloc() and freed +/// using ArrowFree() for schemas and arrays allocated here. Buffer members +/// are allocated using an ArrowBufferAllocator. 
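///
/// A sketch of the intended pairing (the size is illustrative):
///
///     int32_t* values = (int32_t*)ArrowMalloc(16 * sizeof(int32_t));
///     if (values != NULL) {
///       /* ...use values... */
///       ArrowFree(values);
///     }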
+///
+/// @{
+
+/// \brief Allocate like malloc()
+void* ArrowMalloc(int64_t size);
+
+/// \brief Reallocate like realloc()
+void* ArrowRealloc(void* ptr, int64_t size);
+
+/// \brief Free a pointer allocated using ArrowMalloc() or ArrowRealloc().
+void ArrowFree(void* ptr);
+
+/// \brief Return the default allocator
+///
+/// The default allocator uses ArrowMalloc(), ArrowRealloc(), and
+/// ArrowFree().
+struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void);
+
+/// \brief Create a custom deallocator
+///
+/// Creates a buffer allocator with only a free method that can be used to
+/// attach a custom deallocator to an ArrowBuffer. This may be used to
+/// avoid copying an existing buffer that was not allocated using the
+/// infrastructure provided here (e.g., by an R or Python object).
+struct ArrowBufferAllocator ArrowBufferDeallocator(ArrowBufferDeallocatorCallback,
+                                                   void* private_data);
+
+/// @}
+
+/// \brief Move the contents of a src ArrowSchema into dst and set src->release to NULL
+/// \ingroup nanoarrow-arrow-cdata
+static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst);
+
+/// \brief Call the release callback of an ArrowSchema
+/// \ingroup nanoarrow-arrow-cdata
+static inline void ArrowSchemaRelease(struct ArrowSchema* schema);
+
+/// \brief Move the contents of a src ArrowArray into dst and set src->release to NULL
+/// \ingroup nanoarrow-arrow-cdata
+static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst);
+
+/// \brief Call the release callback of an ArrowArray
+static inline void ArrowArrayRelease(struct ArrowArray* array);
+
+/// \brief Move the contents of a src ArrowArrayStream into dst and set src->release to
+/// NULL
+/// \ingroup nanoarrow-arrow-cdata
+static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src,
+                                        struct ArrowArrayStream* dst);
+
+/// \brief Call the get_schema callback of an ArrowArrayStream
+/// \ingroup nanoarrow-arrow-cdata
+///
+/// Unlike the get_schema callback, this wrapper checks the return code
+/// and propagates the error reported by get_last_error into error. This
+/// makes it significantly less verbose to iterate over array streams
+/// using NANOARROW_RETURN_NOT_OK()-style error handling.
+static inline ArrowErrorCode ArrowArrayStreamGetSchema(
+    struct ArrowArrayStream* array_stream, struct ArrowSchema* out,
+    struct ArrowError* error);
+
+/// \brief Call the get_next callback of an ArrowArrayStream
+/// \ingroup nanoarrow-arrow-cdata
+///
+/// Unlike the get_next callback, this wrapper checks the return code
+/// and propagates the error reported by get_last_error into error. This
+/// makes it significantly less verbose to iterate over array streams
+/// using NANOARROW_RETURN_NOT_OK()-style error handling.
+static inline ArrowErrorCode ArrowArrayStreamGetNext(
+    struct ArrowArrayStream* array_stream, struct ArrowArray* out,
+    struct ArrowError* error);
+
+/// \brief Call the get_last_error callback of an ArrowArrayStream
+/// \ingroup nanoarrow-arrow-cdata
+///
+/// Unlike the get_last_error callback, this function never returns NULL (i.e., its
+/// result is safe to use in printf-style error formatters). Null values from the
+/// original callback are reported as "".
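///
/// A consumer-side sketch combining the stream wrappers above (the stream is
/// assumed to come from a producer; consume_array() is a hypothetical
/// callback, and the snippet is assumed to run inside a function returning
/// ArrowErrorCode):
///
///     struct ArrowSchema schema;
///     struct ArrowArray array;
///     struct ArrowError error;
///     ArrowErrorInit(&error);
///
///     NANOARROW_RETURN_NOT_OK(ArrowArrayStreamGetSchema(&stream, &schema, &error));
///     while (1) {
///       NANOARROW_RETURN_NOT_OK(ArrowArrayStreamGetNext(&stream, &array, &error));
///       if (array.release == NULL) break;  // stream has ended
///       consume_array(&array);
///       ArrowArrayRelease(&array);
///     }
///     ArrowSchemaRelease(&schema);
///     ArrowArrayStreamRelease(&stream);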
+static inline const char* ArrowArrayStreamGetLastError( + struct ArrowArrayStream* array_stream); + +/// \brief Call the release callback of an ArrowArrayStream +static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream); + +/// \defgroup nanoarrow-errors Error handling +/// +/// Functions generally return an errno-compatible error code; functions that +/// need to communicate more verbose error information accept a pointer +/// to an ArrowError. This can be stack or statically allocated. The +/// content of the message is undefined unless an error code has been +/// returned. If a nanoarrow function is passed a non-null ArrowError pointer, the +/// ArrowError pointed to by the argument will be propagated with a +/// null-terminated error message. It is safe to pass a NULL ArrowError anywhere +/// in the nanoarrow API. +/// +/// Except where documented, it is generally not safe to continue after a +/// function has returned a non-zero ArrowErrorCode. The NANOARROW_RETURN_NOT_OK and +/// NANOARROW_ASSERT_OK macros are provided to help propagate errors. C++ clients can use +/// the helpers provided in the nanoarrow.hpp header to facilitate using C++ idioms +/// for memory management and error propgagtion. +/// +/// @{ + +/// \brief Set the contents of an error using printf syntax. +/// +/// If error is NULL, this function does nothing and returns NANOARROW_OK. +NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet(struct ArrowError* error, + const char* fmt, ...); + +/// @} + +/// \defgroup nanoarrow-utils Utility data structures +/// +/// @{ + +/// \brief Return a version string in the form "major.minor.patch" +const char* ArrowNanoarrowVersion(void); + +/// \brief Return an integer that can be used to compare versions sequentially +int ArrowNanoarrowVersionInt(void); + +/// \brief Initialize a description of buffer arrangements from a storage type +void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type); + +/// \brief Create a string view from a null-terminated string +static inline struct ArrowStringView ArrowCharView(const char* value); + +/// \brief Sets the integer value of an ArrowDecimal from a string +ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, + struct ArrowStringView value); + +/// \brief Get the integer value of an ArrowDecimal as string +ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal, + struct ArrowBuffer* buffer); + +/// \brief Get the half float value of a float +static inline uint16_t ArrowFloatToHalfFloat(float value); + +/// \brief Get the float value of a half float +static inline float ArrowHalfFloatToFloat(uint16_t value); + +/// \brief Resolve a chunk index from increasing int64_t offsets +/// +/// Given a buffer of increasing int64_t offsets that begin with 0 (e.g., offset buffer +/// of a large type, run ends of a chunked array implementation), resolve a value v +/// where lo <= v < hi such that offsets[v] <= index < offsets[v + 1]. +static inline int64_t ArrowResolveChunk64(int64_t index, const int64_t* offsets, + int64_t lo, int64_t hi); + +/// @} + +/// \defgroup nanoarrow-schema Creating schemas +/// +/// These functions allocate, copy, and destroy ArrowSchema structures +/// +/// @{ + +/// \brief Initialize an ArrowSchema +/// +/// Initializes the fields and release callback of schema_out. Caller +/// is responsible for calling the schema->release callback if +/// NANOARROW_OK is returned. 
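///
/// A sketch of building a two-column struct schema with the functions in this
/// group (column names are illustrative; error checking via
/// NANOARROW_RETURN_NOT_OK is elided):
///
///     struct ArrowSchema schema;
///     ArrowSchemaInit(&schema);
///     ArrowSchemaSetTypeStruct(&schema, 2);
///     ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64);
///     ArrowSchemaSetName(schema.children[0], "id");
///     ArrowSchemaSetType(schema.children[1], NANOARROW_TYPE_STRING);
///     ArrowSchemaSetName(schema.children[1], "name");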
+void ArrowSchemaInit(struct ArrowSchema* schema); + +/// \brief Initialize an ArrowSchema from an ArrowType +/// +/// A convenience constructor for that calls ArrowSchemaInit() and +/// ArrowSchemaSetType() for the common case of constructing an +/// unparameterized type. The caller is responsible for calling the schema->release +/// callback if NANOARROW_OK is returned. +ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type); + +/// \brief Get a human-readable summary of a Schema +/// +/// Writes a summary of an ArrowSchema to out (up to n - 1 characters) +/// and returns the number of characters required for the output if +/// n were sufficiently large. If recursive is non-zero, the result will +/// also include children. +int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, + char recursive); + +/// \brief Set the format field of a schema from an ArrowType +/// +/// Initializes the fields and release callback of schema_out. For +/// NANOARROW_TYPE_LIST, NANOARROW_TYPE_LARGE_LIST, and +/// NANOARROW_TYPE_MAP, the appropriate number of children are +/// allocated, initialized, and named; however, the caller must +/// ArrowSchemaSetType() on the preinitialized children. Schema must have been initialized +/// using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type); + +/// \brief Set the format field and initialize children of a struct schema +/// +/// The specified number of children are initialized; however, the caller is responsible +/// for calling ArrowSchemaSetType() and ArrowSchemaSetName() on each child. +/// Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children); + +/// \brief Set the format field of a fixed-size schema +/// +/// Returns EINVAL for fixed_size <= 0 or for type that is not +/// NANOARROW_TYPE_FIXED_SIZE_BINARY or NANOARROW_TYPE_FIXED_SIZE_LIST. +/// For NANOARROW_TYPE_FIXED_SIZE_LIST, the appropriate number of children are +/// allocated, initialized, and named; however, the caller must +/// ArrowSchemaSetType() the first child. Schema must have been initialized using +/// ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, + enum ArrowType type, int32_t fixed_size); + +/// \brief Set the format field of a decimal schema +/// +/// Returns EINVAL for scale <= 0 or for type that is not +/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have been +/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, + int32_t decimal_precision, + int32_t decimal_scale); + +/// \brief Set the format field of a run-end encoded schema +/// +/// Returns EINVAL for run_end_type that is not +/// NANOARROW_TYPE_INT16, NANOARROW_TYPE_INT32 or NANOARROW_TYPE_INT64. +/// Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +/// The caller must call `ArrowSchemaSetTypeXXX(schema->children[1])` to +/// set the value type. Note that when building arrays using the `ArrowArrayAppendXXX()` +/// functions, the run-end encoded array's logical length must be updated manually. 
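///
/// For example (a sketch, error checking elided): a run-end encoded column of
/// int64 values with int32 run ends could be described as
///
///     ArrowSchemaInit(&schema);
///     ArrowSchemaSetTypeRunEndEncoded(&schema, NANOARROW_TYPE_INT32);
///     ArrowSchemaSetType(schema.children[1], NANOARROW_TYPE_INT64);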
+ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema, + enum ArrowType run_end_type); + +/// \brief Set the format field of a time, timestamp, or duration schema +/// +/// Returns EINVAL for type that is not +/// NANOARROW_TYPE_TIME32, NANOARROW_TYPE_TIME64, +/// NANOARROW_TYPE_TIMESTAMP, or NANOARROW_TYPE_DURATION. The +/// timezone parameter must be NULL for a non-timestamp type. Schema must have been +/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, + enum ArrowTimeUnit time_unit, + const char* timezone); + +/// \brief Set the format field of a union schema +/// +/// Returns EINVAL for a type that is not NANOARROW_TYPE_DENSE_UNION +/// or NANOARROW_TYPE_SPARSE_UNION. The specified number of children are +/// allocated, and initialized. +ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, + int64_t n_children); + +/// \brief Make a (recursive) copy of a schema +/// +/// Allocates and copies fields of schema into schema_out. +ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, + struct ArrowSchema* schema_out); + +/// \brief Copy format into schema->format +/// +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format); + +/// \brief Copy name into schema->name +/// +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name); + +/// \brief Copy metadata into schema->metadata +/// +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy. +ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata); + +/// \brief Allocate the schema->children array +/// +/// Includes the memory for each child struct ArrowSchema. +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, + int64_t n_children); + +/// \brief Allocate the schema->dictionary member +/// +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema); + +/// @} + +/// \defgroup nanoarrow-metadata Create, read, and modify schema metadata +/// +/// @{ + +/// \brief Reader for key/value pairs in schema metadata +/// +/// The ArrowMetadataReader does not own any data and is only valid +/// for the lifetime of the underlying metadata pointer. +struct ArrowMetadataReader { + /// \brief A metadata string from a schema->metadata field. 
+ const char* metadata; + + /// \brief The current offset into the metadata string + int64_t offset; + + /// \brief The number of remaining keys + int32_t remaining_keys; +}; + +/// \brief Initialize an ArrowMetadataReader +ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, + const char* metadata); + +/// \brief Read the next key/value pair from an ArrowMetadataReader +ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, + struct ArrowStringView* key_out, + struct ArrowStringView* value_out); + +/// \brief The number of bytes in in a key/value metadata string +int64_t ArrowMetadataSizeOf(const char* metadata); + +/// \brief Check for a key in schema metadata +char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key); + +/// \brief Extract a value from schema metadata +/// +/// If key does not exist in metadata, value_out is unmodified +ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key, + struct ArrowStringView* value_out); + +/// \brief Initialize a builder for schema metadata from key/value pairs +/// +/// metadata can be an existing metadata string or NULL to initialize +/// an empty metadata string. +ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, const char* metadata); + +/// \brief Append a key/value pair to a buffer containing serialized metadata +ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); + +/// \brief Set a key/value pair to a buffer containing serialized metadata +/// +/// Ensures that the only entry for key in the metadata is set to value. +/// This function maintains the existing position of (the first instance of) +/// key if present in the data. +ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); + +/// \brief Remove a key from a buffer containing serialized metadata +ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, + struct ArrowStringView key); + +/// @} + +/// \defgroup nanoarrow-schema-view Reading schemas +/// +/// @{ + +/// \brief A non-owning view of a parsed ArrowSchema +/// +/// Contains more readily extractable values than a raw ArrowSchema. +/// Clients can stack or statically allocate this structure but are +/// encouraged to use the provided getters to ensure forward +/// compatibility. +struct ArrowSchemaView { + /// \brief A pointer to the schema represented by this view + const struct ArrowSchema* schema; + + /// \brief The data type represented by the schema + /// + /// This value may be NANOARROW_TYPE_DICTIONARY if the schema has a + /// non-null dictionary member; datetime types are valid values. + /// This value will never be NANOARROW_TYPE_EXTENSION (see + /// extension_name and/or extension_metadata to check for + /// an extension type). + enum ArrowType type; + + /// \brief The storage data type represented by the schema + /// + /// This value will never be NANOARROW_TYPE_DICTIONARY, NANOARROW_TYPE_EXTENSION + /// or any datetime type. This value represents only the type required to + /// interpret the buffers in the array. + enum ArrowType storage_type; + + /// \brief The storage layout represented by the schema + struct ArrowLayout layout; + + /// \brief The extension type name if it exists + /// + /// If the ARROW:extension:name key is present in schema.metadata, + /// extension_name.data will be non-NULL. 
+ struct ArrowStringView extension_name; + + /// \brief The extension type metadata if it exists + /// + /// If the ARROW:extension:metadata key is present in schema.metadata, + /// extension_metadata.data will be non-NULL. + struct ArrowStringView extension_metadata; + + /// \brief Format fixed size parameter + /// + /// This value is set when parsing a fixed-size binary or fixed-size + /// list schema; this value is undefined for other types. For a + /// fixed-size binary schema this value is in bytes; for a fixed-size + /// list schema this value refers to the number of child elements for + /// each element of the parent. + int32_t fixed_size; + + /// \brief Decimal bitwidth + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_bitwidth; + + /// \brief Decimal precision + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_precision; + + /// \brief Decimal scale + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_scale; + + /// \brief Format time unit parameter + /// + /// This value is set when parsing a date/time type. The value is + /// undefined for other types. + enum ArrowTimeUnit time_unit; + + /// \brief Format timezone parameter + /// + /// This value is set when parsing a timestamp type and represents + /// the timezone format parameter. This value points to + /// data within the schema and is undefined for other types. + const char* timezone; + + /// \brief Union type ids parameter + /// + /// This value is set when parsing a union type and represents + /// type ids parameter. This value points to + /// data within the schema and is undefined for other types. + const char* union_type_ids; +}; + +/// \brief Initialize an ArrowSchemaView +ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, + const struct ArrowSchema* schema, + struct ArrowError* error); + +/// @} + +/// \defgroup nanoarrow-buffer Owning, growable buffers +/// +/// @{ + +/// \brief Initialize an ArrowBuffer +/// +/// Initialize a buffer with a NULL, zero-size buffer using the default +/// buffer allocator. +static inline void ArrowBufferInit(struct ArrowBuffer* buffer); + +/// \brief Set a newly-initialized buffer's allocator +/// +/// Returns EINVAL if the buffer has already been allocated. +static inline ArrowErrorCode ArrowBufferSetAllocator( + struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator); + +/// \brief Reset an ArrowBuffer +/// +/// Releases the buffer using the allocator's free method if +/// the buffer's data member is non-null, sets the data member +/// to NULL, and sets the buffer's size and capacity to 0. +static inline void ArrowBufferReset(struct ArrowBuffer* buffer); + +/// \brief Move an ArrowBuffer +/// +/// Transfers the buffer data and lifecycle management to another +/// address and resets buffer. +static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst); + +/// \brief Grow or shrink a buffer to a given size +/// +/// When shrinking the size of the buffer, the buffer is only reallocated +/// if shrink_to_fit is non-zero. 
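///
/// A sketch of a typical append sequence with the buffer helpers in this group
/// (sizes illustrative; error checking elided):
///
///     struct ArrowBuffer buffer;
///     ArrowBufferInit(&buffer);
///     ArrowBufferReserve(&buffer, 3 * sizeof(int32_t));
///     for (int32_t i = 0; i < 3; i++) {
///       ArrowBufferAppendInt32(&buffer, i);
///     }
///     /* ...use buffer.data / buffer.size_bytes... */
///     ArrowBufferReset(&buffer);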
+static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer, + int64_t new_size_bytes, + char shrink_to_fit); + +/// \brief Ensure a buffer has at least a given additional capacity +/// +/// Ensures that the buffer has space to append at least +/// additional_size_bytes, overallocating when required. +static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, + int64_t additional_size_bytes); + +/// \brief Write data to buffer and increment the buffer size +/// +/// This function does not check that buffer has the required capacity +static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, + int64_t size_bytes); + +/// \brief Write data to buffer and increment the buffer size +/// +/// This function writes and ensures that the buffer has the required capacity, +/// possibly by reallocating the buffer. Like ArrowBufferReserve, this will +/// overallocate when reallocation is required. +static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, + const void* data, int64_t size_bytes); + +/// \brief Write fill to buffer and increment the buffer size +/// +/// This function writes the specified number of fill bytes and +/// ensures that the buffer has the required capacity, +static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, + uint8_t value, int64_t size_bytes); + +/// \brief Write an 8-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, + int8_t value); + +/// \brief Write an unsigned 8-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, + uint8_t value); + +/// \brief Write a 16-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, + int16_t value); + +/// \brief Write an unsigned 16-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, + uint16_t value); + +/// \brief Write a 32-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, + int32_t value); + +/// \brief Write an unsigned 32-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, + uint32_t value); + +/// \brief Write a 64-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer, + int64_t value); + +/// \brief Write an unsigned 64-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, + uint64_t value); + +/// \brief Write a double to a buffer +static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, + double value); + +/// \brief Write a float to a buffer +static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, + float value); + +/// \brief Write an ArrowStringView to a buffer +static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, + struct ArrowStringView value); + +/// \brief Write an ArrowBufferView to a buffer +static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, + struct ArrowBufferView value); + +/// @} + +/// \defgroup nanoarrow-bitmap Bitmap utilities +/// +/// @{ + +/// \brief Extract a boolean value from a bitmap +static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i); + +/// \brief Set a boolean value to a bitmap to true +static inline void ArrowBitSet(uint8_t* bits, int64_t i); + +/// \brief Set 
a boolean value to a bitmap to false +static inline void ArrowBitClear(uint8_t* bits, int64_t i); + +/// \brief Set a boolean value to a bitmap +static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t value); + +/// \brief Set a boolean value to a range in a bitmap +static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, + uint8_t bits_are_set); + +/// \brief Count true values in a bitmap +static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to); + +/// \brief Extract int8 boolean values from a range in a bitmap +static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, + int64_t length, int8_t* out); + +/// \brief Extract int32 boolean values from a range in a bitmap +static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset, + int64_t length, int32_t* out); + +/// \brief Initialize an ArrowBitmap +/// +/// Initialize the builder's buffer, empty its cache, and reset the size to zero +static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap); + +/// \brief Move an ArrowBitmap +/// +/// Transfers the underlying buffer data and lifecycle management to another +/// address and resets the bitmap. +static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst); + +/// \brief Ensure a bitmap builder has at least a given additional capacity +/// +/// Ensures that the buffer has space to append at least +/// additional_size_bits, overallocating when required. +static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, + int64_t additional_size_bits); + +/// \brief Grow or shrink a bitmap to a given size +/// +/// When shrinking the size of the bitmap, the bitmap is only reallocated +/// if shrink_to_fit is non-zero. +static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, + int64_t new_size_bits, char shrink_to_fit); + +/// \brief Reserve space for and append zero or more of the same boolean value to a bitmap +static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length); + +/// \brief Append zero or more of the same boolean value to a bitmap +static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length); + +/// \brief Append boolean values encoded as int8_t to a bitmap +/// +/// The values must all be 0 or 1. +static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + const int8_t* values, int64_t n_values); + +/// \brief Append boolean values encoded as int32_t to a bitmap +/// +/// The values must all be 0 or 1. +static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, + const int32_t* values, int64_t n_values); + +/// \brief Reset a bitmap builder +/// +/// Releases any memory held by buffer, empties the cache, and resets the size to zero +static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap); + +/// @} + +/// \defgroup nanoarrow-array Creating arrays +/// +/// These functions allocate, copy, and destroy ArrowArray structures. +/// Once an ArrowArray has been initialized via ArrowArrayInitFromType() +/// or ArrowArrayInitFromSchema(), the caller is responsible for releasing +/// it using the embedded release callback. +/// +/// @{ + +/// \brief Initialize the fields of an array +/// +/// Initializes the fields and release callback of array. Caller +/// is responsible for calling the array->release callback if +/// NANOARROW_OK is returned. 
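///
/// A sketch of element-wise construction of an int32 array (assumed to run in
/// a function returning ArrowErrorCode; the append helpers are declared later
/// in this group):
///
///     struct ArrowArray array;
///     NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromType(&array, NANOARROW_TYPE_INT32));
///     NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(&array));
///     NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(&array, 123));
///     NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(&array, 1));
///     NANOARROW_RETURN_NOT_OK(ArrowArrayFinishBuildingDefault(&array, NULL));
///     /* ...hand the array to a consumer... */
///     ArrowArrayRelease(&array);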
+ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, + enum ArrowType storage_type); + +/// \brief Initialize the contents of an ArrowArray from an ArrowSchema +/// +/// Caller is responsible for calling the array->release callback if +/// NANOARROW_OK is returned. +ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, + const struct ArrowSchema* schema, + struct ArrowError* error); + +/// \brief Initialize the contents of an ArrowArray from an ArrowArrayView +/// +/// Caller is responsible for calling the array->release callback if +/// NANOARROW_OK is returned. +ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, + const struct ArrowArrayView* array_view, + struct ArrowError* error); + +/// \brief Allocate the array->children array +/// +/// Includes the memory for each child struct ArrowArray, +/// whose members are marked as released and may be subsequently initialized +/// with ArrowArrayInitFromType() or moved from an existing ArrowArray. +/// schema must have been allocated using ArrowArrayInitFromType(). +ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children); + +/// \brief Allocate the array->dictionary member +/// +/// Includes the memory for the struct ArrowArray, whose contents +/// is marked as released and may be subsequently initialized +/// with ArrowArrayInitFromType() or moved from an existing ArrowArray. +/// array must have been allocated using ArrowArrayInitFromType() +ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array); + +/// \brief Set the validity bitmap of an ArrowArray +/// +/// array must have been allocated using ArrowArrayInitFromType() +void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap); + +/// \brief Set a buffer of an ArrowArray +/// +/// array must have been allocated using ArrowArrayInitFromType() +ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, + struct ArrowBuffer* buffer); + +/// \brief Get the validity bitmap of an ArrowArray +/// +/// array must have been allocated using ArrowArrayInitFromType() +static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array); + +/// \brief Get a buffer of an ArrowArray +/// +/// array must have been allocated using ArrowArrayInitFromType() +static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i); + +/// \brief Start element-wise appending to an ArrowArray +/// +/// Initializes any values needed to use ArrowArrayAppend*() functions. +/// All element-wise appenders append by value and return EINVAL if the exact value +/// cannot be represented by the underlying storage type. +/// array must have been allocated using ArrowArrayInitFromType() +static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array); + +/// \brief Reserve space for future appends +/// +/// For buffer sizes that can be calculated (i.e., not string data buffers or +/// child array sizes for non-fixed-size arrays), recursively reserve space for +/// additional elements. This is useful for reducing the number of reallocations +/// that occur using the item-wise appenders. 
+ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, + int64_t additional_size_elements); + +/// \brief Append a null value to an array +static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n); + +/// \brief Append an empty, non-null value to an array +static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n); + +/// \brief Append a signed integer value to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise (e.g., value +/// is outside the valid array range). +static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, int64_t value); + +/// \brief Append an unsigned integer value to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise (e.g., value +/// is outside the valid array range). +static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, + uint64_t value); + +/// \brief Append a double value to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise (e.g., value +/// is outside the valid array range or there is an attempt to append +/// a non-integer to an array with an integer storage type). +static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, + double value); + +/// \brief Append a string of bytes to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type, EOVERFLOW if appending value would overflow +/// the offset type (e.g., if the data buffer would be larger than 2 GB for a +/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a +/// binary, string, large binary, large string, or fixed-size binary array, or value is +/// the wrong size for a fixed-size binary array). +static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, + struct ArrowBufferView value); + +/// \brief Append a string value to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type, EOVERFLOW if appending value would overflow +/// the offset type (e.g., if the data buffer would be larger than 2 GB for a +/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a +/// string or large string array). +static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, + struct ArrowStringView value); + +/// \brief Append a Interval to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise. +static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, + const struct ArrowInterval* value); + +/// \brief Append a decimal value to an array +/// +/// Returns NANOARROW_OK if array is a decimal array with the appropriate +/// bitwidth or EINVAL otherwise. +static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, + const struct ArrowDecimal* value); + +/// \brief Finish a nested array element +/// +/// Appends a non-null element to the array based on the first child's current +/// length. 
Returns NANOARROW_OK if the item was successfully added, EOVERFLOW +/// if the child of a list or map array would exceed INT_MAX elements, or EINVAL +/// if the underlying storage type is not a struct, list, large list, or fixed-size +/// list, or if there was an attempt to add a struct or fixed-size list element where the +/// length of the child array(s) did not match the expected length. +static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array); + +/// \brief Finish a union array element +/// +/// Appends an element to the union type ids buffer and increments array->length. +/// For sparse unions, up to one element is added to non type-id children. Returns +/// EINVAL if the underlying storage type is not a union, if type_id is not valid, +/// or if child sizes after appending are inconsistent. +static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, + int8_t type_id); + +/// \brief Shrink buffer capacity to the size required +/// +/// Also applies shrinking to any child arrays. array must have been allocated using +/// ArrowArrayInitFromType +static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array); + +/// \brief Finish building an ArrowArray +/// +/// Flushes any pointers from internal buffers that may have been reallocated +/// into array->buffers and checks the actual size of the buffers +/// against the expected size based on the final length. +/// array must have been allocated using ArrowArrayInitFromType() +ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, + struct ArrowError* error); + +/// \brief Finish building an ArrowArray with explicit validation +/// +/// Finish building with an explicit validation level. This could perform less validation +/// (i.e. NANOARROW_VALIDATION_LEVEL_NONE or NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU +/// buffer data access is not possible or more validation (i.e., +/// NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or +/// corruptible source. +ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, + enum ArrowValidationLevel validation_level, + struct ArrowError* error); + +/// @} + +/// \defgroup nanoarrow-array-view Reading arrays +/// +/// These functions read and validate the contents of ArrowArray structures. +/// +/// @{ + +/// \brief Initialize the contents of an ArrowArrayView +void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view, + enum ArrowType storage_type); + +/// \brief Move an ArrowArrayView +/// +/// Transfers the ArrowArrayView data and lifecycle management to another +/// address and resets the contents of src.
+static inline void ArrowArrayViewMove(struct ArrowArrayView* src, + struct ArrowArrayView* dst); + +/// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema +ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, + const struct ArrowSchema* schema, + struct ArrowError* error); + +/// \brief Allocate the array_view->children array +/// +/// Includes the memory for each child struct ArrowArrayView +ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view, + int64_t n_children); + +/// \brief Allocate array_view->dictionary +ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view); + +/// \brief Set data-independent buffer sizes from length +void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length); + +/// \brief Set buffer sizes and data pointers from an ArrowArray +ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, + const struct ArrowArray* array, + struct ArrowError* error); + +/// \brief Set buffer sizes and data pointers from an ArrowArray except for those +/// that require dereferencing buffer content. +ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view, + const struct ArrowArray* array, + struct ArrowError* error); + +/// \brief Get the number of buffers +/// +/// The number of buffers referred to by this ArrowArrayView. In many cases this can also +/// be calculated from the ArrowLayout member of the ArrowArrayView or ArrowSchemaView; +/// however, for binary view and string view types, the total number of buffers depends on +/// the number of variadic buffers. +static inline int64_t ArrowArrayViewGetNumBuffers(struct ArrowArrayView* array_view); + +/// \brief Get a view of a specific buffer from an ArrowArrayView +/// +/// This is the ArrowArrayView equivalent of ArrowArray::buffers[i] that includes +/// size information (if known). +static inline struct ArrowBufferView ArrowArrayViewGetBufferView( + struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get the function of a specific buffer in an ArrowArrayView +/// +/// In many cases this can also be obtained from the ArrowLayout member of the +/// ArrowArrayView or ArrowSchemaView; however, for binary view and string view types, +/// the function of each buffer may be different between two arrays of the same type +/// depending on the number of variadic buffers. +static inline enum ArrowBufferType ArrowArrayViewGetBufferType( + struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get the data type of a specific buffer in an ArrowArrayView +/// +/// In many cases this can also be obtained from the ArrowLayout member of the +/// ArrowArrayView or ArrowSchemaView; however, for binary view and string view types, +/// the data type of each buffer may be different between two arrays of the same type +/// depending on the number of variadic buffers. +static inline enum ArrowType ArrowArrayViewGetBufferDataType( + struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get the element size (in bits) of a specific buffer in an ArrowArrayView +/// +/// In many cases this can also be obtained from the ArrowLayout member of the +/// ArrowArrayView or ArrowSchemaView; however, for binary view and string view types, +/// the element width of each buffer may be different between two arrays of the same type +/// depending on the number of variadic buffers.
+static inline int64_t ArrowArrayViewGetBufferElementSizeBits( + struct ArrowArrayView* array_view, int64_t i); + +/// \brief Performs checks on the content of an ArrowArrayView +/// +/// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray, +/// the buffer sizes and some content (first and last offset) have already +/// been validated at the "default" level. If setting the buffer pointers +/// and sizes otherwise, you may wish to perform checks at a different level. See +/// documentation for ArrowValidationLevel for the details of checks performed +/// at each level. +ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, + enum ArrowValidationLevel validation_level, + struct ArrowError* error); + +/// \brief Compare two ArrowArrayView objects for equality +/// +/// Given two ArrowArrayView instances, place either 0 (not equal) or +/// 1 (equal) at the address pointed to by out. If the comparison determines +/// that actual and expected are not equal, a reason will be communicated via +/// error if error is non-NULL. +/// +/// Returns NANOARROW_OK if the comparison completed successfully. +ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual, + const struct ArrowArrayView* expected, + enum ArrowCompareLevel level, int* out, + struct ArrowError* reason); + +/// \brief Reset the contents of an ArrowArrayView and free resources +void ArrowArrayViewReset(struct ArrowArrayView* array_view); + +/// \brief Check for a null element in an ArrowArrayView +static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView* array_view, + int64_t i); + +/// \brief Compute null count for an ArrowArrayView +static inline int64_t ArrowArrayViewComputeNullCount( + const struct ArrowArrayView* array_view); + +/// \brief Get the type id of a union array element +static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView* array_view, + int64_t i); + +/// \brief Get the child index of a union array element +static inline int8_t ArrowArrayViewUnionChildIndex( + const struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get the index to use into the relevant union child array +static inline int64_t ArrowArrayViewUnionChildOffset( + const struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get an element in an ArrowArrayView as an integer +/// +/// This function does not check for null values, that values are actually integers, or +/// that values are within a valid range for an int64. +static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView* array_view, + int64_t i); + +/// \brief Get an element in an ArrowArrayView as an unsigned integer +/// +/// This function does not check for null values, that values are actually integers, or +/// that values are within a valid range for a uint64. +static inline uint64_t ArrowArrayViewGetUIntUnsafe( + const struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get an element in an ArrowArrayView as a double +/// +/// This function does not check for null values, or +/// that values are within a valid range for a double. +static inline double ArrowArrayViewGetDoubleUnsafe( + const struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get an element in an ArrowArrayView as an ArrowStringView +/// +/// This function does not check for null values.
+static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( + const struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get an element in an ArrowArrayView as an ArrowBufferView +/// +/// This function does not check for null values. +static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( + const struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get an element in an ArrowArrayView as an ArrowDecimal +/// +/// This function does not check for null values. The out parameter must +/// be initialized with ArrowDecimalInit() with the proper parameters for this +/// type before calling this for the first time. +static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* array_view, + int64_t i, struct ArrowDecimal* out); + +/// @} + +/// \defgroup nanoarrow-basic-array-stream Basic ArrowArrayStream implementation +/// +/// An implementation of an ArrowArrayStream based on a collection of +/// zero or more previously-existing ArrowArray objects. Users should +/// initialize and/or validate the contents before transferring the +/// responsibility of the ArrowArrayStream elsewhere. +/// +/// @{ + +/// \brief Initialize an ArrowArrayStream backed by this implementation +/// +/// This function moves the ownership of schema to the array_stream. If +/// this function returns NANOARROW_OK, the caller is responsible for +/// releasing the ArrowArrayStream. +ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, + struct ArrowSchema* schema, int64_t n_arrays); + +/// \brief Set the ith ArrowArray in this ArrowArrayStream. +/// +/// array_stream must have been initialized with ArrowBasicArrayStreamInit(). +/// This function moves the ownership of array to the array_stream. i must +/// be greater than or equal to zero and less than the value of n_arrays passed in +/// ArrowBasicArrayStreamInit(). Callers are not required to fill all +/// n_arrays members (i.e., n_arrays is a maximum bound). +void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i, + struct ArrowArray* array); + +/// \brief Validate the contents of this ArrowArrayStream +/// +/// array_stream must have been initialized with ArrowBasicArrayStreamInit(). +/// This function uses ArrowArrayViewInitFromSchema() and ArrowArrayViewSetArray() +/// to validate the contents of the arrays. +ArrowErrorCode ArrowBasicArrayStreamValidate(const struct ArrowArrayStream* array_stream, + struct ArrowError* error); + +/// @} + +// Undefine ArrowErrorCode, which may have been defined to annotate functions that return +// it to warn for an unused result. +#if defined(ArrowErrorCode) +#undef ArrowErrorCode +#endif + +// Inline function definitions + + + +#ifdef __cplusplus +} +#endif + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_BUFFER_INLINE_H_INCLUDED +#define NANOARROW_BUFFER_INLINE_H_INCLUDED + +#include +#include +#include + + + +#ifdef __cplusplus +extern "C" { +#endif + +// Modified from Arrow C++ (1eb46f76) cpp/src/arrow/chunk_resolver.h#L133-L162 +static inline int64_t ArrowResolveChunk64(int64_t index, const int64_t* offsets, + int64_t lo, int64_t hi) { + // Similar to std::upper_bound(), but slightly different as our offsets + // array always starts with 0. + int64_t n = hi - lo; + // First iteration does not need to check for n > 1 + // (lo < hi is guaranteed by the precondition). + NANOARROW_DCHECK(n > 1); + do { + const int64_t m = n >> 1; + const int64_t mid = lo + m; + if (index >= offsets[mid]) { + lo = mid; + n -= m; + } else { + n = m; + } + } while (n > 1); + return lo; +} + +static inline int64_t ArrowResolveChunk32(int32_t index, const int32_t* offsets, + int32_t lo, int32_t hi) { + // Similar to std::upper_bound(), but slightly different as our offsets + // array always starts with 0. + int32_t n = hi - lo; + // First iteration does not need to check for n > 1 + // (lo < hi is guaranteed by the precondition). + NANOARROW_DCHECK(n > 1); + do { + const int32_t m = n >> 1; + const int32_t mid = lo + m; + if (index >= offsets[mid]) { + lo = mid; + n -= m; + } else { + n = m; + } + } while (n > 1); + return lo; +} + +static inline int64_t _ArrowGrowByFactor(int64_t current_capacity, int64_t new_capacity) { + int64_t doubled_capacity = current_capacity * 2; + if (doubled_capacity > new_capacity) { + return doubled_capacity; + } else { + return new_capacity; + } +} + +// float to half float conversion, adapted from Arrow Go +// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go +static inline uint16_t ArrowFloatToHalfFloat(float value) { + union { + float f; + uint32_t b; + } u; + u.f = value; + + uint16_t sn = (uint16_t)((u.b >> 31) & 0x1); + uint16_t exp = (u.b >> 23) & 0xff; + int16_t res = (int16_t)(exp - 127 + 15); + uint16_t fc = (uint16_t)(u.b >> 13) & 0x3ff; + + if (exp == 0) { + res = 0; + } else if (exp == 0xff) { + res = 0x1f; + } else if (res > 0x1e) { + res = 0x1f; + fc = 0; + } else if (res < 0x01) { + res = 0; + fc = 0; + } + + return (uint16_t)((sn << 15) | (uint16_t)(res << 10) | fc); +} + +// half float to float conversion, adapted from Arrow Go +// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go +static inline float ArrowHalfFloatToFloat(uint16_t value) { + uint32_t sn = (uint32_t)((value >> 15) & 0x1); + uint32_t exp = (value >> 10) & 0x1f; + uint32_t res = exp + 127 - 15; + uint32_t fc = value & 0x3ff; + + if (exp == 0) { + res = 0; + } else if (exp == 0x1f) { + res = 0xff; + } + + union { + float f; + uint32_t b; + } u; + u.b = (uint32_t)(sn << 31) | (uint32_t)(res << 23) | (uint32_t)(fc << 13); + return u.f; +} + +static inline void ArrowBufferInit(struct ArrowBuffer* buffer) { + buffer->data = NULL; + buffer->size_bytes = 0; + buffer->capacity_bytes = 0; + buffer->allocator = ArrowBufferAllocatorDefault(); +} + +static inline ArrowErrorCode ArrowBufferSetAllocator( + struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator) { + // This is not a perfect test for "has a buffer already been allocated" + // but is likely to catch most cases. 
+ if (buffer->data == NULL) { + buffer->allocator = allocator; + return NANOARROW_OK; + } else { + return EINVAL; + } +} + +static inline void ArrowBufferReset(struct ArrowBuffer* buffer) { + buffer->allocator.free(&buffer->allocator, (uint8_t*)buffer->data, + buffer->capacity_bytes); + ArrowBufferInit(buffer); +} + +static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst) { + memcpy(dst, src, sizeof(struct ArrowBuffer)); + src->data = NULL; + ArrowBufferInit(src); +} + +static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer, + int64_t new_size_bytes, + char shrink_to_fit) { + if (new_size_bytes < 0) { + return EINVAL; + } + + int needs_reallocation = new_size_bytes > buffer->capacity_bytes || + (shrink_to_fit && new_size_bytes < buffer->capacity_bytes); + + if (needs_reallocation) { + buffer->data = buffer->allocator.reallocate(&buffer->allocator, buffer->data, + buffer->capacity_bytes, new_size_bytes); + + if (buffer->data == NULL && new_size_bytes > 0) { + buffer->capacity_bytes = 0; + buffer->size_bytes = 0; + return ENOMEM; + } + + buffer->capacity_bytes = new_size_bytes; + } + + buffer->size_bytes = new_size_bytes; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, + int64_t additional_size_bytes) { + int64_t min_capacity_bytes = buffer->size_bytes + additional_size_bytes; + if (min_capacity_bytes <= buffer->capacity_bytes) { + return NANOARROW_OK; + } + + int64_t new_capacity_bytes = + _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes); + buffer->data = buffer->allocator.reallocate(&buffer->allocator, buffer->data, + buffer->capacity_bytes, new_capacity_bytes); + + if (buffer->data == NULL && new_capacity_bytes > 0) { + buffer->capacity_bytes = 0; + buffer->size_bytes = 0; + return ENOMEM; + } + + buffer->capacity_bytes = new_capacity_bytes; + return NANOARROW_OK; +} + +static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, + int64_t size_bytes) { + if (size_bytes > 0) { + NANOARROW_DCHECK(buffer->data != NULL); + memcpy(buffer->data + buffer->size_bytes, data, size_bytes); + buffer->size_bytes += size_bytes; + } +} + +static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, + const void* data, int64_t size_bytes) { + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); + + ArrowBufferAppendUnsafe(buffer, data, size_bytes); + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, + int8_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int8_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, + uint8_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint8_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, + int16_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int16_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, + uint16_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint16_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, + int32_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int32_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, + uint32_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint32_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* 
buffer, + int64_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int64_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, + uint64_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint64_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, + double value) { + return ArrowBufferAppend(buffer, &value, sizeof(double)); +} + +static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, + float value) { + return ArrowBufferAppend(buffer, &value, sizeof(float)); +} + +static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, + struct ArrowStringView value) { + return ArrowBufferAppend(buffer, value.data, value.size_bytes); +} + +static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, + struct ArrowBufferView value) { + return ArrowBufferAppend(buffer, value.data.data, value.size_bytes); +} + +static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, + uint8_t value, int64_t size_bytes) { + if (size_bytes == 0) { + return NANOARROW_OK; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); + + NANOARROW_DCHECK(buffer->data != NULL); // To help clang-tidy + memset(buffer->data + buffer->size_bytes, value, size_bytes); + buffer->size_bytes += size_bytes; + + return NANOARROW_OK; +} + +static const uint8_t _ArrowkBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; +static const uint8_t _ArrowkFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127}; +static const uint8_t _ArrowkPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127}; +static const uint8_t _ArrowkTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128}; + +static const uint8_t _ArrowkBytePopcount[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, + 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, + 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, + 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, + 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, + 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, + 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, + 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + +static inline int64_t _ArrowRoundUpToMultipleOf8(int64_t value) { + return (value + 7) & ~((int64_t)7); +} + +static inline int64_t _ArrowRoundDownToMultipleOf8(int64_t value) { + return (value / 8) * 8; +} + +static inline int64_t _ArrowBytesForBits(int64_t bits) { + return (bits >> 3) + ((bits & 7) != 0); +} + +static inline void _ArrowBitsUnpackInt8(const uint8_t word, int8_t* out) { + out[0] = (word & 0x1) != 0; + out[1] = (word & 0x2) != 0; + out[2] = (word & 0x4) != 0; + out[3] = (word & 0x8) != 0; + out[4] = (word & 0x10) != 0; + out[5] = (word & 0x20) != 0; + out[6] = (word & 0x40) != 0; + out[7] = (word & 0x80) != 0; +} + +static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) { + out[0] = (word & 0x1) != 0; + out[1] = (word & 0x2) != 0; + out[2] = (word & 0x4) != 0; + out[3] = (word & 0x8) != 0; + out[4] = (word & 0x10) != 0; + out[5] = (word & 0x20) != 0; + out[6] = (word & 0x40) != 0; + out[7] = (word & 0x80) != 0; +} + 
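/*
 * A minimal usage sketch (illustrative only, not part of the vendored nanoarrow
 * sources) of the ArrowBuffer helpers defined above: ArrowBufferInit(),
 * ArrowBufferReserve(), the typed ArrowBufferAppend*() functions, and
 * ArrowBufferReset(). It assumes these declarations are in scope and that
 * NANOARROW_RETURN_NOT_OK is available; error handling is abbreviated.
 */
static ArrowErrorCode ExampleFillInt32Buffer(void) {
  struct ArrowBuffer buffer;
  ArrowBufferInit(&buffer);  // starts with data == NULL and zero capacity

  // Reserving up front avoids repeated reallocation inside the append loop.
  NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(&buffer, 100 * (int64_t)sizeof(int32_t)));
  for (int32_t i = 0; i < 100; i++) {
    NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(&buffer, i));
  }

  // buffer.size_bytes is now 400 and capacity_bytes is at least that large.
  ArrowBufferReset(&buffer);  // frees the allocation and reinitializes the struct
  return NANOARROW_OK;
}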
+static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { + *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | + ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | + ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | + ((values[7] + 0x7f) & 0x80)); +} + +static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) { + *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | + ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | + ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | + ((values[7] + 0x7f) & 0x80)); +} + +static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) { + return (bits[i >> 3] >> (i & 0x07)) & 1; +} + +static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, + int64_t length, int8_t* out) { + if (length == 0) { + return; + } + + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; + + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; + + if (bytes_begin == bytes_last_valid) { + for (int i = 0; i < length; i++) { + out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + return; + } + + // first byte + for (int i = 0; i < 8 - (i_begin % 8); i++) { + *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + _ArrowBitsUnpackInt8(bits[i], out); + out += 8; + } + + // last byte + const int bits_remaining = (int)(i_end % 8 == 0 ? 8 : i_end % 8); + for (int i = 0; i < bits_remaining; i++) { + *out++ = ArrowBitGet(&bits[bytes_last_valid], i); + } +} + +static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset, + int64_t length, int32_t* out) { + if (length == 0) { + return; + } + + NANOARROW_DCHECK(bits != NULL && out != NULL); + + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; + + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; + + if (bytes_begin == bytes_last_valid) { + for (int i = 0; i < length; i++) { + out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + return; + } + + // first byte + for (int i = 0; i < 8 - (i_begin % 8); i++) { + *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + _ArrowBitsUnpackInt32(bits[i], out); + out += 8; + } + + // last byte + const int bits_remaining = (int)(i_end % 8 == 0 ? 
8 : i_end % 8); + for (int i = 0; i < bits_remaining; i++) { + *out++ = ArrowBitGet(&bits[bytes_last_valid], i); + } +} + +static inline void ArrowBitSet(uint8_t* bits, int64_t i) { + bits[i / 8] |= _ArrowkBitmask[i % 8]; +} + +static inline void ArrowBitClear(uint8_t* bits, int64_t i) { + bits[i / 8] &= _ArrowkFlippedBitmask[i % 8]; +} + +static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set) { + bits[i / 8] ^= (uint8_t)(((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i / 8])) & + _ArrowkBitmask[i % 8]); +} + +static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, + uint8_t bits_are_set) { + if (length == 0) { + return; + } + + NANOARROW_DCHECK(bits != NULL); + + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const uint8_t fill_byte = (uint8_t)(-bits_are_set); + + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_end = i_end / 8 + 1; + + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; + const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8]; + + if (bytes_end == bytes_begin + 1) { + // set bits within a single byte + const uint8_t only_byte_mask = + i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask); + bits[bytes_begin] &= only_byte_mask; + bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); + return; + } + + // set/clear trailing bits of first byte + bits[bytes_begin] &= first_byte_mask; + bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); + + if (bytes_end - bytes_begin > 2) { + // set/clear whole bytes + memset(bits + bytes_begin + 1, fill_byte, (size_t)(bytes_end - bytes_begin - 2)); + } + + if (i_end % 8 == 0) { + return; + } + + // set/clear leading bits of last byte + bits[bytes_end - 1] &= last_byte_mask; + bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask); +} + +static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset, + int64_t length) { + if (length == 0) { + return 0; + } + + NANOARROW_DCHECK(bits != NULL); + + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; + + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; + + if (bytes_begin == bytes_last_valid) { + // count bits within a single byte + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8]; + const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8]; + + const uint8_t only_byte_mask = + i_end % 8 == 0 ? last_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask); + + const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask; + return _ArrowkBytePopcount[byte_masked]; + } + + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; + const uint8_t last_byte_mask = i_end % 8 == 0 ? 
0 : _ArrowkTrailingBitmask[i_end % 8]; + int64_t count = 0; + + // first byte + count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask]; + + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + count += _ArrowkBytePopcount[bits[i]]; + } + + // last byte + count += _ArrowkBytePopcount[bits[bytes_last_valid] & ~last_byte_mask]; + + return count; +} + +static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap) { + ArrowBufferInit(&bitmap->buffer); + bitmap->size_bits = 0; +} + +static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst) { + ArrowBufferMove(&src->buffer, &dst->buffer); + dst->size_bits = src->size_bits; + src->size_bits = 0; +} + +static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, + int64_t additional_size_bits) { + int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits; + int64_t min_capacity_bytes = _ArrowBytesForBits(min_capacity_bits); + int64_t current_size_bytes = bitmap->buffer.size_bytes; + int64_t current_capacity_bytes = bitmap->buffer.capacity_bytes; + + if (min_capacity_bytes <= current_capacity_bytes) { + return NANOARROW_OK; + } + + int64_t additional_capacity_bytes = min_capacity_bytes - current_size_bytes; + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(&bitmap->buffer, additional_capacity_bytes)); + + // Zero out the last byte for deterministic output in the common case + // of reserving a known remaining size. We should have returned above + // if there was not at least one additional byte to allocate; however, + // DCHECK() just to be sure. + NANOARROW_DCHECK(bitmap->buffer.capacity_bytes > current_capacity_bytes); + bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, + int64_t new_size_bits, + char shrink_to_fit) { + if (new_size_bits < 0) { + return EINVAL; + } + + int64_t new_size_bytes = _ArrowBytesForBits(new_size_bits); + NANOARROW_RETURN_NOT_OK( + ArrowBufferResize(&bitmap->buffer, new_size_bytes, shrink_to_fit)); + + bitmap->size_bits = new_size_bits; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length)); + + ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length); + return NANOARROW_OK; +} + +static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length) { + ArrowBitsSetTo(bitmap->buffer.data, bitmap->size_bits, length, bits_are_set); + bitmap->size_bits += length; + bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits); +} + +static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + const int8_t* values, int64_t n_values) { + if (n_values == 0) { + return; + } + + const int8_t* values_cursor = values; + int64_t n_remaining = n_values; + int64_t out_i_cursor = bitmap->size_bits; + uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; + + // First byte + if ((out_i_cursor % 8) != 0) { + int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; + for (int i = 0; i < n_partial_bits; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); + } + + out_cursor++; + values_cursor += n_partial_bits; + n_remaining -= n_partial_bits; + } + + // Middle bytes + int64_t n_full_bytes = n_remaining / 8; + for (int64_t i = 0; i < n_full_bytes; i++) { + 
_ArrowBitmapPackInt8(values_cursor, out_cursor); + values_cursor += 8; + out_cursor++; + } + + // Last byte + out_i_cursor += n_full_bytes * 8; + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte + *out_cursor = 0x00; + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); + } + out_cursor++; + } + + bitmap->size_bits += n_values; + bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; +} + +static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, + const int32_t* values, int64_t n_values) { + if (n_values == 0) { + return; + } + + const int32_t* values_cursor = values; + int64_t n_remaining = n_values; + int64_t out_i_cursor = bitmap->size_bits; + uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; + + // First byte + if ((out_i_cursor % 8) != 0) { + int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; + for (int i = 0; i < n_partial_bits; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, (uint8_t)values[i]); + } + + out_cursor++; + values_cursor += n_partial_bits; + n_remaining -= n_partial_bits; + } + + // Middle bytes + int64_t n_full_bytes = n_remaining / 8; + for (int64_t i = 0; i < n_full_bytes; i++) { + _ArrowBitmapPackInt32(values_cursor, out_cursor); + values_cursor += 8; + out_cursor++; + } + + // Last byte + out_i_cursor += n_full_bytes * 8; + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte + *out_cursor = 0x00; + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, (uint8_t)values_cursor[i]); + } + out_cursor++; + } + + bitmap->size_bits += n_values; + bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; +} + +static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) { + ArrowBufferReset(&bitmap->buffer); + bitmap->size_bits = 0; +} + +#ifdef __cplusplus +} +#endif + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
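/*
 * A minimal sketch (illustrative only, not a nanoarrow API) of the ArrowBitmap
 * helpers implemented above: append runs of set and unset bits, then inspect
 * them with ArrowBitGet() and ArrowBitCountSet(). Assumes the declarations
 * above are in scope and abbreviates error handling.
 */
static ArrowErrorCode ExampleBuildValidityBitmap(void) {
  struct ArrowBitmap bitmap;
  ArrowBitmapInit(&bitmap);

  // Ten "valid" bits followed by two "null" bits; ArrowBitmapAppend() reserves
  // whatever additional space it needs.
  NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(&bitmap, 1, 10));
  NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(&bitmap, 0, 2));

  // size_bits is now 12; the number of set bits over the whole range is 10.
  int64_t n_set = ArrowBitCountSet(bitmap.buffer.data, 0, bitmap.size_bits);
  int8_t last_bit = ArrowBitGet(bitmap.buffer.data, 11);  // 0: the last element is null
  (void)n_set;
  (void)last_bit;

  ArrowBitmapReset(&bitmap);  // releases the underlying buffer
  return NANOARROW_OK;
}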
+ +#ifndef NANOARROW_ARRAY_INLINE_H_INCLUDED +#define NANOARROW_ARRAY_INLINE_H_INCLUDED + +#include +#include +#include +#include +#include + + + + +#ifdef __cplusplus +extern "C" { +#endif + +static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + return &private_data->bitmap; +} + +static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + switch (i) { + case 0: + return &private_data->bitmap.buffer; + default: + return private_data->buffers + i - 1; + } +} + +// We don't currently support the case of unions where type_id != child_index; +// however, these functions are used to keep track of where that assumption +// is made. +static inline int8_t _ArrowArrayUnionChildIndex(struct ArrowArray* array, + int8_t type_id) { + NANOARROW_UNUSED(array); + return type_id; +} + +static inline int8_t _ArrowArrayUnionTypeId(struct ArrowArray* array, + int8_t child_index) { + NANOARROW_UNUSED(array); + return child_index; +} + +static inline int32_t _ArrowParseUnionTypeIds(const char* type_ids, int8_t* out) { + if (*type_ids == '\0') { + return 0; + } + + int32_t i = 0; + long type_id; + char* end_ptr; + do { + type_id = strtol(type_ids, &end_ptr, 10); + if (end_ptr == type_ids || type_id < 0 || type_id > 127) { + return -1; + } + + if (out != NULL) { + out[i] = (int8_t)type_id; + } + + i++; + + type_ids = end_ptr; + if (*type_ids == '\0') { + return i; + } else if (*type_ids != ',') { + return -1; + } else { + type_ids++; + } + } while (1); + + return -1; +} + +static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const int8_t* type_ids, + int64_t n_type_ids, + int64_t n_children) { + if (n_type_ids != n_children) { + return 0; + } + + for (int8_t i = 0; i < n_type_ids; i++) { + if (type_ids[i] != i) { + return 0; + } + } + + return 1; +} + +static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id_str, + int64_t n_children) { + int8_t type_ids[128]; + int32_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids); + return _ArrowParsedUnionTypeIdsWillEqualChildIndices(type_ids, n_type_ids, n_children); +} + +static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_UNINITIALIZED: + return EINVAL; + case NANOARROW_TYPE_SPARSE_UNION: + case NANOARROW_TYPE_DENSE_UNION: + // Note that this value could be -1 if the type_ids string was invalid + if (private_data->union_type_id_is_child_index != 1) { + return EINVAL; + } else { + break; + } + default: + break; + } + if (private_data->storage_type == NANOARROW_TYPE_UNINITIALIZED) { + return EINVAL; + } + + // Initialize any data offset buffer with a single zero + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && + private_data->layout.element_size_bits[i] == 64) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0)); + } else if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && + private_data->layout.element_size_bits[i] == 32) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0)); + } + } + + // 
Start building any child arrays or dictionaries + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i])); + } + + if (array->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->dictionary)); + } + + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) { + for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); + NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1)); + } + + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i])); + } + + if (array->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->dictionary)); + } + + return NANOARROW_OK; +} + +static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* array, + int64_t buffer_i, uint8_t value, + int64_t n) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i); + int64_t bytes_required = + _ArrowRoundUpToMultipleOf8(private_data->layout.element_size_bits[buffer_i] * + (array->length + 1)) / + 8; + if (bytes_required > buffer->size_bytes) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes)); + } + + ArrowBitsSetTo(buffer->data, array->length, n, value); + return NANOARROW_OK; +} + +static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* array, + int64_t n, uint8_t is_valid) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + if (n == 0) { + return NANOARROW_OK; + } + + // Some type-specific handling + switch (private_data->storage_type) { + case NANOARROW_TYPE_NA: + // (An empty value for a null array *is* a null) + array->null_count += n; + array->length += n; + return NANOARROW_OK; + + case NANOARROW_TYPE_DENSE_UNION: { + // Add one null to the first child and append n references to that child + int8_t type_id = _ArrowArrayUnionTypeId(array, 0); + NANOARROW_RETURN_NOT_OK( + _ArrowArrayAppendEmptyInternal(array->children[0], 1, is_valid)); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); + for (int64_t i = 0; i < n; i++) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 1), (int32_t)array->children[0]->length - 1)); + } + // For the purposes of array->null_count, union elements are never considered "null" + // even if some children contain nulls. + array->length += n; + return NANOARROW_OK; + } + + case NANOARROW_TYPE_SPARSE_UNION: { + // Add n nulls to the first child and append n references to that child + int8_t type_id = _ArrowArrayUnionTypeId(array, 0); + NANOARROW_RETURN_NOT_OK( + _ArrowArrayAppendEmptyInternal(array->children[0], n, is_valid)); + for (int64_t i = 1; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); + } + + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); + // For the purposes of array->null_count, union elements are never considered "null" + // even if some children contain nulls. 
+ array->length += n; + return NANOARROW_OK; + } + + case NANOARROW_TYPE_FIXED_SIZE_LIST: + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty( + array->children[0], n * private_data->layout.child_size_elements)); + break; + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); + } + break; + + default: + break; + } + + // Append n is_valid bits to the validity bitmap. If we haven't allocated a bitmap yet + // and we need to append nulls, do it now. + if (!is_valid && private_data->bitmap.buffer.data == NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n)); + ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length); + ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); + } else if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n)); + ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); + } + + // Add appropriate buffer fill + struct ArrowBuffer* buffer; + int64_t size_bytes; + + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + buffer = ArrowArrayBuffer(array, i); + size_bytes = private_data->layout.element_size_bits[i] / 8; + + switch (private_data->layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_NONE: + case NANOARROW_BUFFER_TYPE_VARIADIC_DATA: + case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE: + case NANOARROW_BUFFER_TYPE_VALIDITY: + continue; + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + // Append the current value at the end of the offset buffer for each element + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); + + for (int64_t j = 0; j < n; j++) { + ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j), + size_bytes); + } + + // Skip the data buffer + i++; + continue; + case NANOARROW_BUFFER_TYPE_DATA: + // Zero out the next bit of memory + if (private_data->layout.element_size_bits[i] % 8 == 0) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); + } else { + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n)); + } + continue; + + case NANOARROW_BUFFER_TYPE_TYPE_ID: + case NANOARROW_BUFFER_TYPE_UNION_OFFSET: + // These cases return above + return EINVAL; + } + } + + array->length += n; + array->null_count += n * !is_valid; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) { + return _ArrowArrayAppendEmptyInternal(array, n, 0); +} + +static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n) { + return _ArrowArrayAppendEmptyInternal(array, n, 1); +} + +static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, + int64_t value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_INT64: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(int64_t))); + break; + case NANOARROW_TYPE_INT32: + _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, (int32_t)value)); + break; + case NANOARROW_TYPE_INT16: + _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(data_buffer, (int16_t)value)); + break; + case NANOARROW_TYPE_INT8: + _NANOARROW_CHECK_RANGE(value, INT8_MIN, 
INT8_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(data_buffer, (int8_t)value)); + break; + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_UINT8: + _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); + return ArrowArrayAppendUInt(array, value); + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value)); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + case NANOARROW_TYPE_HALF_FLOAT: + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendUInt16(data_buffer, ArrowFloatToHalfFloat((float)value))); + break; + case NANOARROW_TYPE_BOOL: + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, + uint64_t value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_UINT64: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(uint64_t))); + break; + case NANOARROW_TYPE_UINT32: + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(data_buffer, (uint32_t)value)); + break; + case NANOARROW_TYPE_UINT16: + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT16_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(data_buffer, (uint16_t)value)); + break; + case NANOARROW_TYPE_UINT8: + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT8_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(data_buffer, (uint8_t)value)); + break; + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_INT8: + _NANOARROW_CHECK_UPPER_LIMIT(value, INT64_MAX); + return ArrowArrayAppendInt(array, value); + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value)); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + case NANOARROW_TYPE_HALF_FLOAT: + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendUInt16(data_buffer, ArrowFloatToHalfFloat((float)value))); + break; + case NANOARROW_TYPE_BOOL: + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, + double value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(double))); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + case NANOARROW_TYPE_HALF_FLOAT: + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendUInt16(data_buffer, 
ArrowFloatToHalfFloat((float)value))); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +// Binary views only have two fixed buffers, but be aware that they must also +// always have more 1 buffer to store variadic buffer sizes (even if there are none) +#define NANOARROW_BINARY_VIEW_FIXED_BUFFERS 2 +#define NANOARROW_BINARY_VIEW_INLINE_SIZE 12 +#define NANOARROW_BINARY_VIEW_PREFIX_SIZE 4 +#define NANOARROW_BINARY_VIEW_BLOCK_SIZE (32 << 10) // 32KB + +// The Arrow C++ implementation uses anonymous structs as members +// of the ArrowBinaryView. For Cython support in this library, we define +// those structs outside of the ArrowBinaryView +struct ArrowBinaryViewInlined { + int32_t size; + uint8_t data[NANOARROW_BINARY_VIEW_INLINE_SIZE]; +}; + +struct ArrowBinaryViewRef { + int32_t size; + uint8_t prefix[NANOARROW_BINARY_VIEW_PREFIX_SIZE]; + int32_t buffer_index; + int32_t offset; +}; + +union ArrowBinaryView { + struct ArrowBinaryViewInlined inlined; + struct ArrowBinaryViewRef ref; + int64_t alignment_dummy; +}; + +static inline int32_t ArrowArrayVariadicBufferCount(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + return private_data->n_variadic_buffers; +} + +static inline ArrowErrorCode ArrowArrayAddVariadicBuffers(struct ArrowArray* array, + int32_t nbuffers) { + const int32_t n_current_bufs = ArrowArrayVariadicBufferCount(array); + const int32_t nvariadic_bufs_needed = n_current_bufs + nbuffers; + + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + private_data->variadic_buffers = (struct ArrowBuffer*)ArrowRealloc( + private_data->variadic_buffers, sizeof(struct ArrowBuffer) * nvariadic_bufs_needed); + if (private_data->variadic_buffers == NULL) { + return ENOMEM; + } + private_data->variadic_buffer_sizes = (int64_t*)ArrowRealloc( + private_data->variadic_buffer_sizes, sizeof(int64_t) * nvariadic_bufs_needed); + if (private_data->variadic_buffer_sizes == NULL) { + return ENOMEM; + } + + for (int32_t i = n_current_bufs; i < nvariadic_bufs_needed; i++) { + ArrowBufferInit(&private_data->variadic_buffers[i]); + private_data->variadic_buffer_sizes[i] = 0; + } + private_data->n_variadic_buffers = nvariadic_bufs_needed; + array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS + 1 + nvariadic_bufs_needed; + + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, + struct ArrowBufferView value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + if (private_data->storage_type == NANOARROW_TYPE_STRING_VIEW || + private_data->storage_type == NANOARROW_TYPE_BINARY_VIEW) { + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + union ArrowBinaryView bvt; + bvt.inlined.size = (int32_t)value.size_bytes; + + if (value.size_bytes <= NANOARROW_BINARY_VIEW_INLINE_SIZE) { + memcpy(bvt.inlined.data, value.data.as_char, value.size_bytes); + memset(bvt.inlined.data + bvt.inlined.size, 0, + NANOARROW_BINARY_VIEW_INLINE_SIZE - bvt.inlined.size); + } else { + int32_t current_n_vbufs = ArrowArrayVariadicBufferCount(array); + if (current_n_vbufs == 0 || + private_data->variadic_buffers[current_n_vbufs - 1].size_bytes + + value.size_bytes > + NANOARROW_BINARY_VIEW_BLOCK_SIZE) { + const 
int32_t additional_bufs_needed = 1; + NANOARROW_RETURN_NOT_OK( + ArrowArrayAddVariadicBuffers(array, additional_bufs_needed)); + current_n_vbufs += additional_bufs_needed; + } + + const int32_t buf_index = current_n_vbufs - 1; + struct ArrowBuffer* variadic_buf = &private_data->variadic_buffers[buf_index]; + memcpy(bvt.ref.prefix, value.data.as_char, NANOARROW_BINARY_VIEW_PREFIX_SIZE); + bvt.ref.buffer_index = (int32_t)buf_index; + bvt.ref.offset = (int32_t)variadic_buf->size_bytes; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(variadic_buf, value.data.as_char, value.size_bytes)); + private_data->variadic_buffer_sizes[buf_index] = variadic_buf->size_bytes; + } + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &bvt, sizeof(bvt))); + } else { + struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1); + struct ArrowBuffer* data_buffer = ArrowArrayBuffer( + array, 1 + (private_data->storage_type != NANOARROW_TYPE_FIXED_SIZE_BINARY)); + int32_t offset; + int64_t large_offset; + int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + offset = ((int32_t*)offset_buffer->data)[array->length]; + if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) { + return EOVERFLOW; + } + + offset += (int32_t)value.size_bytes; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + large_offset = ((int64_t*)offset_buffer->data)[array->length]; + large_offset += value.size_bytes; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(offset_buffer, &large_offset, sizeof(int64_t))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + if (value.size_bytes != fixed_size_bytes) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + default: + return EINVAL; + } + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, + struct ArrowStringView value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBufferView buffer_view; + buffer_view.data.data = value.data; + buffer_view.size_bytes = value.size_bytes; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_STRING_VIEW: + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_LARGE_BINARY: + case NANOARROW_TYPE_BINARY_VIEW: + return ArrowArrayAppendBytes(array, buffer_view); + default: + return EINVAL; + } +} + +static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, + const struct ArrowInterval* value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_INTERVAL_MONTHS: { + if (value->type != NANOARROW_TYPE_INTERVAL_MONTHS) { + return EINVAL; + } + + 
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->months)); + break; + } + case NANOARROW_TYPE_INTERVAL_DAY_TIME: { + if (value->type != NANOARROW_TYPE_INTERVAL_DAY_TIME) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->days)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->ms)); + break; + } + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { + if (value->type != NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->months)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->days)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(data_buffer, value->ns)); + break; + } + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, + const struct ArrowDecimal* value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DECIMAL128: + if (value->n_words != 2) { + return EINVAL; + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value->words, 2 * sizeof(uint64_t))); + break; + } + case NANOARROW_TYPE_DECIMAL256: + if (value->n_words != 4) { + return EINVAL; + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value->words, 4 * sizeof(uint64_t))); + break; + } + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + int64_t child_length; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_MAP: + child_length = array->children[0]->length; + if (child_length > INT32_MAX) { + return EOVERFLOW; + } + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length)); + break; + case NANOARROW_TYPE_LARGE_LIST: + child_length = array->children[0]->length; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), child_length)); + break; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + child_length = array->children[0]->length; + if (child_length != + ((array->length + 1) * private_data->layout.child_size_elements)) { + return EINVAL; + } + break; + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < array->n_children; i++) { + child_length = array->children[i]->length; + if (child_length != (array->length + 1)) { + return EINVAL; + } + } + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, + int8_t type_id) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + int64_t child_index = _ArrowArrayUnionChildIndex(array, type_id); + if 
(child_index < 0 || child_index >= array->n_children) { + return EINVAL; + } + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + // Append the target child length to the union offsets buffer + _NANOARROW_CHECK_RANGE(array->children[child_index]->length, 0, INT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 1), (int32_t)array->children[child_index]->length - 1)); + break; + case NANOARROW_TYPE_SPARSE_UNION: + // Append one empty to any non-target column that isn't already the right length + // or abort if appending a null will result in a column with invalid length + for (int64_t i = 0; i < array->n_children; i++) { + if (i == child_index || array->children[i]->length == (array->length + 1)) { + continue; + } + + if (array->children[i]->length != array->length) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], 1)); + } + + break; + default: + return EINVAL; + } + + // Write to the type_ids buffer + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt8(ArrowArrayBuffer(array, 0), (int8_t)type_id)); + array->length++; + return NANOARROW_OK; +} + +static inline void ArrowArrayViewMove(struct ArrowArrayView* src, + struct ArrowArrayView* dst) { + memcpy(dst, src, sizeof(struct ArrowArrayView)); + ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED); +} + +static inline int64_t ArrowArrayViewGetNumBuffers(struct ArrowArrayView* array_view) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_BINARY_VIEW: + case NANOARROW_TYPE_STRING_VIEW: + return NANOARROW_BINARY_VIEW_FIXED_BUFFERS + array_view->n_variadic_buffers + 1; + default: + break; + } + + int64_t n_buffers = 0; + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) { + break; + } + + n_buffers++; + } + + return n_buffers; +} + +static inline struct ArrowBufferView ArrowArrayViewGetBufferView( + struct ArrowArrayView* array_view, int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_BINARY_VIEW: + case NANOARROW_TYPE_STRING_VIEW: + if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) { + return array_view->buffer_views[i]; + } else if (i >= + (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) { + struct ArrowBufferView view; + view.data.as_int64 = array_view->variadic_buffer_sizes; + view.size_bytes = array_view->n_variadic_buffers * sizeof(double); + return view; + } else { + struct ArrowBufferView view; + view.data.data = + array_view->variadic_buffers[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS]; + view.size_bytes = + array_view->variadic_buffer_sizes[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS]; + return view; + } + default: + // We need this check to avoid -Warray-bounds from complaining + if (i >= NANOARROW_MAX_FIXED_BUFFERS) { + struct ArrowBufferView view; + view.data.data = NULL; + view.size_bytes = 0; + return view; + } else { + return array_view->buffer_views[i]; + } + } +} + +enum ArrowBufferType ArrowArrayViewGetBufferType(struct ArrowArrayView* array_view, + int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_BINARY_VIEW: + case NANOARROW_TYPE_STRING_VIEW: + if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) { + return array_view->layout.buffer_type[i]; + } else if (i == + (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) { + return NANOARROW_BUFFER_TYPE_VARIADIC_SIZE; + } else { + return NANOARROW_BUFFER_TYPE_VARIADIC_DATA; + } + default: + // We need this check to avoid 
-Warray-bounds from complaining + if (i >= NANOARROW_MAX_FIXED_BUFFERS) { + return NANOARROW_BUFFER_TYPE_NONE; + } else { + return array_view->layout.buffer_type[i]; + } + } +} + +static inline enum ArrowType ArrowArrayViewGetBufferDataType( + struct ArrowArrayView* array_view, int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_BINARY_VIEW: + case NANOARROW_TYPE_STRING_VIEW: + if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) { + return array_view->layout.buffer_data_type[i]; + } else if (i >= + (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) { + return NANOARROW_TYPE_INT64; + } else if (array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) { + return NANOARROW_TYPE_BINARY; + } else { + return NANOARROW_TYPE_STRING; + } + default: + // We need this check to avoid -Warray-bounds from complaining + if (i >= NANOARROW_MAX_FIXED_BUFFERS) { + return NANOARROW_TYPE_UNINITIALIZED; + } else { + return array_view->layout.buffer_data_type[i]; + } + } +} + +static inline int64_t ArrowArrayViewGetBufferElementSizeBits( + struct ArrowArrayView* array_view, int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_BINARY_VIEW: + case NANOARROW_TYPE_STRING_VIEW: + if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) { + return array_view->layout.element_size_bits[i]; + } else if (i >= + (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) { + return sizeof(int64_t) * 8; + } else { + return 0; + } + default: + // We need this check to avoid -Warray-bounds from complaining + if (i >= NANOARROW_MAX_FIXED_BUFFERS) { + return 0; + } else { + return array_view->layout.element_size_bits[i]; + } + } +} + +static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView* array_view, + int64_t i) { + const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; + i += array_view->offset; + switch (array_view->storage_type) { + case NANOARROW_TYPE_NA: + return 0x01; + case NANOARROW_TYPE_DENSE_UNION: + case NANOARROW_TYPE_SPARSE_UNION: + // Unions are "never null" in Arrow land + return 0x00; + default: + return validity_buffer != NULL && !ArrowBitGet(validity_buffer, i); + } +} + +static inline int64_t ArrowArrayViewComputeNullCount( + const struct ArrowArrayView* array_view) { + if (array_view->length == 0) { + return 0; + } + + switch (array_view->storage_type) { + case NANOARROW_TYPE_NA: + return array_view->length; + case NANOARROW_TYPE_DENSE_UNION: + case NANOARROW_TYPE_SPARSE_UNION: + // Unions are "never null" in Arrow land + return 0; + default: + break; + } + + const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; + if (validity_buffer == NULL) { + return 0; + } + return array_view->length - + ArrowBitCountSet(validity_buffer, array_view->offset, array_view->length); +} + +static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView* array_view, + int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + case NANOARROW_TYPE_SPARSE_UNION: + return array_view->buffer_views[0].data.as_int8[array_view->offset + i]; + default: + return -1; + } +} + +static inline int8_t ArrowArrayViewUnionChildIndex( + const struct ArrowArrayView* array_view, int64_t i) { + int8_t type_id = ArrowArrayViewUnionTypeId(array_view, i); + if (array_view->union_type_id_map == NULL) { + return type_id; + } else { + return array_view->union_type_id_map[type_id]; + } +} + +static inline int64_t ArrowArrayViewUnionChildOffset( + const struct ArrowArrayView* array_view, int64_t i) { + 
switch (array_view->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + return array_view->buffer_views[1].data.as_int32[array_view->offset + i]; + case NANOARROW_TYPE_SPARSE_UNION: + return array_view->offset + i; + default: + return -1; + } +} + +static inline int64_t ArrowArrayViewListChildOffset( + const struct ArrowArrayView* array_view, int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_LIST: + return array_view->buffer_views[1].data.as_int32[i]; + case NANOARROW_TYPE_LARGE_LIST: + return array_view->buffer_views[1].data.as_int64[i]; + default: + return -1; + } +} + +static struct ArrowBufferView ArrowArrayViewGetBytesFromViewArrayUnsafe( + const struct ArrowArrayView* array_view, int64_t i) { + const union ArrowBinaryView* bv = &array_view->buffer_views[1].data.as_binary_view[i]; + struct ArrowBufferView out = {{NULL}, bv->inlined.size}; + if (bv->inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) { + out.data.as_uint8 = bv->inlined.data; + return out; + } + + out.data.data = array_view->variadic_buffers[bv->ref.buffer_index]; + out.data.as_uint8 += bv->ref.offset; + return out; +} + +static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView* array_view, + int64_t i) { + const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + i += array_view->offset; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INTERVAL_MONTHS: + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return (int64_t)data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return (int64_t)data_view->data.as_float[i]; + case NANOARROW_TYPE_HALF_FLOAT: + return (int64_t)ArrowHalfFloatToFloat(data_view->data.as_uint16[i]); + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return INT64_MAX; + } +} + +static inline uint64_t ArrowArrayViewGetUIntUnsafe( + const struct ArrowArrayView* array_view, int64_t i) { + i += array_view->offset; + const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INTERVAL_MONTHS: + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return (uint64_t)data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return (uint64_t)data_view->data.as_float[i]; + case NANOARROW_TYPE_HALF_FLOAT: + return (uint64_t)ArrowHalfFloatToFloat(data_view->data.as_uint16[i]); + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return UINT64_MAX; + } +} + +static 
inline double ArrowArrayViewGetDoubleUnsafe( + const struct ArrowArrayView* array_view, int64_t i) { + i += array_view->offset; + const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return (double)data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return (double)data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return data_view->data.as_float[i]; + case NANOARROW_TYPE_HALF_FLOAT: + return ArrowHalfFloatToFloat(data_view->data.as_uint16[i]); + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return DBL_MAX; + } +} + +static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( + const struct ArrowArrayView* array_view, int64_t i) { + i += array_view->offset; + const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const char* data_view = array_view->buffer_views[2].data.as_char; + + struct ArrowStringView view; + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + view.data = data_view + offsets_view->data.as_int32[i]; + view.size_bytes = + offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + break; + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + view.data = data_view + offsets_view->data.as_int64[i]; + view.size_bytes = + offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i]; + break; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + view.size_bytes = array_view->layout.element_size_bits[1] / 8; + view.data = array_view->buffer_views[1].data.as_char + (i * view.size_bytes); + break; + case NANOARROW_TYPE_STRING_VIEW: + case NANOARROW_TYPE_BINARY_VIEW: { + struct ArrowBufferView buf_view = + ArrowArrayViewGetBytesFromViewArrayUnsafe(array_view, i); + view.data = buf_view.data.as_char; + view.size_bytes = buf_view.size_bytes; + break; + } + default: + view.data = NULL; + view.size_bytes = 0; + break; + } + + return view; +} + +static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( + const struct ArrowArrayView* array_view, int64_t i) { + i += array_view->offset; + const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8; + + struct ArrowBufferView view; + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + view.size_bytes = + offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + view.data.as_uint8 = data_view + offsets_view->data.as_int32[i]; + break; + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + view.size_bytes = + offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i]; + view.data.as_uint8 = data_view + offsets_view->data.as_int64[i]; + break; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + view.size_bytes = array_view->layout.element_size_bits[1] / 8; + view.data.as_uint8 = + array_view->buffer_views[1].data.as_uint8 + (i * view.size_bytes); + break; + case 
NANOARROW_TYPE_STRING_VIEW: + case NANOARROW_TYPE_BINARY_VIEW: + view = ArrowArrayViewGetBytesFromViewArrayUnsafe(array_view, i); + break; + default: + view.data.data = NULL; + view.size_bytes = 0; + break; + } + + return view; +} + +static inline void ArrowArrayViewGetIntervalUnsafe( + const struct ArrowArrayView* array_view, int64_t i, struct ArrowInterval* out) { + const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INTERVAL_MONTHS: { + const size_t size = sizeof(int32_t); + memcpy(&out->months, data_view + i * size, sizeof(int32_t)); + break; + } + case NANOARROW_TYPE_INTERVAL_DAY_TIME: { + const size_t size = sizeof(int32_t) + sizeof(int32_t); + memcpy(&out->days, data_view + i * size, sizeof(int32_t)); + memcpy(&out->ms, data_view + i * size + 4, sizeof(int32_t)); + break; + } + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { + const size_t size = sizeof(int32_t) + sizeof(int32_t) + sizeof(int64_t); + memcpy(&out->months, data_view + i * size, sizeof(int32_t)); + memcpy(&out->days, data_view + i * size + 4, sizeof(int32_t)); + memcpy(&out->ns, data_view + i * size + 8, sizeof(int64_t)); + break; + } + default: + break; + } +} + +static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* array_view, + int64_t i, struct ArrowDecimal* out) { + i += array_view->offset; + const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; + switch (array_view->storage_type) { + case NANOARROW_TYPE_DECIMAL128: + ArrowDecimalSetBytes(out, data_view + (i * 16)); + break; + case NANOARROW_TYPE_DECIMAL256: + ArrowDecimalSetBytes(out, data_view + (i * 32)); + break; + default: + memset(out->words, 0, sizeof(out->words)); + break; + } +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/oracledb/interchange/nanoarrow_bridge.pxd b/src/oracledb/interchange/nanoarrow_bridge.pxd new file mode 100644 index 00000000..806f660e --- /dev/null +++ b/src/oracledb/interchange/nanoarrow_bridge.pxd @@ -0,0 +1,102 @@ +#------------------------------------------------------------------------------ +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#------------------------------------------------------------------------------ + +#------------------------------------------------------------------------------ +# nanoarrow_bridge.pxd +# +# Cython definition file declaring the classes used for bridging between the +# nanoarrow C interface and Python. 
+#------------------------------------------------------------------------------ + +# cython: language_level = 3 + +from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t +from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t + +cdef extern from "nanoarrow.h": + + cdef struct ArrowArray: + int64_t length + int64_t null_count + int64_t offset + int64_t n_buffers + void (*release)(ArrowSchema *) + + cdef struct ArrowSchema: + void (*release)(ArrowSchema*) + + cpdef enum ArrowType: + NANOARROW_TYPE_BOOL + NANOARROW_TYPE_DECIMAL128 + NANOARROW_TYPE_DOUBLE + NANOARROW_TYPE_FLOAT + NANOARROW_TYPE_INT64 + NANOARROW_TYPE_STRING + NANOARROW_TYPE_TIMESTAMP + + cpdef enum ArrowTimeUnit: + NANOARROW_TIME_UNIT_SECOND + NANOARROW_TIME_UNIT_MILLI + NANOARROW_TIME_UNIT_MICRO + NANOARROW_TIME_UNIT_NANO + + cdef struct ArrowStringView: + const char* data + int64_t size_bytes + + cdef struct ArrowDecimal: + pass + + +cdef class OracleArrowArray: + """ + OracleArrowArray corresponds to a Column in the Relational model + + It uses functions defined in the Arrow C Data Interface + to work with Arrow buffers and incrementally append values + + The only user-facing API in this object will be __arrow_c_array__() + which is documented in the Arrow PyCapsule Interface. Arrow-backed + DataFrame libraries will use __arrow_c_array__() to directly access + the underlying arrow data + + """ + cdef: + public int32_t precision + public int32_t scale + public str name + public ArrowType arrow_type + public ArrowTimeUnit time_unit + double factor + ArrowArray *arrow_array + ArrowSchema *arrow_schema + + cdef str _schema_to_string(self) + cdef int append_bytes(self, void* ptr, int64_t num_bytes) except -1 + cdef int append_double(self, double value) except -1 + cdef int append_float(self, float value) except -1 + cdef int append_int64(self, int64_t value) except -1 + cdef int append_null(self) except -1 + cdef int append_decimal(self, void* ptr, int64_t num_bytes) except -1 + cdef int finish_building(self) except -1 diff --git a/src/oracledb/interchange/nanoarrow_bridge.pyx b/src/oracledb/interchange/nanoarrow_bridge.pyx new file mode 100644 index 00000000..05705c5e --- /dev/null +++ b/src/oracledb/interchange/nanoarrow_bridge.pyx @@ -0,0 +1,334 @@ +#------------------------------------------------------------------------------ +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#------------------------------------------------------------------------------ +#------------------------------------------------------------------------------ +# nanoarrow_bridge.pyx +# +# Cython wrapper around the Arrow C Data interface +#------------------------------------------------------------------------------ + +cimport cpython + +from libc.stdint cimport uintptr_t +from libc.string cimport strlen, strchr +from cpython.pycapsule cimport PyCapsule_New + +from .. import errors + +cdef extern from "nanoarrow/nanoarrow.c": + + ctypedef int ArrowErrorCode + + cdef union ArrowBufferViewData: + const void* data + + cdef struct ArrowBufferView: + ArrowBufferViewData data + int64_t size_bytes + + cdef struct ArrowArrayView: + ArrowBufferView *buffer_views + + cdef struct ArrowError: + pass + + cdef ArrowErrorCode NANOARROW_OK + + void ArrowArrayRelease(ArrowArray *array) + void ArrowSchemaRelease(ArrowSchema *schema) + + ArrowErrorCode ArrowArrayInitFromType(ArrowArray* array, + ArrowType storage_type) + ArrowErrorCode ArrowArrayAppendBytes(ArrowArray* array, + ArrowBufferView value) + ArrowErrorCode ArrowArrayAppendDouble(ArrowArray* array, double value) + ArrowErrorCode ArrowArrayAppendNull(ArrowArray* array, int64_t n) + ArrowErrorCode ArrowArrayAppendInt(ArrowArray* array, int64_t value) + ArrowErrorCode ArrowArrayAppendDecimal(ArrowArray * array, + const ArrowDecimal * value) + ArrowErrorCode ArrowArrayFinishBuildingDefault(ArrowArray* array, + ArrowError* error) + ArrowErrorCode ArrowArrayReserve(ArrowArray* array, + int64_t additional_size_elements) + inline ArrowErrorCode ArrowArrayStartAppending(ArrowArray* array) + ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, + const ArrowSchema* schema, + ArrowError* error) + ArrowErrorCode ArrowArrayViewSetArray(ArrowArrayView* array_view, + const ArrowArray* array, + ArrowError* error) + void ArrowSchemaInit(ArrowSchema* schema) + ArrowErrorCode ArrowSchemaInitFromType(ArrowSchema* schema, ArrowType type) + ArrowErrorCode ArrowSchemaSetTypeDateTime(ArrowSchema* schema, + ArrowType arrow_type, + ArrowTimeUnit time_unit, + const char* timezone) + ArrowErrorCode ArrowSchemaSetTypeDecimal(ArrowSchema* schema, + ArrowType type, + int32_t decimal_precision, + int32_t decimal_scale) + ArrowErrorCode ArrowSchemaSetName(ArrowSchema* schema, const char* name) + int64_t ArrowSchemaToString(const ArrowSchema* schema, char* out, + int64_t n, char recursive) + void ArrowDecimalInit(ArrowDecimal * decimal, int32_t bitwidth, + int32_t precision, int32_t scale) + ArrowErrorCode ArrowDecimalSetDigits(ArrowDecimal * decimal, + ArrowStringView value) + + +cdef int _check_nanoarrow(int code) except -1: + """ + Checks the return code of the nanoarrow function and raises an exception if + it is not NANOARROW_OK. 
+ """ + if code != NANOARROW_OK: + errors._raise_err(errors.ERR_ARROW_C_API_ERROR, code=code) + + +cdef void pycapsule_schema_deleter(object schema_capsule) noexcept: + cdef ArrowSchema * schema = cpython.PyCapsule_GetPointer( + schema_capsule, 'arrow_schema' + ) + if schema.release != NULL: + ArrowSchemaRelease(schema) + + +cdef void pycapsule_array_deleter(object array_capsule) noexcept: + cdef ArrowArray * array = cpython.PyCapsule_GetPointer( + array_capsule, 'arrow_array' + ) + # Do not invoke the deleter on a used/moved capsule + if array.release != NULL: + ArrowArrayRelease(array) + + +cdef class OracleArrowArray: + + def __cinit__(self, ArrowType arrow_type, str name, int8_t precision, + int8_t scale, ArrowTimeUnit time_unit): + cdef ArrowType storage_type = arrow_type + self.arrow_type = arrow_type + self.time_unit = time_unit + self.name = name + self.arrow_array = \ + cpython.PyMem_Malloc(sizeof(ArrowArray)) + if arrow_type == NANOARROW_TYPE_TIMESTAMP: + storage_type = NANOARROW_TYPE_INT64 + if time_unit == NANOARROW_TIME_UNIT_MILLI: + self.factor = 1e3 + elif time_unit == NANOARROW_TIME_UNIT_MICRO: + self.factor = 1e6 + elif time_unit == NANOARROW_TIME_UNIT_NANO: + self.factor = 1e9 + else: + self.factor = 1 + + _check_nanoarrow(ArrowArrayInitFromType(self.arrow_array, + storage_type)) + self.arrow_schema = \ + cpython.PyMem_Malloc(sizeof(ArrowSchema)) + _check_nanoarrow(ArrowArrayStartAppending(self.arrow_array)) + if arrow_type == NANOARROW_TYPE_DECIMAL128: + self.precision = precision + self.scale = scale + ArrowSchemaInit(self.arrow_schema) + _check_nanoarrow(ArrowSchemaSetTypeDecimal(self.arrow_schema, + arrow_type, + precision, scale)) + else: + _check_nanoarrow(ArrowSchemaInitFromType(self.arrow_schema, + storage_type)) + if arrow_type == NANOARROW_TYPE_TIMESTAMP: + _check_nanoarrow(ArrowSchemaSetTypeDateTime(self.arrow_schema, + arrow_type, + time_unit, NULL)) + _check_nanoarrow(ArrowSchemaSetName(self.arrow_schema, name.encode())) + + def __dealloc__(self): + if self.arrow_array != NULL: + cpython.PyMem_Free(self.arrow_array) + if self.arrow_schema != NULL: + cpython.PyMem_Free(self.arrow_schema) + + def __len__(self): + return self.arrow_array.length + + def __repr__(self): + return ( + f"OracleArrowArray(name={self.name}, " + f"len={self.arrow_array.length}, " + f"type={self._schema_to_string()})" + ) + + def __str__(self): + return self.__repr__() + + cdef str _schema_to_string(self): + """ + Converts the schema to a string representation. + """ + cdef char buffer[81] + ArrowSchemaToString(self.arrow_schema, buffer, sizeof(buffer), 0) + return buffer.decode() + + cdef int append_bytes(self, void* ptr, int64_t num_bytes) except -1: + """ + Append a value of type bytes to the array. + """ + cdef ArrowBufferView data + data.data.data = ptr + data.size_bytes = num_bytes + _check_nanoarrow(ArrowArrayAppendBytes(self.arrow_array, data)) + + cdef int append_double(self, double value) except -1: + """ + Append a value of type double to the array. + """ + _check_nanoarrow(ArrowArrayAppendDouble(self.arrow_array, value)) + + cdef int append_float(self, float value) except -1: + """ + Append a value of type float to the array. + """ + self.append_double(value) + + cdef int append_int64(self, int64_t value) except -1: + """ + Append a value of type int64_t to the array. + """ + _check_nanoarrow(ArrowArrayAppendInt(self.arrow_array, value)) + + cdef int append_null(self) except -1: + """ + Append a null value to the array. 
+ """ + _check_nanoarrow(ArrowArrayAppendNull(self.arrow_array, 1)) + + cdef int append_decimal(self, void* ptr, int64_t num_bytes) except -1: + """ + Append a value of type ArrowDecimal to the array + + Arrow decimals are fixed-point decimal numbers encoded as a + scaled integer. decimal128(7, 3) can exactly represent the numbers + 1234.567 and -1234.567 encoded internally as the 128-bit integers + 1234567 and -1234567, respectively + + """ + cdef: + int64_t i = 0, j = 0 + char* digits = ptr + ArrowStringView decimal_view + ArrowDecimal * decimal = \ + cpython.PyMem_Malloc(sizeof(ArrowDecimal)) + + try: + decimal_view.data = digits + decimal_view.size_bytes = num_bytes + ArrowDecimalInit(decimal, 128, self.precision, self.scale) + _check_nanoarrow(ArrowDecimalSetDigits(decimal, decimal_view)) + _check_nanoarrow(ArrowArrayAppendDecimal(self.arrow_array, decimal)) + finally: + cpython.PyMem_Free(decimal) + + cdef int finish_building(self) except -1: + """ + Finish building the array. No more data will be added to it. + """ + _check_nanoarrow(ArrowArrayFinishBuildingDefault(self.arrow_array, + NULL)) + + def get_buffer_info(self): + """ + Get buffer information required by the dataframe interchange logic. + """ + cdef: + int64_t n_buffers = self.arrow_array.n_buffers + ArrowBufferView *buffer + ArrowArrayView *view + view = cpython.PyMem_Malloc(sizeof(ArrowArrayView)) + _check_nanoarrow(ArrowArrayViewInitFromSchema(view, self.arrow_schema, + NULL)) + _check_nanoarrow(ArrowArrayViewSetArray(view, self.arrow_array, NULL)) + + # initialize all buffers to None to begin with + buffers = { + "validity": None, + "offsets": None, + "data": None + } + + # validity buffer + if n_buffers > 0 and self.arrow_array.null_count > 0: + buffer = &view.buffer_views[0] + buffers["validity"] = ( + buffer.size_bytes, + buffer.data.data + ) + + # data / offset buffer + if n_buffers == 2: + buffer = &view.buffer_views[1] + buffers["data"] = ( + buffer.size_bytes, + buffer.data.data + ) + elif n_buffers == 3: + buffer = &view.buffer_views[1] + buffers["offsets"] = ( + buffer.size_bytes, + buffer.data.data + ) + buffer = &view.buffer_views[2] + buffers["data"] = ( + buffer.size_bytes, + buffer.data.data + ) + + return buffers + + @property + def null_count(self) -> int: + return self.arrow_array.null_count + + @property + def offset(self) -> int: + return self.arrow_array.offset + + def __arrow_c_array__(self, requested_schema=None): + """ + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowArray, + respectively. 
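+
+        Usage sketch (an assumption based on the Arrow PyCapsule protocol
+        rather than part of this class's documented surface): an Arrow-aware
+        consumer such as pyarrow 14+ should be able to ingest the object
+        directly, for example
+
+            import pyarrow
+            arr = pyarrow.array(oracle_arrow_array)  # hypothetical instance
+
+        and the test suite added in this patch series builds whole tables the
+        same way via pyarrow.Table.from_arrays().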
+ """ + if requested_schema is not None: + raise NotImplementedError("requested_schema") + + array_capsule = PyCapsule_New( + self.arrow_array, 'arrow_array', &pycapsule_array_deleter + ) + schema_capsule = PyCapsule_New( + self.arrow_schema, "arrow_schema", &pycapsule_schema_deleter + ) + return schema_capsule, array_capsule diff --git a/src/oracledb/interchange/protocol.py b/src/oracledb/interchange/protocol.py new file mode 100644 index 00000000..e464bb55 --- /dev/null +++ b/src/oracledb/interchange/protocol.py @@ -0,0 +1,282 @@ +# ----------------------------------------------------------------------------- +# MIT License + +# Copyright (c) 2025 Consortium for Python Data API Standards contributors + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# protocol.py +# +# Implement DataFrame class as documented in the standard +# https://data-apis.org/dataframe-protocol/latest/API.html +# ----------------------------------------------------------------------------- + +from enum import IntEnum +from typing import ( + Any, + ClassVar, + Literal, + Protocol, + Tuple, + TypedDict, +) + +from collections.abc import Iterable, Sequence + + +class DlpackDeviceType(IntEnum): + """Integer enum for device type codes matching DLPack.""" + + CPU = 1 + CUDA = 2 + CPU_PINNED = 3 + OPENCL = 4 + VULKAN = 7 + METAL = 8 + VPI = 9 + ROCM = 10 + + +class DtypeKind(IntEnum): + """ + Integer enum for data types. + + Attributes + ---------- + INT : int + Matches to signed integer data type. + UINT : int + Matches to unsigned integer data type. + FLOAT : int + Matches to floating point data type. + BOOL : int + Matches to boolean data type. + STRING : int + Matches to string data type (UTF-8 encoded). + DATETIME : int + Matches to datetime data type. + CATEGORICAL : int + Matches to categorical data type. + """ + + INT = 0 + UINT = 1 + FLOAT = 2 + BOOL = 20 + STRING = 21 # UTF-8 + DATETIME = 22 + CATEGORICAL = 23 + DECIMAL = 24 + + +Dtype = Tuple[DtypeKind, int, str, str] # see Column.dtype + + +class ColumnNullType(IntEnum): + """ + Integer enum for null type representation. + + Attributes + ---------- + NON_NULLABLE : int + Non-nullable column. + USE_NAN : int + Use explicit float NaN value. + USE_SENTINEL : int + Sentinel value besides NaN. + USE_BITMASK : int + The bit is set/unset representing a null on a certain position. 
+ USE_BYTEMASK : int + The byte is set/unset representing a null on a certain position. + """ + + NON_NULLABLE = 0 + USE_NAN = 1 + USE_SENTINEL = 2 + USE_BITMASK = 3 + USE_BYTEMASK = 4 + + +class ColumnBuffers(TypedDict): + """Buffers backing a column.""" + + # first element is a buffer containing the column data; + # second element is the data buffer's associated dtype + data: Tuple["Buffer", "Dtype"] + + # first element is a buffer containing mask values indicating missing data; + # second element is the mask value buffer's associated dtype. + # None if the null representation is not a bit or byte mask + validity: Tuple["Buffer", "Dtype"] + + # first element is a buffer containing the offset values for + # variable-size binary data (e.g., variable-length strings); + # second element is the offsets buffer's associated dtype. + # None if the data buffer does not have an associated offsets buffer + offsets: Tuple["Buffer", "Dtype"] + + +class CategoricalDescription(TypedDict): + """Description of a categorical column.""" + + # whether the ordering of dictionary indices is semantically meaningful + is_ordered: bool + # whether a dictionary-style mapping of categorical values to other objects + # exists + is_dictionary: Literal[True] + # Python-level only (e.g. `{int: str}`). + # None if not a dictionary-style categorical. + categories: "Column" + + +class Buffer(Protocol): + """Interchange buffer object.""" + + @property + def bufsize(self) -> int: + """Buffer size in bytes.""" + + @property + def ptr(self) -> int: + """Pointer to start of the buffer as an integer.""" + + def __dlpack__(self) -> Any: + """Represent this structure as DLPack interface.""" + + def __dlpack_device__(self) -> Tuple["DlpackDeviceType", int | None]: + """Device type and device ID for where the data in the buffer + resides.""" + + +class Column(Protocol): + """Interchange column object.""" + + def size(self) -> int: + """Size of the column in elements.""" + + @property + def offset(self) -> int: + """Offset of the first element with respect to the start + of the underlying buffer.""" # noqa: W505 + + @property + def dtype(self) -> "Dtype": + """Data type of the column.""" + + @property + def describe_categorical(self) -> "CategoricalDescription": + """Description of the categorical data type of the column.""" + + @property + def describe_null(self) -> Tuple["ColumnNullType", Any]: + """Description of the null representation the column uses.""" + + @property + def null_count(self) -> int | None: + """Number of null elements, if known.""" + + @property + def metadata(self) -> dict[str, Any]: + """The metadata for the column.""" + + def num_chunks(self) -> int: + """Return the number of chunks the column consists of.""" + + def get_chunks(self, n_chunks: int | None = None) -> Iterable["Column"]: + """Return an iterator yielding the column chunks.""" + + def get_buffers(self) -> "ColumnBuffers": + """Return a dictionary containing the underlying buffers.""" + + +class DataFrame(Protocol): + """Interchange dataframe object.""" + + version: ClassVar[int] # Version of the protocol + + def __dataframe__( + self, + nan_as_null: bool = False, # noqa: FBT001 + allow_copy: bool = True, # noqa: FBT001 + ) -> "DataFrame": + """Convert to a dataframe object implementing the dataframe + interchange protocol.""" # noqa: W505 + + @property + def metadata(self) -> dict[str, Any]: + """The metadata for the dataframe.""" + + def num_columns(self) -> int: + """Return the number of columns in the dataframe.""" + + def num_rows(self) -> 
int | None: + """Return the number of rows in the dataframe, if available.""" + + def num_chunks(self) -> int: + """Return the number of chunks the dataframe consists of..""" + + def column_names(self) -> Iterable[str]: + """Return the column names.""" + + def get_column(self, i: int) -> "Column": + """Return the column at the indicated position.""" + + def get_column_by_name(self, name: str) -> "Column": + """Return the column with the given name.""" + + def get_columns(self) -> Iterable["Column"]: + """Return an iterator yielding the columns.""" + + def select_columns(self, indices: Sequence[int]) -> "DataFrame": + """Create a new dataframe by selecting a subset of columns by index.""" + + def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame": + """Create a new dataframe by selecting a subset of columns by name.""" + + def get_chunks(self, n_chunks: int | None = None) -> Iterable["DataFrame"]: + """Return an iterator yielding the chunks of the dataframe.""" + + +class SupportsInterchange(Protocol): + """Dataframe that supports conversion into an interchange + dataframe object.""" + + def __dataframe__( + self, + nan_as_null: bool = False, # noqa: FBT001 + allow_copy: bool = True, # noqa: FBT001 + ) -> "SupportsInterchange": + """Convert to a dataframe object implementing the dataframe + interchange protocol.""" # noqa: W505 + + +class Endianness: + """Enum indicating the byte-order of a data type.""" + + LITTLE = "<" + BIG = ">" + NATIVE = "=" + NA = "|" + + +class CopyNotAllowedError(RuntimeError): + """Exception raised when a copy is required, + but `allow_copy` is set to `False`.""" diff --git a/src/oracledb/thick_impl.pyx b/src/oracledb/thick_impl.pyx index dc71cd37..8dfe11f1 100644 --- a/src/oracledb/thick_impl.pyx +++ b/src/oracledb/thick_impl.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -64,13 +64,16 @@ from .base_impl cimport ( BaseVarImpl, BindVar, C_DEFAULTS, + char_type, ConnectParamsImpl, + convert_oracle_data_to_arrow, DbType, DB_TYPE_NUM_CURSOR, DRIVER_NAME, DRIVER_VERSION, DRIVER_INSTALLATION_URL, ENCODING_UTF8, + OracleData, OracleMetadata, PURITY_DEFAULT, PY_TYPE_DATE, diff --git a/src/oracledb/thin_impl.pyx b/src/oracledb/thin_impl.pyx index 13a29688..c08a24f6 100644 --- a/src/oracledb/thin_impl.pyx +++ b/src/oracledb/thin_impl.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. 
# # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -110,6 +110,7 @@ from .base_impl cimport ( Buffer, ConnectParamsImpl, convert_oracle_data_to_python, + convert_oracle_data_to_arrow, convert_date_to_python, CS_FORM_IMPLICIT, CS_FORM_NCHAR, diff --git a/tests/sql/create_schema.sql b/tests/sql/create_schema.sql index e9486286..b342d5af 100644 --- a/tests/sql/create_schema.sql +++ b/tests/sql/create_schema.sql @@ -379,6 +379,19 @@ create table &main_user..PlsqlSessionCallbacks ( ) / +create table &main_user..TestDataframe ( + Id number(9), + FirstName varchar2(100), + LastName varchar2(100), + City varchar2(100), + Country varchar2(100), + DateOfBirth date, + Salary number(9, 2), + CreditScore number(3, 0), + LastUpdated timestamp +) +/ + -- create queue table and queues for testing advanced queuing begin diff --git a/tests/test_8000_dataframe.py b/tests/test_8000_dataframe.py new file mode 100644 index 00000000..9e1215a8 --- /dev/null +++ b/tests/test_8000_dataframe.py @@ -0,0 +1,481 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ----------------------------------------------------------------------------- + +""" +Module for testing dataframes +""" +import datetime +import decimal + +import oracledb + +try: + import pyarrow + import pandas + + HAS_INTEROP = True +except ImportError: + HAS_INTEROP = False + +import test_env + +# basic +DATASET_1 = [ + ( + 1, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + 12132.40, + 400, + datetime.datetime.now(), + ), + ( + 2, + "Big", + "Hero", + "San Fransokyo", + "Japansa", + datetime.date(1988, 8, 22), + 234234.32, + 400, + datetime.datetime.now(), + ), +] + +# None, -ve +DATASET_2 = [ + ( + 1, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + None, + 400, + datetime.datetime.now(), + ), + ( + 2, + "Big", + "Hero", + "San Fransokyo", + None, + datetime.date(1988, 8, 22), + -12312.1, + 0, + datetime.datetime.now(), + ), +] + +# None, +/- 0.XXX +DATASET_3 = [ + ( + 1, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + None, + 400, + datetime.datetime.now(), + ), + ( + 2, + "Big", + "Hero", + "San Fransokyo", + None, + datetime.date(1988, 8, 22), + 0.12, + 0, + datetime.datetime.now(), + ), + ( + 3, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + None, + 400, + datetime.datetime.now(), + ), + ( + 4, + "Big", + "Hero", + "San Fransokyo", + None, + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), +] + +# Duplicates +DATASET_4 = [ + ( + 1, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 2, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 3, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 4, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 5, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 6, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), +] + + +class TestCase(test_env.BaseTestCase): + + def __check_interop(self): + """ + Checks to see if the pyarrow and pandas modules are available. + """ + if not HAS_INTEROP: + self.skipTest("missing pandas or pyarrow modules") + + def __convert_to_array(self, data, typ): + """ + Convert raw data to an Arrow array using pyarrow. + """ + if isinstance(typ, pyarrow.Decimal128Type): + data = [ + decimal.Decimal(str(value)) if value is not None else value + for value in data + ] + elif isinstance(typ, pyarrow.TimestampType): + if typ.unit == "s": + data = [ + datetime.datetime(v.year, v.month, v.day).timestamp() + for v in data + ] + else: + data = [value.timestamp() * 1000000 for value in data] + mask = [value is None for value in data] + return pyarrow.array(data, typ, mask=mask) + + def __convert_to_df(self, data): + """ + Converts the data set to a Pandas data frame for comparison to what is + returned from the database. 
+ """ + data_by_col = [[row[i] for row in data] for i in range(len(data[0]))] + fetch_decimals = oracledb.defaults.fetch_decimals + types = [ + pyarrow.decimal128(9) if fetch_decimals else pyarrow.int64(), + pyarrow.string(), + pyarrow.string(), + pyarrow.string(), + pyarrow.string(), + pyarrow.timestamp("s"), + pyarrow.decimal128(9, 2) if fetch_decimals else pyarrow.float64(), + pyarrow.decimal128(3) if fetch_decimals else pyarrow.int64(), + pyarrow.timestamp("us"), + ] + arrays = [ + self.__convert_to_array(d, t) for d, t in zip(data_by_col, types) + ] + names = [ + "ID", + "FIRSTNAME", + "LASTNAME", + "CITY", + "COUNTRY", + "DATEOFBIRTH", + "SALARY", + "CREDITSCORE", + "LASTUPDATED", + ] + pa_tab = pyarrow.Table.from_arrays(arrays, names=names) + return pa_tab.to_pandas() + + def __get_data_from_df(self, df): + """ + Returns data from the data frame in a normalized fashion suitable for + comparison. In particular, NaN values cannot be compared to one another + so they are converted to the value None for comparison purposes. + """ + return [ + tuple(None if pandas.isna(v) else v for v in row) + for row in df.itertuples(index=False, name=None) + ] + + def __populate_table(self, data): + """ + Populate the test table with the given data. + """ + self.cursor.execute("truncate table TestDataframe") + types = [None] * len(data[0]) + types[8] = oracledb.DB_TYPE_TIMESTAMP + self.cursor.setinputsizes(*types) + self.cursor.executemany( + """ + insert into TestDataframe ( + Id, FirstName, LastName, City, Country, + DateOfBirth, Salary, CreditScore, LastUpdated + ) values ( + :id, :first_name, :last_name, :city, :country, + :dob, :salary, :credit_score, :last_updated + ) + """, + data, + ) + self.conn.commit() + + def __test_df_interop(self, data): + """ + Tests interoperability with external data frames using the data set + provided. + """ + self.__check_interop() + self.__populate_table(data) + statement = "select * from TestDataFrame order by Id" + ora_df = self.conn.fetch_df_all(statement) + self.__validate_df(ora_df, data) + + def __test_df_batches_interop(self, data, batch_size, num_batches): + """ + Tests interoperability with external data frames using the data set + provided. + """ + self.__check_interop() + self.__populate_table(data) + statement = "select * from TestDataFrame order by Id" + batches = list(self.conn.fetch_df_batches(statement, size=batch_size)) + self.assertEqual(len(batches), num_batches) + if num_batches == 1: + self.__validate_df(batches[0], data) + else: + offset = 0 + for batch in batches: + self.__validate_df(batch, data[offset : offset + batch_size]) + offset += batch_size + + def __validate_df(self, ora_df, data): + """ + Validates the data frame by converting it to Pandas and comparing it + with the original data set that was used. 
+ """ + raw_df = self.__convert_to_df(data) + raw_data = self.__get_data_from_df(raw_df) + fetched_tab = pyarrow.Table.from_arrays( + ora_df.column_arrays(), names=ora_df.column_names() + ) + fetched_df = fetched_tab.to_pandas() + fetched_data = self.__get_data_from_df(fetched_df) + self.assertEqual(fetched_data, raw_data) + + def test_8000(self): + "8000 - test basic fetch of data frame" + self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame order by Id" + ora_df = self.conn.fetch_df_all(statement) + self.assertEqual(ora_df.num_rows(), len(DATASET_1)) + self.assertEqual(ora_df.num_columns(), len(DATASET_1[0])) + metadata = dict( + num_columns=ora_df.num_columns(), + num_rows=ora_df.num_rows(), + num_chunks=1, + ) + self.assertEqual(ora_df.metadata, metadata) + + def test_8001(self): + "8001 - test conversion to external dataframe" + self.__test_df_interop(DATASET_1) + + def test_8002(self): + "8001 - test null and negative values" + self.__test_df_interop(DATASET_2) + + def test_8003(self): + "8002 - test with fetch_decimals" + with test_env.DefaultsContextManager("fetch_decimals", True): + self.__test_df_interop(DATASET_1) + + def test_8004(self): + "8003 - test null and negative values with fetch_decimals" + with test_env.DefaultsContextManager("fetch_decimals", True): + self.__test_df_interop(DATASET_2) + + def test_8005(self): + "8005 - test null and values with leading zeros" + self.__test_df_interop(DATASET_3) + + def test_8006(self): + "8005 - test null and values with leading zeros with fetch_decimals" + with test_env.DefaultsContextManager("fetch_decimals", True): + self.__test_df_interop(DATASET_3) + + def test_8007(self): + "8007 - duplicate values in the rows" + self.__test_df_interop(DATASET_4) + + def test_8008(self): + "8008 - batches without specification of size" + self.__test_df_batches_interop( + DATASET_4, batch_size=None, num_batches=1 + ) + + def test_8009(self): + "8009 - batches with specification of size" + self.__test_df_batches_interop(DATASET_4, batch_size=5, num_batches=2) + + def test_8010(self): + "8010 - verify passing Arrow arrays twice fails" + self.__check_interop() + self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame order by Id" + ora_df = self.conn.fetch_df_all(statement) + pyarrow.Table.from_arrays( + ora_df.column_arrays(), names=ora_df.column_names() + ) + with self.assertRaises(pyarrow.lib.ArrowInvalid): + pyarrow.Table.from_arrays( + ora_df.column_arrays(), names=ora_df.column_names() + ) + + def test_8011(self): + "8011 - verify empty data set" + self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame where Id = 4" + ora_df = self.conn.fetch_df_all(statement) + self.assertEqual(ora_df.num_rows(), 0) + + def test_8012(self): + "8012 - verify empty data set with batches" + self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame where Id = 4" + for ora_df in self.conn.fetch_df_batches(statement): + self.assertEqual(ora_df.num_rows(), 0) + + def test_8013(self): + "8013 - negative checks on attributes" + self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame order by Id" + ora_df = self.conn.fetch_df_all(statement) + with self.assertRaises(IndexError): + ora_df.get_column(121) + with self.assertRaises(IndexError): + ora_df.get_column(-1) + with self.assertRaises(KeyError): + ora_df.get_column_by_name("missing_column") + + def test_8014(self): + "8014 - check size and null count with no nulls" + self.__populate_table(DATASET_1) + statement = 
"select * from TestDataFrame order by Id" + ora_df = self.conn.fetch_df_all(statement) + col = ora_df.get_column(0) + self.assertEqual(col.size(), len(DATASET_1)) + self.assertEqual(col.null_count, 0) + + def test_8015(self): + "8015 - check size and null count with nulls present" + self.__populate_table(DATASET_2) + statement = "select * from TestDataFrame order by Id" + ora_df = self.conn.fetch_df_all(statement) + col = ora_df.get_column_by_name("SALARY") + self.assertEqual(col.size(), len(DATASET_2)) + self.assertEqual(col.null_count, 1) + + def test_8016(self): + "8016 - check unsupported error for LOBs" + statement = "select to_clob('test_8016') from dual" + with self.assertRaisesFullCode("DPY-3030"): + self.conn.fetch_df_all(statement) + + def test_8017(self): + "8017 - batches with specification of size matching number of rows" + self.__test_df_batches_interop( + DATASET_2, batch_size=len(DATASET_2), num_batches=1 + ) + + +if __name__ == "__main__": + test_env.run_test_cases() diff --git a/utils/templates/connection.py b/utils/templates/connection.py index eaabc2f3..bb607666 100644 --- a/utils/templates/connection.py +++ b/utils/templates/connection.py @@ -711,6 +711,43 @@ def encode_oson(self, value): self._verify_connected() return self._impl.encode_oson(value) + def fetch_df_all( + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, + arraysize: Optional[int] = None, + ): + """ + Fetch all data as OracleDataFrame. + """ + cursor = self.cursor() + cursor._impl.fetching_arrow = True + if arraysize is not None: + cursor.arraysize = arraysize + cursor.prefetchrows = cursor.arraysize + cursor.execute(statement, parameters) + return cursor._impl.fetch_df_all(cursor) + + def fetch_df_batches( + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, + size: Optional[int] = None, + ): + """ + Fetch data in batches. Each batch is an OracleDataFrame + """ + cursor = self.cursor() + cursor._impl.fetching_arrow = True + if size is not None: + cursor.arraysize = size + cursor.prefetchrows = cursor.arraysize + cursor.execute(statement, parameters) + if size is None: + yield cursor._impl.fetch_df_all(cursor) + else: + yield from cursor._impl.fetch_df_batches(cursor, batch_size=size) + def getSodaDatabase(self) -> SodaDatabase: """ Return a SODA database object for performing all operations on Simple From 8105c82d83247d7efb7dfd56ff1b62dd4ce6e271 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 14:16:35 -0700 Subject: [PATCH 043/178] Fix auth handling in OCI config provider. 
--- src/oracledb/plugins/oci_config_provider.py | 95 ++++++++++++++------- 1 file changed, 65 insertions(+), 30 deletions(-) diff --git a/src/oracledb/plugins/oci_config_provider.py b/src/oracledb/plugins/oci_config_provider.py index 89767ab3..4e444d00 100644 --- a/src/oracledb/plugins/oci_config_provider.py +++ b/src/oracledb/plugins/oci_config_provider.py @@ -53,8 +53,20 @@ def _get_config(parameters, connect_params): config = {} - credential = _get_credential(parameters) - client_oci = oci_object_storage_client(credential) + credential, signer = _get_credential(parameters) + auth_method = parameters.get("auth") + if auth_method is not None: + auth_method = auth_method.upper() + + if auth_method is None or auth_method == "OCI_DEFAULT": + client_oci = oci_object_storage_client(credential) + elif ( + auth_method == "OCI_INSTANCE_PRINCIPAL" + or auth_method == "OCI_RESOURCE_PRINCIPAL" + ): + client_oci = oci_object_storage_client( + config=credential, signer=signer + ) get_object_request = { "object_name": _get_required_parameter(parameters, "filename"), "bucket_name": _get_required_parameter(parameters, "bucketname"), @@ -79,6 +91,7 @@ def _get_config(parameters, connect_params): pwd = settings["password"] if settings["password"]["type"] == "oci-vault": pwd["credential"] = credential + pwd["auth"] = auth_method # password should be stored in JSON and not plain text. config["password"] = pwd @@ -99,33 +112,39 @@ def _get_credential(parameters): if auth is not None: auth = auth.upper() - if auth is None or auth == "OCI_DEFAULT": - # Default Authentication - # default path ~/.oci/config - return oci_from_file() - if "tenancy_user" in parameters and "oci_user" in parameters: - with open(parameters["oci_key_file"], "r") as file_content: - public_key = file_content.read() - _retrieve_region(parameters.get("objservername")) - provider = oci.signer.Signer( - tenancy=parameters["oci_tenancy"], - user=parameters["oci_user"], - fingerprint=parameters["oci_fingerprint"], - private_key_file_location=parameters["oci_key_file"], - private_key_content=public_key, - pass_phrase=None, - ) - else: + try: + if auth is None or auth == "OCI_DEFAULT": + # Default Authentication + # default path ~/.oci/config + return oci_from_file(), None + except oci.exceptions.ClientError: + # try to create config with connection string parameters. 
+ if "oci_tenancy" in parameters and "oci_user" in parameters: + with open(parameters["oci_key_file"], "r") as file_content: + public_key = file_content.read() + provider = dict( + tenancy=parameters["oci_tenancy"], + user=parameters["oci_user"], + fingerprint=parameters["oci_fingerprint"], + key_file=parameters["oci_key_file"], + private_key_content=public_key, + region=_retrieve_region(parameters.get("objservername")), + ) + return provider, None + + if auth == "OCI_INSTANCE_PRINCIPAL": signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() + return ( + dict(region=_retrieve_region(parameters.get("objservername"))), + signer, + ) + + elif auth == "OCI_RESOURCE_PRINCIPAL": rps = oci.auth.signers.get_resource_principals_signer() - if parameters[auth].upper() == "OCI_INSTANCE_PRINCIPAL": - provider = signer().build() - elif parameters[auth].upper() == "OCI_RESOURCE_PRINCIPAL": - provider = rps.builder().build() - else: - msg = "Authentication options not available in Connection String" - raise Exception(msg) - return provider + return {}, rps + else: + msg = "Authentication options not available in Connection String" + raise Exception(msg) def _get_required_parameter(parameters, name): @@ -170,9 +189,25 @@ def password_type_oci_vault_hook(args): raise Exception( "OCI Key Vault authentication details are not provided." ) - credential = _get_credential(auth) + credential, signer = _get_credential(auth) + auth_method = args.get("auth") + + if auth_method is not None: + auth_method = auth_method.upper() + + if auth_method is None or auth_method == "OCI_DEFAULT": + secret_client_oci = oci_secrets_client(credential) + elif auth_method == "OCI_INSTANCE_PRINCIPAL": + signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() + secret_client_oci = oci_secrets_client( + config=credential, signer=signer + ) + elif auth_method == "OCI_RESOURCE_PRINCIPAL": + signer = oci.auth.signers.get_resource_principals_signer() + secret_client_oci = oci_secrets_client( + config=credential, signer=signer + ) - secret_client_oci = oci_secrets_client(credential) get_secret_bundle_request = {"secret_id": secret_id} get_secret_bundle_response = secret_client_oci.get_secret_bundle( **get_secret_bundle_request @@ -182,7 +217,7 @@ def password_type_oci_vault_hook(args): def _retrieve_region(objservername): arr = objservername.split(".") - return arr[1].upper().replace("-", "_") + return arr[1].lower().replace("_", "-") def _stream_to_string(stream): From 63cd4120cce83e0b963fde39eb79008c23cddd21 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 14:24:16 -0700 Subject: [PATCH 044/178] Reorder release notes. --- doc/src/release_notes.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 41842d15..5053282a 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -23,25 +23,25 @@ Thin Mode Changes can be used to extend the capability of python-oracledb. #) Added support for property :attr:`ConnectionPool.max_lifetime_session` (`issue 410 `__). -#) Perform TLS server matching in python-oracledb instead of the Python SSL - library to allow alternate names to be checked - (`issue 415 `__). #) Added parameter :data:`ConnectParams.use_sni` to specify that the TLS SNI extension should be used to reduce the number of TLS neegotiations that are needed to connect to the database. 
-#) Improved support for planned database maintenance by internally sending - explicit request boundaries when using python-oracledb connection pools. #) Added parameter :data:`ConnectParams.instance_name` to specify the instance name to use when connecting to the database. Added support for setting the instance name in :ref:`Easy Connect strings `. +#) Added support for Transaction Guard by adding support to get the values of + :attr:`Connection.ltxid` and :attr:`oracledb._Error.isrecoverable`. +#) Improved support for planned database maintenance by internally sending + explicit request boundaries when using python-oracledb connection pools. +#) Perform TLS server matching in python-oracledb instead of the Python SSL + library to allow alternate names to be checked + (`issue 415 `__). #) Host names are now resolved to IP addresses in python-oracledb instead of the Python libraries. Address list load balancing and failover settings will be used when establishing connections. #) The thread that closes connection pools on interpreter shutdown is now only started when the first pool is created and not at module import (`issue 426 `__). -#) Added support for Transaction Guard by adding support to get the values of - :attr:`Connection.ltxid` and :attr:`oracledb._Error.isrecoverable`. #) Fixed hang when attempting to use pipelining against a database that doesn't support the end of response flag. #) Fixed hang when using asyncio and a connection is unexpectedly closed by @@ -102,6 +102,10 @@ Common Changes #) Added :meth:`oracledb.register_password_type()` to allow users to register a function that will be called when a password is supplied as a dictionary containing the key "type". +#) Added attributes :attr:`DbObjectAttribute.precision`, + :attr:`DbObjectAttribute.scale`, and :attr:`DbObjectAttribute.max_size` that + provide additional metadata about + :ref:`database object attributes `. #) Set the default value of :attr:`defaults.config_dir` to ``$ORACLE_HOME/network/admin`` if the environment variable ``ORACLE_HOME`` is set. @@ -115,10 +119,6 @@ Common Changes :ref:`full connect descriptor ` are passed through unchanged. All other parameters in other sections of a full connect descriptor that are unrecognized by the driver are ignored. -#) Added attributes :attr:`DbObjectAttribute.precision`, - :attr:`DbObjectAttribute.scale`, and :attr:`DbObjectAttribute.max_size` that - provide additional metadata about - :ref:`database object attributes `. #) Fixed bug where some :ref:`DbObject ` attributes for database objects defined using ANSI names (including FLOAT and REAL) may have shown as integers. From 77c37979abb280576f7617e174dc96fd2b1cb844 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 14:24:37 -0700 Subject: [PATCH 045/178] Remove the prerelease status from pipelining. --- doc/src/api_manual/async_connection.rst | 3 --- doc/src/api_manual/module.rst | 9 --------- doc/src/api_manual/pipeline.rst | 4 ---- doc/src/release_notes.rst | 4 ++++ doc/src/user_guide/asyncio.rst | 4 ---- 5 files changed, 4 insertions(+), 20 deletions(-) diff --git a/doc/src/api_manual/async_connection.rst b/doc/src/api_manual/async_connection.rst index b73b3e22..24edcbc2 100644 --- a/doc/src/api_manual/async_connection.rst +++ b/doc/src/api_manual/async_connection.rst @@ -224,9 +224,6 @@ AsyncConnection Methods .. note:: - In this release, pipelining support is experimental and subject to - change. - True pipelining requires Oracle Database 23ai. 
When you connect to an older database, operations are sequentially diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index 455a54d2..334179d9 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -1148,11 +1148,6 @@ Oracledb Methods Creates a :ref:`pipeline object ` which can be used to process a set of operations against a database. - .. note:: - - In this release, pipelining support is experimental and subject to - change. - .. versionadded:: 2.4.0 .. function:: create_pool(dsn=None, pool_class=oracledb.ConnectionPool, \ @@ -3260,10 +3255,6 @@ These constants belong to the enumeration called ``PipelineOpType``. The pipelining constants listed below are used to identify the type of operation added. They are possible values for the :attr:`PipelineOp.op_type` attribute. -.. note:: - - In this release, pipelining support is experimental and subject to change. - .. versionadded:: 2.4.0 .. data:: oracledb.PIPELINE_OP_TYPE_CALL_FUNC diff --git a/doc/src/api_manual/pipeline.rst b/doc/src/api_manual/pipeline.rst index 62f2d74a..326fe95b 100644 --- a/doc/src/api_manual/pipeline.rst +++ b/doc/src/api_manual/pipeline.rst @@ -4,10 +4,6 @@ API: Pipeline Objects ********************* -.. note:: - - In this release, pipelining support is experimental and subject to change. - Pipelining is only supported in python-oracledb Thin mode with :ref:`asyncio `. See :ref:`pipelining` for more information about pipelining. diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 5053282a..1fe8760c 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -42,6 +42,10 @@ Thin Mode Changes #) The thread that closes connection pools on interpreter shutdown is now only started when the first pool is created and not at module import (`issue 426 `__). +#) Added support for Transaction Guard by adding support to get the values of + :attr:`Connection.ltxid` and :attr:`oracledb._Error.isrecoverable`. +#) Support for :ref:`Pipelining ` is no longer considered a + pre-release. #) Fixed hang when attempting to use pipelining against a database that doesn't support the end of response flag. #) Fixed hang when using asyncio and a connection is unexpectedly closed by diff --git a/doc/src/user_guide/asyncio.rst b/doc/src/user_guide/asyncio.rst index f8055652..af46ee7c 100644 --- a/doc/src/user_guide/asyncio.rst +++ b/doc/src/user_guide/asyncio.rst @@ -300,10 +300,6 @@ and can destroy transactional consistency. Pipelining Database Operations ============================== -.. note:: - - In this release, pipelining support is experimental and subject to change. - Pipelining allows an application to send multiple, independent statements to Oracle Database with one call. The database can be kept busy without waiting for the application to receive a result set and send the next statement. While From c73b9373d35a4e3eacc3f10aa76321588d482d39 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 14:24:59 -0700 Subject: [PATCH 046/178] Added support for fetching data frames using asyncio. 
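
A condensed sketch of the new asynchronous API (connection details are
placeholders; the complete example is in samples/dataframe_pandas_async.py):

    import asyncio

    import oracledb
    import pandas

    async def main():
        conn = await oracledb.connect_async(
            user="user", password="password", dsn="localhost/orclpdb1"
        )

        # fetch all rows in one call as an OracleDataFrame
        odf = await conn.fetch_df_all(
            "select id, name from SampleQueryTab order by id", arraysize=100
        )
        df = pandas.api.interchange.from_dataframe(odf)  # zero copy
        print(df.describe())

        # or fetch in batches; each batch is an OracleDataFrame
        async for batch in conn.fetch_df_batches(
            "select id, name from SampleQueryTab order by id", size=10
        ):
            print(batch.num_rows(), "rows in this batch")

    asyncio.run(main())
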
--- doc/src/api_manual/async_connection.rst | 55 +++ doc/src/release_notes.rst | 6 +- doc/src/user_guide/asyncio.rst | 4 +- doc/src/user_guide/sql_execution.rst | 4 +- samples/dataframe_pandas_async.py | 97 +++++ src/oracledb/connection.py | 38 ++ src/oracledb/impl/thin/cursor.pyx | 21 + tests/test_8100_dataframe_async.py | 493 ++++++++++++++++++++++++ utils/templates/connection.py | 38 ++ 9 files changed, 752 insertions(+), 4 deletions(-) create mode 100644 samples/dataframe_pandas_async.py create mode 100644 tests/test_8100_dataframe_async.py diff --git a/doc/src/api_manual/async_connection.rst b/doc/src/api_manual/async_connection.rst index 24edcbc2..f08e31da 100644 --- a/doc/src/api_manual/async_connection.rst +++ b/doc/src/api_manual/async_connection.rst @@ -124,6 +124,61 @@ AsyncConnection Methods This is a shortcut for calling :meth:`AsyncConnection.cursor()`, :meth:`AsyncCursor.executemany()`, and then :meth:`AsyncCursor.close()`. +.. method:: AsyncConnection.fetch_df_all(statement, parameters=None, \ + arraysize=None) + + Fetches all rows of the SQL query ``statement``, returning them in an + :ref:`OracleDataFrame ` object. An empty + OracleDataFrame is returned if there are no rows available. + + The ``parameters`` parameter can be a list of tuples, where each tuple item + maps to one :ref:`bind variable placeholder ` in ``statement``. It + can also be a list of dictionaries, where the keys match the bind variable + placeholder names in ``statement``. + + The ``arraysize`` parameter can be specified to tune performance of fetching + data across the network. It defaults to :attr:`defaults.arraysize`. + Internally, the ``fetch_df_all()``'s :attr:`Cursor.prefetchrows` size is + always set to the value of the explicit or default ``arraysize`` parameter + value. + + See :ref:`dataframeformat` for the supported data types and examples. + + .. note:: + + The data frame support in python-oracledb 3.0.0 is a pre-release and + may change in the next version. + + .. versionadded:: 3.0.0 + +.. method:: AsyncConnection.fetch_df_batches(statement, parameters=None, \ + size=None) + + This returns an iterator yielding the next ``size`` rows of the SQL query + ``statement`` in each iteration as an :ref:`OracleDataFrame + ` object. An empty OracleDataFrame is returned if there + are no rows available. + + The ``parameters`` parameter can be a list of tuples, where each tuple item + maps to one :ref:`bind variable placeholder ` in ``statement``. It + can also be a list of dictionaries, where the keys match the bind variable + placeholder names in ``statement``. + + The ``size`` parameter controls the number of records fetched in each + batch. It defaults to :attr:`defaults.arraysize`. Internally, the + ``fetch_df_batches()``'s :attr:`Cursor.arraysize` and + :attr:`Cursor.prefetchrows` sizes are always set to the value of the + explicit or default ``size`` parameter value. + + See :ref:`dataframeformat` for the supported data types and examples. + + .. note:: + + The data frame support in python-oracledb 3.0.0 is a pre-release and + may change in the next version. + + .. versionadded:: 3.0.0 + .. 
method:: AsyncConnection.fetchall(statement, parameters=None, \ arraysize=None, rowfactory=None) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 1fe8760c..905b2d31 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -92,8 +92,10 @@ Thick Mode Changes Common Changes ++++++++++++++ -#) Added new methods :meth:`Connection.fetch_df_all()` and - :meth:`Connection.fetch_df_batches()` to fetch data as DataFrames +#) Added new methods :meth:`Connection.fetch_df_all()`, + :meth:`Connection.fetch_df_batches()`, + :meth:`AsyncConnection.fetch_df_all()`, and + :meth:`AsyncConnection.fetch_df_batches()` to fetch data as DataFrames compliant with the Python DataFrame Interchange protocol. See :ref:`dataframeformat`. #) Added support for Oracle Database 23ai SPARSE vectors. diff --git a/doc/src/user_guide/asyncio.rst b/doc/src/user_guide/asyncio.rst index af46ee7c..76c220c8 100644 --- a/doc/src/user_guide/asyncio.rst +++ b/doc/src/user_guide/asyncio.rst @@ -171,7 +171,9 @@ You can also use shortcut methods on the :ref:`asyncconnobj` object such as :meth:`AsyncConnection.execute()` or :meth:`AsyncConnection.executemany()`. Rows can be fetched using one of the shortcut methods :meth:`AsyncConnection.fetchone()`, -:meth:`AsyncConnection.fetchmany()`, or :meth:`AsyncConnection.fetchall()`. +:meth:`AsyncConnection.fetchmany()`, :meth:`AsyncConnection.fetchall()`, +:meth:`AsyncConnection.fetch_df_all()`, or +:meth:`AsyncConnection.fetch_df_batches()`. An example of using :meth:`AsyncConnection.fetchall()`: diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst index d3b63fa7..d1f6a143 100644 --- a/doc/src/user_guide/sql_execution.rst +++ b/doc/src/user_guide/sql_execution.rst @@ -13,7 +13,9 @@ executed. Statements are executed using one of these methods :meth:`Cursor.execute()`, :meth:`Cursor.executemany()`, :meth:`Connection.fetch_df_all()`, :meth:`Connection.fetch_df_batches()`, :meth:`AsyncCursor.execute()`, :meth:`AsyncCursor.executemany()`, -:meth:`AsyncConnection.execute()`, :meth:`AsyncConnection.executemany()`, or +:meth:`AsyncConnection.execute()`, :meth:`AsyncConnection.executemany()`, +:meth:`AsyncConnection.fetch_df_all()`, +:meth:`AsyncConnection.fetch_df_batches()`, or :meth:`AsyncConnection.run_pipeline()`. This chapter discusses python-oracledb's synchronous methods. The asynchronous diff --git a/samples/dataframe_pandas_async.py b/samples/dataframe_pandas_async.py new file mode 100644 index 00000000..b688773f --- /dev/null +++ b/samples/dataframe_pandas_async.py @@ -0,0 +1,97 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# dataframe_pandas_async.py +# +# An asynchronous version of dataframe_pandas.py +# +# Shows how to use AsyncConnection.fetch_df_all() and +# AsyncConnection.fetch_df_batches(). This example then creates Pandas +# dataframes. Alternative dataframe libraries could be used similar to the +# other, synchronous, data frame samples. +# ----------------------------------------------------------------------------- + +import asyncio + +import pandas +import oracledb +import sample_env + + +async def main(): + connection = await oracledb.connect_async( + user=sample_env.get_main_user(), + password=sample_env.get_main_password(), + dsn=sample_env.get_connect_string(), + params=sample_env.get_connect_params(), + ) + + SQL = "select id, name from SampleQueryTab order by id" + + # Get an OracleDataFrame. + # Adjust arraysize to tune the query fetch performance + odf = await connection.fetch_df_all(statement=SQL, arraysize=100) + + # Get a Pandas DataFrame from the data. + # This is a zero copy call + df = pandas.api.interchange.from_dataframe(odf) + + # Perform various Pandas operations on the DataFrame + + print("Columns:") + print(df.columns) + + print("\nDataframe description:") + print(df.describe()) + + print("\nLast three rows:") + print(df.tail(3)) + + print("\nTransform:") + print(df.T) + + # ------------------------------------------------------------------------- + + # An example of batch fetching + # + # Note that since this particular example ends up with all query rows being + # held in memory, it would be more efficient to use fetch_df_all() as shown + # above. + + print("\nFetching in batches:") + df = pandas.DataFrame() + + # Tune 'size' for your data set. Here it is small to show the batch fetch + # behavior on the sample table. + async for odf in connection.fetch_df_batches(statement=SQL, size=10): + df_b = pandas.api.interchange.from_dataframe(odf) + print(f"Appending {df_b.shape[0]} rows") + df = pandas.concat([df, df_b], ignore_index=True) + + print("\nLast three rows:") + print(df.tail(3)) + + +asyncio.run(main()) diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index e3aac76e..608c6fbb 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -1781,6 +1781,44 @@ async def fetchall( cursor.rowfactory = rowfactory return await cursor.fetchall() + async def fetch_df_all( + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, + arraysize: Optional[int] = None, + ): + """ + Fetch all data as OracleDataFrame. + """ + cursor = self.cursor() + cursor._impl.fetching_arrow = True + if arraysize is not None: + cursor.arraysize = arraysize + cursor.prefetchrows = cursor.arraysize + await cursor.execute(statement, parameters) + return await cursor._impl.fetch_df_all(cursor) + + async def fetch_df_batches( + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, + size: Optional[int] = None, + ): + """ + Fetch data in batches. 
Each batch is an OracleDataFrame + """ + cursor = self.cursor() + cursor._impl.fetching_arrow = True + if size is not None: + cursor.arraysize = size + cursor.prefetchrows = cursor.arraysize + await cursor.execute(statement, parameters) + if size is None: + yield await cursor._impl.fetch_df_all(cursor) + else: + async for df in cursor._impl.fetch_df_batches(cursor, size): + yield df + async def fetchmany( self, statement: str, diff --git a/src/oracledb/impl/thin/cursor.pyx b/src/oracledb/impl/thin/cursor.pyx index 16595850..8c37b1ab 100644 --- a/src/oracledb/impl/thin/cursor.pyx +++ b/src/oracledb/impl/thin/cursor.pyx @@ -340,6 +340,27 @@ cdef class AsyncThinCursorImpl(BaseThinCursorImpl): await protocol._process_single_message(message) self.warning = message.warning + async def fetch_df_all(self, cursor): + """ + Internal method used for fetching all data as OracleDataFrame + """ + while self._more_rows_to_fetch: + await self._fetch_rows_async(cursor) + return self._finish_building_arrow_arrays() + + async def fetch_df_batches(self, cursor, int batch_size): + """ + Internal method used for fetching next batch as OracleDataFrame. + """ + # Return the prefetched batch + yield self._finish_building_arrow_arrays() + + while self._more_rows_to_fetch: + self._create_arrow_arrays() + await self._fetch_rows_async(cursor) + if self._buffer_rowcount > 0: + yield self._finish_building_arrow_arrays() + async def fetch_next_row(self, cursor): """ Internal method used for fetching the next row from a cursor. diff --git a/tests/test_8100_dataframe_async.py b/tests/test_8100_dataframe_async.py new file mode 100644 index 00000000..0b0fd9a2 --- /dev/null +++ b/tests/test_8100_dataframe_async.py @@ -0,0 +1,493 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +""" +Module for testing dataframes using asyncio. 
+""" + +import datetime +import decimal +import unittest + +import oracledb + +try: + import pyarrow + import pandas + + HAS_INTEROP = True +except ImportError: + HAS_INTEROP = False + +import test_env + +# basic +DATASET_1 = [ + ( + 1, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + 12132.40, + 400, + datetime.datetime.now(), + ), + ( + 2, + "Big", + "Hero", + "San Fransokyo", + "Japansa", + datetime.date(1988, 8, 22), + 234234.32, + 400, + datetime.datetime.now(), + ), +] + +# None, -ve +DATASET_2 = [ + ( + 1, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + None, + 400, + datetime.datetime.now(), + ), + ( + 2, + "Big", + "Hero", + "San Fransokyo", + None, + datetime.date(1988, 8, 22), + -12312.1, + 0, + datetime.datetime.now(), + ), +] + +# None, +/- 0.XXX +DATASET_3 = [ + ( + 1, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + None, + 400, + datetime.datetime.now(), + ), + ( + 2, + "Big", + "Hero", + "San Fransokyo", + None, + datetime.date(1988, 8, 22), + 0.12, + 0, + datetime.datetime.now(), + ), + ( + 3, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + None, + 400, + datetime.datetime.now(), + ), + ( + 4, + "Big", + "Hero", + "San Fransokyo", + None, + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), +] + +# Duplicates +DATASET_4 = [ + ( + 1, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1989, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 2, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 3, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 4, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 5, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), + ( + 6, + "John", + "Doe", + "San Francisco", + "USA", + datetime.date(1988, 8, 22), + -0.01, + 0, + datetime.datetime.now(), + ), +] + + +@unittest.skipUnless( + test_env.get_is_thin(), "asyncio not supported in thick mode" +) +class TestCase(test_env.BaseAsyncTestCase): + + def __check_interop(self): + """ + Checks to see if the pyarrow and pandas modules are available. + """ + if not HAS_INTEROP: + self.skipTest("missing pandas or pyarrow modules") + + def __convert_to_array(self, data, typ): + """ + Convert raw data to an Arrow array using pyarrow. + """ + if isinstance(typ, pyarrow.Decimal128Type): + data = [ + decimal.Decimal(str(value)) if value is not None else value + for value in data + ] + elif isinstance(typ, pyarrow.TimestampType): + if typ.unit == "s": + data = [ + datetime.datetime(v.year, v.month, v.day).timestamp() + for v in data + ] + else: + data = [value.timestamp() * 1000000 for value in data] + mask = [value is None for value in data] + return pyarrow.array(data, typ, mask=mask) + + def __convert_to_df(self, data): + """ + Converts the data set to a Pandas data frame for comparison to what is + returned from the database. 
+ """ + data_by_col = [[row[i] for row in data] for i in range(len(data[0]))] + fetch_decimals = oracledb.defaults.fetch_decimals + types = [ + pyarrow.decimal128(9) if fetch_decimals else pyarrow.int64(), + pyarrow.string(), + pyarrow.string(), + pyarrow.string(), + pyarrow.string(), + pyarrow.timestamp("s"), + pyarrow.decimal128(9, 2) if fetch_decimals else pyarrow.float64(), + pyarrow.decimal128(3) if fetch_decimals else pyarrow.int64(), + pyarrow.timestamp("us"), + ] + arrays = [ + self.__convert_to_array(d, t) for d, t in zip(data_by_col, types) + ] + names = [ + "ID", + "FIRSTNAME", + "LASTNAME", + "CITY", + "COUNTRY", + "DATEOFBIRTH", + "SALARY", + "CREDITSCORE", + "LASTUPDATED", + ] + pa_tab = pyarrow.Table.from_arrays(arrays, names=names) + return pa_tab.to_pandas() + + def __get_data_from_df(self, df): + """ + Returns data from the data frame in a normalized fashion suitable for + comparison. In particular, NaN values cannot be compared to one another + so they are converted to the value None for comparison purposes. + """ + return [ + tuple(None if pandas.isna(v) else v for v in row) + for row in df.itertuples(index=False, name=None) + ] + + async def __populate_table(self, data): + """ + Populate the test table with the given data. + """ + await self.cursor.execute("truncate table TestDataframe") + types = [None] * len(data[0]) + types[8] = oracledb.DB_TYPE_TIMESTAMP + self.cursor.setinputsizes(*types) + await self.cursor.executemany( + """ + insert into TestDataframe ( + Id, FirstName, LastName, City, Country, + DateOfBirth, Salary, CreditScore, LastUpdated + ) values ( + :id, :first_name, :last_name, :city, :country, + :dob, :salary, :credit_score, :last_updated + ) + """, + data, + ) + await self.conn.commit() + + async def __test_df_interop(self, data): + """ + Tests interoperability with external data frames using the data set + provided. + """ + self.__check_interop() + await self.__populate_table(data) + statement = "select * from TestDataFrame order by Id" + ora_df = await self.conn.fetch_df_all(statement) + self.__validate_df(ora_df, data) + + async def __test_df_batches_interop(self, data, batch_size, num_batches): + """ + Tests interoperability with external data frames using the data set + provided. + """ + self.__check_interop() + await self.__populate_table(data) + statement = "select * from TestDataFrame order by Id" + batches = [ + df + async for df in self.conn.fetch_df_batches( + statement, size=batch_size + ) + ] + self.assertEqual(len(batches), num_batches) + if num_batches == 1: + self.__validate_df(batches[0], data) + else: + offset = 0 + for batch in batches: + self.__validate_df(batch, data[offset : offset + batch_size]) + offset += batch_size + + def __validate_df(self, ora_df, data): + """ + Validates the data frame by converting it to Pandas and comparing it + with the original data set that was used. 
+ """ + raw_df = self.__convert_to_df(data) + raw_data = self.__get_data_from_df(raw_df) + fetched_tab = pyarrow.Table.from_arrays( + ora_df.column_arrays(), names=ora_df.column_names() + ) + fetched_df = fetched_tab.to_pandas() + fetched_data = self.__get_data_from_df(fetched_df) + self.assertEqual(fetched_data, raw_data) + + async def test_8100(self): + "8100 - test basic fetch of data frame" + await self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame order by Id" + ora_df = await self.conn.fetch_df_all(statement) + self.assertEqual(ora_df.num_rows(), len(DATASET_1)) + self.assertEqual(ora_df.num_columns(), len(DATASET_1[0])) + metadata = dict( + num_columns=ora_df.num_columns(), + num_rows=ora_df.num_rows(), + num_chunks=1, + ) + self.assertEqual(ora_df.metadata, metadata) + + async def test_8101(self): + "8101 - test conversion to external dataframe" + await self.__test_df_interop(DATASET_1) + + async def test_8102(self): + "8101 - test null and negative values" + await self.__test_df_interop(DATASET_2) + + async def test_8103(self): + "8102 - test with fetch_decimals" + with test_env.DefaultsContextManager("fetch_decimals", True): + await self.__test_df_interop(DATASET_1) + + async def test_8104(self): + "8103 - test null and negative values with fetch_decimals" + with test_env.DefaultsContextManager("fetch_decimals", True): + await self.__test_df_interop(DATASET_2) + + async def test_8105(self): + "8105 - test null and values with leading zeros" + await self.__test_df_interop(DATASET_3) + + async def test_8106(self): + "8105 - test null and values with leading zeros with fetch_decimals" + with test_env.DefaultsContextManager("fetch_decimals", True): + await self.__test_df_interop(DATASET_3) + + async def test_8107(self): + "8107 - duplicate values in the rows" + await self.__test_df_interop(DATASET_4) + + async def test_8108(self): + "8108 - batches without specification of size" + await self.__test_df_batches_interop( + DATASET_4, batch_size=None, num_batches=1 + ) + + async def test_8109(self): + "8109 - batches with specification of size" + await self.__test_df_batches_interop( + DATASET_4, batch_size=5, num_batches=2 + ) + + async def test_8110(self): + "8110 - verify passing Arrow arrays twice fails" + self.__check_interop() + await self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame order by Id" + ora_df = await self.conn.fetch_df_all(statement) + pyarrow.Table.from_arrays( + ora_df.column_arrays(), names=ora_df.column_names() + ) + with self.assertRaises(pyarrow.lib.ArrowInvalid): + pyarrow.Table.from_arrays( + ora_df.column_arrays(), names=ora_df.column_names() + ) + + async def test_8111(self): + "8111 - verify empty data set" + await self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame where Id = 4" + ora_df = await self.conn.fetch_df_all(statement) + self.assertEqual(ora_df.num_rows(), 0) + + async def test_8112(self): + "8112 - verify empty data set with batches" + await self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame where Id = 4" + async for ora_df in self.conn.fetch_df_batches(statement): + self.assertEqual(ora_df.num_rows(), 0) + + async def test_8113(self): + "8113 - negative checks on attributes" + await self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame order by Id" + ora_df = await self.conn.fetch_df_all(statement) + with self.assertRaises(IndexError): + ora_df.get_column(121) + with self.assertRaises(IndexError): + ora_df.get_column(-1) + 
with self.assertRaises(KeyError): + ora_df.get_column_by_name("missing_column") + + async def test_8114(self): + "8114 - check size and null count with no nulls" + await self.__populate_table(DATASET_1) + statement = "select * from TestDataFrame order by Id" + ora_df = await self.conn.fetch_df_all(statement) + col = ora_df.get_column(0) + self.assertEqual(col.size(), len(DATASET_1)) + self.assertEqual(col.null_count, 0) + + async def test_8115(self): + "8115 - check size and null count with nulls present" + await self.__populate_table(DATASET_2) + statement = "select * from TestDataFrame order by Id" + ora_df = await self.conn.fetch_df_all(statement) + col = ora_df.get_column_by_name("SALARY") + self.assertEqual(col.size(), len(DATASET_2)) + self.assertEqual(col.null_count, 1) + + async def test_8116(self): + "8116 - check unsupported error for LOBs" + statement = "select to_clob('test_8116') from dual" + with self.assertRaisesFullCode("DPY-3030"): + await self.conn.fetch_df_all(statement) + + async def test_8117(self): + "8117 - batches with specification of size matching number of rows" + await self.__test_df_batches_interop( + DATASET_2, batch_size=len(DATASET_2), num_batches=1 + ) + + +if __name__ == "__main__": + test_env.run_test_cases() diff --git a/utils/templates/connection.py b/utils/templates/connection.py index bb607666..68c1f82e 100644 --- a/utils/templates/connection.py +++ b/utils/templates/connection.py @@ -1536,6 +1536,44 @@ async def fetchall( cursor.rowfactory = rowfactory return await cursor.fetchall() + async def fetch_df_all( + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, + arraysize: Optional[int] = None, + ): + """ + Fetch all data as OracleDataFrame. + """ + cursor = self.cursor() + cursor._impl.fetching_arrow = True + if arraysize is not None: + cursor.arraysize = arraysize + cursor.prefetchrows = cursor.arraysize + await cursor.execute(statement, parameters) + return await cursor._impl.fetch_df_all(cursor) + + async def fetch_df_batches( + self, + statement: str, + parameters: Optional[Union[list, tuple, dict]] = None, + size: Optional[int] = None, + ): + """ + Fetch data in batches. Each batch is an OracleDataFrame + """ + cursor = self.cursor() + cursor._impl.fetching_arrow = True + if size is not None: + cursor.arraysize = size + cursor.prefetchrows = cursor.arraysize + await cursor.execute(statement, parameters) + if size is None: + yield await cursor._impl.fetch_df_all(cursor) + else: + async for df in cursor._impl.fetch_df_batches(cursor, size): + yield df + async def fetchmany( self, statement: str, From d7ecec3cf480142dbfd37ea40ca3af23062d872d Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 14:25:20 -0700 Subject: [PATCH 047/178] Stop trying to establish the connection immediately as this prevents boostrapping! --- tests/test_env.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_env.py b/tests/test_env.py index 660d814b..784cc283 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -102,7 +102,6 @@ def _initialize(): module_name = f"oracledb.plugins.{name}" print("importing module", module_name) importlib.import_module(module_name) - get_connection() def get_value(name, label, default_value=None, password=False): From a092cfcbee801d8f815d9b98dfedd95e5a42dd43 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 14:25:50 -0700 Subject: [PATCH 048/178] Fix AQ thin mode with Oracle Database 19c. 
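
For reference, the enqueue/dequeue round trip exercised by this fix looks
roughly like the sketch below. The queue name, payload, and connection
details are illustrative placeholders; the queue is assumed to already exist
as a single-consumer RAW queue.

    import oracledb

    conn = oracledb.connect(user="user", password="password", dsn="dsn")

    queue = conn.queue("DEMO_RAW_QUEUE")
    queue.deqoptions.wait = oracledb.DEQ_NO_WAIT

    # enqueue one message and commit
    queue.enqone(conn.msgproperties(payload=b"hello"))
    conn.commit()

    # dequeue it again; deqone() returns None if no message is available
    props = queue.deqone()
    if props is not None:
        print(props.payload)
    conn.commit()
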
--- src/oracledb/impl/thin/messages.pyx | 30 +++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/oracledb/impl/thin/messages.pyx b/src/oracledb/impl/thin/messages.pyx index 2475166f..2a31e50c 100644 --- a/src/oracledb/impl/thin/messages.pyx +++ b/src/oracledb/impl/thin/messages.pyx @@ -2410,6 +2410,8 @@ cdef class DeqMessage(Message): if num_bytes > 0: ptr = buf._get_raw(num_bytes) self.props_impl.enq_txn_id = ptr[:num_bytes] + else: + self.props_impl.enq_txn_id = None buf.read_ub4(&num_extensions) # number of extensions if num_extensions > 0: buf.skip_ub1() @@ -2544,8 +2546,10 @@ cdef class DeqMessage(Message): buf.write_ub4(0) # condition length buf.write_uint8(0) # extensions buf.write_ub4(0) # number of extensions - buf.write_uint8(0) # JSON payload - buf.write_ub4(-1) # shard id + if buf._caps.ttc_field_version >= TNS_CCAP_FIELD_VERSION_20_1: + buf.write_uint8(0) # JSON payload + if buf._caps.ttc_field_version >= TNS_CCAP_FIELD_VERSION_21_1: + buf.write_ub4(-1) # shard id buf.write_bytes_with_length(queue_name_bytes) if consumer_name_bytes is not None: @@ -2610,9 +2614,13 @@ cdef class EnqMessage(Message): exceptionq_bytes = self.props_impl.exceptionq.encode() buf.write_ub4(len(exceptionq_bytes)) buf.write_bytes_with_length(exceptionq_bytes) - buf.write_ub4(self.props_impl.state) + buf.write_ub4(self.props_impl.state) # message state buf.write_ub4(0) # enqueue time length - buf.write_ub4(0) # enqueue transaction id length + if self.props_impl.enq_txn_id is None: + buf.write_ub4(0) # enqueue txn id length + else: + buf.write_ub4(len(self.props_impl.enq_txn_id)) + buf.write_bytes_with_length(self.props_impl.enq_txn_id) buf.write_ub4(4) # number of extensions buf.write_uint8(0x0e) # unknown extra byte buf.write_extension_values(None, None, TNS_AQ_EXT_KEYWORD_AGENT_NAME) @@ -2625,7 +2633,8 @@ cdef class EnqMessage(Message): buf.write_ub4(0) # cscn buf.write_ub4(0) # dscn buf.write_ub4(0) # flags - buf.write_ub4(0xffffffffl) # shard id + if buf._caps.ttc_field_version >= TNS_CCAP_FIELD_VERSION_21_1: + buf.write_ub4(0xffffffffl) # shard id if self.props_impl.recipients is None: buf.write_uint8(0) # recipients (pointer) @@ -2655,7 +2664,7 @@ cdef class EnqMessage(Message): buf.write_uint8(1) # return message id (pointer) buf.write_ub4(TNS_AQ_MESSAGE_ID_LENGTH) # return message id length enq_flags = 0 - if (self.enq_options_impl.delivery_mode == TNS_AQ_MSG_BUFFERED): + if self.enq_options_impl.delivery_mode == TNS_AQ_MSG_BUFFERED: enq_flags |= TNS_KPD_AQ_BUFMSG buf.write_ub4(enq_flags) # enqueue flags buf.write_uint8(0) # extensions 1 (pointer) @@ -2675,10 +2684,11 @@ cdef class EnqMessage(Message): buf.write_ub4(0) # sender address length buf.write_uint8(0) # sender charset id (pointer) buf.write_uint8(0) # sender ncharset id (pointer) - if self.queue_impl.is_json: - buf.write_uint8(1) # JSON payload (pointer) - else: - buf.write_uint8(0) # JSON payload (pointer) + if buf._caps.ttc_field_version >= TNS_CCAP_FIELD_VERSION_20_1: + if self.queue_impl.is_json: + buf.write_uint8(1) # JSON payload (pointer) + else: + buf.write_uint8(0) # JSON payload (pointer) buf.write_bytes_with_length(queue_name_bytes) buf.write_bytes(self.props_impl.toid) From a366371f6fb407802cddb0266273448648b9b05b Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 14:26:12 -0700 Subject: [PATCH 049/178] Update the version of the DataFrame API protocol to use typing constraints that are compatible with older versions of Python. 
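
The reason for the change, shown with standalone stubs that are not taken
from the patch: built-in generics such as tuple[...] and dict[...] are only
subscriptable on Python 3.9+ (PEP 585), and "X | None" needs Python 3.10+
(PEP 604), while the typing module equivalents work on the older
interpreters the driver still supports.

    from typing import Any, Dict, Optional, Tuple

    # annotations are evaluated when the functions are defined, so they must
    # be valid syntax and valid types on every supported Python version
    def describe_null() -> Tuple[int, Any]:      # not: tuple[int, Any]
        return 0, None

    def metadata() -> Dict[str, Any]:            # not: dict[str, Any]
        return {}

    def null_count() -> Optional[int]:           # not: int | None
        return None
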
--- src/oracledb/interchange/buffer.py | 4 +- src/oracledb/interchange/column.py | 36 +- src/oracledb/interchange/dataframe.py | 14 +- src/oracledb/interchange/protocol.py | 478 ++++++++++++++++++++------ 4 files changed, 404 insertions(+), 128 deletions(-) diff --git a/src/oracledb/interchange/buffer.py b/src/oracledb/interchange/buffer.py index 04461be0..798a8dba 100644 --- a/src/oracledb/interchange/buffer.py +++ b/src/oracledb/interchange/buffer.py @@ -28,6 +28,8 @@ # Implements the Buffer class as documented in DataFrame API # ----------------------------------------------------------------------------- +from typing import Tuple + from .protocol import ( Buffer, DlpackDeviceType, @@ -53,7 +55,7 @@ def __dlpack__(self): """ raise NotImplementedError("__dlpack__") - def __dlpack_device__(self) -> tuple[DlpackDeviceType, None]: + def __dlpack_device__(self) -> Tuple[DlpackDeviceType, None]: """ Device type and device ID for where the data in the buffer resides diff --git a/src/oracledb/interchange/column.py b/src/oracledb/interchange/column.py index 9bf24a59..3cf2d967 100644 --- a/src/oracledb/interchange/column.py +++ b/src/oracledb/interchange/column.py @@ -28,16 +28,16 @@ # Implements the Column class as documented in DataFrame API # ----------------------------------------------------------------------------- -from typing import Any, Iterable, Optional +from typing import Any, Dict, Iterable, Optional, Tuple from .buffer import OracleColumnBuffer from .protocol import ( + CategoricalDescription, Column, Dtype, ColumnBuffers, ColumnNullType, DtypeKind, - Endianness, ) from .nanoarrow_bridge import ( @@ -88,7 +88,7 @@ def _offsets_buffer(self): offsets_buffer = OracleColumnBuffer( size_in_bytes=size_bytes, address=address, buffer_type="offsets" ) - dtype = (DtypeKind.INT, 32, "i", Endianness.NATIVE) + dtype = (DtypeKind.INT, 32, "i", "=") return offsets_buffer, dtype def _validity_buffer(self): @@ -99,11 +99,17 @@ def _validity_buffer(self): validity_buffer = OracleColumnBuffer( size_in_bytes=size_bytes, address=address, buffer_type="validity" ) - dtype = (DtypeKind.BOOL, 1, "b", Endianness.NATIVE) + dtype = (DtypeKind.BOOL, 1, "b", "=") return validity_buffer, dtype + def describe_categorical(self) -> CategoricalDescription: + """ + Returns a description of a categorical data type. + """ + raise NotImplementedError() + @property - def describe_null(self) -> tuple[ColumnNullType, Optional[int]]: + def describe_null(self) -> Tuple[ColumnNullType, Optional[int]]: """ Returns a description of the null representation used by the column. """ @@ -119,29 +125,29 @@ def dtype(self) -> Dtype: information on the storage format and the type of data in the column. 
""" if self.ora_arrow_array.arrow_type == NANOARROW_TYPE_INT64: - return (DtypeKind.INT, 64, "l", Endianness.NATIVE) + return (DtypeKind.INT, 64, "l", "=") elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_DOUBLE: - return (DtypeKind.FLOAT, 64, "g", Endianness.NATIVE) + return (DtypeKind.FLOAT, 64, "g", "=") elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_FLOAT: - return (DtypeKind.FLOAT, 64, "g", Endianness.NATIVE) + return (DtypeKind.FLOAT, 64, "g", "=") elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_STRING: - return (DtypeKind.STRING, 8, "u", Endianness.NATIVE) + return (DtypeKind.STRING, 8, "u", "=") elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_TIMESTAMP: if self.ora_arrow_array.time_unit == NANOARROW_TIME_UNIT_MICRO: - return (DtypeKind.DATETIME, 64, "tsu:", Endianness.NATIVE) + return (DtypeKind.DATETIME, 64, "tsu:", "=") elif self.ora_arrow_array.time_unit == NANOARROW_TIME_UNIT_SECOND: - return (DtypeKind.DATETIME, 64, "tss:", Endianness.NATIVE) + return (DtypeKind.DATETIME, 64, "tss:", "=") elif self.ora_arrow_array.time_unit == NANOARROW_TIME_UNIT_MILLI: - return (DtypeKind.DATETIME, 64, "tsm:", Endianness.NATIVE) + return (DtypeKind.DATETIME, 64, "tsm:", "=") elif self.ora_arrow_array.time_unit == NANOARROW_TIME_UNIT_NANO: - return (DtypeKind.DATETIME, 64, "tsn:", Endianness.NATIVE) + return (DtypeKind.DATETIME, 64, "tsn:", "=") elif self.ora_arrow_array.arrow_type == NANOARROW_TYPE_DECIMAL128: array = self.ora_arrow_array return ( DtypeKind.DECIMAL, 128, f"d:{array.precision}.{array.scale}", - Endianness.NATIVE, + "=", ) def get_buffers(self) -> ColumnBuffers: @@ -166,7 +172,7 @@ def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable[Column]: yield self @property - def metadata(self) -> dict[str, Any]: + def metadata(self) -> Dict[str, Any]: """ Returns metadata about the column. """ diff --git a/src/oracledb/interchange/dataframe.py b/src/oracledb/interchange/dataframe.py index f305ba8c..768145b2 100644 --- a/src/oracledb/interchange/dataframe.py +++ b/src/oracledb/interchange/dataframe.py @@ -29,7 +29,7 @@ # https://data-apis.org/dataframe-protocol/latest/API.html # ----------------------------------------------------------------------------- -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Sequence from .column import OracleColumn @@ -149,3 +149,15 @@ def num_rows(self) -> int: Returns the number of rows in the data frame. """ return self._rows + + def select_columns(self, indices: Sequence[int]) -> "DataFrame": + """ + Create a new DataFrame by selecting a subset of columns by index. + """ + raise NotImplementedError() + + def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame": + """ + Create a new DataFrame by selecting a subset of columns by name. 
+ """ + raise NotImplementedError() diff --git a/src/oracledb/interchange/protocol.py b/src/oracledb/interchange/protocol.py index e464bb55..91739c75 100644 --- a/src/oracledb/interchange/protocol.py +++ b/src/oracledb/interchange/protocol.py @@ -27,22 +27,32 @@ # # Implement DataFrame class as documented in the standard # https://data-apis.org/dataframe-protocol/latest/API.html +# +# The DataFrame API standard has this file with the following changes: +# https://github.com/data-apis/dataframe-api/blob/main/protocol/dataframe_protocol.py +# - addition of license and this block of comments +# - addition of DtypeKind DECIMAL (24) +# - correction of typing for Column for older versions of Python +# - Black formatting # ----------------------------------------------------------------------------- -from enum import IntEnum +from abc import ( + ABC, + abstractmethod, +) +import enum from typing import ( Any, - ClassVar, - Literal, - Protocol, + Dict, + Iterable, + Optional, + Sequence, Tuple, TypedDict, ) -from collections.abc import Iterable, Sequence - -class DlpackDeviceType(IntEnum): +class DlpackDeviceType(enum.IntEnum): """Integer enum for device type codes matching DLPack.""" CPU = 1 @@ -55,7 +65,7 @@ class DlpackDeviceType(IntEnum): ROCM = 10 -class DtypeKind(IntEnum): +class DtypeKind(enum.IntEnum): """ Integer enum for data types. @@ -90,7 +100,7 @@ class DtypeKind(IntEnum): Dtype = Tuple[DtypeKind, int, str, str] # see Column.dtype -class ColumnNullType(IntEnum): +class ColumnNullType(enum.IntEnum): """ Integer enum for null type representation. @@ -116,167 +126,413 @@ class ColumnNullType(IntEnum): class ColumnBuffers(TypedDict): - """Buffers backing a column.""" - # first element is a buffer containing the column data; # second element is the data buffer's associated dtype - data: Tuple["Buffer", "Dtype"] + data: Tuple["Buffer", Dtype] # first element is a buffer containing mask values indicating missing data; # second element is the mask value buffer's associated dtype. # None if the null representation is not a bit or byte mask - validity: Tuple["Buffer", "Dtype"] + validity: Optional[Tuple["Buffer", Dtype]] # first element is a buffer containing the offset values for # variable-size binary data (e.g., variable-length strings); # second element is the offsets buffer's associated dtype. # None if the data buffer does not have an associated offsets buffer - offsets: Tuple["Buffer", "Dtype"] + offsets: Optional[Tuple["Buffer", Dtype]] class CategoricalDescription(TypedDict): - """Description of a categorical column.""" - # whether the ordering of dictionary indices is semantically meaningful is_ordered: bool # whether a dictionary-style mapping of categorical values to other objects # exists - is_dictionary: Literal[True] - # Python-level only (e.g. `{int: str}`). + is_dictionary: bool + # Python-level only (e.g. ``{int: str}``). # None if not a dictionary-style categorical. - categories: "Column" + categories: Optional["Column"] -class Buffer(Protocol): - """Interchange buffer object.""" +class Buffer(ABC): + """ + Data in the buffer is guaranteed to be contiguous in memory. + + Note that there is no dtype attribute present, a buffer can be thought of + as simply a block of memory. However, if the column that the buffer is + attached to has a dtype that's supported by DLPack and ``__dlpack__`` is + implemented, then that dtype information will be contained in the return + value from ``__dlpack__``. 
+ + This distinction is useful to support both data exchange via DLPack on a + buffer and (b) dtypes like variable-length strings which do not have a + fixed number of bytes per element. + """ @property + @abstractmethod def bufsize(self) -> int: - """Buffer size in bytes.""" + """ + Buffer size in bytes. + """ + pass @property + @abstractmethod def ptr(self) -> int: - """Pointer to start of the buffer as an integer.""" + """ + Pointer to start of the buffer as an integer. + """ + pass + + @abstractmethod + def __dlpack__(self): + """ + Produce DLPack capsule (see array API standard). - def __dlpack__(self) -> Any: - """Represent this structure as DLPack interface.""" + Raises: - def __dlpack_device__(self) -> Tuple["DlpackDeviceType", int | None]: - """Device type and device ID for where the data in the buffer - resides.""" + - TypeError : if the buffer contains unsupported dtypes. + - NotImplementedError : if DLPack support is not implemented + Useful to have to connect to array libraries. Support optional because + it's not completely trivial to implement for a Python-only library. + """ + raise NotImplementedError("__dlpack__") -class Column(Protocol): - """Interchange column object.""" + @abstractmethod + def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]: + """ + Device type and device ID for where the data in the buffer resides. + Uses device type codes matching DLPack. + Note: must be implemented even if ``__dlpack__`` is not. + """ + pass + +class Column(ABC): + """ + A column object, with only the methods and properties required by the + interchange protocol defined. + + A column can contain one or more chunks. Each chunk can contain up to three + buffers - a data buffer, a mask buffer (depending on null representation), + and an offsets buffer (if variable-size binary; e.g., variable-length + strings). + + TBD: there's also the "chunk" concept here, which is implicit in Arrow as + multiple buffers per array (= column here). Semantically it may make + sense to have both: chunks were meant for example for lazy evaluation + of data which doesn't fit in memory, while multiple buffers per column + could also come from doing a selection operation on a single + contiguous buffer. + + Given these concepts, one would expect chunks to be all of the same + size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows), + while multiple buffers could have data-dependent lengths. Not an issue + in pandas if one column is backed by a single NumPy array, but in + Arrow it seems possible. + Are multiple chunks *and* multiple buffers per column necessary for + the purposes of this interchange protocol, or must producers either + reuse the chunk concept for this or copy the data? + + Note: this Column object can only be produced by ``__dataframe__``, so + doesn't need its own version or ``__column__`` protocol. + """ + + @abstractmethod def size(self) -> int: - """Size of the column in elements.""" + """ + Size of the column, in elements. + + Corresponds to DataFrame.num_rows() if column is a single chunk; + equal to size of this current chunk otherwise. + + Is a method rather than a property because it may cause a (potentially + expensive) computation for some dataframe implementations. + """ + pass @property + @abstractmethod def offset(self) -> int: - """Offset of the first element with respect to the start - of the underlying buffer.""" # noqa: W505 + """ + Offset of first element. 
- @property - def dtype(self) -> "Dtype": - """Data type of the column.""" + May be > 0 if using chunks; for example for a column with N chunks of + equal size M (only the last chunk may be shorter), + ``offset = n * M``, ``n = 0 .. N-1``. + """ + pass @property - def describe_categorical(self) -> "CategoricalDescription": - """Description of the categorical data type of the column.""" + @abstractmethod + def dtype(self) -> Dtype: + """ + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. + + Bit-width : the number of bits as an integer + Format string : data type description format string in Apache Arrow C + Data Interface format. + Endianness : current only native endianness (``=``) is supported + + Notes: + - Kind specifiers are aligned with DLPack where possible (hence the + jump to 20, leave enough room for future extension) + - Masks must be specified as boolean with either bit width 1 (for + bit masks) or 8 (for byte masks). + - Dtype width in bits was preferred over bytes + - Endianness isn't too useful, but included now in case in the + future we need to support non-native endianness + - Went with Apache Arrow format strings over NumPy format strings + because they're more complete from a dataframe perspective + - Format strings are mostly useful for datetime specification, and + for categoricals. + - For categoricals, the format string describes the type of the + categorical in the data buffer. In case of a separate encoding of + the categorical (e.g. an integer to string mapping), this can + be derived from ``self.describe_categorical``. + - Data types not included: complex, Arrow-style null, binary, + decimal, and nested (list, struct, map, union) dtypes. + """ + pass @property - def describe_null(self) -> Tuple["ColumnNullType", Any]: - """Description of the null representation the column uses.""" + @abstractmethod + def describe_categorical(self) -> CategoricalDescription: + """ + If the dtype is categorical, there are two options: + - There are only values in the data buffer. + - There is a separate non-categorical Column encoding categorical + values. + + Raises TypeError if the dtype is not categorical + + Returns the dictionary with description on how to interpret the data + buffer: + - "is_ordered" : bool, whether the ordering of dictionary indices + is semantically meaningful. + - "is_dictionary" : bool, whether a mapping of + categorical values to other objects exists + - "categories" : Column representing the (implicit) mapping of + indices to category values (e.g. an array of cat1, + cat2, ...). + None if not a dictionary-style categorical. + + TBD: are there any other in-memory representations that are needed? + """ + pass @property - def null_count(self) -> int | None: - """Number of null elements, if known.""" + @abstractmethod + def describe_null(self) -> Tuple[ColumnNullType, Any]: + """ + Return the missing value (or "null") representation the column dtype + uses, as a tuple ``(kind, value)``. + + Value : if kind is "sentinel value", the actual value. If kind is a bit + mask or a byte mask, the value (0 or 1) indicating a missing value. + None otherwise. + """ + pass @property - def metadata(self) -> dict[str, Any]: - """The metadata for the column.""" + @abstractmethod + def null_count(self) -> Optional[int]: + """ + Number of null elements, if known. - def num_chunks(self) -> int: - """Return the number of chunks the column consists of.""" + Note: Arrow uses -1 to indicate "unknown", but None seems cleaner. 
+ """ + pass - def get_chunks(self, n_chunks: int | None = None) -> Iterable["Column"]: - """Return an iterator yielding the column chunks.""" - - def get_buffers(self) -> "ColumnBuffers": - """Return a dictionary containing the underlying buffers.""" + @property + @abstractmethod + def metadata(self) -> Dict[str, Any]: + """ + The metadata for the column. See `DataFrame.metadata` for more details. + """ + pass + + @abstractmethod + def num_chunks(self) -> int: + """ + Return the number of chunks the column consists of. + """ + pass + + @abstractmethod + def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable["Column"]: + """ + Return an iterator yielding the chunks. + + See `DataFrame.get_chunks` for details on ``n_chunks``. + """ + pass + + @abstractmethod + def get_buffers(self) -> ColumnBuffers: + """ + Return a dictionary containing the underlying buffers. + + The returned dictionary has the following contents: + + - "data": a two-element tuple whose first element is a buffer + containing the data and whose second element is the data + buffer's associated dtype. + - "validity": a two-element tuple whose first element is a buffer + containing mask values indicating missing data and + whose second element is the mask value buffer's + associated dtype. None if the null representation is + not a bit or byte mask. + - "offsets": a two-element tuple whose first element is a buffer + containing the offset values for variable-size binary + data (e.g., variable-length strings) and whose second + element is the offsets buffer's associated dtype. None + if the data buffer does not have an associated offsets + buffer. + """ + pass + + +# def get_children(self) -> Iterable[Column]: +# """ +# Children columns underneath the column, each object in this iterator +# must adhere to the column specification. +# """ +# pass + + +class DataFrame(ABC): + """ + A data frame class, with only the methods required by the interchange + protocol defined. + A "data frame" represents an ordered collection of named columns. + A column's "name" must be a unique string. + Columns may be accessed by name or by position. -class DataFrame(Protocol): - """Interchange dataframe object.""" + This could be a public data frame class, or an object with the methods and + attributes defined on this DataFrame class could be returned from the + ``__dataframe__`` method of a public data frame class in a library adhering + to the dataframe interchange protocol specification. + """ - version: ClassVar[int] # Version of the protocol + version = 0 # version of the protocol + @abstractmethod def __dataframe__( - self, - nan_as_null: bool = False, # noqa: FBT001 - allow_copy: bool = True, # noqa: FBT001 + self, nan_as_null: bool = False, allow_copy: bool = True ) -> "DataFrame": - """Convert to a dataframe object implementing the dataframe - interchange protocol.""" # noqa: W505 + """ + Construct a new exchange object, potentially changing the parameters. + + ``nan_as_null`` is a DEPRECATED keyword that should not be used. See + warning below. + ``allow_copy`` is a keyword that defines whether or not the library is + allowed to make a copy of the data. For example, copying data would be + necessary if a library supports strided buffers, given that this + protocol specifies contiguous buffers. + + WARNING: the ``nan_as_null`` parameter will be removed from the API + protocol. Please avoid passing it as either a positional or keyword + argument. Call this method using ``.__dataframe__(allow_copy=...)``. 
+ """ + pass @property - def metadata(self) -> dict[str, Any]: - """The metadata for the dataframe.""" - + @abstractmethod + def metadata(self) -> Dict[str, Any]: + """ + The metadata for the data frame, as a dictionary with string keys. The + contents of `metadata` may be anything, they are meant for a library + to store information that it needs to, e.g., roundtrip losslessly or + for two implementations to share data that is not (yet) part of the + interchange protocol specification. For avoiding collisions with other + entries, please add name the keys with the name of the library + followed by a period and the desired name, e.g, ``pandas.indexcol``. + """ + pass + + @abstractmethod def num_columns(self) -> int: - """Return the number of columns in the dataframe.""" - - def num_rows(self) -> int | None: - """Return the number of rows in the dataframe, if available.""" - + """ + Return the number of columns in the DataFrame. + """ + pass + + @abstractmethod + def num_rows(self) -> Optional[int]: + # TODO: not happy with Optional, but need to flag it may be expensive + # why include it if it may be None - what do we expect consumers + # to do here? + """ + Return the number of rows in the DataFrame, if available. + """ + pass + + @abstractmethod def num_chunks(self) -> int: - """Return the number of chunks the dataframe consists of..""" + """ + Return the number of chunks the DataFrame consists of. + """ + pass + @abstractmethod def column_names(self) -> Iterable[str]: - """Return the column names.""" - - def get_column(self, i: int) -> "Column": - """Return the column at the indicated position.""" - - def get_column_by_name(self, name: str) -> "Column": - """Return the column with the given name.""" - - def get_columns(self) -> Iterable["Column"]: - """Return an iterator yielding the columns.""" - + """ + Return an iterator yielding the column names. + """ + pass + + @abstractmethod + def get_column(self, i: int) -> Column: + """ + Return the column at the indicated position. + """ + pass + + @abstractmethod + def get_column_by_name(self, name: str) -> Column: + """ + Return the column whose name is the indicated name. + """ + pass + + @abstractmethod + def get_columns(self) -> Iterable[Column]: + """ + Return an iterator yielding the columns. + """ + pass + + @abstractmethod def select_columns(self, indices: Sequence[int]) -> "DataFrame": - """Create a new dataframe by selecting a subset of columns by index.""" + """ + Create a new DataFrame by selecting a subset of columns by index. 
+ """ + pass + @abstractmethod def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame": - """Create a new dataframe by selecting a subset of columns by name.""" - - def get_chunks(self, n_chunks: int | None = None) -> Iterable["DataFrame"]: - """Return an iterator yielding the chunks of the dataframe.""" - - -class SupportsInterchange(Protocol): - """Dataframe that supports conversion into an interchange - dataframe object.""" - - def __dataframe__( - self, - nan_as_null: bool = False, # noqa: FBT001 - allow_copy: bool = True, # noqa: FBT001 - ) -> "SupportsInterchange": - """Convert to a dataframe object implementing the dataframe - interchange protocol.""" # noqa: W505 - - -class Endianness: - """Enum indicating the byte-order of a data type.""" - - LITTLE = "<" - BIG = ">" - NATIVE = "=" - NA = "|" - - -class CopyNotAllowedError(RuntimeError): - """Exception raised when a copy is required, - but `allow_copy` is set to `False`.""" + """ + Create a new DataFrame by selecting a subset of columns by name. + """ + pass + + @abstractmethod + def get_chunks( + self, n_chunks: Optional[int] = None + ) -> Iterable["DataFrame"]: + """ + Return an iterator yielding the chunks. + + By default (None), yields the chunks that the data is stored as by the + producer. If given, ``n_chunks`` must be a multiple of + ``self.num_chunks()``, meaning the producer must subdivide each chunk + before yielding it. + + Note that the producer must ensure that all columns are chunked the + same way. + """ + pass From a7fd59a02eb80a64f0f9717b7719d7d0e87cee62 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 14:31:13 -0700 Subject: [PATCH 050/178] Cloud native authentication support plugins. --- doc/src/api_manual/connect_params.rst | 17 +- doc/src/api_manual/module.rst | 141 +++- doc/src/api_manual/pool_params.rst | 6 +- doc/src/release_notes.rst | 13 + doc/src/user_guide/connection_handling.rst | 740 +++++++++++++++------ src/oracledb/__init__.py | 2 + src/oracledb/base_impl.pxd | 1 + src/oracledb/base_impl.pyx | 3 + src/oracledb/connect_params.py | 22 +- src/oracledb/connection.py | 10 + src/oracledb/errors.py | 4 + src/oracledb/impl/base/connect_params.pyx | 10 + src/oracledb/plugins/azure_tokens.py | 79 +++ src/oracledb/plugins/oci_tokens.py | 168 +++++ src/oracledb/pool.py | 10 + src/oracledb/pool_params.py | 13 +- src/oracledb/utils.py | 24 +- tests/test_1100_connection.py | 19 + tests/test_2400_pool.py | 25 + tests/test_4500_connect_params.py | 2 + tests/test_4700_pool_params.py | 1 + utils/fields.cfg | 7 + 22 files changed, 1100 insertions(+), 217 deletions(-) create mode 100644 src/oracledb/plugins/azure_tokens.py create mode 100644 src/oracledb/plugins/oci_tokens.py diff --git a/doc/src/api_manual/connect_params.rst b/doc/src/api_manual/connect_params.rst index 40b2896d..6c225e6d 100644 --- a/doc/src/api_manual/connect_params.rst +++ b/doc/src/api_manual/connect_params.rst @@ -64,15 +64,15 @@ ConnectParams Methods terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, use_sni=None, \ thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ - handle=None) + extra_auth_params=None, handle=None) Sets the values for one or more of the parameters of a ConnectParams object. .. versionchanged:: 3.0.0 - The ``use_sni``, ``thick_mode_dsn_passthrough``, and ``instance_name`` - parameters were added. 
+ The ``use_sni``, ``thick_mode_dsn_passthrough``, ``extra_auth_params`` + and ``instance_name`` parameters were added. .. versionchanged:: 2.5.0 @@ -242,6 +242,17 @@ ConnectParams Attributes This attribute is only supported in python-oracledb Thick mode. +.. attribute:: ConnectParams.extra_auth_params + + This read-only attribute is a dictionary containing the configuration + parameters necessary for Oracle Database authentication using + :ref:`Azure ` or + :ref:` ` cloud native authentication plugins. + + This attribute is supported in both python-oracledb Thin and Thick modes. + + .. versionadded:: 3.0.0 + .. attribute:: ConnectParams.host This read-only attribute is a string that returns the name or IP address of diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index 334179d9..afb66573 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -54,7 +54,7 @@ Oracledb Methods terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, use_sni=False, \ thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ - handle=0) + extra_auth_params=None, handle=0) Constructor for creating a connection to the database. Returns a :ref:`Connection Object `. All parameters are optional and can be @@ -408,6 +408,13 @@ Oracledb Methods python-oracledb Thick mode. The default value is the value of :attr:`defaults.thick_mode_dsn_passthrough`. + The ``extra_auth_params`` parameter is expected to be a dictionary + containing the configuration parameters necessary for Oracle Database + authentication using :ref:`Azure ` or + :ref:`OCI ` cloud native authentication plugins. + This value is used in both the python-oracledb Thin and Thick modes. See + :ref:`tokenauth`. + If the ``handle`` parameter is specified, it must be of type OCISvcCtx\* and is only of use when embedding Python in an application (like PowerBuilder) which has already made the connection. The connection thus @@ -418,9 +425,10 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias``, ``instance_name``, ``use_sni``, and - ``thick_mode_dsn_passthrough`` parameters were added. The ``pool`` - parameter was deprecated. Use :meth:`ConnectionPool.acquire()` instead. + The ``pool_alias``, ``instance_name``, ``use_sni``, + ``thick_mode_dsn_passthrough`` and ``extra_auth_params`` parameters + were added. The ``pool`` parameter was deprecated: use + :meth:`ConnectionPool.acquire()` instead. .. versionchanged:: 2.5.0 @@ -467,7 +475,7 @@ Oracledb Methods terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, use_sni=False, \ thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ - handle=0) + extra_auth_params=None, handle=0) Constructor for creating a connection to the database. Returns an :ref:`AsyncConnection Object `. All parameters are optional @@ -747,15 +755,22 @@ Oracledb Methods is used in both the python-oracledb Thin and Thick modes. The default is the value of :attr:`defaults.driver_name`. + The ``extra_auth_params`` parameter is expected to be a dictionary + containing the configuration parameters necessary for Oracle Database + authentication using :ref:`Azure ` or + :ref:`OCI ` cloud native authentication plugins. + This value is used in both the python-oracledb Thin and Thick modes. See + :ref:`tokenauth`. + The ``thick_mode_dsn_passthrough`` and ``handle`` parameters are ignored in python-oracledb Thin mode. .. 
versionchanged:: 3.0.0 - The ``pool_alias``, ``instance_name``, ``use_sni``, and - ``thick_mode_dsn_passthrough`` parameters were added. The ``pool`` - parameter was deprecated. Use :meth:`AsyncConnectionPool.acquire()` - instead. + The ``pool_alias``, ``instance_name``, ``use_sni``, + ``thick_mode_dsn_passthrough`` and ``extra_auth_params`` parameters + were added. The ``pool`` parameter was deprecated: use + :meth:`AsyncConnectionPool.acquire()` instead. .. versionchanged:: 2.5.0 @@ -800,7 +815,7 @@ Oracledb Methods terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, use_sni=False, \ thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ - handle=0) + extra_auth_params=None, handle=0) Contains all the parameters that can be used to establish a connection to the database. @@ -1108,6 +1123,13 @@ Oracledb Methods python-oracledb Thick mode. The default value is the value of :attr:`defaults.thick_mode_dsn_passthrough`. + The ``extra_auth_params`` parameter is expected to be a dictionary + containing the configuration parameters necessary for Oracle Database + authentication using :ref:`Azure ` or + :ref:`OCI ` cloud native authentication plugins. + This value is used in both the python-oracledb Thin and Thick modes. See + :ref:`tokenauth`. + The ``handle`` parameter is expected to be an integer which represents a pointer to a valid service context handle. This value is only used in the python-oracledb Thick mode. It should be used with extreme caution. The @@ -1115,8 +1137,8 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``use_sni``, ``thick_mode_dsn_passthrough``, and ``instance_name`` - parameters were added. + The ``instance_name``, ``use_sni``, ``thick_mode_dsn_passthrough`` and + ``extra_auth_params`` parameters were added. .. versionchanged:: 2.5.0 @@ -1174,7 +1196,7 @@ Oracledb Methods terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, use_sni=False, \ thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ - handle=0) + extra_auth_params=None, handle=0) Creates a connection pool with the supplied parameters and returns the :ref:`ConnectionPool object ` for the pool. See :ref:`Connection @@ -1603,6 +1625,13 @@ Oracledb Methods python-oracledb Thick mode. The default value is :attr:`defaults.thick_mode_dsn_passthrough`. + The ``extra_auth_params`` parameter is expected to be a dictionary + containing the configuration parameters necessary for Oracle Database + authentication using :ref:`Azure ` or + :ref:`OCI ` cloud native authentication plugins. + This value is used in both the python-oracledb Thin and Thick modes. See + :ref:`tokenauth`. + If the ``handle`` parameter is specified, it must be of type OCISvcCtx\* and is only of use when embedding Python in an application (like PowerBuilder) which has already made the connection. The connection thus @@ -1613,8 +1642,9 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``pool_alias``, ``instance_name``, ``use_sni``, and - ``thick_mode_dsn_passthrough`` parameters were added. + The ``pool_alias``, ``instance_name``, ``use_sni``, + ``thick_mode_dsn_passthrough`` and ``extra_auth_params`` parameters + were added. .. 
versionchanged:: 2.5.0 @@ -1666,7 +1696,7 @@ Oracledb Methods terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, use_sni=False, \ thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ - handle=0) + extra_auth_params=None, handle=0) Creates a connection pool with the supplied parameters and returns the :ref:`AsyncConnectionPool object ` for the pool. @@ -2005,13 +2035,21 @@ Oracledb Methods is used in both the python-oracledb Thin and Thick modes. The default is the value of :attr:`defaults.driver_name`. + The ``extra_auth_params`` parameter is expected to be a dictionary + containing the configuration parameters necessary for Oracle Database + authentication using :ref:`Azure ` or + :ref:`OCI ` cloud native authentication plugins. + This value is used in both the python-oracledb Thin and Thick modes. See + :ref:`tokenauth`. + The ``handle`` and ``thick_mode_dsn_passthrough`` parameters are ignored in python-oracledb Thin mode. .. versionchanged:: 3.0.0 - The ``pool_alias``, ``instance_name``, ``use_sni``, and - ``thick_mode_dsn_passthrough`` parameters were added. + The ``pool_alias``, ``instance_name``, ``use_sni``, + ``thick_mode_dsn_passthrough`` and ``extra_auth_params`` parameters + were added. .. versionchanged:: 2.5.0 @@ -2230,7 +2268,7 @@ Oracledb Methods terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, use_sni=False, \ thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ - handle=0) + extra_auth_params=None, handle=0) Creates and returns a :ref:`PoolParams Object `. The object can be passed to :meth:`oracledb.create_pool()`. @@ -2602,6 +2640,13 @@ Oracledb Methods python-oracledb Thick mode. The default value is :attr:`defualts.thick_mode_dsn_passthrough`. + The ``extra_auth_params`` parameter is expected to be a dictionary + containing the configuration parameters necessary for Oracle Database + authentication using :ref:`Azure ` or + :ref:`OCI ` cloud native authentication plugins. + This value is used in both the python-oracledb Thin and Thick modes. See + :ref:`tokenauth`. + The ``handle`` parameter is expected to be an integer which represents a pointer to a valid service context handle. This value is only used in the python-oracledb Thick mode. It should be used with extreme caution. The @@ -2609,8 +2654,8 @@ Oracledb Methods .. versionchanged:: 3.0.0 - The ``use_sni``, ``thick_mode_dsn_passthrough``, and ``instance_name`` - parameters were added. + The ``use_sni``, ``instance_name``, ``thick_mode_dsn_passthrough``, + ``extra_auth_params`` and ``instance_name`` parameters were added. .. versionchanged:: 2.5.0 @@ -2651,6 +2696,22 @@ Oracledb Methods .. versionadded:: 3.0.0 +.. function:: register_params_hook(hook_function) + + Registers a user hook function that will be called internally by + python-oracledb prior to connection or pool creation. The hook function + accepts a copy of the parameters that will be used to create the pool or + standalone connection and may modify them. For example, the cloud native + authentication plugins modify the "access_token" parameter with a function + that will acquire the token using information found in the + "extra_auth_parms" parameter. + + .. note:: + + This method is an extension to the DB API definition. + + .. versionadded:: 3.0.0 + .. 
function:: register_password_type(password_type, hook_function) Registers a user hook function that will be called internally by @@ -2778,6 +2839,17 @@ Oracledb Methods (number of seconds since the epoch; see the documentation of the standard Python time module for details). +.. function:: unregister_params_hook(hook_function) + + Unregisters a user function that was earlier registered with a call to + :meth:`oracledb.register_params_hook()`. + + .. note:: + + This method is an extension to the DB API definition. + + .. versionadded:: 3.0.0 + .. _interval_ym: @@ -4424,3 +4496,30 @@ will use to connect to Oracle Database. See :ref:`importconfigazureplugin` for more information. .. versionadded:: 3.0.0 + +.. _ocicloudnativeauthplugin: + +Oracle Cloud Infrastructure (OCI) Cloud Native Authentication Plugin +-------------------------------------------------------------------- + +``oci_tokens`` is a plugin that uses the Oracle Cloud Infrastructure (OCI) +Software Development Kit (SDK) to generate access tokens when authenticating +with OCI Identity and Access Management (IAM) token-based authentication. +Importing this plugin defines and +:meth:`registers `, the built-in hook function +that generates OCI IAM access tokens. See :ref:`cloudnativeauthoci`. + +.. versionadded:: 3.0.0 + +.. _azurecloudnativeauthplugin: + +Azure Cloud Native Authentication Plugin +---------------------------------------- + +``azure_tokens`` is a plugin that uses the Microsoft Authentication Library +(MSAL) to generate access tokens when authenticating with OAuth 2.0 token-based +authentication. Importing this plugin defines and +:meth:`registers `, the built-in hook function +that generates OAuth2 access tokens. See :ref:`cloudnativeauthoauth`. + +.. versionadded:: 3.0.0 diff --git a/doc/src/api_manual/pool_params.rst b/doc/src/api_manual/pool_params.rst index 11ca1dfa..5a802522 100644 --- a/doc/src/api_manual/pool_params.rst +++ b/doc/src/api_manual/pool_params.rst @@ -53,14 +53,14 @@ PoolParams Methods terminal=oracledb.defaults.terminal, osuser=oracledb.defaults.osuser, \ driver_name=oracledb.defaults.driver_name, use_sni=None, \ thick_mode_dsn_passthrough=oracledb.defaults.thick_mode_dsn_passthrough, \ - handle=None) + extra_auth_params=None, handle=None) Sets one or more of the parameters. .. versionchanged:: 3.0.0 - The ``use_sni``, ``thick_mode_dsn_passthrough``, and - ``instance_name`` parameters were added. + The ``use_sni``, ``thick_mode_dsn_passthrough``, + ``extra_auth_params`` and ``instance_name`` parameters were added. .. versionchanged:: 2.5.0 diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 905b2d31..39a53738 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -108,6 +108,19 @@ Common Changes #) Added :meth:`oracledb.register_password_type()` to allow users to register a function that will be called when a password is supplied as a dictionary containing the key "type". +#) Added :ref:`cloud native authentication ` support through the + integration of Oracle Cloud Infrastructure (OCI) SDK and Azure SDK. +#) Added parameter ``extra_auth_params`` to :meth:`oracledb.connect()`, + :meth:`oracledb.connect_async()`, :meth:`oracledb.create_pool()`, + and :meth:`oracledb.create_pool_async()` which is used to specify the + configuration parameters required for cloud native authentication. 
+#) Added :meth:`oracledb.register_params_hook()` and + :meth:`oracledb.unregister_params_hook()` which allow users to register or + unregister a function that manipulates the parameters used for creating + pools or standalone connections. See + :ref:`oci_tokens ` and + :ref:`azure_tokens ` plugins which make use of + this functionality. #) Added attributes :attr:`DbObjectAttribute.precision`, :attr:`DbObjectAttribute.scale`, and :attr:`DbObjectAttribute.max_size` that provide additional metadata about diff --git a/doc/src/user_guide/connection_handling.rst b/doc/src/user_guide/connection_handling.rst index e3edf20c..4e8240a5 100644 --- a/doc/src/user_guide/connection_handling.rst +++ b/doc/src/user_guide/connection_handling.rst @@ -1597,6 +1597,66 @@ Calling :meth:`~oracledb.register_password_type()` with the ``hook_function`` parameter set to None will result in a previously registered user function being removed and the default behavior restored. +.. _registerparamshook(): + +Using oracledb.register_params_hook() +------------------------------------- + +The :meth:`oracledb.register_params_hook()` method registers a user hook +function that will be called internally by python-oracledb prior to connection +or pool creation. The hook function will be invoked when +:meth:`oracledb.connect()`, :meth:`oracledb.create_pool()`, +:meth:`oracledb.connect_async()`, or :meth:`oracledb.create_pool_async()` are +called. The hook function accepts a copy of the parameters that will be used +to create the pool or standalone connections and may modify them. For example, +the cloud native authentication plugins modify the ``access_token`` parameter +with a function that will acquire the token using the information found in the +``extra_auth_parms`` parameter. + +Below is an example of registering a hook function, oci_token_hook, to +generate OCI IAM tokens. This hook function is registered using +:meth:`oracledb.register_params_hook()`. + +.. code-block:: python + + def oci_token_hook(params: oracledb.ConnectParams): + + if params.extra_auth_params is not None: + + def token_callback(refresh): + return generate_token(params.extra_auth_params, refresh) + + params.set(access_token=token_callback) + + oracledb.register_params_hook(oci_token_hook) + +To unregister a user function that was earlier registered with a call to +:meth:`oracledb.register_params_hook()`, you can use +:meth:`oracledb.unregister_params_hook`. + +**register_protocol() and register_params_hook()** + +Consider the following example in which both +the :meth:`oracledb.register_protocol()` and +:meth:`oracledb.register_params_hook()` methods are used. In this example, a +JSON file, *config.json*, is passed as a value in the ``dsn`` parameter. +This file contains the configuration parameters necessary to establish a +connection to Oracle Database. Additionally, the +:ref:`oci_tokens plugin ` is imported to retrieve the +access tokens. + +.. code-block:: python + + import oracledb + import oracledb.plugins.oci_tokens + + connection = oracledb.connect(dsn="config-file://config.json") + +In this example, the :meth:`oracledb.register_protocol()` registers a built-in +hook function when the ``oracledb`` module is imported. Similarly, the +:meth:`oracledb.register_params_hook()` method is implicitly invoked when the +``oracledb.plugins.oci_tokens`` module is imported. + .. _ldapconnections: LDAP Directory Naming @@ -3541,6 +3601,10 @@ without getting a new token. 
The two authentication methods supported by python-oracledb are :ref:`Open Authorization (OAuth 2.0) ` and :ref:`Oracle Cloud Infrastructure (OCI) Identity and Access Management (IAM) `. +These authentication methods can use Cloud Native Authentication with the +support of the Azure SDK or OCI SDK to generate access tokens and connect to +Oracle Database. Alternatively, these methods can use a Python script that +contains a class to generate access tokens to connect to Oracle Database. .. _oauth2: @@ -3548,40 +3612,42 @@ Connecting Using OAuth 2.0 Token-Based Authentication ----------------------------------------------------- Oracle Cloud Infrastructure (OCI) users can be centrally managed in a Microsoft -Azure Active Directory (Azure AD) service. Open Authorization (OAuth 2.0) token-based -authentication allows users to authenticate to Oracle Database using Azure AD OAuth2 -tokens. Currently, only Azure AD tokens are supported. Ensure that you have a -Microsoft Azure account and your Oracle Database is registered with Azure AD. See -`Configuring the Oracle Autonomous Database for Microsoft Azure AD Integration -`_ for more information. -Both Thin and Thick modes of the python-oracledb driver support OAuth 2.0 token-based -authentication. - -When using python-oracledb in Thick mode, Oracle Client libraries 19.15 (or later), -or 21.7 (or later) are needed. - -OAuth 2.0 token-based authentication can be used for both standalone connections -and connection pools. Tokens can be specified using the connection parameter -introduced in python-oracledb 1.1. Users of earlier python-oracledb versions -can alternatively use -:ref:`OAuth 2.0 Token-Based Authentication Connection Strings`. +Azure Active Directory (Azure AD) service. Open Authorization (OAuth 2.0) +token-based authentication allows users to authenticate to Oracle Database +using Azure AD OAuth2 tokens. Ensure that you have a Microsoft Azure account +and your Oracle Database is registered with Azure AD. See `Configuring the +Oracle Autonomous Database for Microsoft Azure AD Integration `_ for more information. Both Thin and Thick modes of the +python-oracledb driver support OAuth 2.0 token-based authentication. + +When using python-oracledb in Thick mode, Oracle Client libraries 19.15 (or +later), or 21.7 (or later) are needed. + +Standalone connections and pooled connections can be created in python-oracledb +Thick and Thin modes using OAuth 2.0 token-based authentication. This can be +done or by using a class such as the :ref:`TokenHandlerOAuth Class +` or by using python-oracledb's :ref:`Azure Cloud Native +Authentication Plugin (azure_tokens) `. Tokens can be +specified using the connection parameter introduced in python-oracledb 1.1. +Users of earlier python-oracledb versions can alternatively use +:ref:`OAuth 2.0 Token-Based Authentication Connection Strings `. OAuth2 Token Generation And Extraction ++++++++++++++++++++++++++++++++++++++ -There are different ways to retrieve Azure AD OAuth2 tokens. Some of the ways to -retrieve the OAuth2 tokens are detailed in `Examples of Retrieving Azure AD OAuth2 -Tokens `_. You can also retrieve Azure AD OAuth2 -tokens by using `Azure Identity client library for Python -`_. +There are different ways to retrieve Azure AD OAuth2 tokens. You can use +python-oracledb's :ref:`azure_tokens ` plugin to generate +tokens. Some of the other ways to retrieve OAuth2 tokens are detailed in +`Examples of Retrieving Azure AD OAuth2 Tokens `_. 
You can +also retrieve Azure AD OAuth2 tokens by using `Azure Identity client library +for Python `_. .. _oauthhandler: -Example of Using a TokenHandlerOAuth Class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Example of Using a TokenHandlerOAuth Class** Here, as an example, we are using a Python script to automate the process of generating and reading the Azure AD OAuth2 tokens. @@ -3627,108 +3693,113 @@ process of generating and reading the Azure AD OAuth2 tokens. f.write(self.token) return self.token -The TokenHandlerOAuth class uses a callable to generate and read the OAuth2 -tokens. When the callable in the TokenHandlerAuth class is invoked for the +The TokenHandlerOAuth class uses a callable to generate and read OAuth2 +tokens. When the callable in the TokenHandlerOAuth class is invoked for the first time to create a standalone connection or pool, the ``refresh`` parameter -is False which allows the callable to return a cached token, if desired. The +is *False* which allows the callable to return a cached token, if desired. The expiry date is then extracted from this token and compared with the current date. If the token has not expired, then it will be used directly. If the token has expired, the callable is invoked the second time with the ``refresh`` -parameter set to True. +parameter set to *True*. -See :ref:`curl` for an alternative way to generate the tokens. +The TokenHandlerOAuth class defined here is used in the examples shown in +:ref:`conncreationoauth2`. -Standalone Connection Creation with OAuth2 Access Tokens -++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +**Example of Using a Curl Command** -For OAuth 2.0 Token-Based Authentication, the ``access_token`` connection parameter -must be specified. This parameter should be a string (or a callable that returns a -string) specifying an Azure AD OAuth2 token. +See using a :ref:`curl ` command for an alternative way to generate the +tokens. -Standalone connections can be created in the python-oracledb Thick and Thin modes -using OAuth 2.0 token-based authentication. In the examples below, the -``access_token`` parameter is set to a callable. +.. _conncreationoauth2: -**In python-oracledb Thin mode** - -When connecting to Oracle Cloud Database with mutual TLS (mTLS) using OAuth2 -tokens in the python-oracledb Thin mode, you need to explicitly set the -``config_dir``, ``wallet_location``, and ``wallet_password`` parameters of -:func:`~oracledb.connect`. See, :ref:`autonomousdb`. -The following example shows a standalone connection creation using OAuth 2.0 token -based authentication in the python-oracledb Thin mode. For information on -TokenHandlerOAuth() used in the example, see :ref:`oauthhandler`. +Connection Creation with OAuth2 Access Tokens ++++++++++++++++++++++++++++++++++++++++++++++ -.. code:: python +For OAuth 2.0 Token-Based Authentication using a class such as the +:ref:`TokenHandlerOAuth class `, the ``access_token`` connection +parameter must be specified. This parameter should be a string (or a callable +that returns a string) specifying an Azure AD OAuth2 token. In the examples +used below, the ``access_token`` parameter is set to a callable. 
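As a minimal sketch, the callable does not have to be a class instance: a plain
function that accepts the ``refresh`` argument and returns the token string also
works. The helper names ``read_cached_token()`` and ``generate_new_token()``
below are hypothetical placeholders for whatever token caching and acquisition
logic you already have.

.. code-block:: python

    # Minimal sketch of a function-based access_token callable.
    # read_cached_token() and generate_new_token() are hypothetical helpers.
    def get_oauth2_token(refresh):
        if refresh:
            # the cached token has expired; a new one must be acquired
            return generate_new_token()
        # a previously cached token may be returned on the first call
        return read_cached_token()

Such a function can be passed as the ``access_token`` value in the same way as
``TokenHandlerOAuth()`` in the examples that follow.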
- connection = oracledb.connect(access_token=TokenHandlerOAuth(), - dsn=mydb_low, - config_dir="path_to_extracted_wallet_zip", - wallet_location="location_of_pem_file", - wallet_password=wp) +The examples used in the subsequent sections use the +:ref:`TokenHandlerOAuth class ` to generate OAuth2 tokens to +connect to Oracle Autonomous Database with mutual TLS (mTLS). See +:ref:`autonomousdb`. -**In python-oracledb Thick mode** +**Standalone Connections in Thin Mode Using OAuth2 Tokens** -In the python-oracledb Thick mode, you can create a standalone connection using -OAuth2 tokens as shown in the example below. For information on -TokenHandlerOAuth() used in the example, see :ref:`oauthhandler`. +When using a class such as the :ref:`TokenHandlerOAuth class ` to +generate OAuth2 tokens to connect to Oracle Autonomous Database in Thin mode, +you need to explicitly set the ``access_token``, ``config_dir``, +``wallet_location``, and ``wallet_password`` parameters of +:func:`~oracledb.connect`. For example: .. code:: python - connection = oracledb.connect(access_token=TokenHandlerOAuth(), - externalauth=True, - dsn=mydb_low) + connection = oracledb.connect( + access_token=TokenHandlerOAuth(), + dsn=mydb_low, + config_dir="path_to_unzipped_wallet", + wallet_location="location_of_pem_file", + wallet_password=wp) -Connection Pool Creation with OAuth2 Access Tokens -++++++++++++++++++++++++++++++++++++++++++++++++++ +**Connection Pools in Thin Mode Using OAuth2 Tokens** -For OAuth 2.0 Token-Based Authentication, the ``access_token`` connection -parameter must be specified. This parameter should be a string (or a callable -that returns a string) specifying an Azure AD OAuth2 token. +When using a class such as the :ref:`TokenHandlerOAuth class ` to +generate OAuth2 tokens to connect to Oracle Autonomous Database in Thin mode, +you need to explicitly set the ``access_token``, ``homogeneous``, +``config_dir``, ``wallet_location``, and ``wallet_password`` parameters of +:func:`~oracledb.create_pool`. For example: -The ``externalauth`` parameter must be set to True in the python-oracledb Thick -mode. The ``homogeneous`` parameter must be set to True in both the -python-oracledb Thin and Thick modes. +.. code:: python -Connection pools can be created in the python-oracledb Thick and Thin modes -using OAuth 2.0 token-based authentication. In the examples below, the -``access_token`` parameter is set to a callable. + connection = oracledb.create_pool( + access_token=TokenHandlerOAuth(), + homogeneous=True, # must always be True for connection pools + dsn=mydb_low, + config_dir="path_to_unzipped_wallet", + wallet_location="location_of_pem_file", + wallet_password=wp + min=1, max=5, increment=2) Note that the ``access_token`` parameter should be set to a callable. This is useful when the connection pool needs to expand and create new connections but -the current token has expired. In such case, the callable should return a +the current token has expired. In such a case, the callable should return a string specifying the new, valid Azure AD OAuth2 token. -**In python-oracledb Thin mode** +**Standalone Connections Thick Mode Using OAuth2 Tokens** -When connecting to Oracle Cloud Database with mutual TLS (mTLS) using OAuth2 -tokens in the python-oracledb Thin mode, you need to explicitly set the -``config_dir``, ``wallet_location``, and ``wallet_password`` parameters of -:func:`~oracledb.create_pool`. See, :ref:`autonomousdb`. 
-The following example shows a connection pool creation using OAuth 2.0 token -based authentication in the python-oracledb Thin mode. For information on -TokenHandlerOAuth() used in the example, see :ref:`oauthhandler`. +When using a class such as the :ref:`TokenHandlerOAuth class ` +to generate OAuth2 tokens to connect to Oracle Autonomous Database in Thick +mode, you need to explicitly set the ``access_token`` and ``externalAuth`` +parameters of :func:`~oracledb.connect`. For example: .. code:: python - connection = oracledb.create_pool(access_token=TokenHandlerOAuth(), - homogeneous=True, dsn=mydb_low, - config_dir="path_to_extracted_wallet_zip", - wallet_location="location_of_pem_file", - wallet_password=wp - min=1, max=5, increment=2) + connection = oracledb.connect( + access_token=TokenHandlerOAuth(), + externalauth=True, # must always be True in Thick mode + dsn=mydb_low) -**In python-oracledb Thick mode** +**Connection Pools in Thick Mode Using OAuth2 Tokens** -In the python-oracledb Thick mode, you can create a connection pool using -OAuth2 tokens as shown in the example below. For information on -TokenHandlerOAuth() used in the example, see :ref:`oauthhandler`. +When using a class such as the :ref:`TokenHandlerOAuth class ` to +generate OAuth2 tokens to connect to Oracle Autonomous Database in Thick mode, +you need to explicitly set the ``access_token``, ``externalauth``, and +``homogeneous`` parameters of :func:`~oracledb.create_pool`. For example: .. code:: python - pool = oracledb.create_pool(access_token=TokenHandlerOAuth(), - externalauth=True, homogeneous=True, - dsn=mydb_low, min=1, max=5, increment=2) + pool = oracledb.create_pool( + access_token=TokenHandlerOAuth(), + externalauth=True, # must always be True in Thick mode + homogeneous=True, # must always be True in connection pools + dsn=mydb_low, min=1, max=5, increment=2) + +Note that the ``access_token`` parameter should be set to a callable. This is +useful when the connection pool needs to expand and create new connections but +the current token has expired. In such a case, the callable should return a +string specifying the new, valid Azure AD OAuth2 token. .. _oauth2connstr: @@ -3747,7 +3818,7 @@ introduced in python-oracledb 1.1 instead. See the python-oracledb Thick mode. See :ref:`enablingthick`. There are different ways to retrieve Azure AD OAuth2 tokens. Some of the ways to -retrieve the OAuth2 tokens are detailed in `Examples of Retrieving Azure AD OAuth2 +retrieve OAuth2 tokens are detailed in `Examples of Retrieving Azure AD OAuth2 Tokens `_. You can also retrieve Azure AD OAuth2 tokens by using `Azure Identity client library for Python @@ -3756,8 +3827,7 @@ azure-python>`_. .. _curl: -Example of Using a Curl Command -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Example of Using a Curl Command** Here, as an example, we are using Curl with a Resource Owner Password Credential (ROPC) Flow, that is, a ``curl`` command is used against @@ -3774,8 +3844,8 @@ This command generates a JSON response with token type, expiration, and access token values. The JSON response needs to be parsed so that only the access token is written and stored in a file. You can save the value of ``access_token`` generated to a file and set ``TOKEN_LOCATION`` to the location -of token file. See :ref:`oauthhandler` for an example of using the -TokenHandlerOAuth class to generate and read tokens. +of token file. See :ref:`TokenHandlerOAuth class ` for an example +of using this class to generate and read tokens. 
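As an illustrative sketch only (the file names here are hypothetical), the JSON
response saved from the ``curl`` command can be reduced to just the access token
with a few lines of Python before pointing ``TOKEN_LOCATION`` at the resulting
file:

.. code-block:: python

    import json

    # "response.json" is the saved output of the curl command shown above
    with open("response.json") as f:
        access_token = json.load(f)["access_token"]

    # write only the token value to the file that TOKEN_LOCATION will reference
    with open("token", "w") as f:
        f.write(access_token)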
The Oracle Net parameters ``TOKEN_AUTH`` and ``TOKEN_LOCATION`` must be set when you are using the connection string syntax. Also, the ``PROTOCOL`` @@ -3824,6 +3894,152 @@ Connection pool example: connection = pool.acquire() +.. _cloudnativeauthoauth: + +Azure Cloud Native Authentication with azure_tokens Plugin +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +With cloud native authentication, python-oracledb can automatically generate +and refresh OAuth2 tokens when required with the support of the +`Microsoft Authentication Library (MSAL) `__. This provides enhanced security +since it removes the need to use static user credentials. + +You can use python-oracledb's Azure Cloud Native Authentication plugin, +:ref:`azure_tokens `, with MSAL to generate OAuth2 +tokens. To use Azure cloud native authentication, you must import this plugin +using: + +.. code-block:: python + + import oracledb.plugins.azure_tokens + +Importing the :ref:`azure_tokens ` plugin defines +and registers a built-in hook function that generates OAuth2 tokens. This +function is internally invoked when the ``extra_auth_params`` is specified in +calls to :meth:`oracledb.connect()`, :meth:`oracledb.create_pool()`, +:meth:`oracledb.connect_async()`, or :meth:`oracledb.create_pool_async()`. +This hook function sets the ``access_token`` parameter of +:ref:`ConnectParams object ` to a callable which uses the +configuration parameters specified to generate OAuth2 tokens. + +For OAuth 2.0 Token-Based Authentication with the +:ref:`azure_tokens ` plugin, the +``extra_auth_params`` connection parameter must be specified. This parameter +should be a dictionary containing the configuration parameters necessary for +Oracle Database authentication. For information on the Azure specific +configuration parameters used in the ``extra_auth_params`` parameter, see +`MSAL `__. + +The examples used in the subsequent sections use the +:ref:`azure_tokens ` plugin to generate OAuth2 +tokens to connect to Oracle Autonomous Database with mutual TLS (mTLS). See +:ref:`autonomousdb`. + +**Standalone Connections in Thin Mode Using OAuth2 Tokens** + +When using the :ref:`azure_tokens ` plugin to +generate OAuth2 tokens to connect to Oracle Autonomous Database in Thin mode, +you need to explicitly set the ``extra_auth_params``, ``config_dir``, +``wallet_location``, and ``wallet_password`` parameter of +:func:`~oracledb.connect`. For example: + +.. code:: python + + import oracledb.plugins.azure_tokens + + token_based_auth = { + "authType": , # Azure specific configuration + "authority": , # parameters to be set when using + "clientId": , # azure_tokens plugin + "clientSecret": , + "scopes": + } + + connection = oracledb.connect( + dsn=mydb_low, + config_dir="path_to_unzipped_wallet", + wallet_location="location_of_pem_file", + wallet_password=wp, + extra_auth_params=token_based_auth) + +**Connection Pools in Thin Mode Using OAuth2 Tokens** + +When using the :ref:`azure_tokens ` plugin to +generate OAuth2 tokens to connect to Oracle Autonomous Database in Thin mode, +you need to explicitly set the ``homogeneous``, ``extra_auth_params``, +``config_dir``, ``wallet_location``, and ``wallet_password`` parameters of +:func:`~oracledb.create_pool`. For example: + +.. 
code:: python + + import oracledb.plugins.azure_tokens + + token_based_auth = { + "authType": , # Azure specific configuration + "authority": , # parameters to be set when using + "clientId": , # azure_tokens plugin + "clientSecret": , + "scopes": + } + + connection = oracledb.create_pool( + dsn=mydb_low, + config_dir="path_to_unzipped_wallet", + homogeneous=true, # must always be True for connection pools + wallet_location="location_of_pem_file", + wallet_password=wp, + extra_auth_params=token_based_auth) + +**Standalone Connections Thick Mode Using OAuth2 Tokens** + +When using the :ref:`azure_tokens ` plugin to +generate OAuth2 tokens to connect to Oracle Autonomous Database in Thick mode, +you need to explicitly set the ``extra_auth_params`` and ``externalauth`` +parameter of :func:`~oracledb.connect`. For example: + +.. code:: python + + import oracledb.plugins.azure_tokens + + token_based_auth = { + "authType": , # Azure specific configuration + "authority": , # parameters to be set when using + "clientId": , # azure_tokens plugin + "clientSecret": , + "scopes": + } + + connection = oracledb.connect( + externalauth=True, # must always be True in Thick mode + dsn=mydb_low, + extra_auth_params=token_based_auth) + +**Connection Pools in Thick Mode Using OAuth2 Tokens** + +When using the :ref:`azure_tokens ` plugin to +generate OAuth2 tokens to connect to Oracle Autonomous Database in Thick mode, +you need to explicitly set the ``extra_auth_params``, ``externalauth``, and +``homogeneous`` parameters of :func:`~oracledb.create_pool`. + +.. code:: python + + import oracledb.plugins.azure_tokens + + token_based_auth = { + "authType": , # Azure specific configuration + "authority": , # parameters to be set when using + "clientId": , # azure_tokens plugin + "clientSecret": , + "scopes": + } + + connection = oracledb.create_pool( + externalauth=True, # must always be True in Thick mode + homogeneous=True, # must always be True for connection pools + dsn=mydb_low, + extra_auth_params=token_based_auth) .. _iamauth: @@ -3832,24 +4048,31 @@ Connecting Using OCI IAM Token-Based Authentication Oracle Cloud Infrastructure (OCI) Identity and Access Management (IAM) provides its users with a centralized database authentication and authorization system. -Using this authentication method, users can use the database access token issued -by OCI IAM to authenticate to the Oracle Cloud Database. Both Thin and Thick modes -of the python-oracledb driver support OCI IAM token-based authentication. +Using this authentication method, users can use the database access token +issued by OCI IAM to authenticate to the Oracle Autonomous Database. Both Thin +and Thick modes of the python-oracledb driver support OCI IAM token-based +authentication. When using python-oracledb in Thick mode, Oracle Client libraries 19.14 (or later), or 21.5 (or later) are needed. -OCI IAM token-based authentication can be used for both standalone connections and -connection pools. Tokens can be specified using the connection parameter -introduced in python-oracledb 1.1. Users of earlier python-oracledb versions -can alternatively use :ref:`OCI IAM Token-Based Authentication Connection Strings -`. +Standalone connections and pooled connections can be created in python-oracledb +Thick and Thin modes using OCI IAM token-based authentication. This can be done +by using a class such as a :ref:`TokenHandlerIAM class ` or by +using python-oracledb's :ref:`OCI Cloud Native Authentication Plugin +(oci_tokens) `. 
Tokens can be specified using the +connection parameter introduced in python-oracledb 1.1. Users of earlier +python-oracledb versions can alternatively use +:ref:`OCI IAM Token-Based Authentication Connection Strings `. OCI IAM Token Generation and Extraction +++++++++++++++++++++++++++++++++++++++ -Authentication tokens can be generated through execution of an Oracle Cloud -Infrastructure command line interface (OCI-CLI) command :: +Authentication tokens can be generated using python-oracledb's +:ref:`oci_tokens ` plugin. + +Alternatively, authentication tokens can be generated through execution of an +Oracle Cloud Infrastructure command line interface (OCI-CLI) command :: oci iam db-token get @@ -3858,11 +4081,10 @@ It will contain the token and private key files needed by python-oracledb. .. _iamhandler: -Example of Using a TokenHandlerIAM Class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Example of Using a TokenHandlerIAM Class** Here, as an example, we are using a Python script to automate the process of -generating and reading the OCI IAM tokens. +generating and reading OCI IAM tokens. .. code:: python @@ -3902,108 +4124,107 @@ generating and reading the OCI IAM tokens. '-----END PRIVATE KEY-----')] self.private_key = "".join(lines) -The TokenHandlerIAM class uses a callable to generate and read the OCI IAM -tokens. When the callable in the TokenHandlerIAM class is invoked for the first -time to create a standalone connection or pool, the ``refresh`` parameter is -False which allows the callable to return a cached token, if desired. The +The TokenHandlerIAM class uses a callable to generate and read OCI IAM tokens. +When the callable in the TokenHandlerIAM class is invoked for the first time +to create a standalone connection or pool, the ``refresh`` parameter is +*False* which allows the callable to return a cached token, if desired. The expiry date is then extracted from this token and compared with the current date. If the token has not expired, then it will be used directly. If the token has expired, the callable is invoked the second time with the ``refresh`` -parameter set to True. +parameter set to *True*. -Standalone Connection Creation with OCI IAM Access Tokens -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The TokenHandlerIAM class defined here is used in the examples shown in +:ref:`conncreationociiam`. -For OCI IAM Token-Based Authentication, the ``access_token`` connection parameter -must be specified. This parameter should be a 2-tuple (or a callable that returns -a 2-tuple) containing the token and private key. +.. _conncreationociiam: -Standalone connections can be created in the python-oracledb Thick and Thin modes -using OCI IAM token-based authentication. In the examples below, the -``access_token`` parameter is set to a callable. +Connection Creation with OCI IAM Access Tokens +++++++++++++++++++++++++++++++++++++++++++++++ -**In python-oracledb Thin mode** +For OCI IAM Token-Based Authentication with a class such as the +:ref:`TokenHandlerIAM class `, the ``access_token`` connection +parameter must be specified. This parameter should be a 2-tuple (or a callable +that returns a 2-tuple) containing the token and private key. In the examples +used below, the ``access_token`` parameter is set to a callable. 
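As a minimal sketch, the callable can also be a plain function that returns the
2-tuple directly. The file paths below are hypothetical and would normally point
at the token and private key files written by ``oci iam db-token get``; a
production callable should also regenerate those files when ``refresh`` is
*True*, as the TokenHandlerIAM class above does.

.. code-block:: python

    def get_iam_token(refresh):
        # hypothetical locations of the files produced by "oci iam db-token get"
        with open("path_to_token_file") as f:
            token = f.read().strip()
        with open("path_to_private_key_file") as f:
            private_key = f.read().strip()
        # depending on the driver mode, the PEM header and footer lines may
        # need to be stripped from the key, as TokenHandlerIAM does above
        return (token, private_key)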
-When connecting to Oracle Cloud Database with mutual TLS (mTLS) using OCI IAM -tokens in the python-oracledb Thin mode, you need to explicitly set the -``config_dir``, ``wallet_location``, and ``wallet_password`` parameters of -:func:`~oracledb.connect`. See, :ref:`autonomousdb`. -The following example shows a standalone connection creation using OCI IAM token -based authentication in the python-oracledb Thin mode. For information on -TokenHandlerIAM() used in the example, see :ref:`iamhandler`. +The examples used in the subsequent sections use the +:ref:`TokenHandlerIAM class ` to generate OCI IAM tokens to connect +to Oracle Autonomous Database with mutual TLS (mTLS). See :ref:`autonomousdb`. -.. code:: python +**Standalone Connections in Thin Mode Using OCI IAM Tokens** - connection = oracledb.connect(access_token=TokenHandlerIAM(), - dsn=mydb_low, - config_dir="path_to_extracted_wallet_zip", - wallet_location="location_of_pem_file", - wallet_password=wp) - -**In python-oracledb Thick mode** - -In the python-oracledb Thick mode, you can create a standalone connection using -OCI IAM tokens as shown in the example below. For information on -TokenHandlerIAM() used in the example, see :ref:`iamhandler`. +When using a class such as the :ref:`TokenHandlerIAM class ` to +generate OCI IAM tokens to connect to Oracle Cloud Database in Thin mode, you +need to explicitly set the ``access_token``, ``config_dir``, +``wallet_location``, and ``wallet_password`` parameters of +:func:`~oracledb.connect`. For example: .. code:: python - connection = oracledb.connect(access_token=TokenHandlerIAM(), - externalauth=True, - dsn=mydb_low) + connection = oracledb.connect( + access_token=TokenHandlerIAM(), + dsn=mydb_low, + config_dir="path_to_unzipped_wallet", + wallet_location="location_of_pem_file", + wallet_password=wp) -Connection Pool Creation with OCI IAM Access Tokens -+++++++++++++++++++++++++++++++++++++++++++++++++++ +**Connection Pools in Thin Mode Using OCI IAM Tokens** -For OCI IAM Token-Based Authentication, the ``access_token`` connection -parameter must be specified. This parameter should be a 2-tuple (or a callable -that returns a 2-tuple) containing the token and private key. +When using a class such as :ref:`TokenHandlerIAM class ` to +generate OCI IAM tokens to connect to Oracle Cloud Database in Thin mode, you +need to explicitly set the ``access_token``, ``homogeneous``, ``config_dir``, +``wallet_location``, and ``wallet_password`` parameters of +:func:`~oracledb.create_pool`. For example: -The ``externalauth`` parameter must be set to True in the python-oracledb Thick -mode. The ``homogeneous`` parameter must be set to True in both the -python-oracledb Thin and Thick modes. +.. code:: python -Connection pools can be created in the python-oracledb Thick and Thin modes -using OCI IAM token-based authentication. In the examples below, the -``access_token`` parameter is set to a callable. + connection = oracledb.create_pool( + access_token=TokenHandlerIAM(), + homogeneous=True, # must always be set to True for connection pools + dsn=mydb_low, + config_dir="path_to_unzipped_wallet", + wallet_location="location_of_pem_file", + wallet_password=wp + min=1, max=5, increment=2) Note that the ``access_token`` parameter should be set to a callable. This is useful when the connection pool needs to expand and create new connections but -the current token has expired. In such case, the callable should return a -2-tuple (token, private key) specifying the new, valid access token. 
+the current token has expired. In such a case, the callable should return a +string specifying the new, valid access token. -**In python-oracledb Thin mode** +**Standalone Connections in Thick Mode Using OCI IAM Tokens** -When connecting to Oracle Cloud Database with mutual TLS (mTLS) using OCI IAM -tokens in the python-oracledb Thin mode, you need to explicitly set the -``config_dir``, ``wallet_location``, and ``wallet_password`` parameters of -:func:`~oracledb.create_pool`. See, :ref:`autonomousdb`. -The following example shows a connection pool creation using OCI IAM token -based authentication in the python-oracledb Thin mode. For information on -TokenHandlerIAM() used in the example, see :ref:`iamhandler`. +When using a class such as :ref:`TokenHandlerIAM class ` to +generate OCI IAM tokens to connect to Oracle Autonomous Database in Thick mode, +you need to explicitly set the ``access_token`` and ``externalAuth`` parameters +of :func:`~oracledb.connect`. For example: .. code:: python - connection = oracledb.connect(access_token=TokenHandlerIAM(), - homogeneous=True, dsn=mydb_low, - config_dir="path_to_extracted_wallet_zip", - wallet_location="location_of_pem_file", - wallet_password=wp - min=1, max=5, increment=2) + connection = oracledb.connect( + access_token=TokenHandlerIAM(), + externalauth=True, # must always be True in Thick mode + dsn=mydb_low) -**In python-oracledb Thick mode** +**Connection Pools in Thick Mode Using OCI IAM Tokens** -In the python-oracledb Thick mode, you can create a connection pool using -OCI IAM tokens as shown in the example below. For information on -TokenHandlerIAM() used in the example, see :ref:`iamhandler`. +When using a class such as :ref:`TokenHandlerIAM class ` to +generate OCI IAM tokens to connect to Oracle Cloud Database in Thick mode, you +need to explicitly set the ``access_token``, ``externalauth``, and +``homogeneous`` parameters of :func:`oracledb.create_pool`. For example: .. code:: python - pool = oracledb.create_pool(access_token=TokenHandlerIAM(), - externalauth=True, - homogeneous=True, - dsn=mydb_low, - min=1, max=5, increment=2) + pool = oracledb.create_pool( + access_token=TokenHandlerIAM(), + externalauth=True, # must always be True in Thick mode + homogeneous=True, # must always be True in connection pools + dsn=mydb_low, min=1, max=5, increment=2) + +Note that the ``access_token`` parameter should be set to a callable. This is +useful when the connection pool needs to expand and create new connections but +the current token has expired. In such a case, the callable should return a +string specifying the new, valid access token. .. _iamauthconnstr: @@ -4083,6 +4304,151 @@ Connection pool example: connection = pool.acquire() +.. _cloudnativeauthoci: + +OCI Cloud Native Authentication with oci_tokens Plugin +++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +With cloud native authentication, python-oracledb can automatically generate +and refresh OCI IAM tokens when required with the support of the +`Oracle Cloud Infrastructure (OCI) Software Development Kit (SDK) +`__. +This provides enhanced security since it removes the need to use static user +credentials. + +You can use python-oracledb's OCI Cloud Native Authentication plugin, +:ref:`oci_tokens `, with OCI SDK to generate OCI IAM +tokens. To use OCI cloud native authentication, you must import this plugin +using: + +.. 
code-block:: python + + import oracledb.plugins.oci_tokens + +Importing the :ref:`oci_tokens ` plugin defines and +registers a built-in hook function that generates OCI IAM tokens. This function +is internally invoked when the ``extra_auth_params`` is specified in calls to +:meth:`oracledb.connect()`, :meth:`oracledb.create_pool()`, +:meth:`oracledb.connect_async()`, or :meth:`oracledb.create_pool_async()`. +This hook function sets the ``access_token`` parameter of +:ref:`ConnectParams object ` to a callable which uses the +configuration parameters specified to generate OCI IAM tokens. + +For OCI IAM Token-Based Authentication with the +:ref:`oci_tokens ` plugin, the +``extra_auth_params`` connection parameter must be specified. This parameter +should be a dictionary containing the configuration parameters necessary for +Oracle Database authentication. For information on the OCI specific +configuration parameters used in the ``extra_auth_params`` parameter, see +`OCI SDK `__. + +The examples used in the subsequent sections use the +:ref:`oci_tokens ` plugin to generate OCI IAM tokens +to connect to Oracle Autonomous Database with mutual TLS (mTLS). See +:ref:`autonomousdb`. + +**Standalone Connections in Thin Mode Using OCI IAM Tokens** + +When using the :ref:`oci_tokens ` plugin to generate +OCI IAM tokens to connect to Oracle Autonomous Database in Thin mode, you need +to explicitly set the ``config_dir``, ``wallet_location``, ``wallet_password`` +and ``extra_auth_params`` parameters of :func:`~oracledb.connect`. For example: + +.. code:: python + + import oracledb.plugins.oci_tokens + + token_based_auth = { # OCI specific configuration + "authType": "configfilebasedauthentication", # parameters to be set when using + "profile": , # oci_tokens plugin with authType + "configFileLocation": , # configfilebasedauthentication + } + + connection = oracledb.connect( + dsn=mydb_low, + config_dir="path_to_unzipped_wallet", + wallet_location="location_of_pem_file", + wallet_password=wp, + extra_auth_params=token_based_auth) + +**Connection Pools in Thin Mode Using OCI IAM Tokens** + +When using the :ref:`oci_tokens ` plugin to generate +OCI IAM tokens to connect to Oracle Cloud Database in Thin mode, you need to +explicitly set the ``config_dir``, ``homogeneous``, ``wallet_location``, +``wallet_password``, and ``extra_auth_params`` parameters of +:func:`~oracledb.create_pool`. For example: + +.. code:: python + + import oracledb.plugins.oci_tokens + + token_based_auth = { + "authType": "simpleauthentication", # OCI specific configuration + "user": , # parameters to be set when using + "key_file": , # oci_tokens plugin with authType + "fingerprint": , # simpleAuthentication + "tenancy": , + "region": , + "profile": + } + + connection = oracledb.create_pool( + dsn=mydb_low, + config_dir="path_to_unzipped_wallet", + homogeneous=true, # must always be True for connection pools + wallet_location="location_of_pem_file", + wallet_password=wp, + extra_auth_params=token_based_auth) + +**Standalone Connections in Thick Mode Using OCI IAM Tokens** + +When using the :ref:`oci_tokens ` plugin to generate +OCI IAM tokens to connect to Oracle Autonomous Database in Thick mode, you need +to explicitly set the ``externalauth`` and ``extra_auth_params`` parameters of +:func:`oracledb.connect`. For example: + +.. 
code:: python + + import oracledb.plugins.oci_tokens + + token_based_auth = { + "authType": "simpleauthentication", # OCI specific configuration + "user": , # parameters to be set when using + "key_file": , # oci_tokens plugin with authType + "fingerprint": , # simpleAuthentication + "tenancy": , + "region": , + "profile": + } + connection = oracledb.connect( + externalauth=True, + dsn=mydb_low, + extra_auth_params=token_based_auth) + +**Connection Pools in Thick Mode Using OCI IAM Tokens** + +When using the :ref:`oci_tokens ` plugin to generate +OCI IAM tokens to connect to Oracle Autonomous Database in Thick mode, you +need to explicitly set the ``externalauth``, ``homogeneous``, and +``extra_auth_params`` parameters of :func:`~oracledb.create_pool`. For example: + +.. code:: python + + import oracledb.plugins.oci_tokens + + token_based_auth = { # OCI specific configuration + "authType": "configfilebasedauthentication", # parameters to be set when using + "profile": , # oci_tokens plugin with authType + "configFileLocation": , # configfilebasedauthentication + } + + connection = oracledb.create_pool( + externalauth=True, # must always be True in Thick mode + homogeneous=True, # must always be True for connection pools + dsn=mydb_low, + extra_auth_params=token_based_auth) Privileged Connections ====================== @@ -4645,7 +5011,7 @@ connection strings, wallet locations, and wallet password (if required) in each .. code-block:: python connection = oracledb.connect(user=user_name, password=userpw, dsn=dsn, - config_dir="path_to_extracted_wallet_zip", + config_dir="path_to_unzipped_wallet", wallet_location="location_of_pem_file", wallet_password=walletpw) @@ -4664,7 +5030,7 @@ containing the ``MY_WALLET_DIRECTORY`` option needs to be created: .. 
code-block:: python dsn = "mydb_high" # one of the network aliases from tnsnames.ora - params = oracledb.ConnectParams(config_dir="path_to_extracted_wallet_zip", + params = oracledb.ConnectParams(config_dir="path_to_unzipped_wallet", wallet_location="path_location_of_sso_file") params.parse_connect_string(dsn) dsn = params.get_connect_string() diff --git a/src/oracledb/__init__.py b/src/oracledb/__init__.py index 1302e220..15f62a54 100644 --- a/src/oracledb/__init__.py +++ b/src/oracledb/__init__.py @@ -287,8 +287,10 @@ from .utils import ( enable_thin_mode as enable_thin_mode, + register_params_hook as register_params_hook, register_password_type as register_password_type, register_protocol as register_protocol, + unregister_params_hook as unregister_params_hook, ) from .thick_impl import ( diff --git a/src/oracledb/base_impl.pxd b/src/oracledb/base_impl.pxd index 42b5de16..0b4a6188 100644 --- a/src/oracledb/base_impl.pxd +++ b/src/oracledb/base_impl.pxd @@ -570,6 +570,7 @@ cdef class ConnectParamsImpl: public str terminal public str osuser public str driver_name + public dict extra_auth_params public bint thick_mode_dsn_passthrough cdef int _check_credentials(self) except -1 diff --git a/src/oracledb/base_impl.pyx b/src/oracledb/base_impl.pyx index f071fef1..8a819d33 100644 --- a/src/oracledb/base_impl.pyx +++ b/src/oracledb/base_impl.pyx @@ -117,6 +117,9 @@ REGISTERED_PROTOCOLS = {} # password types registered with the library REGISTERED_PASSWORD_TYPES = {} +# params hooks registered with the library +REGISTERED_PARAMS_HOOKS = [] + include "impl/base/types.pyx" include "impl/base/constants.pxi" include "impl/base/decoders.pyx" diff --git a/src/oracledb/connect_params.py b/src/oracledb/connect_params.py index 548c70bc..52e2ccbf 100644 --- a/src/oracledb/connect_params.py +++ b/src/oracledb/connect_params.py @@ -106,6 +106,7 @@ def __init__( driver_name: Optional[str] = None, use_sni: Optional[bool] = None, thick_mode_dsn_passthrough: Optional[bool] = None, + extra_auth_params: Optional[dict] = None, handle: Optional[int] = None, ): """ @@ -311,6 +312,10 @@ def __init__( configuration file (default: oracledb.defaults.thick_mode_dsn_passthrough) + - extra_auth_params: a dictionary containing configuration parameters + necessary for Oracle Database authentication using plugins, such as + the Azure and OCI cloud-native authentication plugins (default: None) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) @@ -365,7 +370,8 @@ def __repr__(self): f"osuser={self.osuser!r}, " f"driver_name={self.driver_name!r}, " f"use_sni={self.use_sni!r}, " - f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}" + f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}, " + f"extra_auth_params={self.extra_auth_params!r}" ")" ) @@ -480,6 +486,15 @@ def externalauth(self) -> bool: """ return self._impl.externalauth + @property + def extra_auth_params(self) -> dict: + """ + A dictionary containing configuration parameters necessary for Oracle + Database authentication using plugins, such as the Azure and OCI cloud- + native authentication plugins. 
+ """ + return self._impl.extra_auth_params + @property @_flatten_value def host(self) -> Union[list, str]: @@ -898,6 +913,7 @@ def set( driver_name: Optional[str] = None, use_sni: Optional[bool] = None, thick_mode_dsn_passthrough: Optional[bool] = None, + extra_auth_params: Optional[dict] = None, handle: Optional[int] = None, ): """ @@ -1089,6 +1105,10 @@ def set( parameter handling and locating any optional tnsnames.ora configuration file + - extra_auth_params: a dictionary containing configuration parameters + necessary for Oracle Database authentication using plugins, such as + the Azure and OCI cloud-native authentication plugins + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index 608c6fbb..39cb4dca 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -1316,6 +1316,7 @@ def connect( driver_name: Optional[str] = None, use_sni: Optional[bool] = None, thick_mode_dsn_passthrough: Optional[bool] = None, + extra_auth_params: Optional[dict] = None, handle: Optional[int] = None, ) -> Connection: """ @@ -1539,6 +1540,10 @@ def connect( handling and locating any optional tnsnames.ora configuration file (default: oracledb.defaults.thick_mode_dsn_passthrough) + - extra_auth_params: a dictionary containing configuration parameters + necessary for Oracle Database authentication using plugins, such as the + Azure and OCI cloud-native authentication plugins (default: None) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) @@ -2138,6 +2143,7 @@ def connect_async( driver_name: Optional[str] = None, use_sni: Optional[bool] = None, thick_mode_dsn_passthrough: Optional[bool] = None, + extra_auth_params: Optional[dict] = None, handle: Optional[int] = None, ) -> AsyncConnection: """ @@ -2361,6 +2367,10 @@ def connect_async( handling and locating any optional tnsnames.ora configuration file (default: oracledb.defaults.thick_mode_dsn_passthrough) + - extra_auth_params: a dictionary containing configuration parameters + necessary for Oracle Database authentication using plugins, such as the + Azure and OCI cloud-native authentication plugins (default: None) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. 
It should be used with extreme caution (default: 0) diff --git a/src/oracledb/errors.py b/src/oracledb/errors.py index ef0b2000..4f46deec 100644 --- a/src/oracledb/errors.py +++ b/src/oracledb/errors.py @@ -280,6 +280,7 @@ def _raise_not_supported(feature: str) -> None: ERR_PLAINTEXT_PASSWORD_IN_CONFIG = 2058 ERR_MISSING_CONNECT_DESCRIPTOR = 2059 ERR_ARROW_C_API_ERROR = 2060 +ERR_PARAMS_HOOK_HANDLER_FAILED = 2061 # error numbers that result in NotSupportedError ERR_TIME_NOT_SUPPORTED = 3000 @@ -742,6 +743,9 @@ def _raise_not_supported(feature: str) -> None: "OSON node type 0x{node_type:x} is not supported" ), ERR_OSON_VERSION_NOT_SUPPORTED: "OSON version {version} is not supported", + ERR_PARAMS_HOOK_HANDLER_FAILED: ( + "registered handler for params hook failed" + ), ERR_PASSWORD_TYPE_HANDLER_FAILED: ( 'registered handler for password type "{password_type}" failed' ), diff --git a/src/oracledb/impl/base/connect_params.pyx b/src/oracledb/impl/base/connect_params.pyx index b34b7387..3a2746f0 100644 --- a/src/oracledb/impl/base/connect_params.pyx +++ b/src/oracledb/impl/base/connect_params.pyx @@ -103,6 +103,7 @@ cdef class ConnectParamsImpl: _set_str_param(args, "machine", self, check_network_character_set=True) _set_str_param(args, "osuser", self, check_network_character_set=True) _set_str_param(args, "driver_name", self) + _set_obj_param(args, "extra_auth_params", self) _set_bool_param(args, "thick_mode_dsn_passthrough", &self.thick_mode_dsn_passthrough) self._set_access_token_param(args.get("access_token")) @@ -191,6 +192,7 @@ cdef class ConnectParamsImpl: self.machine = other_params.machine self.osuser = other_params.osuser self.driver_name = other_params.driver_name + self.extra_auth_params = other_params.extra_auth_params self.thick_mode_dsn_passthrough = \ other_params.thick_mode_dsn_passthrough @@ -557,6 +559,14 @@ cdef class ConnectParamsImpl: dsn = self._get_connect_string() elif thin or not self.thick_mode_dsn_passthrough: self.parse_connect_string(dsn) + if REGISTERED_PARAMS_HOOKS: + params = self._get_public_instance() + for hook_fn in REGISTERED_PARAMS_HOOKS: + try: + hook_fn(params) + except Exception as e: + errors._raise_err(errors.ERR_PARAMS_HOOK_HANDLER_FAILED, + cause=e) return dsn diff --git a/src/oracledb/plugins/azure_tokens.py b/src/oracledb/plugins/azure_tokens.py new file mode 100644 index 00000000..d2c0e1f2 --- /dev/null +++ b/src/oracledb/plugins/azure_tokens.py @@ -0,0 +1,79 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# azure_tokens.py +# +# Python file defining the methods that generates an OAuth access token +# using the MSAL SDK +# ----------------------------------------------------------------------------- + +import msal +import oracledb + + +def generate_token(token_auth_config, refresh=False): + """ + Generates an Azure access token based on provided credentials. + """ + auth_type = token_auth_config.get("authType", "").lower() + if auth_type == "azureserviceprincipal": + return _service_principal_credentials(token_auth_config) + else: + raise ValueError(f"Unrecognized authentication method: {auth_type}") + + +def _service_principal_credentials(token_auth_config): + """ + Returns the access token for authentication as a service principal. + """ + msal_config = { + "authority": token_auth_config["authority"], + "client_id": token_auth_config["clientId"], + "client_credential": token_auth_config["clientSecret"], + } + # Initialize the Confidential Client Application + cca = msal.ConfidentialClientApplication(**msal_config) + auth_response = cca.acquire_token_for_client( + scopes=[token_auth_config["scopes"]] + ) + + if "access_token" in auth_response: + return auth_response["access_token"] + + +def azure_token_hook(params: oracledb.ConnectParams): + """ + Azure-specific hook for generating a token. + """ + if params.extra_auth_params is not None: + + def token_callback(refresh): + return generate_token(params.extra_auth_params, refresh) + + params.set(access_token=token_callback) + + +# Register the token hook for Azure +oracledb.register_params_hook(azure_token_hook) diff --git a/src/oracledb/plugins/oci_tokens.py b/src/oracledb/plugins/oci_tokens.py new file mode 100644 index 00000000..bf75ecc8 --- /dev/null +++ b/src/oracledb/plugins/oci_tokens.py @@ -0,0 +1,168 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024, 2025, Oracle and/or its affiliates. +# +# This software is dual-licensed to you under the Universal Permissive License +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose +# either license. +# +# If you elect to accept the software under the Apache License, Version 2.0, +# the following applies: +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# oci_tokens.py +# +# Python file defining the methods that genearates an OCI access +# token using the OCI SDK +# ----------------------------------------------------------------------------- + +import oci +import oracledb +from cryptography.hazmat.primitives.asymmetric import rsa +from cryptography.hazmat.primitives import serialization + + +def generate_token(token_auth_config, refresh=False): + """ + Generates an OCI access token based on provided credentials. + """ + auth_type = token_auth_config.get("authType", "").lower() + if auth_type == "configfilebasedauthentication": + return _config_file_based_authentication(token_auth_config) + elif auth_type == "simpleauthentication": + return _simple_authentication(token_auth_config) + else: + raise ValueError(f"Unrecognized authentication method: {auth_type}") + + +def _get_key_pair(): + """ + Generates a public-private key pair for proof of possession. + """ + private_key = rsa.generate_private_key( + public_exponent=65537, + key_size=4096, + ) + private_key_pem = private_key.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.PKCS8, + encryption_algorithm=serialization.NoEncryption(), + ).decode("utf-8") + + public_key_pem = ( + private_key.public_key() + .public_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PublicFormat.SubjectPublicKeyInfo, + ) + .decode("utf-8") + ) + + if not oracledb.is_thin_mode(): + p_key = "".join( + line.strip() + for line in private_key_pem.splitlines() + if not ( + line.startswith("-----BEGIN") or line.startswith("-----END") + ) + ) + private_key_pem = p_key + + return {"privateKey": private_key_pem, "publicKey": public_key_pem} + + +def _config_file_based_authentication(token_auth_config): + """ + Config file base authentication implementation, config parameters + are provided in a file. 
+ """ + file_location = token_auth_config.get( + "configFileLocation", oci.config.DEFAULT_LOCATION + ) + profile = token_auth_config.get("profile", oci.config.DEFAULT_PROFILE) + + # Load OCI config + config = oci.config.from_file(file_location, profile) + oci.config.validate_config(config) + + # Initialize service client with default config file + client = oci.identity_data_plane.DataplaneClient(config) + + key_pair = _get_key_pair() + + response = client.generate_scoped_access_token( + generate_scoped_access_token_details=oci.identity_data_plane.models.GenerateScopedAccessTokenDetails( + scope="urn:oracle:db::id::*", public_key=key_pair["publicKey"] + ) + ) + + # access_token is a tuple holding token and private key + access_token = ( + response.data.token, + key_pair["privateKey"], + ) + + return access_token + + +def _simple_authentication(token_auth_config): + """ + Simple authentication, config parameters are passed as parameters + """ + config = { + "user": token_auth_config["user"], + "key_file": token_auth_config["key_file"], + "fingerprint": token_auth_config["fingerprint"], + "tenancy": token_auth_config["tenancy"], + "region": token_auth_config["region"], + "profile": token_auth_config["profile"], + } + oci.config.validate_config(config) + + # Initialize service client with given configuration + client = oci.identity_data_plane.DataplaneClient(config) + + key_pair = _get_key_pair() + + response = client.generate_scoped_access_token( + generate_scoped_access_token_details=oci.identity_data_plane.models.GenerateScopedAccessTokenDetails( + scope="urn:oracle:db::id::*", public_key=key_pair["publicKey"] + ) + ) + + # access_token is a tuple holding token and private key + access_token = ( + response.data.token, + key_pair["privateKey"], + ) + + return access_token + + +def oci_token_hook(params: oracledb.ConnectParams): + """ + OCI-specific hook for generating a token. + """ + if params.extra_auth_params is not None: + + def token_callback(refresh): + return generate_token(params.extra_auth_params, refresh) + + params.set(access_token=token_callback) + + +# Register the token hook for OCI +oracledb.register_params_hook(oci_token_hook) diff --git a/src/oracledb/pool.py b/src/oracledb/pool.py index b01ba65d..80b0efbd 100644 --- a/src/oracledb/pool.py +++ b/src/oracledb/pool.py @@ -672,6 +672,7 @@ def create_pool( driver_name: Optional[str] = None, use_sni: Optional[bool] = None, thick_mode_dsn_passthrough: Optional[bool] = None, + extra_auth_params: Optional[dict] = None, handle: Optional[int] = None, ) -> ConnectionPool: """ @@ -952,6 +953,10 @@ def create_pool( handling and locating any optional tnsnames.ora configuration file (default: oracledb.defaults.thick_mode_dsn_passthrough) + - extra_auth_params: a dictionary containing configuration parameters + necessary for Oracle Database authentication using plugins, such as the + Azure and OCI cloud-native authentication plugins (default: None) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. 
It should be used with extreme caution (default: 0) @@ -1196,6 +1201,7 @@ def create_pool_async( driver_name: Optional[str] = None, use_sni: Optional[bool] = None, thick_mode_dsn_passthrough: Optional[bool] = None, + extra_auth_params: Optional[dict] = None, handle: Optional[int] = None, ) -> AsyncConnectionPool: """ @@ -1477,6 +1483,10 @@ def create_pool_async( handling and locating any optional tnsnames.ora configuration file (default: oracledb.defaults.thick_mode_dsn_passthrough) + - extra_auth_params: a dictionary containing configuration parameters + necessary for Oracle Database authentication using plugins, such as the + Azure and OCI cloud-native authentication plugins (default: None) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) diff --git a/src/oracledb/pool_params.py b/src/oracledb/pool_params.py index 307f9370..919e379b 100644 --- a/src/oracledb/pool_params.py +++ b/src/oracledb/pool_params.py @@ -119,6 +119,7 @@ def __init__( driver_name: Optional[str] = None, use_sni: Optional[bool] = None, thick_mode_dsn_passthrough: Optional[bool] = None, + extra_auth_params: Optional[dict] = None, handle: Optional[int] = None, ): """ @@ -380,6 +381,10 @@ def __init__( configuration file (default: oracledb.defaults.thick_mode_dsn_passthrough) + - extra_auth_params: a dictionary containing configuration parameters + necessary for Oracle Database authentication using plugins, such as + the Azure and OCI cloud-native authentication plugins (default: None) + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution (default: 0) @@ -448,7 +453,8 @@ def __repr__(self): f"osuser={self.osuser!r}, " f"driver_name={self.driver_name!r}, " f"use_sni={self.use_sni!r}, " - f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}" + f"thick_mode_dsn_passthrough={self.thick_mode_dsn_passthrough!r}, " + f"extra_auth_params={self.extra_auth_params!r}" ")" ) @@ -648,6 +654,7 @@ def set( driver_name: Optional[str] = None, use_sni: Optional[bool] = None, thick_mode_dsn_passthrough: Optional[bool] = None, + extra_auth_params: Optional[dict] = None, handle: Optional[int] = None, ): """ @@ -890,6 +897,10 @@ def set( parameter handling and locating any optional tnsnames.ora configuration file + - extra_auth_params: a dictionary containing configuration parameters + necessary for Oracle Database authentication using plugins, such as + the Azure and OCI cloud-native authentication plugins + - handle: an integer representing a pointer to a valid service context handle. This value is only used in thick mode. It should be used with extreme caution diff --git a/src/oracledb/utils.py b/src/oracledb/utils.py index e796f953..239cdc55 100644 --- a/src/oracledb/utils.py +++ b/src/oracledb/utils.py @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# Copyright (c) 2020, 2024, Oracle and/or its affiliates. +# Copyright (c) 2020, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -87,6 +87,20 @@ def wrapped_f(self, *args, **kwargs): return wrapped_f +def register_params_hook(hook_function: Callable) -> None: + """ + Registers a user function to be called internally prior to connection or + pool creation. 
The hook function accepts a copy of the parameters that will + be used to create the pool or standalone connection and may modify them. + For example, the cloud native authentication plugins modify the + "access_token" parameter with a function that will acquire the token using + information found in the "extra_auth_parms" parameter. + """ + if hook_function is None or not callable(hook_function): + raise TypeError("hook_function must be a callable and cannot be None") + base_impl.REGISTERED_PARAMS_HOOKS.append(hook_function) + + def register_password_type( password_type: str, hook_function: Callable ) -> None: @@ -128,6 +142,14 @@ def register_protocol(protocol: str, hook_function: Callable) -> None: base_impl.REGISTERED_PROTOCOLS[protocol] = hook_function +def unregister_params_hook(hook_function: Callable) -> None: + """ + Unregisters a user function that was earlier registered with a call to + register_params_hook(). + """ + base_impl.REGISTERED_PARAMS_HOOKS.remove(hook_function) + + def verify_stored_proc_args( parameters: Union[list, tuple], keyword_parameters: dict ) -> None: diff --git a/tests/test_1100_connection.py b/tests/test_1100_connection.py index 2c8d41e0..dc50ca4b 100644 --- a/tests/test_1100_connection.py +++ b/tests/test_1100_connection.py @@ -935,6 +935,25 @@ def test_1155(self): self.assertEqual(cursor.fetchone()[0], edition.upper()) self.assertEqual(conn.edition, edition) + def test_1156(self): + "1156 - test connect() with parameters hook" + conn = test_env.get_connection() + orig_stmtcachesize = conn.stmtcachesize + stmtcachesize = orig_stmtcachesize + 10 + + def hook(params): + params.set(stmtcachesize=stmtcachesize) + + try: + oracledb.register_params_hook(hook) + conn = test_env.get_connection() + self.assertEqual(conn.stmtcachesize, stmtcachesize) + finally: + oracledb.unregister_params_hook(hook) + + conn = test_env.get_connection() + self.assertEqual(conn.stmtcachesize, orig_stmtcachesize) + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_2400_pool.py b/tests/test_2400_pool.py index e6685140..952ed446 100644 --- a/tests/test_2400_pool.py +++ b/tests/test_2400_pool.py @@ -1042,6 +1042,31 @@ def test_2454(self): with self.assertRaises(TypeError): test_env.get_pool(pool_alias=alias) + def test_2455(self): + "2455 - test create_pool() with parameters hook" + pool = test_env.get_pool() + with pool.acquire() as conn: + orig_stmtcachesize = conn.stmtcachesize + stmtcachesize = orig_stmtcachesize + 10 + pool.close() + + def hook(params): + params.set(stmtcachesize=stmtcachesize) + + try: + oracledb.register_params_hook(hook) + pool = test_env.get_pool() + with pool.acquire() as conn: + self.assertEqual(conn.stmtcachesize, stmtcachesize) + pool.close() + finally: + oracledb.unregister_params_hook(hook) + + pool = test_env.get_pool() + with pool.acquire() as conn: + self.assertEqual(conn.stmtcachesize, orig_stmtcachesize) + pool.close() + if __name__ == "__main__": test_env.run_test_cases() diff --git a/tests/test_4500_connect_params.py b/tests/test_4500_connect_params.py index 0edda59a..42f511a6 100644 --- a/tests/test_4500_connect_params.py +++ b/tests/test_4500_connect_params.py @@ -691,6 +691,7 @@ def test_4539(self): ("driver_name", "custom_driver"), ("use_sni", True), ("thick_mode_dsn_passthrough", True), + ("extra_auth_params", dict(extra1="A", extra2="B")), ] params = oracledb.ConnectParams(**dict(values)) parts = [f"{name}={value!r}" for name, value in values] @@ -745,6 +746,7 @@ def test_4539(self): ("driver_name", 
"modified_driver_name"), ("use_sni", False), ("thick_mode_dsn_passthrough", False), + ("extra_auth_params", dict(extra1="X", extra2="Y")), ] params.set(**dict(new_values)) parts = [f"{name}={value!r}" for name, value in new_values] diff --git a/tests/test_4700_pool_params.py b/tests/test_4700_pool_params.py index f3b7a643..2630889a 100644 --- a/tests/test_4700_pool_params.py +++ b/tests/test_4700_pool_params.py @@ -132,6 +132,7 @@ def test_4701(self): ("driver_name", "custom_driver"), ("use_sni", True), ("thick_mode_dsn_passthrough", True), + ("extra_auth_params", dict(extra1="A", extra2="B")), ] params = oracledb.PoolParams(**dict(values)) parts = [f"{name}={value!r}" for name, value in values] diff --git a/utils/fields.cfg b/utils/fields.cfg index d308a402..ac0f6ac7 100644 --- a/utils/fields.cfg +++ b/utils/fields.cfg @@ -512,6 +512,13 @@ description = connection string parameter handling and locating any optional tnsnames.ora configuration file +[extra_auth_params] +type = dict +description = + a dictionary containing configuration parameters necessary for Oracle + Database authentication using plugins, such as the Azure and OCI + cloud-native authentication plugins + [handle] type = int default = 0 From 2e576d6798b00badde6f7ff5b51569152a25ae0b Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 15:02:23 -0700 Subject: [PATCH 051/178] Remove duplicated release note. --- doc/src/release_notes.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 39a53738..37f2bec6 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -42,8 +42,6 @@ Thin Mode Changes #) The thread that closes connection pools on interpreter shutdown is now only started when the first pool is created and not at module import (`issue 426 `__). -#) Added support for Transaction Guard by adding support to get the values of - :attr:`Connection.ltxid` and :attr:`oracledb._Error.isrecoverable`. #) Support for :ref:`Pipelining ` is no longer considered a pre-release. #) Fixed hang when attempting to use pipelining against a database that From 48020d30a9527f2bc314b3b6cfb31f39f5f07bde Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 15:02:40 -0700 Subject: [PATCH 052/178] Fix formatting (from PR #444). --- src/oracledb/connection.py | 15 +++++++-------- utils/templates/connection.py | 15 +++++++-------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index 39cb4dca..f11b1ad8 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -36,24 +36,23 @@ import collections import functools import ssl +from typing import Any, Callable, Type, Optional, Union import oracledb from . import __name__ as MODULE_NAME -from typing import Any, Callable, Type, Union, Optional -from . import constants, driver_mode, errors -from . import base_impl, thick_impl, thin_impl +from . import base_impl, constants, driver_mode, errors, thick_impl, thin_impl from . 
import pool as pool_module -from .pipeline import Pipeline +from .aq import Queue, MessageProperties +from .base_impl import DB_TYPE_BLOB, DB_TYPE_CLOB, DB_TYPE_NCLOB, DbType from .connect_params import ConnectParams from .cursor import AsyncCursor, Cursor +from .dbobject import DbObjectType, DbObject from .lob import AsyncLOB, LOB -from .subscr import Subscription -from .aq import Queue, MessageProperties +from .pipeline import Pipeline from .soda import SodaDatabase -from .dbobject import DbObjectType, DbObject -from .base_impl import DB_TYPE_BLOB, DB_TYPE_CLOB, DB_TYPE_NCLOB, DbType +from .subscr import Subscription # named tuple used for representing global transactions Xid = collections.namedtuple( diff --git a/utils/templates/connection.py b/utils/templates/connection.py index 68c1f82e..e605f9ab 100644 --- a/utils/templates/connection.py +++ b/utils/templates/connection.py @@ -34,24 +34,23 @@ import collections import functools import ssl +from typing import Any, Callable, Type, Optional, Union import oracledb from . import __name__ as MODULE_NAME -from typing import Any, Callable, Type, Union, Optional -from . import constants, driver_mode, errors -from . import base_impl, thick_impl, thin_impl +from . import base_impl, constants, driver_mode, errors, thick_impl, thin_impl from . import pool as pool_module -from .pipeline import Pipeline +from .aq import Queue, MessageProperties +from .base_impl import DB_TYPE_BLOB, DB_TYPE_CLOB, DB_TYPE_NCLOB, DbType from .connect_params import ConnectParams from .cursor import AsyncCursor, Cursor +from .dbobject import DbObjectType, DbObject from .lob import AsyncLOB, LOB -from .subscr import Subscription -from .aq import Queue, MessageProperties +from .pipeline import Pipeline from .soda import SodaDatabase -from .dbobject import DbObjectType, DbObject -from .base_impl import DB_TYPE_BLOB, DB_TYPE_CLOB, DB_TYPE_NCLOB, DbType +from .subscr import Subscription # named tuple used for representing global transactions Xid = collections.namedtuple( From 4cca48577060332cd948ef8dbedd329ed7757b2b Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Tue, 18 Feb 2025 15:05:56 -0700 Subject: [PATCH 053/178] Correct typing for connect(), connect_async(), create_pool() and create_pool_async() (#438) (from PR #444). --- doc/src/release_notes.rst | 4 ++++ src/oracledb/connection.py | 8 ++++++-- src/oracledb/pool.py | 8 ++++++-- utils/templates/connection.py | 8 ++++++-- utils/templates/pool.py | 8 ++++++-- 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 37f2bec6..74d3e532 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -142,6 +142,10 @@ Common Changes #) All Oracle errors that result in the connection no longer being usable will be raised as ``DPY-4011: the database or network closed the connection`` with the underlying reason being included in the error message. +#) Fix typing issue with :meth:`oracledb.connect()`, + :meth:`oracledb.connect_async()`, :meth:`oracledb.create_pool()` and + :meth:`oracledb.create_pool_async()` + (`issue 438 `__). 
#) Error ``DPY-2053: python-oracledb thin mode cannot be used because thick mode has already been enabled`` is now raised when attempting to use asyncio in thick mode diff --git a/src/oracledb/connection.py b/src/oracledb/connection.py index f11b1ad8..06b2b49a 100644 --- a/src/oracledb/connection.py +++ b/src/oracledb/connection.py @@ -1206,7 +1206,9 @@ def unsubscribe(self, subscr: Subscription) -> None: subscr._impl.unsubscribe(self._impl) -def _connection_factory(f): +def _connection_factory( + f: Callable[..., Connection] +) -> Callable[..., Connection]: """ Decorator which checks the validity of the supplied keyword parameters by calling the original function (which does nothing), then creates and @@ -2025,7 +2027,9 @@ async def tpc_rollback(self, xid: Optional[Xid] = None) -> None: await self._impl.tpc_rollback(xid) -def _async_connection_factory(f): +def _async_connection_factory( + f: Callable[..., AsyncConnection] +) -> Callable[..., AsyncConnection]: """ Decorator which checks the validity of the supplied keyword parameters by calling the original function (which does nothing), then creates and diff --git a/src/oracledb/pool.py b/src/oracledb/pool.py index 80b0efbd..ea411da4 100644 --- a/src/oracledb/pool.py +++ b/src/oracledb/pool.py @@ -569,7 +569,9 @@ def reconfigure( self.ping_interval = ping_interval -def _pool_factory(f): +def _pool_factory( + f: Callable[..., ConnectionPool] +) -> Callable[..., ConnectionPool]: """ Decorator which checks the validity of the supplied keyword parameters by calling the original function (which does nothing), then creates and @@ -1097,7 +1099,9 @@ async def release( await connection.close() -def _async_pool_factory(f): +def _async_pool_factory( + f: Callable[..., AsyncConnectionPool] +) -> Callable[..., AsyncConnectionPool]: """ Decorator which checks the validity of the supplied keyword parameters by calling the original function (which does nothing), then creates and diff --git a/utils/templates/connection.py b/utils/templates/connection.py index e605f9ab..5548b3fb 100644 --- a/utils/templates/connection.py +++ b/utils/templates/connection.py @@ -1204,7 +1204,9 @@ def unsubscribe(self, subscr: Subscription) -> None: subscr._impl.unsubscribe(self._impl) -def _connection_factory(f): +def _connection_factory( + f: Callable[..., Connection] +) -> Callable[..., Connection]: """ Decorator which checks the validity of the supplied keyword parameters by calling the original function (which does nothing), then creates and @@ -1775,7 +1777,9 @@ async def tpc_rollback(self, xid: Optional[Xid] = None) -> None: await self._impl.tpc_rollback(xid) -def _async_connection_factory(f): +def _async_connection_factory( + f: Callable[..., AsyncConnection] +) -> Callable[..., AsyncConnection]: """ Decorator which checks the validity of the supplied keyword parameters by calling the original function (which does nothing), then creates and diff --git a/utils/templates/pool.py b/utils/templates/pool.py index dcaa0a51..ed86d431 100644 --- a/utils/templates/pool.py +++ b/utils/templates/pool.py @@ -567,7 +567,9 @@ def reconfigure( self.ping_interval = ping_interval -def _pool_factory(f): +def _pool_factory( + f: Callable[..., ConnectionPool] +) -> Callable[..., ConnectionPool]: """ Decorator which checks the validity of the supplied keyword parameters by calling the original function (which does nothing), then creates and @@ -778,7 +780,9 @@ async def release( await connection.close() -def _async_pool_factory(f): +def _async_pool_factory( + f: Callable[..., 
AsyncConnectionPool] +) -> Callable[..., AsyncConnectionPool]: """ Decorator which checks the validity of the supplied keyword parameters by calling the original function (which does nothing), then creates and From 5f5f8178702b6cfb7840e26fda9403d8efa9a974 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 19 Feb 2025 18:35:03 -0700 Subject: [PATCH 054/178] Doc updates. --- doc/src/release_notes.rst | 5 +++-- doc/src/user_guide/sql_execution.rst | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 74d3e532..cc7c4cbc 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -24,7 +24,7 @@ Thin Mode Changes #) Added support for property :attr:`ConnectionPool.max_lifetime_session` (`issue 410 `__). #) Added parameter :data:`ConnectParams.use_sni` to specify that the TLS SNI - extension should be used to reduce the number of TLS neegotiations that are + extension should be used to reduce the number of TLS negotiations that are needed to connect to the database. #) Added parameter :data:`ConnectParams.instance_name` to specify the instance name to use when connecting to the database. Added support for setting the @@ -96,7 +96,7 @@ Common Changes :meth:`AsyncConnection.fetch_df_batches()` to fetch data as DataFrames compliant with the Python DataFrame Interchange protocol. See :ref:`dataframeformat`. -#) Added support for Oracle Database 23ai SPARSE vectors. +#) Added support for Oracle Database 23.7 SPARSE vectors. #) Added support for :ref:`naming and caching connection pools ` during creation, and retrieving them later from the python-oracledb pool cache with :meth:`oracledb.get_pool()`. @@ -158,6 +158,7 @@ Common Changes #) Internal change: improve handling of metadata. #) Internal build tool change: bumped minimum Cython version to 3.0.10 to avoid bug in earlier versions. +#) Improved test suite and documentation. oracledb 2.5.1 (December 2024) diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst index d1f6a143..144cff8d 100644 --- a/doc/src/user_guide/sql_execution.rst +++ b/doc/src/user_guide/sql_execution.rst @@ -771,7 +771,7 @@ the results: # Adjust arraysize to tune the query fetch performance odf = connection.fetch_df_all(statement=sql, arraysize=100) - print(odf.odf.column_names()) + print(odf.column_names()) print(f"{odf.num_columns()} columns") print(f"{odf.num_rows()} rows") From 5bef1e4c9e5b0266e65a2b16faf7c773ccee896a Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 19 Feb 2025 18:35:19 -0700 Subject: [PATCH 055/178] Fixed bug resulting in an infinite loop when a fixed buffer is exhausted at the end of one of the internal fields. --- src/oracledb/impl/base/buffer.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/oracledb/impl/base/buffer.pyx b/src/oracledb/impl/base/buffer.pyx index 86d53477..2208c794 100644 --- a/src/oracledb/impl/base/buffer.pyx +++ b/src/oracledb/impl/base/buffer.pyx @@ -411,6 +411,8 @@ cdef class Buffer: cdef ssize_t num_bytes_this_time while num_bytes > 0: num_bytes_this_time = min(num_bytes, self.bytes_left()) + if num_bytes_this_time == 0: + num_bytes_this_time = num_bytes self._get_raw(num_bytes_this_time) num_bytes -= num_bytes_this_time From 3bc7d376207f273f787360c93ab16766cbd7626e Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 19 Feb 2025 18:36:16 -0700 Subject: [PATCH 056/178] Update doc and samples for new features. 
--- THIRD_PARTY_LICENSES.txt | 26 ++++++++ doc/src/release_notes.rst | 3 +- doc/src/user_guide/sql_execution.rst | 14 ++--- doc/src/user_guide/vector_data_type.rst | 83 ++++++++++++++++--------- samples/create_schema.py | 2 +- samples/dataframe_pandas.py | 1 - samples/dataframe_pandas_async.py | 1 - samples/sql/create_schema_23.sql | 11 ++-- samples/vector.py | 61 +++++++++++++----- samples/vector_async.py | 34 +++++++--- 10 files changed, 170 insertions(+), 66 deletions(-) diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt index 1c1cf597..5b5179c1 100644 --- a/THIRD_PARTY_LICENSES.txt +++ b/THIRD_PARTY_LICENSES.txt @@ -697,3 +697,29 @@ software distributed under the License is distributed on an KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +___________________________________________________________________________________________ + +Python dataframe interchange protocol + +MIT License + +Copyright (c) 2020 Consortium for Python Data API Standards contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index cc7c4cbc..063f801b 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -96,7 +96,8 @@ Common Changes :meth:`AsyncConnection.fetch_df_batches()` to fetch data as DataFrames compliant with the Python DataFrame Interchange protocol. See :ref:`dataframeformat`. -#) Added support for Oracle Database 23.7 SPARSE vectors. +#) Added support for Oracle Database 23.7 + :ref:`SPARSE vectors `. #) Added support for :ref:`naming and caching connection pools ` during creation, and retrieving them later from the python-oracledb pool cache with :meth:`oracledb.get_pool()`. diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst index 144cff8d..80507ba8 100644 --- a/doc/src/user_guide/sql_execution.rst +++ b/doc/src/user_guide/sql_execution.rst @@ -743,12 +743,13 @@ Fetching using the DataFrame Interchange Protocol Python-oracledb can fetch directly to the `Python DataFrame Interchange Protocol `__ -format. This then allows zero-copy data interchanges between Python data frame -libraries. It is an efficient way to work with data using Python libraries such -as `Apache Arrow `__, `Pandas -`__, `Polars `__, `NumPy -`__, `PyTorch `__, or to write files -in `Apache Parquet `__ format. +format. 
This can reduce application memory requirements and allow zero-copy +data interchanges between Python data frame libraries. It is an efficient way +to work with data using Python libraries such as `Apache Arrow +`__, `Pandas `__, `Polars +`__, `NumPy `__, `PyTorch +`__, or to write files in `Apache Parquet +`__ format. .. note:: @@ -914,7 +915,6 @@ org/docs/reference/api/pandas.DataFrame.html#pandas.DataFrame>`__ is: odf = connection.fetch_df_all(statement=sql, parameters=[myid], arraysize=1000) # Get a Pandas DataFrame from the data. - # This is a zero copy call df = pandas.api.interchange.from_dataframe(odf) # Perform various Pandas operations on the DataFrame diff --git a/doc/src/user_guide/vector_data_type.rst b/doc/src/user_guide/vector_data_type.rst index 8ac26b69..90cea8c9 100644 --- a/doc/src/user_guide/vector_data_type.rst +++ b/doc/src/user_guide/vector_data_type.rst @@ -218,14 +218,30 @@ Using SPARSE Vectors ==================== A Sparse vector is a vector which has zero value for most of its dimensions. -This vector only physically stores the non-zero values. A sparse vector is -supported when you are using Oracle Database 23.7 or later. +This vector only physically stores the non-zero values. For more information +on sparse vectors, see the `Oracle AI Vector search User's Guide `__. -Sparse vectors can store the total number of dimensions, an array of indices, -and an array of values. The storage formats that can be used with sparse -vectors are float32, float64, and int8. Note that the binary storage format -cannot be used with sparse vectors. You can define a column for a sparse -vector using the following format:: +Sparse vectors are supported when you are using Oracle Database 23.7 or later. + +Sparse vectors are represented by the total number of vector dimensions, an +array of indices, and an array of values where each value's location in the +vector is indicated by the corresponding indices array position. All other +vector values are treated as zero. The storage formats that can be used with +sparse vectors are float32, float64, and int8. Note that the binary storage +format cannot be used with sparse vectors. + +For example, a string representation could be:: + + [25, [5, 8, 11], [25.25, 6.125, 8.25]] + +In this example, the sparse vector has 25 dimensions. Only indices 5, 8, and 11 +have values which are 25.25, 6.125, and 8.25 respectively. All of the other +values are zero. + +In Oracle Database, you can define a column for a sparse vector using the +following format:: VECTOR(number_of_dimensions, dimension_storage_format, sparse) @@ -239,7 +255,7 @@ For example, to create a table with three columns for sparse vectors: int8sparsecol vector(35, int8, sparse) ) -In this example the: +In this example: - The float32sparsecol column can store sparse vector data of 25 dimensions where each dimension value is a 32-bit floating-point number. @@ -256,18 +272,9 @@ Inserting SPARSE Vectors ------------------------ With python-oracledb, sparse vector data can be inserted using -:ref:`SparseVector objects `. You can specify the number of -dimensions, an array of indices, and an array of values as the data for a -sparse vector. For example, the string representation is:: - - [25, [5,8,11], [25.25, 6.125, 8.25]] - -In this example, the sparse vector has 25 dimensions. Only indices 5, 8, and -11 have values 25.25, 6.125, and 8.25 respectively. All of the other values -are zero. - -The SparseVector objects are used as bind values when inserting sparse vector -columns. 
For example: +:ref:`SparseVector objects `. The SparseVector objects are +used when fetching vectors, and as bind values when inserting sparse vector +columns. For example to insert data: .. code-block:: python @@ -289,7 +296,7 @@ columns. For example: ) cursor.execute( - "insert into vector_sparse_table (:1, :2, :3)", + "insert into vector_sparse_table values (:1, :2, :3)", [float32_val, float64_val, int8_val] ) @@ -298,23 +305,43 @@ columns. For example: Fetching Sparse Vectors ----------------------- -With python-oracledb, sparse vector columns are fetched in the same format -accepted by Oracle Database by using the str() function. For example: +With python-oracledb, sparse vector columns are fetched as :ref:`SparseVector +objects `: .. code-block:: python - cursor.execute("select * from vec_sparse") + cursor.execute("select * from vector_sparse_table") + for row in cursor: + print(row) + + +This prints:: + + (oracledb.SparseVector(25, array('I', [6, 10, 18]), array('f', [26.25, 129.625, 579.875])), + oracledb.SparseVector(30, array('I', [9, 16, 24]), array('d', [19.125, 78.5, 977.375])), + oracledb.SparseVector(35, array('I', [10, 20, 30]), array('b', [26, 125, -37]))) + +Depending on context, the SparseVector type will be treated as a string: + +.. code-block:: python + + cursor.execute("select * from vector_sparse_table") for float32_val, float64_val, int8_val in cursor: - print("float32:", str(float32_val)) - print("float64:", str(float64_val)) - print("int8:", str(int8_val)) + print("float32:", float32_val) + print("float64:", float64_val) + print("int8:", int8_val) -This prints the following output:: +This prints:: float32: [25, [6, 10, 18], [26.25, 129.625, 579.875]] float64: [30, [9, 16, 24], [19.125, 78.5, 977.375]] int8: [35, [10, 20, 30], [26, 125, -37]] +Values can also be explicitly passed to `str() +`__, if needed. + +**SPARSE Vector Metadata** + The :ref:`FetchInfo ` object that is returned as part of the fetched metadata contains attributes :attr:`FetchInfo.vector_dimensions`, :attr:`FetchInfo.vector_format`, and :attr:`FetchInfo.vector_is_sparse` which diff --git a/samples/create_schema.py b/samples/create_schema.py index d2bc0997..409b367e 100644 --- a/samples/create_schema.py +++ b/samples/create_schema.py @@ -54,7 +54,7 @@ sample_env.run_sql_script( conn, "create_schema_21", main_user=sample_env.get_main_user() ) -if sample_env.get_server_version() >= (23, 5): +if sample_env.get_server_version() >= (23, 7): sample_env.run_sql_script( conn, "create_schema_23", main_user=sample_env.get_main_user() ) diff --git a/samples/dataframe_pandas.py b/samples/dataframe_pandas.py index f6165757..10fa1b8b 100644 --- a/samples/dataframe_pandas.py +++ b/samples/dataframe_pandas.py @@ -51,7 +51,6 @@ odf = connection.fetch_df_all(statement=SQL, arraysize=100) # Get a Pandas DataFrame from the data. -# This is a zero copy call df = pandas.api.interchange.from_dataframe(odf) # Perform various Pandas operations on the DataFrame diff --git a/samples/dataframe_pandas_async.py b/samples/dataframe_pandas_async.py index b688773f..1860cf65 100644 --- a/samples/dataframe_pandas_async.py +++ b/samples/dataframe_pandas_async.py @@ -55,7 +55,6 @@ async def main(): odf = await connection.fetch_df_all(statement=SQL, arraysize=100) # Get a Pandas DataFrame from the data. 
- # This is a zero copy call df = pandas.api.interchange.from_dataframe(odf) # Perform various Pandas operations on the DataFrame diff --git a/samples/sql/create_schema_23.sql b/samples/sql/create_schema_23.sql index e96e7f86..30daef5f 100644 --- a/samples/sql/create_schema_23.sql +++ b/samples/sql/create_schema_23.sql @@ -27,15 +27,16 @@ * * Performs the actual work of creating and populating the schemas with the * database objects used by the python-oracledb samples that require Oracle - * Database 23.5 or higher. It is executed by the Python script + * Database 23.7 or higher. It is executed by the Python script * create_schema.py. *---------------------------------------------------------------------------*/ create table &main_user..SampleVectorTab ( - v32 vector(3, float32), - v64 vector(3, float64), - v8 vector(3, int8), - vbin vector(24, binary) + v32 vector(3, float32), + v64 vector(3, float64), + v8 vector(3, int8), + vbin vector(24, binary), + v64sparse vector(30, float64, sparse) ) / diff --git a/samples/vector.py b/samples/vector.py index 49f90bbf..eb38af06 100644 --- a/samples/vector.py +++ b/samples/vector.py @@ -45,16 +45,21 @@ params=sample_env.get_connect_params(), ) -# this script only works with Oracle Database 23.5 or later -if sample_env.get_server_version() < (23, 5): - sys.exit("This example requires Oracle Database 23.5 or later.") +# this script only works with Oracle Database 23.7 or later +# +# The VECTOR datatype was initially introduced in Oracle Database 23.4. +# The BINARY vector format was introduced in Oracle Database 23.5. +# The SPARSE vector format was introduced in Oracle Database 23.7. + +if sample_env.get_server_version() < (23, 7): + sys.exit("This example requires Oracle Database 23.7 or later.") -# this script works with thin mode, or with thick mode using Oracle Client 23.5 +# this script works with thin mode, or with thick mode using Oracle Client 23.7 # or later -if not connection.thin and oracledb.clientversion()[:2] < (23, 5): +if not connection.thin and oracledb.clientversion()[:2] < (23, 7): sys.exit( "This example requires python-oracledb thin mode, or Oracle Client" - " 23.5 or later" + " 23.7 or later" ) with connection.cursor() as cursor: @@ -63,11 +68,20 @@ vector1_data_64 = array.array("d", [11.25, 11.75, 11.5]) vector1_data_8 = array.array("b", [1, 2, 3]) vector1_data_bin = array.array("B", [180, 150, 100]) + vector1_data_sparse64 = oracledb.SparseVector( + 30, [9, 16, 24], array.array("d", [19.125, 78.5, 977.375]) + ) cursor.execute( - """insert into SampleVectorTab (v32, v64, v8, vbin) - values (:1, :2, :3, :4)""", - [vector1_data_32, vector1_data_64, vector1_data_8, vector1_data_bin], + """insert into SampleVectorTab (v32, v64, v8, vbin, v64sparse) + values (:1, :2, :3, :4, :5)""", + [ + vector1_data_32, + vector1_data_64, + vector1_data_8, + vector1_data_bin, + vector1_data_sparse64, + ], ) # Multi-row insert @@ -75,26 +89,45 @@ vector2_data_64 = array.array("d", [22.25, 22.75, 22.5]) vector2_data_8 = array.array("b", [4, 5, 6]) vector2_data_bin = array.array("B", [40, 15, 255]) + vector2_data_sparse64 = oracledb.SparseVector( + 30, [3, 10, 12], array.array("d", [2.5, 2.5, 1.0]) + ) vector3_data_32 = array.array("f", [3.625, 3.5, 3.0]) vector3_data_64 = array.array("d", [33.25, 33.75, 33.5]) vector3_data_8 = array.array("b", [7, 8, 9]) vector3_data_bin = array.array("B", [0, 17, 101]) + vector3_data_sparse64 = oracledb.SparseVector( + 30, [8, 15, 29], array.array("d", [1.125, 200.5, 100.0]) + ) rows = [ - (vector2_data_32, 
vector2_data_64, vector2_data_8, vector2_data_bin), - (vector3_data_32, vector3_data_64, vector3_data_8, vector3_data_bin), + ( + vector2_data_32, + vector2_data_64, + vector2_data_8, + vector2_data_bin, + vector2_data_sparse64, + ), + ( + vector3_data_32, + vector3_data_64, + vector3_data_8, + vector3_data_bin, + vector3_data_sparse64, + ), ] cursor.executemany( - """insert into SampleVectorTab (v32, v64, v8, vbin) - values (:1, :2, :3, :4)""", + """insert into SampleVectorTab (v32, v64, v8, vbin, v64sparse) + values (:1, :2, :3, :4, :5)""", rows, ) # Query cursor.execute("select * from SampleVectorTab") - # Each vector is represented as an array.array type + # Each non-sparse vector is represented as an array.array type. + # Sparse vectors are represented as oracledb.SparseVector() instances for row in cursor: print(row) diff --git a/samples/vector_async.py b/samples/vector_async.py index 1f349e70..47b11f65 100644 --- a/samples/vector_async.py +++ b/samples/vector_async.py @@ -46,9 +46,14 @@ async def main(): params=sample_env.get_connect_params(), ) - # this script only works with Oracle Database 23.5 or later - if sample_env.get_server_version() < (23, 5): - sys.exit("This example requires Oracle Database 23.5 or later.") + # this script only works with Oracle Database 23.7 or later + # + # The VECTOR datatype was initially introduced in Oracle Database 23.4. + # The BINARY vector format was introduced in Oracle Database 23.5. + # The SPARSE vector format was introduced in Oracle Database 23.7. + + if sample_env.get_server_version() < (23, 7): + sys.exit("This example requires Oracle Database 23.7 or later.") with connection.cursor() as cursor: # Single-row insert @@ -56,15 +61,19 @@ async def main(): vector1_data_64 = array.array("d", [11.25, 11.75, 11.5]) vector1_data_8 = array.array("b", [1, 2, 3]) vector1_data_bin = array.array("B", [180, 150, 100]) + vector1_data_sparse64 = oracledb.SparseVector( + 30, [9, 16, 24], array.array("d", [19.125, 78.5, 977.375]) + ) await cursor.execute( - """insert into SampleVectorTab (v32, v64, v8, vbin) - values (:1, :2, :3, :4)""", + """insert into SampleVectorTab (v32, v64, v8, vbin, v64sparse) + values (:1, :2, :3, :4, :5)""", [ vector1_data_32, vector1_data_64, vector1_data_8, vector1_data_bin, + vector1_data_sparse64, ], ) @@ -73,11 +82,17 @@ async def main(): vector2_data_64 = array.array("d", [22.25, 22.75, 22.5]) vector2_data_8 = array.array("b", [4, 5, 6]) vector2_data_bin = array.array("B", [40, 15, 255]) + vector2_data_sparse64 = oracledb.SparseVector( + 30, [3, 10, 12], array.array("d", [2.5, 2.5, 1.0]) + ) vector3_data_32 = array.array("f", [3.625, 3.5, 3.0]) vector3_data_64 = array.array("d", [33.25, 33.75, 33.5]) vector3_data_8 = array.array("b", [7, 8, 9]) vector3_data_bin = array.array("B", [0, 17, 101]) + vector3_data_sparse64 = oracledb.SparseVector( + 30, [8, 15, 29], array.array("d", [1.125, 200.5, 100.0]) + ) rows = [ ( @@ -85,25 +100,28 @@ async def main(): vector2_data_64, vector2_data_8, vector2_data_bin, + vector2_data_sparse64, ), ( vector3_data_32, vector3_data_64, vector3_data_8, vector3_data_bin, + vector3_data_sparse64, ), ] await cursor.executemany( - """insert into SampleVectorTab (v32, v64, v8, vbin) - values (:1, :2, :3, :4)""", + """insert into SampleVectorTab (v32, v64, v8, vbin, v64sparse) + values (:1, :2, :3, :4, :5)""", rows, ) # Query await cursor.execute("select * from SampleVectorTab") - # Each vector is represented as an array.array type + # Each non-sparse vector is represented as an array.array type. 
+ # Sparse vectors are represented as oracledb.SparseVector() instances async for row in cursor: print(row) From d3555d455885f9ad19c47bd8c0b30ca94b575525 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Wed, 19 Feb 2025 18:36:33 -0700 Subject: [PATCH 057/178] Raise not supported exception when fetching a degenerate image (actual image is stored in a LOB instead of inline) --- doc/src/release_notes.rst | 3 +++ src/oracledb/impl/thin/dbobject.pyx | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 063f801b..05fead13 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -51,6 +51,9 @@ Thin Mode Changes #) Fixed bug when using :ref:`asyncio ` and calling a stored procedure with data that exceeds 32767 bytes in length (`issue 441 `__). +#) Fixed bug when attempting to fetch a database object stored in a LOB. The + fetch will still fail but with an unsupported error exception instead of a + hang. #) Error ``DPY-6002: The distinguished name (DN) on the server certificate does not match the expected value: "{expected_dn}"`` now shows the expected value. diff --git a/src/oracledb/impl/thin/dbobject.pyx b/src/oracledb/impl/thin/dbobject.pyx index 6aa40dd0..81508712 100644 --- a/src/oracledb/impl/thin/dbobject.pyx +++ b/src/oracledb/impl/thin/dbobject.pyx @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2022, 2024, Oracle and/or its affiliates. +# Copyright (c) 2022, 2025, Oracle and/or its affiliates. # # This software is dual-licensed to you under the Universal Permissive License # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License @@ -76,6 +76,8 @@ cdef class DbObjectPickleBuffer(GrowableBuffer): self.read_ub1(flags) self.read_ub1(version) self.skip_length() + if flags[0] & TNS_OBJ_IS_DEGENERATE: + errors._raise_not_supported("DbObject stored in a LOB") if flags[0] & TNS_OBJ_NO_PREFIX_SEG: return 0 self.read_length(&prefix_seg_length) From 310aa504bc915db646124572eacaf46cabf76032 Mon Sep 17 00:00:00 2001 From: Anthony Tuininga Date: Mon, 24 Feb 2025 15:53:06 -0700 Subject: [PATCH 058/178] Doc updates. 
--- doc/src/.static/custom.css | 7 +- doc/src/api_manual/connect_params.rst | 9 +- doc/src/api_manual/connection.rst | 12 +- doc/src/api_manual/defaults.rst | 89 +- doc/src/api_manual/module.rst | 232 ++- doc/src/release_notes.rst | 19 +- doc/src/user_guide/appendix_a.rst | 6 +- doc/src/user_guide/appendix_b.rst | 147 +- doc/src/user_guide/appendix_c.rst | 23 +- doc/src/user_guide/connection_handling.rst | 2071 +++++++++++--------- doc/src/user_guide/extending.rst | 16 +- doc/src/user_guide/initialization.rst | 284 +-- doc/src/user_guide/installation.rst | 38 +- doc/src/user_guide/tracing.rst | 12 +- doc/src/user_guide/troubleshooting.rst | 34 +- 15 files changed, 1604 insertions(+), 1395 deletions(-) diff --git a/doc/src/.static/custom.css b/doc/src/.static/custom.css index b6b3ac57..17a42f2d 100644 --- a/doc/src/.static/custom.css +++ b/doc/src/.static/custom.css @@ -1,4 +1,7 @@ -/* Added code to display tables without horizontal scrollbars */ +/* Added code to display tables without horizontal scrollbars +add an extra line between two paragraphs and define the line +spacing in tables */ .wy-table-responsive table td, .wy-table-responsive table th { - white-space: normal; + white-space: pre-wrap; + line-height: 10pt; } diff --git a/doc/src/api_manual/connect_params.rst b/doc/src/api_manual/connect_params.rst index 6c225e6d..6e51f1a8 100644 --- a/doc/src/api_manual/connect_params.rst +++ b/doc/src/api_manual/connect_params.rst @@ -145,13 +145,8 @@ ConnectParams Attributes .. attribute:: ConnectParams.config_dir This read-only attribute is a string that identifies the directory in which - the configuration files such as tnsnames.ora are found. The default is the - value of :attr:`defaults.config_dir`. - - This attribute is only supported in python-oracledb Thin mode. - - For python-oracledb Thick mode, use the ``config_dir`` parameter of - :meth:`oracledb.init_oracle_client()`. + the :ref:`optional configuration files ` are found. The + default is the value of :attr:`defaults.config_dir`. .. attribute:: ConnectParams.connection_id_prefix diff --git a/doc/src/api_manual/connection.rst b/doc/src/api_manual/connection.rst index c4f5f409..b962d875 100644 --- a/doc/src/api_manual/connection.rst +++ b/doc/src/api_manual/connection.rst @@ -140,11 +140,11 @@ Connection Methods can also be a list of dictionaries, where the keys match the bind variable placeholder names in ``statement``. - The ``arraysize`` parameter can specified to tune performance of fetching - data across the network. It defaults to :attr:`defaults.arraysize`. - Internally, the ``fetch_df_all()``'s :attr:`Cursor.prefetchrows` size is - always set to the value of the explicit or default ``arraysize`` parameter - value. + The ``arraysize`` parameter can be specified to tune performance of + fetching data across the network. It defaults to + :attr:`defaults.arraysize`. Internally, the ``fetch_df_all()``'s + :attr:`Cursor.prefetchrows` size is always set to the value of the explicit + or default ``arraysize`` parameter value. See :ref:`dataframeformat` for the supported data types and examples. @@ -170,7 +170,7 @@ Connection Methods The ``size`` parameter controls the number of records fetched in each batch. It defaults to :attr:`defaults.arraysize`. Internally, the - ``fetch_df_batches()``'s :attr:`Cursor.arraysize`. and + ``fetch_df_batches()``'s :attr:`Cursor.arraysize` and :attr:`Cursor.prefetchrows` sizes are always set to the value of the explicit or default ``size`` parameter value. 
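As a brief illustration of batch fetching with an explicit ``size`` value, a
minimal sketch is shown below. The query, table name, and batch size are
assumptions for the example only; each iteration yields one batch as a data
frame object, which can optionally be converted via the DataFrame Interchange
Protocol:

.. code-block:: python

    import pandas

    sql = "select employee_id, last_name from employees"

    # Each iteration yields one batch of up to 1000 rows
    for odf in connection.fetch_df_batches(statement=sql, size=1000):
        df = pandas.api.interchange.from_dataframe(odf)
        print(df.shape)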
diff --git a/doc/src/api_manual/defaults.rst b/doc/src/api_manual/defaults.rst index 120b11fc..03464fb5 100644 --- a/doc/src/api_manual/defaults.rst +++ b/doc/src/api_manual/defaults.rst @@ -29,14 +29,31 @@ Defaults Attributes .. attribute:: defaults.config_dir - The directory in which optional configuration files such as - ``tnsnames.ora`` will be read in python-oracledb Thin mode. This attribute - takes its initial value from the environment variable ``TNS_ADMIN``. + The directory in which the optional configuration file ``tnsnames.ora`` + will be read in python-oracledb Thin mode. - This attribute is not used by the python-oracledb Thick mode: the usual - Oracle Client search path behavior for configuration files is followed, see + At time of ``import oracledb`` the value of + ``oracledb.defaults.config_dir`` will be set to (first one wins): + + - the value of ``$TNS_ADMIN``, if ``TNS_ADMIN`` is set. + + - ``$ORACLE_HOME/network/admin``, if ``$ORACLE_HOME`` is set. + + Otherwise, ``oracledb.defaults.config_dir`` will not be set. + + This attribute is used in python-oracledb Thin mode. It is also used in + Thick mode if :attr:`defaults.thick_mode_dsn_passthrough` is *False*, see :ref:`optnetfiles`. + .. versionchanged:: 3.0.0 + + The directory ``$ORACLE_HOME/network/admin`` was added to the + heuristic. + + At completion of a call to :meth:`oracledb.init_oracle_client()` in + Thick mode, the value of :attr:`defaults.config_dir` may get changed + by python-oracledb. + .. attribute:: defaults.driver_name The default value that represents the driver used by the client to connect @@ -169,27 +186,49 @@ Defaults Attributes .. attribute:: defaults.thick_mode_dsn_passthrough - The default value that determines whether :ref:`connection strings - ` passed to :meth:`oracledb.connect()` and - :meth:`oracledb.create_pool()` in python-oracledb Thick mode will be parsed - by Oracle Client libraries or by python-oracledb itself. - - When the value of this attribute is *True*, then connection strings passed - to these methods will be sent unchanged to the Oracle Client libraries. - - Setting this attribute to *False* makes Thick and Thin mode applications - behave similarly regarding connection string parameter handling and - locating any optional :ref:`tnsnames.ora files ` configuration - file, see :ref:`usingconfigfiles`. Connection strings used in connection - and pool creation methods in Thick mode are parsed by python-oracledb - itself and a generated connect descriptor is sent to the Oracle Client - libraries. The location of any optional :ref:`tnsnames.ora file - ` used to resolve a :ref:`TNS Alias ` is - determined by python-oracledb heuristics instead of by the Oracle Client - libraries. + The value that determines whether :ref:`connection strings ` + passed as the ``dsn`` parameter to :meth:`oracledb.connect()`, + :meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, and + :meth:`oracledb.create_pool_async()` in python-oracledb Thick mode will be + parsed by Oracle Client libraries or by python-oracledb itself. + + When ``thick_mode_dsn_passthrough`` is the default value `True`, the + behavior of python-oracledb 2.5 and earlier versions occurs: Thick mode + passes connect strings unchanged to the Oracle Client libraries to + handle. Those libraries have their own heuristics for locating the optional + :ref:`tnsnames.ora `, if used. 
+ + When ``thick_mode_dsn_passthrough`` is `False`, python-oracledb Thick mode + behaves similarly to Thin mode, which can be helpful for applications that + may be run in either mode: + + - The search path used to locate and read any optional :ref:`tnsnames.ora + ` file is handled in the python-oracledb driver. Different + :ref:`tnsnames.ora ` files can be used by each + connection. Note loading of optional Thick mode files such as + ``sqlnet.ora`` and ``oraaccess.xml`` is always handled by Oracle Client + libraries regardless of the value of ``thick_mode_dsn_passthrough`` + because it is those libraries that use these files. + + - All connect strings will be parsed by the python-oracledb driver and a + generated connect descriptor is sent to the database. Parameters + unrecognized by python-oracledb in :ref:`Easy Connect strings + ` are discarded. In :ref:`full connect descriptors + ` passed explicitly as the ``dsn`` parameter value or + stored in a :ref:`tnsnames.ora ` file, any parameters that + are unrecognized by python-oracledb in the ``DESCRIPTION``, + ``CONNECT_DATA`` and ``SECURITY`` sections will be passed through to the + database unchanged, while unrecognized parameters in other sections are + discarded. + + - If a :ref:`Centralized Configuration Provider ` + is used for connection configuration, any :ref:`python-oracledb parameter + values ` in the configuration will be used. + + The value of ``thick_mode_dsn_passthrough`` is ignored in python-oracledb + Thin mode, which always parses all connect strings (including reading a + :ref:`tnsnames.ora ` file, if required). This attribute has an initial value of *True*. - This attribute is ignored in python-oracledb Thin mode. - .. versionadded:: 3.0.0 diff --git a/doc/src/api_manual/module.rst b/doc/src/api_manual/module.rst index afb66573..1a03fe00 100644 --- a/doc/src/api_manual/module.rst +++ b/doc/src/api_manual/module.rst @@ -284,10 +284,8 @@ Oracledb Methods mode. The default value is *False*. The ``config_dir`` parameter is expected to be a string that indicates the - directory in which configuration files (tnsnames.ora) are found. This value - is only used in python-oracledb Thin mode. The default is the value of - :attr:`defaults.config_dir`. For python-oracledb Thick mode, use the - ``config_dir`` parameter of :func:`oracledb.init_oracle_client()`. + directory in which :ref:`optional configuration files ` are + found. The default is the value of :attr:`defaults.config_dir`. The ``appcontext`` parameter is expected to be a list of 3-tuples that identifies the application context used by the connection. This parameter @@ -369,10 +367,12 @@ Oracledb Methods required. The ``use_sni`` parameter is expected to be a boolean which indicates - whether to use the TLS Server Name Indicator (SNI) extension to bypass the + whether to use the TLS Server Name Indication (SNI) extension to bypass the second TLS negotiation that would otherwise be required. This parameter is - used in both python-oracledb Thin and Thick modes. The default value is - False. + used in both python-oracledb Thin and Thick modes. This parameter requires + Oracle Database 23.7. The default value is *False*. See the `SQL*Net + documentation `__ for more details. The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle @@ -406,13 +406,14 @@ Oracledb Methods are parsed by python-oracledb itself and a generated connect descriptor is sent to the Oracle Client libraries. 
This value is only used in the python-oracledb Thick mode. The default value is the value of - :attr:`defaults.thick_mode_dsn_passthrough`. + :attr:`defaults.thick_mode_dsn_passthrough`. For more information, see + :ref:`usingconfigfiles`. The ``extra_auth_params`` parameter is expected to be a dictionary containing the configuration parameters necessary for Oracle Database - authentication using :ref:`Azure ` or - :ref:`OCI ` cloud native authentication plugins. - This value is used in both the python-oracledb Thin and Thick modes. See + authentication using :ref:`OCI ` or :ref:`Azure + ` cloud native authentication plugins. This value is + used in both the python-oracledb Thin and Thick modes. See :ref:`tokenauth`. If the ``handle`` parameter is specified, it must be of type OCISvcCtx\* @@ -426,7 +427,7 @@ Oracledb Methods .. versionchanged:: 3.0.0 The ``pool_alias``, ``instance_name``, ``use_sni``, - ``thick_mode_dsn_passthrough`` and ``extra_auth_params`` parameters + ``thick_mode_dsn_passthrough``, and ``extra_auth_params`` parameters were added. The ``pool`` parameter was deprecated: use :meth:`ConnectionPool.acquire()` instead. @@ -653,8 +654,8 @@ Oracledb Methods The ``matchanytag`` parameter is ignored in the python-oracledb Thin mode. The ``config_dir`` parameter is expected to be a string that indicates the - directory in which configuration files (tnsnames.ora) are found. The - default is the value of :attr:`defaults.config_dir`. + directory in which :ref:`optional configuration files ` are + found. The default is the value of :attr:`defaults.config_dir`. The ``appcontext`` parameter is expected to be a list of 3-tuples that identifies the application context used by the connection. This parameter @@ -725,10 +726,12 @@ Oracledb Methods required. The ``use_sni`` parameter is expected to be a boolean which indicates - whether to use the TLS Server Name Indicator (SNI) extension to bypass the + whether to use the TLS Server Name Indication (SNI) extension to bypass the second TLS negotiation that would otherwise be required. This parameter is - used in both python-oracledb Thin and Thick modes. The default value is - False. + used in both python-oracledb Thin and Thick modes. This parameter requires + Oracle Database 23.7. The default value is *False*. See the `SQL*Net + documentation `__ for more details. The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle @@ -757,8 +760,8 @@ Oracledb Methods The ``extra_auth_params`` parameter is expected to be a dictionary containing the configuration parameters necessary for Oracle Database - authentication using :ref:`Azure ` or - :ref:`OCI ` cloud native authentication plugins. + authentication using :ref:`OCI ` or :ref:`Azure + ` cloud native authentication plugins. This value is used in both the python-oracledb Thin and Thick modes. See :ref:`tokenauth`. @@ -768,7 +771,7 @@ Oracledb Methods .. versionchanged:: 3.0.0 The ``pool_alias``, ``instance_name``, ``use_sni``, - ``thick_mode_dsn_passthrough`` and ``extra_auth_params`` parameters + ``thick_mode_dsn_passthrough``, and ``extra_auth_params`` parameters were added. The ``pool`` parameter was deprecated: use :meth:`AsyncConnectionPool.acquire()` instead. @@ -999,10 +1002,8 @@ Oracledb Methods connection from a pool. The default value is *False*. The ``config_dir`` parameter is expected to be a string that indicates the - directory in which configuration files (tnsnames.ora) are found. 
This value - is only used in python-oracledb Thin mode. The default is the value of - :attr:`defaults.config_dir`. For python-oracledb Thick mode, use - the ``config_dir`` parameter of :func:`oracledb.init_oracle_client()`. + directory in which the :ref:`tnsnames.ora ` configuration file + is located. The ``appcontext`` parameter is expected to be a list of 3-tuples that identifies the application context used by the connection. This parameter @@ -1084,10 +1085,12 @@ Oracledb Methods required. The ``use_sni`` parameter is expected to be a boolean which indicates - whether to use the TLS Server Name Indicator (SNI) extension to bypass the + whether to use the TLS Server Name Indication (SNI) extension to bypass the second TLS negotiation that would otherwise be required. This parameter is - used in both python-oracledb Thin and Thick modes. The default value is - False. + used in both python-oracledb Thin and Thick modes. This parameter requires + Oracle Database 23.7. The default value is *False*. See the `SQL*Net + documentation `__ for more details. The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle @@ -1121,13 +1124,14 @@ Oracledb Methods are parsed by python-oracledb itself and a generated connect descriptor is sent to the Oracle Client libraries. This value is only used in the python-oracledb Thick mode. The default value is the value of - :attr:`defaults.thick_mode_dsn_passthrough`. + :attr:`defaults.thick_mode_dsn_passthrough`. For more information, see + :ref:`usingconfigfiles`. The ``extra_auth_params`` parameter is expected to be a dictionary containing the configuration parameters necessary for Oracle Database - authentication using :ref:`Azure ` or - :ref:`OCI ` cloud native authentication plugins. - This value is used in both the python-oracledb Thin and Thick modes. See + authentication using :ref:`OCI ` or :ref:`Azure + ` cloud native authentication plugins. This value is + used in both the python-oracledb Thin and Thick modes. See :ref:`tokenauth`. The ``handle`` parameter is expected to be an integer which represents a @@ -1501,10 +1505,8 @@ Oracledb Methods mode. The default value is *False*. The ``config_dir`` parameter is expected to be a string that indicates the - directory in which configuration files (tnsnames.ora) are found. This value - is only used in python-oracledb Thin mode. The default is the value of - :attr:`defaults.config_dir`. For python-oracledb Thick mode, use - the ``config_dir`` parameter of :func:`oracledb.init_oracle_client()`. + directory in which the :ref:`tnsnames.ora ` configuration file + is located. The default is the value of :attr:`defaults.config_dir`. The ``appcontext`` parameter is expected to be a list of 3-tuples that identifies the application context used by the connection. This parameter @@ -1586,10 +1588,12 @@ Oracledb Methods required. The ``use_sni`` parameter is expected to be a boolean which indicates - whether to use the TLS Server Name Indicator (SNI) extension to bypass the + whether to use the TLS Server Name Indication (SNI) extension to bypass the second TLS negotiation that would otherwise be required. This parameter is - used in both python-oracledb Thin and Thick modes. The default value is - False. + used in both python-oracledb Thin and Thick modes. This parameter requires + Oracle Database 23.7. The default value is *False*. See the `SQL*Net + documentation `__ for more details. 
The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle @@ -1623,13 +1627,14 @@ Oracledb Methods are parsed by python-oracledb itself and a generated connect descriptor is sent to the Oracle Client libraries. This value is only used in the python-oracledb Thick mode. The default value is - :attr:`defaults.thick_mode_dsn_passthrough`. + :attr:`defaults.thick_mode_dsn_passthrough`. For more information, see + :ref:`usingconfigfiles`. The ``extra_auth_params`` parameter is expected to be a dictionary containing the configuration parameters necessary for Oracle Database - authentication using :ref:`Azure ` or - :ref:`OCI ` cloud native authentication plugins. - This value is used in both the python-oracledb Thin and Thick modes. See + authentication using :ref:`OCI ` or :ref:`Azure + ` cloud native authentication plugins. This value is + used in both the python-oracledb Thin and Thick modes. See :ref:`tokenauth`. If the ``handle`` parameter is specified, it must be of type OCISvcCtx\* @@ -1643,7 +1648,7 @@ Oracledb Methods .. versionchanged:: 3.0.0 The ``pool_alias``, ``instance_name``, ``use_sni``, - ``thick_mode_dsn_passthrough`` and ``extra_auth_params`` parameters + ``thick_mode_dsn_passthrough``, and ``extra_auth_params`` parameters were added. .. versionchanged:: 2.5.0 @@ -1933,8 +1938,8 @@ Oracledb Methods The ``matchanytag`` parameter is ignored in the python-oracledb Thin mode. The ``config_dir`` parameter is expected to be a string that indicates the - directory in which configuration files (tnsnames.ora) are found. The - default is the value of :attr:`defaults.config_dir`. + directory in which the :ref:`tnsnames.ora ` configuration file + is located. The ``appcontext`` parameter is expected to be a list of 3-tuples that identifies the application context used by the connection. This parameter @@ -2005,10 +2010,12 @@ Oracledb Methods required. The ``use_sni`` parameter is expected to be a boolean which indicates - whether to use the TLS Server Name Indicator (SNI) extension to bypass the + whether to use the TLS Server Name Indication (SNI) extension to bypass the second TLS negotiation that would otherwise be required. This parameter is - used in both python-oracledb Thin and Thick modes. The default value is - False. + used in both python-oracledb Thin and Thick modes. This parameter requires + Oracle Database 23.7. The default value is *False*. See the `SQL*Net + documentation `__ for more details. The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle @@ -2037,9 +2044,9 @@ Oracledb Methods The ``extra_auth_params`` parameter is expected to be a dictionary containing the configuration parameters necessary for Oracle Database - authentication using :ref:`Azure ` or - :ref:`OCI ` cloud native authentication plugins. - This value is used in both the python-oracledb Thin and Thick modes. See + authentication using :ref:`OCI ` or :ref:`Azure + ` cloud native authentication plugins. This value is + used in both the python-oracledb Thin and Thick modes. See :ref:`tokenauth`. The ``handle`` and ``thick_mode_dsn_passthrough`` parameters are ignored in @@ -2048,7 +2055,7 @@ Oracledb Methods .. versionchanged:: 3.0.0 The ``pool_alias``, ``instance_name``, ``use_sni``, - ``thick_mode_dsn_passthrough`` and ``extra_auth_params`` parameters + ``thick_mode_dsn_passthrough``, and ``extra_auth_params`` parameters were added. .. 
versionchanged:: 2.5.0 @@ -2133,16 +2140,18 @@ Oracledb Methods error_url=None, driver_name=None) Enables python-oracledb Thick mode by initializing the Oracle Client - library, see :ref:`enablingthick`. The method must be called before any - standalone connection or pool is created. If a connection or pool is first - created in Thin mode, then ``init_oracle_client()`` will raise an exception - and Thick mode cannot be enabled. + library, see :ref:`enablingthick`. If a standalone connection or pool has + already been created in Thin mode, ``init_oracle_client()`` will raise an + exception and python-oracledb will remain in Thin mode. + + If a standalone connection or pool has *not* already been created in Thin + mode, but ``init_oracle_client()`` raises an exception, python-oracledb + will remain in Thin mode but further calls to ``init_oracle_client()`` can + be made, if desired. The ``init_oracle_client()`` method can be called multiple times in each Python process as long as the arguments are the same each time. - See :ref:`initialization` for more information. - The ``lib_dir`` parameter is a string or a bytes object that specifies the directory containing Oracle Client libraries. If the ``lib_dir`` parameter is set, then the specified directory is the only one searched for the @@ -2192,10 +2201,37 @@ Oracledb Methods python-oracledb Thick mode is like "python-oracledb thk : ". See :ref:`otherinit`. + At successful completion of a call to ``oracledb.init_oracle_client()``, + the attribute :attr:`defaults.config_dir` will be set as determined below + (first one wins): + + - the value of the ``oracledb.init_oracle_client()`` parameter + ``config_dir``, if one was passed. + + - the value of :attr:`defaults.config_dir` if it has one. I.e. + :attr:`defaults.config_dir` remains unchanged after + ``oracledb.init_oracle_client()`` completes. + + - the value of the environment variable ``$TNS_ADMIN``, if it is set. + + - the value of ``$ORACLE_HOME/network/admin`` if the environment variable + ``$ORACLE_HOME`` is set. + + - the directory of the loaded Oracle Client library, appended with + ``network/admin``. Note this directory is not determinable on AIX. + + - otherwise the value *None* is used. (Leaving :attr:`defaults.config_dir` + unchanged). + .. note:: This method is an extension to the DB API definition. + .. versionchanged:: 3.0.0 + + At completion of the method, the value of :attr:`defaults.config_dir` + may get changed by python-oracledb. + .. versionchanged:: 2.5.0 The values supplied to the ``lib_dir`` and ``config_dir`` parameters @@ -2205,7 +2241,6 @@ Oracledb Methods "utf-8" is used. These values may also be supplied as a ``bytes`` object, in which case they will be used as is. - .. function:: is_thin_mode() Returns a boolean indicating if Thin mode is in use. @@ -2235,7 +2270,8 @@ Oracledb Methods Returns a string suitable for use as the ``dsn`` parameter for :meth:`~oracledb.connect()`. This string is identical to the strings that - are defined by the Oracle names server or defined in the tnsnames.ora file. + are defined by the Oracle names server or defined in the ``tnsnames.ora`` + file. .. deprecated:: python-oracledb 1.0 @@ -2516,10 +2552,8 @@ Oracledb Methods connection from a pool. The default value is *False*. The ``config_dir`` parameter is expected to be a string that indicates the - directory in which configuration files (tnsnames.ora) are found. This value - is only used in python-oracledb Thin mode. The default is the value of - :attr:`defaults.config_dir`. 
For python-oracledb Thick mode, use the - ``config_dir`` parameter of :func:`oracledb.init_oracle_client()`. + directory in which the :ref:`tnsnames.ora ` configuration file + is located. The ``appcontext`` parameter is expected to be a list of 3-tuples that identifies the application context used by the connection. This parameter @@ -2601,10 +2635,12 @@ Oracledb Methods required. The ``use_sni`` parameter is expected to be a boolean which indicates - whether to use the TLS Server Name Indicator (SNI) extension to bypass the + whether to use the TLS Server Name Indication (SNI) extension to bypass the second TLS negotiation that would otherwise be required. This parameter is - used in both python-oracledb Thin and Thick modes. The default value is - False. + used in both python-oracledb Thin and Thick modes. This parameter requires + Oracle Database 23.7. The default value is *False*. See the `SQL*Net + documentation `__ for more details. The ``program`` parameter is expected to be a string which specifies the name of the executable program or application connected to Oracle @@ -2638,13 +2674,14 @@ Oracledb Methods are parsed by python-oracledb itself and a generated connect descriptor is sent to the Oracle Client libraries. This value is only used in the python-oracledb Thick mode. The default value is - :attr:`defualts.thick_mode_dsn_passthrough`. + :attr:`defualts.thick_mode_dsn_passthrough`. For more information, see + :ref:`usingconfigfiles`. The ``extra_auth_params`` parameter is expected to be a dictionary containing the configuration parameters necessary for Oracle Database - authentication using :ref:`Azure ` or - :ref:`OCI ` cloud native authentication plugins. - This value is used in both the python-oracledb Thin and Thick modes. See + authentication using :ref:`OCI ` or :ref:`Azure + ` cloud native authentication plugins. This value is + used in both the python-oracledb Thin and Thick modes. See :ref:`tokenauth`. The ``handle`` parameter is expected to be an integer which represents a @@ -2655,7 +2692,7 @@ Oracledb Methods .. versionchanged:: 3.0.0 The ``use_sni``, ``instance_name``, ``thick_mode_dsn_passthrough``, - ``extra_auth_params`` and ``instance_name`` parameters were added. + ``extra_auth_params``, and ``instance_name`` parameters were added. .. versionchanged:: 2.5.0 @@ -4466,15 +4503,22 @@ Oracle Cloud Infrastructure (OCI) Object Storage Configuration Provider Plugin ------------------------------------------------------------------------------ ``oracledb.plugins.oci_config_provider`` is a plugin that provides access to -the configuration information stored in the :ref:`OCI Object Storage -` configuration provider. Importing this plugin defines and -:meth:`registers ` the hook function that -handles :ref:`OCI Object Storage connection strings ` prefixed -with ``config-oci``. The hook function parses this connection string, and -extracts the authentication details and URI details from the connection -string. Using the information, the hook function accesses the configuration -information in OCI Object Storage, which python-oracledb will use to connect -to Oracle Database. See :ref:`importconfigociplugin` for more information. +database connection credentials and application configuration information +stored in the :ref:`OCI Object Storage configuration provider +`. 
+ +Importing this plugin defines and registers a pre-defined hook function with +:meth:`oracledb.register_protocol()` to handle connection strings which have +the prefix ``config-ociobject``, see :ref:`OCI Object Storage connection +strings `. The hook function parses these connection strings and +gets the stored configuration information. Python-oracledb then uses this +information to connect to Oracle Database. + +To use this plugin in python-oracledb Thick mode, you must set +:attr:`defaults.thick_mode_dsn_passthrough` to *False* or explicitly call +:meth:`ConnectParams.parse_connect_string()`. + +See :ref:`ociobjstorageprovider` for more information. .. versionadded:: 3.0.0 @@ -4484,16 +4528,22 @@ Azure App Configuration Provider Plugin --------------------------------------- ``oracledb.plugins.azure_config_provider`` is a plugin that provides access to -the configuration information stored in :ref:`Azure App Configuration -` provider. Importing this plugin defines and -:meth:`registers ` the hook function that -handles :ref:`Azure App Configuration connection string ` -prefixed with ``config-azure``. The hook function parses this connection -string, and extracts the authentication details and URI details from the -connection string. Using the information, the hook function accesses the -configuration information in Azure App Configuration, which python-oracledb -will use to connect to Oracle Database. See :ref:`importconfigazureplugin` -for more information. +database connection credentials and application configuration information +stored in the :ref:`Azure App Configuration provider +`. + +Importing this plugin defines and registers a pre-defined hook function with +:meth:`oracledb.register_protocol()` to handle connection strings which have +the prefix ``config-azure``, see :ref:`Azure App Configuration connection +strings `. The hook function parses these connection strings +and gets the stored configuration information. Python-oracledb then uses this +information to connect to Oracle Database. + +To use this plugin in python-oracledb Thick mode, you must set +:attr:`defaults.thick_mode_dsn_passthrough` to *False* or explicitly call +:meth:`ConnectParams.parse_connect_string()`. + +See :ref:`azureappstorageprovider` for more information. .. versionadded:: 3.0.0 diff --git a/doc/src/release_notes.rst b/doc/src/release_notes.rst index 05fead13..d05c5483 100644 --- a/doc/src/release_notes.rst +++ b/doc/src/release_notes.rst @@ -81,12 +81,15 @@ Thin Mode Changes Thick Mode Changes ++++++++++++++++++ -#) The value of :attr:`defaults.config_dir` is now calculated from the - location of the Oracle Client shared library on some platforms. If a value - is supplied to the ``config_dir`` parameter of - :meth:`oracledb.init_oracle_client()`, then the value of - :attr:`defaults.config_dir` is set to that value after the call completes - successfully. +#) At successful completion of a call to :meth:`oracledb.init_oracle_client()`, + the value of :attr:`defaults.config_dir` may get set by python-oracledb in + some cases. For example it might be set to the configuration directory that + is relative to the loaded Oracle Client libraries. +#) Connect string parsing and :ref:`tnsnames.ora ` file handling + can be configured with the new parameter + :attr:`defaults.thick_mode_dsn_passthrough` which can be helpful for + application portability. When it is `False`, python-oracledb Thick mode + behaves similarly to Thin mode. 
#) Fixed bug that caused :attr:`oracledb._Error.isrecoverable` to always be `False`. @@ -127,9 +130,9 @@ Common Changes :attr:`DbObjectAttribute.scale`, and :attr:`DbObjectAttribute.max_size` that provide additional metadata about :ref:`database object attributes `. -#) Set the default value of :attr:`defaults.config_dir` to +#) The attribute :attr:`defaults.config_dir` is now set to ``$ORACLE_HOME/network/admin`` if the environment variable ``ORACLE_HOME`` - is set. + is set and ``TNS_ADMIN`` is *not* set. #) All connect strings are parsed by the driver if the new parameter ``thick_mode_dsn_passthrough`` is set to *True*. Previously, only Thin mode parsed all connect strings and Thick mode passed the connect string diff --git a/doc/src/user_guide/appendix_a.rst b/doc/src/user_guide/appendix_a.rst index 117b8f77..6f65dfee 100644 --- a/doc/src/user_guide/appendix_a.rst +++ b/doc/src/user_guide/appendix_a.rst @@ -147,6 +147,10 @@ see :ref:`driverdiff` and :ref:`compatibility`. - Yes - mostly supported. Unknown settings are ignored and not passed to Oracle Database. - Yes - Yes + * - Centralized Configuration Providers (see :ref:`configurationproviders`) + - Yes + - Yes + - No * - One-way TLS connections (see :ref:`onewaytls`) - Yes - Yes @@ -317,7 +321,7 @@ see :ref:`driverdiff` and :ref:`compatibility`. - Yes * - Two-phase Commit (TPC) (see :ref:`tpc`) - Yes - - Yes - improved support + - Yes - Yes - limited support * - REF CURSORs and Nested Cursors - Yes diff --git a/doc/src/user_guide/appendix_b.rst b/doc/src/user_guide/appendix_b.rst index 60ecd3da..c1e2ae51 100644 --- a/doc/src/user_guide/appendix_b.rst +++ b/doc/src/user_guide/appendix_b.rst @@ -39,11 +39,10 @@ mode. However, only one of these modes can be used in each Python process: .. note:: The parameters of connection and pool creation functions - :func:`oracledb.connect()` and :func:`oracledb.create_pool()` are now - keyword and not positional in both Thin and Thick modes. This change makes - the python-oracledb driver compliant with the Python Database API - specification PEP 249. The old usage will cause an error, see - :ref:`connerrors`. + :func:`oracledb.connect()` and :func:`oracledb.create_pool()` are keyword + and not positional. This makes the python-oracledb driver compliant with + the Python Database API specification PEP 249. The old positional usage + possible in cx_Oracle will cause an error, see :ref:`connerrors`. Connections to a Local Database ------------------------------- @@ -73,107 +72,6 @@ In the python-oracledb Thin mode: See :ref:`optnetfiles` and :ref:`optclientfiles` for more information. -.. _diffconnstr: - -Connection Strings ------------------- - -Python-oracledb Thin mode accepts :ref:`Oracle Net Services connection strings -` in the same formats as the Oracle Client libraries used by Thick -mode does, but not all keywords will be supported. - -The following table lists the parameters that are recognized in Thin mode -either in :ref:`Easy Connect ` strings or in :ref:`Connect -Descriptors ` that are either explicitly passed, or are in a -``tnsnames.ora`` file. All unrecognized parameters are ignored. The -connection parameters shown can be used in :meth:`oracledb.connect()`, -:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, -:meth:`oracledb.create_pool_async()`, :meth:`oracledb.ConnectParams()`, and -:meth:`oracledb.PoolParams()`. - -.. 
list-table-with-summary:: Oracle Net Keywords Supported in the python-oracledb Thin Mode - :header-rows: 1 - :class: wy-table-responsive - :align: center - :summary: The first column displays the keyword. The second column displays the equivalent oracledb.connect(), oracledb.create_pool(), oracledb.ConnectParams(), or oracledb.PoolParams() parameters. The third column displays the notes. - - * - Oracle Net Keyword - - Equivalent Connection Parameter - - Notes - * - SSL_SERVER_CERT_DN - - ssl_server_cert_dn - - If specified, this value is used for any verification. Otherwise, the hostname will be used. - * - SSL_SERVER_DN_MATCH - - ssl_server_dn_match - - In Thin mode parsing the parameter supports case insensitive on/yes/true values similar to the Thick mode. Any other value is treated as disabling it. - * - WALLET_LOCATION - - wallet_location - - Used in Easy Connect Strings. It is same as ``MY_WALLET_DIRECTORY`` in a connect descriptor. - * - MY_WALLET_DIRECTORY - - wallet_location - - No relevant notes - * - EXPIRE_TIME - - expire_time - - No relevant notes - * - HTTPS_PROXY - - https_proxy - - No relevant notes - * - HTTPS_PROXY_PORT - - https_proxy_port - - No relevant notes - * - RETRY_COUNT - - retry_count - - No relevant notes - * - RETRY_DELAY - - retry_delay - - No relevant notes - * - TRANSPORT_CONNECT_TIMEOUT - - tcp_connect_timeout - - No relevant notes - * - POOL_CONNECTION_CLASS - - cclass - - No relevant notes - * - POOL_PURITY - - purity - - No relevant notes - * - SERVICE_NAME - - service_name - - No relevant notes - * - SID - - sid - - No relevant notes - * - PORT - - port - - No relevant notes - * - PROTOCOL - - protocol - - No relevant notes - -In python-oracledb Thin mode, using the ``POOL_CONNECTION_CLASS`` or -``POOL_PURITY`` parameters in a connection string is similar to setting the -equivalent attributes when creating a connection or connection pool. - -In python-oracledb Thick mode, the ``POOL_CONNECTION_CLASS`` or ``POOL_PURITY`` -values will only work when connected to Oracle Database 21c, or later. Note if -``POOL_PURITY=SELF`` is used in a connect string, then python-oracledb Thick -mode applications will ignore the action to drop the session when attempting to -remove an unusable connections from a pool in some uncommon error cases. It is -recommended to avoid using ``POOL_PURITY=SELF`` in a connect string with -python-oracledb Thick mode. Instead, code the python-oracledb Thick mode -application to explicitly specify the purity and connection class as -attributes. - -The ``ENABLE=BROKEN`` connect descriptor option is not supported in -python-oracledb Thin mode. Use ``expire_time`` instead. - -If a name is given as a connect string, then the python-oracledb Thin mode will -consider it as a Net Service Name and not as the minimal Easy Connect string of -a hostname. The given connect string will be looked up in a ``tnsnames.ora`` -file. This is different from the python-oracledb Thick mode. If supporting a -bare name as a hostname is important to you in the python-oracledb Thin mode, -then you can alter the connection string to include a port number such as -``hostname:1521`` or a protocol such as ``tcp://hostname``. - Token Based Authentication -------------------------- @@ -187,7 +85,7 @@ In the python-oracledb Thin mode: - :ref:`Open Authorization (OAuth 2.0) token based authentication connection strings ` and :ref:`Oracle Cloud Infrastructure (OCI) Identity and Access Management (IAM) token based authentication connection strings - ` are not supported. 
Use ``access_token`` parameter of + ` are not supported. Use the ``access_token`` parameter of :func:`oracledb.ConnectParams()` instead. See :ref:`tokenauth`. Transport Layer Security (TLS) Support @@ -206,15 +104,21 @@ Native Network Encryption and Checksumming ------------------------------------------ The python-oracledb Thin mode does not support connections using Oracle -Database native network encryption or checksumming. You can enable -TLS instead of using native network encryption. If native network encryption -or checksumming are required, then use python-oracledb in the Thick mode. -See :ref:`enablingthick`. +Database Native Network Encryption (NNE) or checksumming. You can `enable TLS +`__ instead of using native network encryption. If +native network encryption or checksumming are required, then use +python-oracledb in Thick mode. See :ref:`enablingthick`. + +For example, if you use python-oracledb Thin mode and try to connect to an +Oracle Cloud Infrastructure (OCI) Oracle Base Database (where Native Network +Encryption is set to *REQUIRED* by default in the database ``sqlnet.ora`` +file), the connection will fail with an error like:: + + DPY-3001: Native Network Encryption and Data Integrity is only + supported in python-oracledb thick mode -For example, if you use python-oracledb Thin mode and try to connect to the -Oracle Cloud Infrastructure (OCI) Oracle Base Database where by default native -network encryption is set to REQUIRED in the ``sqlnet.ora`` file of the OCI -Oracle Base Database server, the connection will fail with an error like:: +or:: DPY-4011: the database or network closed the connection @@ -223,6 +127,8 @@ or:: DPY-6000: cannot connect to database. Listener refused connection. (Similar to ORA-12660) +See :ref:`Troubleshooting DPY-3001 ` for more information. + Connection Pooling Differences between Thin and Thick Modes =========================================================== @@ -239,8 +145,7 @@ differs from the python-oracledb Thick mode in the following ways: parameters. The parameters that are ignored in Thin mode include ``events``, ``tag``, ``matchanytag``, ``shardingkey``, ``supershardingkey``, and ``handle`` parameters. The parameters that are ignored in the Thick mode - include ``wallet_password``, ``disable_oob``, ``config_dir``, and - ``debug_jdwp`` parameters. + include ``wallet_password``, ``disable_oob``, and ``debug_jdwp`` parameters. * The python-oracledb Thin mode only suppports :ref:`homogeneous ` pools. @@ -283,14 +188,12 @@ differs from the python-oracledb Thick mode in the following ways: * In python-oracledb Thin mode, the connection pool supports all the :ref:`connection mode privileges `. - The python-oracledb Thick mode only supports the :data:`~oracledb.AUTH_MODE_SYSDBA` - privilege. - Supported Database Data Types in Thin and Thick Modes ===================================================== -The python-oracledb Thin and Thick modes support different Oracle database data -types. See :ref:`supporteddbtypes`. +The python-oracledb Thin and Thick mode support for the UROWID, REF, and +XMLType database data types has some small differences. See +:ref:`supporteddbtypes`. .. 
_querymetadatadiff: diff --git a/doc/src/user_guide/appendix_c.rst b/doc/src/user_guide/appendix_c.rst index 108eb7c3..79fa7925 100644 --- a/doc/src/user_guide/appendix_c.rst +++ b/doc/src/user_guide/appendix_c.rst @@ -265,7 +265,7 @@ Advanced Queuing (AQ) Differences from cx_Oracle Use the new :ref:`Advanced Queuing (AQ) ` API instead of the older API which was deprecated in cx_Oracle 7.2 and is not available in -python-oracledb. Note that AQ is only available in python-oracledb Thick mode. +python-oracledb. Replace: @@ -598,13 +598,21 @@ addition to the common :ref:`commonupgrade`: 2. If the ``config_dir`` parameter of :func:`~oracledb.init_oracle_client` had been used, then set the new :attr:`defaults.config_dir` attribute to the - desired value or set the ``config_dir`` parameter when connecting. For - example: + desired value or set the ``config_dir`` parameter in your connection or pool + creation method call. For example: .. code-block:: python oracledb.defaults.config_dir = "/opt/oracle/config" + or + + .. code-block:: python + + connection = oracledb.connect(user="hr", password=userpwd, dsn="orclpdb", + config_dir="/opt/oracle/config") + + Also, see :ref:`sqlnetclientconfig`. 3. If the ``driver_name`` parameter of :func:`~oracledb.init_oracle_client` had @@ -660,9 +668,10 @@ addition to the common :ref:`commonupgrade`: The requirement to call ``init_oracle_client()`` means that Oracle Client library loading is not automatically deferred until the driver is first used, such as when a connection is opened. The application must explicitly - manage this, if deferral is required. In python-oracledb, - ``init_oracle_client()`` can be called multiple times in a Python process - as long as arguments are the same. + manage this if deferral is required. + + In python-oracledb, ``init_oracle_client()`` can be called multiple times in + a Python process as long as the arguments are the same. Note that on Linux and related operating systems, the ``init_oracle_client()`` parameter ``lib_dir`` should not be @@ -717,7 +726,7 @@ You can then choose what mode is in use by setting the environment variable Output shows the python-oracledb Thin mode was used:: - python-oracledb thn : 1.0.0 + python-oracledb thn : 3.0.0 You can customize ``oracledb_upgrade.py`` to your needs. For example, if your connection and pool creation calls always use keyword parameters, you can diff --git a/doc/src/user_guide/connection_handling.rst b/doc/src/user_guide/connection_handling.rst index 4e8240a5..08b8490f 100644 --- a/doc/src/user_guide/connection_handling.rst +++ b/doc/src/user_guide/connection_handling.rst @@ -58,12 +58,6 @@ a connection can take, or enabling :ref:`network encryption `. reference Oracle environment variables ``ORACLE_SID``, ``TWO_TASK``, or ``LOCAL``. -.. note:: - - When using python-oracledb in Thin mode, the ``tnsnames.ora`` file will not - be automatically located. The file's directory must explicitly be passed - to the application, see :ref:`optnetfiles`. - .. _standaloneconnection: Standalone Connections @@ -120,23 +114,6 @@ listener port can also be passed: connection = oracledb.connect(user="hr", password=userpwd, host="localhost", port=1521, service_name="orclpdb") -If you like to encapsulate values, parameters can be passed using a -:ref:`ConnectParams Object `: - -.. 
code-block:: python - - params = oracledb.ConnectParams(host="my_host", port=my_port, service_name="my_service_name") - conn = oracledb.connect(user="my_user", password="my_password", params=params) - -Some values such as the database host name can be specified as ``connect()`` -parameters, as part of the connect string, and in the ``params`` object. If a -``dsn`` is passed, a connection string is internally constructed from the -individual parameters and ``params`` object values, with the individual -parameters having precedence. The precedence is that values in any ``dsn`` -parameter override values passed as individual parameters, which themselves -override values set in the ``params`` object. Similar precedence rules also -apply to other values. - A single, combined connection string can be passed to ``connect()`` but this may cause complications if the password contains "@" or "/" characters: @@ -151,6 +128,21 @@ may cause complications if the password contains "@" or "/" characters: dsn = f'{username}/{userpwd}@{host}:{port}/{service_name}' connection = oracledb.connect(dsn) +If you like to encapsulate values, parameters can be passed using a +:ref:`ConnectParams Object `: + +.. code-block:: python + + params = oracledb.ConnectParams(host="my_host", port=my_port, service_name="my_service_name") + conn = oracledb.connect(user="my_user", password="my_password", params=params) + +Some values such as the database host name can be specified as ``connect()`` +parameters, as part of the ``dsn`` connection string, and in the ``params`` +object. A final connection string is internally constructed from any ``dsn``, +individual parameters, and ``params`` object values. The precedence is that +values in a ``dsn`` parameter override values passed as individual parameters, +which themselves override values set in the ``params`` object. + Closing Connections +++++++++++++++++++ @@ -283,7 +275,7 @@ of Oracle Database's naming methods: * An Oracle :ref:`Easy Connect ` string * A :ref:`Connect Descriptor ` -* A :ref:`TNS Alias ` mapping to a Connect Descriptor in a +* A :ref:`TNS Alias ` mapping to a Connect Descriptor stored in a :ref:`tnsnames.ora ` file * An :ref:`LDAP URL ` * A :ref:`Configuration Provider URL ` @@ -303,9 +295,22 @@ Easy Connect Syntax for Connection Strings An `Easy Connect `__ string is often the simplest connection string to use in the data source name parameter ``dsn`` of -connection functions such as :meth:`oracledb.connect()` and -:meth:`oracledb.create_pool()`. This method does not need configuration files -such as :ref:`tnsnames.ora `. +connection functions such as :meth:`oracledb.connect()`, +:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, and +:meth:`oracledb.create_pool_async()`. + +Using Easy Connect strings means that an external :ref:`tnsnames.ora +` configuration file is not needed. + +The Easy Connect syntax in python-oracledb is:: + + [[protocol:]//]host1{,host12}[:port1]{,host2:port2}{;host1{,host12}[:port1]}[/[service_name][:server][/instance_name]][?parameter_name=value{¶meter_name=value}] + +See the `Database Net Services Administrator's Guide +`__ +and the technical brief `Oracle Database Easy Connect Plus +`__ for more details. For example, to connect to the Oracle Database service ``orclpdb`` that is running on the host ``dbhost.example.com`` with the default Oracle @@ -326,37 +331,38 @@ If the database is using a non-default port, it must be specified: The Easy Connect syntax supports Oracle Database service names. 
It cannot be used with the older System Identifiers (SID). -The `Easy Connect `__ syntax allows the use of -multiple hosts or ports, along with optional entries for the wallet location, -the distinguished name of the database server, and allows some network -configuration options such as the connection timeout and keep-alive values to -be set:: +**Oracle Net Settings in Easy Connect Strings** + +The Easy Connect syntax allows some `Oracle Network and database +`__ configuration options to be +set. This means that a :ref:`sqlnet.ora ` file is not needed for +common connection scenarios. + +For example, to set a connection timeout and keep-alive value: .. code-block:: python connection = oracledb.connect(user="hr", password=userpwd, - dsn="dbhost.example.com/orclpdb?expire_time=2") + dsn="dbhost.example.com/orclpdb?transport_connect_timeout=10&expire_time=2") + -This means that a :ref:`sqlnet.ora ` file is not needed for common -connection scenarios. See the technical brief `Oracle Database Easy Connect -Plus `__ for additional information. +For more information, see :ref:`connectdesckeywords`. Any Easy Connect +parameters that are not known to python-oracledb are ignored and not passed to +the database. -Python-oracledb specific settings can also be passed as Easy Connect arguments. -For example to set the statement cache size used by connections:: +**Python-oracledb Settings in Easy Connect Strings** + +Many python-oracledb connection method API arguments can alternatively be +passed as Easy Connect parameters with a "pyo." prefix. For example, to set +the statement cache size used by connections: .. code-block:: python connection = oracledb.connect(user="hr", password=userpwd, dsn="dbhost.example.com/orclpdb?pyo.stmtcachesize=50") -See :ref:`defineconnparams` and :ref:`definepoolparams` for the settings that -can be passed as arguments. - -Any Easy Connect parameters that are unknown to python-oracledb are ignored and -not passed to the database. See :ref:`Connection String Differences -` for more information. +See :ref:`pyoparams` for the usable attributes. .. _conndescriptor: @@ -392,10 +398,14 @@ For example: This prints:: - (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=dbhost.example.com)(PORT=1521)))(CONNECT_DATA=(SERVICE_NAME=orclpdb))(SECURITY=(SSL_SERVER_DN_MATCH=True))) + (DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST=dbhost.example.com)(PORT=1521))(CONNECT_DATA=(SERVICE_NAME=orclpdb))) -The ``CONNECT_DATA`` parameters of a full connect descriptor that are -unrecognized by python-oracledb are passed to the database unchanged. +Syntax is shown in the `Database Net Services Reference +`__. + +Any ``DESCRIPTION``, ``CONNECT_DATA`` and ``SECURITY`` parameters of a full +connect descriptor that are unrecognized by python-oracledb are passed to the +database unchanged. .. _netservice: @@ -404,9 +414,11 @@ TNS Aliases for Connection Strings :ref:`Connect Descriptors ` are commonly stored in a :ref:`tnsnames.ora ` file and associated with a TNS Alias. This -alias can be used directly for the data source name parameter ``dsn`` of -:meth:`oracledb.connect()` and :meth:`oracledb.create_pool()`. For example, -given a file ``/opt/oracle/config/tnsnames.ora`` with the following contents:: +:ref:alias can be used directly for the data source name parameter ``dsn`` of +:ref::meth:`oracledb.connect()`, :meth:`oracledb.create_pool()`, +:ref::meth:`oracledb.connect_async()`, and +:ref::meth:`oracledb.create_pool_async()`. 
For example, given a file
+``/opt/oracle/config/tnsnames.ora`` with the following contents::
 
     ORCLPDB =
         (DESCRIPTION =
@@ -417,18 +429,24 @@ given a file ``/opt/oracle/config/tnsnames.ora`` with the following contents::
             )
         )
 
-Then you could connect in python-oracledb Thin mode by passing the TNS Alias
-"ORCLPDB" (case insensitive) as the ``dsn`` value:
+Then you could connect by passing the TNS Alias "ORCLPDB" (case insensitive) as
+the ``dsn`` value:
 
 .. code-block:: python
 
     connection = oracledb.connect(user="hr", password=userpwd, dsn="orclpdb",
                                   config_dir="/opt/oracle/config")
 
-More options for how python-oracledb locates ``tnsnames.ora`` files are
-detailed in :ref:`optnetfiles`. Note that in python-oracledb Thick mode, the
-configuration directory must be set during initialization, not at connection
-time.
+In python-oracledb Thick mode, the configuration directory can also be set
+during library initialization:
+
+.. code-block:: python
+
+    oracledb.init_oracle_client(config_dir="/opt/oracle/config")
+    connection = oracledb.connect(user="hr", password=userpwd, dsn="orclpdb")
+
+More options for how python-oracledb locates :ref:`tnsnames.ora `
+files are detailed in :ref:`usingconfigfiles`.
 
 TNS Aliases may also be resolved by :ref:`LDAP `.
 
@@ -454,7 +472,9 @@ Client 23ai could connect using:
 
     connection = oracledb.connect(user="scott", password=pw, dsn=ldapurl)
 
 This syntax is also usable in python-oracledb Thin mode via a :ref:`connection
-hook function `, see :ref:`ldapconnections`.
+hook function `, see :ref:`ldapconnections`. In
+python-oracledb Thick mode, when :attr:`defaults.thick_mode_dsn_passthrough` is
+*False*, a connection hook function is also required.
 
 .. _configproviderurl:
 
@@ -462,34 +482,52 @@ Centralized Configuration Provider URL Connection Strings
 ---------------------------------------------------------
 
 A :ref:`Centralized Configuration Provider ` URL
-contains the details of where the configuration information is located. The
-information that can be stored in configuration providers includes connect
-descriptors, database credentials (user name and password), and python-oracledb
-specific attributes. With this URL, python-oracledb can access the information
-stored in the configuration providers listed below and connect to Oracle
-Database:
-
-- :ref:`Oracle Cloud Infrastructure (OCI) Object Storage configuration
-  provider `
-- :ref:`Microsoft Azure App Configuration provider `
-- :ref:`File Configuration Provider `
-
-The configuration provider URL can be set in the ``dsn`` parameter of
-connection functions :meth:`oracledb.connect()`,
-:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, and
-:meth:`oracledb.create_pool_async()`. This URL must begin with
-"config-" where the configuration-provider value can
-be set to *ociobject*, *azure*, or *file*, depending on the location of your
-configuration information. For example, to use connection configuration stored
-in a local file ``/opt/oracle/my-config.json``, you need to specify the ``dsn``
-parameter as shown:
+connection string allows python-oracledb configuration information to be stored
+centrally in OCI Object Storage, using Azure App Configuration, or in a local
+file. Given a provider URL, python-oracledb will access the information stored
+in the configuration provider and use it to connect to Oracle Database.
+
+The database connect descriptor and any database credentials stored in a
+configuration provider will be used by any language driver that accesses the
+configuration. 
Other driver-specific sections can exist. Python-oracledb will
+take settings that are in a section with the prefix "pyo", and will ignore
+other sections.
+
+For example, to use connection configuration stored in a local file
+``/opt/oracle/my-config.json``:
+
+.. code-block:: json
+
+    {
+        "connect_descriptor": "localhost/orclpdb",
+        "pyo": {
+            "min": 5,
+            "max": 10,
+            "increment": 2,
+            "stmtcachesize": 4
+        }
+    }
+
+You could use this to create a connection pool by specifying the ``dsn``
+connection string parameter as:
 
 .. code-block:: python
 
-    connection = oracledb.connect(user="hr", password=userpwd,
-                                  dsn="config-file:///opt/oracle/my-config.json")
+    pool = oracledb.create_pool(user="hr", password=userpwd,
+                                dsn="config-file:///opt/oracle/my-config.json")
+
+
+The pool will be created using the pool settings from the configuration.
+
+The Centralized Configuration Provider URL must begin with
+"config-<configuration-provider>://" where the configuration-provider value
+can be set to *ociobject*, *azure*, or *file*, depending on the location of
+your configuration information.
 
-See the respective configuration provider sections for more details.
+See :ref:`configurationproviders` for more information, particularly regarding
+using python-oracledb Thick mode.
+
+The valid keys for the "pyo" object are shown in :ref:`pyoparams`.
 
 .. _jdbcconnstring:
 
@@ -549,96 +587,459 @@ This can be referenced in python-oracledb:
 
     connection = oracledb.connect(user="hr", password=userpwd, dsn="finance")
 
+.. _connectdesckeywords:
+
+Oracle Net Connect Descriptor and Easy Connect Keywords
+-------------------------------------------------------
+
+Easy Connect syntax is described in :ref:`easyconnect`.
+
+Connect Descriptor keywords are shown in the `Database Net Services Reference
+`__.
+
+**Notes on specific keywords**
+
+The ``POOL_CONNECTION_CLASS`` or ``POOL_PURITY`` values will only work when
+connected to Oracle Database 21c, or later. Note if ``POOL_PURITY=SELF`` is
+used in a connect string, then python-oracledb Thick mode applications will
+ignore the action to drop the session when attempting to remove an unusable
+connection from a pool in some uncommon error cases. It is recommended to
+avoid using ``POOL_PURITY=SELF`` in a connect string with python-oracledb Thick
+mode. Instead, code python-oracledb Thick mode applications to explicitly
+specify the purity and connection class as attributes.
+
+The ``ENABLE=BROKEN`` connect descriptor option is not supported by
+python-oracledb Thin mode. Use ``EXPIRE_TIME`` instead.
+
+If a name is given as a connect string, then python-oracledb will consider it
+as a Net Service Name and not as the minimal Easy Connect string of a hostname.
+The given connect string will be looked up in a :ref:`tnsnames.ora
+` file. If supporting a bare name as a hostname is important to
+you in python-oracledb, then you can alter the connection string to include a
+protocol such as ``tcp://hostname``, or a port number such as
+``hostname:1521``, as shown in the example at the end of this section.
+
+In python-oracledb Thick mode, when :attr:`defaults.thick_mode_dsn_passthrough`
+is *False*, any ``DESCRIPTION``, ``CONNECT_DATA`` and ``SECURITY`` parameters
+of a full connect descriptor that are unrecognized by python-oracledb are
+passed to the database unchanged. Any Easy Connect parameters that are not
+known to python-oracledb are discarded and not passed to the database.
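+
+For example, a minimal sketch (the host name ``myhost`` is hypothetical) that
+forces a bare name to be treated as a host rather than being looked up as a
+TNS Alias is:
+
+.. code-block:: python
+
+    # "myhost" alone would be looked up in a tnsnames.ora file as a TNS Alias;
+    # adding a protocol (or a port number) makes it parse as a host name
+    connection = oracledb.connect(user="hr", password=userpwd,
+                                  dsn="tcp://myhost/orclpdb")
+
+.. 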
_pyoparams: + +Python-oracledb Parameters Settable in Easy Connect Strings or Central Configuration Providers +---------------------------------------------------------------------------------------------- + +Some python-oracledb connection and pool creation parameters can be set in +:ref:`Easy Connect strings ` or via a :ref:`Centralized +Configuration Provider `. This is an alternative to +passing explicit arguments to :meth:`oracledb.connect()`, +:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, or +:meth:`oracledb.create_pool_async()`. This allows application behavior to be +changed without needing application code to be updated. + +The parameters are shown below in :ref:`this table +`. Parameters have a "pyo." prefix or are under a +"pyo" key. Each of these parameters that is defined in an Easy Connect string +or via a Centralized Configuration Provider will take precedence over the value +passed as the equivalent python-oracledb API parameter. + +Parameters that apply to :ref:`pool creation ` will be ignored if +they are used in the context of :ref:`standalone connections +`. Parameters with unknown names will be ignored in both +cases. + +**Python-oracledb Parameters in Easy Connect Strings** + +The Easy Connect parameter names are similar to the python-oracledb method +argument names, but have a "pyo." prefix. For example: + +.. code-block:: python + + cs = "host.example.com:1522/orclpdb?pyo.stmtcachesize=30&pyo.mode=SYSDBA" + connection = oracledb.connect(user="hr", password=userpwd, dsn=cs) + +is the same as: + +.. code-block:: python + + cs = "host.example.com:1522/orclpdb" + connection = oracledb.connect(user="hr", password=userpwd, dsn=cs, + stmtcachesize=30, mode=oracledb.AuthMode.SYSDBA) + +If a parameter is specified multiple times in an Easy Connect string, then the +last value of that parameter is used. For example, in +"localhost/orclpdb?pyo.sdu=10&pyo.sdu=20" the SDU is set to 20. + +Note some Oracle Net parameters can also be prefixed with "pyo.". + +Parameters with the prefix "pyo." can only be used in Easy Connect strings and +not in :ref:`Connect Descriptors `. + +**Python-oracledb Parameters in Configuration Providers** + +With the :ref:`File Centralized Configuration Provider ` or +:ref:`OCI Object Storage Centralized Configuration Provider +`, the settable python-oracledb driver attributes should +be in the JSON file under the key "pyo". An example is: + +.. code-block:: json + + { + "connect_descriptor": "localhost/orclpdb", + "pyo": { + "min": 5, + "max": 10, + "increment": 2 + "stmtcachesize": 4 + } + } + +With :ref:`Azure App Configuration `, values are set +using a key such as "/pyo/". This is similar to how `Oracle +Call Interface +`__ settings use +the key "/oci/" as shown in `Oracle Net Service +Administrator’s Guide `__. + +.. _params_ez_config_provider: + +**Parameter Names** + +When used in Easy Connect Strings, the parameter names should be prefixed with +"pyo.". When used in a Centralized Configuration Provider, the parameter +names are used to form the key names under a parent "pyo" key or with a "pyo/" +prefix. The names are case insensitive. + +.. list-table-with-summary:: Python-oracledb parameters usable in Easy Connect Strings or Centralized Configuration Providers + :header-rows: 1 + :class: wy-table-responsive + :align: center + :name: _params_ez_config_provider_table + :summary: The first column displays the base parameter name. The second column displays the type of the parameter. 
The third column displays the equivalent API parameter name. The fourth column contains notes. + + * - Base Parameter Name + - Type/Value + - Equivalent python-oracledb Connection Parameter Name + - Notes + * - ``CCLASS`` + - String + - ``cclass`` + - No relevant notes + * - ``CONNECTION_ID_PREFIX`` + - String + - ``connection_id_prefix`` + - No relevant notes + * - ``DISABLE_OOB`` + - String representing a boolean. Values may be one of *on* or *off*, *true* or *false*, *yes* or *no* (case insensitive). + - ``disable_oob`` + - No relevant notes + * - ``DRIVER_NAME`` + - String + - ``driver_name`` + - No relevant notes + * - ``EDITION`` + - String + - ``edition`` + - No relevant notes + * - ``EVENTS`` + - String representing a boolean. Values may be one of *on* or *off*, *true* or *false*, *yes* or *no* (case insensitive). + - ``events`` + - No relevant notes + * - ``EXPIRE_TIME`` + - Integer + - ``expire_time`` + - No relevant notes + * - ``EXTERNALAUTH`` + - String representing a boolean. Values may be one of *on* or *off*, *true* or *false*, *yes* or *no* (case insensitive). + - ``externalauth`` + - No relevant notes + * - ``EXTRA_AUTH_PARAMS`` + - A dictionary containing the configuration parameters necessary for Oracle Database authentication using :ref:`OCI ` or :ref:`Azure ` cloud native authentication plugins. + - ``extra_auth_params`` + - For use by Centralized Configuration Providers only + * - ``GETMODE`` + - String, values may be one of *FORCEGET*, *NOWAIT*, *WAIT*, or *TIMEDWAIT* mapping to :ref:`connpoolmodes`. + - ``getmode`` + - Pool creation only + * - ``HOMOGENEOUS`` + - String representing a boolean. Values may be one of *on* or *off*, *true* or *false*, *yes* or *no* (case insensitive). + - ``homogeneous`` + - Pool creation only + * - ``HTTPS_PROXY`` + - String + - ``https_proxy`` + - No relevant notes + * - ``HTTPS_PROXY_PORT`` + - Integer + - ``https_proxy_port`` + - No relevant notes + * - ``INCREMENT`` + - Integer + - ``increment`` + - Pool creation only + * - ``MACHINE`` + - String + - ``machine`` + - No relevant notes + * - ``MAX`` + - Integer + - ``max`` + - Pool creation only + * - ``MAX_LIFETIME_SESSION`` + - Integer + - ``max_lifetime_session`` + - Pool creation only + * - ``MAX_SESSIONS_PER_SHARD`` + - Integer + - ``max_sessions_per_shard`` + - Pool creation only + * - ``MIN`` + - Integer + - ``min`` + - Pool creation only + * - ``MODE`` + - String, values may be one of *DEFAULT*, *PRELIM*, *SYSASM*, *SYSBKP*, *SYSDBA*, *SYSDGD*, *SYSKMT*, *SYSOPER*, or *SYSRAC* mapping to :ref:`connection-authorization-modes`. + - ``mode`` + - No relevant notes + * - ``OSUSER`` + - String + - ``osuser`` + - No relevant notes + * - ``PING_INTERVAL`` + - Integer + - ``ping_interval`` + - Pool creation only + * - ``PING_TIMEOUT`` + - Integer + - ``ping_timeout`` + - Pool creation only + * - ``POOL_BOUNDARY`` + - String + - ``pool_boundary`` + - No relevant notes + * - ``PROGRAM`` + - String + - ``program`` + - No relevant notes + * - ``PURITY`` + - String, values may be one of *DEFAULT*, *NEW*, or *SELF* mapping to :ref:`drcppurityconsts`. + - ``purity`` + - No relevant notes + * - ``RETRY_COUNT`` + - Integer + - ``retry_count`` + - No relevant notes + * - ``RETRY_DELAY`` + - Integer + - ``retry_delay`` + - No relevant notes + * - ``SDU`` + - Integer + - ``sdu`` + - No relevant notes + * - ``SODA_METADATA_CACHE`` + - String representing a boolean. Values may be one of *on* or *off*, *true* or *false*, *yes* or *no* (case insensitive). 
+ - ``soda_metadata_cache`` + - Pool creation only + * - ``SSL_SERVER_CERT_DN`` + - String + - ``ssl_server_cert_dn`` + - No relevant notes + * - ``SSL_SERVER_DN_MATCH`` + - String representing a boolean. Values may be one of *on* or *off*, *true* or *false*, *yes* or *no* (case insensitive). + - ``ssl_server_dn_match`` + - No relevant notes + * - ``STMTCACHESIZE`` + - Integer + - ``stmtcachesize`` + - No relevant notes + * - ``TCP_CONNECT_TIMEOUT`` + - Integer + - ``tcp_connect_timeout`` + - No relevant notes + * - ``TERMINAL`` + - String + - ``terminal`` + - No relevant notes + * - ``TIMEOUT`` + - Integer + - ``timeout`` + - Pool creation only + * - ``USE_TCP_FAST_OPEN`` + - String representing a boolean. Values may be one of *on* or *off*, *true* or *false*, *yes* or *no* (case insensitive). + - ``use_tcp_fast_open`` + - No relevant notes + * - ``USE_SNI`` + - String representing a boolean. Values may be one of *on* or *off*, *true* or *false*, *yes* or *no* (case insensitive). + - ``use_sni`` + - No relevant notes + * - ``WAIT_TIMEOUT`` + - Integer + - ``wait_timeout`` + - Pool creation only + * - ``WALLET_LOCATION`` + - String + - ``wallet_location`` + - Not recommended for use in Configuration Providers because the path name may not be valid on any particular application host. + .. _configurationproviders: Centralized Configuration Providers =================================== -Centralized Configuration Providers allow the storage and management of -database connection credentials and application configuration information in a -central location. These providers allow you to separately store the -configuration information from the code of your application. The information -that can be stored in these providers includes connect descriptors, database -credentials such as user name and password, and python-oracledb specific -attributes. - -You can access the information stored in configuration providers using both -python-oracledb Thin and Thick modes. With this information, python-oracledb -can connect to Oracle Database using :meth:`oracledb.connect()`, -:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, or +`Centralized Configuration Providers `__ allow the storage +and management of database connection credentials and application configuration +information in a central location. Providers allow you to separately store +configuration information from the code of your application. The values that +can be stored includes the database connection string, database credentials, a +cache time, and python-oracledb specific attributes such as connection pool +settings. Python-oracledb can use the centrally stored information to connect +to Oracle Database with :meth:`oracledb.connect()`, +:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, and :meth:`oracledb.create_pool_async()`. The following configuration providers are supported by python-oracledb: -- :ref:`Oracle Cloud Infrastructure (OCI) Object Storage ` -- :ref:`Microsoft Azure App Configuration ` -- :ref:`File Configuration Provider ` +- :ref:`File Centralized Configuration Provider ` +- :ref:`Oracle Cloud Infrastructure (OCI) Object Storage Centralized + Configuration Provider ` +- :ref:`Microsoft Azure App Centralized Configuration Provider + ` + +To use python-oracledb :ref:`Centralized Configuration Provider +` functionality in Thick mode, you must set +:attr:`defaults.thick_mode_dsn_passthrough` to *False* or explicitly call +:meth:`ConnectParams.parse_connect_string()`. 
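+
+For example, a minimal sketch (assuming Oracle Client libraries are installed,
+and using the example configuration file from :ref:`fileconfigprovider`) is:
+
+.. code-block:: python
+
+    # Let python-oracledb, not the Oracle Client libraries, parse the
+    # configuration provider URL in Thick mode
+    oracledb.defaults.thick_mode_dsn_passthrough = False
+    oracledb.init_oracle_client()
+
+    connection = oracledb.connect(user="hr", password=userpwd,
+                                  dsn="config-file:///opt/oracle/my-config1.json")
+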
+ +In Thick mode, when :attr:`defaults.thick_mode_dsn_passthrough` is *True*, it +is the Oracle Client libraries that access the configuration provider when +python-oracledb connection or pool creation methods are invoked. Any +python-oracledb parameter section will be ignored. Any Oracle Client Interface +parameter section should be removed from the configuration because its values +may be different to those that python-oracledb assumes, and will cause +undefined behavior. **Precedence of Attributes** -If you have defined the values of ``user`` and ``password`` in both the +Defining attributes in multiple places is not recommended. However, if +you have defined the values of ``user`` and ``password`` in both the +application and the configuration provider, then the values defined in the +application will have the higher precedence. If the ``externalauth`` parameter +is set to *True*, then the ``user`` and ``password`` values specified in the +configuration provider are ignored. + +If other python-oracledb connection attributes have been defined in both the application and the configuration provider, then the values defined in the -application will have the higher precedence. If the ``externalauth`` -parameter is set to *True*, then the ``user`` and ``password`` values -specified in the configuration provider is ignored. +configuration provider will have higher precedence. + +If you are using Thick mode, and have defined python-oracledb attributes in an +``oraaccess.xml`` file (see :ref:`optclientfiles`), the configuration provider, +and the application, then the values defined in the configuration provider will +have the higher precedence followed by the ``oraaccess.xml`` file settings, and +then application settings. + +.. _fileconfigprovider: + +Using a File Centralized Configuration Provider +----------------------------------------------- -If you have defined the python-oracledb specific attributes in both the -application and in the configuration provider, then the values defined in the -configuration provider will have the higher precedence. +The File Centralized Configuration Provider enables the storage and management +of Oracle Database connection information using local files. -.. _ociobjstorage: +To use a File Centralized Configuration Provider, you must: -OCI Object Storage Configuration Provider ------------------------------------------ +1. Store the connection information in a JSON file on your local file system. -The `Oracle Cloud Infrastructure (OCI) Object Storage `__ configuration -provider enables the storage and management of Oracle Database connection -information in a JSON file. +2. Set the path to the file in the ``dsn`` parameter of connection and pool + creation methods. -To use python-oracledb to access the configuration information from OCI Object -Storage, you must install the `OCI module `__, -see :ref:`ocimodules`. +**File Centralized Configuration Provider JSON File Syntax** -The JSON configuration file must contain the ``connect_descriptor`` property. -Optionally, you can specify the database user name, password, and -python-oracledb specific properties in the file. The database password can also -be stored securely as a secret using `OCI Vault `__. The properties that -can be added in the JSON file are listed below: +The configuration file must contain at least a ``connect_descriptor`` key to +specify the database connection string. Optionally, you can store the database +user name, password, a cache time, and :ref:`python-oracledb settings +`. 
The keys that can be stored in the file are:
 
-.. list-table-with-summary:: JSON Properties for OCI Object Storage Configuration Provider
+.. list-table-with-summary:: JSON keys for the File Configuration Provider
     :header-rows: 1
     :class: wy-table-responsive
     :widths: 15 25 15
-    :name: _oci_object_storage_sub-objects
-    :summary: The first column displays the name of the property. The second column displays the description of the property. The third column displays whether the property is required or optional.
+    :name: _file_configuration_provider
+    :summary: The first column displays the name of the key. The second column displays its description. The third column displays whether the key is required or optional.
 
-    * - Property
+    * - Key
       - Description
       - Required or Optional
     * - ``user``
       - The database user name.
       - Optional
     * - ``password``
-      - The password of the database user, or a dictionary containing the key "type" and password-type specific properties.
+      - The password of the database user as a dictionary containing the key "type" and password type-specific keys.
+
+        .. warning::
+
+            Storing passwords in the configuration file should only ever be used in development or test environments.
+
       - Optional
     * - ``connect_descriptor``
       - The database :ref:`connection string `.
       - Required
+    * - ``config_time_to_live``
+      - How many seconds the configuration is cached for. Defaults to 86,400 seconds (24 hours).
+      - Optional
+    * - ``config_time_to_live_grace_period``
+      - How many seconds an expired configuration can still be used if a new configuration cannot be obtained. Defaults to 1,800 seconds (30 minutes).
+      - Optional
     * - ``pyo``
-      - Python-oracledb specific properties.
+      - See :ref:`pyoparams`.
       - Optional
 
-The following sample is an example of OCI Object Storage configuration
-provider syntax::
+See the `Oracle Net Services Administrator's Guide `__ for
+more information on the generic provider sub-objects usable in JSON files.
+
+Multiple configurations can be defined by specifying the above keys under
+user-chosen, top-level keys; see the example further below.
+
+**File Centralized Configuration Provider DSN Syntax**
+
+To use a file provider, specify the ``dsn`` parameter of
+:meth:`oracledb.connect()`, :meth:`oracledb.create_pool()`,
+:meth:`oracledb.connect_async()`, or :meth:`oracledb.create_pool_async()` using
+the following format::
+
+    config-file://<file-name>[?key=<key-name>]
+
+The elements of the ``dsn`` parameter are detailed in the table below.
+
+.. list-table-with-summary:: Connection String Parameters for File Configuration Provider
+    :header-rows: 1
+    :class: wy-table-responsive
+    :widths: 20 60
+    :name: _connection_string_for_file_configuration_provider
+    :summary: The first column displays the name of the connection string parameter. The second column displays the description of the connection string parameter.
+
+    * - Parameter
+      - Description
+    * - ``config-file``
+      - Indicates that the centralized configuration provider is a file in your local system.
+    * - <file-name>
+      - The file path and name of the JSON file that contains the configuration information. For relative paths, python-oracledb will use the connection or pool creation ``config_dir`` parameter, or :attr:`defaults.config_dir` value, to create an absolute path.
+    * - ``key``
+      - The connection key name used to identify a specific configuration. If this parameter is specified, the file is assumed to contain multiple configurations that are indexed by the key value. If not specified, the file is assumed to contain a single configuration. 
See the example further below. + +**File Configuration Provider Examples** + +An example of File Configuration Provider file syntax is:: { "user": "scott", "password": { - "type": "oci-vault", - "value": "oci.vaultsecret.my-secret-id" - "authentication": "OCI_INSTANCE_PRINCIPAL" + "type": "base64", + "value": "dGlnZXI=" }, "connect_descriptor": "dbhost.example.com:1522/orclpdb", "pyo": { @@ -648,104 +1049,130 @@ provider syntax:: } } -If the password key has a reference to Azure Key Vault, then you must define -the Azure Key Vault credentials in the ``password`` property. The -``azure_client_id`` and ``azure_tenant_id`` must be specified in the password -property. Also, either the ``azure_client_secret`` or -``azure_client_certificate_path`` should be specified. The password format -should be:: +This encodes the password as base64. See :ref:`ociobjstorageprovider` for +other password examples. Plaintext passwords are not supported. - "password": { - "type": "azure-vault", - "value": "", - "azure_tenant_id":"", - "azure_client_id":"", - "azure_client_secret": "", or "azure_client_certificate_path" : "" - } +Note that python-oracledb caches configurations by default, see +:ref:`conncaching`. -.. _useociconfigprovider: +If you have this configuration file in ``/opt/oracle/my-config1.json``, you +could use it like: -Using python-oracledb with OCI Object Storage Configuration Provider -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. code-block:: python -To use python-oracledb with an OCI Object Storage configuration provider, you -must: + connection = oracledb.connect(dsn="config-file:///opt/oracle/my-config1.json") -1. :ref:`Import the oracledb.plugins.oci_config_provider plugin - ` in your code. +Multiple configurations can be defined by specifying user-chosen top-level +keys:: -2. :ref:`Use an OCI Object Storage connection string URL ` - in the ``dsn`` parameter of connection and pool creation methods. + { + "production": { + "connect_descriptor": "localhost/orclpdb" + }, + "testing": { + "connect_descriptor": "localhost/orclpdb", + "user": "scott", + "password": { + "type": "base64", + "value": "dGlnZXI=" + } + } + } -An example using a :ref:`standalone connection ` is -shown below: +If you have this configuration file in ``/opt/oracle/my-config2.json``, you +could use it like: .. code-block:: python - import oracledb.plugins.oci_config_provider + connection = oracledb.connect(user="hr", password=userpwd, + dsn="config-file:///opt/oracle/my-config2.json?key=production") - configociurl = "config-ociobject://abc.oraclecloud.com/n/abcnamespace/b/abcbucket/o/abcobject?oci_tenancy=abc123&oci_user=ociuser1&oci_fingerprint=ab:14:ba:13&oci_key_file=ociabc/ocikeyabc.pem" - oracledb.connect(dsn=configociurl) +.. _ociobjstorageprovider: -An example using a :ref:`connection pool ` is shown below: +Using an OCI Object Storage Centralized Configuration Provider +-------------------------------------------------------------- -.. code-block:: python +The Oracle Cloud Infrastructure (OCI) `Object Storage configuration provider +`__ enables the storage and management of Oracle +Database connection information as JSON in `OCI Object Storage `__. - import oracledb.plugins.oci_config_provider +To use an OCI Object Storage Centralized Configuration Provider, you must: - configociurl = "config-ociobject://abc.oraclecloud.com/n/abcnamespace/b/abcbucket/o/abcobject?oci_tenancy=abc123&oci_user=ociuser1&oci_fingerprint=ab:14:ba:13&oci_key_file=ociabc/ocikeyabc.pem" +1. 
Upload a JSON file that contains the connection information into an OCI + Object Storage Bucket. See `Uploading an Object Storage Object to a Bucket + `__ and the `Oracle Net Service + Administrator’s Guide `__ for the steps. + See :ref:`OCI Object Storage Centralized Configuration Provider Parameters + ` for the configuration information that can be added. - oracledb.create_pool(dsn=configociurl) +2. Install the Python `OCI `__ module, see + :ref:`ocimodules`. -.. _importconfigociplugin: +3. Import the :ref:`oracledb.plugins.oci_config_provider ` + plugin in your application. -Importing ``oracledb.plugins.oci_config_provider`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +4. :ref:`Use an OCI Object Storage connection string URL ` + in the ``dsn`` parameter of connection and pool creation methods. -You must import a :ref:`oracledb.plugins.oci_config_provider ` -plugin provided by python-oracledb to access the configuration information -stored in :ref:`OCI Object Storage ` such as database connect -descriptor, user name, password, and python-oracledb specific attributes. +.. _ociconfigparams: -Importing this plugin defines and -:meth:`registers ` a built-in -:ref:`connection hook function ` that handles :ref:`connection -strings prefixed with config-ociobject `. This function is -internally invoked when the ``dsn`` parameter is prefixed with -``config-ociobject`` in calls to :meth:`oracledb.connect()`, -:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, or -:meth:`oracledb.create_pool_async()`. This hook function parses the connection -string, and extracts the following details: - -- URL of the OCI Object Storage endpoint -- OCI Object Storage namespace where the JSON file is stored -- OCI Object Storage bucket name where the JSON file is stored -- JSON file name -- Network service name or alias if the JSON file contains one or more aliases -- OCI Object Storage authentication details - -Using the above details, the hook function accesses the configuration -information stored in OCI Object Storage. The hook function sets the -connection information from OCI Object Storage in its ``connect_params`` -parameter which is a :ref:`ConnectParams ` object. This object is -used by python-oracledb to establish a connection to Oracle Database. +**OCI Object Storage Centralized Configuration Provider JSON File Syntax** + +The stored JSON configuration file must contain a ``connect_descriptor`` +property. Optionally, you can specify the database user name, password, a +cache time, and python-oracledb attributes. The database password can also be +stored securely using `OCI Vault `__ or `Azure Key Vault +`__. The +keys that can be in the JSON file are listed below. + +.. list-table-with-summary:: JSON Keys for OCI Object Storage Configuration Provider + :header-rows: 1 + :class: wy-table-responsive + :widths: 15 25 15 + :name: _oci_object_storage_sub-objects + :summary: The first column displays the name of the property. The second column displays the description of the property. The third column displays whether the property is required or optional. + + * - Property + - Description + - Required or Optional + * - ``user`` + - The database user name. + - Optional + * - ``password`` + - The password of the database user as a dictionary containing the key "type" and password type-specific keys. + - Optional + * - ``connect_descriptor`` + - The database :ref:`connection string `. + - Required + * - ``config_time_to_live`` + - How many seconds the configuration is cached for. 
Defaults to 86,400 seconds (24 hours). + - Optional + * - ``config_time_to_live_grace_period`` + - How many seconds an expired configuration can still be used if a new configuration cannot be obtained. Defaults to 1,800 seconds (30 minutes). + - Optional + * - ``pyo`` + - See :ref:`pyoparams`. + - Optional .. _connstringoci: -Defining a Connection String URL for OCI Object Storage -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**OCI Object Storage Centralized Configuration Provider DSN Syntax** -You must define a connection string URL in a specific format in the ``dsn`` -property of :meth:`oracledb.connect()`, :meth:`oracledb.create_pool()`, -:meth:`oracledb.connect_async()`, or :meth:`oracledb.create_pool_async()` to -access the information stored in OCI Object Storage. The syntax of the OCI -Object Storage connection string URL is:: +The ``dsn`` parameter for :meth:`oracledb.connect()`, +:meth:`oracledb.create_pool()`, :meth:`oracledb.connect_async()`, or +:meth:`oracledb.create_pool_async()` calls should use a connection string URL +in the format:: config-ociobject:/n/{namespaceName}/b/{bucketName}/o/ [/c/][?=&=...] -The parameters of the connection string are detailed in the table below. +The elements of the connection string are detailed in the table below. .. list-table-with-summary:: Connection String Parameters for OCI Object Storage :header-rows: 1 @@ -775,186 +1202,206 @@ The parameters of the connection string are detailed in the table below. * - - The network service name or alias if the JSON file contains one or more network service names. - Optional - * - and - - The authentication method and corresponding authentication parameters to access the OCI Object Storage configuration provider. Depending on the specified authentication method, you must also set the corresponding authentication parameters in the ``option=value`` syntax of the connection string. You can specify one of the following authentication methods: + * -