Skip to content

Commit 437bfd4

Browse files
committed
BUG#36570707: Collation set on connect using C-Extension is ignored
This patch fixes the issue where the collation passed through the connection option was getting ignored and default collation was being set by C API. Additionally, the default charset and collation now corresponds to charset ID 255 for MySQL Servers equal to or greater than 8.0. Change-Id: Ib7c876044a05b1e3d4aeb09092b0659984f2da98
1 parent 78ffff8 commit 437bfd4

14 files changed

+656
-114
lines changed

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ v9.0.0
1515
- WL#16318: Deprecate Cursors Prepared Raw and Named Tuple
1616
- WL#16283: Remove OpenTelemetry Bundled Installation
1717
- BUG#36611371: Update dnspython required versions to allow latest 2.6.1
18+
- BUG#36570707: Collation set on connect using C-Extension is ignored
1819
- BUG#36476195: Incorrect escaping in pure Python mode if sql_mode includes NO_BACKSLASH_ESCAPES
1920
- BUG#36289767: MySQLCursorBufferedRaw does not skip conversion
2021

mysql-connector-python/lib/mysql/connector/abstracts.py

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@
7272
from .constants import (
7373
CONN_ATTRS_DN,
7474
DEFAULT_CONFIGURATION,
75+
MYSQL_DEFAULT_CHARSET_ID_57,
76+
MYSQL_DEFAULT_CHARSET_ID_80,
7577
OPENSSL_CS_NAMES,
7678
TLS_CIPHER_SUITES,
7779
TLS_VERSIONS,
@@ -174,13 +176,20 @@ class MySQLConnectionAbstract(ABC):
174176

175177
def __init__(self) -> None:
176178
"""Initialize"""
177-
# opentelemetry related
178-
self._tracer: Any = None
179-
self._span: Any = None
180-
self.otel_context_propagation: bool = True
179+
# private (shouldn't be manipulated directly internally)
180+
self.__charset_id: Optional[int] = None
181+
"""It shouldn't be manipulated directly, even internally. If you need
182+
to manipulate the charset ID, use the property `_charset_id` (read & write)
183+
instead. Similarly, `_charset_id` shouldn't be manipulated externally,
184+
in this case, use property `charset_id` (read-only).
185+
"""
186+
187+
# protected (can be manipulated directly internally)
188+
self._tracer: Any = None # opentelemetry related
189+
self._span: Any = None # opentelemetry related
190+
self.otel_context_propagation: bool = True # opentelemetry related
181191

182192
self._client_flags: int = ClientFlag.get_default()
183-
self._charset_id: int = 45
184193
self._sql_mode: Optional[str] = None
185194
self._time_zone: Optional[str] = None
186195
self._autocommit: bool = False
@@ -1177,6 +1186,18 @@ def unread_result(self, value: bool) -> None:
11771186
raise ValueError("Expected a boolean type")
11781187
self._unread_result = value
11791188

1189+
@property
1190+
def collation(self) -> str:
1191+
"""Returns the collation for current connection.
1192+
1193+
This property returns the collation name of the current connection.
1194+
The server is queried when the connection is active. If not connected,
1195+
the configured collation name is returned.
1196+
1197+
Returns a string.
1198+
"""
1199+
return self._character_set.get_charset_info(self._charset_id)[2]
1200+
11801201
@property
11811202
def charset(self) -> str:
11821203
"""Returns the character set for current connection.
@@ -1189,6 +1210,41 @@ def charset(self) -> str:
11891210
"""
11901211
return self._character_set.get_info(self._charset_id)[0]
11911212

1213+
@property
1214+
def charset_id(self) -> int:
1215+
"""The charset ID utilized during the connection phase.
1216+
1217+
If the charset ID hasn't been set, the default charset ID is returned.
1218+
"""
1219+
return self._charset_id
1220+
1221+
@property
1222+
def _charset_id(self) -> int:
1223+
"""The charset ID utilized during the connection phase.
1224+
1225+
If the charset ID hasn't been set, the default charset ID is returned.
1226+
"""
1227+
if self.__charset_id is None:
1228+
if self._server_version is None:
1229+
# We mustn't set the private since we still don't know
1230+
# the server version. We temporarily return the default
1231+
# charset for undefined scenarios - eventually, the server
1232+
# info will be available and the private variable will be set.
1233+
return MYSQL_DEFAULT_CHARSET_ID_57
1234+
1235+
self.__charset_id = (
1236+
MYSQL_DEFAULT_CHARSET_ID_57
1237+
if self._server_version < (8, 0)
1238+
else MYSQL_DEFAULT_CHARSET_ID_80
1239+
)
1240+
1241+
return self.__charset_id
1242+
1243+
@_charset_id.setter
1244+
def _charset_id(self, value: int) -> None:
1245+
"""Sets the charset ID utilized during the connection phase."""
1246+
self.__charset_id = value
1247+
11921248
@property
11931249
def python_charset(self) -> str:
11941250
"""Returns the Python character set for current connection.
@@ -1269,28 +1325,9 @@ def set_charset_collation(
12691325

12701326
self._execute_query(f"SET NAMES '{charset_name}' COLLATE '{collation_name}'")
12711327

1272-
try:
1273-
# Required for C Extension
1274-
self.set_character_set_name(charset_name)
1275-
except AttributeError:
1276-
# Not required for pure Python connection
1277-
pass
1278-
12791328
if self.converter:
12801329
self.converter.set_charset(charset_name, character_set=self._character_set)
12811330

1282-
@property
1283-
def collation(self) -> str:
1284-
"""Returns the collation for current connection.
1285-
1286-
This property returns the collation name of the current connection.
1287-
The server is queried when the connection is active. If not connected,
1288-
the configured collation name is returned.
1289-
1290-
Returns a string.
1291-
"""
1292-
return self._character_set.get_charset_info(self._charset_id)[2]
1293-
12941331
@property
12951332
@abstractmethod
12961333
def connection_id(self) -> Optional[int]:
@@ -1311,7 +1348,7 @@ def _post_connection(self) -> None:
13111348
established. Some setting like autocommit, character set, and SQL mode
13121349
are set using this method.
13131350
"""
1314-
self.set_charset_collation(self._charset_id)
1351+
self.set_charset_collation(charset=self._charset_id)
13151352
self.autocommit = self._autocommit
13161353
if self._time_zone:
13171354
self.time_zone = self._time_zone

mysql-connector-python/lib/mysql/connector/aio/abstracts.py

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@
7575
from ..constants import (
7676
CONN_ATTRS_DN,
7777
DEFAULT_CONFIGURATION,
78+
MYSQL_DEFAULT_CHARSET_ID_57,
79+
MYSQL_DEFAULT_CHARSET_ID_80,
7880
OPENSSL_CS_NAMES,
7981
TLS_CIPHER_SUITES,
8082
TLS_VERSIONS,
@@ -201,6 +203,14 @@ def __init__(
201203
tls_ciphersuites: Optional[List[str]] = None,
202204
loop: Optional[asyncio.AbstractEventLoop] = None,
203205
):
206+
# private (shouldn't be manipulated directly internally)
207+
self.__charset: Optional[Charset] = None
208+
"""It shouldn't be manipulated directly, even internally. If you need
209+
to manipulate the charset object, use the property `_charset` (read & write)
210+
instead. Similarly, `_charset` shouldn't be manipulated externally.
211+
"""
212+
213+
# protected (can be manipulated directly internally)
204214
self._user: str = user
205215
self._password: str = password
206216
self._host: str = host
@@ -220,9 +230,10 @@ def __init__(
220230
self._init_command: Optional[str] = init_command
221231
self._protocol: MySQLProtocol = MySQLProtocol()
222232
self._socket: Optional[Union[MySQLTcpSocket, MySQLUnixSocket]] = None
223-
self._charset: Optional[Charset] = None
224233
self._charset_name: Optional[str] = charset
234+
"""Charset name provided by the user at connection time."""
225235
self._charset_collation: Optional[str] = collation
236+
"""Collation provided by the user at connection time."""
226237
self._ssl_active: bool = False
227238
self._ssl_disabled: bool = ssl_disabled
228239
self._ssl_ca: Optional[str] = ssl_ca
@@ -239,7 +250,7 @@ def __init__(
239250
loop or asyncio.get_event_loop()
240251
)
241252
self._client_flags: int = client_flags or ClientFlag.get_default()
242-
self._server_info: ServerInfo
253+
self._server_info: Optional[ServerInfo] = None
243254
self._cursors: weakref.WeakSet = weakref.WeakSet()
244255
self._query_attrs: Dict[str, BinaryProtocolType] = {}
245256
self._query_attrs_supported: int = False
@@ -893,6 +904,18 @@ def unread_result(self, value: bool) -> None:
893904
raise ValueError("Expected a boolean type")
894905
self._unread_result = value
895906

907+
@property
908+
def collation(self) -> str:
909+
"""Returns the collation for current connection.
910+
911+
This property returns the collation name of the current connection.
912+
The server is queried when the connection is active. If not connected,
913+
the configured collation name is returned.
914+
915+
Returns a string.
916+
"""
917+
return self._charset.collation
918+
896919
@property
897920
def charset(self) -> str:
898921
"""Return the character set for current connection.
@@ -901,7 +924,40 @@ def charset(self) -> str:
901924
The server is queried when the connection is active.
902925
If not connected, the configured character set name is returned.
903926
"""
904-
return self._charset.name if self._charset else "utf8"
927+
return self._charset.name
928+
929+
@property
930+
def charset_id(self) -> int:
931+
"""The charset ID utilized during the connection phase.
932+
933+
If the charset ID hasn't been set, the default charset ID is returned.
934+
"""
935+
return self._charset.charset_id
936+
937+
@property
938+
def _charset(self) -> Charset:
939+
"""The charset object encapsulates charset and collation information."""
940+
if self.__charset is None:
941+
if self._server_info is None:
942+
# We mustn't set `_charset` since we still don't know
943+
# the server version. We temporarily return the default
944+
# charset for undefined scenarios - eventually, the server
945+
# info will be available and `_charset` (data class) will be set.
946+
return charsets.get_by_id(MYSQL_DEFAULT_CHARSET_ID_57)
947+
948+
self.__charset = charsets.get_by_id(
949+
(
950+
MYSQL_DEFAULT_CHARSET_ID_57
951+
if self._server_info.version_tuple < (8, 0)
952+
else MYSQL_DEFAULT_CHARSET_ID_80
953+
)
954+
)
955+
return self.__charset
956+
957+
@_charset.setter
958+
def _charset(self, value: Charset) -> None:
959+
"""The charset object encapsulates charset and collation information."""
960+
self.__charset = value
905961

906962
@property
907963
def python_charset(self) -> str:
@@ -940,7 +996,7 @@ async def _post_connection(self) -> None:
940996
Some setting like autocommit, character set, and SQL mode are set using this
941997
method.
942998
"""
943-
await self.set_charset_collation(self._charset.charset_id)
999+
await self.set_charset_collation(charset=self._charset.charset_id)
9441000
await self.set_autocommit(self._autocommit)
9451001
if self._time_zone:
9461002
await self.set_time_zone(self._time_zone)
@@ -1000,18 +1056,9 @@ async def set_charset_collation(
10001056
charset = DEFAULT_CONFIGURATION["charset"]
10011057
self._charset = charsets.get_by_name(charset) # type: ignore[arg-type]
10021058

1003-
self._charset_name = self._charset.name
1004-
self._charset_collation = self._charset.collation
1005-
10061059
await self.cmd_query(
10071060
f"SET NAMES '{self._charset.name}' COLLATE '{self._charset.collation}'"
10081061
)
1009-
try:
1010-
# Required for C Extension
1011-
self.set_character_set_name(self._charset.name)
1012-
except AttributeError:
1013-
# Not required for pure Python connection
1014-
pass
10151062

10161063
if self.converter:
10171064
self.converter.set_charset(self._charset.name)
@@ -1048,7 +1095,9 @@ def get_server_version(self) -> Optional[Tuple[int, ...]]:
10481095
The MySQL server version as a tuple. If not previously connected, it will
10491096
return `None`.
10501097
"""
1051-
return self._server_info.version # type: ignore[return-value]
1098+
if self._server_info is not None:
1099+
return self._server_info.version # type: ignore[return-value]
1100+
return None
10521101

10531102
def get_server_info(self) -> Optional[str]:
10541103
"""Gets the original MySQL version information.

mysql-connector-python/lib/mysql/connector/aio/connection.py

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -238,17 +238,8 @@ async def _do_handshake(self) -> None:
238238
)
239239
elif self._charset_name:
240240
self._charset = charsets.get_by_name(self._charset_name)
241-
self._charset_collation = self._charset.collation
242241
elif self._charset_collation:
243242
self._charset = charsets.get_by_collation(self._charset_collation)
244-
self._charset_name = self._charset.name
245-
else:
246-
# The default charset from the server handshake should be used instead,
247-
# as `charsets.get_by_id(self._server_info.charset)`.
248-
# The charset id 45 is used to be aligned with the current implementation.
249-
self._charset = charsets.get_by_id(45)
250-
self._charset_name = self._charset.name
251-
self._charset_collation = self._charset.collation
252243

253244
if not self._handshake["capabilities"] & ClientFlag.SSL:
254245
if self._auth_plugin == "mysql_clear_password" and not self.is_secure:
@@ -1296,10 +1287,10 @@ async def cmd_ping(self) -> OkPacketType:
12961287

12971288
async def cmd_change_user(
12981289
self,
1299-
user: str = "",
1290+
username: str = "",
13001291
password: str = "",
13011292
database: str = "",
1302-
charset: int = 45,
1293+
charset: Optional[int] = None,
13031294
password1: str = "",
13041295
password2: str = "",
13051296
password3: str = "",
@@ -1311,13 +1302,17 @@ async def cmd_change_user(
13111302
This method allows to change the current logged in user information.
13121303
The result is a dictionary with OK packet information.
13131304
"""
1314-
if not isinstance(charset, int):
1315-
raise ValueError("charset must be an integer")
1316-
if charset < 0:
1317-
raise ValueError("charset should be either zero or a postive integer")
1305+
# If charset isn't defined, we use the same charset ID defined previously,
1306+
# otherwise, we run a verification and update the charset ID.
1307+
if charset is not None:
1308+
if not isinstance(charset, int):
1309+
raise ValueError("charset must be an integer")
1310+
if charset < 0:
1311+
raise ValueError("charset should be either zero or a postive integer")
1312+
self._charset = charsets.get_by_id(charset)
13181313

13191314
self._mfa_nfactor = 1
1320-
self._user = user
1315+
self._user = username
13211316
self._password = password
13221317
self._password1 = password1
13231318
self._password2 = password2
@@ -1341,7 +1336,7 @@ async def cmd_change_user(
13411336
username=self._user,
13421337
password=self._password,
13431338
database=self._database,
1344-
charset=charset,
1339+
charset=self._charset.charset_id,
13451340
client_flags=self._client_flags,
13461341
ssl_enabled=self._ssl_active,
13471342
auth_plugin=self._auth_plugin,
@@ -1356,8 +1351,5 @@ async def cmd_change_user(
13561351
if not (self._client_flags & ClientFlag.CONNECT_WITH_DB) and database:
13571352
await self.cmd_init_db(database)
13581353

1359-
self._charset = charsets.get_by_id(charset)
1360-
self._charset_name = self._charset.name
1361-
13621354
# return ok_pkt
13631355
return self._handle_ok(ok_packet)

0 commit comments

Comments
 (0)