Skip to content

Commit ad94644

Browse files
committed
BUG28188883: Use utf8mb4 as the default character set
In MySQL 8.0, the utf8 alias for utf8mb3 has been deprecated, but Connector/Python still uses it as the default character set. This patch changes the default character set to utf8mb4 in both the pure Python and C extension implementations. A regression test was added.
1 parent 4fc315f commit ad94644

File tree

10 files changed

+54
-25
lines changed

10 files changed

+54
-25
lines changed

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ v8.0.12
1414
- WL#11951: Consolidate discrepancies between pure and c extension
1515
- WL#11932: Remove Fabric support
1616
- WL#11898: Core API v1 alignment
17+
- BUG#28188883: Use utf8mb4 as the default character set
1718
- BUG#28133321: Fix incorrect columns names representing aggregate functions
1819
- BUG#27962293: Fix Django 2.0 and MySQL 8.0 compatibility issues
1920
- BUG#27567999: Fix wrong docstring in ModifyStatement.patch()

lib/mysql/connector/abstracts.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class MySQLConnectionAbstract(object):
4949
def __init__(self, **kwargs):
5050
"""Initialize"""
5151
self._client_flags = ClientFlag.get_default()
52-
self._charset_id = 33
52+
self._charset_id = 45
5353
self._sql_mode = None
5454
self._time_zone = None
5555
self._autocommit = False
@@ -955,7 +955,7 @@ def cmd_ping(self):
955955
raise NotImplementedError
956956

957957
def cmd_change_user(self, username='', password='', database='',
958-
charset=33):
958+
charset=45):
959959
"""Change the current logged in user"""
960960
raise NotImplementedError
961961

lib/mysql/connector/connection.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def __init__(self, *args, **kwargs):
6464
self._converter_class = MySQLConverter
6565

6666
self._client_flags = ClientFlag.get_default()
67-
self._charset_id = 33
67+
self._charset_id = 45
6868
self._sql_mode = None
6969
self._time_zone = None
7070
self._autocommit = False
@@ -136,7 +136,7 @@ def _do_handshake(self):
136136
self._handshake = handshake
137137

138138
def _do_auth(self, username=None, password=None, database=None,
139-
client_flags=0, charset=33, ssl_options=None):
139+
client_flags=0, charset=45, ssl_options=None):
140140
"""Authenticate with the MySQL server
141141
142142
Authentication happens in two parts. We first send a response to the
@@ -686,7 +686,7 @@ def cmd_ping(self):
686686
return self._handle_ok(self._send_cmd(ServerCmd.PING))
687687

688688
def cmd_change_user(self, username='', password='', database='',
689-
charset=33):
689+
charset=45):
690690
"""Change the current logged in user
691691
692692
This method allows to change the current logged in user information.

lib/mysql/connector/connection_cext.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ def consume_results(self):
544544
self._cmysql.consume_result()
545545

546546
def cmd_change_user(self, username='', password='', database='',
547-
charset=33):
547+
charset=45):
548548
"""Change the current logged in user"""
549549
try:
550550
self._cmysql.change_user(username, password, database)

lib/mysql/connector/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
'port': 3306,
4545
'unix_socket': None,
4646
'use_unicode': True,
47-
'charset': 'utf8',
47+
'charset': 'utf8mb4',
4848
'collation': None,
4949
'converter_class': None,
5050
'autocommit': False,

lib/mysql/connector/cursor_cext.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,11 +296,11 @@ def remove_comments(match):
296296
"Failed rewriting statement for multi-row INSERT. "
297297
"Check SQL syntax."
298298
)
299-
fmt = matches.group(1).encode(self._cnx.charset)
299+
fmt = matches.group(1).encode(self._cnx.python_charset)
300300
values = []
301301

302302
try:
303-
stmt = operation.encode(self._cnx.charset)
303+
stmt = operation.encode(self._cnx.python_charset)
304304
for params in seq_params:
305305
tmp = fmt
306306
prepared = self._cnx.prepare_for_mysql(params)

lib/mysql/connector/protocol.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def _auth_response(self, client_flags, username, password, database,
7777
return auth_response
7878

7979
def make_auth(self, handshake, username=None, password=None, database=None,
80-
charset=33, client_flags=0,
80+
charset=45, client_flags=0,
8181
max_allowed_packet=1073741824, ssl_enabled=False,
8282
auth_plugin=None):
8383
"""Make a MySQL Authentication packet"""
@@ -113,7 +113,7 @@ def make_auth(self, handshake, username=None, password=None, database=None,
113113

114114
return packet
115115

116-
def make_auth_ssl(self, charset=33, client_flags=0,
116+
def make_auth_ssl(self, charset=45, client_flags=0,
117117
max_allowed_packet=1073741824):
118118
"""Make a SSL authentication packet"""
119119
return utils.int4store(client_flags) + \
@@ -133,7 +133,7 @@ def make_stmt_fetch(self, statement_id, rows=1):
133133
return utils.int4store(statement_id) + utils.int4store(rows)
134134

135135
def make_change_user(self, handshake, username=None, password=None,
136-
database=None, charset=33, client_flags=0,
136+
database=None, charset=45, client_flags=0,
137137
ssl_enabled=False, auth_plugin=None):
138138
"""Make a MySQL packet with the Change User command"""
139139

tests/test_bugs.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4080,7 +4080,7 @@ def test_bad_set_charset_number(self):
40804080
cursor = cnx.cursor(raw="true",buffered="true")
40814081
cursor.execute("SHOW VARIABLES LIKE 'character_set_connection'")
40824082
row = cursor.fetchone()
4083-
self.assertEqual(row[1], u"utf8")
4083+
self.assertEqual(row[1], u"utf8mb4")
40844084
cursor.close()
40854085

40864086
self.assertEqual(self.cnx._charset_id, old_val)
@@ -5298,3 +5298,31 @@ def test_invalid_collation(self):
52985298

52995299
def tearDown(self):
53005300
pass
5301+
5302+
5303+
class BugOra28188883(tests.MySQLConnectorTests):
5304+
"""BUG#27277937: DEPRECATED UTF8 IS THE DEFAULT CHARACTER SET IN 8.0
5305+
"""
5306+
def setUp(self):
5307+
# Remove charset from the connection configuration if it is set, so the
5308+
# default charset 'utf8mb4' is used for each connection
5309+
self.config = tests.get_mysql_config().copy()
5310+
if "charset" in self.config:
5311+
del self.config
5312+
5313+
@foreach_cnx()
5314+
def test_utf8mb4_default_charset(self):
5315+
self.assertEqual(self.cnx.charset, "utf8mb4")
5316+
data = [(1, u'🐬'), (2, u'🐍'), (3, u'🐶')]
5317+
tbl = "BugOra28188883"
5318+
cur = self.cnx.cursor()
5319+
cur.execute("DROP TABLE IF EXISTS {0}".format(tbl))
5320+
cur.execute("CREATE TABLE {0} (id INT, name VARCHAR(100)) "
5321+
"DEFAULT CHARSET utf8mb4".format(tbl))
5322+
stmt = "INSERT INTO {0} (id, name) VALUES (%s, %s)".format(tbl)
5323+
cur.executemany(stmt, data)
5324+
cur.execute("SELECT id, name FROM {0}".format(tbl))
5325+
self.assertEqual(data, cur.fetchall())
5326+
cur.execute("DROP TABLE IF EXISTS {0}".format(tbl))
5327+
cur.close()
5328+
self.cnx.close()

tests/test_connection.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def test_init(self):
148148
'converter': None,
149149
'_converter_class': MySQLConverter,
150150
'_client_flags': constants.ClientFlag.get_default(),
151-
'_charset_id': 33,
151+
'_charset_id': 45,
152152
'_user': '',
153153
'_password': '',
154154
'_database': '',
@@ -760,7 +760,7 @@ def test__do_auth(self):
760760
'username': 'ham',
761761
'password': 'spam',
762762
'database': 'test',
763-
'charset': 33,
763+
'charset': 45,
764764
'client_flags': flags,
765765
}
766766

@@ -806,7 +806,7 @@ def test_caching_sha2_password(self):
806806
'username': 'ham',
807807
'password': 'spam',
808808
'database': 'test',
809-
'charset': 33,
809+
'charset': 45,
810810
'client_flags': flags,
811811
'ssl_options': {
812812
'ca': os.path.join(tests.SSL_DIR, 'tests_CA_cert.pem'),
@@ -892,7 +892,7 @@ def test__do_auth_ssl(self):
892892
'username': 'ham',
893893
'password': 'spam',
894894
'database': 'test',
895-
'charset': 33,
895+
'charset': 45,
896896
'client_flags': flags,
897897
'ssl_options': {
898898
'ca': os.path.join(tests.SSL_DIR, 'tests_CA_cert.pem'),
@@ -1030,8 +1030,8 @@ def test_config(self):
10301030
constants.ClientFlag.COMPRESS)
10311031

10321032
# Test character set
1033-
# utf8 is default, which is mapped to 33
1034-
self.assertEqual(33, cnx._charset_id)
1033+
# utf8mb4 is default, which is mapped to 45
1034+
self.assertEqual(45, cnx._charset_id)
10351035
cnx.config(charset='latin1')
10361036
self.assertEqual(8, cnx._charset_id)
10371037
cnx.config(charset='latin1', collation='latin1_general_ci')
@@ -1150,12 +1150,12 @@ def test__open_connection(self):
11501150

11511151
def test__post_connection(self):
11521152
"""Executes commands after connection has been established"""
1153-
self.cnx._charset_id = 33
1153+
self.cnx._charset_id = 45
11541154
self.cnx._autocommit = True
11551155
self.cnx._time_zone = "-09:00"
11561156
self.cnx._sql_mode = "STRICT_ALL_TABLES"
11571157
self.cnx._post_connection()
1158-
self.assertEqual('utf8', self.cnx.charset)
1158+
self.assertEqual('utf8mb4', self.cnx.charset)
11591159
self.assertEqual(self.cnx._autocommit, self.cnx.autocommit)
11601160
self.assertEqual(self.cnx._time_zone, self.cnx.time_zone)
11611161
self.assertEqual(self.cnx._sql_mode, self.cnx.sql_mode)

tests/test_protocol.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,17 +140,17 @@ def test_make_auth_ssl(self):
140140
"""Make a SSL authentication packet"""
141141
cases = [
142142
({},
143-
b'\x00\x00\x00\x00\x00\x00\x00\x40\x21\x00\x00\x00\x00\x00'
143+
b'\x00\x00\x00\x00\x00\x00\x00@-\x00\x00\x00\x00\x00\x00'
144144
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
145-
b'\x00\x00\x00\x00'),
145+
b'\x00\x00\x00'),
146146
({'charset': 8},
147147
b'\x00\x00\x00\x00\x00\x00\x00\x40\x08\x00\x00\x00\x00\x00'
148148
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
149149
b'\x00\x00\x00\x00'),
150150
({'client_flags': 240141},
151-
b'\x0d\xaa\x03\x00\x00\x00\x00\x40\x21\x00\x00\x00\x00\x00'
151+
b'\r\xaa\x03\x00\x00\x00\x00@-\x00\x00\x00\x00\x00\x00\x00'
152152
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
153-
b'\x00\x00\x00\x00'),
153+
b'\x00\x00'),
154154
({'charset': 8, 'client_flags': 240141,
155155
'max_allowed_packet': 2147483648},
156156
b'\x0d\xaa\x03\x00\x00\x00\x00\x80\x08\x00\x00\x00\x00\x00'

0 commit comments

Comments
 (0)