Skip to content

Commit dcd5b63

Browse files
committed
BUG19500097: Fix string decoding with binary character set
When the connection character set is set to 'binary', string data failed to decode because 'binary' is not a valid Python character set (codec) name. We fix this by returning string data without decoding when the character set is 'binary'. A unit test has been added for BUG#19500097.
1 parent 411da6f commit dcd5b63

File tree

3 files changed

+56
-4
lines changed

3 files changed

+56
-4
lines changed

lib/mysql/connector/connection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1135,7 +1135,7 @@ def python_charset(self):
11351135
Returns a string.
11361136
"""
11371137
encoding = CharacterSet.get_info(self._charset_id)[0]
1138-
if encoding == 'utf8mb4':
1138+
if encoding in ('utf8mb4', 'binary'):
11391139
return 'utf8'
11401140
else:
11411141
return encoding

lib/mysql/connector/conversion.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,15 @@ def _str_to_mysql(self, value):
178178

179179
def _unicode_to_mysql(self, value):
180180
"""Convert unicode"""
181-
encoded = value.encode(self.charset)
182-
if self.charset_id in CharacterSet.slash_charsets:
181+
charset = self.charset
182+
charset_id = self.charset_id
183+
if charset == 'binary':
184+
charset = 'utf8'
185+
charset_id = CharacterSet.get_charset_info(charset)[0]
186+
encoded = value.encode(charset)
187+
if charset_id in CharacterSet.slash_charsets:
183188
if b'\x5c' in encoded:
184-
return HexLiteral(value, self.charset)
189+
return HexLiteral(value, charset)
185190
return encoded
186191

187192
def _bytes_to_mysql(self, value):
@@ -537,6 +542,8 @@ def _STRING_to_python(self, value, dsc=None): # pylint: disable=C0103
537542
if dsc[7] & FieldFlag.BINARY:
538543
return value
539544

545+
if self.charset == 'binary':
546+
return value
540547
if isinstance(value, (bytes, bytearray)) and self.use_unicode:
541548
return value.decode(self.charset)
542549

tests/test_bugs.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2960,3 +2960,48 @@ def test_row_to_python(self):
29602960
cur.execute("INSERT INTO {0} (c1) VALUES (?)".format(self.tbl), (data,))
29612961
self.cur.execute("SELECT * FROM {0}".format(self.tbl))
29622962
self.assertEqual((data,), self.cur.fetchone())
2963+
2964+
2965+
class BugOra19500097(tests.MySQLConnectorTests):
2966+
"""BUG#19500097: BETTER SUPPORT FOR RAW/BINARY DATA
2967+
"""
2968+
def setUp(self):
2969+
config = tests.get_mysql_config()
2970+
self.cnx = connection.MySQLConnection(**config)
2971+
self.cur = self.cnx.cursor()
2972+
2973+
self.tbl = 'Bug19500097'
2974+
self.cur.execute("DROP TABLE IF EXISTS {0}".format(self.tbl))
2975+
2976+
create = ("CREATE TABLE {0} (col1 VARCHAR(10), col2 INT) "
2977+
"DEFAULT CHARSET latin1".format(self.tbl))
2978+
self.cur.execute(create)
2979+
2980+
def tearDown(self):
2981+
self.cur.execute("DROP TABLE IF EXISTS {0}".format(self.tbl))
2982+
self.cur.close()
2983+
self.cnx.close()
2984+
2985+
def test_binary_charset(self):
2986+
2987+
sql = "INSERT INTO {0} VALUES(%s, %s)".format(self.tbl)
2988+
self.cur.execute(sql, ('foo', 1))
2989+
self.cur.execute(sql, ('ëëë', 2))
2990+
self.cur.execute(sql, (u'ááá', 5))
2991+
2992+
self.cnx.set_charset_collation('binary')
2993+
self.cur.execute(sql, ('bar', 3))
2994+
self.cur.execute(sql, ('ëëë', 4))
2995+
self.cur.execute(sql, (u'ááá', 6))
2996+
2997+
exp = [
2998+
(bytearray(b'foo'), 1),
2999+
(bytearray(b'\xeb\xeb\xeb'), 2),
3000+
(bytearray(b'\xe1\xe1\xe1'), 5),
3001+
(bytearray(b'bar'), 3),
3002+
(bytearray(b'\xc3\xab\xc3\xab\xc3\xab'), 4),
3003+
(bytearray(b'\xc3\xa1\xc3\xa1\xc3\xa1'), 6)
3004+
]
3005+
3006+
self.cur.execute("SELECT * FROM {0}".format(self.tbl))
3007+
self.assertEqual(exp, self.cur.fetchall())

0 commit comments

Comments
 (0)