Skip to content

Commit 1a3e3c1

Browse files
author
Geert Vanderkelen
committed
BUG21535573: Fix character decoding of identifiers using CExtension
In the C Extension, we fix the decoding of column names, table names and database names in the field information of result sets. When decoding fails, an error is now raised (instead of causing a segmentation fault). A test case was added for BUG21535573 which tests GBK, SJIS and BIG5 characters in column names. We also update a few other test cases. (cherry picked from commit 6e4649c)
1 parent a74964e commit 1a3e3c1

File tree

8 files changed

+154
-46
lines changed

8 files changed

+154
-46
lines changed

lib/mysql/connector/charsets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22

33
# MySQL Connector/Python - MySQL driver written in Python.
4-
# Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
4+
# Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
55

66
# MySQL Connector/Python is licensed under the terms of the GPLv2
77
# <http://www.gnu.org/licenses/old-licenses/gpl-2.0.html>, like most
@@ -24,8 +24,8 @@
2424
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2525

2626
# This file was auto-generated.
27-
_GENERATED_ON = '2014-05-23'
28-
_MYSQL_VERSION = (5, 7, 4)
27+
_GENERATED_ON = '2015-08-24'
28+
_MYSQL_VERSION = (5, 7, 8)
2929

3030
"""This module contains the MySQL Server Character Sets"""
3131

lib/mysql/connector/connection.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,8 @@ def _handle_result(self, packet):
401401

402402
columns = [None,] * column_count
403403
for i in range(0, column_count):
404-
columns[i] = self._protocol.parse_column(self._socket.recv())
404+
columns[i] = self._protocol.parse_column(
405+
self._socket.recv(), self.python_charset)
405406

406407
eof = self._handle_eof(self._socket.recv())
407408
self.unread_result = True
@@ -920,7 +921,8 @@ def _handle_binary_result(self, packet):
920921

921922
columns = [None] * column_count
922923
for i in range(0, column_count):
923-
columns[i] = self._protocol.parse_column(self._socket.recv())
924+
columns[i] = self._protocol.parse_column(
925+
self._socket.recv(), self.python_charset)
924926

925927
eof = self._handle_eof(self._socket.recv())
926928
return (column_count, columns, eof)
@@ -941,12 +943,14 @@ def cmd_stmt_prepare(self, statement):
941943
if result['num_params'] > 0:
942944
for _ in range(0, result['num_params']):
943945
result['parameters'].append(
944-
self._protocol.parse_column(self._socket.recv()))
946+
self._protocol.parse_column(self._socket.recv(),
947+
self.python_charset))
945948
self._handle_eof(self._socket.recv())
946949
if result['num_columns'] > 0:
947950
for _ in range(0, result['num_columns']):
948951
result['columns'].append(
949-
self._protocol.parse_column(self._socket.recv()))
952+
self._protocol.parse_column(self._socket.recv(),
953+
self.python_charset))
950954
self._handle_eof(self._socket.recv())
951955

952956
return result

lib/mysql/connector/protocol.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ def parse_column_count(self, packet):
233233
except (struct.error, ValueError):
234234
raise errors.InterfaceError("Failed parsing column count")
235235

236-
def parse_column(self, packet):
236+
def parse_column(self, packet, charset='utf-8'):
237237
"""Parse a MySQL column-packet"""
238238
(packet, _) = utils.read_lc_string(packet[4:]) # catalog
239239
(packet, _) = utils.read_lc_string(packet) # db
@@ -249,7 +249,7 @@ def parse_column(self, packet):
249249
raise errors.InterfaceError("Failed parsing column information")
250250

251251
return (
252-
name.decode('utf-8'),
252+
name.decode(charset),
253253
field_type,
254254
None, # display_size
255255
None, # internal_size

src/include/mysql_capi.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ typedef struct {
4646
PyObject *have_result_set;
4747
PyObject *fields;
4848
PyObject *auth_plugin;
49+
MY_CHARSET_INFO cs;
4950
unsigned int connection_timeout;
5051
// class members
5152

src/mysql_capi.c

Lines changed: 68 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,23 @@ str_to_bytes(const char* charset, PyObject *value)
128128
return NULL;
129129
}
130130

131+
/**
132+
Get the Python character set (codec) name based on the MySQL character set name
133+
*/
134+
static char*
135+
python_characterset_name(const char* mysql_name)
136+
{
137+
if (!mysql_name) {
138+
return "latin1"; // MySQL default
139+
}
140+
141+
if (strcmp(mysql_name, "utf8mb4") == 0) {
142+
return "utf8";
143+
}
144+
145+
return (char*)mysql_name;
146+
}
147+
131148
/**
132149
Get the character set name from the current MySQL session.
133150
@@ -148,17 +165,14 @@ static const char*
148165
my2py_charset_name(MYSQL *session)
149166
{
150167
const char *name;
168+
151169
if (!session)
152170
{
153171
return NULL;
154172
}
155173

156-
name = mysql_character_set_name(session);
157-
if (strcmp(name, "utf8mb4") == 0) {
158-
return "utf8";
159-
}
160-
161-
return name;
174+
name= mysql_character_set_name(session);
175+
return python_characterset_name(name);
162176
}
163177

164178
/**
@@ -177,12 +191,15 @@ my2py_charset_name(MYSQL *session)
177191
@retval NULL Exception
178192
*/
179193
static PyObject*
180-
fetch_fields(MYSQL_RES *result, unsigned int num_fields)
194+
fetch_fields(MYSQL_RES *result, unsigned int num_fields, MY_CHARSET_INFO *cs,
195+
unsigned int use_unicode)
181196
{
182-
PyObject *fields = NULL;
183-
PyObject *field = NULL;
184-
MYSQL_FIELD *my_fields;
197+
PyObject *fields= NULL;
198+
PyObject *field= NULL;
199+
PyObject *decoded= NULL;
200+
MYSQL_FIELD *myfs;
185201
unsigned int i;
202+
char *charset= python_characterset_name(cs->csname);
186203

187204
fields = PyList_New(0);
188205

@@ -192,35 +209,48 @@ fetch_fields(MYSQL_RES *result, unsigned int num_fields)
192209
}
193210

194211
Py_BEGIN_ALLOW_THREADS
195-
my_fields = mysql_fetch_fields(result);
212+
myfs = mysql_fetch_fields(result);
196213
Py_END_ALLOW_THREADS
197214

198215
for (i = 0; i < num_fields; i++)
199216
{
200217
field = PyTuple_New(11);
201-
PyTuple_SET_ITEM(field, 0,
202-
UnicodeFromStringAndSize(my_fields[i].catalog,
203-
my_fields[i].catalog_length));
204-
PyTuple_SET_ITEM(field, 1,
205-
UnicodeFromStringAndSize(my_fields[i].db,
206-
my_fields[i].db_length));
207-
PyTuple_SET_ITEM(field, 2,
208-
UnicodeFromStringAndSize(my_fields[i].table,
209-
my_fields[i].table_length));
210-
PyTuple_SET_ITEM(field, 3,
211-
UnicodeFromStringAndSize(my_fields[i].org_table,
212-
my_fields[i].org_table_length));
213-
PyTuple_SET_ITEM(field, 4,
214-
UnicodeFromStringAndSize(my_fields[i].name,
215-
my_fields[i].name_length));
216-
PyTuple_SET_ITEM(field, 5,
217-
UnicodeFromStringAndSize(my_fields[i].org_name,
218-
my_fields[i].org_name_length));
219-
PyTuple_SET_ITEM(field, 6, PyInt_FromLong(my_fields[i].charsetnr));
220-
PyTuple_SET_ITEM(field, 7, PyInt_FromLong(my_fields[i].max_length));
221-
PyTuple_SET_ITEM(field, 8, PyInt_FromLong(my_fields[i].type));
222-
PyTuple_SET_ITEM(field, 9, PyInt_FromLong(my_fields[i].flags));
223-
PyTuple_SET_ITEM(field, 10, PyInt_FromLong(my_fields[i].decimals));
218+
219+
decoded= mytopy_string(myfs[i].catalog, myfs[i].catalog_length,
220+
myfs[i].flags, charset, use_unicode);
221+
if (NULL == decoded) return NULL; // decode error
222+
PyTuple_SET_ITEM(field, 0, decoded);
223+
224+
decoded= mytopy_string(myfs[i].db, myfs[i].db_length,
225+
myfs[i].flags, charset, use_unicode);
226+
if (NULL == decoded) return NULL; // decode error
227+
PyTuple_SET_ITEM(field, 1, decoded);
228+
229+
decoded= mytopy_string(myfs[i].table, myfs[i].table_length,
230+
myfs[i].flags, charset, use_unicode);
231+
if (NULL == decoded) return NULL; // decode error
232+
PyTuple_SET_ITEM(field, 2, decoded);
233+
234+
decoded= mytopy_string(myfs[i].org_table, myfs[i].org_table_length,
235+
myfs[i].flags, charset, use_unicode);
236+
if (NULL == decoded) return NULL; // decode error
237+
PyTuple_SET_ITEM(field, 3, decoded);
238+
239+
decoded= mytopy_string(myfs[i].name, myfs[i].name_length,
240+
myfs[i].flags, charset, use_unicode);
241+
if (NULL == decoded) return NULL; // decode error
242+
PyTuple_SET_ITEM(field, 4, decoded);
243+
244+
decoded= mytopy_string(myfs[i].org_name, myfs[i].org_name_length,
245+
myfs[i].flags, charset, use_unicode);
246+
if (NULL == decoded) return NULL; // decode error
247+
PyTuple_SET_ITEM(field, 5, decoded);
248+
249+
PyTuple_SET_ITEM(field, 6, PyInt_FromLong(myfs[i].charsetnr));
250+
PyTuple_SET_ITEM(field, 7, PyInt_FromLong(myfs[i].max_length));
251+
PyTuple_SET_ITEM(field, 8, PyInt_FromLong(myfs[i].type));
252+
PyTuple_SET_ITEM(field, 9, PyInt_FromLong(myfs[i].flags));
253+
PyTuple_SET_ITEM(field, 10, PyInt_FromLong(myfs[i].decimals));
224254
PyList_Append(fields, field);
225255
Py_DECREF(field);
226256
}
@@ -1823,6 +1853,7 @@ MySQL_query(MySQL *self, PyObject *args, PyObject *kwds)
18231853
self->raw= self->raw_at_connect;
18241854
}
18251855

1856+
mysql_get_character_set_info(&self->session, &self->cs);
18261857
return MySQL_handle_result(self);
18271858
}
18281859

@@ -2157,7 +2188,7 @@ MySQL_fetch_fields(MySQL *self)
21572188
count= mysql_num_fields(self->result);
21582189
Py_END_ALLOW_THREADS
21592190

2160-
return fetch_fields(self->result, count);
2191+
return fetch_fields(self->result, count, &self->cs, self->use_unicode);
21612192
}
21622193

21632194
/**
@@ -2230,7 +2261,8 @@ MySQL_fetch_row(MySQL *self)
22302261
}
22312262

22322263
if (self->fields == NULL) {
2233-
self->fields= fetch_fields(self->result, num_fields);
2264+
self->fields= fetch_fields(self->result, num_fields, &self->cs,
2265+
self->use_unicode);
22342266
}
22352267

22362268
result_row = PyTuple_New(num_fields);

src/mysql_capi_conversion.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,18 @@ mytopy_string(const char *data, const unsigned long length,
736736
const unsigned long flags, const char *charset,
737737
unsigned int use_unicode)
738738
{
739+
if (!charset || !data) {
740+
printf("\n==> here ");
741+
if (charset) {
742+
printf(" charset:%s", charset);
743+
}
744+
if (data) {
745+
printf(" data:'%s'", data);
746+
}
747+
printf("\n");
748+
return NULL;
749+
}
750+
739751
if (!(flags & BINARY_FLAG) && use_unicode && strcmp(charset, "binary") != 0)
740752
{
741753
return PyUnicode_Decode(data, length, charset, NULL);

tests/cext/test_cext_cursor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ def test__str__(self):
438438
def test_column_names(self):
439439
cur = self._get_cursor(self.cnx)
440440
stmt = "SELECT NOW() as now, 'The time' as label, 123 FROM dual"
441-
exp = ('now', 'label', '123')
441+
exp = (b'now', 'label', b'123')
442442
cur.execute(stmt)
443443
cur.fetchone()
444444
self.assertEqual(exp, cur.column_names)

tests/test_bugs.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3675,6 +3675,65 @@ def sleepy_select(cnx):
36753675
"1317 (70100): Query execution was interrupted")
36763676

36773677

3678+
class BugOra21535573(tests.MySQLConnectorTests):
3679+
"""BUG#21535573: SEGFAULT WHEN TRY TO SELECT GBK DATA WITH C-EXTENSION
3680+
"""
3681+
def tearDown(self):
3682+
cnx = connection.MySQLConnection(**tests.get_mysql_config())
3683+
for charset in ('gbk', 'sjis', 'big5'):
3684+
tablename = charset + 'test'
3685+
cnx.cmd_query("DROP TABLE IF EXISTS {0}".format(tablename))
3686+
cnx.close()
3687+
3688+
def _test_charset(self, charset, data):
3689+
config = tests.get_mysql_config()
3690+
config['charset'] = charset
3691+
config['use_unicode'] = True
3692+
self.cnx = self.cnx.__class__(**config)
3693+
tablename = charset + 'test'
3694+
cur = self.cnx.cursor()
3695+
3696+
cur.execute("DROP TABLE IF EXISTS {0}".format(tablename))
3697+
if PY2:
3698+
column = data.encode(charset)
3699+
else:
3700+
column = data
3701+
table = (
3702+
"CREATE TABLE {table} ("
3703+
" {col} INT AUTO_INCREMENT KEY, "
3704+
"c1 VARCHAR(40)"
3705+
") CHARACTER SET '{charset}'"
3706+
).format(table=tablename, charset=charset, col=column)
3707+
cur.execute(table)
3708+
self.cnx.commit()
3709+
3710+
cur.execute("TRUNCATE {0}".format(tablename))
3711+
self.cnx.commit()
3712+
3713+
insert = "INSERT INTO {0} (c1) VALUES (%s)".format(tablename)
3714+
cur.execute(insert, (data,))
3715+
self.cnx.commit()
3716+
3717+
cur.execute("SELECT * FROM {0}".format(tablename))
3718+
for row in cur:
3719+
self.assertEqual(data, row[1])
3720+
3721+
cur.close()
3722+
self.cnx.close()
3723+
3724+
@foreach_cnx()
3725+
def test_gbk(self):
3726+
self._test_charset('gbk', u'海豚')
3727+
3728+
@foreach_cnx()
3729+
def test_sjis(self):
3730+
self._test_charset('sjis', u'シイラ')
3731+
3732+
@foreach_cnx()
3733+
def test_big5(self):
3734+
self._test_charset('big5', u'皿')
3735+
3736+
36783737
class BugOra21536507(tests.MySQLConnectorTests):
36793738
"""BUG#21536507:C/PYTHON BEHAVIOR NOT PROPER WHEN RAISE_ON_WARNINGS=TRUE
36803739
"""

0 commit comments

Comments
 (0)