diff --git a/pymysql/charset.py b/pymysql/charset.py index 1cf7d913..5e0ad89b 100644 --- a/pymysql/charset.py +++ b/pymysql/charset.py @@ -11,6 +11,10 @@ def __init__(self, id, name, collation, is_default): self.id, self.name, self.collation = id, name, collation self.is_default = is_default == 'Yes' + def __repr__(self): + return "Charset(id=%s, name=%r, collation=%r)" % ( + self.id, self.name, self.collation) + @property def encoding(self): name = self.name diff --git a/pymysql/connections.py b/pymysql/connections.py index 52f13f05..65dc8d2d 100644 --- a/pymysql/connections.py +++ b/pymysql/connections.py @@ -88,7 +88,6 @@ def _makefile(sock, mode): FIELD_TYPE.BLOB, FIELD_TYPE.LONG_BLOB, FIELD_TYPE.MEDIUM_BLOB, - FIELD_TYPE.JSON, FIELD_TYPE.STRING, FIELD_TYPE.TINY_BLOB, FIELD_TYPE.VAR_STRING, @@ -407,9 +406,9 @@ class FieldDescriptorPacket(MysqlPacket): def __init__(self, data, encoding): MysqlPacket.__init__(self, data, encoding) - self.__parse_field_descriptor(encoding) + self._parse_field_descriptor(encoding) - def __parse_field_descriptor(self, encoding): + def _parse_field_descriptor(self, encoding): """Parse the 'Field Descriptor' (Metadata) packet. This is compatible with MySQL 4.1+ (not compatible with MySQL 4.0). @@ -1433,21 +1432,30 @@ def _get_descriptions(self): self.fields = [] self.converters = [] use_unicode = self.connection.use_unicode + conn_encoding = self.connection.encoding description = [] + for i in range_type(self.field_count): field = self.connection._read_packet(FieldDescriptorPacket) self.fields.append(field) description.append(field.description()) field_type = field.type_code if use_unicode: - if field_type in TEXT_TYPES: - charset = charset_by_id(field.charsetnr) - if charset.is_binary: + if field_type == FIELD_TYPE.JSON: + # When SELECT from JSON column: charset = binary + # When SELECT CAST(... AS JSON): charset = connection encoding + # This behavior is different from TEXT / BLOB. 
+ # We should decode the result using the connection encoding regardless of charsetnr. + # See https://github.com/PyMySQL/PyMySQL/issues/488 + encoding = conn_encoding # SELECT CAST(... AS JSON) + elif field_type in TEXT_TYPES: + if field.charsetnr == 63: # binary # TEXTs with charset=binary means BINARY types. encoding = None else: - encoding = charset.encoding + encoding = conn_encoding else: + # Integers, Dates and Times, and other basic data is encoded in ascii encoding = 'ascii' else: encoding = None diff --git a/pymysql/converters.py b/pymysql/converters.py index fc596ded..4c00bc45 100644 --- a/pymysql/converters.py +++ b/pymysql/converters.py @@ -333,12 +333,6 @@ def convert_set(s): return set(s.split(",")) -def convert_json(b): - # JSON is returned as binary data. - # Decode with utf-8 regardless connection encoding. - return b.decode('utf-8') - - def through(x): return x @@ -416,7 +410,6 @@ def convert_characters(connection, field, data): FIELD_TYPE.VARCHAR: through, FIELD_TYPE.DECIMAL: Decimal, FIELD_TYPE.NEWDECIMAL: Decimal, - FIELD_TYPE.JSON: convert_json, } diff --git a/pymysql/tests/test_basic.py b/pymysql/tests/test_basic.py index 52d9c7ff..67f11447 100644 --- a/pymysql/tests/test_basic.py +++ b/pymysql/tests/test_basic.py @@ -253,12 +253,17 @@ def test_json(self): primary key (id) );""") cur = conn.cursor() + json_str = u'{"hello": "こんにちは"}' cur.execute("INSERT INTO test_json (id, `json`) values (42, %s)", (json_str,)) cur.execute("SELECT `json` from `test_json` WHERE `id`=42") res = cur.fetchone()[0] self.assertEqual(json.loads(res), json.loads(json_str)) + cur.execute("SELECT CAST(%s AS JSON) AS x", (json_str,)) + res = cur.fetchone()[0] + self.assertEqual(json.loads(res), json.loads(json_str)) + class TestBulkInserts(base.PyMySQLTestCase):