Skip to content

Commit c3a12f6

Browse files
authored
Make charset="utf8" use utf8mb4. (#1127)
Use charset="utf8mb3" to use utf8mb3 instead. Fix #1126
1 parent f4c348f commit c3a12f6

File tree

2 files changed

+188
-156
lines changed

2 files changed

+188
-156
lines changed

pymysql/charset.py

Lines changed: 163 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1+
# Internal use only. Do not use directly.
2+
13
MBLENGTH = {8: 1, 33: 3, 88: 2, 91: 2}
24

35

46
class Charset:
5-
def __init__(self, id, name, collation, is_default):
7+
def __init__(self, id, name, collation, is_default=False):
68
self.id, self.name, self.collation = id, name, collation
7-
self.is_default = is_default == "Yes"
9+
self.is_default = is_default
810

911
def __repr__(self):
10-
return "Charset(id={}, name={!r}, collation={!r})".format(
11-
self.id,
12-
self.name,
13-
self.collation,
12+
return (
13+
f"Charset(id={self.id}, name={self.name!r}, collation={self.collation!r})"
1414
)
1515

1616
@property
@@ -45,165 +45,172 @@ def by_id(self, id):
4545
return self._by_id[id]
4646

4747
def by_name(self, name):
    """Look up a charset entry by name, ignoring case.

    "utf8" is treated as an alias of "utf8mb4"; pass "utf8mb3" to request
    the 3-byte variant explicitly.

    Returns whatever ``self._by_name.get()`` yields for the normalized
    name (``None`` for an unknown name when ``_by_name`` is a plain dict).
    """
    # Normalize case BEFORE the alias check: checking the raw name first
    # let "UTF8" / "Utf8" bypass the alias and then miss the table, which
    # no longer has an entry named "utf8".
    name = name.lower()
    if name == "utf8":
        name = "utf8mb4"
    return self._by_name.get(name)
4951

5052

5153
_charsets = Charsets()
54+
charset_by_name = _charsets.by_name
55+
charset_by_id = _charsets.by_id
56+
5257
"""
58+
TODO: update this script.
59+
5360
Generated with:
5461
5562
mysql -N -s -e "select id, character_set_name, collation_name, is_default
5663
from information_schema.collations order by id;" | python -c "import sys
5764
for l in sys.stdin.readlines():
58-
id, name, collation, is_default = l.split(chr(9))
59-
print '_charsets.add(Charset(%s, \'%s\', \'%s\', \'%s\'))' \
60-
% (id, name, collation, is_default.strip())
61-
"
62-
65+
id, name, collation, is_default = l.split(chr(9))
66+
if is_default.strip() == "Yes":
67+
print('_charsets.add(Charset(%s, \'%s\', \'%s\', True))' \
68+
% (id, name, collation))
69+
else:
70+
print('_charsets.add(Charset(%s, \'%s\', \'%s\'))' \
71+
% (id, name, collation))
6372
"""
64-
_charsets.add(Charset(1, "big5", "big5_chinese_ci", "Yes"))
65-
_charsets.add(Charset(2, "latin2", "latin2_czech_cs", ""))
66-
_charsets.add(Charset(3, "dec8", "dec8_swedish_ci", "Yes"))
67-
_charsets.add(Charset(4, "cp850", "cp850_general_ci", "Yes"))
68-
_charsets.add(Charset(5, "latin1", "latin1_german1_ci", ""))
69-
_charsets.add(Charset(6, "hp8", "hp8_english_ci", "Yes"))
70-
_charsets.add(Charset(7, "koi8r", "koi8r_general_ci", "Yes"))
71-
_charsets.add(Charset(8, "latin1", "latin1_swedish_ci", "Yes"))
72-
_charsets.add(Charset(9, "latin2", "latin2_general_ci", "Yes"))
73-
_charsets.add(Charset(10, "swe7", "swe7_swedish_ci", "Yes"))
74-
_charsets.add(Charset(11, "ascii", "ascii_general_ci", "Yes"))
75-
_charsets.add(Charset(12, "ujis", "ujis_japanese_ci", "Yes"))
76-
_charsets.add(Charset(13, "sjis", "sjis_japanese_ci", "Yes"))
77-
_charsets.add(Charset(14, "cp1251", "cp1251_bulgarian_ci", ""))
78-
_charsets.add(Charset(15, "latin1", "latin1_danish_ci", ""))
79-
_charsets.add(Charset(16, "hebrew", "hebrew_general_ci", "Yes"))
80-
_charsets.add(Charset(18, "tis620", "tis620_thai_ci", "Yes"))
81-
_charsets.add(Charset(19, "euckr", "euckr_korean_ci", "Yes"))
82-
_charsets.add(Charset(20, "latin7", "latin7_estonian_cs", ""))
83-
_charsets.add(Charset(21, "latin2", "latin2_hungarian_ci", ""))
84-
_charsets.add(Charset(22, "koi8u", "koi8u_general_ci", "Yes"))
85-
_charsets.add(Charset(23, "cp1251", "cp1251_ukrainian_ci", ""))
86-
_charsets.add(Charset(24, "gb2312", "gb2312_chinese_ci", "Yes"))
87-
_charsets.add(Charset(25, "greek", "greek_general_ci", "Yes"))
88-
_charsets.add(Charset(26, "cp1250", "cp1250_general_ci", "Yes"))
89-
_charsets.add(Charset(27, "latin2", "latin2_croatian_ci", ""))
90-
_charsets.add(Charset(28, "gbk", "gbk_chinese_ci", "Yes"))
91-
_charsets.add(Charset(29, "cp1257", "cp1257_lithuanian_ci", ""))
92-
_charsets.add(Charset(30, "latin5", "latin5_turkish_ci", "Yes"))
93-
_charsets.add(Charset(31, "latin1", "latin1_german2_ci", ""))
94-
_charsets.add(Charset(32, "armscii8", "armscii8_general_ci", "Yes"))
95-
_charsets.add(Charset(33, "utf8", "utf8_general_ci", "Yes"))
96-
_charsets.add(Charset(34, "cp1250", "cp1250_czech_cs", ""))
97-
_charsets.add(Charset(36, "cp866", "cp866_general_ci", "Yes"))
98-
_charsets.add(Charset(37, "keybcs2", "keybcs2_general_ci", "Yes"))
99-
_charsets.add(Charset(38, "macce", "macce_general_ci", "Yes"))
100-
_charsets.add(Charset(39, "macroman", "macroman_general_ci", "Yes"))
101-
_charsets.add(Charset(40, "cp852", "cp852_general_ci", "Yes"))
102-
_charsets.add(Charset(41, "latin7", "latin7_general_ci", "Yes"))
103-
_charsets.add(Charset(42, "latin7", "latin7_general_cs", ""))
104-
_charsets.add(Charset(43, "macce", "macce_bin", ""))
105-
_charsets.add(Charset(44, "cp1250", "cp1250_croatian_ci", ""))
106-
_charsets.add(Charset(45, "utf8mb4", "utf8mb4_general_ci", "Yes"))
107-
_charsets.add(Charset(46, "utf8mb4", "utf8mb4_bin", ""))
108-
_charsets.add(Charset(47, "latin1", "latin1_bin", ""))
109-
_charsets.add(Charset(48, "latin1", "latin1_general_ci", ""))
110-
_charsets.add(Charset(49, "latin1", "latin1_general_cs", ""))
111-
_charsets.add(Charset(50, "cp1251", "cp1251_bin", ""))
112-
_charsets.add(Charset(51, "cp1251", "cp1251_general_ci", "Yes"))
113-
_charsets.add(Charset(52, "cp1251", "cp1251_general_cs", ""))
114-
_charsets.add(Charset(53, "macroman", "macroman_bin", ""))
115-
_charsets.add(Charset(57, "cp1256", "cp1256_general_ci", "Yes"))
116-
_charsets.add(Charset(58, "cp1257", "cp1257_bin", ""))
117-
_charsets.add(Charset(59, "cp1257", "cp1257_general_ci", "Yes"))
118-
_charsets.add(Charset(63, "binary", "binary", "Yes"))
119-
_charsets.add(Charset(64, "armscii8", "armscii8_bin", ""))
120-
_charsets.add(Charset(65, "ascii", "ascii_bin", ""))
121-
_charsets.add(Charset(66, "cp1250", "cp1250_bin", ""))
122-
_charsets.add(Charset(67, "cp1256", "cp1256_bin", ""))
123-
_charsets.add(Charset(68, "cp866", "cp866_bin", ""))
124-
_charsets.add(Charset(69, "dec8", "dec8_bin", ""))
125-
_charsets.add(Charset(70, "greek", "greek_bin", ""))
126-
_charsets.add(Charset(71, "hebrew", "hebrew_bin", ""))
127-
_charsets.add(Charset(72, "hp8", "hp8_bin", ""))
128-
_charsets.add(Charset(73, "keybcs2", "keybcs2_bin", ""))
129-
_charsets.add(Charset(74, "koi8r", "koi8r_bin", ""))
130-
_charsets.add(Charset(75, "koi8u", "koi8u_bin", ""))
131-
_charsets.add(Charset(76, "utf8", "utf8_tolower_ci", ""))
132-
_charsets.add(Charset(77, "latin2", "latin2_bin", ""))
133-
_charsets.add(Charset(78, "latin5", "latin5_bin", ""))
134-
_charsets.add(Charset(79, "latin7", "latin7_bin", ""))
135-
_charsets.add(Charset(80, "cp850", "cp850_bin", ""))
136-
_charsets.add(Charset(81, "cp852", "cp852_bin", ""))
137-
_charsets.add(Charset(82, "swe7", "swe7_bin", ""))
138-
_charsets.add(Charset(83, "utf8", "utf8_bin", ""))
139-
_charsets.add(Charset(84, "big5", "big5_bin", ""))
140-
_charsets.add(Charset(85, "euckr", "euckr_bin", ""))
141-
_charsets.add(Charset(86, "gb2312", "gb2312_bin", ""))
142-
_charsets.add(Charset(87, "gbk", "gbk_bin", ""))
143-
_charsets.add(Charset(88, "sjis", "sjis_bin", ""))
144-
_charsets.add(Charset(89, "tis620", "tis620_bin", ""))
145-
_charsets.add(Charset(91, "ujis", "ujis_bin", ""))
146-
_charsets.add(Charset(92, "geostd8", "geostd8_general_ci", "Yes"))
147-
_charsets.add(Charset(93, "geostd8", "geostd8_bin", ""))
148-
_charsets.add(Charset(94, "latin1", "latin1_spanish_ci", ""))
149-
_charsets.add(Charset(95, "cp932", "cp932_japanese_ci", "Yes"))
150-
_charsets.add(Charset(96, "cp932", "cp932_bin", ""))
151-
_charsets.add(Charset(97, "eucjpms", "eucjpms_japanese_ci", "Yes"))
152-
_charsets.add(Charset(98, "eucjpms", "eucjpms_bin", ""))
153-
_charsets.add(Charset(99, "cp1250", "cp1250_polish_ci", ""))
154-
_charsets.add(Charset(192, "utf8", "utf8_unicode_ci", ""))
155-
_charsets.add(Charset(193, "utf8", "utf8_icelandic_ci", ""))
156-
_charsets.add(Charset(194, "utf8", "utf8_latvian_ci", ""))
157-
_charsets.add(Charset(195, "utf8", "utf8_romanian_ci", ""))
158-
_charsets.add(Charset(196, "utf8", "utf8_slovenian_ci", ""))
159-
_charsets.add(Charset(197, "utf8", "utf8_polish_ci", ""))
160-
_charsets.add(Charset(198, "utf8", "utf8_estonian_ci", ""))
161-
_charsets.add(Charset(199, "utf8", "utf8_spanish_ci", ""))
162-
_charsets.add(Charset(200, "utf8", "utf8_swedish_ci", ""))
163-
_charsets.add(Charset(201, "utf8", "utf8_turkish_ci", ""))
164-
_charsets.add(Charset(202, "utf8", "utf8_czech_ci", ""))
165-
_charsets.add(Charset(203, "utf8", "utf8_danish_ci", ""))
166-
_charsets.add(Charset(204, "utf8", "utf8_lithuanian_ci", ""))
167-
_charsets.add(Charset(205, "utf8", "utf8_slovak_ci", ""))
168-
_charsets.add(Charset(206, "utf8", "utf8_spanish2_ci", ""))
169-
_charsets.add(Charset(207, "utf8", "utf8_roman_ci", ""))
170-
_charsets.add(Charset(208, "utf8", "utf8_persian_ci", ""))
171-
_charsets.add(Charset(209, "utf8", "utf8_esperanto_ci", ""))
172-
_charsets.add(Charset(210, "utf8", "utf8_hungarian_ci", ""))
173-
_charsets.add(Charset(211, "utf8", "utf8_sinhala_ci", ""))
174-
_charsets.add(Charset(212, "utf8", "utf8_german2_ci", ""))
175-
_charsets.add(Charset(213, "utf8", "utf8_croatian_ci", ""))
176-
_charsets.add(Charset(214, "utf8", "utf8_unicode_520_ci", ""))
177-
_charsets.add(Charset(215, "utf8", "utf8_vietnamese_ci", ""))
178-
_charsets.add(Charset(223, "utf8", "utf8_general_mysql500_ci", ""))
179-
_charsets.add(Charset(224, "utf8mb4", "utf8mb4_unicode_ci", ""))
180-
_charsets.add(Charset(225, "utf8mb4", "utf8mb4_icelandic_ci", ""))
181-
_charsets.add(Charset(226, "utf8mb4", "utf8mb4_latvian_ci", ""))
182-
_charsets.add(Charset(227, "utf8mb4", "utf8mb4_romanian_ci", ""))
183-
_charsets.add(Charset(228, "utf8mb4", "utf8mb4_slovenian_ci", ""))
184-
_charsets.add(Charset(229, "utf8mb4", "utf8mb4_polish_ci", ""))
185-
_charsets.add(Charset(230, "utf8mb4", "utf8mb4_estonian_ci", ""))
186-
_charsets.add(Charset(231, "utf8mb4", "utf8mb4_spanish_ci", ""))
187-
_charsets.add(Charset(232, "utf8mb4", "utf8mb4_swedish_ci", ""))
188-
_charsets.add(Charset(233, "utf8mb4", "utf8mb4_turkish_ci", ""))
189-
_charsets.add(Charset(234, "utf8mb4", "utf8mb4_czech_ci", ""))
190-
_charsets.add(Charset(235, "utf8mb4", "utf8mb4_danish_ci", ""))
191-
_charsets.add(Charset(236, "utf8mb4", "utf8mb4_lithuanian_ci", ""))
192-
_charsets.add(Charset(237, "utf8mb4", "utf8mb4_slovak_ci", ""))
193-
_charsets.add(Charset(238, "utf8mb4", "utf8mb4_spanish2_ci", ""))
194-
_charsets.add(Charset(239, "utf8mb4", "utf8mb4_roman_ci", ""))
195-
_charsets.add(Charset(240, "utf8mb4", "utf8mb4_persian_ci", ""))
196-
_charsets.add(Charset(241, "utf8mb4", "utf8mb4_esperanto_ci", ""))
197-
_charsets.add(Charset(242, "utf8mb4", "utf8mb4_hungarian_ci", ""))
198-
_charsets.add(Charset(243, "utf8mb4", "utf8mb4_sinhala_ci", ""))
199-
_charsets.add(Charset(244, "utf8mb4", "utf8mb4_german2_ci", ""))
200-
_charsets.add(Charset(245, "utf8mb4", "utf8mb4_croatian_ci", ""))
201-
_charsets.add(Charset(246, "utf8mb4", "utf8mb4_unicode_520_ci", ""))
202-
_charsets.add(Charset(247, "utf8mb4", "utf8mb4_vietnamese_ci", ""))
203-
_charsets.add(Charset(248, "gb18030", "gb18030_chinese_ci", "Yes"))
204-
_charsets.add(Charset(249, "gb18030", "gb18030_bin", ""))
205-
_charsets.add(Charset(250, "gb18030", "gb18030_unicode_520_ci", ""))
206-
_charsets.add(Charset(255, "utf8mb4", "utf8mb4_0900_ai_ci", ""))
20773

208-
charset_by_name = _charsets.by_name
209-
charset_by_id = _charsets.by_id
74+
_charsets.add(Charset(1, "big5", "big5_chinese_ci", True))
75+
_charsets.add(Charset(2, "latin2", "latin2_czech_cs"))
76+
_charsets.add(Charset(3, "dec8", "dec8_swedish_ci", True))
77+
_charsets.add(Charset(4, "cp850", "cp850_general_ci", True))
78+
_charsets.add(Charset(5, "latin1", "latin1_german1_ci"))
79+
_charsets.add(Charset(6, "hp8", "hp8_english_ci", True))
80+
_charsets.add(Charset(7, "koi8r", "koi8r_general_ci", True))
81+
_charsets.add(Charset(8, "latin1", "latin1_swedish_ci", True))
82+
_charsets.add(Charset(9, "latin2", "latin2_general_ci", True))
83+
_charsets.add(Charset(10, "swe7", "swe7_swedish_ci", True))
84+
_charsets.add(Charset(11, "ascii", "ascii_general_ci", True))
85+
_charsets.add(Charset(12, "ujis", "ujis_japanese_ci", True))
86+
_charsets.add(Charset(13, "sjis", "sjis_japanese_ci", True))
87+
_charsets.add(Charset(14, "cp1251", "cp1251_bulgarian_ci"))
88+
_charsets.add(Charset(15, "latin1", "latin1_danish_ci"))
89+
_charsets.add(Charset(16, "hebrew", "hebrew_general_ci", True))
90+
_charsets.add(Charset(18, "tis620", "tis620_thai_ci", True))
91+
_charsets.add(Charset(19, "euckr", "euckr_korean_ci", True))
92+
_charsets.add(Charset(20, "latin7", "latin7_estonian_cs"))
93+
_charsets.add(Charset(21, "latin2", "latin2_hungarian_ci"))
94+
_charsets.add(Charset(22, "koi8u", "koi8u_general_ci", True))
95+
_charsets.add(Charset(23, "cp1251", "cp1251_ukrainian_ci"))
96+
_charsets.add(Charset(24, "gb2312", "gb2312_chinese_ci", True))
97+
_charsets.add(Charset(25, "greek", "greek_general_ci", True))
98+
_charsets.add(Charset(26, "cp1250", "cp1250_general_ci", True))
99+
_charsets.add(Charset(27, "latin2", "latin2_croatian_ci"))
100+
_charsets.add(Charset(28, "gbk", "gbk_chinese_ci", True))
101+
_charsets.add(Charset(29, "cp1257", "cp1257_lithuanian_ci"))
102+
_charsets.add(Charset(30, "latin5", "latin5_turkish_ci", True))
103+
_charsets.add(Charset(31, "latin1", "latin1_german2_ci"))
104+
_charsets.add(Charset(32, "armscii8", "armscii8_general_ci", True))
105+
_charsets.add(Charset(33, "utf8mb3", "utf8mb3_general_ci", True))
106+
_charsets.add(Charset(34, "cp1250", "cp1250_czech_cs"))
107+
_charsets.add(Charset(36, "cp866", "cp866_general_ci", True))
108+
_charsets.add(Charset(37, "keybcs2", "keybcs2_general_ci", True))
109+
_charsets.add(Charset(38, "macce", "macce_general_ci", True))
110+
_charsets.add(Charset(39, "macroman", "macroman_general_ci", True))
111+
_charsets.add(Charset(40, "cp852", "cp852_general_ci", True))
112+
_charsets.add(Charset(41, "latin7", "latin7_general_ci", True))
113+
_charsets.add(Charset(42, "latin7", "latin7_general_cs"))
114+
_charsets.add(Charset(43, "macce", "macce_bin"))
115+
_charsets.add(Charset(44, "cp1250", "cp1250_croatian_ci"))
116+
_charsets.add(Charset(45, "utf8mb4", "utf8mb4_general_ci", True))
117+
_charsets.add(Charset(46, "utf8mb4", "utf8mb4_bin"))
118+
_charsets.add(Charset(47, "latin1", "latin1_bin"))
119+
_charsets.add(Charset(48, "latin1", "latin1_general_ci"))
120+
_charsets.add(Charset(49, "latin1", "latin1_general_cs"))
121+
_charsets.add(Charset(50, "cp1251", "cp1251_bin"))
122+
_charsets.add(Charset(51, "cp1251", "cp1251_general_ci", True))
123+
_charsets.add(Charset(52, "cp1251", "cp1251_general_cs"))
124+
_charsets.add(Charset(53, "macroman", "macroman_bin"))
125+
_charsets.add(Charset(57, "cp1256", "cp1256_general_ci", True))
126+
_charsets.add(Charset(58, "cp1257", "cp1257_bin"))
127+
_charsets.add(Charset(59, "cp1257", "cp1257_general_ci", True))
128+
_charsets.add(Charset(63, "binary", "binary", True))
129+
_charsets.add(Charset(64, "armscii8", "armscii8_bin"))
130+
_charsets.add(Charset(65, "ascii", "ascii_bin"))
131+
_charsets.add(Charset(66, "cp1250", "cp1250_bin"))
132+
_charsets.add(Charset(67, "cp1256", "cp1256_bin"))
133+
_charsets.add(Charset(68, "cp866", "cp866_bin"))
134+
_charsets.add(Charset(69, "dec8", "dec8_bin"))
135+
_charsets.add(Charset(70, "greek", "greek_bin"))
136+
_charsets.add(Charset(71, "hebrew", "hebrew_bin"))
137+
_charsets.add(Charset(72, "hp8", "hp8_bin"))
138+
_charsets.add(Charset(73, "keybcs2", "keybcs2_bin"))
139+
_charsets.add(Charset(74, "koi8r", "koi8r_bin"))
140+
_charsets.add(Charset(75, "koi8u", "koi8u_bin"))
141+
_charsets.add(Charset(76, "utf8mb3", "utf8mb3_tolower_ci"))
142+
_charsets.add(Charset(77, "latin2", "latin2_bin"))
143+
_charsets.add(Charset(78, "latin5", "latin5_bin"))
144+
_charsets.add(Charset(79, "latin7", "latin7_bin"))
145+
_charsets.add(Charset(80, "cp850", "cp850_bin"))
146+
_charsets.add(Charset(81, "cp852", "cp852_bin"))
147+
_charsets.add(Charset(82, "swe7", "swe7_bin"))
148+
_charsets.add(Charset(83, "utf8mb3", "utf8mb3_bin"))
149+
_charsets.add(Charset(84, "big5", "big5_bin"))
150+
_charsets.add(Charset(85, "euckr", "euckr_bin"))
151+
_charsets.add(Charset(86, "gb2312", "gb2312_bin"))
152+
_charsets.add(Charset(87, "gbk", "gbk_bin"))
153+
_charsets.add(Charset(88, "sjis", "sjis_bin"))
154+
_charsets.add(Charset(89, "tis620", "tis620_bin"))
155+
_charsets.add(Charset(91, "ujis", "ujis_bin"))
156+
_charsets.add(Charset(92, "geostd8", "geostd8_general_ci", True))
157+
_charsets.add(Charset(93, "geostd8", "geostd8_bin"))
158+
_charsets.add(Charset(94, "latin1", "latin1_spanish_ci"))
159+
_charsets.add(Charset(95, "cp932", "cp932_japanese_ci", True))
160+
_charsets.add(Charset(96, "cp932", "cp932_bin"))
161+
_charsets.add(Charset(97, "eucjpms", "eucjpms_japanese_ci", True))
162+
_charsets.add(Charset(98, "eucjpms", "eucjpms_bin"))
163+
_charsets.add(Charset(99, "cp1250", "cp1250_polish_ci"))
164+
_charsets.add(Charset(192, "utf8mb3", "utf8mb3_unicode_ci"))
165+
_charsets.add(Charset(193, "utf8mb3", "utf8mb3_icelandic_ci"))
166+
_charsets.add(Charset(194, "utf8mb3", "utf8mb3_latvian_ci"))
167+
_charsets.add(Charset(195, "utf8mb3", "utf8mb3_romanian_ci"))
168+
_charsets.add(Charset(196, "utf8mb3", "utf8mb3_slovenian_ci"))
169+
_charsets.add(Charset(197, "utf8mb3", "utf8mb3_polish_ci"))
170+
_charsets.add(Charset(198, "utf8mb3", "utf8mb3_estonian_ci"))
171+
_charsets.add(Charset(199, "utf8mb3", "utf8mb3_spanish_ci"))
172+
_charsets.add(Charset(200, "utf8mb3", "utf8mb3_swedish_ci"))
173+
_charsets.add(Charset(201, "utf8mb3", "utf8mb3_turkish_ci"))
174+
_charsets.add(Charset(202, "utf8mb3", "utf8mb3_czech_ci"))
175+
_charsets.add(Charset(203, "utf8mb3", "utf8mb3_danish_ci"))
176+
_charsets.add(Charset(204, "utf8mb3", "utf8mb3_lithuanian_ci"))
177+
_charsets.add(Charset(205, "utf8mb3", "utf8mb3_slovak_ci"))
178+
_charsets.add(Charset(206, "utf8mb3", "utf8mb3_spanish2_ci"))
179+
_charsets.add(Charset(207, "utf8mb3", "utf8mb3_roman_ci"))
180+
_charsets.add(Charset(208, "utf8mb3", "utf8mb3_persian_ci"))
181+
_charsets.add(Charset(209, "utf8mb3", "utf8mb3_esperanto_ci"))
182+
_charsets.add(Charset(210, "utf8mb3", "utf8mb3_hungarian_ci"))
183+
_charsets.add(Charset(211, "utf8mb3", "utf8mb3_sinhala_ci"))
184+
_charsets.add(Charset(212, "utf8mb3", "utf8mb3_german2_ci"))
185+
_charsets.add(Charset(213, "utf8mb3", "utf8mb3_croatian_ci"))
186+
_charsets.add(Charset(214, "utf8mb3", "utf8mb3_unicode_520_ci"))
187+
_charsets.add(Charset(215, "utf8mb3", "utf8mb3_vietnamese_ci"))
188+
_charsets.add(Charset(223, "utf8mb3", "utf8mb3_general_mysql500_ci"))
189+
_charsets.add(Charset(224, "utf8mb4", "utf8mb4_unicode_ci"))
190+
_charsets.add(Charset(225, "utf8mb4", "utf8mb4_icelandic_ci"))
191+
_charsets.add(Charset(226, "utf8mb4", "utf8mb4_latvian_ci"))
192+
_charsets.add(Charset(227, "utf8mb4", "utf8mb4_romanian_ci"))
193+
_charsets.add(Charset(228, "utf8mb4", "utf8mb4_slovenian_ci"))
194+
_charsets.add(Charset(229, "utf8mb4", "utf8mb4_polish_ci"))
195+
_charsets.add(Charset(230, "utf8mb4", "utf8mb4_estonian_ci"))
196+
_charsets.add(Charset(231, "utf8mb4", "utf8mb4_spanish_ci"))
197+
_charsets.add(Charset(232, "utf8mb4", "utf8mb4_swedish_ci"))
198+
_charsets.add(Charset(233, "utf8mb4", "utf8mb4_turkish_ci"))
199+
_charsets.add(Charset(234, "utf8mb4", "utf8mb4_czech_ci"))
200+
_charsets.add(Charset(235, "utf8mb4", "utf8mb4_danish_ci"))
201+
_charsets.add(Charset(236, "utf8mb4", "utf8mb4_lithuanian_ci"))
202+
_charsets.add(Charset(237, "utf8mb4", "utf8mb4_slovak_ci"))
203+
_charsets.add(Charset(238, "utf8mb4", "utf8mb4_spanish2_ci"))
204+
_charsets.add(Charset(239, "utf8mb4", "utf8mb4_roman_ci"))
205+
_charsets.add(Charset(240, "utf8mb4", "utf8mb4_persian_ci"))
206+
_charsets.add(Charset(241, "utf8mb4", "utf8mb4_esperanto_ci"))
207+
_charsets.add(Charset(242, "utf8mb4", "utf8mb4_hungarian_ci"))
208+
_charsets.add(Charset(243, "utf8mb4", "utf8mb4_sinhala_ci"))
209+
_charsets.add(Charset(244, "utf8mb4", "utf8mb4_german2_ci"))
210+
_charsets.add(Charset(245, "utf8mb4", "utf8mb4_croatian_ci"))
211+
_charsets.add(Charset(246, "utf8mb4", "utf8mb4_unicode_520_ci"))
212+
_charsets.add(Charset(247, "utf8mb4", "utf8mb4_vietnamese_ci"))
213+
_charsets.add(Charset(248, "gb18030", "gb18030_chinese_ci", True))
214+
_charsets.add(Charset(249, "gb18030", "gb18030_bin"))
215+
_charsets.add(Charset(250, "gb18030", "gb18030_unicode_520_ci"))
216+
_charsets.add(Charset(255, "utf8mb4", "utf8mb4_0900_ai_ci"))

pymysql/tests/test_charset.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import pymysql.charset
2+
3+
4+
def test_utf8():
5+
utf8mb3 = pymysql.charset.charset_by_name("utf8mb3")
6+
assert utf8mb3.name == "utf8mb3"
7+
assert utf8mb3.collation == "utf8mb3_general_ci"
8+
assert (
9+
repr(utf8mb3)
10+
== "Charset(id=33, name='utf8mb3', collation='utf8mb3_general_ci')"
11+
)
12+
13+
# MySQL 8.0 changed the default collation for utf8mb4.
14+
# But we use old default for compatibility.
15+
utf8mb4 = pymysql.charset.charset_by_name("utf8mb4")
16+
assert utf8mb4.name == "utf8mb4"
17+
assert utf8mb4.collation == "utf8mb4_general_ci"
18+
assert (
19+
repr(utf8mb4)
20+
== "Charset(id=45, name='utf8mb4', collation='utf8mb4_general_ci')"
21+
)
22+
23+
# PyMySQL treats utf8 as an alias of utf8mb4 since v1.1 (in MySQL 8.0 itself, utf8 is still a deprecated alias of utf8mb3).
24+
utf8 = pymysql.charset.charset_by_name("utf8")
25+
assert utf8 == utf8mb4

0 commit comments

Comments
 (0)