Skip to content

Commit 20f59fe

Browse files
qigangxu authored and vstinner committed
bpo-37751: Fix codecs.lookup() normalization (GH-15092)
Fix codecs.lookup() to normalize the encoding name the same way as encodings.normalize_encoding(), except that codecs.lookup() also converts the name to lower case.
1 parent 87bc3b7 commit 20f59fe

File tree

2 files changed

+17
-16
lines changed

2 files changed

+17
-16
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix :func:`codecs.lookup` to normalize the encoding name the same way as :func:`encodings.normalize_encoding`, except that :func:`codecs.lookup` also converts the name to lower case.

Python/codecs.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -49,36 +49,36 @@ int PyCodec_Register(PyObject *search_function)
4949
return -1;
5050
}
5151

52-
/* Convert a string to a normalized Python string: all characters are
53-
converted to lower case, spaces are replaced with underscores. */
52+
extern int _Py_normalize_encoding(const char *, char *, size_t);
53+
54+
/* Convert a string to a normalized Python string (decoded from UTF-8): all characters are
55+
converted to lower case, spaces and hyphens are replaced with underscores. */
5456

5557
static
5658
PyObject *normalizestring(const char *string)
5759
{
58-
size_t i;
5960
size_t len = strlen(string);
60-
char *p;
61+
char *encoding;
6162
PyObject *v;
6263

6364
if (len > PY_SSIZE_T_MAX) {
6465
PyErr_SetString(PyExc_OverflowError, "string is too large");
6566
return NULL;
6667
}
6768

68-
p = PyMem_Malloc(len + 1);
69-
if (p == NULL)
69+
encoding = PyMem_Malloc(len + 1);
70+
if (encoding == NULL)
7071
return PyErr_NoMemory();
71-
for (i = 0; i < len; i++) {
72-
char ch = string[i];
73-
if (ch == ' ')
74-
ch = '-';
75-
else
76-
ch = Py_TOLOWER(Py_CHARMASK(ch));
77-
p[i] = ch;
72+
73+
if (!_Py_normalize_encoding(string, encoding, len + 1))
74+
{
75+
PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
76+
PyMem_Free(encoding);
77+
return NULL;
7878
}
79-
p[i] = '\0';
80-
v = PyUnicode_FromString(p);
81-
PyMem_Free(p);
79+
80+
v = PyUnicode_FromString(encoding);
81+
PyMem_Free(encoding);
8282
return v;
8383
}
8484

0 commit comments

Comments
 (0)