From ed9d5586725f36696e3abe06f6a796f50f6615fe Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sun, 3 Oct 2021 21:14:41 +0900 Subject: [PATCH 1/7] bpo-20028: Update csv module to hanlde the giving invalid quotechar. --- Lib/test/test_csv.py | 43 ++++++++++++++++++- .../2021-10-03-21-14-37.bpo-20028.zBA4RK.rst | 2 + Modules/_csv.c | 20 ++++++--- 3 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 09e72a71f1db92..8d6ddd67437b41 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -897,7 +897,13 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be string, not int') + '"quotechar" must be string or None, not int') + + mydialect.quotechar = "" + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"quotechar" must be a 1-character string') def test_delimiter(self): class mydialect(csv.Dialect): @@ -934,6 +940,41 @@ class mydialect(csv.Dialect): self.assertEqual(str(cm.exception), '"delimiter" must be string, not int') + def test_escapechar(self): + class mydialect(csv.Dialect): + delimiter = ";" + escapechar = '\\' + doublequote = False + skipinitialspace = True + lineterminator = '\r\n' + quoting = csv.QUOTE_NONE + d = mydialect() + self.assertEqual(d.escapechar, "\\") + + mydialect.escapechar = "" + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a 1-character string') + + mydialect.escapechar = "**" + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a 1-character string') + + mydialect.escapechar = b"*" + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be string or None, not bytes') + + mydialect.escapechar = 4 + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be string or None, not int') + def test_lineterminator(self): class mydialect(csv.Dialect): delimiter = ";" diff --git a/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst b/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst new file mode 100644 index 00000000000000..91e032a8b78644 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst @@ -0,0 +1,2 @@ +Update :mod:`csv` to handle the giving invalid quotechar correctly for +initializing *dialect*. diff --git a/Modules/_csv.c b/Modules/_csv.c index 3729d2be362522..5dc52ddda44d21 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -238,21 +238,29 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) if (src != Py_None) { Py_ssize_t len; if (!PyUnicode_Check(src)) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be string, not %.200s", name, - Py_TYPE(src)->tp_name); + if (strcmp(name, "delimiter") == 0) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be string, not %.200s", name, + Py_TYPE(src)->tp_name); + } + else { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be string or None, not %.200s", name, + Py_TYPE(src)->tp_name); + } return -1; } len = PyUnicode_GetLength(src); - if (len > 1) { + if (len != 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", name); return -1; } /* PyUnicode_READY() is called in PyUnicode_GetLength() */ - if (len > 0) + else { *target = PyUnicode_READ_CHAR(src, 0); + } } } return 0; @@ -457,7 +465,7 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto err; if (self->delimiter == 0) { PyErr_SetString(PyExc_TypeError, - "\"delimiter\" must be a 1-character string"); + "delimiter must be set"); goto err; } if (quotechar == Py_None && quoting == NULL) From c9f4941566f18fd65bdd0e7e19076065f1ec8efa Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Wed, 6 Oct 2021 12:28:30 +0900 Subject: [PATCH 2/7] bpo-20028: revert error message --- Modules/_csv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 5dc52ddda44d21..a6d68cdcecb0a0 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -465,7 +465,7 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto err; if (self->delimiter == 0) { PyErr_SetString(PyExc_TypeError, - "delimiter must be set"); + "\"delimiter\" must be a 1-character string"); goto err; } if (quotechar == Py_None && quoting == NULL) From 638f7d0177be9bf04ea0e6e6245f9da6233351d2 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Wed, 6 Oct 2021 13:04:15 +0900 Subject: [PATCH 3/7] bpo-20028: test refactoring --- Lib/test/test_csv.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 8d6ddd67437b41..6c7da3649f9d62 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -952,28 +952,20 @@ class mydialect(csv.Dialect): self.assertEqual(d.escapechar, "\\") mydialect.escapechar = "" - with self.assertRaises(csv.Error) as cm: + with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): mydialect() - self.assertEqual(str(cm.exception), - '"escapechar" must be a 1-character string') mydialect.escapechar = "**" - with self.assertRaises(csv.Error) as cm: + with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): mydialect() - self.assertEqual(str(cm.exception), - '"escapechar" must be a 1-character string') mydialect.escapechar = b"*" - with self.assertRaises(csv.Error) as cm: + with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not bytes'): mydialect() - self.assertEqual(str(cm.exception), - '"escapechar" must be string or None, not bytes') mydialect.escapechar = 4 - with self.assertRaises(csv.Error) as cm: + with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not int'): mydialect() - self.assertEqual(str(cm.exception), - '"escapechar" must be string or None, not int') def test_lineterminator(self): class mydialect(csv.Dialect): From 85a418d14b1821bec6d869927ba54e907a4d9a3c Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sat, 9 Oct 2021 16:20:50 +0900 Subject: [PATCH 4/7] bpo-20028: Add authors --- .../next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst b/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst index 91e032a8b78644..fd583300db33e6 100644 --- a/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst +++ b/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst @@ -1,2 +1,2 @@ Update :mod:`csv` to handle the giving invalid quotechar correctly for -initializing *dialect*. +initializing *dialect*. Patch by Vajrasky Kok and Dong-hee Na. From 611fb4f17384d7a9defdb97df144bdcc21a9fc6b Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sat, 9 Oct 2021 22:20:23 +0900 Subject: [PATCH 5/7] bpo-20028: Address code review --- Lib/test/test_csv.py | 6 +++++ Modules/_csv.c | 54 +++++++++++++++++++++++++++++++------------- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 6c7da3649f9d62..bf26cb35d6ef0a 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -940,6 +940,12 @@ class mydialect(csv.Dialect): self.assertEqual(str(cm.exception), '"delimiter" must be string, not int') + mydialect.delimiter = None + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"delimiter" must be string, not NoneType') + def test_escapechar(self): class mydialect(csv.Dialect): delimiter = ";" diff --git a/Modules/_csv.c b/Modules/_csv.c index a6d68cdcecb0a0..2783524248c011 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -229,28 +229,21 @@ _set_int(const char *name, int *target, PyObject *src, int dflt) } static int -_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) +_set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) { - if (src == NULL) + if (src == NULL) { *target = dflt; + } else { *target = '\0'; if (src != Py_None) { - Py_ssize_t len; if (!PyUnicode_Check(src)) { - if (strcmp(name, "delimiter") == 0) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be string, not %.200s", name, - Py_TYPE(src)->tp_name); - } - else { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be string or None, not %.200s", name, - Py_TYPE(src)->tp_name); - } + PyErr_Format(PyExc_TypeError, + "\"%s\" must be string or None, not %.200s", name, + Py_TYPE(src)->tp_name); return -1; } - len = PyUnicode_GetLength(src); + Py_ssize_t len = PyUnicode_GetLength(src); if (len != 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", @@ -266,6 +259,35 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) return 0; } +static int +_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) +{ + if (src == NULL) { + *target = dflt; + } + else { + *target = '\0'; + if (!PyUnicode_Check(src)) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be string, not %.200s", name, + Py_TYPE(src)->tp_name); + return -1; + } + Py_ssize_t len = PyUnicode_GetLength(src); + if (len != 1) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a 1-character string", + name); + return -1; + } + /* PyUnicode_READY() is called in PyUnicode_GetLength() */ + else { + *target = PyUnicode_READ_CHAR(src, 0); + } + } + return 0; +} + static int _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) { @@ -453,9 +475,9 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto err DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true); - DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0); + DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, 0); DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); - DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"'); + DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"'); DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false); DIASET(_set_bool, "strict", &self->strict, strict, false); From a9fd17b47abcf1f5a1e23bf55c084c2b8fd58191 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sat, 9 Oct 2021 22:38:59 +0900 Subject: [PATCH 6/7] bpo-20028: Address code review --- Modules/_csv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Modules/_csv.c b/Modules/_csv.c index 2783524248c011..1d6277cfda8686 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -244,6 +244,9 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt return -1; } Py_ssize_t len = PyUnicode_GetLength(src); + if (len < 0) { + return -1; + } if (len != 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", @@ -274,6 +277,9 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) return -1; } Py_ssize_t len = PyUnicode_GetLength(src); + if (len < 0) { + return -1; + } if (len != 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", From 5680bff41430bb8c15ac742e0a3548b9725d7949 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sat, 9 Oct 2021 22:56:26 +0900 Subject: [PATCH 7/7] bpo-20028: Seperate the PR --- Lib/test/test_csv.py | 10 ---------- .../Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst | 4 ++-- Modules/_csv.c | 10 ++-------- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index bf26cb35d6ef0a..6e5dfc63d43cef 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -899,12 +899,6 @@ class mydialect(csv.Dialect): self.assertEqual(str(cm.exception), '"quotechar" must be string or None, not int') - mydialect.quotechar = "" - with self.assertRaises(csv.Error) as cm: - mydialect() - self.assertEqual(str(cm.exception), - '"quotechar" must be a 1-character string') - def test_delimiter(self): class mydialect(csv.Dialect): delimiter = ";" @@ -957,10 +951,6 @@ class mydialect(csv.Dialect): d = mydialect() self.assertEqual(d.escapechar, "\\") - mydialect.escapechar = "" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): - mydialect() - mydialect.escapechar = "**" with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): mydialect() diff --git a/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst b/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst index fd583300db33e6..e75612116e9427 100644 --- a/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst +++ b/Misc/NEWS.d/next/Library/2021-10-03-21-14-37.bpo-20028.zBA4RK.rst @@ -1,2 +1,2 @@ -Update :mod:`csv` to handle the giving invalid quotechar correctly for -initializing *dialect*. Patch by Vajrasky Kok and Dong-hee Na. +Improve error message of :class:`csv.Dialect` when initializing. +Patch by Vajrasky Kok and Dong-hee Na. diff --git a/Modules/_csv.c b/Modules/_csv.c index 1d6277cfda8686..cfdfbce6e6824f 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -244,10 +244,7 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt return -1; } Py_ssize_t len = PyUnicode_GetLength(src); - if (len < 0) { - return -1; - } - if (len != 1) { + if (len > 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", name); @@ -277,10 +274,7 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) return -1; } Py_ssize_t len = PyUnicode_GetLength(src); - if (len < 0) { - return -1; - } - if (len != 1) { + if (len > 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", name);