From d8f1230f5e02b746982025fdb9f0b64bd567e893 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sun, 10 Oct 2021 00:33:08 +0900 Subject: [PATCH 1/4] bpo-20028: Empty escapechar/quotechar is not allowed for csv.Dialect --- Doc/library/csv.rst | 4 ++++ Lib/test/test_csv.py | 10 ++++++++++ .../Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst | 2 ++ Modules/_csv.c | 4 ++-- 4 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 899ce0225ce7f3..3a7817cfdfad87 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -383,6 +383,8 @@ Dialects support the following attributes: :const:`False`. On reading, the *escapechar* removes any special meaning from the following character. It defaults to :const:`None`, which disables escaping. + .. versionchanged:: 3.11 + An empty *escapechar* is not allowed. .. attribute:: Dialect.lineterminator @@ -402,6 +404,8 @@ Dialects support the following attributes: as the *delimiter* or *quotechar*, or which contain new-line characters. It defaults to ``'"'``. + .. versionchanged:: 3.11 + An empty *quotechar* is not allowed. .. attribute:: Dialect.quoting diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index fb27ea396e04db..2f5a59e41fdaea 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -913,6 +913,12 @@ class mydialect(csv.Dialect): self.assertEqual(d.quotechar, '"') self.assertTrue(d.doublequote) + mydialect.quotechar = "" + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"quotechar" must be a 1-character string') + mydialect.quotechar = "''" with self.assertRaises(csv.Error) as cm: mydialect() @@ -977,6 +983,10 @@ class mydialect(csv.Dialect): d = mydialect() self.assertEqual(d.escapechar, "\\") + mydialect.escapechar = "" + with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + mydialect() + mydialect.escapechar = "**" with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): mydialect() diff --git a/Misc/NEWS.d/next/Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst b/Misc/NEWS.d/next/Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst new file mode 100644 index 00000000000000..9db15bc39e7ca9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst @@ -0,0 +1,2 @@ +Empty escapechar/quotechar is not allowed when initializing +:class:`csv.Dialect`. Patch by Vajrasky Kok and Dong-hee Na. diff --git a/Modules/_csv.c b/Modules/_csv.c index 469c1a15c340c1..432a10f81994f4 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -250,7 +250,7 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt if (len < 0) { return -1; } - if (len > 1) { + if (len != 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", name); @@ -283,7 +283,7 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) if (len < 0) { return -1; } - if (len > 1) { + if (len != 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", name); From ddc405668eb6b1254f8035dd47d028e883d3dee1 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sun, 10 Oct 2021 01:33:23 +0900 Subject: [PATCH 2/4] bpo-20028: Fix test --- Lib/test/test_csv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 2f5a59e41fdaea..c8618a3bbd0821 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -342,7 +342,8 @@ def test_read_escape(self): self._read_test(['a,^b,c'], [['a', 'b', 'c']], escapechar='^') self._read_test(['a,\0b,c'], [['a', 'b', 'c']], escapechar='\0') self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar=None) - self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar='') + self.assertRaises(TypeError, self._read_test, + ['a,\\b,c'], [['a', '\\b', 'c']], escapechar='') self._read_test(['a,\\b,c'], [['a', '\\b', 'c']]) def test_read_quoting(self): From a1f31a27ad5ef4e4734cc717abad07297b04c95a Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sun, 10 Oct 2021 01:50:35 +0900 Subject: [PATCH 3/4] bpo-20028: Address code review --- Modules/_csv.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 432a10f81994f4..1c2f504ea5c097 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -257,9 +257,7 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt return -1; } /* PyUnicode_READY() is called in PyUnicode_GetLength() */ - else if (len > 0) { - *target = PyUnicode_READ_CHAR(src, 0); - } + *target = PyUnicode_READ_CHAR(src, 0); } } return 0; @@ -272,7 +270,6 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) *target = dflt; } else { - *target = NOT_SET; if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, "\"%s\" must be string, not %.200s", name, @@ -290,9 +287,7 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) return -1; } /* PyUnicode_READY() is called in PyUnicode_GetLength() */ - else if (len > 0) { - *target = PyUnicode_READ_CHAR(src, 0); - } + *target = PyUnicode_READ_CHAR(src, 0); } return 0; } From 138d80a7cb71c8436d892b962087134b149ddd5d Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sun, 10 Oct 2021 11:47:57 +0900 Subject: [PATCH 4/4] bpo-20028: Address codereview --- Lib/test/test_csv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index c8618a3bbd0821..95a19dd46cb4ff 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -44,6 +44,8 @@ def _test_arg_valid(self, ctor, arg): quoting=csv.QUOTE_ALL, quotechar='') self.assertRaises(TypeError, ctor, arg, quoting=csv.QUOTE_ALL, quotechar=None) + self.assertRaises(TypeError, ctor, arg, + quoting=csv.QUOTE_NONE, quotechar='') def test_reader_arg_valid(self): self._test_arg_valid(csv.reader, []) @@ -342,8 +344,6 @@ def test_read_escape(self): self._read_test(['a,^b,c'], [['a', 'b', 'c']], escapechar='^') self._read_test(['a,\0b,c'], [['a', 'b', 'c']], escapechar='\0') self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar=None) - self.assertRaises(TypeError, self._read_test, - ['a,\\b,c'], [['a', '\\b', 'c']], escapechar='') self._read_test(['a,\\b,c'], [['a', '\\b', 'c']]) def test_read_quoting(self):