Skip to content

bpo-20028: Empty escapechar/quotechar is not allowed for csv.Dialect #28833

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits on Oct 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Doc/library/csv.rst
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ Dialects support the following attributes:
:const:`False`. On reading, the *escapechar* removes any special meaning from
the following character. It defaults to :const:`None`, which disables escaping.

.. versionchanged:: 3.11
An empty *escapechar* is not allowed.

.. attribute:: Dialect.lineterminator

Expand All @@ -402,6 +404,8 @@ Dialects support the following attributes:
as the *delimiter* or *quotechar*, or which contain new-line characters. It
defaults to ``'"'``.

.. versionchanged:: 3.11
An empty *quotechar* is not allowed.

.. attribute:: Dialect.quoting

Expand Down
13 changes: 12 additions & 1 deletion Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def _test_arg_valid(self, ctor, arg):
quoting=csv.QUOTE_ALL, quotechar='')
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')

def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
Expand Down Expand Up @@ -342,7 +344,6 @@ def test_read_escape(self):
self._read_test(['a,^b,c'], [['a', 'b', 'c']], escapechar='^')
self._read_test(['a,\0b,c'], [['a', 'b', 'c']], escapechar='\0')
self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar=None)
self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar='')
self._read_test(['a,\\b,c'], [['a', '\\b', 'c']])

def test_read_quoting(self):
Expand Down Expand Up @@ -913,6 +914,12 @@ class mydialect(csv.Dialect):
self.assertEqual(d.quotechar, '"')
self.assertTrue(d.doublequote)

mydialect.quotechar = ""
with self.assertRaises(csv.Error) as cm:
mydialect()
self.assertEqual(str(cm.exception),
'"quotechar" must be a 1-character string')

mydialect.quotechar = "''"
with self.assertRaises(csv.Error) as cm:
mydialect()
Expand Down Expand Up @@ -977,6 +984,10 @@ class mydialect(csv.Dialect):
d = mydialect()
self.assertEqual(d.escapechar, "\\")

mydialect.escapechar = ""
with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'):
mydialect()

mydialect.escapechar = "**"
with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'):
mydialect()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Empty escapechar/quotechar is not allowed when initializing
:class:`csv.Dialect`. Patch by Vajrasky Kok and Dong-hee Na.
13 changes: 4 additions & 9 deletions Modules/_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -250,16 +250,14 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt
if (len < 0) {
return -1;
}
if (len > 1) {
if (len != 1) {
PyErr_Format(PyExc_TypeError,
"\"%s\" must be a 1-character string",
name);
return -1;
}
/* PyUnicode_READY() is called in PyUnicode_GetLength() */
else if (len > 0) {
*target = PyUnicode_READ_CHAR(src, 0);
}
*target = PyUnicode_READ_CHAR(src, 0);
}
}
return 0;
Expand All @@ -272,7 +270,6 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
*target = dflt;
}
else {
*target = NOT_SET;
if (!PyUnicode_Check(src)) {
PyErr_Format(PyExc_TypeError,
"\"%s\" must be string, not %.200s", name,
Expand All @@ -283,16 +280,14 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
if (len < 0) {
return -1;
}
if (len > 1) {
if (len != 1) {
PyErr_Format(PyExc_TypeError,
"\"%s\" must be a 1-character string",
name);
return -1;
}
/* PyUnicode_READY() is called in PyUnicode_GetLength() */
else if (len > 0) {
*target = PyUnicode_READ_CHAR(src, 0);
}
*target = PyUnicode_READ_CHAR(src, 0);
}
return 0;
}
Expand Down