Skip to content

gh-113796: Add more validation checks in the csv.Dialect constructor #113797

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 54 additions & 13 deletions Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase):
in TestDialectRegistry.
"""
def _test_arg_valid(self, ctor, arg):
ctor(arg)
self.assertRaises(TypeError, ctor)
self.assertRaises(TypeError, ctor, None)
self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
self.assertRaises(TypeError, ctor, arg, bad_attr=0)
self.assertRaises(TypeError, ctor, arg, delimiter='')
self.assertRaises(TypeError, ctor, arg, escapechar='')
self.assertRaises(TypeError, ctor, arg, quotechar='')
self.assertRaises(TypeError, ctor, arg, delimiter='^^')
self.assertRaises(TypeError, ctor, arg, escapechar='^^')
self.assertRaises(TypeError, ctor, arg, quotechar='^^')
self.assertRaises(csv.Error, ctor, arg, 'foo')
self.assertRaises(TypeError, ctor, arg, delimiter=None)
self.assertRaises(TypeError, ctor, arg, delimiter=1)
self.assertRaises(TypeError, ctor, arg, escapechar=1)
self.assertRaises(TypeError, ctor, arg, quotechar=1)
self.assertRaises(TypeError, ctor, arg, lineterminator=None)
self.assertRaises(TypeError, ctor, arg, lineterminator=1)
Expand All @@ -46,6 +52,40 @@ def _test_arg_valid(self, ctor, arg):
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')
self.assertRaises(ValueError, ctor, arg, delimiter='\n')
self.assertRaises(ValueError, ctor, arg, escapechar='\n')
self.assertRaises(ValueError, ctor, arg, quotechar='\n')
self.assertRaises(ValueError, ctor, arg, delimiter='\r')
self.assertRaises(ValueError, ctor, arg, escapechar='\r')
self.assertRaises(ValueError, ctor, arg, quotechar='\r')
ctor(arg, delimiter=' ')
ctor(arg, escapechar=' ')
ctor(arg, quotechar=' ')
ctor(arg, delimiter='\t', skipinitialspace=True)
ctor(arg, escapechar='\t', skipinitialspace=True)
ctor(arg, quotechar='\t', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
delimiter=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
escapechar=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
quotechar=' ', skipinitialspace=True)
ctor(arg, delimiter='^')
ctor(arg, escapechar='^')
ctor(arg, quotechar='^')
self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^')
self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^')
self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^')
ctor(arg, delimiter='\x85')
ctor(arg, escapechar='\x85')
ctor(arg, quotechar='\x85')
ctor(arg, lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
delimiter='\x85', lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
escapechar='\x85', lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
quotechar='\x85', lineterminator='\x85')

def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
Expand Down Expand Up @@ -530,14 +570,6 @@ class unspecified():
finally:
csv.unregister_dialect('testC')

def test_bad_dialect(self):
# Unknown parameter
self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
# Bad values
self.assertRaises(TypeError, csv.reader, [], delimiter = None)
self.assertRaises(TypeError, csv.reader, [], quoting = -1)
self.assertRaises(TypeError, csv.reader, [], quoting = 100)

def test_copy(self):
for name in csv.list_dialects():
dialect = csv.get_dialect(name)
Expand Down Expand Up @@ -1083,10 +1115,15 @@ class mydialect(csv.Dialect):
'"lineterminator" must be a string')

def test_invalid_chars(self):
def create_invalid(field_name, value):
def create_invalid(field_name, value, **kwargs):
class mydialect(csv.Dialect):
pass
delimiter = ','
quoting = csv.QUOTE_ALL
quotechar = '"'
lineterminator = '\r\n'
Comment on lines 1119 to +1123
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test was broken. In most cases the exception was raised for different reason, not because wrong specified value, but because of the lack of required default values.

setattr(mydialect, field_name, value)
for field_name, value in kwargs.items():
setattr(mydialect, field_name, value)
d = mydialect()

for field_name in ("delimiter", "escapechar", "quotechar"):
Expand All @@ -1095,6 +1132,10 @@ class mydialect(csv.Dialect):
self.assertRaises(csv.Error, create_invalid, field_name, "abc")
self.assertRaises(csv.Error, create_invalid, field_name, b'x')
self.assertRaises(csv.Error, create_invalid, field_name, 5)
self.assertRaises(ValueError, create_invalid, field_name, "\n")
self.assertRaises(ValueError, create_invalid, field_name, "\r")
self.assertRaises(ValueError, create_invalid, field_name, " ",
skipinitialspace=True)


class TestSniffer(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add more validation checks in the :class:`csv.Dialect` constructor.
:exc:`ValueError` is now raised if the same character is used in different
roles.
39 changes: 39 additions & 0 deletions Modules/_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,33 @@ dialect_check_quoting(int quoting)
return -1;
}

static int
dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect)
{
if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) {
PyErr_Format(PyExc_ValueError, "bad %s value", name);
return -1;
}
if (PyUnicode_FindChar(
dialect->lineterminator, c, 0,
PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0)
{
PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name);
return -1;
}
return 0;
}

static int
dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2)
{
if (c1 == c2 && c1 != NOT_SET) {
PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2);
return -1;
}
return 0;
}

#define D_OFF(x) offsetof(DialectObj, x)

static struct PyMemberDef Dialect_memberlist[] = {
Expand Down Expand Up @@ -510,6 +537,18 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
goto err;
}
if (dialect_check_char("delimiter", self->delimiter, self) ||
dialect_check_char("escapechar", self->escapechar, self) ||
dialect_check_char("quotechar", self->quotechar, self) ||
dialect_check_chars("delimiter", "escapechar",
self->delimiter, self->escapechar) ||
dialect_check_chars("delimiter", "quotechar",
self->delimiter, self->quotechar) ||
dialect_check_chars("escapechar", "quotechar",
self->escapechar, self->quotechar))
{
goto err;
}

ret = Py_NewRef(self);
err:
Expand Down