From aa490f08bcc922bd9815e716da78ef6b4826088b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 2 Jun 2025 23:35:41 +0300 Subject: [PATCH] gh-132813: Improve error messages for incorrect types and values of csv.Dialog attributes (GH-133241) Make them similar to PyArg_Parse error messages, mention None as a possible value, show a wrong type and the string length. (cherry picked from commit df98a47a61a274eb7427c6201ddabec9ffd30b0a) Co-authored-by: Serhiy Storchaka --- Lib/test/test_csv.py | 48 ++++++++---- ...-05-01-10-56-44.gh-issue-132813.rKurvp.rst | 2 + Modules/_csv.c | 73 ++++++++----------- 3 files changed, 69 insertions(+), 54 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 9aace57633b0c6..60feab225a107c 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1122,19 +1122,22 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be a 1-character string') + '"quotechar" must be a unicode character or None, ' + 'not a string of length 0') mydialect.quotechar = "''" with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be a 1-character string') + '"quotechar" must be a unicode character or None, ' + 'not a string of length 2') mydialect.quotechar = 4 with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be string or None, not int') + '"quotechar" must be a unicode character or None, ' + 'not int') def test_delimiter(self): class mydialect(csv.Dialect): @@ -1151,31 +1154,32 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be a 1-character string') + '"delimiter" must be a unicode character, ' + 'not a string of length 3') mydialect.delimiter = "" with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be a 1-character string') + '"delimiter" must be a unicode character, not a string of length 0') mydialect.delimiter = b"," with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not bytes') + '"delimiter" must be a unicode character, not bytes') mydialect.delimiter = 4 with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not int') + '"delimiter" must be a unicode character, not int') mydialect.delimiter = None with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not NoneType') + '"delimiter" must be a unicode character, not NoneType') def test_escapechar(self): class mydialect(csv.Dialect): @@ -1189,20 +1193,32 @@ class mydialect(csv.Dialect): self.assertEqual(d.escapechar, "\\") mydialect.escapechar = "" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not a string of length 0') mydialect.escapechar = "**" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not a string of length 2') mydialect.escapechar = b"*" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not bytes'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not bytes') mydialect.escapechar = 4 - with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not int'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not int') def test_lineterminator(self): class mydialect(csv.Dialect): @@ -1223,7 +1239,13 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"lineterminator" must be a string') + '"lineterminator" must be a string, not int') + + mydialect.lineterminator = None + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"lineterminator" must be a string, not NoneType') def test_invalid_chars(self): def create_invalid(field_name, value, **kwargs): diff --git a/Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst b/Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst new file mode 100644 index 00000000000000..55608528a4564b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst @@ -0,0 +1,2 @@ +Improve error messages for incorrect types and values of :class:`csv.Dialect` +attributes. diff --git a/Modules/_csv.c b/Modules/_csv.c index e5ae853590bf2c..2e04136e0ac657 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -237,7 +237,7 @@ _set_int(const char *name, int *target, PyObject *src, int dflt) int value; if (!PyLong_CheckExact(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be an integer", name); + "\"%s\" must be an integer, not %T", name, src); return -1; } value = PyLong_AsInt(src); @@ -255,27 +255,29 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt if (src == NULL) { *target = dflt; } - else { + else if (src == Py_None) { *target = NOT_SET; - if (src != Py_None) { - if (!PyUnicode_Check(src)) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be string or None, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; - } - Py_ssize_t len = PyUnicode_GetLength(src); - if (len < 0) { - return -1; - } - if (len != 1) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); - return -1; - } - *target = PyUnicode_READ_CHAR(src, 0); + } + else { + // similar to PyArg_Parse("C?") + if (!PyUnicode_Check(src)) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, not %T", + name, src); + return -1; + } + Py_ssize_t len = PyUnicode_GetLength(src); + if (len < 0) { + return -1; } + if (len != 1) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, " + "not a string of length %zd", + name, len); + return -1; + } + *target = PyUnicode_READ_CHAR(src, 0); } return 0; } @@ -287,11 +289,12 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) *target = dflt; } else { + // similar to PyArg_Parse("C") if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be string, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; + "\"%s\" must be a unicode character, not %T", + name, src); + return -1; } Py_ssize_t len = PyUnicode_GetLength(src); if (len < 0) { @@ -299,8 +302,9 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) } if (len != 1) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); + "\"%s\" must be a unicode character, " + "not a string of length %zd", + name, len); return -1; } *target = PyUnicode_READ_CHAR(src, 0); @@ -314,16 +318,12 @@ _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) if (src == NULL) *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); else { - if (src == Py_None) - *target = NULL; - else if (!PyUnicode_Check(src)) { + if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a string", name); + "\"%s\" must be a string, not %T", name, src); return -1; } - else { - Py_XSETREF(*target, Py_NewRef(src)); - } + Py_XSETREF(*target, Py_NewRef(src)); } return 0; } @@ -533,11 +533,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) /* validate options */ if (dialect_check_quoting(self->quoting)) goto err; - if (self->delimiter == NOT_SET) { - PyErr_SetString(PyExc_TypeError, - "\"delimiter\" must be a 1-character string"); - goto err; - } if (quotechar == Py_None && quoting == NULL) self->quoting = QUOTE_NONE; if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) { @@ -545,10 +540,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) "quotechar must be set if quoting enabled"); goto err; } - if (self->lineterminator == NULL) { - PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); - goto err; - } if (dialect_check_char("delimiter", self->delimiter, self, true) || dialect_check_char("escapechar", self->escapechar, self, !self->skipinitialspace) ||