Skip to content

gh-115712: Support CSV dialects with delimiter=' ' and skipinitialspace=True #115721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 57 additions & 10 deletions Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ def _test_arg_valid(self, ctor, arg):
ctor(arg, delimiter='\t', skipinitialspace=True)
ctor(arg, escapechar='\t', skipinitialspace=True)
ctor(arg, quotechar='\t', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
delimiter=' ', skipinitialspace=True)
ctor(arg, delimiter=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
escapechar=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
Expand Down Expand Up @@ -192,9 +191,6 @@ def _write_error_test(self, exc, fields, **kwargs):

def test_write_arg_valid(self):
self._write_error_test(csv.Error, None)
self._write_test((), '')
self._write_test([None], '""')
self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE)
# Check that exceptions are passed up the chain
self._write_error_test(OSError, BadIterable())
class BadList:
Expand All @@ -208,7 +204,6 @@ class BadItem:
def __str__(self):
raise OSError
self._write_error_test(OSError, [BadItem()])

def test_write_bigfield(self):
# This exercises the buffer realloc functionality
bigstring = 'X' * 50000
Expand Down Expand Up @@ -315,6 +310,49 @@ def test_writerows_with_none(self):
fileobj.seek(0)
self.assertEqual(fileobj.read(), 'a\r\n""\r\n')


def test_write_empty_fields(self):
self._write_test((), '')
self._write_test([''], '""')
self._write_error_test(csv.Error, [''], quoting=csv.QUOTE_NONE)
self._write_test([''], '""', quoting=csv.QUOTE_STRINGS)
self._write_test([''], '""', quoting=csv.QUOTE_NOTNULL)
self._write_test([None], '""')
self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE)
self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_STRINGS)
self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NOTNULL)
self._write_test(['', ''], ',')
self._write_test([None, None], ',')

def test_write_empty_fields_space_delimiter(self):
self._write_test([''], '""', delimiter=' ', skipinitialspace=False)
self._write_test([''], '""', delimiter=' ', skipinitialspace=True)
self._write_test([None], '""', delimiter=' ', skipinitialspace=False)
self._write_test([None], '""', delimiter=' ', skipinitialspace=True)

self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False)
self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True)
self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False)
self._write_test([None, None], '"" ""', delimiter=' ', skipinitialspace=True)

self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False,
quoting=csv.QUOTE_NONE)
self._write_error_test(csv.Error, ['', ''],
delimiter=' ', skipinitialspace=True,
quoting=csv.QUOTE_NONE)
for quoting in csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL:
self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=False,
quoting=quoting)
self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True,
quoting=quoting)

for quoting in csv.QUOTE_NONE, csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL:
self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False,
quoting=quoting)
self._write_error_test(csv.Error, [None, None],
delimiter=' ', skipinitialspace=True,
quoting=quoting)

def test_writerows_errors(self):
with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
writer = csv.writer(fileobj)
Expand Down Expand Up @@ -429,6 +467,14 @@ def test_read_skipinitialspace(self):
[[None, None, None]],
skipinitialspace=True, quoting=csv.QUOTE_STRINGS)

def test_read_space_delimiter(self):
self._read_test(['a b', ' a ', ' ', ''],
[['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []],
delimiter=' ', skipinitialspace=False)
self._read_test(['a b', ' a ', ' ', ''],
[['a', 'b'], ['a', ''], [''], []],
delimiter=' ', skipinitialspace=True)

def test_read_bigfield(self):
# This exercises the buffer realloc functionality and field size
# limits.
Expand Down Expand Up @@ -555,10 +601,10 @@ class space(csv.excel):
escapechar = "\\"

with TemporaryFile("w+", encoding="utf-8") as fileobj:
fileobj.write("abc def\nc1ccccc1 benzene\n")
fileobj.write("abc def\nc1ccccc1 benzene\n")
fileobj.seek(0)
reader = csv.reader(fileobj, dialect=space())
self.assertEqual(next(reader), ["abc", "def"])
self.assertEqual(next(reader), ["abc", "", "", "def"])
self.assertEqual(next(reader), ["c1ccccc1", "benzene"])

def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):
Expand Down Expand Up @@ -1164,8 +1210,9 @@ class mydialect(csv.Dialect):
self.assertRaises(csv.Error, create_invalid, field_name, 5)
self.assertRaises(ValueError, create_invalid, field_name, "\n")
self.assertRaises(ValueError, create_invalid, field_name, "\r")
self.assertRaises(ValueError, create_invalid, field_name, " ",
skipinitialspace=True)
if field_name != "delimiter":
self.assertRaises(ValueError, create_invalid, field_name, " ",
skipinitialspace=True)


class TestSniffer(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Restore support of space delimiter with ``skipinitialspace=True`` in
:mod:`csv`. :func:`csv.writer()` now quotes empty fields if delimiter is a
space and skipinitialspace is true and raises exception if quoting is not
possible.
36 changes: 29 additions & 7 deletions Modules/_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,9 @@ dialect_check_quoting(int quoting)
}

static int
dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect)
dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect, bool allowspace)
{
if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) {
if (c == '\r' || c == '\n' || (c == ' ' && !allowspace)) {
PyErr_Format(PyExc_ValueError, "bad %s value", name);
return -1;
}
Expand Down Expand Up @@ -535,9 +535,11 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
goto err;
}
if (dialect_check_char("delimiter", self->delimiter, self) ||
dialect_check_char("escapechar", self->escapechar, self) ||
dialect_check_char("quotechar", self->quotechar, self) ||
if (dialect_check_char("delimiter", self->delimiter, self, true) ||
dialect_check_char("escapechar", self->escapechar, self,
!self->skipinitialspace) ||
dialect_check_char("quotechar", self->quotechar, self,
!self->skipinitialspace) ||
dialect_check_chars("delimiter", "escapechar",
self->delimiter, self->escapechar) ||
dialect_check_chars("delimiter", "quotechar",
Expand Down Expand Up @@ -1221,6 +1223,7 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
static int
join_append(WriterObj *self, PyObject *field, int quoted)
{
DialectObj *dialect = self->dialect;
int field_kind = -1;
const void *field_data = NULL;
Py_ssize_t field_len = 0;
Expand All @@ -1231,6 +1234,19 @@ join_append(WriterObj *self, PyObject *field, int quoted)
field_data = PyUnicode_DATA(field);
field_len = PyUnicode_GET_LENGTH(field);
}
if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) {
if (dialect->quoting == QUOTE_NONE ||
(field == NULL &&
(dialect->quoting == QUOTE_STRINGS ||
dialect->quoting == QUOTE_NOTNULL)))
{
PyErr_Format(self->error_obj,
"empty field must be quoted if delimiter is a space "
"and skipinitialspace is true");
return 0;
}
quoted = 1;
}
rec_len = join_append_data(self, field_kind, field_data, field_len,
&quoted, 0);
if (rec_len < 0)
Expand Down Expand Up @@ -1282,6 +1298,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
{
DialectObj *dialect = self->dialect;
PyObject *iter, *field, *line, *result;
bool null_field = false;

iter = PyObject_GetIter(seq);
if (iter == NULL) {
Expand Down Expand Up @@ -1318,11 +1335,12 @@ csv_writerow(WriterObj *self, PyObject *seq)
break;
}

null_field = (field == Py_None);
if (PyUnicode_Check(field)) {
append_ok = join_append(self, field, quoted);
Py_DECREF(field);
}
else if (field == Py_None) {
else if (null_field) {
append_ok = join_append(self, NULL, quoted);
Py_DECREF(field);
}
Expand All @@ -1348,7 +1366,11 @@ csv_writerow(WriterObj *self, PyObject *seq)
return NULL;

if (self->num_fields > 0 && self->rec_len == 0) {
if (dialect->quoting == QUOTE_NONE) {
if (dialect->quoting == QUOTE_NONE ||
(null_field &&
(dialect->quoting == QUOTE_STRINGS ||
dialect->quoting == QUOTE_NOTNULL)))
{
PyErr_Format(self->error_obj,
"single empty field record must be quoted");
return NULL;
Expand Down