From eacf8d2e39313e0646b0b66445b406eb1b9dadee Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Mon, 21 Apr 2025 22:17:16 -0700 Subject: [PATCH] update csv to 3.13.3 --- Lib/csv.py | 80 ++++++++++-- Lib/test/test_csv.py | 285 ++++++++++++++++++++++++++++++------------- stdlib/src/csv.rs | 37 ++++-- 3 files changed, 295 insertions(+), 107 deletions(-) diff --git a/Lib/csv.py b/Lib/csv.py index 77f30c8d2b..cd20265987 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -1,28 +1,90 @@ -""" -csv.py - read/write/investigate CSV files +r""" +CSV parsing and writing. + +This module provides classes that assist in the reading and writing +of Comma Separated Value (CSV) files, and implements the interface +described by PEP 305. Although many CSV files are simple to parse, +the format is not formally defined by a stable specification and +is subtle enough that parsing lines of a CSV file with something +like line.split(",") is bound to fail. The module supports three +basic APIs: reading, writing, and registration of dialects. + + +DIALECT REGISTRATION: + +Readers and writers support a dialect argument, which is a convenient +handle on a group of settings. When the dialect argument is a string, +it identifies one of the dialects previously registered with the module. +If it is a class or instance, the attributes of the argument are used as +the settings for the reader or writer: + + class excel: + delimiter = ',' + quotechar = '"' + escapechar = None + doublequote = True + skipinitialspace = False + lineterminator = '\r\n' + quoting = QUOTE_MINIMAL + +SETTINGS: + + * quotechar - specifies a one-character string to use as the + quoting character. It defaults to '"'. + * delimiter - specifies a one-character string to use as the + field separator. It defaults to ','. + * skipinitialspace - specifies how to interpret spaces which + immediately follow a delimiter. It defaults to False, which + means that spaces immediately following a delimiter is part + of the following field. + * lineterminator - specifies the character sequence which should + terminate rows. + * quoting - controls when quotes should be generated by the writer. + It can take on any of the following module constants: + + csv.QUOTE_MINIMAL means only when required, for example, when a + field contains either the quotechar or the delimiter + csv.QUOTE_ALL means that quotes are always placed around fields. + csv.QUOTE_NONNUMERIC means that quotes are always placed around + fields which do not parse as integers or floating-point + numbers. + csv.QUOTE_STRINGS means that quotes are always placed around + fields which are strings. Note that the Python value None + is not a string. + csv.QUOTE_NOTNULL means that quotes are only placed around fields + that are not the Python value None. + csv.QUOTE_NONE means that quotes are never placed around fields. + * escapechar - specifies a one-character string used to escape + the delimiter when quoting is set to QUOTE_NONE. + * doublequote - controls the handling of quotes inside fields. When + True, two consecutive quotes are interpreted as one during read, + and when writing, each quote character embedded in the data is + written as two quotes """ import re import types -from _csv import Error, __version__, writer, reader, register_dialect, \ +from _csv import Error, writer, reader, register_dialect, \ unregister_dialect, get_dialect, list_dialects, \ field_size_limit, \ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \ - QUOTE_STRINGS, QUOTE_NOTNULL, \ - __doc__ + QUOTE_STRINGS, QUOTE_NOTNULL from _csv import Dialect as _Dialect from io import StringIO __all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE", "QUOTE_STRINGS", "QUOTE_NOTNULL", - "Error", "Dialect", "__doc__", "excel", "excel_tab", + "Error", "Dialect", "excel", "excel_tab", "field_size_limit", "reader", "writer", "register_dialect", "get_dialect", "list_dialects", "Sniffer", - "unregister_dialect", "__version__", "DictReader", "DictWriter", + "unregister_dialect", "DictReader", "DictWriter", "unix_dialect"] +__version__ = "1.0" + + class Dialect: """Describe a CSV dialect. @@ -51,8 +113,8 @@ def _validate(self): try: _Dialect(self) except TypeError as e: - # We do this for compatibility with py2.3 - raise Error(str(e)) + # Re-raise to get a traceback showing more user code. + raise Error(str(e)) from None class excel(Dialect): """Describe the usual properties of Excel-generated CSV files.""" diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 9a1743da6d..4185c70ec5 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -10,7 +10,7 @@ import gc import pickle from test import support -from test.support import warnings_helper, import_helper, check_disallow_instantiation +from test.support import import_helper, check_disallow_instantiation from itertools import permutations from textwrap import dedent from collections import OrderedDict @@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase): in TestDialectRegistry. """ def _test_arg_valid(self, ctor, arg): + ctor(arg) self.assertRaises(TypeError, ctor) self.assertRaises(TypeError, ctor, None) - self.assertRaises(TypeError, ctor, arg, bad_attr = 0) - self.assertRaises(TypeError, ctor, arg, delimiter = 0) - self.assertRaises(TypeError, ctor, arg, delimiter = 'XX') + self.assertRaises(TypeError, ctor, arg, bad_attr=0) + self.assertRaises(TypeError, ctor, arg, delimiter='') + self.assertRaises(TypeError, ctor, arg, escapechar='') + self.assertRaises(TypeError, ctor, arg, quotechar='') + self.assertRaises(TypeError, ctor, arg, delimiter='^^') + self.assertRaises(TypeError, ctor, arg, escapechar='^^') + self.assertRaises(TypeError, ctor, arg, quotechar='^^') self.assertRaises(csv.Error, ctor, arg, 'foo') self.assertRaises(TypeError, ctor, arg, delimiter=None) self.assertRaises(TypeError, ctor, arg, delimiter=1) + self.assertRaises(TypeError, ctor, arg, escapechar=1) self.assertRaises(TypeError, ctor, arg, quotechar=1) self.assertRaises(TypeError, ctor, arg, lineterminator=None) self.assertRaises(TypeError, ctor, arg, lineterminator=1) @@ -46,6 +52,39 @@ def _test_arg_valid(self, ctor, arg): quoting=csv.QUOTE_ALL, quotechar=None) self.assertRaises(TypeError, ctor, arg, quoting=csv.QUOTE_NONE, quotechar='') + self.assertRaises(ValueError, ctor, arg, delimiter='\n') + self.assertRaises(ValueError, ctor, arg, escapechar='\n') + self.assertRaises(ValueError, ctor, arg, quotechar='\n') + self.assertRaises(ValueError, ctor, arg, delimiter='\r') + self.assertRaises(ValueError, ctor, arg, escapechar='\r') + self.assertRaises(ValueError, ctor, arg, quotechar='\r') + ctor(arg, delimiter=' ') + ctor(arg, escapechar=' ') + ctor(arg, quotechar=' ') + ctor(arg, delimiter='\t', skipinitialspace=True) + ctor(arg, escapechar='\t', skipinitialspace=True) + ctor(arg, quotechar='\t', skipinitialspace=True) + ctor(arg, delimiter=' ', skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + escapechar=' ', skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + quotechar=' ', skipinitialspace=True) + ctor(arg, delimiter='^') + ctor(arg, escapechar='^') + ctor(arg, quotechar='^') + self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^') + self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^') + self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^') + ctor(arg, delimiter='\x85') + ctor(arg, escapechar='\x85') + ctor(arg, quotechar='\x85') + ctor(arg, lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + delimiter='\x85', lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + escapechar='\x85', lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + quotechar='\x85', lineterminator='\x85') def test_reader_arg_valid(self): self._test_arg_valid(csv.reader, []) @@ -150,13 +189,8 @@ def _write_error_test(self, exc, fields, **kwargs): fileobj.seek(0) self.assertEqual(fileobj.read(), '') - # TODO: RUSTPYTHON ''\r\n to ""\r\n unsupported - @unittest.expectedFailure def test_write_arg_valid(self): self._write_error_test(csv.Error, None) - self._write_test((), '') - self._write_test([None], '""') - self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE) # Check that exceptions are passed up the chain self._write_error_test(OSError, BadIterable()) class BadList: @@ -170,15 +204,12 @@ class BadItem: def __str__(self): raise OSError self._write_error_test(OSError, [BadItem()]) - def test_write_bigfield(self): # This exercises the buffer realloc functionality bigstring = 'X' * 50000 self._write_test([bigstring,bigstring], '%s,%s' % \ (bigstring, bigstring)) - # TODO: RUSTPYTHON quoting style check is unsupported - @unittest.expectedFailure def test_write_quoting(self): self._write_test(['a',1,'p,q'], 'a,1,"p,q"') self._write_error_test(csv.Error, ['a',1,'p,q'], @@ -196,8 +227,6 @@ def test_write_quoting(self): self._write_test(['a','',None,1], '"a","",,"1"', quoting = csv.QUOTE_NOTNULL) - # TODO: RUSTPYTHON doublequote check is unsupported - @unittest.expectedFailure def test_write_escape(self): self._write_test(['a',1,'p,q'], 'a,1,"p,q"', escapechar='\\') @@ -229,8 +258,6 @@ def test_write_escape(self): self._write_test(['C\\', '6', '7', 'X"'], 'C\\\\,6,7,"X"""', escapechar='\\', quoting=csv.QUOTE_MINIMAL) - # TODO: RUSTPYTHON lineterminator double char unsupported - @unittest.expectedFailure def test_write_lineterminator(self): for lineterminator in '\r\n', '\n', '\r', '!@#', '\0': with self.subTest(lineterminator=lineterminator): @@ -238,12 +265,12 @@ def test_write_lineterminator(self): writer = csv.writer(sio, lineterminator=lineterminator) writer.writerow(['a', 'b']) writer.writerow([1, 2]) + writer.writerow(['\r', '\n']) self.assertEqual(sio.getvalue(), f'a,b{lineterminator}' - f'1,2{lineterminator}') + f'1,2{lineterminator}' + f'"\r","\n"{lineterminator}') - # TODO: RUSTPYTHON ''\r\n to ""\r\n unspported - @unittest.expectedFailure def test_write_iterable(self): self._write_test(iter(['a', 1, 'p,q']), 'a,1,"p,q"') self._write_test(iter(['a', 1, None]), 'a,1,') @@ -285,6 +312,49 @@ def test_writerows_with_none(self): fileobj.seek(0) self.assertEqual(fileobj.read(), 'a\r\n""\r\n') + + def test_write_empty_fields(self): + self._write_test((), '') + self._write_test([''], '""') + self._write_error_test(csv.Error, [''], quoting=csv.QUOTE_NONE) + self._write_test([''], '""', quoting=csv.QUOTE_STRINGS) + self._write_test([''], '""', quoting=csv.QUOTE_NOTNULL) + self._write_test([None], '""') + self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE) + self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_STRINGS) + self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NOTNULL) + self._write_test(['', ''], ',') + self._write_test([None, None], ',') + + def test_write_empty_fields_space_delimiter(self): + self._write_test([''], '""', delimiter=' ', skipinitialspace=False) + self._write_test([''], '""', delimiter=' ', skipinitialspace=True) + self._write_test([None], '""', delimiter=' ', skipinitialspace=False) + self._write_test([None], '""', delimiter=' ', skipinitialspace=True) + + self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False) + self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True) + self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False) + self._write_test([None, None], '"" ""', delimiter=' ', skipinitialspace=True) + + self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False, + quoting=csv.QUOTE_NONE) + self._write_error_test(csv.Error, ['', ''], + delimiter=' ', skipinitialspace=True, + quoting=csv.QUOTE_NONE) + for quoting in csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL: + self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=False, + quoting=quoting) + self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True, + quoting=quoting) + + for quoting in csv.QUOTE_NONE, csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL: + self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False, + quoting=quoting) + self._write_error_test(csv.Error, [None, None], + delimiter=' ', skipinitialspace=True, + quoting=quoting) + def test_writerows_errors(self): with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: writer = csv.writer(fileobj) @@ -296,8 +366,6 @@ def _read_test(self, input, expect, **kwargs): result = list(reader) self.assertEqual(result, expect) - # TODO RUSTPYTHON strict mode is unsupported - @unittest.expectedFailure def test_read_oddinputs(self): self._read_test([], []) self._read_test([''], [[]]) @@ -309,16 +377,19 @@ def test_read_oddinputs(self): [b'abc'], None) def test_read_eol(self): - self._read_test(['a,b'], [['a','b']]) - self._read_test(['a,b\n'], [['a','b']]) - self._read_test(['a,b\r\n'], [['a','b']]) - self._read_test(['a,b\r'], [['a','b']]) - self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], []) - self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], []) - self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], []) - - # TODO RUSTPYTHON double quote umimplement - @unittest.expectedFailure + self._read_test(['a,b', 'c,d'], [['a','b'], ['c','d']]) + self._read_test(['a,b\n', 'c,d\n'], [['a','b'], ['c','d']]) + self._read_test(['a,b\r\n', 'c,d\r\n'], [['a','b'], ['c','d']]) + self._read_test(['a,b\r', 'c,d\r'], [['a','b'], ['c','d']]) + + errmsg = "with newline=''" + with self.assertRaisesRegex(csv.Error, errmsg): + next(csv.reader(['a,b\rc,d'])) + with self.assertRaisesRegex(csv.Error, errmsg): + next(csv.reader(['a,b\nc,d'])) + with self.assertRaisesRegex(csv.Error, errmsg): + next(csv.reader(['a,b\r\nc,d'])) + def test_read_eof(self): self._read_test(['a,"'], [['a', '']]) self._read_test(['"a'], [['a']]) @@ -328,8 +399,6 @@ def test_read_eof(self): self.assertRaises(csv.Error, self._read_test, ['^'], [], escapechar='^', strict=True) - # TODO RUSTPYTHON - @unittest.expectedFailure def test_read_nul(self): self._read_test(['\0'], [['\0']]) self._read_test(['a,\0b,c'], [['a', '\0b', 'c']]) @@ -342,8 +411,6 @@ def test_read_delimiter(self): self._read_test(['a;b;c'], [['a', 'b', 'c']], delimiter=';') self._read_test(['a\0b\0c'], [['a', 'b', 'c']], delimiter='\0') - # TODO RUSTPYTHON - @unittest.expectedFailure def test_read_escape(self): self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\') self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\') @@ -356,8 +423,6 @@ def test_read_escape(self): self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar=None) self._read_test(['a,\\b,c'], [['a', '\\b', 'c']]) - # TODO RUSTPYTHON escapechar unsupported - @unittest.expectedFailure def test_read_quoting(self): self._read_test(['1,",3,",5'], [['1', ',3,', '5']]) self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']], @@ -367,17 +432,54 @@ def test_read_quoting(self): # will this fail where locale uses comma for decimals? self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]], quoting=csv.QUOTE_NONNUMERIC) + self._read_test([',3,"5",7.3, 9'], [[None, '3', '5', '7.3', ' 9']], + quoting=csv.QUOTE_NOTNULL) + self._read_test([',3,"5",7.3, 9'], [[None, 3, '5', 7.3, 9]], + quoting=csv.QUOTE_STRINGS) + + self._read_test([',,"",'], [['', '', '', '']]) + self._read_test([',,"",'], [['', '', '', '']], + quoting=csv.QUOTE_NONNUMERIC) + self._read_test([',,"",'], [[None, None, '', None]], + quoting=csv.QUOTE_NOTNULL) + self._read_test([',,"",'], [[None, None, '', None]], + quoting=csv.QUOTE_STRINGS) + self._read_test(['"a\nb", 7'], [['a\nb', ' 7']]) self.assertRaises(ValueError, self._read_test, ['abc,3'], [[]], quoting=csv.QUOTE_NONNUMERIC) + self.assertRaises(ValueError, self._read_test, + ['abc,3'], [[]], + quoting=csv.QUOTE_STRINGS) self._read_test(['1,@,3,@,5'], [['1', ',3,', '5']], quotechar='@') self._read_test(['1,\0,3,\0,5'], [['1', ',3,', '5']], quotechar='\0') + self._read_test(['1\\.5,\\.5,.5'], [[1.5, 0.5, 0.5]], + quoting=csv.QUOTE_NONNUMERIC, escapechar='\\') + self._read_test(['1\\.5,\\.5,"\\.5"'], [[1.5, 0.5, ".5"]], + quoting=csv.QUOTE_STRINGS, escapechar='\\') def test_read_skipinitialspace(self): self._read_test(['no space, space, spaces,\ttab'], [['no space', 'space', 'spaces', '\ttab']], skipinitialspace=True) + self._read_test([' , , '], + [['', '', '']], + skipinitialspace=True) + self._read_test([' , , '], + [[None, None, None]], + skipinitialspace=True, quoting=csv.QUOTE_NOTNULL) + self._read_test([' , , '], + [[None, None, None]], + skipinitialspace=True, quoting=csv.QUOTE_STRINGS) + + def test_read_space_delimiter(self): + self._read_test(['a b', ' a ', ' ', ''], + [['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []], + delimiter=' ', skipinitialspace=False) + self._read_test(['a b', ' a ', ' ', ''], + [['a', 'b'], ['a', ''], [''], []], + delimiter=' ', skipinitialspace=True) def test_read_bigfield(self): # This exercises the buffer realloc functionality and field size @@ -410,27 +512,45 @@ def test_read_linenum(self): self.assertRaises(StopIteration, next, r) self.assertEqual(r.line_num, 3) - # TODO: RUSTPYTHON only '\r\n' unsupported - @unittest.expectedFailure def test_roundtrip_quoteed_newlines(self): - with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: - writer = csv.writer(fileobj) - rows = [['a\nb','b'],['c','x\r\nd']] - writer.writerows(rows) - fileobj.seek(0) - for i, row in enumerate(csv.reader(fileobj)): - self.assertEqual(row, rows[i]) + rows = [ + ['\na', 'b\nc', 'd\n'], + ['\re', 'f\rg', 'h\r'], + ['\r\ni', 'j\r\nk', 'l\r\n'], + ['\n\rm', 'n\n\ro', 'p\n\r'], + ['\r\rq', 'r\r\rs', 't\r\r'], + ['\n\nu', 'v\n\nw', 'x\n\n'], + ] + for lineterminator in '\r\n', '\n', '\r': + with self.subTest(lineterminator=lineterminator): + with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: + writer = csv.writer(fileobj, lineterminator=lineterminator) + writer.writerows(rows) + fileobj.seek(0) + for i, row in enumerate(csv.reader(fileobj)): + self.assertEqual(row, rows[i]) - # TODO: RUSTPYTHON only '\r\n' unsupported - @unittest.expectedFailure def test_roundtrip_escaped_unquoted_newlines(self): - with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: - writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\") - rows = [['a\nb','b'],['c','x\r\nd']] - writer.writerows(rows) - fileobj.seek(0) - for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")): - self.assertEqual(row,rows[i]) + rows = [ + ['\na', 'b\nc', 'd\n'], + ['\re', 'f\rg', 'h\r'], + ['\r\ni', 'j\r\nk', 'l\r\n'], + ['\n\rm', 'n\n\ro', 'p\n\r'], + ['\r\rq', 'r\r\rs', 't\r\r'], + ['\n\nu', 'v\n\nw', 'x\n\n'], + ] + for lineterminator in '\r\n', '\n', '\r': + with self.subTest(lineterminator=lineterminator): + with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: + writer = csv.writer(fileobj, lineterminator=lineterminator, + quoting=csv.QUOTE_NONE, escapechar="\\") + writer.writerows(rows) + fileobj.seek(0) + for i, row in enumerate(csv.reader(fileobj, + quoting=csv.QUOTE_NONE, + escapechar="\\")): + self.assertEqual(row, rows[i]) + class TestDialectRegistry(unittest.TestCase): def test_registry_badargs(self): @@ -509,10 +629,10 @@ class space(csv.excel): escapechar = "\\" with TemporaryFile("w+", encoding="utf-8") as fileobj: - fileobj.write("abc def\nc1ccccc1 benzene\n") + fileobj.write("abc def\nc1ccccc1 benzene\n") fileobj.seek(0) reader = csv.reader(fileobj, dialect=space()) - self.assertEqual(next(reader), ["abc", "def"]) + self.assertEqual(next(reader), ["abc", "", "", "def"]) self.assertEqual(next(reader), ["c1ccccc1", "benzene"]) def compare_dialect_123(self, expected, *writeargs, **kwwriteargs): @@ -524,8 +644,6 @@ def compare_dialect_123(self, expected, *writeargs, **kwwriteargs): fileobj.seek(0) self.assertEqual(fileobj.read(), expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_dialect_apply(self): class testA(csv.excel): delimiter = "\t" @@ -556,21 +674,11 @@ class unspecified(): finally: csv.unregister_dialect('testC') - def test_bad_dialect(self): - # Unknown parameter - self.assertRaises(TypeError, csv.reader, [], bad_attr = 0) - # Bad values - self.assertRaises(TypeError, csv.reader, [], delimiter = None) - self.assertRaises(TypeError, csv.reader, [], quoting = -1) - self.assertRaises(TypeError, csv.reader, [], quoting = 100) - def test_copy(self): for name in csv.list_dialects(): dialect = csv.get_dialect(name) self.assertRaises(TypeError, copy.copy, dialect) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_pickle(self): for name in csv.list_dialects(): dialect = csv.get_dialect(name) @@ -657,8 +765,6 @@ def test_quoted_quote(self): '"I see," said the blind man', 'as he picked up his hammer and saw']]) - # Rustpython TODO - @unittest.expectedFailure def test_quoted_nl(self): input = '''\ 1,2,3,"""I see,"" @@ -699,21 +805,15 @@ class EscapedExcel(csv.excel): class TestEscapedExcel(TestCsvBase): dialect = EscapedExcel() - # TODO RUSTPYTHON - @unittest.expectedFailure def test_escape_fieldsep(self): self.writerAssertEqual([['abc,def']], 'abc\\,def\r\n') - # TODO RUSTPYTHON - @unittest.expectedFailure def test_read_escape_fieldsep(self): self.readerAssertEqual('abc\\,def\r\n', [['abc,def']]) class TestDialectUnix(TestCsvBase): dialect = 'unix' - # TODO RUSTPYTHON - @unittest.expectedFailure def test_simple_writer(self): self.writerAssertEqual([[1, 'abc def', 'abc']], '"1","abc def","abc"\n') @@ -730,8 +830,6 @@ class TestQuotedEscapedExcel(TestCsvBase): def test_write_escape_fieldsep(self): self.writerAssertEqual([['abc,def']], '"abc,def"\r\n') - # TODO RUSTPYTHON - @unittest.expectedFailure def test_read_escape_fieldsep(self): self.readerAssertEqual('"abc\\,def"\r\n', [['abc,def']]) @@ -928,8 +1026,6 @@ def test_read_multi(self): "s1": 'abc', "s2": 'def'}) - # TODO RUSTPYTHON - @unittest.expectedFailure def test_read_with_blanks(self): reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n", "1,2,abc,4,5,6\r\n"], @@ -981,9 +1077,11 @@ def test_float_write(self): fileobj.seek(0) self.assertEqual(fileobj.read(), expected) + # TODO: RUSTPYTHON: array needs to be updated + @unittest.expectedFailure def test_char_write(self): import array, string - a = array.array('u', string.ascii_letters) + a = array.array('w', string.ascii_letters) with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: writer = csv.writer(fileobj, dialect="excel") @@ -1007,6 +1105,12 @@ class mydialect(csv.Dialect): mydialect.quoting = None self.assertRaises(csv.Error, mydialect) + mydialect.quoting = 42 + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + 'bad "quoting" value') + mydialect.doublequote = True mydialect.quoting = csv.QUOTE_ALL mydialect.quotechar = '"' @@ -1123,10 +1227,15 @@ class mydialect(csv.Dialect): '"lineterminator" must be a string') def test_invalid_chars(self): - def create_invalid(field_name, value): + def create_invalid(field_name, value, **kwargs): class mydialect(csv.Dialect): - pass + delimiter = ',' + quoting = csv.QUOTE_ALL + quotechar = '"' + lineterminator = '\r\n' setattr(mydialect, field_name, value) + for field_name, value in kwargs.items(): + setattr(mydialect, field_name, value) d = mydialect() for field_name in ("delimiter", "escapechar", "quotechar"): @@ -1135,6 +1244,11 @@ class mydialect(csv.Dialect): self.assertRaises(csv.Error, create_invalid, field_name, "abc") self.assertRaises(csv.Error, create_invalid, field_name, b'x') self.assertRaises(csv.Error, create_invalid, field_name, 5) + self.assertRaises(ValueError, create_invalid, field_name, "\n") + self.assertRaises(ValueError, create_invalid, field_name, "\r") + if field_name != "delimiter": + self.assertRaises(ValueError, create_invalid, field_name, " ", + skipinitialspace=True) class TestSniffer(unittest.TestCase): @@ -1451,8 +1565,7 @@ def test_ordered_dict_reader(self): class MiscTestCase(unittest.TestCase): def test__all__(self): - extra = {'__doc__', '__version__'} - support.check__all__(self, csv, ('csv', '_csv'), extra=extra) + support.check__all__(self, csv, ('csv', '_csv')) def test_subclassable(self): # issue 44089 diff --git a/stdlib/src/csv.rs b/stdlib/src/csv.rs index 730d3b2feb..f93f730c64 100644 --- a/stdlib/src/csv.rs +++ b/stdlib/src/csv.rs @@ -51,6 +51,25 @@ mod _csv { vm.new_exception_msg(super::_csv::error(vm), msg) } + struct StyleDescriptor { + style: QuoteStyle, + name: String + } + + fn get_dialect_from_registry(name: PyObjectRef, vm: &VirtualMachine) -> PyResult { + let Some(name) = name.payload_if_subclass::(vm) else { + return Err(vm.new_exception_msg( + super::_csv::error(vm), + format!("argument 0 must be a string, not '{}'", name.class()), + )); + }; + let g = GLOBAL_HASHMAP.lock(); + if let Some(dialect) = g.get(name.as_str()) { + return Ok(*dialect); + } + Err(vm.new_exception_msg(super::_csv::error(vm), "unknown dialect".to_string())) + } + #[pyattr] #[pyclass(module = "csv", name = "Dialect")] #[derive(Debug, PyPayload, Clone, Copy)] @@ -290,17 +309,7 @@ mod _csv { mut _rest: FuncArgs, vm: &VirtualMachine, ) -> PyResult { - let Some(name) = name.payload_if_subclass::(vm) else { - return Err(vm.new_exception_msg( - super::_csv::error(vm), - format!("argument 0 must be a string, not '{}'", name.class()), - )); - }; - let g = GLOBAL_HASHMAP.lock(); - if let Some(dialect) = g.get(name.as_str()) { - return Ok(*dialect); - } - Err(vm.new_exception_msg(super::_csv::error(vm), "unknown dialect".to_string())) + get_dialect_from_registry(name, vm) } #[pyfunction] @@ -968,7 +977,11 @@ mod _csv { .rposition(|&x| x != b' ') .map(|i| i + 1) .unwrap_or(0); - &input[trimmed_start..trimmed_end] + if trimmed_start < trimmed_end { + &input[trimmed_start..trimmed_end] + } else { + &input[0..0] + } } let input = if *skipinitialspace { let t = input.split(|x| x == delimiter);