From ea65e69620ab5f54bb837e30e33014190cba1906 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Tue, 28 Jan 2025 22:09:26 +0530 Subject: [PATCH 1/6] Fix a SEGV in _csv.c module by modifying int type to Py_ssize_t --- Modules/_csv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 7ca30e39e00c0c..e5ae853590bf2c 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1138,7 +1138,7 @@ join_append_data(WriterObj *self, int field_kind, const void *field_data, int copy_phase) { DialectObj *dialect = self->dialect; - int i; + Py_ssize_t i; Py_ssize_t rec_len; #define INCLEN \ From 33edb99e274dfbebbefab0b566834730c0c1a764 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Tue, 28 Jan 2025 22:33:49 +0530 Subject: [PATCH 2/6] Add a test case for writing data that is over 2GB --- Lib/test/test_csv.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 4af8f7f480e759..1f7eb6877c1dc3 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1560,6 +1560,20 @@ def test_ordered_dict_reader(self): OrderedDict([('fname', 'John'), ('lname', 'Cleese')]), ]) +class HugeDataTest(unittest.TestCase): + def test_write_huge_data(self): + bad_size = 2 * 1024 * 1024 * 1024 + 1 # Just over 2GB + val = 'x' * bad_size + with TemporaryFile("w", encoding="utf-8", newline='') as fileobj: + csv_writer = csv.writer(fileobj, delimiter=',', + quotechar='|', + quoting=csv.QUOTE_MINIMAL) + csv_writer.writerow([val]) + fileobj.seek(0) + reader = csv.reader(fileobj) + row = next(reader) + self.assertEqual(len(row), 1) + class MiscTestCase(unittest.TestCase): def test__all__(self): From 8e23d250152fcc038644da36049c85a91c32fcf7 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Wed, 29 Jan 2025 11:26:38 +0530 Subject: [PATCH 3/6] Undo the test --- Lib/test/test_csv.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 1f7eb6877c1dc3..1ddc0b92f55bd1 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1560,19 +1560,6 @@ def test_ordered_dict_reader(self): OrderedDict([('fname', 'John'), ('lname', 'Cleese')]), ]) -class HugeDataTest(unittest.TestCase): - def test_write_huge_data(self): - bad_size = 2 * 1024 * 1024 * 1024 + 1 # Just over 2GB - val = 'x' * bad_size - with TemporaryFile("w", encoding="utf-8", newline='') as fileobj: - csv_writer = csv.writer(fileobj, delimiter=',', - quotechar='|', - quoting=csv.QUOTE_MINIMAL) - csv_writer.writerow([val]) - fileobj.seek(0) - reader = csv.reader(fileobj) - row = next(reader) - self.assertEqual(len(row), 1) class MiscTestCase(unittest.TestCase): From 584074a8e03ab3785519a3a58ab96f10216dc0c8 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Wed, 29 Jan 2025 11:27:59 +0530 Subject: [PATCH 4/6] Revert to main --- Lib/test/test_csv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 1ddc0b92f55bd1..4af8f7f480e759 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1561,7 +1561,6 @@ def test_ordered_dict_reader(self): ]) - class MiscTestCase(unittest.TestCase): def test__all__(self): support.check__all__(self, csv, ('csv', '_csv')) From c5761ecd8149018a4a5a22c13380bd90f9cbe6e2 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Wed, 29 Jan 2025 14:32:32 +0530 Subject: [PATCH 5/6] Add blurb notes --- .../next/Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst diff --git a/Misc/NEWS.d/next/Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst b/Misc/NEWS.d/next/Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst new file mode 100644 index 00000000000000..288009660ebfd0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst @@ -0,0 +1,2 @@ +Fixed a segmentation fault in the :mod:`csv` module when writing data fields +larger than 2GB due to integer overflow. From f23f20eb17bba4af3ece32dfe6069280e744664f Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Wed, 29 Jan 2025 16:13:45 +0530 Subject: [PATCH 6/6] I think this captures better --- .../Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst b/Misc/NEWS.d/next/Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst index 288009660ebfd0..7e00b44c0ef471 100644 --- a/Misc/NEWS.d/next/Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst +++ b/Misc/NEWS.d/next/Library/2025-01-29-14-30-54.gh-issue-129409.JZbOE6.rst @@ -1,2 +1,2 @@ -Fixed a segmentation fault in the :mod:`csv` module when writing data fields -larger than 2GB due to integer overflow. +Fix an integer overflow in the :mod:`csv` module when writing a data field +larger than 2GB.