From b8a9ceabc9a1b666d087631e3558c4f592619d09 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 24 Jan 2025 12:30:41 +0200 Subject: [PATCH 1/2] gh-117779: Fix reading duplicated entries in zipfile by name --- Lib/test/test_zipfile/test_core.py | 36 ++++++++++++++++--- Lib/zipfile/__init__.py | 5 ++- ...-01-24-12-30-38.gh-issue-117779.gADGXI.rst | 1 + 3 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 76b1de0e3519f9..0bf5c9f100bf24 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2417,10 +2417,10 @@ def test_decompress_without_3rd_party_library(self): self.assertRaises(RuntimeError, zf.extract, 'a.txt') @requires_zlib() - def test_full_overlap(self): + def test_full_overlap_different_names(self): data = ( b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' - b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00b\xed' b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' @@ -2441,9 +2441,37 @@ def test_full_overlap(self): self.assertEqual(zi.header_offset, 0) self.assertEqual(zi.compress_size, 16) self.assertEqual(zi.file_size, 1033) - self.assertEqual(len(zipf.read('a')), 1033) + self.assertEqual(len(zipf.read('b')), 1033) with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): - zipf.read('b') + zipf.read('a') + + @requires_zlib() + def test_full_overlap_same_name(self): + data = ( + b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' + b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' + b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' + b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK' + b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK\x05' + b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00' + b'\x00\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a', 'a']) + self.assertEqual(len(zipf.infolist()), 2) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + self.assertEqual(len(zipf.read('a')), 1033) + self.assertEqual(len(zipf.read(zi)), 1033) + self.assertEqual(len(zipf.read(zipf.infolist()[1])), 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'): + zipf.read(zipf.infolist()[0]) @requires_zlib() def test_quoted_overlap(self): diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index b8b496ad9471f4..9f1c7c540ecb17 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1545,9 +1545,8 @@ def _RealGetContents(self): print("total", total) end_offset = self.start_dir - for zinfo in sorted(self.filelist, - key=lambda zinfo: zinfo.header_offset, - reverse=True): + for zinfo in reversed(sorted(self.filelist, + key=lambda zinfo: zinfo.header_offset)): zinfo._end_offset = end_offset end_offset = zinfo.header_offset diff --git a/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst b/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst new file mode 100644 index 00000000000000..1400c3e8d36b95 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst @@ -0,0 +1 @@ +Fix reading duplicated entries in :mod:`zipfile` by name. From a334ec021dabcddd0f183479e8ef0d69b2b5d76a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 28 Mar 2025 13:20:05 +0200 Subject: [PATCH 2/2] Emit a warning instead of raising an exception. --- Lib/test/test_zipfile/test_core.py | 80 ++++++++++++++++++- Lib/zipfile/__init__.py | 11 ++- ...-01-24-12-30-38.gh-issue-117779.gADGXI.rst | 2 + 3 files changed, 90 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 60177c593f7290..0934a66c41684c 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2443,6 +2443,37 @@ def test_full_overlap_different_names(self): with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): zipf.read('a') + @requires_zlib() + def test_full_overlap_different_names2(self): + data = ( + b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' + b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' + b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' + b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK' + b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05' + b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00' + b'\x00\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a', 'b']) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + zi = zipf.getinfo('b') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): + zipf.read('b') + with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm: + self.assertEqual(len(zipf.read('a')), 1033) + self.assertEqual(cm.filename, __file__) + @requires_zlib() def test_full_overlap_same_name(self): data = ( @@ -2468,8 +2499,12 @@ def test_full_overlap_same_name(self): self.assertEqual(len(zipf.read('a')), 1033) self.assertEqual(len(zipf.read(zi)), 1033) self.assertEqual(len(zipf.read(zipf.infolist()[1])), 1033) - with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'): - zipf.read(zipf.infolist()[0]) + with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm: + self.assertEqual(len(zipf.read(zipf.infolist()[0])), 1033) + self.assertEqual(cm.filename, __file__) + with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm: + zipf.open(zipf.infolist()[0]).close() + self.assertEqual(cm.filename, __file__) @requires_zlib() def test_quoted_overlap(self): @@ -2502,6 +2537,47 @@ def test_quoted_overlap(self): zipf.read('a') self.assertEqual(len(zipf.read('b')), 1033) + @requires_zlib() + def test_overlap_with_central_dir(self): + data = ( + b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z' + b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81\x00\x00\x00\x00aP' + b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a']) + self.assertEqual(len(zipf.infolist()), 1) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 11) + self.assertEqual(zi.file_size, 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Bad magic number'): + zipf.read('a') + + @requires_zlib() + def test_overlap_with_archive_comment(self): + data = ( + b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z' + b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81E\x00\x00\x00aP' + b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00' + b'\x00\x00\x00*\x00' + b'PK\x03\x04\x14\x00\x00\x00\x08\x00G_|Z\xe2\x1e' + b'8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00aK' + b'L\x1c\x05\xa3`\x14\x8cx\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a']) + self.assertEqual(len(zipf.infolist()), 1) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 69) + self.assertEqual(zi.compress_size, 11) + self.assertEqual(zi.file_size, 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'): + zipf.read('a') + def tearDown(self): unlink(TESTFN) unlink(TESTFN2) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 9f1c7c540ecb17..715d4e783b000b 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1708,7 +1708,16 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): if (zinfo._end_offset is not None and zef_file.tell() + zinfo.compress_size > zinfo._end_offset): - raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)") + if zinfo._end_offset == zinfo.header_offset: + import warnings + warnings.warn( + f"Overlapped entries: {zinfo.orig_filename!r} " + f"(possible zip bomb)", + skip_file_prefixes=(os.path.dirname(__file__),)) + else: + raise BadZipFile( + f"Overlapped entries: {zinfo.orig_filename!r} " + f"(possible zip bomb)") # check for encrypted flag & handle password is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED diff --git a/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst b/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst index 1400c3e8d36b95..115362cfc83284 100644 --- a/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst +++ b/Misc/NEWS.d/next/Library/2025-01-24-12-30-38.gh-issue-117779.gADGXI.rst @@ -1 +1,3 @@ Fix reading duplicated entries in :mod:`zipfile` by name. +Reading duplicated entries (except the last one) by ``ZipInfo`` +now emits a warning instead of raising an exception.