Skip to content

gh-117779: Fix reading duplicated entries in zipfile by name #129254

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 106 additions & 2 deletions Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2415,7 +2415,36 @@ def test_decompress_without_3rd_party_library(self):
self.assertRaises(RuntimeError, zf.extract, 'a.txt')

@requires_zlib()
def test_full_overlap(self):
def test_full_overlap_different_names(self):
# Comments are used instead of a docstring so that unittest keeps
# reporting the test by its method name.
#
# Fixture layout: a single local file header at offset 0 whose stored
# name is 'b' (16 compressed bytes, 1033 uncompressed), followed by two
# central directory entries named 'a' and 'b' that both point at
# offset 0 with identical sizes.
data = (
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00b\xed'
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
b'\x00\x00\x00'
)
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
# Both names are listed, in central directory order.
self.assertEqual(zipf.namelist(), ['a', 'b'])
# The two entries describe exactly the same region of the archive.
zi = zipf.getinfo('a')
self.assertEqual(zi.header_offset, 0)
self.assertEqual(zi.compress_size, 16)
self.assertEqual(zi.file_size, 1033)
zi = zipf.getinfo('b')
self.assertEqual(zi.header_offset, 0)
self.assertEqual(zi.compress_size, 16)
self.assertEqual(zi.file_size, 1033)
# 'b' matches the name stored in the local header, so the full
# 1033 bytes are expected back.
self.assertEqual(len(zipf.read('b')), 1033)
# 'a' does not match the local header name; the read is expected to
# fail with a "File name ... differ" BadZipFile error.
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
zipf.read('a')

@requires_zlib()
def test_full_overlap_different_names2(self):
data = (
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
Expand All @@ -2439,9 +2468,43 @@ def test_full_overlap(self):
self.assertEqual(zi.header_offset, 0)
self.assertEqual(zi.compress_size, 16)
self.assertEqual(zi.file_size, 1033)
self.assertEqual(len(zipf.read('a')), 1033)
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
zipf.read('b')
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
self.assertEqual(len(zipf.read('a')), 1033)
self.assertEqual(cm.filename, __file__)

@requires_zlib()
def test_full_overlap_same_name(self):
# Comments are used instead of a docstring so that unittest keeps
# reporting the test by its method name.
#
# Fixture layout: a single local file header at offset 0 whose stored
# name is 'a' (16 compressed bytes, 1033 uncompressed), followed by two
# central directory entries that are both named 'a' and both point at
# offset 0, i.e. a duplicated entry.
data = (
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK\x05'
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
b'\x00\x00\x00'
)
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
# Both duplicates are kept in the listing.
self.assertEqual(zipf.namelist(), ['a', 'a'])
self.assertEqual(len(zipf.infolist()), 2)
zi = zipf.getinfo('a')
self.assertEqual(zi.header_offset, 0)
self.assertEqual(zi.compress_size, 16)
self.assertEqual(zi.file_size, 1033)
# Reading by name, by the ZipInfo returned from getinfo(), and by the
# last entry of infolist() is expected to return the full content.
self.assertEqual(len(zipf.read('a')), 1033)
self.assertEqual(len(zipf.read(zi)), 1033)
self.assertEqual(len(zipf.read(zipf.infolist()[1])), 1033)
# Reading the earlier duplicate via its ZipInfo is still expected to
# return the content, but with an 'Overlapped entries' UserWarning
# that is attributed to this test file.
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
self.assertEqual(len(zipf.read(zipf.infolist()[0])), 1033)
self.assertEqual(cm.filename, __file__)
# The same warning is expected when the earlier duplicate is only
# opened (and immediately closed) rather than read.
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
zipf.open(zipf.infolist()[0]).close()
self.assertEqual(cm.filename, __file__)

@requires_zlib()
def test_quoted_overlap(self):
Expand Down Expand Up @@ -2474,6 +2537,47 @@ def test_quoted_overlap(self):
zipf.read('a')
self.assertEqual(len(zipf.read('b')), 1033)

@requires_zlib()
def test_overlap_with_central_dir(self):
# Comments are used instead of a docstring so that unittest keeps
# reporting the test by its method name.
#
# Fixture layout: the archive contains no local file header at all.
# It starts directly with a central directory record for 'a'
# (11 compressed bytes, 1033 uncompressed) whose header offset is 0,
# so the entry points back at the central directory itself.
data = (
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81\x00\x00\x00\x00aP'
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
b'\x00\x00\x00\x00\x00'
)
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
# The directory itself is parsed without error.
self.assertEqual(zipf.namelist(), ['a'])
self.assertEqual(len(zipf.infolist()), 1)
zi = zipf.getinfo('a')
self.assertEqual(zi.header_offset, 0)
self.assertEqual(zi.compress_size, 11)
self.assertEqual(zi.file_size, 1033)
# Offset 0 holds a central directory signature rather than a local
# file header, so the read is expected to fail on the magic number.
with self.assertRaisesRegex(zipfile.BadZipFile, 'Bad magic number'):
zipf.read('a')

@requires_zlib()
def test_overlap_with_archive_comment(self):
# Comments are used instead of a docstring so that unittest keeps
# reporting the test by its method name.
#
# Fixture layout: the central directory record for 'a' comes first and
# gives header offset 69 (0x45). The end-of-central-directory record
# declares a 42-byte archive comment, and that comment is itself a
# local file header for 'a' plus 11 bytes of compressed data. The
# entry's data therefore lives inside the archive comment, after the
# central directory.
data = (
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81E\x00\x00\x00aP'
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
b'\x00\x00\x00*\x00'
b'PK\x03\x04\x14\x00\x00\x00\x08\x00G_|Z\xe2\x1e'
b'8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00aK'
b'L\x1c\x05\xa3`\x14\x8cx\x00\x00'
)
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
# The directory itself is parsed without error.
self.assertEqual(zipf.namelist(), ['a'])
self.assertEqual(len(zipf.infolist()), 1)
zi = zipf.getinfo('a')
self.assertEqual(zi.header_offset, 69)
self.assertEqual(zi.compress_size, 11)
self.assertEqual(zi.file_size, 1033)
# Even though a well-formed local header exists at offset 69, the
# read is expected to be rejected as an overlapped entry.
with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
zipf.read('a')

def tearDown(self):
unlink(TESTFN)
unlink(TESTFN2)
Expand Down
16 changes: 12 additions & 4 deletions Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1545,9 +1545,8 @@ def _RealGetContents(self):
print("total", total)

end_offset = self.start_dir
for zinfo in sorted(self.filelist,
key=lambda zinfo: zinfo.header_offset,
reverse=True):
for zinfo in reversed(sorted(self.filelist,
key=lambda zinfo: zinfo.header_offset)):
zinfo._end_offset = end_offset
end_offset = zinfo.header_offset

Expand Down Expand Up @@ -1709,7 +1708,16 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):

if (zinfo._end_offset is not None and
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
if zinfo._end_offset == zinfo.header_offset:
import warnings
warnings.warn(
f"Overlapped entries: {zinfo.orig_filename!r} "
f"(possible zip bomb)",
skip_file_prefixes=(os.path.dirname(__file__),))
else:
raise BadZipFile(
f"Overlapped entries: {zinfo.orig_filename!r} "
f"(possible zip bomb)")

# check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix reading duplicated entries in :mod:`zipfile` by name.
Reading duplicated entries (except the last one) by ``ZipInfo``
now emits a warning instead of raising an exception.
Loading