Skip to content

Commit a05e930

Browse files
[3.12] gh-117779: Fix reading duplicated entries in zipfile by name (GH-129254) (GH-132264)
(cherry picked from commit 0f04f24)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent b41e008 commit a05e930

File tree

3 files changed: 121 additions and 6 deletions.

Lib/test/test_zipfile/test_core.py

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2348,7 +2348,36 @@ def test_decompress_without_3rd_party_library(self):
23482348
self.assertRaises(RuntimeError, zf.extract, 'a.txt')
23492349

23502350
@requires_zlib()
2351-
def test_full_overlap(self):
2351+
def test_full_overlap_different_names(self):
2352+
data = (
2353+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2354+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00b\xed'
2355+
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
2356+
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
2357+
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
2358+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
2359+
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2360+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
2361+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
2362+
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
2363+
b'\x00\x00\x00'
2364+
)
2365+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2366+
self.assertEqual(zipf.namelist(), ['a', 'b'])
2367+
zi = zipf.getinfo('a')
2368+
self.assertEqual(zi.header_offset, 0)
2369+
self.assertEqual(zi.compress_size, 16)
2370+
self.assertEqual(zi.file_size, 1033)
2371+
zi = zipf.getinfo('b')
2372+
self.assertEqual(zi.header_offset, 0)
2373+
self.assertEqual(zi.compress_size, 16)
2374+
self.assertEqual(zi.file_size, 1033)
2375+
self.assertEqual(len(zipf.read('b')), 1033)
2376+
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
2377+
zipf.read('a')
2378+
2379+
@requires_zlib()
2380+
def test_full_overlap_different_names2(self):
23522381
data = (
23532382
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
23542383
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
@@ -2372,9 +2401,43 @@ def test_full_overlap(self):
23722401
self.assertEqual(zi.header_offset, 0)
23732402
self.assertEqual(zi.compress_size, 16)
23742403
self.assertEqual(zi.file_size, 1033)
2375-
self.assertEqual(len(zipf.read('a')), 1033)
23762404
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
23772405
zipf.read('b')
2406+
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
2407+
self.assertEqual(len(zipf.read('a')), 1033)
2408+
self.assertEqual(cm.filename, __file__)
2409+
2410+
@requires_zlib()
2411+
def test_full_overlap_same_name(self):
2412+
data = (
2413+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2414+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
2415+
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
2416+
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
2417+
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
2418+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
2419+
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2420+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
2421+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK\x05'
2422+
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
2423+
b'\x00\x00\x00'
2424+
)
2425+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2426+
self.assertEqual(zipf.namelist(), ['a', 'a'])
2427+
self.assertEqual(len(zipf.infolist()), 2)
2428+
zi = zipf.getinfo('a')
2429+
self.assertEqual(zi.header_offset, 0)
2430+
self.assertEqual(zi.compress_size, 16)
2431+
self.assertEqual(zi.file_size, 1033)
2432+
self.assertEqual(len(zipf.read('a')), 1033)
2433+
self.assertEqual(len(zipf.read(zi)), 1033)
2434+
self.assertEqual(len(zipf.read(zipf.infolist()[1])), 1033)
2435+
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
2436+
self.assertEqual(len(zipf.read(zipf.infolist()[0])), 1033)
2437+
self.assertEqual(cm.filename, __file__)
2438+
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
2439+
zipf.open(zipf.infolist()[0]).close()
2440+
self.assertEqual(cm.filename, __file__)
23782441

23792442
@requires_zlib()
23802443
def test_quoted_overlap(self):
@@ -2407,6 +2470,47 @@ def test_quoted_overlap(self):
24072470
zipf.read('a')
24082471
self.assertEqual(len(zipf.read('b')), 1033)
24092472

2473+
@requires_zlib()
2474+
def test_overlap_with_central_dir(self):
2475+
data = (
2476+
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
2477+
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
2478+
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81\x00\x00\x00\x00aP'
2479+
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
2480+
b'\x00\x00\x00\x00\x00'
2481+
)
2482+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2483+
self.assertEqual(zipf.namelist(), ['a'])
2484+
self.assertEqual(len(zipf.infolist()), 1)
2485+
zi = zipf.getinfo('a')
2486+
self.assertEqual(zi.header_offset, 0)
2487+
self.assertEqual(zi.compress_size, 11)
2488+
self.assertEqual(zi.file_size, 1033)
2489+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Bad magic number'):
2490+
zipf.read('a')
2491+
2492+
@requires_zlib()
2493+
def test_overlap_with_archive_comment(self):
2494+
data = (
2495+
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
2496+
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
2497+
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81E\x00\x00\x00aP'
2498+
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
2499+
b'\x00\x00\x00*\x00'
2500+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00G_|Z\xe2\x1e'
2501+
b'8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00aK'
2502+
b'L\x1c\x05\xa3`\x14\x8cx\x00\x00'
2503+
)
2504+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2505+
self.assertEqual(zipf.namelist(), ['a'])
2506+
self.assertEqual(len(zipf.infolist()), 1)
2507+
zi = zipf.getinfo('a')
2508+
self.assertEqual(zi.header_offset, 69)
2509+
self.assertEqual(zi.compress_size, 11)
2510+
self.assertEqual(zi.file_size, 1033)
2511+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
2512+
zipf.read('a')
2513+
24102514
def tearDown(self):
24112515
unlink(TESTFN)
24122516
unlink(TESTFN2)

Lib/zipfile/__init__.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,9 +1490,8 @@ def _RealGetContents(self):
14901490
print("total", total)
14911491

14921492
end_offset = self.start_dir
1493-
for zinfo in sorted(self.filelist,
1494-
key=lambda zinfo: zinfo.header_offset,
1495-
reverse=True):
1493+
for zinfo in reversed(sorted(self.filelist,
1494+
key=lambda zinfo: zinfo.header_offset)):
14961495
zinfo._end_offset = end_offset
14971496
end_offset = zinfo.header_offset
14981497

@@ -1654,7 +1653,16 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
16541653

16551654
if (zinfo._end_offset is not None and
16561655
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1657-
raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1656+
if zinfo._end_offset == zinfo.header_offset:
1657+
import warnings
1658+
warnings.warn(
1659+
f"Overlapped entries: {zinfo.orig_filename!r} "
1660+
f"(possible zip bomb)",
1661+
skip_file_prefixes=(os.path.dirname(__file__),))
1662+
else:
1663+
raise BadZipFile(
1664+
f"Overlapped entries: {zinfo.orig_filename!r} "
1665+
f"(possible zip bomb)")
16581666

16591667
# check for encrypted flag & handle password
16601668
is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix reading duplicated entries in :mod:`zipfile` by name.
2+
Reading duplicated entries (except the last one) by ``ZipInfo``
3+
now emits a warning instead of raising an exception.

0 commit comments

Comments
 (0)