Skip to content

Commit f8fade1

Browse files
committed
Introduce _scan_data_descriptor_no_sig_by_decompression
1 parent 31c4c93 commit f8fade1

File tree

2 files changed

+102
-2
lines changed

2 files changed

+102
-2
lines changed

Lib/test/test_zipfile/test_core.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2008,6 +2008,9 @@ def test_repack_data_descriptor_no_sig(self):
20082008

20092009
def test_repack_data_descriptor_no_sig_strict(self):
20102010
"""Should skip data descriptor without signature when `strict_descriptor` is set."""
2011+
if self.compression not in (zipfile.ZIP_STORED, zipfile.ZIP_LZMA):
2012+
self.skipTest('require unsupported decompression method')
2013+
20112014
for ii in ([0], [0, 1]):
20122015
with self.subTest(remove=ii):
20132016
# calculate the expected results
@@ -2046,6 +2049,47 @@ def test_repack_data_descriptor_no_sig_strict(self):
20462049
with zipfile.ZipFile(TESTFN) as zh:
20472050
self.assertIsNone(zh.testzip())
20482051

2052+
def test_repack_data_descriptor_no_sig_strict_by_decompressoin(self):
    """Should correctly handle file entries using data descriptor without signature
    through decompression."""
    # NOTE(review): the method name misspells "decompression"; it is kept
    # unchanged here so the test id stays the same.
    excluded_methods = (zipfile.ZIP_STORED, zipfile.ZIP_LZMA)
    if self.compression in excluded_methods:
        self.skipTest('require supported decompression method')

    for ii in ([0], [0, 1]):
        with self.subTest(remove=ii):
            # Reference archive: written directly with only the entries
            # that are expected to survive the removal.
            test_files = [
                entry for index, entry in enumerate(self.test_files)
                if index not in ii
            ]
            with open(TESTFN, 'wb') as fh, \
                    mock.patch('zipfile.struct.pack', side_effect=struct_pack_no_dd_sig):
                expected_zinfos = self._prepare_zip_from_test_files(Unseekable(fh), test_files)
            expected_size = os.path.getsize(TESTFN)

            # Archive under test: written with every entry, then entries
            # are removed and the archive is repacked in strict mode.
            with open(TESTFN, 'wb') as fh, \
                    mock.patch('zipfile.struct.pack', side_effect=struct_pack_no_dd_sig):
                self._prepare_zip_from_test_files(Unseekable(fh), self.test_files)

            with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
                # The archive was written through an unseekable wrapper, so
                # every entry must carry the data descriptor flag (bit 3).
                for zi in zh.infolist():
                    self.assertTrue(zi.flag_bits & 8, f'data descriptor flag not set: {zi.filename}')

                for index in ii:
                    zh.remove(self.test_files[index][0])
                zh.repack(strict_descriptor=True)

                # The remaining entries must match the reference archive.
                remaining_zinfos = [ComparableZipInfo(zi) for zi in zh.infolist()]
                self.assertEqual(remaining_zinfos, expected_zinfos)

            # The repacked file must be exactly as large as the reference.
            self.assertEqual(os.path.getsize(TESTFN), expected_size)

            # The repacked archive must still pass an integrity check.
            with zipfile.ZipFile(TESTFN) as zh:
                self.assertIsNone(zh.testzip())
2092+
20492093
def test_repack_data_descriptor_no_sig_and_zip64(self):
20502094
"""Should correctly handle file entries using data descriptor without signature and zip64."""
20512095
for ii in ([0], [0, 1], [1], [2]):

Lib/zipfile/__init__.py

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,8 +1628,14 @@ def _validate_local_file_entry(self, fp, offset, end_offset):
16281628
zip64 = fheader[_FH_UNCOMPRESSED_SIZE] == 0xffffffff
16291629

16301630
dd = self._scan_data_descriptor(fp, pos, end_offset, zip64)
1631-
if dd is None and not self.strict_descriptor:
1632-
dd = self._scan_data_descriptor_no_sig(fp, pos, end_offset, zip64)
1631+
if dd is None:
1632+
dd = self._scan_data_descriptor_no_sig_by_decompression(
1633+
fp, pos, end_offset, zip64, fheader[_FH_COMPRESSION_METHOD])
1634+
if dd is False:
1635+
if not self.strict_descriptor:
1636+
dd = self._scan_data_descriptor_no_sig(fp, pos, end_offset, zip64)
1637+
else:
1638+
dd = None
16331639
if dd is None:
16341640
return None
16351641

@@ -1705,6 +1711,56 @@ def _scan_data_descriptor_no_sig(self, fp, offset, end_offset, zip64, chunk_size
17051711

17061712
return None
17071713

1714+
def _scan_data_descriptor_no_sig_by_decompression(self, fp, offset, end_offset, zip64, method):
    """Find a signature-less data descriptor by decompressing the entry data.

    The compressed stream starting at *offset* is fed to a decompressor
    until it reports end-of-stream; the bytes right after that point are
    then read as the data descriptor and checked against the measured
    compressed size.

    Parameters:
        fp: seekable binary file object of the archive.
        offset: position where the compressed data starts.
        end_offset: position the entry (data plus descriptor) must not
            extend past.
        zip64: if true the descriptor uses 8-byte size fields ('<LQQ'),
            otherwise 4-byte fields ('<LLL').
        method: compression method number taken from the local header.

    Returns:
        False -- this strategy is not applicable (range too small, no
            usable decompressor for *method*, or LZMA); the caller may
            fall back to another strategy.
        None -- the strategy was applied but no valid descriptor was found.
        (crc, compress_size, file_size, dd_size) -- on success.
    """
    dd_fmt = '<LQQ' if zip64 else '<LLL'
    dd_size = struct.calcsize(dd_fmt)

    # Not even an empty stream followed by a descriptor would fit.
    if offset + dd_size > end_offset:
        return False

    try:
        decompressor = _get_decompressor(method)
    except (NotImplementedError, RuntimeError):
        # NotImplementedError: unsupported compression method.
        # RuntimeError: presumably raised when the codec module (zlib,
        # bz2, ...) is not available in this build -- treat it the same
        # way instead of letting it abort the whole scan.
        return False

    # No decompressor object for this method, so there is nothing to trace.
    if decompressor is None:
        return False

    # Current LZMADecompressor is unreliable since its `.eof` is usually
    # not set as expected.
    if isinstance(decompressor, LZMADecompressor):
        return False

    try:
        # Leave room for the descriptor itself after the compressed data.
        pos = self._find_compression_end_offset(fp, offset, end_offset - dd_size, decompressor)
    except Exception:
        # Deliberately broad: each decompressor raises its own error type
        # on corrupt input, and any failure means "no descriptor here".
        return None

    fp.seek(pos)
    dd = fp.read(dd_size)
    if len(dd) != dd_size:
        # Truncated file: struct.unpack() would raise on a short buffer.
        return None

    crc, compress_size, file_size = struct.unpack(dd_fmt, dd)
    # The recorded compressed size must match what was actually consumed.
    if pos - offset != compress_size:
        return None

    return crc, compress_size, file_size, dd_size
1746+
1747+
def _find_compression_end_offset(self, fp, offset, end_offset, decompressor, chunk_size=4096):
    """Return the file position just past the end of a compressed stream.

    Data is read from *fp* starting at *offset* and fed to *decompressor*
    until its ``eof`` attribute becomes true. The decompressed output is
    discarded; only the stream length matters.

    Parameters:
        fp: seekable binary file object.
        offset: position where the compressed stream starts.
        end_offset: position the scan must not read past.
        decompressor: object providing ``decompress()``, ``eof`` and
            ``unused_data``.
        chunk_size: maximum number of bytes read per iteration.

    Returns:
        The absolute position of the first byte after the stream.

    Raises:
        EOFError: the bound (or the end of the file) was reached before
            the decompressor reported end-of-stream.
        Exception: whatever ``decompressor.decompress()`` raises on
            invalid data.
    """
    fp.seek(offset)
    read_size = 0
    while True:
        remaining = end_offset - offset - read_size
        # Never hand a non-positive size to read(): a negative size means
        # "read until EOF" and would let the scan run past end_offset.
        chunk = fp.read(min(chunk_size, remaining)) if remaining > 0 else b''
        if not chunk:
            raise EOFError('Unexpected EOF while decompressing')

        # may raise on error
        decompressor.decompress(chunk)

        read_size += len(chunk)

        if decompressor.eof:
            # Bytes read past the end of the stream are not part of it.
            return offset + read_size - len(decompressor.unused_data)
1763+
17081764
def _calc_local_file_entry_size(self, fp, zinfo):
17091765
fp.seek(zinfo.header_offset)
17101766
fheader = self._read_local_file_header(fp)

0 commit comments

Comments
 (0)