Skip to content

Commit 2da9363

Browse files
committed
bpo-28494: Improve zipfile.is_zipfile reliability
The zipfile.is_zipfile function would only search for the EndOfZipfile section header. This failed to correctly identify non-zipfiles that contained this header. Now the zipfile.is_zipfile function verifies the first central directory entry. Changes: * Extended zipfile.is_zipfile to verify zipfile catalog * Added tests to validate failure of binary non-zipfiles
1 parent e307e5c commit 2da9363

File tree

3 files changed

+44
-2
lines changed

3 files changed

+44
-2
lines changed

Lib/test/test_zipfile.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1411,6 +1411,25 @@ def test_is_zip_erroneous_file(self):
14111411
self.assertFalse(zipfile.is_zipfile(fp))
14121412
fp.seek(0, 0)
14131413
self.assertFalse(zipfile.is_zipfile(fp))
1414+
# - passing non-zipfile with ZIP header elements
1415+
# data created using pyPNG like so:
1416+
# d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
1417+
# w = png.Writer(1,2,alpha=True,compression=0)
1418+
# f = open('onepix.png', 'wb')
1419+
# w.write(f, d)
1420+
# w.close()
1421+
data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
1422+
b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
1423+
b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
1424+
b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
1425+
# - passing a filename
1426+
with open(TESTFN, "wb") as fp:
1427+
fp.write(data)
1428+
self.assertFalse(zipfile.is_zipfile(TESTFN))
1429+
# - passing a file-like object
1430+
fp = io.BytesIO()
1431+
fp.write(data)
1432+
self.assertFalse(zipfile.is_zipfile(fp))
14141433

14151434
def test_damaged_zipfile(self):
14161435
"""Check that zipfiles with missing bytes at the end raise BadZipFile."""

Lib/zipfile.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,18 @@ def _strip_extra(extra, xids):
186186

187187
def _check_zipfile(fp):
188188
try:
189-
if _EndRecData(fp):
190-
return True # file has correct magic number
189+
endrec = _EndRecData(fp)
190+
if endrec:
191+
if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
192+
return True # Empty zipfiles are still zipfiles
193+
elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
194+
fp.seek(endrec[_ECD_OFFSET]) # Central directory is on the same disk
195+
if fp.tell() == endrec[_ECD_OFFSET] and endrec[_ECD_SIZE] >= sizeCentralDir:
196+
data = fp.read(sizeCentralDir) # CD is where we expect it to be
197+
if len(data) == sizeCentralDir:
198+
centdir = struct.unpack(structCentralDir, data) # CD is the right size
199+
if centdir[_CD_SIGNATURE] == stringCentralDir:
200+
return True # First central directory entry has correct magic number
191201
except OSError:
192202
pass
193203
return False
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Improve zipfile validation in `zipfile.is_zipfile`.
2+
3+
Before this change `zipfile.is_zipfile()` only checked the End Central Directory
4+
signature. If the signature could be found in the last 64k of the file,
5+
success! This produced false positives on any file with `'PK\x05\x06'` in the
6+
last 64k of the file - including PDFs and PNGs.
7+
8+
This is now corrected by actually validating the Central Directory location
9+
and size based on the information provided by the End Central Directory
10+
along with verifying the Central Directory signature of the first entry.
11+
12+
This should be sufficient for the vast number of zipfiles with fewer
13+
false positives.

0 commit comments

Comments
 (0)