Skip to content

Commit 93b061b

Browse files
committed
Issue #1159051: Back out a fix for handling corrupted gzip files that
broke backwards compatibility.
1 parent a9217a4 commit 93b061b

File tree

4 files changed

+41
-68
lines changed

4 files changed

+41
-68
lines changed

Lib/gzip.py

+38-35
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ def write32u(output, value):
3333
# or unsigned.
3434
output.write(struct.pack("<L", value))
3535

36+
def read32(input):
37+
return struct.unpack("<I", input.read(4))[0]
38+
3639
def open(filename, mode="rb", compresslevel=9):
3740
"""Shorthand for GzipFile(filename, mode, compresslevel).
3841
@@ -256,32 +259,27 @@ def _init_read(self):
256259
self.crc = zlib.crc32(b"") & 0xffffffff
257260
self.size = 0
258261

259-
def _read_exact(self, n):
260-
data = self.fileobj.read(n)
261-
while len(data) < n:
262-
b = self.fileobj.read(n - len(data))
263-
if not b:
264-
raise EOFError("Compressed file ended before the "
265-
"end-of-stream marker was reached")
266-
data += b
267-
return data
268-
269262
def _read_gzip_header(self):
270263
magic = self.fileobj.read(2)
271264
if magic == b'':
272-
return False
265+
raise EOFError("Reached EOF")
273266

274267
if magic != b'\037\213':
275268
raise IOError('Not a gzipped file')
276-
277-
method, flag, self.mtime = struct.unpack("<BBIxx", self._read_exact(8))
269+
method = ord( self.fileobj.read(1) )
278270
if method != 8:
279271
raise IOError('Unknown compression method')
272+
flag = ord( self.fileobj.read(1) )
273+
self.mtime = read32(self.fileobj)
274+
# extraflag = self.fileobj.read(1)
275+
# os = self.fileobj.read(1)
276+
self.fileobj.read(2)
280277

281278
if flag & FEXTRA:
282279
# Read & discard the extra field, if present
283-
extra_len, = struct.unpack("<H", self._read_exact(2))
284-
self._read_exact(extra_len)
280+
xlen = ord(self.fileobj.read(1))
281+
xlen = xlen + 256*ord(self.fileobj.read(1))
282+
self.fileobj.read(xlen)
285283
if flag & FNAME:
286284
# Read and discard a null-terminated string containing the filename
287285
while True:
@@ -295,13 +293,12 @@ def _read_gzip_header(self):
295293
if not s or s==b'\000':
296294
break
297295
if flag & FHCRC:
298-
self._read_exact(2) # Read & discard the 16-bit header CRC
296+
self.fileobj.read(2) # Read & discard the 16-bit header CRC
299297

300298
unused = self.fileobj.unused()
301299
if unused:
302300
uncompress = self.decompress.decompress(unused)
303301
self._add_read_data(uncompress)
304-
return True
305302

306303
def write(self,data):
307304
self._check_closed()
@@ -335,16 +332,20 @@ def read(self, size=-1):
335332

336333
readsize = 1024
337334
if size < 0: # get the whole thing
338-
while self._read(readsize):
339-
readsize = min(self.max_read_chunk, readsize * 2)
340-
size = self.extrasize
335+
try:
336+
while True:
337+
self._read(readsize)
338+
readsize = min(self.max_read_chunk, readsize * 2)
339+
except EOFError:
340+
size = self.extrasize
341341
else: # just get some more of it
342-
while size > self.extrasize:
343-
if not self._read(readsize):
344-
if size > self.extrasize:
345-
size = self.extrasize
346-
break
347-
readsize = min(self.max_read_chunk, readsize * 2)
342+
try:
343+
while size > self.extrasize:
344+
self._read(readsize)
345+
readsize = min(self.max_read_chunk, readsize * 2)
346+
except EOFError:
347+
if size > self.extrasize:
348+
size = self.extrasize
348349

349350
offset = self.offset - self.extrastart
350351
chunk = self.extrabuf[offset: offset + size]
@@ -365,9 +366,12 @@ def peek(self, n):
365366
if self.extrasize == 0:
366367
if self.fileobj is None:
367368
return b''
368-
# Ensure that we don't return b"" if we haven't reached EOF.
369-
# 1024 is the same buffering heuristic used in read()
370-
while self.extrasize == 0 and self._read(max(n, 1024)):
369+
try:
370+
# Ensure that we don't return b"" if we haven't reached EOF.
371+
while self.extrasize == 0:
372+
# 1024 is the same buffering heuristic used in read()
373+
self._read(max(n, 1024))
374+
except EOFError:
371375
pass
372376
offset = self.offset - self.extrastart
373377
remaining = self.extrasize
@@ -380,14 +384,13 @@ def _unread(self, buf):
380384

381385
def _read(self, size=1024):
382386
if self.fileobj is None:
383-
return False
387+
raise EOFError("Reached EOF")
384388

385389
if self._new_member:
386390
# If the _new_member flag is set, we have to
387391
# jump to the next member, if there is one.
388392
self._init_read()
389-
if not self._read_gzip_header():
390-
return False
393+
self._read_gzip_header()
391394
self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
392395
self._new_member = False
393396

@@ -404,7 +407,7 @@ def _read(self, size=1024):
404407
self.fileobj.prepend(self.decompress.unused_data, True)
405408
self._read_eof()
406409
self._add_read_data( uncompress )
407-
return False
410+
raise EOFError('Reached EOF')
408411

409412
uncompress = self.decompress.decompress(buf)
410413
self._add_read_data( uncompress )
@@ -420,7 +423,6 @@ def _read(self, size=1024):
420423
# a new member on the next call
421424
self._read_eof()
422425
self._new_member = True
423-
return True
424426

425427
def _add_read_data(self, data):
426428
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
@@ -435,7 +437,8 @@ def _read_eof(self):
435437
# We check that the computed CRC and size of the
436438
# uncompressed data matches the stored values. Note that the size
437439
# stored is the true file size mod 2**32.
438-
crc32, isize = struct.unpack("<II", self._read_exact(8))
440+
crc32 = read32(self.fileobj)
441+
isize = read32(self.fileobj) # may exceed 2GB
439442
if crc32 != self.crc:
440443
raise IOError("CRC check failed %s != %s" % (hex(crc32),
441444
hex(self.crc)))

Lib/test/test_bz2.py

-18
Original file line numberDiff line numberDiff line change
@@ -292,24 +292,6 @@ def testMixedIterationReads(self):
292292
self.assertRaises(ValueError, f.readline)
293293
self.assertRaises(ValueError, f.readlines)
294294

295-
def test_read_truncated(self):
296-
# Drop the eos_magic field (6 bytes) and CRC (4 bytes).
297-
truncated = self.DATA[:-10]
298-
with open(self.filename, 'wb') as f:
299-
f.write(truncated)
300-
with BZ2File(self.filename) as f:
301-
self.assertRaises(EOFError, f.read)
302-
with BZ2File(self.filename) as f:
303-
self.assertEqual(f.read(len(self.TEXT)), self.TEXT)
304-
self.assertRaises(EOFError, f.read, 1)
305-
# Incomplete 4-byte file header, and block header of at least 146 bits.
306-
for i in range(22):
307-
with open(self.filename, 'wb') as f:
308-
f.write(truncated[:i])
309-
with BZ2File(self.filename) as f:
310-
self.assertRaises(EOFError, f.read, 1)
311-
312-
313295
class BZ2CompressorTest(BaseTest):
314296
def testCompress(self):
315297
# "Test BZ2Compressor.compress()/flush()"

Lib/test/test_gzip.py

100644100755
-13
Original file line numberDiff line numberDiff line change
@@ -365,19 +365,6 @@ def test_decompress(self):
365365
datac = gzip.compress(data)
366366
self.assertEqual(gzip.decompress(datac), data)
367367

368-
def test_read_truncated(self):
369-
data = data1*50
370-
# Drop the CRC (4 bytes) and file size (4 bytes).
371-
truncated = gzip.compress(data)[:-8]
372-
with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
373-
self.assertRaises(EOFError, f.read)
374-
with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
375-
self.assertEqual(f.read(len(data)), data)
376-
self.assertRaises(EOFError, f.read, 1)
377-
# Incomplete 10-byte header.
378-
for i in range(2, 10):
379-
with gzip.GzipFile(fileobj=io.BytesIO(truncated[:i])) as f:
380-
self.assertRaises(EOFError, f.read, 1)
381368

382369
def test_read_with_extra(self):
383370
# Gzip data with an extra field

Misc/NEWS

+3-2
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ Library
1414
which were omitted in 3.2.4 when updating the bundled version of
1515
libffi used by ctypes.
1616

17-
- Issue #17666: Fix reading gzip files with an extra field.
18-
1917
- Issue #15535: Fix namedtuple pickles which were picking up the OrderedDict
2018
instead of just the underlying tuple.
2119

20+
- Issue #1159051: Back out a fix for handling corrupted gzip files that
21+
broke backwards compatibility.
22+
2223
Build
2324
-----
2425

0 commit comments

Comments
 (0)