Skip to content

Commit 617e064

Browse files
committed
Check gzip headers for corrupted fields
1 parent 39f7d2f commit 617e064

File tree

3 files changed

+73
-11
lines changed

3 files changed

+73
-11
lines changed

Lib/gzip.py

+42-11
Original file line numberDiff line numberDiff line change
@@ -426,29 +426,60 @@ def _read_gzip_header(fp):
426426

427427
if magic != b'\037\213':
428428
raise BadGzipFile('Not a gzipped file (%r)' % magic)
429-
430-
(method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
429+
base_header = _read_exact(fp, 8)
430+
(method, flag, last_mtime) = struct.unpack("<BBIxx", base_header)
431431
if method != 8:
432432
raise BadGzipFile('Unknown compression method')
433433

434+
# FHCRC will be checked often. So save the result of the check.
435+
fhcrc = flag & FHCRC
436+
# Only create and append to a list of header parts when FHCRC is set.
437+
# In the most common use cases FHCRC is not set. So we optimize for those
438+
# cases.
439+
if fhcrc:
440+
header_parts = [magic, base_header]
441+
434442
if flag & FEXTRA:
435-
# Read & discard the extra field, if present
436-
extra_len, = struct.unpack("<H", _read_exact(fp, 2))
437-
_read_exact(fp, extra_len)
443+
# Read the extra field, if present. Save the fields if FHCRC is set.
444+
extra_len_bytes = _read_exact(fp, 2)
445+
extra_len, = struct.unpack("<H", extra_len_bytes)
446+
extra = _read_exact(fp, extra_len)
447+
if fhcrc:
448+
header_parts.extend([extra_len_bytes, extra])
449+
438450
if flag & FNAME:
439-
# Read and discard a null-terminated string containing the filename
451+
# Read a null-terminated string containing the filename. Save it
452+
# if FHCRC is set.
440453
while True:
441454
s = fp.read(1)
442-
if not s or s==b'\000':
455+
if not s:
456+
raise EOFError("Compressed file ended before the "
457+
"end-of-stream marker was reached")
458+
if fhcrc:
459+
header_parts.append(s)
460+
if s == b'\000':
443461
break
444462
if flag & FCOMMENT:
445-
# Read and discard a null-terminated string containing a comment
463+
# Read a null-terminated string containing a comment. Save it
464+
# if FHCRC is set.
446465
while True:
447466
s = fp.read(1)
448-
if not s or s==b'\000':
467+
if not s:
468+
raise EOFError("Compressed file ended before the "
469+
"end-of-stream marker was reached")
470+
if fhcrc:
471+
header_parts.append(s)
472+
if s == b'\000':
449473
break
450-
if flag & FHCRC:
451-
_read_exact(fp, 2) # Read & discard the 16-bit header CRC
474+
475+
if fhcrc:
476+
# Read the 16-bit header CRC and check it against the header.
477+
header_crc, = struct.unpack("<H", _read_exact(fp, 2))
478+
header = b"".join(header_parts)
479+
true_crc = zlib.crc32(header) & 0xFFFF
480+
if header_crc != true_crc:
481+
raise BadGzipFile(f"Corrupted gzip header. Checksums do not "
482+
f"match: {true_crc:04x} != {header_crc:04x}")
452483
return last_mtime
453484

454485

Lib/test/test_gzip.py

+30
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import struct
1010
import sys
1111
import unittest
12+
import zlib
1213
from subprocess import PIPE, Popen
1314
from test.support import import_helper
1415
from test.support import os_helper
@@ -570,6 +571,35 @@ def test_decompress_missing_trailer(self):
570571
compressed_data = gzip.compress(data1)
571572
self.assertRaises(EOFError, gzip.decompress, compressed_data[:-8])
572573

574+
def test_truncated_header(self):
575+
truncated_headers = [
576+
b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00", # Missing OS byte
577+
b"\x1f\x8b\x08\x02\x00\x00\x00\x00\x00\xff", # FHCRC, but no checksum
578+
b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff", # FEXTRA, but no xlen
579+
b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\xaa\x00", # FEXTRA, xlen, but no data
580+
b"\x1f\x8b\x08\x08\x00\x00\x00\x00\x00\xff", # FNAME but no fname
581+
b"\x1f\x8b\x08\x10\x00\x00\x00\x00\x00\xff", # FCOMMENT, but no fcomment
582+
]
583+
for header in truncated_headers:
584+
with self.subTest(header=header):
585+
with self.assertRaises(EOFError):
586+
gzip.decompress(header)
587+
588+
def test_corrupted_gzip_header(self):
589+
header = (b"\x1f\x8b\x08\x1f\x00\x00\x00\x00\x00\xff" # All flags set
590+
b"\x05\x00" # Xlen = 5
591+
b"extra"
592+
b"name\x00"
593+
b"comment\x00")
594+
true_crc = zlib.crc32(header) & 0xFFFF
595+
corrupted_crc = true_crc ^ 0xFFFF
596+
corrupted_header = header + corrupted_crc.to_bytes(2, "little")
597+
with self.assertRaises(gzip.BadGzipFile) as err:
598+
gzip.decompress(corrupted_header)
599+
self.assertEqual(str(err.exception),
600+
f"Corrupted gzip header. Checksums do not "
601+
f"match: {true_crc:04x} != {corrupted_crc:04x}")
602+
573603
def test_read_truncated(self):
574604
data = data1*50
575605
# Drop the CRC (4 bytes) and file size (4 bytes).
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Gzip headers are now checked for corrupted NAME, COMMENT and HCRC fields.

0 commit comments

Comments
 (0)