@@ -399,6 +399,59 @@ def readline(self, size=-1):
399
399
return self ._buffer .readline (size )
400
400
401
401
402
def _read_exact(fp, n):
    '''Return exactly *n* bytes read from `fp`.

    A single fp.read(n) is not enough because fp may be unbuffered and
    hand back fewer bytes than requested, so keep asking for the
    outstanding amount until it has all arrived.

    Raises EOFError if `fp` runs dry before *n* bytes were collected.
    '''
    buf = fp.read(n)
    missing = n - len(buf)
    while missing > 0:
        chunk = fp.read(missing)
        if not chunk:
            # An empty read means end of file: the request can never be met.
            raise EOFError("Compressed file ended before the "
                           "end-of-stream marker was reached")
        buf += chunk
        missing -= len(chunk)
    return buf
416
+
417
+
418
def _skip_cstring(fp):
    '''Read and discard bytes from `fp` up to and including a NUL byte.

    Reaching end of file also ends the scan; no error is raised here.
    '''
    while True:
        s = fp.read(1)
        if not s or s == b'\000':
            break


def _read_gzip_header(fp):
    '''Read a gzip header from `fp` and progress to the end of the header.

    Returns last mtime if header was present or None otherwise (i.e. when
    `fp` is already at end of file).

    Raises BadGzipFile if the magic number or the compression method is
    wrong, and EOFError if the file ends inside a fixed-size header field.
    '''
    magic = fp.read(2)
    if magic == b'':
        return None
    if len(magic) == 1:
        # fp may be unbuffered and return a short read; ask once more for
        # the second magic byte before judging the file.  If nothing more
        # arrives, the one-byte value is rejected below as before.
        magic += fp.read(1)

    if magic != b'\037\213':
        raise BadGzipFile('Not a gzipped file (%r)' % magic)

    # Fixed part: method, flags, 32-bit mtime; the XFL and OS bytes are
    # skipped by the two pad bytes in the format.
    (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
    if method != 8:
        raise BadGzipFile('Unknown compression method')

    if flag & FEXTRA:
        # Read & discard the extra field, if present
        extra_len, = struct.unpack("<H", _read_exact(fp, 2))
        _read_exact(fp, extra_len)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_cstring(fp)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_cstring(fp)
    if flag & FHCRC:
        _read_exact(fp, 2)     # Read & discard the 16-bit header CRC
    return last_mtime
453
+
454
+
402
455
class _GzipReader (_compression .DecompressReader ):
403
456
def __init__ (self , fp ):
404
457
super ().__init__ (_PaddedFile (fp ), zlib .decompressobj ,
@@ -411,53 +464,11 @@ def _init_read(self):
411
464
self ._crc = zlib .crc32 (b"" )
412
465
self ._stream_size = 0 # Decompressed size of unconcatenated stream
413
466
414
- def _read_exact (self , n ):
415
- '''Read exactly *n* bytes from `self._fp`
416
-
417
- This method is required because self._fp may be unbuffered,
418
- i.e. return short reads.
419
- '''
420
-
421
- data = self ._fp .read (n )
422
- while len (data ) < n :
423
- b = self ._fp .read (n - len (data ))
424
- if not b :
425
- raise EOFError ("Compressed file ended before the "
426
- "end-of-stream marker was reached" )
427
- data += b
428
- return data
429
-
430
467
def _read_gzip_header(self):
    """Consume the next member header from self._fp.

    Delegates the parsing to the module-level _read_gzip_header().
    Returns False when that helper reports no header (None); otherwise
    stores the header's mtime in self._last_mtime and returns True.
    """
    mtime = _read_gzip_header(self._fp)
    if mtime is not None:
        self._last_mtime = mtime
        return True
    return False
462
473
463
474
def read (self , size = - 1 ):
@@ -520,7 +531,7 @@ def _read_eof(self):
520
531
# We check that the computed CRC and size of the
521
532
# uncompressed data matches the stored values. Note that the size
522
533
# stored is the true file size mod 2**32.
523
- crc32 , isize = struct .unpack ("<II" , self . _read_exact (8 ))
534
+ crc32 , isize = struct .unpack ("<II" , _read_exact (self . _fp , 8 ))
524
535
if crc32 != self ._crc :
525
536
raise BadGzipFile ("CRC check failed %s != %s" % (hex (crc32 ),
526
537
hex (self ._crc )))
@@ -540,21 +551,69 @@ def _rewind(self):
540
551
super ()._rewind ()
541
552
self ._new_member = True
542
553
554
+
555
def _create_simple_gzip_header(compresslevel: int,
                               mtime=None) -> bytes:
    """
    Build a simple 10-byte gzip header with no extra fields.
    :param compresslevel: Compresslevel used to determine the xfl bytes.
    :param mtime: The mtime (must support conversion to a 32-bit integer).
                  The current time is used when it is None.
    :return: A bytes object representing the gzip header.
    """
    stamp = time.time() if mtime is None else mtime
    # XFL is 2 for the best level, 4 for the fastest level, 0 otherwise.
    if compresslevel == _COMPRESS_LEVEL_BEST:
        extra_flags = 2
    else:
        extra_flags = 4 if compresslevel == _COMPRESS_LEVEL_FAST else 0
    # Leading four bytes: ID1 and ID2 magic bytes, method (8=deflate) and
    # header flags (0: no optional fields).  Then mtime, xfl and the os
    # byte (255 for unknown OS).
    return struct.pack("<4sLBB", b"\x1f\x8b\x08\x00", int(stamp),
                       extra_flags, 255)
574
+
575
+
543
576
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.

    compresslevel sets the compression level in range of 0-9.
    mtime can be used to set the modification time. The modification time is
    set to the current time by default.

    The header's OS byte is always 255 ("unknown"), whatever mtime is, so
    the output for a given mtime does not depend on the platform's zlib.
    """
    # Let zlib produce the complete gzip member (wbits=31).  It computes the
    # CRC32 and ISIZE trailer from the actual byte content, which stays
    # correct for buffer objects whose len() is not their size in bytes.
    gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
    if mtime is None:
        mtime = time.time()
    # Rebuild the 10-byte header: keep zlib's magic, method and flags (first
    # four bytes) and its XFL byte (offset 8), but stamp in the requested
    # mtime and replace zlib's platform-specific OS byte with 255.
    header = struct.pack("<4sLBB", gzip_data[:4], int(mtime), gzip_data[8],
                         255)
    return header + gzip_data[10:]
592
+
551
593
552
594
def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.

    Every concatenated gzip member in *data* is decoded in turn and the
    results are joined.  Raises EOFError when a member is truncated and
    BadGzipFile when its stored CRC or length does not match.
    """
    members = []
    remaining = data
    while True:
        stream = io.BytesIO(remaining)
        if _read_gzip_header(stream) is None:
            # Nothing left to parse: all members have been consumed.
            return b"".join(members)
        # Raw deflate decoder; the gzip header has been parsed already.
        inflater = zlib.decompressobj(wbits=-zlib.MAX_WBITS)
        # Feed everything that follows the header.
        payload = inflater.decompress(remaining[stream.tell():])
        tail = inflater.unused_data
        # The deflate stream must be complete and be followed by the
        # 8-byte trailer (CRC32 + size).
        if not (inflater.eof and len(tail) >= 8):
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        stored_crc, stored_size = struct.unpack("<II", tail[:8])
        if stored_crc != zlib.crc32(payload):
            raise BadGzipFile("CRC check failed")
        # The stored size is compared modulo 2**32.
        if stored_size != (len(payload) & 0xffffffff):
            raise BadGzipFile("Incorrect length of data produced")
        members.append(payload)
        # Drop zero padding between members before looking for the next one.
        remaining = tail[8:].lstrip(b"\x00")
558
617
559
618
560
619
def main ():
0 commit comments