@@ -33,6 +33,9 @@ def write32u(output, value):
33
33
# or unsigned.
34
34
output .write (struct .pack ("<L" , value ))
35
35
36
def read32(input):
    """Read an unsigned 32-bit little-endian integer from *input*.

    *input* is any object with a ``read(n)`` method returning bytes.
    (The parameter name shadows the ``input`` builtin; it is kept so
    existing callers are unaffected.)

    Returns the decoded value as an int in the range 0 .. 2**32 - 1.

    Raises ``struct.error`` if the stream ends before four bytes could
    be read.
    """
    data = input.read(4)
    # A single read() is allowed to return fewer bytes than requested
    # (unbuffered or socket-backed streams), so keep asking for the
    # remainder until we have all four bytes or the stream is exhausted.
    while len(data) < 4:
        more = input.read(4 - len(data))
        if not more:
            break
        data += more
    # On a genuinely truncated stream struct.unpack raises struct.error.
    return struct.unpack("<I", data)[0]
38
+
36
39
def open (filename , mode = "rb" , compresslevel = 9 ):
37
40
"""Shorthand for GzipFile(filename, mode, compresslevel).
38
41
@@ -256,32 +259,27 @@ def _init_read(self):
256
259
self .crc = zlib .crc32 (b"" ) & 0xffffffff
257
260
self .size = 0
258
261
259
- def _read_exact (self , n ):
260
- data = self .fileobj .read (n )
261
- while len (data ) < n :
262
- b = self .fileobj .read (n - len (data ))
263
- if not b :
264
- raise EOFError ("Compressed file ended before the "
265
- "end-of-stream marker was reached" )
266
- data += b
267
- return data
268
-
269
262
def _read_gzip_header (self ):
270
263
magic = self .fileobj .read (2 )
271
264
if magic == b'' :
272
- return False
265
+ raise EOFError ( "Reached EOF" )
273
266
274
267
if magic != b'\037 \213 ' :
275
268
raise IOError ('Not a gzipped file' )
276
-
277
- method , flag , self .mtime = struct .unpack ("<BBIxx" , self ._read_exact (8 ))
269
+ method = ord ( self .fileobj .read (1 ) )
278
270
if method != 8 :
279
271
raise IOError ('Unknown compression method' )
272
+ flag = ord ( self .fileobj .read (1 ) )
273
+ self .mtime = read32 (self .fileobj )
274
+ # extraflag = self.fileobj.read(1)
275
+ # os = self.fileobj.read(1)
276
+ self .fileobj .read (2 )
280
277
281
278
if flag & FEXTRA :
282
279
# Read & discard the extra field, if present
283
- extra_len , = struct .unpack ("<H" , self ._read_exact (2 ))
284
- self ._read_exact (extra_len )
280
+ xlen = ord (self .fileobj .read (1 ))
281
+ xlen = xlen + 256 * ord (self .fileobj .read (1 ))
282
+ self .fileobj .read (xlen )
285
283
if flag & FNAME :
286
284
# Read and discard a null-terminated string containing the filename
287
285
while True :
@@ -295,13 +293,12 @@ def _read_gzip_header(self):
295
293
if not s or s == b'\000 ' :
296
294
break
297
295
if flag & FHCRC :
298
- self ._read_exact (2 ) # Read & discard the 16-bit header CRC
296
+ self .fileobj . read (2 ) # Read & discard the 16-bit header CRC
299
297
300
298
unused = self .fileobj .unused ()
301
299
if unused :
302
300
uncompress = self .decompress .decompress (unused )
303
301
self ._add_read_data (uncompress )
304
- return True
305
302
306
303
def write (self ,data ):
307
304
self ._check_closed ()
@@ -335,16 +332,20 @@ def read(self, size=-1):
335
332
336
333
readsize = 1024
337
334
if size < 0 : # get the whole thing
338
- while self ._read (readsize ):
339
- readsize = min (self .max_read_chunk , readsize * 2 )
340
- size = self .extrasize
335
+ try :
336
+ while True :
337
+ self ._read (readsize )
338
+ readsize = min (self .max_read_chunk , readsize * 2 )
339
+ except EOFError :
340
+ size = self .extrasize
341
341
else : # just get some more of it
342
- while size > self .extrasize :
343
- if not self ._read (readsize ):
344
- if size > self .extrasize :
345
- size = self .extrasize
346
- break
347
- readsize = min (self .max_read_chunk , readsize * 2 )
342
+ try :
343
+ while size > self .extrasize :
344
+ self ._read (readsize )
345
+ readsize = min (self .max_read_chunk , readsize * 2 )
346
+ except EOFError :
347
+ if size > self .extrasize :
348
+ size = self .extrasize
348
349
349
350
offset = self .offset - self .extrastart
350
351
chunk = self .extrabuf [offset : offset + size ]
@@ -365,9 +366,12 @@ def peek(self, n):
365
366
if self .extrasize == 0 :
366
367
if self .fileobj is None :
367
368
return b''
368
- # Ensure that we don't return b"" if we haven't reached EOF.
369
- # 1024 is the same buffering heuristic used in read()
370
- while self .extrasize == 0 and self ._read (max (n , 1024 )):
369
+ try :
370
+ # Ensure that we don't return b"" if we haven't reached EOF.
371
+ while self .extrasize == 0 :
372
+ # 1024 is the same buffering heuristic used in read()
373
+ self ._read (max (n , 1024 ))
374
+ except EOFError :
371
375
pass
372
376
offset = self .offset - self .extrastart
373
377
remaining = self .extrasize
@@ -380,14 +384,13 @@ def _unread(self, buf):
380
384
381
385
def _read (self , size = 1024 ):
382
386
if self .fileobj is None :
383
- return False
387
+ raise EOFError ( "Reached EOF" )
384
388
385
389
if self ._new_member :
386
390
# If the _new_member flag is set, we have to
387
391
# jump to the next member, if there is one.
388
392
self ._init_read ()
389
- if not self ._read_gzip_header ():
390
- return False
393
+ self ._read_gzip_header ()
391
394
self .decompress = zlib .decompressobj (- zlib .MAX_WBITS )
392
395
self ._new_member = False
393
396
@@ -404,7 +407,7 @@ def _read(self, size=1024):
404
407
self .fileobj .prepend (self .decompress .unused_data , True )
405
408
self ._read_eof ()
406
409
self ._add_read_data ( uncompress )
407
- return False
410
+ raise EOFError ( 'Reached EOF' )
408
411
409
412
uncompress = self .decompress .decompress (buf )
410
413
self ._add_read_data ( uncompress )
@@ -420,7 +423,6 @@ def _read(self, size=1024):
420
423
# a new member on the next call
421
424
self ._read_eof ()
422
425
self ._new_member = True
423
- return True
424
426
425
427
def _add_read_data (self , data ):
426
428
self .crc = zlib .crc32 (data , self .crc ) & 0xffffffff
@@ -435,7 +437,8 @@ def _read_eof(self):
435
437
# We check that the computed CRC and size of the
436
438
# uncompressed data matches the stored values. Note that the size
437
439
# stored is the true file size mod 2**32.
438
- crc32 , isize = struct .unpack ("<II" , self ._read_exact (8 ))
440
+ crc32 = read32 (self .fileobj )
441
+ isize = read32 (self .fileobj ) # may exceed 2GB
439
442
if crc32 != self .crc :
440
443
raise IOError ("CRC check failed %s != %s" % (hex (crc32 ),
441
444
hex (self .crc )))
0 commit comments