Skip to content

Commit 5ae16a2

Browse files
committed
update gzip and test_gzip from CPython 3.10.4
1 parent 8e20e23 commit 5ae16a2

File tree

2 files changed

+45
-27
lines changed

2 files changed

+45
-27
lines changed

Lib/gzip.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@ def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
6262
raise TypeError("filename must be a str or bytes object, or a file")
6363

6464
if "t" in mode:
65+
encoding = io.text_encoding(encoding)
6566
return io.TextIOWrapper(binary_file, encoding, errors, newline)
6667
else:
6768
return binary_file
@@ -277,7 +278,7 @@ def write(self,data):
277278
if self.fileobj is None:
278279
raise ValueError("write() on closed GzipFile object")
279280

280-
if isinstance(data, bytes):
281+
if isinstance(data, (bytes, bytearray)):
281282
length = len(data)
282283
else:
283284
# accept any data that supports the buffer protocol
@@ -516,7 +517,7 @@ def _add_read_data(self, data):
516517

517518
def _read_eof(self):
518519
# We've read to the end of the file
519-
# We check the that the computed CRC and size of the
520+
# We check that the computed CRC and size of the
520521
# uncompressed data matches the stored values. Note that the size
521522
# stored is the true file size mod 2**32.
522523
crc32, isize = struct.unpack("<II", self._read_exact(8))
@@ -583,8 +584,7 @@ def main():
583584
g = sys.stdout.buffer
584585
else:
585586
if arg[-3:] != ".gz":
586-
print("filename doesn't end in .gz:", repr(arg))
587-
continue
587+
sys.exit(f"filename doesn't end in .gz: {arg!r}")
588588
f = open(arg, "rb")
589589
g = builtins.open(arg[:-3], "wb")
590590
else:
@@ -596,7 +596,7 @@ def main():
596596
f = builtins.open(arg, "rb")
597597
g = open(arg + ".gz", "wb")
598598
while True:
599-
chunk = f.read(1024)
599+
chunk = f.read(io.DEFAULT_BUFFER_SIZE)
600600
if not chunk:
601601
break
602602
g.write(chunk)

Lib/test/test_gzip.py

+40 -22
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,9 @@
1010
import sys
1111
import unittest
1212
from subprocess import PIPE, Popen
13-
from test import support
14-
from test.support import _4G, bigmemtest, os_helper, import_helper
13+
from test.support import import_helper
14+
from test.support import os_helper
15+
from test.support import _4G, bigmemtest
1516
from test.support.script_helper import assert_python_ok, assert_python_failure
1617

1718
gzip = import_helper.import_module('gzip')
@@ -328,8 +329,15 @@ def test_metadata(self):
328329
cmByte = fRead.read(1)
329330
self.assertEqual(cmByte, b'\x08') # deflate
330331

332+
try:
333+
expectedname = self.filename.encode('Latin-1') + b'\x00'
334+
expectedflags = b'\x08' # only the FNAME flag is set
335+
except UnicodeEncodeError:
336+
expectedname = b''
337+
expectedflags = b'\x00'
338+
331339
flagsByte = fRead.read(1)
332-
self.assertEqual(flagsByte, b'\x08') # only the FNAME flag is set
340+
self.assertEqual(flagsByte, expectedflags)
333341

334342
mtimeBytes = fRead.read(4)
335343
self.assertEqual(mtimeBytes, struct.pack('<i', mtime)) # little-endian
@@ -344,9 +352,8 @@ def test_metadata(self):
344352
# RFC 1952 specifies that this is the name of the input file, if any.
345353
# However, the gzip module defaults to storing the name of the output
346354
# file in this field.
347-
expected = self.filename.encode('Latin-1') + b'\x00'
348-
nameBytes = fRead.read(len(expected))
349-
self.assertEqual(nameBytes, expected)
355+
nameBytes = fRead.read(len(expectedname))
356+
self.assertEqual(nameBytes, expectedname)
350357

351358
# Since no other flags were set, the header ends here.
352359
# Rather than process the compressed data, let's seek to the trailer.
@@ -358,6 +365,10 @@ def test_metadata(self):
358365
isizeBytes = fRead.read(4)
359366
self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
360367

368+
def test_metadata_ascii_name(self):
369+
self.filename = os_helper.TESTFN_ASCII
370+
self.test_metadata()
371+
361372
def test_compresslevel_metadata(self):
362373
# see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
363374
# specifically, discussion of XFL in section 2.3.1
@@ -489,7 +500,9 @@ def test_fileobj_mode(self):
489500
if "x" in mode:
490501
os_helper.unlink(self.filename)
491502
with open(self.filename, mode) as f:
492-
with gzip.GzipFile(fileobj=f) as g:
503+
with self.assertWarns(FutureWarning):
504+
g = gzip.GzipFile(fileobj=f)
505+
with g:
493506
self.assertEqual(g.mode, gzip.WRITE)
494507

495508
def test_bytes_filename(self):
@@ -578,6 +591,15 @@ def test_prepend_error(self):
578591
with gzip.open(self.filename, "rb") as f:
579592
f._buffer.raw._fp.prepend()
580593

594+
def test_issue44439(self):
595+
q = array.array('Q', [1, 2, 3, 4, 5])
596+
LENGTH = len(q) * q.itemsize
597+
598+
with gzip.GzipFile(fileobj=io.BytesIO(), mode='w') as f:
599+
self.assertEqual(f.write(q), LENGTH)
600+
self.assertEqual(f.tell(), LENGTH)
601+
602+
581603
class TestOpen(BaseTest):
582604
def test_binary_modes(self):
583605
uncompressed = data1 * 50
@@ -647,14 +669,14 @@ def test_implicit_binary_modes(self):
647669
def test_text_modes(self):
648670
uncompressed = data1.decode("ascii") * 50
649671
uncompressed_raw = uncompressed.replace("\n", os.linesep)
650-
with gzip.open(self.filename, "wt") as f:
672+
with gzip.open(self.filename, "wt", encoding="ascii") as f:
651673
f.write(uncompressed)
652674
with open(self.filename, "rb") as f:
653675
file_data = gzip.decompress(f.read()).decode("ascii")
654676
self.assertEqual(file_data, uncompressed_raw)
655-
with gzip.open(self.filename, "rt") as f:
677+
with gzip.open(self.filename, "rt", encoding="ascii") as f:
656678
self.assertEqual(f.read(), uncompressed)
657-
with gzip.open(self.filename, "at") as f:
679+
with gzip.open(self.filename, "at", encoding="ascii") as f:
658680
f.write(uncompressed)
659681
with open(self.filename, "rb") as f:
660682
file_data = gzip.decompress(f.read()).decode("ascii")
@@ -668,7 +690,7 @@ def test_fileobj(self):
668690
self.assertEqual(f.read(), uncompressed_bytes)
669691
with gzip.open(io.BytesIO(compressed), "rb") as f:
670692
self.assertEqual(f.read(), uncompressed_bytes)
671-
with gzip.open(io.BytesIO(compressed), "rt") as f:
693+
with gzip.open(io.BytesIO(compressed), "rt", encoding="ascii") as f:
672694
self.assertEqual(f.read(), uncompressed_str)
673695

674696
def test_bad_params(self):
@@ -716,9 +738,9 @@ def test_encoding_error_handler(self):
716738
def test_newline(self):
717739
# Test with explicit newline (universal newline mode disabled).
718740
uncompressed = data1.decode("ascii") * 50
719-
with gzip.open(self.filename, "wt", newline="\n") as f:
741+
with gzip.open(self.filename, "wt", encoding="ascii", newline="\n") as f:
720742
f.write(uncompressed)
721-
with gzip.open(self.filename, "rt", newline="\r") as f:
743+
with gzip.open(self.filename, "rt", encoding="ascii", newline="\r") as f:
722744
self.assertEqual(f.readlines(), [uncompressed])
723745

724746

@@ -768,10 +790,10 @@ def test_decompress_infile_outfile(self):
768790
self.assertEqual(err, b'')
769791

770792
def test_decompress_infile_outfile_error(self):
771-
rc, out, err = assert_python_ok('-m', 'gzip', '-d', 'thisisatest.out')
772-
self.assertIn(b"filename doesn't end in .gz:", out)
773-
self.assertEqual(rc, 0)
774-
self.assertEqual(err, b'')
793+
rc, out, err = assert_python_failure('-m', 'gzip', '-d', 'thisisatest.out')
794+
self.assertEqual(b"filename doesn't end in .gz: 'thisisatest.out'", err.strip())
795+
self.assertEqual(rc, 1)
796+
self.assertEqual(out, b'')
775797

776798
@create_and_remove_directory(TEMPDIR)
777799
def test_compress_stdin_outfile(self):
@@ -827,9 +849,5 @@ def test_decompress_cannot_have_flags_compression(self):
827849
self.assertEqual(out, b'')
828850

829851

830-
def test_main(verbose=None):
831-
support.run_unittest(TestGzip, TestOpen, TestCommandLine)
832-
833-
834852
if __name__ == "__main__":
835-
test_main(verbose=True)
853+
unittest.main()

0 commit comments

Comments
 (0)