From 5884185f05c8cb18554a0958febb528148849d22 Mon Sep 17 00:00:00 2001 From: animalize Date: Thu, 17 Jun 2021 14:23:37 +0800 Subject: [PATCH 1/2] BZ2File.write()/LZMAFile.write() handle length correctly No longer use len() to get the length of the input data. For some buffer protocol objects, the length obtained by using len() is wrong. --- Lib/bz2.py | 17 ++++++++++++----- Lib/gzip.py | 2 +- Lib/lzma.py | 17 ++++++++++++----- Lib/test/test_bz2.py | 8 ++++++++ Lib/test/test_gzip.py | 8 ++++++++ Lib/test/test_lzma.py | 8 ++++++++ .../2021-06-17-15-01-51.bpo-44439.1S7QhT.rst | 3 +++ 7 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-06-17-15-01-51.bpo-44439.1S7QhT.rst diff --git a/Lib/bz2.py b/Lib/bz2.py index a2c588e7487f3d..b101add9df24b5 100644 --- a/Lib/bz2.py +++ b/Lib/bz2.py @@ -218,15 +218,22 @@ def readlines(self, size=-1): def write(self, data): """Write a byte string to the file. - Returns the number of uncompressed bytes written, which is - always len(data). Note that due to buffering, the file on disk - may not reflect the data written until close() is called. + Returns the number of uncompressed bytes written. Note that + due to buffering, the file on disk may not reflect the data + written until close() is called. """ self._check_can_write() + if isinstance(data, (bytes, bytearray)): + length = len(data) + else: + # accept any data that supports the buffer protocol + data = memoryview(data) + length = data.nbytes + compressed = self._compressor.compress(data) self._fp.write(compressed) - self._pos += len(data) - return len(data) + self._pos += length + return length def writelines(self, seq): """Write a sequence of byte strings to the file. diff --git a/Lib/gzip.py b/Lib/gzip.py index 1c1e795e1715db..3d837b744800ed 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -278,7 +278,7 @@ def write(self,data): if self.fileobj is None: raise ValueError("write() on closed GzipFile object") - if isinstance(data, bytes): + if isinstance(data, (bytes, bytearray)): length = len(data) else: # accept any data that supports the buffer protocol diff --git a/Lib/lzma.py b/Lib/lzma.py index 2ada7d81d3c813..3ff8c252f8ba82 100644 --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -228,15 +228,22 @@ def __iter__(self): def write(self, data): """Write a bytes object to the file. - Returns the number of uncompressed bytes written, which is - always len(data). Note that due to buffering, the file on disk - may not reflect the data written until close() is called. + Returns the number of uncompressed bytes written. Note that + due to buffering, the file on disk may not reflect the data + written until close() is called. """ self._check_can_write() + if isinstance(data, (bytes, bytearray)): + length = len(data) + else: + # accept any data that supports the buffer protocol + data = memoryview(data) + length = data.nbytes + compressed = self._compressor.compress(data) self._fp.write(compressed) - self._pos += len(data) - return len(data) + self._pos += length + return length def seek(self, offset, whence=io.SEEK_SET): """Change the file position. diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index efed3a859ba217..db1e5305364e72 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1,6 +1,7 @@ from test import support from test.support import bigmemtest, _4G +import array import unittest from io import BytesIO, DEFAULT_BUFFER_SIZE import os @@ -620,6 +621,13 @@ def test_read_truncated(self): with BZ2File(BytesIO(truncated[:i])) as f: self.assertRaises(EOFError, f.read, 1) + def test_issue44439(self): + q = array.array('Q', [1, 2, 3, 4, 5]) + + with BZ2File(BytesIO(), 'w') as f: + f.write(q) + self.assertEqual(f.tell(), len(q) * q.itemsize) + class BZ2CompressorTest(BaseTest): def testCompress(self): diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 446b61ab439ffe..572e61528c5b37 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -592,6 +592,14 @@ def test_prepend_error(self): with gzip.open(self.filename, "rb") as f: f._buffer.raw._fp.prepend() + def test_issue44439(self): + q = array.array('Q', [1, 2, 3, 4, 5]) + + with gzip.GzipFile(fileobj=io.BytesIO(), mode='w') as f: + f.write(q) + self.assertEqual(f.tell(), len(q) * q.itemsize) + + class TestOpen(BaseTest): def test_binary_modes(self): uncompressed = data1 * 50 diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index db20300056e489..0194735aceef78 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -1,4 +1,5 @@ import _compression +import array from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE import os import pathlib @@ -1231,6 +1232,13 @@ def test_issue21872(self): self.assertTrue(d2.eof) self.assertEqual(out1 + out2, entire) + def test_issue44439(self): + q = array.array('Q', [1, 2, 3, 4, 5]) + + with LZMAFile(BytesIO(), 'w') as f: + f.write(q) + self.assertEqual(f.tell(), len(q) * q.itemsize) + class OpenTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2021-06-17-15-01-51.bpo-44439.1S7QhT.rst b/Misc/NEWS.d/next/Library/2021-06-17-15-01-51.bpo-44439.1S7QhT.rst new file mode 100644 index 00000000000000..27396683700a83 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-06-17-15-01-51.bpo-44439.1S7QhT.rst @@ -0,0 +1,3 @@ +Fix in :meth:`bz2.BZ2File.write` / :meth:`lzma.LZMAFile.write` methods, when +the input data is an object that supports the buffer protocol, the file length +may be wrong. From d069c7ece97b76a3eae5463fcf6d8a3570bbd200 Mon Sep 17 00:00:00 2001 From: animalize Date: Tue, 22 Jun 2021 14:08:09 +0800 Subject: [PATCH 2/2] address review comment --- Lib/bz2.py | 7 ++++--- Lib/lzma.py | 7 ++++--- Lib/test/test_bz2.py | 5 +++-- Lib/test/test_gzip.py | 5 +++-- Lib/test/test_lzma.py | 5 +++-- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Lib/bz2.py b/Lib/bz2.py index b101add9df24b5..7f1d20632ef139 100644 --- a/Lib/bz2.py +++ b/Lib/bz2.py @@ -218,9 +218,10 @@ def readlines(self, size=-1): def write(self, data): """Write a byte string to the file. - Returns the number of uncompressed bytes written. Note that - due to buffering, the file on disk may not reflect the data - written until close() is called. + Returns the number of uncompressed bytes written, which is + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until close() + is called. """ self._check_can_write() if isinstance(data, (bytes, bytearray)): diff --git a/Lib/lzma.py b/Lib/lzma.py index 3ff8c252f8ba82..9abf06d91db184 100644 --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -228,9 +228,10 @@ def __iter__(self): def write(self, data): """Write a bytes object to the file. - Returns the number of uncompressed bytes written. Note that - due to buffering, the file on disk may not reflect the data - written until close() is called. + Returns the number of uncompressed bytes written, which is + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until close() + is called. """ self._check_can_write() if isinstance(data, (bytes, bytearray)): diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index db1e5305364e72..7913beb87a3525 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -623,10 +623,11 @@ def test_read_truncated(self): def test_issue44439(self): q = array.array('Q', [1, 2, 3, 4, 5]) + LENGTH = len(q) * q.itemsize with BZ2File(BytesIO(), 'w') as f: - f.write(q) - self.assertEqual(f.tell(), len(q) * q.itemsize) + self.assertEqual(f.write(q), LENGTH) + self.assertEqual(f.tell(), LENGTH) class BZ2CompressorTest(BaseTest): diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 572e61528c5b37..7b51e45aad92ba 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -594,10 +594,11 @@ def test_prepend_error(self): def test_issue44439(self): q = array.array('Q', [1, 2, 3, 4, 5]) + LENGTH = len(q) * q.itemsize with gzip.GzipFile(fileobj=io.BytesIO(), mode='w') as f: - f.write(q) - self.assertEqual(f.tell(), len(q) * q.itemsize) + self.assertEqual(f.write(q), LENGTH) + self.assertEqual(f.tell(), LENGTH) class TestOpen(BaseTest): diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index 0194735aceef78..1e2066b89168f4 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -1234,10 +1234,11 @@ def test_issue21872(self): def test_issue44439(self): q = array.array('Q', [1, 2, 3, 4, 5]) + LENGTH = len(q) * q.itemsize with LZMAFile(BytesIO(), 'w') as f: - f.write(q) - self.assertEqual(f.tell(), len(q) * q.itemsize) + self.assertEqual(f.write(q), LENGTH) + self.assertEqual(f.tell(), LENGTH) class OpenTestCase(unittest.TestCase):