From 8eb63c9d886112400a0c703c277962e225e8d688 Mon Sep 17 00:00:00 2001 From: Emma Harper Smith Date: Sun, 20 Jul 2025 18:54:49 -0700 Subject: [PATCH 1/5] Revert "gh-84481: Make ZipFile.data_offset more robust (#132178)" This reverts commit 6cd1d6c6b142697fb72f422b7b448c27ebc30534. --- Lib/test/test_zipfile/test_core.py | 8 +------- Lib/zipfile/__init__.py | 3 +-- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index ada96813709aea..e21761dd389e10 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -3506,12 +3506,6 @@ def test_data_offset_write_with_prefix(self): with zipfile.ZipFile(fp, "w") as zipfp: self.assertEqual(zipfp.data_offset, 16) - def test_data_offset_append_with_bad_zip(self): - with io.BytesIO() as fp: - fp.write(b"this is a prefix") - with zipfile.ZipFile(fp, "a") as zipfp: - self.assertEqual(zipfp.data_offset, 16) - def test_data_offset_write_no_tell(self): # The initializer in ZipFile checks if tell raises AttributeError or # OSError when creating a file in write mode when deducing the offset @@ -3521,7 +3515,7 @@ def tell(self): raise OSError("Unimplemented!") with NoTellBytesIO() as fp: with zipfile.ZipFile(fp, "w") as zipfp: - self.assertIsNone(zipfp.data_offset) + self.assertIs(zipfp.data_offset, None) class EncodedMetadataTests(unittest.TestCase): diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 18caeb3e04a2b5..3cec410387c275 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1452,7 +1452,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, self._lock = threading.RLock() self._seekable = True self._writing = False - self._data_offset = None try: if mode == 'r': @@ -1468,6 +1467,7 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, self.fp = _Tellable(self.fp) self.start_dir = 0 self._seekable = False + self._data_offset = None else: # Some file-like objects can provide tell() but not seek() try: @@ -1488,7 +1488,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, # even if no files are added to the archive self._didModify = True self.start_dir = self.fp.tell() - self._data_offset = self.start_dir else: raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") except: From 4221022ce6d53fb5d1a2deb327b7e121f403ec8b Mon Sep 17 00:00:00 2001 From: Emma Harper Smith Date: Sun, 20 Jul 2025 19:03:11 -0700 Subject: [PATCH 2/5] Revert "gh-84481: Add ZipFile.data_offset attribute (#132165)" This reverts commit 0788948dcb980c7648b29ca363390b696d7f188f. --- Doc/library/zipfile.rst | 8 ----- Lib/test/test_zipfile/test_core.py | 48 ------------------------------ Lib/zipfile/__init__.py | 12 -------- 3 files changed, 68 deletions(-) diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index bf9136a2139112..a1261ec471c92e 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -558,14 +558,6 @@ The following data attributes are also available: it should be no longer than 65535 bytes. Comments longer than this will be truncated. -.. attribute:: ZipFile.data_offset - - The offset to the start of ZIP data from the beginning of the file. When the - :class:`ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the - underlying file does not support ``tell()``, the value will be ``None`` - instead. - - .. versionadded:: 3.14 .. _path-objects: diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index e21761dd389e10..c033059a515db6 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -3470,54 +3470,6 @@ def test_execute_zip64(self): self.assertIn(b'number in executable: 5', output) -class TestDataOffsetPrependedZip(unittest.TestCase): - """Test .data_offset on reading zip files with an executable prepended.""" - - def setUp(self): - self.exe_zip = findfile('exe_with_zip', subdir='archivetestdata') - self.exe_zip64 = findfile('exe_with_z64', subdir='archivetestdata') - - def _test_data_offset(self, name): - with zipfile.ZipFile(name) as zipfp: - self.assertEqual(zipfp.data_offset, 713) - - def test_data_offset_with_exe_prepended(self): - self._test_data_offset(self.exe_zip) - - def test_data_offset_with_exe_prepended_zip64(self): - self._test_data_offset(self.exe_zip64) - -class TestDataOffsetZipWrite(unittest.TestCase): - """Test .data_offset for ZipFile opened in write mode.""" - - def setUp(self): - os.mkdir(TESTFNDIR) - self.addCleanup(rmtree, TESTFNDIR) - self.test_path = os.path.join(TESTFNDIR, 'testoffset.zip') - - def test_data_offset_write_no_prefix(self): - with io.BytesIO() as fp: - with zipfile.ZipFile(fp, "w") as zipfp: - self.assertEqual(zipfp.data_offset, 0) - - def test_data_offset_write_with_prefix(self): - with io.BytesIO() as fp: - fp.write(b"this is a prefix") - with zipfile.ZipFile(fp, "w") as zipfp: - self.assertEqual(zipfp.data_offset, 16) - - def test_data_offset_write_no_tell(self): - # The initializer in ZipFile checks if tell raises AttributeError or - # OSError when creating a file in write mode when deducing the offset - # of the beginning of zip data - class NoTellBytesIO(io.BytesIO): - def tell(self): - raise OSError("Unimplemented!") - with NoTellBytesIO() as fp: - with zipfile.ZipFile(fp, "w") as zipfp: - self.assertIs(zipfp.data_offset, None) - - class EncodedMetadataTests(unittest.TestCase): file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three' file_content = [ diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 3cec410387c275..2969f735e8abb9 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1462,12 +1462,10 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, self._didModify = True try: self.start_dir = self.fp.tell() - self._data_offset = self.start_dir except (AttributeError, OSError): self.fp = _Tellable(self.fp) self.start_dir = 0 self._seekable = False - self._data_offset = None else: # Some file-like objects can provide tell() but not seek() try: @@ -1534,10 +1532,6 @@ def _RealGetContents(self): # self.start_dir: Position of start of central directory self.start_dir = offset_cd + concat - # store the offset to the beginning of data for the - # .data_offset property - self._data_offset = concat - if self.start_dir < 0: raise BadZipFile("Bad offset for central directory") fp.seek(self.start_dir, 0) @@ -1598,12 +1592,6 @@ def _RealGetContents(self): zinfo._end_offset = end_offset end_offset = zinfo.header_offset - @property - def data_offset(self): - """The offset to the start of zip data in the file or None if - unavailable.""" - return self._data_offset - def namelist(self): """Return a list of file names in the archive.""" return [data.filename for data in self.filelist] From c341057b1e4d95b069b034fab38c1105b5bafa75 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 21 Jul 2025 22:35:54 +0000 Subject: [PATCH 3/5] NEWS entry about the removal. --- .../Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst diff --git a/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst b/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst new file mode 100644 index 00000000000000..af2d6ebaf95ca8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst @@ -0,0 +1,3 @@ +Removed the new in 3.14alpha ``zipfile.ZipFile.data_offset`` property as it +wasn't fully clear which behavior it should have in some situations so the +result was not always what a user might expect. From 830911006d4263330387150ae3a9849275f195f1 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 21 Jul 2025 22:37:39 +0000 Subject: [PATCH 4/5] reword news --- .../Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst b/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst index af2d6ebaf95ca8..388e916e2b0420 100644 --- a/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst +++ b/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst @@ -1,3 +1,3 @@ -Removed the new in 3.14alpha ``zipfile.ZipFile.data_offset`` property as it -wasn't fully clear which behavior it should have in some situations so the -result was not always what a user might expect. +Removed the unreleased ``zipfile.ZipFile.data_offset`` property added in 3.14a7 +as it wasn't fully clear which behavior it should have in some situations so +the result was not always what a user might expect. From 289b7cdd22a42a3ed3db89eca59526506736e533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 22 Jul 2025 00:50:31 +0200 Subject: [PATCH 5/5] Update Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst --- .../next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst b/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst index 388e916e2b0420..fd30fe156a1b32 100644 --- a/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst +++ b/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst @@ -1,3 +1,3 @@ -Removed the unreleased ``zipfile.ZipFile.data_offset`` property added in 3.14a7 +Removed the unreleased ``zipfile.ZipFile.data_offset`` property added in 3.14.0a7 as it wasn't fully clear which behavior it should have in some situations so the result was not always what a user might expect.