Skip to content

gh-81719: Add private members to zipfile.ZipFile to make it easier to subclass #137101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
9 changes: 8 additions & 1 deletion Doc/library/zipfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ ZipFile Objects

.. class:: ZipFile(file, mode='r', compression=ZIP_STORED, allowZip64=True, \
compresslevel=None, *, strict_timestamps=True, \
metadata_encoding=None)
metadata_encoding=None, \
zipinfo_class=ZipInfo, zipextfile_class=ZipExtFile)

Open a ZIP file, where *file* can be a path to a file (a string), a
file-like object or a :term:`path-like object`.
Expand Down Expand Up @@ -228,6 +229,9 @@ ZipFile Objects
:meth:`closed <close>` without adding any files to the archive, the appropriate
ZIP structures for an empty archive will be written to the file.

The keyword-only *zipinfo_class* and *zipextfile_class* arguments replace
the default :class:`ZipInfo` and :class:`!ZipExtFile` classes that
:class:`ZipFile` uses for archive members, for example with subclasses that
add custom behavior.

ZipFile is also a context manager and therefore supports the
:keyword:`with` statement. In the example, *myzip* is closed after the
:keyword:`!with` statement's suite is finished---even if an exception occurs::
Expand Down Expand Up @@ -278,6 +282,9 @@ ZipFile Objects
Added support for specifying member name encoding for reading
metadata in the zipfile's directory and file headers.

.. versionchanged:: next
Added the *zipinfo_class* and *zipextfile_class* parameters.


.. method:: ZipFile.close()

Expand Down
8 changes: 8 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,14 @@ unittest
(Contributed by Garry Cairns in :gh:`134567`.)


zipfile
-------

* :class:`zipfile.ZipFile` now accepts the keyword-only arguments *zipinfo_class*
and *zipextfile_class* to make it easier to subclass and extend.
(Contributed by Adi Roiban in :gh:`81719`.)


zlib
----

Expand Down
108 changes: 107 additions & 1 deletion Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import unittest.mock as mock
import zipfile


from pathlib import Path
from tempfile import TemporaryFile
from random import randint, random, randbytes

Expand Down Expand Up @@ -675,6 +675,112 @@ def test_add_file_after_2107(self):
zinfo = zipfp.getinfo(TESTFN)
self.assertEqual(zinfo.date_time, (2107, 12, 31, 23, 59, 59))

class CustomZipInfo(zipfile.ZipInfo):
    """ZipInfo subclass used to check that ZipFile honors *zipinfo_class*."""

class CustomZipExtFile(zipfile.ZipExtFile):
    """ZipExtFile subclass used to check that ZipFile honors *zipextfile_class*."""

def test_read_custom_zipinfo_and_zipextfile(self):
    """
    Reading an archive with custom *zipinfo_class* and *zipextfile_class*
    yields member infos and member file objects of those classes.
    """
    # Build the archive with a plain ZipFile so that only the read path
    # is exercised with the custom classes.
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, 'w', zipfile.ZIP_STORED) as writer:
        writer.writestr('test.txt', 'some-text-content')
    archive.seek(0)

    with zipfile.ZipFile(
            archive, 'r',
            zipinfo_class=self.CustomZipInfo,
            zipextfile_class=self.CustomZipExtFile,
    ) as reader:
        # The central directory is parsed into custom ZipInfo instances.
        infos = reader.infolist()
        self.assertEqual(1, len(infos))
        only_member = infos[0]
        self.assertIsInstance(only_member, self.CustomZipInfo)

        # A custom ZipInfo is accepted by open(), which hands back the
        # custom ZipExtFile.
        with reader.open(only_member, mode='r') as member_file:
            self.assertIsInstance(member_file, self.CustomZipExtFile)
            self.assertEqual(b'some-text-content', member_file.read())

def test_write_custom_zipinfo(self):
    """
    Every write API of a ZipFile created with a custom *zipinfo_class*
    accepts instances of that class and creates new members with it.
    """
    info_cls = self.CustomZipInfo
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, 'w', zipinfo_class=info_cls) as archive:
        # open(): an explicit custom instance is used as-is.
        explicit = info_cls('new-member.txt')
        with archive.open(explicit, mode='w') as writer:
            self.assertIs(explicit, writer._zinfo)

        # open(): given only a name, the member is built from the
        # custom class.
        with archive.open('other-member.txt', mode='w') as writer:
            writer.write(b'some-content')
        self.assertIsInstance(
            archive.NameToInfo['other-member.txt'], info_cls)

        # writestr(): accepts a custom instance or a plain archive name.
        archive.writestr(info_cls('some-member.txt'), b'some-new-content')
        archive.writestr('some-name.txt', b'other-content')
        self.assertIsInstance(
            archive.NameToInfo['some-name.txt'], info_cls)

        # mkdir(): accepts a custom instance or a plain directory name.
        # CRC is set by hand here; mkdir() appears to initialize it only
        # when it builds the ZipInfo itself from a name.
        directory = info_cls('some-directory/')
        directory.CRC = 0
        archive.mkdir(directory)
        archive.mkdir('dir-as-text/')
        self.assertIsInstance(
            archive.NameToInfo['dir-as-text/'], info_cls)

        # write(): a member created from a file on disk also uses the
        # custom class.
        with temp_dir() as source_dir:
            on_disk = Path(source_dir) / 'source.txt'
            with open(on_disk, 'wb') as fp:
                fp.write(b'some-content')
            archive.write(on_disk, arcname='newly-file.txt')
            self.assertIsInstance(
                archive.NameToInfo['newly-file.txt'], info_cls)

def test_extract_custom_zipinfo(self):
    """
    Members of an archive opened with a custom *zipinfo_class* can be
    extracted both by their custom ZipInfo object and by name.
    """
    # Create the archive with the default classes; only reading and
    # extraction use the custom ZipInfo.
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, 'w') as zipfp:
        zipfp.mkdir('dir-as-text/')
        zipfp.writestr('test.txt', b'new file content')

    archive.seek(0)
    with zipfile.ZipFile(
            archive, 'r', zipinfo_class=self.CustomZipInfo) as zipfp:
        with temp_dir() as extract_dir:
            expected_dir = Path(extract_dir) / 'dir-as-text'
            expected_file = Path(extract_dir) / 'test.txt'

            # Extract by info object: this is the path that relies on
            # extraction recognizing instances of the custom class
            # instead of treating them as member names.
            dir_info = zipfp.NameToInfo['dir-as-text/']
            self.assertIsInstance(dir_info, self.CustomZipInfo)
            zipfp.extract(dir_info, path=extract_dir)
            self.assertTrue(expected_dir.is_dir())

            # Extract by member name.
            zipfp.extract('test.txt', path=extract_dir)
            with expected_file.open('rb') as fp:
                self.assertEqual(b'new file content', fp.read())


@requires_zlib()
class DeflateTestsWithSourceFile(AbstractTestsWithSourceFile,
Expand Down
41 changes: 27 additions & 14 deletions Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,10 @@ def _get_decompressor(compress_type):


class _SharedFile:
"""
Protect an already opened member of the archive from being read or written
at the same time.
"""
def __init__(self, file, pos, close, lock, writing):
self._file = file
self._pos = pos
Expand Down Expand Up @@ -1372,7 +1376,7 @@ class ZipFile:
""" Class with methods to open, read, write, close, list zip files.

z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
compresslevel=None)
compresslevel=None, zipinfo_class=ZipInfo, zipextfile_class=ZipExtFile)

file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
Expand All @@ -1392,21 +1396,30 @@ class ZipFile:
When using ZIP_ZSTANDARD integers -7 through 22 are common,
see the CompressionParameter enum in compression.zstd for
details.

zipinfo_class: A class used in place of ZipInfo. This is designed to help
extend ZipFile, for example to implement other encryption or
compression methods.
zipextfile_class: A class used in place of ZipExtFile. This is designed to
help extend ZipFile, for example to implement other encryption or
compression methods.
"""

fp = None # Set here since __del__ checks it
_windows_illegal_name_trans_table = None

def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
compresslevel=None, *, strict_timestamps=True, metadata_encoding=None,
zipinfo_class=ZipInfo, zipextfile_class=ZipExtFile):
"""Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
or append 'a'."""
if mode not in ('r', 'w', 'x', 'a'):
raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

_check_compression(compression)

self._ZipInfo = zipinfo_class
self._ZipExtFile = zipextfile_class
self._allowZip64 = allowZip64
self._didModify = False
self.debug = 0 # Level of printing: 0 through 3
Expand Down Expand Up @@ -1558,7 +1571,7 @@ def _RealGetContents(self):
# Historical ZIP filename encoding
filename = filename.decode(self.metadata_encoding or 'cp437')
# Create ZipInfo instance to store file information
x = ZipInfo(filename)
x = self._ZipInfo(filename)
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Expand Down Expand Up @@ -1693,11 +1706,11 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
"Attempt to use ZIP archive that was already closed")

# Make sure we have an info object
if isinstance(name, ZipInfo):
if isinstance(name, self._ZipInfo):
# 'name' is already an info object
zinfo = name
elif mode == 'w':
zinfo = ZipInfo(name)
zinfo = self._ZipInfo(name)
zinfo.compress_type = self.compression
zinfo.compress_level = self.compresslevel
else:
Expand Down Expand Up @@ -1774,7 +1787,7 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
else:
pwd = None

return ZipExtFile(zef_file, mode + 'b', zinfo, pwd, True)
return self._ZipExtFile(zef_file, mode + 'b', zinfo, pwd, True)
except:
zef_file.close()
raise
Expand Down Expand Up @@ -1872,7 +1885,7 @@ def _extract_member(self, member, targetpath, pwd):
"""Extract the ZipInfo object 'member' to a physical
file on the path targetpath.
"""
if not isinstance(member, ZipInfo):
if not isinstance(member, self._ZipInfo):
member = self.getinfo(member)

# build the destination pathname, replacing
Expand Down Expand Up @@ -1952,7 +1965,7 @@ def write(self, filename, arcname=None,
"Can't write to ZIP archive while an open writing handle exists"
)

zinfo = ZipInfo.from_file(filename, arcname,
zinfo = self._ZipInfo.from_file(filename, arcname,
strict_timestamps=self._strict_timestamps)

if zinfo.is_dir():
Expand Down Expand Up @@ -1982,10 +1995,10 @@ def writestr(self, zinfo_or_arcname, data,
the name of the file in the archive."""
if isinstance(data, str):
data = data.encode("utf-8")
if isinstance(zinfo_or_arcname, ZipInfo):
if isinstance(zinfo_or_arcname, self._ZipInfo):
zinfo = zinfo_or_arcname
else:
zinfo = ZipInfo(zinfo_or_arcname)._for_archive(self)
zinfo = self._ZipInfo(zinfo_or_arcname)._for_archive(self)

if not self.fp:
raise ValueError(
Expand All @@ -2008,15 +2021,15 @@ def writestr(self, zinfo_or_arcname, data,

def mkdir(self, zinfo_or_directory_name, mode=511):
"""Creates a directory inside the zip archive."""
if isinstance(zinfo_or_directory_name, ZipInfo):
if isinstance(zinfo_or_directory_name, self._ZipInfo):
zinfo = zinfo_or_directory_name
if not zinfo.is_dir():
raise ValueError("The given ZipInfo does not describe a directory")
elif isinstance(zinfo_or_directory_name, str):
directory_name = zinfo_or_directory_name
if not directory_name.endswith("/"):
directory_name += "/"
zinfo = ZipInfo(directory_name)
zinfo = self._ZipInfo(directory_name)
zinfo.compress_size = 0
zinfo.CRC = 0
zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:class:`zipfile.ZipFile` now accepts the keyword-only *zipinfo_class* and *zipextfile_class* parameters, making it easier to subclass and extend it.
Loading