From 4b29e2e81d094aa902ab145d79ef9f97d0499df1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 23 Jun 2023 21:46:48 +0100 Subject: [PATCH 01/31] Add `pathlib._VirtualPath` --- Lib/pathlib.py | 405 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 320 insertions(+), 85 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index e15718dc98d677..d05c499eb085c2 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -5,6 +5,7 @@ operating systems. """ +import contextlib import fnmatch import functools import io @@ -15,10 +16,19 @@ import sys import warnings from _collections_abc import Sequence -from errno import ENOENT, ENOTDIR, EBADF, ELOOP +from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from urllib.parse import quote_from_bytes as urlquote_from_bytes +try: + import pwd +except ImportError: + pwd = None +try: + import grp +except ImportError: + grp = None + __all__ = [ "UnsupportedOperation", @@ -771,23 +781,20 @@ class PureWindowsPath(PurePath): # Filesystem-accessing classes -class Path(PurePath): - """PurePath subclass that can make system calls. - - Path represents a filesystem path but unlike PurePath, also offers - methods to do system calls on path objects. Depending on your system, - instantiating a Path will return either a PosixPath or a WindowsPath - object. You can also instantiate a PosixPath or WindowsPath directly, - but cannot instantiate a WindowsPath on a POSIX system or vice versa. +class _VirtualPath(PurePath): + """PurePath subclass for virtual filesystems, such as archives and remote + storage. """ __slots__ = () + __bytes__ = None + __fspath__ = None def stat(self, *, follow_symlinks=True): """ Return the result of the stat() system call on this path, like os.stat() does. """ - return os.stat(self, follow_symlinks=follow_symlinks) + raise UnsupportedOperation(f"{type(self).__name__}.stat()") def lstat(self): """ @@ -854,7 +861,21 @@ def is_mount(self): """ Check if this path is a mount point """ - return self._flavour.ismount(self) + # Need to exist and be a dir + if not self.exists() or not self.is_dir(): + return False + + try: + parent_dev = self.parent.stat().st_dev + except OSError: + return False + + dev = self.stat().st_dev + if dev != parent_dev: + return True + ino = self.stat().st_ino + parent_ino = self.parent.stat().st_ino + return ino == parent_ino def is_symlink(self): """ @@ -875,7 +896,15 @@ def is_junction(self): """ Whether this path is a junction. """ - return self._flavour.isjunction(self) + import stat + try: + return self.lstat().st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT + except OSError as e: + if not _ignore_error(e): + raise + return False + except (ValueError, AttributeError): + return False def is_block_device(self): """ @@ -958,9 +987,7 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed by this path and return a file object, as the built-in open() function does. """ - if "b" not in mode: - encoding = io.text_encoding(encoding) - return io.open(self, mode, buffering, encoding, errors, newline) + raise UnsupportedOperation(f"{type(self).__name__}.open()") def read_bytes(self): """ @@ -1003,14 +1030,10 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - for name in os.listdir(self): - yield self._make_child_relpath(name) + raise UnsupportedOperation(f"{type(self).__name__}.iterdir()") def _scandir(self): - # bpo-24132: a future version of pathlib will support subclassing of - # pathlib.Path to customize how the filesystem is accessed. This - # includes scandir(), which is used to implement glob(). - return os.scandir(self) + return contextlib.nullcontext(list(self.iterdir())) def _make_child_relpath(self, name): sep = self._flavour.sep @@ -1134,13 +1157,13 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): # blow up for a minor reason when (say) a thousand readable # directories are still left to visit. That logic is copied here. try: - scandir_it = path._scandir() + scandir_obj = path._scandir() except OSError as error: if on_error is not None: on_error(error) continue - with scandir_it: + with scandir_obj as scandir_it: dirnames = [] filenames = [] for entry in scandir_it: @@ -1162,6 +1185,210 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): paths += [path._make_child_relpath(d) for d in reversed(dirnames)] + def absolute(self): + """Return an absolute version of this path by prepending the current + working directory. No normalization or symlink resolution is performed. + + Use resolve() to get the canonical path to a file. + """ + raise UnsupportedOperation(f"{type(self).__name__}.absolute()") + + @classmethod + def cwd(cls): + """Return a new path pointing to the current working directory.""" + return cls().absolute() + + def expanduser(self): + """ Return a new path with expanded ~ and ~user constructs + (as returned by os.path.expanduser) + """ + raise UnsupportedOperation(f"{type(self).__name__}.expanduser()") + + @classmethod + def home(cls): + """Return a new path pointing to the user's home directory (as + returned by os.path.expanduser('~')). + """ + return cls("~").expanduser() + + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + raise UnsupportedOperation(f"{type(self).__name__}.readlink()") + + def resolve(self, strict=False): + """ + Resolve '..' segments in the path. Where possible, make the path + absolute and resolve symlinks on the way. + """ + try: + path = self.absolute() + tail_idx = len(path._tail) - len(self._tail) + except UnsupportedOperation: + path = self + tail_idx = 0 + if not path._tail: + return path + drv = path.drive + root = path.root + tail = list(path._tail) + dirty = False + link_count = 0 + readlink_supported = True + while tail_idx < len(tail): + if tail[tail_idx] == '..': + if tail_idx == 0: + if root: + # Delete '..' part immediately following root. + del tail[tail_idx] + dirty = True + continue + elif tail[tail_idx - 1] != '..': + # Delete '..' part and its predecessor. + tail_idx -= 1 + del tail[tail_idx:tail_idx + 2] + dirty = True + continue + elif readlink_supported: + link = self._from_parsed_parts(drv, root, tail[:tail_idx + 1]) + try: + link_target = link.readlink() + except UnsupportedOperation: + readlink_supported = False + except OSError as e: + if e.errno != EINVAL: + if strict: + raise + else: + break + else: + link_count += 1 + if link_count >= 40: + raise OSError(ELOOP, "Symlink loop", path) + elif link_target.root or link_target.drive: + link_target = link.parent / link_target + drv = link_target.drive + root = link_target.root + tail[:tail_idx + 1] = link_target._tail + tail_idx = 0 + else: + tail[tail_idx:tail_idx + 1] = link_target._tail + dirty = True + continue + tail_idx += 1 + if dirty: + path = self._from_parsed_parts(drv, root, tail) + return path + + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + raise UnsupportedOperation(f"{type(self).__name__}.symlink_to()") + + def hardlink_to(self, target): + """ + Make this path a hard link pointing to the same file as *target*. + + Note the order of arguments (self, target) is the reverse of os.link's. + """ + raise UnsupportedOperation(f"{type(self).__name__}.hardlink_to()") + + def touch(self, mode=0o666, exist_ok=True): + """ + Create this file with the given access mode, if it doesn't exist. + """ + raise UnsupportedOperation(f"{type(self).__name__}.touch()") + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + """ + Create a new directory at this given path. + """ + raise UnsupportedOperation(f"{type(self).__name__}.mkdir()") + + def rename(self, target): + """ + Rename this path to the target path. + + The target path may be absolute or relative. Relative paths are + interpreted relative to the current working directory, *not* the + directory of the Path object. + + Returns the new Path instance pointing to the target path. + """ + raise UnsupportedOperation(f"{type(self).__name__}.rename()") + + def replace(self, target): + """ + Rename this path to the target path, overwriting if that path exists. + + The target path may be absolute or relative. Relative paths are + interpreted relative to the current working directory, *not* the + directory of the Path object. + + Returns the new Path instance pointing to the target path. + """ + raise UnsupportedOperation(f"{type(self).__name__}.replace()") + + def chmod(self, mode, *, follow_symlinks=True): + """ + Change the permissions of the path, like os.chmod(). + """ + raise UnsupportedOperation(f"{type(self).__name__}.chmod()") + + def lchmod(self, mode): + """ + Like chmod(), except if the path points to a symlink, the symlink's + permissions are changed, rather than its target's. + """ + self.chmod(mode, follow_symlinks=False) + + def unlink(self, missing_ok=False): + """ + Remove this file or link. + If the path is a directory, use rmdir() instead. + """ + raise UnsupportedOperation(f"{type(self).__name__}.unlink()") + + def rmdir(self): + """ + Remove this directory. The directory must be empty. + """ + raise UnsupportedOperation(f"{type(self).__name__}.rmdir()") + + def owner(self): + """ + Return the login name of the file owner. + """ + raise UnsupportedOperation(f"{type(self).__name__}.owner()") + + def group(self): + """ + Return the group name of the file gid. + """ + raise UnsupportedOperation(f"{type(self).__name__}.group()") + + def as_uri(self): + """Return the path as a URI.""" + raise UnsupportedOperation(f"{type(self).__name__}.as_uri()") + + +class Path(_VirtualPath): + """PurePath subclass that can make system calls. + + Path represents a filesystem path but unlike PurePath, also offers + methods to do system calls on path objects. Depending on your system, + instantiating a Path will return either a PosixPath or a WindowsPath + object. You can also instantiate a PosixPath or WindowsPath directly, + but cannot instantiate a WindowsPath on a POSIX system or vice versa. + """ + __slots__ = () + __bytes__ = PurePath.__bytes__ + __fspath__ = PurePath.__fspath__ + as_uri = PurePath.as_uri + def __init__(self, *args, **kwargs): if kwargs: msg = ("support for supplying keyword arguments to pathlib.PurePath " @@ -1174,21 +1401,46 @@ def __new__(cls, *args, **kwargs): cls = WindowsPath if os.name == 'nt' else PosixPath return object.__new__(cls) - @classmethod - def cwd(cls): - """Return a new path pointing to the current working directory.""" - # We call 'absolute()' rather than using 'os.getcwd()' directly to - # enable users to replace the implementation of 'absolute()' in a - # subclass and benefit from the new behaviour here. This works because - # os.path.abspath('.') == os.getcwd(). - return cls().absolute() + def stat(self, *, follow_symlinks=True): + """ + Return the result of the stat() system call on this path, like + os.stat() does. + """ + return os.stat(self, follow_symlinks=follow_symlinks) - @classmethod - def home(cls): - """Return a new path pointing to the user's home directory (as - returned by os.path.expanduser('~')). + def is_mount(self): """ - return cls("~").expanduser() + Check if this path is a mount point + """ + return self._flavour.ismount(self) + + def is_junction(self): + """ + Whether this path is a junction. + """ + return self._flavour.isjunction(self) + + def open(self, mode='r', buffering=-1, encoding=None, + errors=None, newline=None): + """ + Open the file pointed by this path and return a file object, as + the built-in open() function does. + """ + if "b" not in mode: + encoding = io.text_encoding(encoding) + return io.open(self, mode, buffering, encoding, errors, newline) + + def iterdir(self): + """Yield path objects of the directory contents. + + The children are yielded in arbitrary order, and the + special entries '.' and '..' are not included. + """ + for name in os.listdir(self): + yield self._make_child_relpath(name) + + def _scandir(self): + return os.scandir(self) def absolute(self): """Return an absolute version of this path by prepending the current @@ -1241,34 +1493,26 @@ def check_eloop(e): check_eloop(e) return p - def owner(self): - """ - Return the login name of the file owner. - """ - try: - import pwd + if pwd: + def owner(self): + """ + Return the login name of the file owner. + """ return pwd.getpwuid(self.stat().st_uid).pw_name - except ImportError: - raise UnsupportedOperation("Path.owner() is unsupported on this system") - - def group(self): - """ - Return the group name of the file gid. - """ - try: - import grp + if grp: + def group(self): + """ + Return the group name of the file gid. + """ return grp.getgrgid(self.stat().st_gid).gr_name - except ImportError: - raise UnsupportedOperation("Path.group() is unsupported on this system") - def readlink(self): - """ - Return the path to which the symbolic link points. - """ - if not hasattr(os, "readlink"): - raise UnsupportedOperation("os.readlink() not available on this system") - return self.with_segments(os.readlink(self)) + if hasattr(os, "readlink"): + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + return self.with_segments(os.readlink(self)) def touch(self, mode=0o666, exist_ok=True): """ @@ -1315,13 +1559,6 @@ def chmod(self, mode, *, follow_symlinks=True): """ os.chmod(self, mode, follow_symlinks=follow_symlinks) - def lchmod(self, mode): - """ - Like chmod(), except if the path points to a symlink, the symlink's - permissions are changed, rather than its target's. - """ - self.chmod(mode, follow_symlinks=False) - def unlink(self, missing_ok=False): """ Remove this file or link. @@ -1365,24 +1602,22 @@ def replace(self, target): os.replace(self, target) return self.with_segments(target) - def symlink_to(self, target, target_is_directory=False): - """ - Make this path a symlink pointing to the target path. - Note the order of arguments (link, target) is the reverse of os.symlink. - """ - if not hasattr(os, "symlink"): - raise UnsupportedOperation("os.symlink() not available on this system") - os.symlink(target, self, target_is_directory) - - def hardlink_to(self, target): - """ - Make this path a hard link pointing to the same file as *target*. - - Note the order of arguments (self, target) is the reverse of os.link's. - """ - if not hasattr(os, "link"): - raise UnsupportedOperation("os.link() not available on this system") - os.link(target, self) + if hasattr(os, "symlink"): + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + os.symlink(target, self, target_is_directory) + + if hasattr(os, "link"): + def hardlink_to(self, target): + """ + Make this path a hard link pointing to the same file as *target*. + + Note the order of arguments (self, target) is the reverse of os.link's. + """ + os.link(target, self) def expanduser(self): """ Return a new path with expanded ~ and ~user constructs From 8ce0139454b9a4937cc0c9a0b64bf971c642a38b Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 2 Jul 2023 19:42:40 +0100 Subject: [PATCH 02/31] Add tests for `pathlib._VirtualPath` --- Lib/test/test_pathlib.py | 324 ++++++++++++++++++++++++++++++++++----- 1 file changed, 282 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 464a835212d472..5d3750fd152a7d 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1566,14 +1566,165 @@ def test_group(self): # -# Tests for the concrete classes. +# Tests for the virtual classes. # -class PathTest(unittest.TestCase): - """Tests for the FS-accessing functionalities of the Path classes.""" +class VirtualPathTest(PurePathTest): + cls = pathlib._VirtualPath - cls = pathlib.Path - can_symlink = os_helper.can_symlink() + def test_unsupported_operation(self): + P = self.cls + p = self.cls() + e = pathlib.UnsupportedOperation + self.assertRaises(e, p.stat) + self.assertRaises(e, p.lstat) + self.assertRaises(e, p.exists) + self.assertRaises(e, p.samefile, 'foo') + self.assertRaises(e, p.is_dir) + self.assertRaises(e, p.is_file) + self.assertRaises(e, p.is_mount) + self.assertRaises(e, p.is_symlink) + self.assertRaises(e, p.is_block_device) + self.assertRaises(e, p.is_char_device) + self.assertRaises(e, p.is_fifo) + self.assertRaises(e, p.is_socket) + self.assertRaises(e, p.is_junction) + self.assertRaises(e, p.open) + self.assertRaises(e, p.read_bytes) + self.assertRaises(e, p.read_text) + self.assertRaises(e, p.write_bytes, b'foo') + self.assertRaises(e, p.write_text, 'foo') + self.assertRaises(e, p.iterdir) + self.assertRaises(e, p.glob, '*') + self.assertRaises(e, p.rglob, '*') + self.assertRaises(e, lambda: list(p.walk())) + self.assertRaises(e, p.absolute) + self.assertRaises(e, P.cwd) + self.assertRaises(e, p.expanduser) + self.assertRaises(e, p.home) + self.assertRaises(e, p.readlink) + self.assertRaises(e, p.symlink_to, 'foo') + self.assertRaises(e, p.hardlink_to, 'foo') + self.assertRaises(e, p.mkdir) + self.assertRaises(e, p.touch) + self.assertRaises(e, p.rename, 'foo') + self.assertRaises(e, p.replace, 'foo') + self.assertRaises(e, p.chmod, 0o755) + self.assertRaises(e, p.lchmod, 0o755) + self.assertRaises(e, p.unlink) + self.assertRaises(e, p.rmdir) + self.assertRaises(e, p.owner) + self.assertRaises(e, p.group) + self.assertRaises(e, p.as_uri) + + def test_as_uri_common(self): + e = pathlib.UnsupportedOperation + self.assertRaises(e, self.cls().as_uri) + + def test_fspath_common(self): + self.assertRaises(TypeError, os.fspath, self.cls()) + + def test_as_bytes_common(self): + self.assertRaises(TypeError, bytes, self.cls()) + + +class DummyVirtualPathIO(io.BytesIO): + """ + Used by DummyVirtualPath to implement `open('w')` + """ + + def __init__(self, files, path): + super().__init__() + self.files = files + self.path = path + + def close(self): + self.files[self.path] = self.getvalue() + super().close() + + +class DummyVirtualPath(pathlib._VirtualPath): + """ + Simple implementation of VirtualPath that keeps files and directories in + memory. + """ + _files = {} + _directories = {} + _symlinks = {} + + def stat(self, *, follow_symlinks=True): + if follow_symlinks: + path = str(self.resolve()) + else: + path = str(self.parent.resolve() / self.name) + if path in self._files: + st_mode = stat.S_IFREG + elif path in self._directories: + st_mode = stat.S_IFDIR + elif path in self._symlinks: + st_mode = stat.S_IFLNK + else: + raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) + return os.stat_result((st_mode, hash(str(self)), 0, 0, 0, 0, 0, 0, 0, 0)) + + def open(self, mode='r', buffering=-1, encoding=None, + errors=None, newline=None): + if buffering != -1: + raise NotImplementedError + path_obj = self.resolve() + path = str(path_obj) + name = path_obj.name + parent = str(path_obj.parent) + if path in self._directories: + raise IsADirectoryError(errno.EISDIR, "Is a directory", path) + + text = 'b' not in mode + mode = ''.join(c for c in mode if c not in 'btU') + if mode == 'r': + if path not in self._files: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + stream = io.BytesIO(self._files[path]) + elif mode == 'w': + if parent not in self._directories: + raise FileNotFoundError(errno.ENOENT, "File not found", parent) + stream = DummyVirtualPathIO(self._files, path) + self._files[path] = b'' + self._directories[parent].add(name) + else: + raise NotImplementedError + if text: + stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline) + return stream + + def iterdir(self): + path = str(self.resolve()) + if path in self._files: + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) + elif path in self._directories: + for name in self._directories[path]: + yield self / name + else: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + try: + self._directories[str(self.parent)].add(self.name) + self._directories[str(self)] = set() + except KeyError: + if not parents or self.parent == self: + raise FileNotFoundError(errno.ENOENT, "File not found", str(self.parent)) from None + self.parent.mkdir(parents=True, exist_ok=True) + self.mkdir(mode, parents=False, exist_ok=exist_ok) + except FileExistsError: + if not exist_ok: + raise + + +class DummyVirtualPathTest(unittest.TestCase): + """Tests for VirtualPath methods that use stat(), open() and iterdir().""" + + cls = DummyVirtualPath + can_symlink = False # (BASE) # | @@ -1596,37 +1747,37 @@ class PathTest(unittest.TestCase): # def setUp(self): - def cleanup(): - os.chmod(join('dirE'), 0o777) - os_helper.rmtree(BASE) - self.addCleanup(cleanup) - os.mkdir(BASE) - os.mkdir(join('dirA')) - os.mkdir(join('dirB')) - os.mkdir(join('dirC')) - os.mkdir(join('dirC', 'dirD')) - os.mkdir(join('dirE')) - with open(join('fileA'), 'wb') as f: - f.write(b"this is file A\n") - with open(join('dirB', 'fileB'), 'wb') as f: - f.write(b"this is file B\n") - with open(join('dirC', 'fileC'), 'wb') as f: - f.write(b"this is file C\n") - with open(join('dirC', 'novel.txt'), 'wb') as f: - f.write(b"this is a novel\n") - with open(join('dirC', 'dirD', 'fileD'), 'wb') as f: - f.write(b"this is file D\n") - os.chmod(join('dirE'), 0) - if self.can_symlink: - # Relative symlinks. - os.symlink('fileA', join('linkA')) - os.symlink('non-existing', join('brokenLink')) - os.symlink('dirB', join('linkB'), target_is_directory=True) - os.symlink(os.path.join('..', 'dirB'), join('dirA', 'linkC'), target_is_directory=True) - # This one goes upwards, creating a loop. - os.symlink(os.path.join('..', 'dirB'), join('dirB', 'linkD'), target_is_directory=True) - # Broken symlink (pointing to itself). - os.symlink('brokenLinkLoop', join('brokenLinkLoop')) + # note: this must be kept in sync with `PathTest.setUp()` + cls = self.cls + cls._files.clear() + cls._directories.clear() + cls._symlinks.clear() + cls._files.update({ + f'{BASE}/fileA': b'this is file A\n', + f'{BASE}/dirB/fileB': b'this is file B\n', + f'{BASE}/dirC/fileC': b'this is file C\n', + f'{BASE}/dirC/dirD/fileD': b'this is file D\n', + f'{BASE}/dirC/novel.txt': b'this is a novel\n', + }) + cls._directories.update({ + BASE: {'dirA', 'dirB', 'dirC', 'dirE', 'fileA', }, + f'{BASE}/dirA': set(), + f'{BASE}/dirB': {'fileB'}, + f'{BASE}/dirC': {'dirD', 'fileC', 'novel.txt'}, + f'{BASE}/dirC/dirD': {'fileD'}, + f'{BASE}/dirE': {}, + }) + dirname = BASE + while True: + dirname, basename = os.path.split(dirname) + if not basename: + break + cls._directories[dirname] = {basename} + + def tempdir(self): + path = self.cls(BASE).with_name('tmp-dirD') + path.mkdir() + return path def assertFileNotFound(self, func, *args, **kwargs): with self.assertRaises(FileNotFoundError) as cm: @@ -1975,9 +2126,11 @@ def test_rglob_symlink_loop(self): def test_glob_many_open_files(self): depth = 30 P = self.cls - base = P(BASE) / 'deep' - p = P(base, *(['d']*depth)) - p.mkdir(parents=True) + p = base = P(BASE) / 'deep' + p.mkdir() + for _ in range(depth): + p /= 'd' + p.mkdir() pattern = '/'.join(['*'] * depth) iters = [base.glob(pattern) for j in range(100)] for it in iters: @@ -2109,9 +2262,7 @@ def test_resolve_common(self): # resolves to 'dirB/..' first before resolving to parent of dirB. self._check_resolve_relative(p, P(BASE, 'foo', 'in', 'spam'), False) # Now create absolute symlinks. - d = os_helper._longpath(tempfile.mkdtemp(suffix='-dirD', - dir=os.getcwd())) - self.addCleanup(os_helper.rmtree, d) + d = self.tempdir() P(BASE, 'dirA', 'linkX').symlink_to(d) P(BASE, str(d), 'linkY').symlink_to(join('dirB')) p = P(BASE, 'dirA', 'linkX', 'linkY', 'fileB') @@ -2353,6 +2504,10 @@ def _check_complex_symlinks(self, link0_target): self.assertEqualNormCase(str(p), BASE) # Resolve relative paths. + try: + self.cls().absolute() + except pathlib.UnsupportedOperation: + return old_path = os.getcwd() os.chdir(BASE) try: @@ -2380,6 +2535,91 @@ def test_complex_symlinks_relative(self): def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) + +class DummyVirtualPathWithSymlinks(DummyVirtualPath): + def readlink(self): + path = str(self) + if path in self._symlinks: + return self.with_segments(self._symlinks[path]) + elif path in self._files or path in self._directories: + raise OSError(errno.EINVAL, "Not a symlink", path) + else: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + + def symlink_to(self, target, target_is_directory=False): + self._directories[str(self.parent)].add(self.name) + self._symlinks[str(self)] = str(target) + + +class DummyVirtualPathWithSymlinksTest(DummyVirtualPathTest): + cls = DummyVirtualPathWithSymlinks + can_symlink = True + + def setUp(self): + super().setUp() + cls = self.cls + cls._symlinks.update({ + f'{BASE}/linkA': 'fileA', + f'{BASE}/linkB': 'dirB', + f'{BASE}/dirA/linkC': '../dirB', + f'{BASE}/dirB/linkD': '../dirB', + f'{BASE}/brokenLink': 'non-existing', + f'{BASE}/brokenLinkLoop': 'brokenLinkLoop', + }) + cls._directories[BASE].update({'linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'}) + cls._directories[f'{BASE}/dirA'].add('linkC') + cls._directories[f'{BASE}/dirB'].add('linkD') + + +# +# Tests for the concrete classes. +# + +class PathTest(DummyVirtualPathTest): + """Tests for the FS-accessing functionalities of the Path classes.""" + cls = pathlib.Path + can_symlink = os_helper.can_symlink() + + def setUp(self): + # note: this must be kept in sync with `DummyVirtualPathTest.setUp()` + def cleanup(): + os.chmod(join('dirE'), 0o777) + os_helper.rmtree(BASE) + self.addCleanup(cleanup) + os.mkdir(BASE) + os.mkdir(join('dirA')) + os.mkdir(join('dirB')) + os.mkdir(join('dirC')) + os.mkdir(join('dirC', 'dirD')) + os.mkdir(join('dirE')) + with open(join('fileA'), 'wb') as f: + f.write(b"this is file A\n") + with open(join('dirB', 'fileB'), 'wb') as f: + f.write(b"this is file B\n") + with open(join('dirC', 'fileC'), 'wb') as f: + f.write(b"this is file C\n") + with open(join('dirC', 'novel.txt'), 'wb') as f: + f.write(b"this is a novel\n") + with open(join('dirC', 'dirD', 'fileD'), 'wb') as f: + f.write(b"this is file D\n") + os.chmod(join('dirE'), 0) + if self.can_symlink: + # Relative symlinks. + os.symlink('fileA', join('linkA')) + os.symlink('non-existing', join('brokenLink')) + os.symlink('dirB', join('linkB'), target_is_directory=True) + os.symlink(os.path.join('..', 'dirB'), join('dirA', 'linkC'), target_is_directory=True) + # This one goes upwards, creating a loop. + os.symlink(os.path.join('..', 'dirB'), join('dirB', 'linkD'), target_is_directory=True) + # Broken symlink (pointing to itself). + os.symlink('brokenLinkLoop', join('brokenLinkLoop')) + + def tempdir(self): + d = os_helper._longpath(tempfile.mkdtemp(suffix='-dirD', + dir=os.getcwd())) + self.addCleanup(os_helper.rmtree, d) + return d + def test_concrete_class(self): if self.cls is pathlib.Path: expected = pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath From b850d116c6c21d17f8a0b01ba40b97fb6384e515 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 2 Jul 2023 20:10:16 +0100 Subject: [PATCH 03/31] Fix tests on Windows --- Lib/test/test_pathlib.py | 42 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 5d3750fd152a7d..d8f6648e4d2cb7 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1752,24 +1752,25 @@ def setUp(self): cls._files.clear() cls._directories.clear() cls._symlinks.clear() + join = cls._flavour.join cls._files.update({ - f'{BASE}/fileA': b'this is file A\n', - f'{BASE}/dirB/fileB': b'this is file B\n', - f'{BASE}/dirC/fileC': b'this is file C\n', - f'{BASE}/dirC/dirD/fileD': b'this is file D\n', - f'{BASE}/dirC/novel.txt': b'this is a novel\n', + join(BASE, 'fileA'): b'this is file A\n', + join(BASE, 'dirB', 'fileB'): b'this is file B\n', + join(BASE, 'dirC', 'fileC'): b'this is file C\n', + join(BASE, 'dirC', 'dirD', 'fileD'): b'this is file D\n', + join(BASE, 'dirC', 'novel.txt'): b'this is a novel\n', }) cls._directories.update({ - BASE: {'dirA', 'dirB', 'dirC', 'dirE', 'fileA', }, - f'{BASE}/dirA': set(), - f'{BASE}/dirB': {'fileB'}, - f'{BASE}/dirC': {'dirD', 'fileC', 'novel.txt'}, - f'{BASE}/dirC/dirD': {'fileD'}, - f'{BASE}/dirE': {}, + BASE: {'dirA', 'dirB', 'dirC', 'dirE', 'fileA'}, + join(BASE, 'dirA'): set(), + join(BASE, 'dirB'): {'fileB'}, + join(BASE, 'dirC'): {'dirD', 'fileC', 'novel.txt'}, + join(BASE, 'dirC', 'dirD'): {'fileD'}, + join(BASE, 'dirE'): {}, }) dirname = BASE while True: - dirname, basename = os.path.split(dirname) + dirname, basename = cls._flavour.split(dirname) if not basename: break cls._directories[dirname] = {basename} @@ -2558,17 +2559,18 @@ class DummyVirtualPathWithSymlinksTest(DummyVirtualPathTest): def setUp(self): super().setUp() cls = self.cls + join = cls._flavour.join cls._symlinks.update({ - f'{BASE}/linkA': 'fileA', - f'{BASE}/linkB': 'dirB', - f'{BASE}/dirA/linkC': '../dirB', - f'{BASE}/dirB/linkD': '../dirB', - f'{BASE}/brokenLink': 'non-existing', - f'{BASE}/brokenLinkLoop': 'brokenLinkLoop', + join(BASE, 'linkA'): 'fileA', + join(BASE, 'linkB'): 'dirB', + join(BASE, 'dirA', 'linkC'): join('..', 'dirB'), + join(BASE, 'dirB', 'linkD'): join('..', 'dirB'), + join(BASE, 'brokenLink'): 'non-existing', + join(BASE, 'brokenLinkLoop'): 'brokenLinkLoop', }) cls._directories[BASE].update({'linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'}) - cls._directories[f'{BASE}/dirA'].add('linkC') - cls._directories[f'{BASE}/dirB'].add('linkD') + cls._directories[join(BASE, 'dirA')].add('linkC') + cls._directories[join(BASE, 'dirB')].add('linkD') # From 39bf6b378b2d5111ba8c05a4d6b3d0f73c1ca6fc Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 11:36:20 +0100 Subject: [PATCH 04/31] Fix tests on Windows (take 2) --- Lib/test/test_pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index d8f6648e4d2cb7..456ee3fe714ba4 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2253,7 +2253,7 @@ def test_resolve_common(self): self._check_resolve_relative(p, P(BASE, 'dirB', 'fileB', 'foo', 'in', 'spam'), False) p = P(BASE, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') - if os.name == 'nt': + if isinstance(p, pathlib.WindowsPath): # In Windows, if linkY points to dirB, 'dirA\linkY\..' # resolves to 'dirA' without resolving linkY first. self._check_resolve_relative(p, P(BASE, 'dirA', 'foo', 'in', From 0515deaf96ad8d148936400ba88d8f9518ce3a1c Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 12:24:56 +0100 Subject: [PATCH 05/31] Fix tests on Windows (take 3) --- Lib/test/test_pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 456ee3fe714ba4..d751cd0e930901 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2273,7 +2273,7 @@ def test_resolve_common(self): self._check_resolve_relative(p, P(BASE, 'dirB', 'foo', 'in', 'spam'), False) p = P(BASE, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') - if os.name == 'nt': + if isinstance(p, pathlib.WindowsPath): # In Windows, if linkY points to dirB, 'dirA\linkY\..' # resolves to 'dirA' without resolving linkY first. self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) From 596016f2154fc916ad48ad79aacaaf5335fedb5e Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 12:30:10 +0100 Subject: [PATCH 06/31] Fix tests on Windows (take 4) --- Lib/test/test_pathlib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index d751cd0e930901..15065ff5f51767 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2253,7 +2253,7 @@ def test_resolve_common(self): self._check_resolve_relative(p, P(BASE, 'dirB', 'fileB', 'foo', 'in', 'spam'), False) p = P(BASE, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') - if isinstance(p, pathlib.WindowsPath): + if os.name == 'nt' and isinstance(p, pathlib.Path): # In Windows, if linkY points to dirB, 'dirA\linkY\..' # resolves to 'dirA' without resolving linkY first. self._check_resolve_relative(p, P(BASE, 'dirA', 'foo', 'in', @@ -2273,7 +2273,7 @@ def test_resolve_common(self): self._check_resolve_relative(p, P(BASE, 'dirB', 'foo', 'in', 'spam'), False) p = P(BASE, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') - if isinstance(p, pathlib.WindowsPath): + if os.name == 'nt' and isinstance(p, pathlib.Path): # In Windows, if linkY points to dirB, 'dirA\linkY\..' # resolves to 'dirA' without resolving linkY first. self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) From 1a6122bc0ee919fd468cc2460db4da3a639375d4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 19:52:31 +0100 Subject: [PATCH 07/31] Add `tarfile.TarPath` --- Lib/tarfile.py | 229 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 228 insertions(+), 1 deletion(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index df4e41f7a0d23a..a8ca2264040063 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -37,9 +37,13 @@ # Imports #--------- from builtins import open as bltn_open +from collections import namedtuple +import errno import sys import os import io +import pathlib +import posixpath import shutil import stat import time @@ -69,7 +73,7 @@ "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter", "tar_filter", "FilterError", "AbsoluteLinkError", "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", - "LinkOutsideDestinationError"] + "LinkOutsideDestinationError", "TarPath"] #--------------------------------------------------------- @@ -2772,6 +2776,229 @@ def __exit__(self, type, value, traceback): self.fileobj.close() self.closed = True + +_tar_stat_fields = ('st_mode st_ino st_dev st_nlink st_uid st_gid ' + 'st_size st_atime st_mtime st_ctime st_uname st_gname') + + +class _TarStatResult(namedtuple('_TarStatResult', _tar_stat_fields)): + """Tar-specific version of os.stat_result. Returned by TarPath.stat().""" + __slots__ = () + + @classmethod + def from_tarinfo(cls, tarfile, tarinfo): + """Create a _TarStatResult from TarFile and TarInfo objects.""" + if tarinfo.type in REGULAR_TYPES: + st_mode = stat.S_IFREG + elif tarinfo.type == DIRTYPE: + st_mode = stat.S_IFDIR + elif tarinfo.type == SYMTYPE or tarinfo.type == LNKTYPE: + st_mode = stat.S_IFLNK + elif tarinfo.type == FIFOTYPE: + st_mode = stat.S_IFIFO + elif tarinfo.type == CHRTYPE: + st_mode = stat.S_IFCHR + elif tarinfo.type == BLKTYPE: + st_mode = stat.S_IFBLK + else: + raise ValueError(tarinfo.type) + return cls(st_mode=tarinfo.mode | st_mode, + st_ino=tarinfo.offset_data, + st_dev=id(tarfile), + st_nlink=0, + st_uid=tarinfo.uid, + st_gid=tarinfo.gid, + st_size=tarinfo.size, + st_atime=0, + st_mtime=tarinfo.mtime, + st_ctime=0, + st_uname=tarinfo.uname, + st_gname=tarinfo.gname) + + @classmethod + def implied_directory(cls, tarfile, path): + """Create a _TarStatResult for a directory that is implied to exist + by another archive member's path. + """ + return cls(stat.S_IFDIR, hash(path), id(tarfile), 0, 0, 0, 0, 0, 0, 0, None, None) + + +class _TarPathWriter(io.BytesIO): + """File object that flushes its contents to a tar archive on close. + Returned by TarPath.open(mode="w"). + """ + + def __init__(self, tarfile, path): + super().__init__() + self.tarfile = tarfile + self.path = path + + def close(self): + info = TarInfo(self.path) + info.size = self.tell() + self.seek(0) + self.tarfile.addfile(info, self) + super().close() + + +class TarPath(pathlib._VirtualPath): + """A pathlib-compatible interface for tar files.""" + + __slots__ = ('tarfile',) + _flavour = posixpath + + def __init__(self, *pathsegments, tarfile): + super().__init__(*pathsegments) + self.tarfile = tarfile + + def __repr__(self): + return f"{type(self).__name__}({str(self)!r}, tarfile={self.tarfile!r})" + + def __hash__(self): + return hash((id(self.tarfile), str(self))) + + def __eq__(self, other): + if not isinstance(other, TarPath): + return NotImplemented + elif other.tarfile is not self.tarfile: + return False + return super().__eq__(other) + + def __lt__(self, other): + if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: + return NotImplemented + return super().__lt__(other) + + def __le__(self, other): + if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: + return NotImplemented + return super().__le__(other) + + def __gt__(self, other): + if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: + return NotImplemented + return super().__gt__(other) + + def __ge__(self, other): + if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: + return NotImplemented + return super().__ge__(other) + + def with_segments(self, *pathsegments): + """Construct a new TarPath object with the same underlying TarFile + object from any number of path-like objects. + """ + return type(self)(*pathsegments, tarfile=self.tarfile) + + def stat(self, *, follow_symlinks=True): + """Return the path's status, similar to os.stat().""" + if follow_symlinks: + resolved = self.resolve() + else: + resolved = self.parent.resolve() / self.name + implied_directory = False + for info in reversed(self.tarfile.getmembers()): + path = self.with_segments(info.name) + if path == resolved: + return _TarStatResult.from_tarinfo(self.tarfile, info) + elif resolved in path.parents: + implied_directory = True + if implied_directory: + return _TarStatResult.implied_directory(self.tarfile, str(resolved)) + else: + raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) + + def owner(self): + """Return the user name of the path owner.""" + name = self.stat().st_uname + if name is not None: + return name + raise pathlib.UnsupportedOperation() + + def group(self): + """Return the group name of the path owner.""" + name = self.stat().st_gname + if name is not None: + return name + raise pathlib.UnsupportedOperation() + + def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): + """Open the archive member pointed by this path and return a file + object, similar to the built-in open() function. + """ + if buffering != -1: + return super().open(mode, buffering, encoding, errors, newline) + action = ''.join(c for c in mode if c not in 'btU') + if action == 'r': + fileobj = self.tarfile.extractfile(str(self.resolve())) + elif action == 'w': + fileobj = _TarPathWriter(self.tarfile, str(self.resolve())) + else: + raise pathlib.UnsupportedOperation() + if 'b' not in mode: + fileobj = io.TextIOWrapper(fileobj, encoding, errors, newline) + return fileobj + + def iterdir(self): + """Yield path objects of the directory contents. The children are + yielded in arbitrary order. + """ + resolved = self.resolve() + seen = set() + for info in self.tarfile.getmembers(): + path = self.with_segments(info.name) + if path == resolved: + if info.type != DIRTYPE: + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", str(self)) + while True: + parent = path.parent + if parent == path: + break + elif parent == resolved: + path_str = str(path) + if path_str not in seen: + seen.add(path_str) + yield self / path.name + break + path = parent + if not seen: + raise FileNotFoundError(errno.ENOENT, "File not found", str(self)) + + def readlink(self): + """Return the path to which the symbolic link points.""" + for info in reversed(self.tarfile.getmembers()): + path = self.with_segments(info.name) + if path == self: + if info.issym(): + return self.with_segments(info.linkname) + else: + raise OSError(errno.EINVAL, "Not a symlink", str(self)) + elif self in path.parents: + raise OSError(errno.EINVAL, "Not a symlink", str(self)) + raise FileNotFoundError(errno.ENOENT, "File not found", str(self)) + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + """Create a new directory at this given path.""" + info = TarInfo(str(self)) + info.type = DIRTYPE + info.mode = mode + self.tarfile.addfile(info) + + def symlink_to(self, target, target_is_directory=False): + """Make this path a symlink pointing to the target path.""" + info = TarInfo(str(self)) + info.type = SYMTYPE + info.linkname = str(self.with_segments(target)) + self.tarfile.addfile(info) + + def hardlink_to(self, target): + """Make this path a hard link pointing to the target path.""" + info = TarInfo(str(self)) + info.type = LNKTYPE + info.linkname = str(self.with_segments(target)) + self.tarfile.addfile(info) + + #-------------------- # exported functions #-------------------- From 6833ed8a006ca90d811e2efb59330307b77c5c19 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 20:24:48 +0100 Subject: [PATCH 08/31] Add docs for `tarfile.TarPath` --- Doc/library/tarfile.rst | 41 +++++++++++++++++++ Doc/whatsnew/3.13.rst | 7 ++++ ...3-07-03-20-23-56.gh-issue-89812.cFkDOE.rst | 2 + 3 files changed, 50 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index fd4820e78d68d1..431d422ec13682 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -854,6 +854,47 @@ A :class:`TarInfo` object also provides some convenient query methods: Return :const:`True` if it is one of character device, block device or FIFO. +TarPath Objects +--------------- + +The :class:`TarPath` class provides an interface for tar files that's +compatible with :class:`pathlib.Path`. + +.. class:: TarPath(*pathsegments, tarfile) + + Create a :class:`TarPath` object from a given :class:`TarFile` object. + If *pathsegments* are supplied, they are joined together to form a path + within the archive; otherwise the path is positioned at the archive root. + + .. versionadded:: 3.13 + +.. attribute:: TarPath.tarfile + + The backing :class:`TarFile` instance, as supplied to the initializer. + +Features such as testing file types, reading or writing files, and iterating +or globbing directories are supported:: + + import tarfile + with tarfile.open("sample.tar.gz", "r:gz") as tar: + root = tarfile.TarPath(tarfile=tar) + for readme in root.glob("**/README*", case_sensitive=False): + print(f"Found README file at {readme}:") + print(readme.read_text()) + break + +Some :class:`TarPath` methods unconditionally raise +:exc:`pathlib.UnsupportedOperation`. They are: + +- ``absolute()``, ``cwd()``, ``expanduser()``, ``home()`` and ``as_uri()``, + because tar archives lack these features. +- ``touch()``, ``rename()``, ``replace()``, ``chmod()``, ``lchmod()``, + ``unlink()`` and ``rmdir()``, because the :class:`TarFile` class does not + support reading and writing the same archive. + +Refer to the :mod:`pathlib` documentation for information about other methods. + + .. _tarfile-extraction-filter: Extraction filters diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 9696dd4ff0b700..083c46dd905bc7 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -118,6 +118,13 @@ pathlib :meth:`~pathlib.Path.is_dir`. (Contributed by Barney Gale in :gh:`77609` and :gh:`105793`.) +tarfile +------- + +* Add :class:`tarfile.TarPath` class, which provides access to tar archive + members via the :class:`pathlib.Path` interface. + (Contributed by Barney Gale in :gh:`89812`.) + traceback --------- diff --git a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst new file mode 100644 index 00000000000000..9ad271a33d6057 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst @@ -0,0 +1,2 @@ +Add :class:`tarfile.TarPath` class, which provides access to tar archive +members via the :class:`pathlib.Path` interface. From 4d2e8a923c936a195a8b7b7496ea909f06c58801 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 21:11:01 +0100 Subject: [PATCH 09/31] Add tests for `tarfile.TarPath` --- Lib/test/test_tarfile.py | 568 +++++++++++++++++++++++++++++++++++++++ Lib/test/testtarpath.tar | Bin 0 -> 20480 bytes 2 files changed, 568 insertions(+) create mode 100644 Lib/test/testtarpath.tar diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2eda7fc4ceac71..96b60abc975ca0 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1,3 +1,4 @@ +import errno import sys import os import io @@ -3943,6 +3944,573 @@ def valueerror_filter(tarinfo, path): self.expect_exception(TypeError) # errorlevel is not int +class TarPathTest(unittest.TestCase): + def setUp(self): + self.tarpath = support.findfile("testtarpath.tar") + self.tar = tarfile.TarFile(self.tarpath) + self.root = tarfile.TarPath(tarfile=self.tar) + + def tearDown(self): + self.tar.close() + + def test_tarfile(self): + self.assertIs(self.root.tarfile, self.tar) + + def test_hash(self): + with tarfile.TarFile(self.tarpath) as tar0: + with tarfile.TarFile(self.tarpath) as tar1: + p = tarfile.TarPath('fileA', tarfile=tar0) + p1 = tarfile.TarPath('fileA', tarfile=tar0) + p2 = tarfile.TarPath('fileA', tarfile=tar1) + p3 = tarfile.TarPath('fileB', tarfile=tar0) + self.assertEqual(hash(p), hash(p1)) + self.assertNotEqual(hash(p), hash(p2)) + self.assertNotEqual(hash(p), hash(p3)) + + def test_eq(self): + with tarfile.TarFile(self.tarpath) as tar0: + with tarfile.TarFile(self.tarpath) as tar1: + p = tarfile.TarPath('fileA', tarfile=tar0) + p1 = tarfile.TarPath('fileA', tarfile=tar0) + p2 = tarfile.TarPath('fileA', tarfile=tar1) + p3 = tarfile.TarPath('fileB', tarfile=tar0) + self.assertEqual(p, p1) + self.assertNotEqual(p, p2) + self.assertNotEqual(p, p3) + + def test_samefile(self): + p = self.root / 'fileA' + pp = self.root / 'fileA' + q = self.root / 'dirB' / 'fileB' + self.assertTrue(p.samefile('fileA')) + self.assertTrue(p.samefile(pp)) + self.assertFalse(p.samefile('dirB/fileB')) + self.assertFalse(p.samefile(q)) + # Test the non-existent file case + r = self.root / 'foo' + self.assertRaises(FileNotFoundError, p.samefile, r) + self.assertRaises(FileNotFoundError, p.samefile, 'foo') + self.assertRaises(FileNotFoundError, r.samefile, p) + self.assertRaises(FileNotFoundError, r.samefile, 'foo') + self.assertRaises(FileNotFoundError, r.samefile, r) + self.assertRaises(FileNotFoundError, r.samefile, 'foo') + + def test_exists(self): + p = self.root + self.assertTrue(p.exists()) + self.assertTrue((p / 'dirA').exists()) + self.assertTrue((p / 'fileA').exists()) + self.assertFalse((p / 'fileA' / 'bah').exists()) + self.assertTrue((p / 'linkA').exists()) + self.assertTrue((p / 'linkB').exists()) + self.assertTrue((p / 'linkB' / 'fileB').exists()) + self.assertFalse((p / 'linkA' / 'bah').exists()) + self.assertFalse((p / 'brokenLink').exists()) + self.assertTrue((p / 'brokenLink').exists(follow_symlinks=False)) + self.assertFalse((p / 'foo').exists()) + self.assertFalse(p.with_segments('/xyzzy').exists()) + + def test_open(self): + with (self.root / 'fileA').open('r') as f: + self.assertIsInstance(f, io.TextIOBase) + self.assertEqual(f.read(), "this is file A\n") + with (self.root / 'fileA').open('rb') as f: + self.assertIsInstance(f, io.BufferedIOBase) + self.assertEqual(f.read().strip(), b"this is file A") + + def test_iterdir(self): + it = self.root.iterdir() + paths = sorted(it) + expected = ['brokenLink', 'brokenLinkLoop', + 'dirA', 'dirB', 'dirC', 'dirE', 'fileA', + 'linkA', 'linkB'] + self.assertEqual(paths, [ self.root / q for q in expected ]) + + def test_iterdir_symlink(self): + p = self.root / 'linkB' + paths = sorted(p.iterdir()) + expected = [ p / q for q in ['fileB', 'linkD'] ] + self.assertEqual(paths, expected) + + def test_iterdir_nodir(self): + p = self.root / 'foo' + with self.assertRaises(OSError) as cm: + next(p.iterdir()) + + def test_glob(self): + def _check(pattern, expected): + actual = sorted(self.root.glob(pattern)) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + _check("fileA", ["fileA"]) + _check("fileB", []) + _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check("*A", ['dirA', 'fileA', 'linkA']) + _check("*B/*", ['dirB/fileB', 'dirB/linkD', 'linkB/fileB', 'linkB/linkD']) + _check("*/fileB", ['dirB/fileB', 'linkB/fileB']) + _check("brokenLink", ['brokenLink']) + _check("*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) + + def test_glob_case_sensitive(self): + def _check(pattern, case_sensitive, expected): + actual = sorted([str(q) for q in self.root.glob(pattern, case_sensitive=case_sensitive)]) + expected = [str(self.root / q) for q in expected] + self.assertEqual(actual, expected) + + _check("DIRB/FILE*", True, []) + _check("DIRB/FILE*", False, ["dirB/fileB"]) + _check("dirb/file*", True, []) + _check("dirb/file*", False, ["dirB/fileB"]) + + def test_glob_follow_symlinks(self): + def _check(pattern, expected): + actual = sorted([q for q in self.root.glob(pattern, follow_symlinks=True) + if "linkD" not in q.parent.parts]) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + _check("fileB", []) + _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check("*A", ["dirA", "fileA", "linkA"]) + _check("*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) + _check("*/fileB", ["dirB/fileB", "linkB/fileB"]) + _check("*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) + _check("dir*/*/..", ["dirA/linkC/..", "dirC/dirD/.."]) + _check("dir*/**/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirC/dirD", "dirE"]) + _check("dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", + "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check("dir*/*/**/", ["dirA/linkC", "dirA/linkC/linkD", "dirB/linkD", "dirC/dirD"]) + _check("dir*/*/**/..", ["dirA/linkC/..", "dirC/dirD/.."]) + _check("dir*/**/fileC", ["dirC/fileC"]) + _check("dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) + _check("*/dirD/**/", ["dirC/dirD"]) + + def test_glob_no_follow_symlinks(self): + def _check(pattern, expected): + actual = sorted(self.root.glob(pattern, follow_symlinks=False)) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + _check("fileB", []) + _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check("*A", ["dirA", "fileA", "linkA"]) + _check("*B/*", ["dirB/fileB", "dirB/linkD"]) + _check("*/fileB", ["dirB/fileB"]) + _check("*/", ["dirA", "dirB", "dirC", "dirE"]) + _check("dir*/*/..", ["dirC/dirD/.."]) + _check("dir*/**/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + _check("dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check("dir*/*/**/", ["dirC/dirD"]) + _check("dir*/*/**/..", ["dirC/dirD/.."]) + _check("dir*/**/fileC", ["dirC/fileC"]) + _check("dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) + _check("*/dirD/**/", ["dirC/dirD"]) + + def test_rglob(self): + def _check(glob, expected): + self.assertEqual(sorted(glob), sorted(self.root / q for q in expected)) + p = self.root + _check(p.rglob("fileA"), ["fileA"]) + _check(p.rglob("fileB"), ["dirB/fileB"]) + _check(p.rglob("**/fileB"), ["dirB/fileB"]) + _check(p.rglob("*/fileA"), []) + _check(p.rglob("*/fileB"), ["dirB/fileB", "dirB/linkD/fileB", + "linkB/fileB", "dirA/linkC/fileB"]) + _check(p.rglob("file*"), ["fileA", "dirB/fileB", + "dirC/fileC", "dirC/dirD/fileD"]) + _check(p.rglob("*/"), [ + "dirA", "dirA/linkC", "dirB", "dirB/linkD", "dirC", + "dirC/dirD", "dirE", "linkB", + ]) + _check(p.rglob(""), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"]) + q = p / "dirC" + _check(q.rglob("*"), ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(q.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(q.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(q.rglob("dir*/**"), ["dirC/dirD"]) + _check(q.rglob("*/*"), ["dirC/dirD/fileD"]) + _check(q.rglob("*/"), ["dirC/dirD"]) + _check(q.rglob(""), ["dirC", "dirC/dirD"]) + _check(q.rglob("**"), ["dirC", "dirC/dirD"]) + _check(q.rglob("*.txt"), ["dirC/novel.txt"]) + _check(q.rglob("*.*"), ["dirC/novel.txt"]) + + def test_rglob_follow_symlinks(self): + def _check(path, pattern, expected): + actual = sorted([q for q in path.rglob(pattern, follow_symlinks=True) + if "linkD" not in q.parent.parts]) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + p = self.root + _check(p, "fileB", ["dirA/linkC/fileB", "dirB/fileB", "linkB/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirA/linkC/fileB", "dirB/fileB", "linkB/fileB"]) + _check(p, "file*", ["dirA/linkC/fileB", "dirB/fileB", + "dirC/dirD/fileD", "dirC/fileC", "fileA", "linkB/fileB"]) + _check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) + _check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) + + q = p / "dirC" + _check(q, "*", ["dirC/dirD", "dirC/dirD/fileD", "dirC/fileC", "dirC/novel.txt"]) + _check(q, "file*", ["dirC/dirD/fileD", "dirC/fileC"]) + _check(q, "*/*", ["dirC/dirD/fileD"]) + _check(q, "*/", ["dirC/dirD"]) + _check(q, "", ["dirC", "dirC/dirD"]) + _check(q, "*.txt", ["dirC/novel.txt"]) + _check(q, "*.*", ["dirC/novel.txt"]) + + def test_rglob_no_follow_symlinks(self): + def _check(path, pattern, expected): + actual = sorted(path.rglob(pattern, follow_symlinks=False)) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + p = self.root + _check(p, "fileB", ["dirB/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB"]) + _check(p, "file*", ["dirB/fileB", "dirC/dirD/fileD", "dirC/fileC", "fileA"]) + _check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + _check(p, "", ["", "dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + + q = p / "dirC" + _check(q, "*", ["dirC/dirD", "dirC/dirD/fileD", "dirC/fileC", "dirC/novel.txt"]) + _check(q, "file*", ["dirC/dirD/fileD", "dirC/fileC", ]) + _check(q, "*/*", ["dirC/dirD/fileD"]) + _check(q, "*/", ["dirC/dirD"]) + _check(q, "", ["dirC", "dirC/dirD"]) + _check(q, "*.txt", ["dirC/novel.txt"]) + _check(q, "*.*", ["dirC/novel.txt"]) + + def test_rglob_symlink_loop(self): + given = sorted(self.root.rglob('*')) + expect = ['brokenLink', + 'dirA', 'dirA/linkC', + 'dirB', 'dirB/fileB', 'dirB/linkD', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', + 'dirC/fileC', 'dirC/novel.txt', + 'dirE', + 'fileA', + 'linkA', + 'linkB', + 'brokenLinkLoop', + ] + self.assertEqual(given, sorted(self.root / x for x in expect)) + + def test_glob_dotdot(self): + p = self.root + self.assertEqual(sorted(p.glob("..")), [ p / ".." ]) + self.assertEqual(sorted(p.glob("../..")), [ p / ".." / ".." ]) + self.assertEqual(sorted(p.glob("dirA/..")), [ p / "dirA" / ".." ]) + self.assertEqual(sorted(p.glob("dirA/../file*")), [ p / "dirA/../fileA" ]) + self.assertEqual(sorted(p.glob("dirA/../file*/..")), []) + self.assertEqual(sorted(p.glob("../xyzzy")), []) + self.assertEqual(sorted(p.glob("xyzzy/..")), []) + self.assertEqual(sorted(p.glob("/".join([".."] * 50))), [ p.joinpath(*[".."] * 50)]) + + def test_walk(self): + def _sorted_walk(follow_symlinks): + results = [] + for dirpath, dirnames, filenames in self.root.walk(follow_symlinks=follow_symlinks): + if 'linkD' in dirnames: + # Treat recursive symlink as file + dirnames.remove('linkD') + filenames.append('linkD') + dirnames.sort() + filenames.sort() + results.append((dirpath, dirnames, filenames)) + return results + + p = self.root + self.assertEqual(_sorted_walk(False), [ + (p, + ['dirA', 'dirB', 'dirC', 'dirE'], + ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA', 'linkB']), + (p / 'dirA', [], ['linkC']), + (p / 'dirB', [], ['fileB', 'linkD']), + (p / 'dirC', ['dirD'], ['fileC', 'novel.txt']), + (p / 'dirC' / 'dirD', [], ['fileD']), + ]) + + self.assertEqual(_sorted_walk(True), [ + (p, + ['dirA', 'dirB', 'dirC', 'dirE', 'linkB'], + ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA']), + (p / 'dirA', ['linkC'], []), + (p / 'dirA' / 'linkC', [], ['fileB', 'linkD']), + (p / 'dirB', [], ['fileB', 'linkD']), + (p / 'dirC', ['dirD'], ['fileC', 'novel.txt']), + (p / 'dirC' / 'dirD', [], ['fileD']), + (p / 'linkB', [], ['fileB', 'linkD']), + ]) + + def test_readlink(self): + p = self.root + self.assertEqual((p / 'linkA').readlink(), p / 'fileA') + self.assertEqual((p / 'brokenLink').readlink(), p / 'non-existing') + self.assertEqual((p / 'linkB').readlink(), p / 'dirB') + with self.assertRaises(OSError): + (p / 'fileA').readlink() + + def test_resolve(self): + with self.assertRaises(OSError) as cm: + self.root.joinpath('foo').resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ENOENT) + def _check(path, expected, strict=True): + self.assertEqual(self.root.joinpath(path).resolve(strict=strict), + self.root.joinpath(expected)) + _check('foo/in/spam', 'foo/in/spam', False) + _check('../foo/in/spam', '../foo/in/spam', False) + _check('dirB/fileB', 'dirB/fileB') + _check('linkA', 'fileA') + _check('dirA/linkC/fileB', 'dirB/fileB') + _check('dirB/linkD/fileB', 'dirB/fileB') + _check('dirA/linkC/fileB/foo/in/spam', 'dirB/fileB/foo/in/spam', False) + _check('dirA/linkC/../foo/in/spam', 'foo/in/spam', False) + + def test_stat(self): + statA = self.root.joinpath('fileA').stat() + statB = self.root.joinpath('dirB', 'fileB').stat() + statC = self.root.joinpath('dirC').stat() + # st_mode: files are the same, directory differs. + self.assertIsInstance(statA.st_mode, int) + self.assertEqual(statA.st_mode, statB.st_mode) + self.assertNotEqual(statA.st_mode, statC.st_mode) + self.assertNotEqual(statB.st_mode, statC.st_mode) + # st_ino: all different, + self.assertIsInstance(statA.st_ino, int) + self.assertNotEqual(statA.st_ino, statB.st_ino) + self.assertNotEqual(statA.st_ino, statC.st_ino) + self.assertNotEqual(statB.st_ino, statC.st_ino) + # st_dev: all the same. + self.assertIsInstance(statA.st_dev, int) + self.assertEqual(statA.st_dev, statB.st_dev) + self.assertEqual(statA.st_dev, statC.st_dev) + # other attributes not used by pathlib. + + def test_stat_no_follow_symlinks(self): + p = self.root / 'linkA' + st = p.stat() + self.assertNotEqual(st, p.stat(follow_symlinks=False)) + + def test_stat_no_follow_symlinks_nosymlink(self): + p = self.root / 'fileA' + st = p.stat() + self.assertEqual(st, p.stat(follow_symlinks=False)) + + def test_lstat(self): + p = self.root / 'linkA' + st = p.stat() + self.assertNotEqual(st, p.lstat()) + + def test_lstat_nosymlink(self): + p = self.root / 'fileA' + st = p.stat() + self.assertEqual(st, p.lstat()) + + def test_owner(self): + p = self.root + self.assertRaises(pathlib.UnsupportedOperation, p.owner) + self.assertEqual((p / 'fileA').owner(), 'barney') + + def test_group(self): + p = self.root + self.assertRaises(pathlib.UnsupportedOperation, p.group) + self.assertEqual((p / 'fileA').group(), 'barney') + + def test_read_write_bytes(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath('fileA', tarfile=tar) + p.write_bytes(b'abcdefg') + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + p = tarfile.TarPath('fileA', tarfile=tar) + self.assertEqual(p.read_bytes(), b'abcdefg') + + def test_read_write_text(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath('fileA', tarfile=tar) + p.write_text('äbcdefg', encoding='latin-1') + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + p = tarfile.TarPath('fileA', tarfile=tar) + self.assertEqual(p.read_text(encoding='utf-8', errors='ignore'), 'bcdefg') + + def test_mkdir(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath('dirA', tarfile=tar) + p.mkdir() + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + info = tar.getmember('dirA') + self.assertEqual(info.type, tarfile.DIRTYPE) + + def test_symlink_to(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath(tarfile=tar) + p.joinpath('linkA').symlink_to('fileA') + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + info = tar.getmember('linkA') + self.assertEqual(info.type, tarfile.SYMTYPE) + self.assertEqual(info.linkname, 'fileA') + + def test_hardlink_to(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath(tarfile=tar) + p.joinpath('linkA').hardlink_to('fileA') + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + info = tar.getmember('linkA') + self.assertEqual(info.type, tarfile.LNKTYPE) + self.assertEqual(info.linkname, 'fileA') + + +class TarPathFileTypeTest(unittest.TestCase): + def setUp(self): + tarpath = support.findfile("testtar.tar") + self.tar = tarfile.TarFile(tarpath) + self.root = tarfile.TarPath(tarfile=self.tar) + + def tearDown(self): + self.tar.close() + + def test_is_dir(self): + p = self.root + self.assertTrue(p.is_dir()) + self.assertTrue((p / 'ustar').is_dir()) + self.assertTrue((p / 'ustar' / 'dirtype').is_dir()) + self.assertFalse((p / 'ustar' / 'regtype').is_dir()) + self.assertFalse((p / 'non-existing').is_dir()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_dir()) + self.assertFalse((p / 'ustar' / 'symtype').is_dir()) + self.assertFalse((p / 'ustar' / 'lnktype').is_dir()) + self.assertFalse((p / 'ustar' / 'fifotype').is_dir()) + self.assertFalse((p / 'ustar' / 'blktype').is_dir()) + self.assertFalse((p / 'ustar' / 'chrtype').is_dir()) + + def test_is_file(self): + p = self.root + self.assertFalse(p.is_file()) + self.assertFalse((p / 'ustar').is_file()) + self.assertFalse((p / 'ustar' / 'dirtype').is_file()) + self.assertTrue((p / 'ustar' / 'regtype').is_file()) + self.assertFalse((p / 'non-existing').is_file()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_file()) + self.assertTrue((p / 'ustar' / 'symtype').is_file()) + self.assertFalse((p / 'ustar' / 'symtype').is_file(follow_symlinks=False)) + self.assertFalse((p / 'ustar' / 'fifotype').is_file()) + self.assertFalse((p / 'ustar' / 'blktype').is_file()) + self.assertFalse((p / 'ustar' / 'chrtype').is_file()) + + def test_is_mount(self): + p = self.root + self.assertTrue(p.is_mount()) + self.assertFalse((p / 'ustar').is_mount()) + self.assertFalse((p / 'ustar' / 'dirtype').is_mount()) + self.assertFalse((p / 'ustar' / 'regtype').is_mount()) + self.assertFalse((p / 'non-existing').is_mount()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_mount()) + self.assertFalse((p / 'ustar' / 'symtype').is_mount()) + self.assertFalse((p / 'ustar' / 'fifotype').is_mount()) + self.assertFalse((p / 'ustar' / 'blktype').is_mount()) + self.assertFalse((p / 'ustar' / 'chrtype').is_mount()) + + def test_is_symlink(self): + p = self.root + self.assertFalse(p.is_symlink()) + self.assertFalse((p / 'ustar').is_symlink()) + self.assertFalse((p / 'ustar' / 'dirtype').is_symlink()) + self.assertFalse((p / 'ustar' / 'regtype').is_symlink()) + self.assertFalse((p / 'non-existing').is_symlink()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_symlink()) + self.assertTrue((p / 'ustar' / 'symtype').is_symlink()) + self.assertFalse((p / 'ustar' / 'fifotype').is_symlink()) + self.assertFalse((p / 'ustar' / 'blktype').is_symlink()) + self.assertFalse((p / 'ustar' / 'chrtype').is_symlink()) + + def test_is_junction(self): + p = self.root + self.assertFalse(p.is_junction()) + self.assertFalse((p / 'ustar').is_junction()) + self.assertFalse((p / 'ustar' / 'dirtype').is_junction()) + self.assertFalse((p / 'ustar' / 'regtype').is_junction()) + self.assertFalse((p / 'non-existing').is_junction()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_junction()) + self.assertFalse((p / 'ustar' / 'symtype').is_junction()) + self.assertFalse((p / 'ustar' / 'fifotype').is_junction()) + self.assertFalse((p / 'ustar' / 'blktype').is_junction()) + self.assertFalse((p / 'ustar' / 'chrtype').is_junction()) + + def test_is_fifo(self): + p = self.root + self.assertFalse(p.is_fifo()) + self.assertFalse((p / 'ustar').is_fifo()) + self.assertFalse((p / 'ustar' / 'dirtype').is_fifo()) + self.assertFalse((p / 'ustar' / 'regtype').is_fifo()) + self.assertFalse((p / 'non-existing').is_fifo()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_fifo()) + self.assertFalse((p / 'ustar' / 'symtype').is_fifo()) + self.assertTrue((p / 'ustar' / 'fifotype').is_fifo()) + self.assertFalse((p / 'ustar' / 'blktype').is_fifo()) + self.assertFalse((p / 'ustar' / 'chrtype').is_fifo()) + + def test_is_socket(self): + p = self.root + self.assertFalse(p.is_socket()) + self.assertFalse((p / 'ustar').is_socket()) + self.assertFalse((p / 'ustar' / 'dirtype').is_socket()) + self.assertFalse((p / 'ustar' / 'regtype').is_socket()) + self.assertFalse((p / 'non-existing').is_socket()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_socket()) + self.assertFalse((p / 'ustar' / 'symtype').is_socket()) + self.assertFalse((p / 'ustar' / 'fifotype').is_socket()) + self.assertFalse((p / 'ustar' / 'blktype').is_socket()) + self.assertFalse((p / 'ustar' / 'chrtype').is_socket()) + + def test_is_block_device(self): + p = self.root + self.assertFalse(p.is_block_device()) + self.assertFalse((p / 'ustar').is_block_device()) + self.assertFalse((p / 'ustar' / 'dirtype').is_block_device()) + self.assertFalse((p / 'ustar' / 'regtype').is_block_device()) + self.assertFalse((p / 'non-existing').is_block_device()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_block_device()) + self.assertFalse((p / 'ustar' / 'symtype').is_block_device()) + self.assertFalse((p / 'ustar' / 'fifotype').is_block_device()) + self.assertTrue((p / 'ustar' / 'blktype').is_block_device()) + self.assertFalse((p / 'ustar' / 'chrtype').is_block_device()) + + def test_is_char_device(self): + p = self.root + self.assertFalse(p.is_char_device()) + self.assertFalse((p / 'ustar').is_char_device()) + self.assertFalse((p / 'ustar' / 'dirtype').is_char_device()) + self.assertFalse((p / 'ustar' / 'regtype').is_char_device()) + self.assertFalse((p / 'non-existing').is_char_device()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_char_device()) + self.assertFalse((p / 'ustar' / 'symtype').is_char_device()) + self.assertFalse((p / 'ustar' / 'fifotype').is_char_device()) + self.assertFalse((p / 'ustar' / 'blktype').is_char_device()) + self.assertTrue((p / 'ustar' / 'chrtype').is_char_device()) + + def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Lib/test/testtarpath.tar b/Lib/test/testtarpath.tar new file mode 100644 index 0000000000000000000000000000000000000000..f90c18fa9de46a30e75fda4b9a0e261488cb0a19 GIT binary patch literal 20480 zcmeI2O>V+45QV+&DSUyF*iM`^KizQ&P}4R7B~?f(di#z|fk1_*D@|l6FG3J0!ZzRY z8;{4V+h$YN%c|Z8wkxHpC<;Lzs>sv+r+efsp|UJbRc>sqM5?mF7DDQ}sUORIwd<<- zm1}e#yKdD=Db}mDF28Q~a_{-=Z$|xOebRr~H19k^bpnU!Z7or>;GJ}(}YL% zQ>zF4^SV6^H=gxZA(U%IsaRm-H!jNxQhQ;n^eej2mmAZfA?1HB)!}ekh6y& zh@+2)2zdXy^?$)PsDG{h2J1hcBIlM#|9O<_KkfgJ|DSc@y7T(e+k$Zt9Q+@$|I>=f zK+vB{^j^DX{apsYvj~Lee?}?V|0DnF3B`1Kk^e1_|1n{Xwps80y7?@(lWyPf4D+u4 z^>zKvsEqPI_@76Qeyd0SucV&@2mpOJBe?hecS}EW{WrM&qv8J{rr**3zWy_Z{fA&4 zQy=92C&sOx|Eb~iA3FY}^&k6x{+Z%@KV|ObZvOYH{;y0%^&jMa%)(If7x90J_|H!~ zt{zqE|8x2?9{u{s9=`r-W$FB1Q*y#Rwz2>JBd%zK(SQI5 zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*>m GmB2R@KXU~D literal 0 HcmV?d00001 From 508cabe051440b0f1a5f5ec902dc454156391417 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 12 Jul 2023 19:53:33 +0100 Subject: [PATCH 10/31] Undo changes to tarfile. --- Doc/library/tarfile.rst | 41 -- Doc/whatsnew/3.13.rst | 7 - Lib/tarfile.py | 229 +------ Lib/test/test_tarfile.py | 568 ------------------ Lib/test/testtarpath.tar | Bin 20480 -> 0 bytes ...3-07-03-20-23-56.gh-issue-89812.cFkDOE.rst | 4 +- 6 files changed, 3 insertions(+), 846 deletions(-) delete mode 100644 Lib/test/testtarpath.tar diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 431d422ec13682..fd4820e78d68d1 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -854,47 +854,6 @@ A :class:`TarInfo` object also provides some convenient query methods: Return :const:`True` if it is one of character device, block device or FIFO. -TarPath Objects ---------------- - -The :class:`TarPath` class provides an interface for tar files that's -compatible with :class:`pathlib.Path`. - -.. class:: TarPath(*pathsegments, tarfile) - - Create a :class:`TarPath` object from a given :class:`TarFile` object. - If *pathsegments* are supplied, they are joined together to form a path - within the archive; otherwise the path is positioned at the archive root. - - .. versionadded:: 3.13 - -.. attribute:: TarPath.tarfile - - The backing :class:`TarFile` instance, as supplied to the initializer. - -Features such as testing file types, reading or writing files, and iterating -or globbing directories are supported:: - - import tarfile - with tarfile.open("sample.tar.gz", "r:gz") as tar: - root = tarfile.TarPath(tarfile=tar) - for readme in root.glob("**/README*", case_sensitive=False): - print(f"Found README file at {readme}:") - print(readme.read_text()) - break - -Some :class:`TarPath` methods unconditionally raise -:exc:`pathlib.UnsupportedOperation`. They are: - -- ``absolute()``, ``cwd()``, ``expanduser()``, ``home()`` and ``as_uri()``, - because tar archives lack these features. -- ``touch()``, ``rename()``, ``replace()``, ``chmod()``, ``lchmod()``, - ``unlink()`` and ``rmdir()``, because the :class:`TarFile` class does not - support reading and writing the same archive. - -Refer to the :mod:`pathlib` documentation for information about other methods. - - .. _tarfile-extraction-filter: Extraction filters diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 1e34e5055721e6..b7c436fc151611 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -119,13 +119,6 @@ pathlib :meth:`~pathlib.Path.is_dir`. (Contributed by Barney Gale in :gh:`77609` and :gh:`105793`.) -tarfile -------- - -* Add :class:`tarfile.TarPath` class, which provides access to tar archive - members via the :class:`pathlib.Path` interface. - (Contributed by Barney Gale in :gh:`89812`.) - traceback --------- diff --git a/Lib/tarfile.py b/Lib/tarfile.py index a8ca2264040063..df4e41f7a0d23a 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -37,13 +37,9 @@ # Imports #--------- from builtins import open as bltn_open -from collections import namedtuple -import errno import sys import os import io -import pathlib -import posixpath import shutil import stat import time @@ -73,7 +69,7 @@ "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter", "tar_filter", "FilterError", "AbsoluteLinkError", "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", - "LinkOutsideDestinationError", "TarPath"] + "LinkOutsideDestinationError"] #--------------------------------------------------------- @@ -2776,229 +2772,6 @@ def __exit__(self, type, value, traceback): self.fileobj.close() self.closed = True - -_tar_stat_fields = ('st_mode st_ino st_dev st_nlink st_uid st_gid ' - 'st_size st_atime st_mtime st_ctime st_uname st_gname') - - -class _TarStatResult(namedtuple('_TarStatResult', _tar_stat_fields)): - """Tar-specific version of os.stat_result. Returned by TarPath.stat().""" - __slots__ = () - - @classmethod - def from_tarinfo(cls, tarfile, tarinfo): - """Create a _TarStatResult from TarFile and TarInfo objects.""" - if tarinfo.type in REGULAR_TYPES: - st_mode = stat.S_IFREG - elif tarinfo.type == DIRTYPE: - st_mode = stat.S_IFDIR - elif tarinfo.type == SYMTYPE or tarinfo.type == LNKTYPE: - st_mode = stat.S_IFLNK - elif tarinfo.type == FIFOTYPE: - st_mode = stat.S_IFIFO - elif tarinfo.type == CHRTYPE: - st_mode = stat.S_IFCHR - elif tarinfo.type == BLKTYPE: - st_mode = stat.S_IFBLK - else: - raise ValueError(tarinfo.type) - return cls(st_mode=tarinfo.mode | st_mode, - st_ino=tarinfo.offset_data, - st_dev=id(tarfile), - st_nlink=0, - st_uid=tarinfo.uid, - st_gid=tarinfo.gid, - st_size=tarinfo.size, - st_atime=0, - st_mtime=tarinfo.mtime, - st_ctime=0, - st_uname=tarinfo.uname, - st_gname=tarinfo.gname) - - @classmethod - def implied_directory(cls, tarfile, path): - """Create a _TarStatResult for a directory that is implied to exist - by another archive member's path. - """ - return cls(stat.S_IFDIR, hash(path), id(tarfile), 0, 0, 0, 0, 0, 0, 0, None, None) - - -class _TarPathWriter(io.BytesIO): - """File object that flushes its contents to a tar archive on close. - Returned by TarPath.open(mode="w"). - """ - - def __init__(self, tarfile, path): - super().__init__() - self.tarfile = tarfile - self.path = path - - def close(self): - info = TarInfo(self.path) - info.size = self.tell() - self.seek(0) - self.tarfile.addfile(info, self) - super().close() - - -class TarPath(pathlib._VirtualPath): - """A pathlib-compatible interface for tar files.""" - - __slots__ = ('tarfile',) - _flavour = posixpath - - def __init__(self, *pathsegments, tarfile): - super().__init__(*pathsegments) - self.tarfile = tarfile - - def __repr__(self): - return f"{type(self).__name__}({str(self)!r}, tarfile={self.tarfile!r})" - - def __hash__(self): - return hash((id(self.tarfile), str(self))) - - def __eq__(self, other): - if not isinstance(other, TarPath): - return NotImplemented - elif other.tarfile is not self.tarfile: - return False - return super().__eq__(other) - - def __lt__(self, other): - if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: - return NotImplemented - return super().__lt__(other) - - def __le__(self, other): - if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: - return NotImplemented - return super().__le__(other) - - def __gt__(self, other): - if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: - return NotImplemented - return super().__gt__(other) - - def __ge__(self, other): - if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: - return NotImplemented - return super().__ge__(other) - - def with_segments(self, *pathsegments): - """Construct a new TarPath object with the same underlying TarFile - object from any number of path-like objects. - """ - return type(self)(*pathsegments, tarfile=self.tarfile) - - def stat(self, *, follow_symlinks=True): - """Return the path's status, similar to os.stat().""" - if follow_symlinks: - resolved = self.resolve() - else: - resolved = self.parent.resolve() / self.name - implied_directory = False - for info in reversed(self.tarfile.getmembers()): - path = self.with_segments(info.name) - if path == resolved: - return _TarStatResult.from_tarinfo(self.tarfile, info) - elif resolved in path.parents: - implied_directory = True - if implied_directory: - return _TarStatResult.implied_directory(self.tarfile, str(resolved)) - else: - raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) - - def owner(self): - """Return the user name of the path owner.""" - name = self.stat().st_uname - if name is not None: - return name - raise pathlib.UnsupportedOperation() - - def group(self): - """Return the group name of the path owner.""" - name = self.stat().st_gname - if name is not None: - return name - raise pathlib.UnsupportedOperation() - - def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): - """Open the archive member pointed by this path and return a file - object, similar to the built-in open() function. - """ - if buffering != -1: - return super().open(mode, buffering, encoding, errors, newline) - action = ''.join(c for c in mode if c not in 'btU') - if action == 'r': - fileobj = self.tarfile.extractfile(str(self.resolve())) - elif action == 'w': - fileobj = _TarPathWriter(self.tarfile, str(self.resolve())) - else: - raise pathlib.UnsupportedOperation() - if 'b' not in mode: - fileobj = io.TextIOWrapper(fileobj, encoding, errors, newline) - return fileobj - - def iterdir(self): - """Yield path objects of the directory contents. The children are - yielded in arbitrary order. - """ - resolved = self.resolve() - seen = set() - for info in self.tarfile.getmembers(): - path = self.with_segments(info.name) - if path == resolved: - if info.type != DIRTYPE: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", str(self)) - while True: - parent = path.parent - if parent == path: - break - elif parent == resolved: - path_str = str(path) - if path_str not in seen: - seen.add(path_str) - yield self / path.name - break - path = parent - if not seen: - raise FileNotFoundError(errno.ENOENT, "File not found", str(self)) - - def readlink(self): - """Return the path to which the symbolic link points.""" - for info in reversed(self.tarfile.getmembers()): - path = self.with_segments(info.name) - if path == self: - if info.issym(): - return self.with_segments(info.linkname) - else: - raise OSError(errno.EINVAL, "Not a symlink", str(self)) - elif self in path.parents: - raise OSError(errno.EINVAL, "Not a symlink", str(self)) - raise FileNotFoundError(errno.ENOENT, "File not found", str(self)) - - def mkdir(self, mode=0o777, parents=False, exist_ok=False): - """Create a new directory at this given path.""" - info = TarInfo(str(self)) - info.type = DIRTYPE - info.mode = mode - self.tarfile.addfile(info) - - def symlink_to(self, target, target_is_directory=False): - """Make this path a symlink pointing to the target path.""" - info = TarInfo(str(self)) - info.type = SYMTYPE - info.linkname = str(self.with_segments(target)) - self.tarfile.addfile(info) - - def hardlink_to(self, target): - """Make this path a hard link pointing to the target path.""" - info = TarInfo(str(self)) - info.type = LNKTYPE - info.linkname = str(self.with_segments(target)) - self.tarfile.addfile(info) - - #-------------------- # exported functions #-------------------- diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 96b60abc975ca0..2eda7fc4ceac71 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1,4 +1,3 @@ -import errno import sys import os import io @@ -3944,573 +3943,6 @@ def valueerror_filter(tarinfo, path): self.expect_exception(TypeError) # errorlevel is not int -class TarPathTest(unittest.TestCase): - def setUp(self): - self.tarpath = support.findfile("testtarpath.tar") - self.tar = tarfile.TarFile(self.tarpath) - self.root = tarfile.TarPath(tarfile=self.tar) - - def tearDown(self): - self.tar.close() - - def test_tarfile(self): - self.assertIs(self.root.tarfile, self.tar) - - def test_hash(self): - with tarfile.TarFile(self.tarpath) as tar0: - with tarfile.TarFile(self.tarpath) as tar1: - p = tarfile.TarPath('fileA', tarfile=tar0) - p1 = tarfile.TarPath('fileA', tarfile=tar0) - p2 = tarfile.TarPath('fileA', tarfile=tar1) - p3 = tarfile.TarPath('fileB', tarfile=tar0) - self.assertEqual(hash(p), hash(p1)) - self.assertNotEqual(hash(p), hash(p2)) - self.assertNotEqual(hash(p), hash(p3)) - - def test_eq(self): - with tarfile.TarFile(self.tarpath) as tar0: - with tarfile.TarFile(self.tarpath) as tar1: - p = tarfile.TarPath('fileA', tarfile=tar0) - p1 = tarfile.TarPath('fileA', tarfile=tar0) - p2 = tarfile.TarPath('fileA', tarfile=tar1) - p3 = tarfile.TarPath('fileB', tarfile=tar0) - self.assertEqual(p, p1) - self.assertNotEqual(p, p2) - self.assertNotEqual(p, p3) - - def test_samefile(self): - p = self.root / 'fileA' - pp = self.root / 'fileA' - q = self.root / 'dirB' / 'fileB' - self.assertTrue(p.samefile('fileA')) - self.assertTrue(p.samefile(pp)) - self.assertFalse(p.samefile('dirB/fileB')) - self.assertFalse(p.samefile(q)) - # Test the non-existent file case - r = self.root / 'foo' - self.assertRaises(FileNotFoundError, p.samefile, r) - self.assertRaises(FileNotFoundError, p.samefile, 'foo') - self.assertRaises(FileNotFoundError, r.samefile, p) - self.assertRaises(FileNotFoundError, r.samefile, 'foo') - self.assertRaises(FileNotFoundError, r.samefile, r) - self.assertRaises(FileNotFoundError, r.samefile, 'foo') - - def test_exists(self): - p = self.root - self.assertTrue(p.exists()) - self.assertTrue((p / 'dirA').exists()) - self.assertTrue((p / 'fileA').exists()) - self.assertFalse((p / 'fileA' / 'bah').exists()) - self.assertTrue((p / 'linkA').exists()) - self.assertTrue((p / 'linkB').exists()) - self.assertTrue((p / 'linkB' / 'fileB').exists()) - self.assertFalse((p / 'linkA' / 'bah').exists()) - self.assertFalse((p / 'brokenLink').exists()) - self.assertTrue((p / 'brokenLink').exists(follow_symlinks=False)) - self.assertFalse((p / 'foo').exists()) - self.assertFalse(p.with_segments('/xyzzy').exists()) - - def test_open(self): - with (self.root / 'fileA').open('r') as f: - self.assertIsInstance(f, io.TextIOBase) - self.assertEqual(f.read(), "this is file A\n") - with (self.root / 'fileA').open('rb') as f: - self.assertIsInstance(f, io.BufferedIOBase) - self.assertEqual(f.read().strip(), b"this is file A") - - def test_iterdir(self): - it = self.root.iterdir() - paths = sorted(it) - expected = ['brokenLink', 'brokenLinkLoop', - 'dirA', 'dirB', 'dirC', 'dirE', 'fileA', - 'linkA', 'linkB'] - self.assertEqual(paths, [ self.root / q for q in expected ]) - - def test_iterdir_symlink(self): - p = self.root / 'linkB' - paths = sorted(p.iterdir()) - expected = [ p / q for q in ['fileB', 'linkD'] ] - self.assertEqual(paths, expected) - - def test_iterdir_nodir(self): - p = self.root / 'foo' - with self.assertRaises(OSError) as cm: - next(p.iterdir()) - - def test_glob(self): - def _check(pattern, expected): - actual = sorted(self.root.glob(pattern)) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - _check("fileA", ["fileA"]) - _check("fileB", []) - _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check("*A", ['dirA', 'fileA', 'linkA']) - _check("*B/*", ['dirB/fileB', 'dirB/linkD', 'linkB/fileB', 'linkB/linkD']) - _check("*/fileB", ['dirB/fileB', 'linkB/fileB']) - _check("brokenLink", ['brokenLink']) - _check("*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) - - def test_glob_case_sensitive(self): - def _check(pattern, case_sensitive, expected): - actual = sorted([str(q) for q in self.root.glob(pattern, case_sensitive=case_sensitive)]) - expected = [str(self.root / q) for q in expected] - self.assertEqual(actual, expected) - - _check("DIRB/FILE*", True, []) - _check("DIRB/FILE*", False, ["dirB/fileB"]) - _check("dirb/file*", True, []) - _check("dirb/file*", False, ["dirB/fileB"]) - - def test_glob_follow_symlinks(self): - def _check(pattern, expected): - actual = sorted([q for q in self.root.glob(pattern, follow_symlinks=True) - if "linkD" not in q.parent.parts]) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - _check("fileB", []) - _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check("*A", ["dirA", "fileA", "linkA"]) - _check("*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) - _check("*/fileB", ["dirB/fileB", "linkB/fileB"]) - _check("*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) - _check("dir*/*/..", ["dirA/linkC/..", "dirC/dirD/.."]) - _check("dir*/**/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", - "dirC", "dirC/dirD", "dirE"]) - _check("dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", - "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check("dir*/*/**/", ["dirA/linkC", "dirA/linkC/linkD", "dirB/linkD", "dirC/dirD"]) - _check("dir*/*/**/..", ["dirA/linkC/..", "dirC/dirD/.."]) - _check("dir*/**/fileC", ["dirC/fileC"]) - _check("dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) - _check("*/dirD/**/", ["dirC/dirD"]) - - def test_glob_no_follow_symlinks(self): - def _check(pattern, expected): - actual = sorted(self.root.glob(pattern, follow_symlinks=False)) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - _check("fileB", []) - _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check("*A", ["dirA", "fileA", "linkA"]) - _check("*B/*", ["dirB/fileB", "dirB/linkD"]) - _check("*/fileB", ["dirB/fileB"]) - _check("*/", ["dirA", "dirB", "dirC", "dirE"]) - _check("dir*/*/..", ["dirC/dirD/.."]) - _check("dir*/**/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) - _check("dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check("dir*/*/**/", ["dirC/dirD"]) - _check("dir*/*/**/..", ["dirC/dirD/.."]) - _check("dir*/**/fileC", ["dirC/fileC"]) - _check("dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) - _check("*/dirD/**/", ["dirC/dirD"]) - - def test_rglob(self): - def _check(glob, expected): - self.assertEqual(sorted(glob), sorted(self.root / q for q in expected)) - p = self.root - _check(p.rglob("fileA"), ["fileA"]) - _check(p.rglob("fileB"), ["dirB/fileB"]) - _check(p.rglob("**/fileB"), ["dirB/fileB"]) - _check(p.rglob("*/fileA"), []) - _check(p.rglob("*/fileB"), ["dirB/fileB", "dirB/linkD/fileB", - "linkB/fileB", "dirA/linkC/fileB"]) - _check(p.rglob("file*"), ["fileA", "dirB/fileB", - "dirC/fileC", "dirC/dirD/fileD"]) - _check(p.rglob("*/"), [ - "dirA", "dirA/linkC", "dirB", "dirB/linkD", "dirC", - "dirC/dirD", "dirE", "linkB", - ]) - _check(p.rglob(""), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"]) - q = p / "dirC" - _check(q.rglob("*"), ["dirC/fileC", "dirC/novel.txt", - "dirC/dirD", "dirC/dirD/fileD"]) - _check(q.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) - _check(q.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) - _check(q.rglob("dir*/**"), ["dirC/dirD"]) - _check(q.rglob("*/*"), ["dirC/dirD/fileD"]) - _check(q.rglob("*/"), ["dirC/dirD"]) - _check(q.rglob(""), ["dirC", "dirC/dirD"]) - _check(q.rglob("**"), ["dirC", "dirC/dirD"]) - _check(q.rglob("*.txt"), ["dirC/novel.txt"]) - _check(q.rglob("*.*"), ["dirC/novel.txt"]) - - def test_rglob_follow_symlinks(self): - def _check(path, pattern, expected): - actual = sorted([q for q in path.rglob(pattern, follow_symlinks=True) - if "linkD" not in q.parent.parts]) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - p = self.root - _check(p, "fileB", ["dirA/linkC/fileB", "dirB/fileB", "linkB/fileB"]) - _check(p, "*/fileA", []) - _check(p, "*/fileB", ["dirA/linkC/fileB", "dirB/fileB", "linkB/fileB"]) - _check(p, "file*", ["dirA/linkC/fileB", "dirB/fileB", - "dirC/dirD/fileD", "dirC/fileC", "fileA", "linkB/fileB"]) - _check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", - "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) - _check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", - "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) - - q = p / "dirC" - _check(q, "*", ["dirC/dirD", "dirC/dirD/fileD", "dirC/fileC", "dirC/novel.txt"]) - _check(q, "file*", ["dirC/dirD/fileD", "dirC/fileC"]) - _check(q, "*/*", ["dirC/dirD/fileD"]) - _check(q, "*/", ["dirC/dirD"]) - _check(q, "", ["dirC", "dirC/dirD"]) - _check(q, "*.txt", ["dirC/novel.txt"]) - _check(q, "*.*", ["dirC/novel.txt"]) - - def test_rglob_no_follow_symlinks(self): - def _check(path, pattern, expected): - actual = sorted(path.rglob(pattern, follow_symlinks=False)) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - p = self.root - _check(p, "fileB", ["dirB/fileB"]) - _check(p, "*/fileA", []) - _check(p, "*/fileB", ["dirB/fileB"]) - _check(p, "file*", ["dirB/fileB", "dirC/dirD/fileD", "dirC/fileC", "fileA"]) - _check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) - _check(p, "", ["", "dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) - - q = p / "dirC" - _check(q, "*", ["dirC/dirD", "dirC/dirD/fileD", "dirC/fileC", "dirC/novel.txt"]) - _check(q, "file*", ["dirC/dirD/fileD", "dirC/fileC", ]) - _check(q, "*/*", ["dirC/dirD/fileD"]) - _check(q, "*/", ["dirC/dirD"]) - _check(q, "", ["dirC", "dirC/dirD"]) - _check(q, "*.txt", ["dirC/novel.txt"]) - _check(q, "*.*", ["dirC/novel.txt"]) - - def test_rglob_symlink_loop(self): - given = sorted(self.root.rglob('*')) - expect = ['brokenLink', - 'dirA', 'dirA/linkC', - 'dirB', 'dirB/fileB', 'dirB/linkD', - 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', - 'dirC/fileC', 'dirC/novel.txt', - 'dirE', - 'fileA', - 'linkA', - 'linkB', - 'brokenLinkLoop', - ] - self.assertEqual(given, sorted(self.root / x for x in expect)) - - def test_glob_dotdot(self): - p = self.root - self.assertEqual(sorted(p.glob("..")), [ p / ".." ]) - self.assertEqual(sorted(p.glob("../..")), [ p / ".." / ".." ]) - self.assertEqual(sorted(p.glob("dirA/..")), [ p / "dirA" / ".." ]) - self.assertEqual(sorted(p.glob("dirA/../file*")), [ p / "dirA/../fileA" ]) - self.assertEqual(sorted(p.glob("dirA/../file*/..")), []) - self.assertEqual(sorted(p.glob("../xyzzy")), []) - self.assertEqual(sorted(p.glob("xyzzy/..")), []) - self.assertEqual(sorted(p.glob("/".join([".."] * 50))), [ p.joinpath(*[".."] * 50)]) - - def test_walk(self): - def _sorted_walk(follow_symlinks): - results = [] - for dirpath, dirnames, filenames in self.root.walk(follow_symlinks=follow_symlinks): - if 'linkD' in dirnames: - # Treat recursive symlink as file - dirnames.remove('linkD') - filenames.append('linkD') - dirnames.sort() - filenames.sort() - results.append((dirpath, dirnames, filenames)) - return results - - p = self.root - self.assertEqual(_sorted_walk(False), [ - (p, - ['dirA', 'dirB', 'dirC', 'dirE'], - ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA', 'linkB']), - (p / 'dirA', [], ['linkC']), - (p / 'dirB', [], ['fileB', 'linkD']), - (p / 'dirC', ['dirD'], ['fileC', 'novel.txt']), - (p / 'dirC' / 'dirD', [], ['fileD']), - ]) - - self.assertEqual(_sorted_walk(True), [ - (p, - ['dirA', 'dirB', 'dirC', 'dirE', 'linkB'], - ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA']), - (p / 'dirA', ['linkC'], []), - (p / 'dirA' / 'linkC', [], ['fileB', 'linkD']), - (p / 'dirB', [], ['fileB', 'linkD']), - (p / 'dirC', ['dirD'], ['fileC', 'novel.txt']), - (p / 'dirC' / 'dirD', [], ['fileD']), - (p / 'linkB', [], ['fileB', 'linkD']), - ]) - - def test_readlink(self): - p = self.root - self.assertEqual((p / 'linkA').readlink(), p / 'fileA') - self.assertEqual((p / 'brokenLink').readlink(), p / 'non-existing') - self.assertEqual((p / 'linkB').readlink(), p / 'dirB') - with self.assertRaises(OSError): - (p / 'fileA').readlink() - - def test_resolve(self): - with self.assertRaises(OSError) as cm: - self.root.joinpath('foo').resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ENOENT) - def _check(path, expected, strict=True): - self.assertEqual(self.root.joinpath(path).resolve(strict=strict), - self.root.joinpath(expected)) - _check('foo/in/spam', 'foo/in/spam', False) - _check('../foo/in/spam', '../foo/in/spam', False) - _check('dirB/fileB', 'dirB/fileB') - _check('linkA', 'fileA') - _check('dirA/linkC/fileB', 'dirB/fileB') - _check('dirB/linkD/fileB', 'dirB/fileB') - _check('dirA/linkC/fileB/foo/in/spam', 'dirB/fileB/foo/in/spam', False) - _check('dirA/linkC/../foo/in/spam', 'foo/in/spam', False) - - def test_stat(self): - statA = self.root.joinpath('fileA').stat() - statB = self.root.joinpath('dirB', 'fileB').stat() - statC = self.root.joinpath('dirC').stat() - # st_mode: files are the same, directory differs. - self.assertIsInstance(statA.st_mode, int) - self.assertEqual(statA.st_mode, statB.st_mode) - self.assertNotEqual(statA.st_mode, statC.st_mode) - self.assertNotEqual(statB.st_mode, statC.st_mode) - # st_ino: all different, - self.assertIsInstance(statA.st_ino, int) - self.assertNotEqual(statA.st_ino, statB.st_ino) - self.assertNotEqual(statA.st_ino, statC.st_ino) - self.assertNotEqual(statB.st_ino, statC.st_ino) - # st_dev: all the same. - self.assertIsInstance(statA.st_dev, int) - self.assertEqual(statA.st_dev, statB.st_dev) - self.assertEqual(statA.st_dev, statC.st_dev) - # other attributes not used by pathlib. - - def test_stat_no_follow_symlinks(self): - p = self.root / 'linkA' - st = p.stat() - self.assertNotEqual(st, p.stat(follow_symlinks=False)) - - def test_stat_no_follow_symlinks_nosymlink(self): - p = self.root / 'fileA' - st = p.stat() - self.assertEqual(st, p.stat(follow_symlinks=False)) - - def test_lstat(self): - p = self.root / 'linkA' - st = p.stat() - self.assertNotEqual(st, p.lstat()) - - def test_lstat_nosymlink(self): - p = self.root / 'fileA' - st = p.stat() - self.assertEqual(st, p.lstat()) - - def test_owner(self): - p = self.root - self.assertRaises(pathlib.UnsupportedOperation, p.owner) - self.assertEqual((p / 'fileA').owner(), 'barney') - - def test_group(self): - p = self.root - self.assertRaises(pathlib.UnsupportedOperation, p.group) - self.assertEqual((p / 'fileA').group(), 'barney') - - def test_read_write_bytes(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath('fileA', tarfile=tar) - p.write_bytes(b'abcdefg') - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - p = tarfile.TarPath('fileA', tarfile=tar) - self.assertEqual(p.read_bytes(), b'abcdefg') - - def test_read_write_text(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath('fileA', tarfile=tar) - p.write_text('äbcdefg', encoding='latin-1') - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - p = tarfile.TarPath('fileA', tarfile=tar) - self.assertEqual(p.read_text(encoding='utf-8', errors='ignore'), 'bcdefg') - - def test_mkdir(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath('dirA', tarfile=tar) - p.mkdir() - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - info = tar.getmember('dirA') - self.assertEqual(info.type, tarfile.DIRTYPE) - - def test_symlink_to(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath(tarfile=tar) - p.joinpath('linkA').symlink_to('fileA') - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - info = tar.getmember('linkA') - self.assertEqual(info.type, tarfile.SYMTYPE) - self.assertEqual(info.linkname, 'fileA') - - def test_hardlink_to(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath(tarfile=tar) - p.joinpath('linkA').hardlink_to('fileA') - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - info = tar.getmember('linkA') - self.assertEqual(info.type, tarfile.LNKTYPE) - self.assertEqual(info.linkname, 'fileA') - - -class TarPathFileTypeTest(unittest.TestCase): - def setUp(self): - tarpath = support.findfile("testtar.tar") - self.tar = tarfile.TarFile(tarpath) - self.root = tarfile.TarPath(tarfile=self.tar) - - def tearDown(self): - self.tar.close() - - def test_is_dir(self): - p = self.root - self.assertTrue(p.is_dir()) - self.assertTrue((p / 'ustar').is_dir()) - self.assertTrue((p / 'ustar' / 'dirtype').is_dir()) - self.assertFalse((p / 'ustar' / 'regtype').is_dir()) - self.assertFalse((p / 'non-existing').is_dir()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_dir()) - self.assertFalse((p / 'ustar' / 'symtype').is_dir()) - self.assertFalse((p / 'ustar' / 'lnktype').is_dir()) - self.assertFalse((p / 'ustar' / 'fifotype').is_dir()) - self.assertFalse((p / 'ustar' / 'blktype').is_dir()) - self.assertFalse((p / 'ustar' / 'chrtype').is_dir()) - - def test_is_file(self): - p = self.root - self.assertFalse(p.is_file()) - self.assertFalse((p / 'ustar').is_file()) - self.assertFalse((p / 'ustar' / 'dirtype').is_file()) - self.assertTrue((p / 'ustar' / 'regtype').is_file()) - self.assertFalse((p / 'non-existing').is_file()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_file()) - self.assertTrue((p / 'ustar' / 'symtype').is_file()) - self.assertFalse((p / 'ustar' / 'symtype').is_file(follow_symlinks=False)) - self.assertFalse((p / 'ustar' / 'fifotype').is_file()) - self.assertFalse((p / 'ustar' / 'blktype').is_file()) - self.assertFalse((p / 'ustar' / 'chrtype').is_file()) - - def test_is_mount(self): - p = self.root - self.assertTrue(p.is_mount()) - self.assertFalse((p / 'ustar').is_mount()) - self.assertFalse((p / 'ustar' / 'dirtype').is_mount()) - self.assertFalse((p / 'ustar' / 'regtype').is_mount()) - self.assertFalse((p / 'non-existing').is_mount()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_mount()) - self.assertFalse((p / 'ustar' / 'symtype').is_mount()) - self.assertFalse((p / 'ustar' / 'fifotype').is_mount()) - self.assertFalse((p / 'ustar' / 'blktype').is_mount()) - self.assertFalse((p / 'ustar' / 'chrtype').is_mount()) - - def test_is_symlink(self): - p = self.root - self.assertFalse(p.is_symlink()) - self.assertFalse((p / 'ustar').is_symlink()) - self.assertFalse((p / 'ustar' / 'dirtype').is_symlink()) - self.assertFalse((p / 'ustar' / 'regtype').is_symlink()) - self.assertFalse((p / 'non-existing').is_symlink()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_symlink()) - self.assertTrue((p / 'ustar' / 'symtype').is_symlink()) - self.assertFalse((p / 'ustar' / 'fifotype').is_symlink()) - self.assertFalse((p / 'ustar' / 'blktype').is_symlink()) - self.assertFalse((p / 'ustar' / 'chrtype').is_symlink()) - - def test_is_junction(self): - p = self.root - self.assertFalse(p.is_junction()) - self.assertFalse((p / 'ustar').is_junction()) - self.assertFalse((p / 'ustar' / 'dirtype').is_junction()) - self.assertFalse((p / 'ustar' / 'regtype').is_junction()) - self.assertFalse((p / 'non-existing').is_junction()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_junction()) - self.assertFalse((p / 'ustar' / 'symtype').is_junction()) - self.assertFalse((p / 'ustar' / 'fifotype').is_junction()) - self.assertFalse((p / 'ustar' / 'blktype').is_junction()) - self.assertFalse((p / 'ustar' / 'chrtype').is_junction()) - - def test_is_fifo(self): - p = self.root - self.assertFalse(p.is_fifo()) - self.assertFalse((p / 'ustar').is_fifo()) - self.assertFalse((p / 'ustar' / 'dirtype').is_fifo()) - self.assertFalse((p / 'ustar' / 'regtype').is_fifo()) - self.assertFalse((p / 'non-existing').is_fifo()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_fifo()) - self.assertFalse((p / 'ustar' / 'symtype').is_fifo()) - self.assertTrue((p / 'ustar' / 'fifotype').is_fifo()) - self.assertFalse((p / 'ustar' / 'blktype').is_fifo()) - self.assertFalse((p / 'ustar' / 'chrtype').is_fifo()) - - def test_is_socket(self): - p = self.root - self.assertFalse(p.is_socket()) - self.assertFalse((p / 'ustar').is_socket()) - self.assertFalse((p / 'ustar' / 'dirtype').is_socket()) - self.assertFalse((p / 'ustar' / 'regtype').is_socket()) - self.assertFalse((p / 'non-existing').is_socket()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_socket()) - self.assertFalse((p / 'ustar' / 'symtype').is_socket()) - self.assertFalse((p / 'ustar' / 'fifotype').is_socket()) - self.assertFalse((p / 'ustar' / 'blktype').is_socket()) - self.assertFalse((p / 'ustar' / 'chrtype').is_socket()) - - def test_is_block_device(self): - p = self.root - self.assertFalse(p.is_block_device()) - self.assertFalse((p / 'ustar').is_block_device()) - self.assertFalse((p / 'ustar' / 'dirtype').is_block_device()) - self.assertFalse((p / 'ustar' / 'regtype').is_block_device()) - self.assertFalse((p / 'non-existing').is_block_device()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_block_device()) - self.assertFalse((p / 'ustar' / 'symtype').is_block_device()) - self.assertFalse((p / 'ustar' / 'fifotype').is_block_device()) - self.assertTrue((p / 'ustar' / 'blktype').is_block_device()) - self.assertFalse((p / 'ustar' / 'chrtype').is_block_device()) - - def test_is_char_device(self): - p = self.root - self.assertFalse(p.is_char_device()) - self.assertFalse((p / 'ustar').is_char_device()) - self.assertFalse((p / 'ustar' / 'dirtype').is_char_device()) - self.assertFalse((p / 'ustar' / 'regtype').is_char_device()) - self.assertFalse((p / 'non-existing').is_char_device()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_char_device()) - self.assertFalse((p / 'ustar' / 'symtype').is_char_device()) - self.assertFalse((p / 'ustar' / 'fifotype').is_char_device()) - self.assertFalse((p / 'ustar' / 'blktype').is_char_device()) - self.assertTrue((p / 'ustar' / 'chrtype').is_char_device()) - - def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Lib/test/testtarpath.tar b/Lib/test/testtarpath.tar deleted file mode 100644 index f90c18fa9de46a30e75fda4b9a0e261488cb0a19..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20480 zcmeI2O>V+45QV+&DSUyF*iM`^KizQ&P}4R7B~?f(di#z|fk1_*D@|l6FG3J0!ZzRY z8;{4V+h$YN%c|Z8wkxHpC<;Lzs>sv+r+efsp|UJbRc>sqM5?mF7DDQ}sUORIwd<<- zm1}e#yKdD=Db}mDF28Q~a_{-=Z$|xOebRr~H19k^bpnU!Z7or>;GJ}(}YL% zQ>zF4^SV6^H=gxZA(U%IsaRm-H!jNxQhQ;n^eej2mmAZfA?1HB)!}ekh6y& zh@+2)2zdXy^?$)PsDG{h2J1hcBIlM#|9O<_KkfgJ|DSc@y7T(e+k$Zt9Q+@$|I>=f zK+vB{^j^DX{apsYvj~Lee?}?V|0DnF3B`1Kk^e1_|1n{Xwps80y7?@(lWyPf4D+u4 z^>zKvsEqPI_@76Qeyd0SucV&@2mpOJBe?hecS}EW{WrM&qv8J{rr**3zWy_Z{fA&4 zQy=92C&sOx|Eb~iA3FY}^&k6x{+Z%@KV|ObZvOYH{;y0%^&jMa%)(If7x90J_|H!~ zt{zqE|8x2?9{u{s9=`r-W$FB1Q*y#Rwz2>JBd%zK(SQI5 zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*>m GmB2R@KXU~D diff --git a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst index 9ad271a33d6057..2b100f09c67ad9 100644 --- a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst +++ b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst @@ -1,2 +1,2 @@ -Add :class:`tarfile.TarPath` class, which provides access to tar archive -members via the :class:`pathlib.Path` interface. +Add private ``pathlib._VirtualPath`` class, which provides experimental support +for virtual filesystems, and may be made public in a future version of Python. From 2c565916bc86242f54ab5600344e67f1bf9cbd21 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 12 Jul 2023 20:01:38 +0100 Subject: [PATCH 11/31] `_VirtualPath` --> `_PathBase` --- Lib/pathlib.py | 4 +-- Lib/test/test_pathlib.py | 30 +++++++++---------- ...3-07-03-20-23-56.gh-issue-89812.cFkDOE.rst | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index bed05295046e6a..1c08665c6d07f0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -785,7 +785,7 @@ class PureWindowsPath(PurePath): # Filesystem-accessing classes -class _VirtualPath(PurePath): +class _PathBase(PurePath): """PurePath subclass for virtual filesystems, such as archives and remote storage. """ @@ -1379,7 +1379,7 @@ def as_uri(self): raise UnsupportedOperation(f"{type(self).__name__}.as_uri()") -class Path(_VirtualPath): +class Path(_PathBase): """PurePath subclass that can make system calls. Path represents a filesystem path but unlike PurePath, also offers diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 28581c493070d3..004ecba3f0b0ef 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1573,8 +1573,8 @@ def test_group(self): # Tests for the virtual classes. # -class VirtualPathTest(PurePathTest): - cls = pathlib._VirtualPath +class PathBaseTest(PurePathTest): + cls = pathlib._PathBase def test_unsupported_operation(self): P = self.cls @@ -1632,9 +1632,9 @@ def test_as_bytes_common(self): self.assertRaises(TypeError, bytes, self.cls()) -class DummyVirtualPathIO(io.BytesIO): +class DummyPathIO(io.BytesIO): """ - Used by DummyVirtualPath to implement `open('w')` + Used by DummyPath to implement `open('w')` """ def __init__(self, files, path): @@ -1647,9 +1647,9 @@ def close(self): super().close() -class DummyVirtualPath(pathlib._VirtualPath): +class DummyPath(pathlib._PathBase): """ - Simple implementation of VirtualPath that keeps files and directories in + Simple implementation of PathBase that keeps files and directories in memory. """ _files = {} @@ -1691,7 +1691,7 @@ def open(self, mode='r', buffering=-1, encoding=None, elif mode == 'w': if parent not in self._directories: raise FileNotFoundError(errno.ENOENT, "File not found", parent) - stream = DummyVirtualPathIO(self._files, path) + stream = DummyPathIO(self._files, path) self._files[path] = b'' self._directories[parent].add(name) else: @@ -1724,10 +1724,10 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): raise -class DummyVirtualPathTest(unittest.TestCase): - """Tests for VirtualPath methods that use stat(), open() and iterdir().""" +class DummyPathTest(unittest.TestCase): + """Tests for PathBase methods that use stat(), open() and iterdir().""" - cls = DummyVirtualPath + cls = DummyPath can_symlink = False # (BASE) @@ -2541,7 +2541,7 @@ def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) -class DummyVirtualPathWithSymlinks(DummyVirtualPath): +class DummyPathWithSymlinks(DummyPath): def readlink(self): path = str(self) if path in self._symlinks: @@ -2556,8 +2556,8 @@ def symlink_to(self, target, target_is_directory=False): self._symlinks[str(self)] = str(target) -class DummyVirtualPathWithSymlinksTest(DummyVirtualPathTest): - cls = DummyVirtualPathWithSymlinks +class DummyPathWithSymlinksTest(DummyPathTest): + cls = DummyPathWithSymlinks can_symlink = True def setUp(self): @@ -2581,13 +2581,13 @@ def setUp(self): # Tests for the concrete classes. # -class PathTest(DummyVirtualPathTest): +class PathTest(DummyPathTest): """Tests for the FS-accessing functionalities of the Path classes.""" cls = pathlib.Path can_symlink = os_helper.can_symlink() def setUp(self): - # note: this must be kept in sync with `DummyVirtualPathTest.setUp()` + # note: this must be kept in sync with `DummyPathTest.setUp()` def cleanup(): os.chmod(join('dirE'), 0o777) os_helper.rmtree(BASE) diff --git a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst index 2b100f09c67ad9..a4221fc4ca900b 100644 --- a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst +++ b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst @@ -1,2 +1,2 @@ -Add private ``pathlib._VirtualPath`` class, which provides experimental support +Add private ``pathlib._PathBase`` class, which provides experimental support for virtual filesystems, and may be made public in a future version of Python. From 89440987805b583e9e1d42991e1140a7a4f63bc2 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 28 Aug 2023 14:55:59 +0100 Subject: [PATCH 12/31] Apply suggestions from code review Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Lib/pathlib.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 75c86c9b7b1600..5dd88634462077 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -790,7 +790,7 @@ class _PathBase(PurePath): """ __slots__ = () __bytes__ = None - __fspath__ = None + __fspath__ = None # virtual paths have no local file system representation def stat(self, *, follow_symlinks=True): """ @@ -1190,8 +1190,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): paths += [path._make_child_relpath(d) for d in reversed(dirnames)] def absolute(self): - """Return an absolute version of this path by prepending the current - working directory. No normalization or symlink resolution is performed. + """Return an absolute version of this path + No normalization or symlink resolution is performed. Use resolve() to get the canonical path to a file. """ @@ -1210,8 +1210,7 @@ def expanduser(self): @classmethod def home(cls): - """Return a new path pointing to the user's home directory (as - returned by os.path.expanduser('~')). + """Return a new path pointing to expanduser('~'). """ return cls("~").expanduser() From b61141ae2a568583e8825f691f39869398d8ec82 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 14:57:13 +0100 Subject: [PATCH 13/31] Improve _PathBase docstring --- Lib/pathlib.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 5dd88634462077..8f4b62924f2858 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -785,8 +785,17 @@ class PureWindowsPath(PurePath): class _PathBase(PurePath): - """PurePath subclass for virtual filesystems, such as archives and remote - storage. + """Base class for concrete path objects. + + This class provides dummy implementations for many methods that derived + classes can override selectively; the default implementations raise + UnsupportedOperation. The most basic methods, such as stat() and open(), + directly raise UnsupportedOperation; these basic methods are called by + other methods such as is_dir() and read_text(). + + The Path class derives this class to implement local filesystem paths. + Users may derive their own classes to implement virtual filesystem paths, + such as paths in archive files or on remote storage systems. """ __slots__ = () __bytes__ = None From 1e462b08e67ce6e6878e4fa2cc0c3f7ac79d8576 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 18:12:11 +0100 Subject: [PATCH 14/31] Explain use of nullcontext() in comment --- Lib/pathlib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8f4b62924f2858..cdf9c02aa4de6e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1046,6 +1046,7 @@ def iterdir(self): raise UnsupportedOperation(f"{type(self).__name__}.iterdir()") def _scandir(self): + # os.scandir() returns an object that can be used as a context manager return contextlib.nullcontext(list(self.iterdir())) def _make_child_relpath(self, name): From d321cadbfd53c47a60b2ce81b0ded5c5bac4057c Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 18:42:07 +0100 Subject: [PATCH 15/31] Align and test Path/PathBase docstrings --- Lib/pathlib.py | 14 +++++++------- Lib/test/test_pathlib.py | 9 +++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index b2f916e36e6e8f..d2d11341b88c3c 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -426,7 +426,7 @@ def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) def as_uri(self): - """Return the path as a 'file' URI.""" + """Return the path as a URI.""" if not self.is_absolute(): raise ValueError("relative path can't be expressed as a file URI") @@ -1210,7 +1210,7 @@ def absolute(self): """Return an absolute version of this path No normalization or symlink resolution is performed. - Use resolve() to get the canonical path to a file. + Use resolve() to resolve symlinks and remove '..' segments. """ raise UnsupportedOperation(f"{type(self).__name__}.absolute()") @@ -1239,8 +1239,8 @@ def readlink(self): def resolve(self, strict=False): """ - Resolve '..' segments in the path. Where possible, make the path - absolute and resolve symlinks on the way. + Make the path absolute, resolving all symlinks on the way and also + normalizing it. """ try: path = self.absolute() @@ -1463,10 +1463,10 @@ def _scandir(self): return os.scandir(self) def absolute(self): - """Return an absolute version of this path by prepending the current - working directory. No normalization or symlink resolution is performed. + """Return an absolute version of this path + No normalization or symlink resolution is performed. - Use resolve() to get the canonical path to a file. + Use resolve() to resolve symlinks and remove '..' segments. """ if self.is_absolute(): return self diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index e9c9e2b93c3d3f..35b4c9ec7d144e 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1643,6 +1643,15 @@ def test_fspath_common(self): def test_as_bytes_common(self): self.assertRaises(TypeError, bytes, self.cls()) + def test_matches_path_api(self): + our_names = {name for name in dir(self.cls) if name[0] != '_'} + path_names = {name for name in dir(pathlib.Path) if name[0] != '_'} + self.assertEqual(our_names, path_names) + for attr_name in our_names: + our_attr = getattr(self.cls, attr_name) + path_attr = getattr(pathlib.Path, attr_name) + self.assertEqual(our_attr.__doc__, path_attr.__doc__) + class DummyPathIO(io.BytesIO): """ From acfc1b04bdabeea77d7a286aeabdfb99afdb95c5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 19:49:57 +0100 Subject: [PATCH 16/31] Revise `_PathBase.is_junction()` --- Lib/pathlib.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index d2d11341b88c3c..c433e3b3e3d6c0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -910,15 +910,20 @@ def is_junction(self): """ Whether this path is a junction. """ - import stat try: - return self.lstat().st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT + self.lstat() except OSError as e: if not _ignore_error(e): raise + except ValueError: + # Non-encodable path return False - except (ValueError, AttributeError): - return False + + # Junctions are a Windows-only feature, not present in POSIX nor the + # vast majority of virtual filesystems. There is no cross-platform + # idiom to check for junctions (using stat().st_mode). And so this + # default implementation returns false if lstat() doesn't raise. + return False def is_block_device(self): """ From bc82225072d8639e5be818dc7ad9110d229961f1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 20:05:56 +0100 Subject: [PATCH 17/31] Make is_junction() code more consistent with other is_*() methods. --- Lib/pathlib.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c433e3b3e3d6c0..65180471dfd2ac 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -912,19 +912,20 @@ def is_junction(self): """ try: self.lstat() + # Junctions are a Windows-only feature, not present in POSIX nor + # the majority of virtual filesystems. There is no cross-platform + # idiom to check for junctions (using stat().st_mode). And so this + # default implementation returns false if lstat() doesn't raise. + return False except OSError as e: if not _ignore_error(e): raise + # Path doesn't exist + return False except ValueError: # Non-encodable path return False - # Junctions are a Windows-only feature, not present in POSIX nor the - # vast majority of virtual filesystems. There is no cross-platform - # idiom to check for junctions (using stat().st_mode). And so this - # default implementation returns false if lstat() doesn't raise. - return False - def is_block_device(self): """ Whether this path is a block device. From c3127b865d7bb8655d4d4e258e1361d75cbf9a4e Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 2 Sep 2023 17:00:37 +0100 Subject: [PATCH 18/31] Improve `UnsupportedOperation` exception message. --- Lib/pathlib.py | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index b2633fa69c9b11..127a1dbe84da4a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -803,12 +803,18 @@ class _PathBase(PurePath): __bytes__ = None __fspath__ = None # virtual paths have no local file system representation + def _unsupported(self, method_name): + msg = f"{type(self).__name__}.{method_name}() is unsupported" + if isinstance(self, Path): + msg += " on this system" + raise UnsupportedOperation(msg) + def stat(self, *, follow_symlinks=True): """ Return the result of the stat() system call on this path, like os.stat() does. """ - raise UnsupportedOperation(f"{type(self).__name__}.stat()") + return self._unsupported("stat") def lstat(self): """ @@ -1008,7 +1014,7 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed by this path and return a file object, as the built-in open() function does. """ - raise UnsupportedOperation(f"{type(self).__name__}.open()") + return self._unsupported("open") def read_bytes(self): """ @@ -1051,7 +1057,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - raise UnsupportedOperation(f"{type(self).__name__}.iterdir()") + return self._unsupported("iterdir") def _scandir(self): # os.scandir() returns an object that can be used as a context manager @@ -1218,7 +1224,7 @@ def absolute(self): Use resolve() to resolve symlinks and remove '..' segments. """ - raise UnsupportedOperation(f"{type(self).__name__}.absolute()") + return self._unsupported("absolute") @classmethod def cwd(cls): @@ -1229,7 +1235,7 @@ def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ - raise UnsupportedOperation(f"{type(self).__name__}.expanduser()") + return self._unsupported("expanduser") @classmethod def home(cls): @@ -1241,7 +1247,7 @@ def readlink(self): """ Return the path to which the symbolic link points. """ - raise UnsupportedOperation(f"{type(self).__name__}.readlink()") + return self._unsupported("readlink") def resolve(self, strict=False): """ @@ -1312,7 +1318,7 @@ def symlink_to(self, target, target_is_directory=False): Make this path a symlink pointing to the target path. Note the order of arguments (link, target) is the reverse of os.symlink. """ - raise UnsupportedOperation(f"{type(self).__name__}.symlink_to()") + return self._unsupported("symlink_to") def hardlink_to(self, target): """ @@ -1320,19 +1326,19 @@ def hardlink_to(self, target): Note the order of arguments (self, target) is the reverse of os.link's. """ - raise UnsupportedOperation(f"{type(self).__name__}.hardlink_to()") + return self._unsupported("hardlink_to") def touch(self, mode=0o666, exist_ok=True): """ Create this file with the given access mode, if it doesn't exist. """ - raise UnsupportedOperation(f"{type(self).__name__}.touch()") + return self._unsupported("touch") def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ Create a new directory at this given path. """ - raise UnsupportedOperation(f"{type(self).__name__}.mkdir()") + return self._unsupported("mkdir") def rename(self, target): """ @@ -1344,7 +1350,7 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ - raise UnsupportedOperation(f"{type(self).__name__}.rename()") + return self._unsupported("rename") def replace(self, target): """ @@ -1356,13 +1362,13 @@ def replace(self, target): Returns the new Path instance pointing to the target path. """ - raise UnsupportedOperation(f"{type(self).__name__}.replace()") + return self._unsupported("replace") def chmod(self, mode, *, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). """ - raise UnsupportedOperation(f"{type(self).__name__}.chmod()") + return self._unsupported("chmod") def lchmod(self, mode): """ @@ -1376,29 +1382,29 @@ def unlink(self, missing_ok=False): Remove this file or link. If the path is a directory, use rmdir() instead. """ - raise UnsupportedOperation(f"{type(self).__name__}.unlink()") + return self._unsupported("unlink") def rmdir(self): """ Remove this directory. The directory must be empty. """ - raise UnsupportedOperation(f"{type(self).__name__}.rmdir()") + return self._unsupported("rmdir") def owner(self): """ Return the login name of the file owner. """ - raise UnsupportedOperation(f"{type(self).__name__}.owner()") + return self._unsupported("owner") def group(self): """ Return the group name of the file gid. """ - raise UnsupportedOperation(f"{type(self).__name__}.group()") + return self._unsupported("group") def as_uri(self): """Return the path as a URI.""" - raise UnsupportedOperation(f"{type(self).__name__}.as_uri()") + return self._unsupported("as_uri") class Path(_PathBase): From 3540ae1a11a29dbd853ce32d1689e27be05dcfc7 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 2 Sep 2023 17:42:10 +0100 Subject: [PATCH 19/31] Slightly improve symlink loop code, exception message. --- Lib/pathlib.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 127a1dbe84da4a..111493b59ca5c0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -40,6 +40,9 @@ # Internals # +# Maximum number of symlinks to follow in _PathBase.resolve() +MAX_SYMLINKS = 40 + # Reference for Windows paths can be found at # https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . _WIN_RESERVED_NAMES = frozenset( @@ -1296,8 +1299,8 @@ def resolve(self, strict=False): break else: link_count += 1 - if link_count >= 40: - raise OSError(ELOOP, "Symlink loop", path) + if link_count >= MAX_SYMLINKS: + raise OSError(ELOOP, "Too many symbolic links in path", path) elif link_target.root or link_target.drive: link_target = link.parent / link_target drv = link_target.drive From c9f0f20daf503656c5f4d3c4f7239fc20ca08aeb Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 2 Sep 2023 18:28:02 +0100 Subject: [PATCH 20/31] Restore deleted comment in `cwd()`, expand `_scandir()` comment. --- Lib/pathlib.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 111493b59ca5c0..148508d55bd67a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1063,7 +1063,8 @@ def iterdir(self): return self._unsupported("iterdir") def _scandir(self): - # os.scandir() returns an object that can be used as a context manager + # Emulate os.scandir(), which returns an object that can be used as a + # context manager. This method is called by walk() and glob(). return contextlib.nullcontext(self.iterdir()) def _make_child_relpath(self, name): @@ -1232,6 +1233,10 @@ def absolute(self): @classmethod def cwd(cls): """Return a new path pointing to the current working directory.""" + # We call 'absolute()' rather than using 'os.getcwd()' directly to + # enable users to replace the implementation of 'absolute()' in a + # subclass and benefit from the new behaviour here. This works because + # os.path.abspath('.') == os.getcwd(). return cls().absolute() def expanduser(self): From 0ee10cac6cf01a92ca6c5fb90bfa3a2312ad3d6c Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 2 Sep 2023 18:34:51 +0100 Subject: [PATCH 21/31] Make `_PathBase.is_junction()` immediately return false. --- Lib/pathlib.py | 19 ++++--------------- Lib/test/test_pathlib.py | 1 - 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 148508d55bd67a..03673d5f541f34 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -919,21 +919,10 @@ def is_junction(self): """ Whether this path is a junction. """ - try: - self.lstat() - # Junctions are a Windows-only feature, not present in POSIX nor - # the majority of virtual filesystems. There is no cross-platform - # idiom to check for junctions (using stat().st_mode). And so this - # default implementation returns false if lstat() doesn't raise. - return False - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist - return False - except ValueError: - # Non-encodable path - return False + # Junctions are a Windows-only feature, not present in POSIX nor the + # majority of virtual filesystems. There is no cross-platform idiom + # to check for junctions (using stat().st_mode). + return False def is_block_device(self): """ diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index be24601e653392..de07a67680a3d2 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1604,7 +1604,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.is_char_device) self.assertRaises(e, p.is_fifo) self.assertRaises(e, p.is_socket) - self.assertRaises(e, p.is_junction) self.assertRaises(e, p.open) self.assertRaises(e, p.read_bytes) self.assertRaises(e, p.read_text) From 17eee2fc16bdf56616f093c508df140d12e9bf95 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 9 Sep 2023 15:37:49 +0100 Subject: [PATCH 22/31] MAX_SYMLINKS --> _MAX_SYMLINKS --- Lib/pathlib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 03673d5f541f34..1a2eb1ec92e260 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -41,7 +41,7 @@ # # Maximum number of symlinks to follow in _PathBase.resolve() -MAX_SYMLINKS = 40 +_MAX_SYMLINKS = 40 # Reference for Windows paths can be found at # https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . @@ -1293,7 +1293,7 @@ def resolve(self, strict=False): break else: link_count += 1 - if link_count >= MAX_SYMLINKS: + if link_count >= _MAX_SYMLINKS: raise OSError(ELOOP, "Too many symbolic links in path", path) elif link_target.root or link_target.drive: link_target = link.parent / link_target From c7c46bc5f7b2b26cb993627e7aa7db21063c048f Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 9 Sep 2023 19:21:20 +0100 Subject: [PATCH 23/31] `return self._unsupported()` --> `self._unsupported()` --- Lib/pathlib.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 1a2eb1ec92e260..253f2da838efbe 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -817,7 +817,7 @@ def stat(self, *, follow_symlinks=True): Return the result of the stat() system call on this path, like os.stat() does. """ - return self._unsupported("stat") + self._unsupported("stat") def lstat(self): """ @@ -1006,7 +1006,7 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed by this path and return a file object, as the built-in open() function does. """ - return self._unsupported("open") + self._unsupported("open") def read_bytes(self): """ @@ -1049,7 +1049,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - return self._unsupported("iterdir") + self._unsupported("iterdir") def _scandir(self): # Emulate os.scandir(), which returns an object that can be used as a @@ -1217,7 +1217,7 @@ def absolute(self): Use resolve() to resolve symlinks and remove '..' segments. """ - return self._unsupported("absolute") + self._unsupported("absolute") @classmethod def cwd(cls): @@ -1232,7 +1232,7 @@ def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ - return self._unsupported("expanduser") + self._unsupported("expanduser") @classmethod def home(cls): @@ -1244,7 +1244,7 @@ def readlink(self): """ Return the path to which the symbolic link points. """ - return self._unsupported("readlink") + self._unsupported("readlink") def resolve(self, strict=False): """ @@ -1315,7 +1315,7 @@ def symlink_to(self, target, target_is_directory=False): Make this path a symlink pointing to the target path. Note the order of arguments (link, target) is the reverse of os.symlink. """ - return self._unsupported("symlink_to") + self._unsupported("symlink_to") def hardlink_to(self, target): """ @@ -1323,19 +1323,19 @@ def hardlink_to(self, target): Note the order of arguments (self, target) is the reverse of os.link's. """ - return self._unsupported("hardlink_to") + self._unsupported("hardlink_to") def touch(self, mode=0o666, exist_ok=True): """ Create this file with the given access mode, if it doesn't exist. """ - return self._unsupported("touch") + self._unsupported("touch") def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ Create a new directory at this given path. """ - return self._unsupported("mkdir") + self._unsupported("mkdir") def rename(self, target): """ @@ -1347,7 +1347,7 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ - return self._unsupported("rename") + self._unsupported("rename") def replace(self, target): """ @@ -1359,13 +1359,13 @@ def replace(self, target): Returns the new Path instance pointing to the target path. """ - return self._unsupported("replace") + self._unsupported("replace") def chmod(self, mode, *, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). """ - return self._unsupported("chmod") + self._unsupported("chmod") def lchmod(self, mode): """ @@ -1379,29 +1379,29 @@ def unlink(self, missing_ok=False): Remove this file or link. If the path is a directory, use rmdir() instead. """ - return self._unsupported("unlink") + self._unsupported("unlink") def rmdir(self): """ Remove this directory. The directory must be empty. """ - return self._unsupported("rmdir") + self._unsupported("rmdir") def owner(self): """ Return the login name of the file owner. """ - return self._unsupported("owner") + self._unsupported("owner") def group(self): """ Return the group name of the file gid. """ - return self._unsupported("group") + self._unsupported("group") def as_uri(self): """Return the path as a URI.""" - return self._unsupported("as_uri") + self._unsupported("as_uri") class Path(_PathBase): From a51d7a0028d8a8629560ace4f70730b56d33347f Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 15 Sep 2023 20:08:00 +0100 Subject: [PATCH 24/31] WIP --- Lib/pathlib.py | 107 +++++++++++++++++++++------------------ Lib/test/test_pathlib.py | 12 ++++- 2 files changed, 68 insertions(+), 51 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 253f2da838efbe..c6a19ae0c4dba7 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -305,6 +305,11 @@ class PurePath: # The `_hash` slot stores the hash of the case-normalized string # path. It's set when `__hash__()` is called for the first time. '_hash', + + # The '_resolving' slot stores a boolean indicating whether the path + # is being processed by `_PathBase.resolve()`. This prevents duplicate + # work from occurring when `resolve()` calls `stat()` or `readlink()`. + '_resolving', ) pathmod = os.path @@ -344,6 +349,7 @@ def __init__(self, *args): f"not {type(path).__name__!r}") paths.append(path) self._raw_paths = paths + self._resolving = False def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. @@ -704,7 +710,9 @@ def parent(self): tail = self._tail if not tail: return self - return self._from_parsed_parts(drv, root, tail[:-1]) + path = self._from_parsed_parts(drv, root, tail[:-1]) + path._resolving = self._resolving + return path @property def parents(self): @@ -1251,63 +1259,64 @@ def resolve(self, strict=False): Make the path absolute, resolving all symlinks on the way and also normalizing it. """ + if self._resolving: + return self try: path = self.absolute() - tail_idx = len(path._tail) - len(self._tail) except UnsupportedOperation: path = self - tail_idx = 0 - if not path._tail: - return path - drv = path.drive - root = path.root - tail = list(path._tail) - dirty = False + + def split(path): + return path._from_parsed_parts(path.drive, path.root, []), path._tail[::-1] + link_count = 0 - readlink_supported = True - while tail_idx < len(tail): - if tail[tail_idx] == '..': - if tail_idx == 0: - if root: - # Delete '..' part immediately following root. - del tail[tail_idx] - dirty = True + stat_cache = {} + target_cache = {} + path, parts = split(path) + while parts: + part = parts.pop() + if part == '..': + if not path._tail: + if path.root: + # Delete '..' segment immediately following root continue - elif tail[tail_idx - 1] != '..': - # Delete '..' part and its predecessor. - tail_idx -= 1 - del tail[tail_idx:tail_idx + 2] - dirty = True + elif path._tail[-1] != '..': + # Delete '..' segment and its predecessor + path = path.parent continue - elif readlink_supported: - link = self._from_parsed_parts(drv, root, tail[:tail_idx + 1]) + path = path._make_child_relpath(part) + else: + lookup_path = path + path = path._make_child_relpath(part) + path._resolving = True + path_str = str(path) try: - link_target = link.readlink() - except UnsupportedOperation: - readlink_supported = False - except OSError as e: - if e.errno != EINVAL: - if strict: - raise - else: - break - else: - link_count += 1 - if link_count >= _MAX_SYMLINKS: - raise OSError(ELOOP, "Too many symbolic links in path", path) - elif link_target.root or link_target.drive: - link_target = link.parent / link_target - drv = link_target.drive - root = link_target.root - tail[:tail_idx + 1] = link_target._tail - tail_idx = 0 + st = stat_cache.get(path_str) + if st is None: + st = stat_cache[path_str] = path.stat(follow_symlinks=False) + if S_ISLNK(st.st_mode): + # Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are + # encountered during resolution. + link_count += 1 + if link_count >= _MAX_SYMLINKS: + raise OSError(ELOOP, "Too many symbolic links in path", path_str) + target = target_cache.get(path_str) + if target is None: + target = target_cache[path_str] = path.readlink() + target, target_parts = split(target) + path = target if target.root else lookup_path + parts.extend(target_parts) + elif parts and not S_ISDIR(st.st_mode): + raise NotADirectoryError(ENOTDIR, "Not a directory", path_str) + except OSError: + if strict: + raise else: - tail[tail_idx:tail_idx + 1] = link_target._tail - dirty = True - continue - tail_idx += 1 - if dirty: - path = self._from_parsed_parts(drv, root, tail) + # Append remaining path segments without further processing. + for part in reversed(parts): + path = path._make_child_relpath(part) + break + path._resolving = False return path def symlink_to(self, target, target_is_directory=False): diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index de07a67680a3d2..f8fee9a683d47c 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2241,6 +2241,7 @@ def test_readlink(self): self.assertEqual((P / 'brokenLink').readlink(), self.cls('non-existing')) self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) + self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) with self.assertRaises(OSError): (P / 'fileA').readlink() @@ -2262,11 +2263,18 @@ def test_resolve_common(self): if not self.can_symlink: self.skipTest("symlinks required") P = self.cls + # Non-existent file p = P(BASE, 'foo') - with self.assertRaises(OSError) as cm: + with self.assertRaises(FileNotFoundError) as cm: p.resolve(strict=True) self.assertEqual(cm.exception.errno, errno.ENOENT) + # File treated as directory + p = P(BASE, 'fileA', 'fileB') + with self.assertRaises(NotADirectoryError) as cm: + p.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ENOTDIR) # Non-strict + p = P(BASE, 'foo') self.assertEqualNormCase(str(p.resolve(strict=False)), os.path.join(BASE, 'foo')) p = P(BASE, 'foo', 'in', 'spam') @@ -2575,7 +2583,7 @@ def test_complex_symlinks_relative_dot_dot(self): class DummyPathWithSymlinks(DummyPath): def readlink(self): - path = str(self) + path = str(self.parent.resolve() / self.name) if path in self._symlinks: return self.with_segments(self._symlinks[path]) elif path in self._files or path in self._directories: From 7e3729e017673a89b21d6960a37955a31ce0dd23 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 23 Sep 2023 18:30:32 +0100 Subject: [PATCH 25/31] Undo test change. This will require further refactoring in another PR. --- Lib/test/test_pathlib.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index f8fee9a683d47c..203e6022709003 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2263,18 +2263,11 @@ def test_resolve_common(self): if not self.can_symlink: self.skipTest("symlinks required") P = self.cls - # Non-existent file p = P(BASE, 'foo') - with self.assertRaises(FileNotFoundError) as cm: + with self.assertRaises(OSError) as cm: p.resolve(strict=True) self.assertEqual(cm.exception.errno, errno.ENOENT) - # File treated as directory - p = P(BASE, 'fileA', 'fileB') - with self.assertRaises(NotADirectoryError) as cm: - p.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ENOTDIR) # Non-strict - p = P(BASE, 'foo') self.assertEqualNormCase(str(p.resolve(strict=False)), os.path.join(BASE, 'foo')) p = P(BASE, 'foo', 'in', 'spam') From 703fe5ccd5c7ca31d0da889d245daf470f1d6fa4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 26 Sep 2023 20:32:34 +0100 Subject: [PATCH 26/31] Ensure `..` segments are resolved in non-strict mode --- Lib/pathlib.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 1758a2beef0dbd..450fd1f49b077a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1269,6 +1269,7 @@ def resolve(self, strict=False): def split(path): return path._from_parsed_parts(path.drive, path.root, []), path._tail[::-1] + missing = False link_count = 0 stat_cache = {} target_cache = {} @@ -1285,6 +1286,8 @@ def split(path): path = path.parent continue path = path._make_child_relpath(part) + elif missing: + path = path._make_child_relpath(part) else: lookup_path = path path = path._make_child_relpath(part) @@ -1312,10 +1315,7 @@ def split(path): if strict: raise else: - # Append remaining path segments without further processing. - for part in reversed(parts): - path = path._make_child_relpath(part) - break + missing = True path._resolving = False return path From e5e5be5d9962a169fc14df8e41e0ce03f29a82fb Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 26 Sep 2023 20:34:12 +0100 Subject: [PATCH 27/31] Move symlink loop resolution test from `PosixPathTest` to `DummyPathTest` --- Lib/test/test_pathlib.py | 62 +++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index c3c58a10c6b299..319148e9065a65 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2334,6 +2334,38 @@ def test_resolve_dot(self): # Non-strict self.assertEqual(r.resolve(strict=False), p / '3' / '4') + def _check_symlink_loop(self, *args): + path = self.cls(*args) + with self.assertRaises(OSError) as cm: + path.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ELOOP) + + def test_resolve_loop(self): + if not self.can_symlink: + self.skipTest("symlinks required") + if os.name == 'nt' and issubclass(self.cls, pathlib.Path): + self.skipTest("symlink loops work differently with concrete Windows paths") + # Loops with relative symlinks. + self.cls(BASE, 'linkX').symlink_to('linkX/inside') + self._check_symlink_loop(BASE, 'linkX') + self.cls(BASE, 'linkY').symlink_to('linkY') + self._check_symlink_loop(BASE, 'linkY') + self.cls(BASE, 'linkZ').symlink_to('linkZ/../linkZ') + self._check_symlink_loop(BASE, 'linkZ') + # Non-strict + p = self.cls(BASE, 'linkZ', 'foo') + self.assertEqual(p.resolve(strict=False), p) + # Loops with absolute symlinks. + self.cls(BASE, 'linkU').symlink_to(join('linkU/inside')) + self._check_symlink_loop(BASE, 'linkU') + self.cls(BASE, 'linkV').symlink_to(join('linkV')) + self._check_symlink_loop(BASE, 'linkV') + self.cls(BASE, 'linkW').symlink_to(join('linkW/../linkW')) + self._check_symlink_loop(BASE, 'linkW') + # Non-strict + q = self.cls(BASE, 'linkW', 'foo') + self.assertEqual(q.resolve(strict=False), q) + def test_stat(self): statA = self.cls(BASE).joinpath('fileA').stat() statB = self.cls(BASE).joinpath('dirB', 'fileB').stat() @@ -3428,12 +3460,6 @@ def test_absolute(self): self.assertEqual(str(P('//a').absolute()), '//a') self.assertEqual(str(P('//a/b').absolute()), '//a/b') - def _check_symlink_loop(self, *args): - path = self.cls(*args) - with self.assertRaises(OSError) as cm: - path.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ELOOP) - @unittest.skipIf( is_emscripten or is_wasi, "umask is not implemented on Emscripten/WASI." @@ -3480,30 +3506,6 @@ def test_touch_mode(self): st = os.stat(join('masked_new_file')) self.assertEqual(stat.S_IMODE(st.st_mode), 0o750) - def test_resolve_loop(self): - if not self.can_symlink: - self.skipTest("symlinks required") - # Loops with relative symlinks. - os.symlink('linkX/inside', join('linkX')) - self._check_symlink_loop(BASE, 'linkX') - os.symlink('linkY', join('linkY')) - self._check_symlink_loop(BASE, 'linkY') - os.symlink('linkZ/../linkZ', join('linkZ')) - self._check_symlink_loop(BASE, 'linkZ') - # Non-strict - p = self.cls(BASE, 'linkZ', 'foo') - self.assertEqual(p.resolve(strict=False), p) - # Loops with absolute symlinks. - os.symlink(join('linkU/inside'), join('linkU')) - self._check_symlink_loop(BASE, 'linkU') - os.symlink(join('linkV'), join('linkV')) - self._check_symlink_loop(BASE, 'linkV') - os.symlink(join('linkW/../linkW'), join('linkW')) - self._check_symlink_loop(BASE, 'linkW') - # Non-strict - q = self.cls(BASE, 'linkW', 'foo') - self.assertEqual(q.resolve(strict=False), q) - def test_glob(self): P = self.cls p = P(BASE) From 38769a0ff7e9299ed4dc6a8e1825cd7eae122c8b Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 26 Sep 2023 20:43:39 +0100 Subject: [PATCH 28/31] Add `PathBase._split_stack()` helper method. --- Lib/pathlib.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 450fd1f49b077a..f8ec98c28e1f63 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1254,6 +1254,14 @@ def readlink(self): """ self._unsupported("readlink") + def _split_stack(self): + """ + Split the path into a 2-tuple (anchor, parts), where *anchor* is the + uppermost parent of the path (equivalent to path.parents[-1]), and + *parts* is a reversed list of parts following the anchor. + """ + return self._from_parsed_parts(self.drive, self.root, []), self._tail[::-1] + def resolve(self, strict=False): """ Make the path absolute, resolving all symlinks on the way and also @@ -1266,14 +1274,11 @@ def resolve(self, strict=False): except UnsupportedOperation: path = self - def split(path): - return path._from_parsed_parts(path.drive, path.root, []), path._tail[::-1] - missing = False link_count = 0 stat_cache = {} target_cache = {} - path, parts = split(path) + path, parts = path._split_stack() while parts: part = parts.pop() if part == '..': @@ -1306,7 +1311,7 @@ def split(path): target = target_cache.get(path_str) if target is None: target = target_cache[path_str] = path.readlink() - target, target_parts = split(target) + target, target_parts = target._split_stack() path = target if target.root else lookup_path parts.extend(target_parts) elif parts and not S_ISDIR(st.st_mode): From 7c78952cc951583f80a311079f0b98b15f3eb726 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 26 Sep 2023 20:57:52 +0100 Subject: [PATCH 29/31] Use path object as stat/link target cache key --- Lib/pathlib.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f8ec98c28e1f63..a636d9707d3ca1 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1297,25 +1297,24 @@ def resolve(self, strict=False): lookup_path = path path = path._make_child_relpath(part) path._resolving = True - path_str = str(path) try: - st = stat_cache.get(path_str) + st = stat_cache.get(path) if st is None: - st = stat_cache[path_str] = path.stat(follow_symlinks=False) + st = stat_cache[path] = path.stat(follow_symlinks=False) if S_ISLNK(st.st_mode): # Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are # encountered during resolution. link_count += 1 if link_count >= _MAX_SYMLINKS: - raise OSError(ELOOP, "Too many symbolic links in path", path_str) - target = target_cache.get(path_str) + raise OSError(ELOOP, "Too many symbolic links in path", str(path)) + target = target_cache.get(path) if target is None: - target = target_cache[path_str] = path.readlink() + target = target_cache[path] = path.readlink() target, target_parts = target._split_stack() path = target if target.root else lookup_path parts.extend(target_parts) elif parts and not S_ISDIR(st.st_mode): - raise NotADirectoryError(ENOTDIR, "Not a directory", path_str) + raise NotADirectoryError(ENOTDIR, "Not a directory", str(path)) except OSError: if strict: raise From fe57725fc2e1e812ba470c1f9d625b051518698d Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 27 Sep 2023 03:27:24 +0100 Subject: [PATCH 30/31] Optimise resolve(): skip stat() in non-strict mode if readlink() is unsupported. --- Lib/pathlib.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index a636d9707d3ca1..26a85de48ee847 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1253,6 +1253,7 @@ def readlink(self): Return the path to which the symbolic link points. """ self._unsupported("readlink") + readlink._unsupported = True def _split_stack(self): """ @@ -1274,7 +1275,7 @@ def resolve(self, strict=False): except UnsupportedOperation: path = self - missing = False + querying = strict or not getattr(self.readlink, '_unsupported', False) link_count = 0 stat_cache = {} target_cache = {} @@ -1290,12 +1291,9 @@ def resolve(self, strict=False): # Delete '..' segment and its predecessor path = path.parent continue - path = path._make_child_relpath(part) - elif missing: - path = path._make_child_relpath(part) - else: - lookup_path = path - path = path._make_child_relpath(part) + lookup_path = path + path = path._make_child_relpath(part) + if querying and part != '..': path._resolving = True try: st = stat_cache.get(path) @@ -1319,7 +1317,7 @@ def resolve(self, strict=False): if strict: raise else: - missing = True + querying = False path._resolving = False return path From cf9c8b68bab433d807f4b5ddf79b75df02f7b259 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 29 Sep 2023 23:17:07 +0100 Subject: [PATCH 31/31] Address code review comments --- Lib/pathlib.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 26a85de48ee847..e6be9061013a8a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1253,7 +1253,7 @@ def readlink(self): Return the path to which the symbolic link points. """ self._unsupported("readlink") - readlink._unsupported = True + readlink._supported = False def _split_stack(self): """ @@ -1275,7 +1275,9 @@ def resolve(self, strict=False): except UnsupportedOperation: path = self - querying = strict or not getattr(self.readlink, '_unsupported', False) + # If the user has *not* overridden the `readlink()` method, then symlinks are unsupported + # and (in non-strict mode) we can improve performance by not calling `stat()`. + querying = strict or getattr(self.readlink, '_supported', True) link_count = 0 stat_cache = {} target_cache = {} @@ -1291,7 +1293,8 @@ def resolve(self, strict=False): # Delete '..' segment and its predecessor path = path.parent continue - lookup_path = path + # Join the current part onto the path. + path_parent = path path = path._make_child_relpath(part) if querying and part != '..': path._resolving = True @@ -1309,7 +1312,12 @@ def resolve(self, strict=False): if target is None: target = target_cache[path] = path.readlink() target, target_parts = target._split_stack() - path = target if target.root else lookup_path + # If the symlink target is absolute (like '/etc/hosts'), set the current + # path to its uppermost parent (like '/'). If not, the symlink target is + # relative to the symlink parent, which we recorded earlier. + path = target if target.root else path_parent + # Add the symlink target's reversed tail parts (like ['hosts', 'etc']) to + # the stack of unresolved path parts. parts.extend(target_parts) elif parts and not S_ISDIR(st.st_mode): raise NotADirectoryError(ENOTDIR, "Not a directory", str(path))