diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 50e089653fe71b..2fb9ae82fd6b47 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -437,6 +437,9 @@ the :mod:`glob` module.) :func:`os.lstat`, or :func:`os.stat`. This function implements the underlying comparison used by :func:`samefile` and :func:`sameopenfile`. + Do not use stat results created with the *fast* argument, as they may be + missing information necessary to compare the two files. + .. availability:: Unix, Windows. .. versionchanged:: 3.4 diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 775aa32df99a46..0e01c56b3c8a77 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1037,17 +1037,27 @@ as internal buffering of data. .. availability:: Unix. -.. function:: fstat(fd) +.. function:: fstat(fd, *, fast=False) Get the status of the file descriptor *fd*. Return a :class:`stat_result` object. - As of Python 3.3, this is equivalent to ``os.stat(fd)``. + As of Python 3.3, this is equivalent to ``os.stat(fd, fast=fast)``. + + Passing *fast* as ``True`` may omit some information on some platforms + for the sake of performance. These omissions are not guaranteed (that is, + the information may be returned anyway), and may change between Python + releases without a deprecation period or due to operating system updates + without warning. See :class:`stat_result` documentation for the fields + that are guaranteed to be present under this option. .. seealso:: The :func:`.stat` function. + .. versionchanged:: 3.12 + Added the *fast* parameter. + .. function:: fstatvfs(fd, /) @@ -2175,7 +2185,7 @@ features: Accepts a :term:`path-like object`. -.. function:: lstat(path, *, dir_fd=None) +.. function:: lstat(path, *, dir_fd=None, fast=False) Perform the equivalent of an :c:func:`lstat` system call on the given path. Similar to :func:`~os.stat`, but does not follow symbolic links. 
Return a @@ -2184,8 +2194,15 @@ features: On platforms that do not support symbolic links, this is an alias for :func:`~os.stat`. + Passing *fast* as ``True`` may omit some information on some platforms + for the sake of performance. These omissions are not guaranteed (that is, + the information may be returned anyway), and may change between Python + releases without a deprecation period or due to operating system updates + without warning. See :class:`stat_result` documentation for the fields + that are guaranteed to be present under this option. + As of Python 3.3, this is equivalent to ``os.stat(path, dir_fd=dir_fd, - follow_symlinks=False)``. + follow_symlinks=False, fast=fast)``. This function can also support :ref:`paths relative to directory descriptors <dir_fd>`. @@ -2209,6 +2226,9 @@ features: Other kinds of reparse points are resolved by the operating system as for :func:`~os.stat`. + .. versionchanged:: 3.12 + Added the *fast* parameter. + .. function:: mkdir(path, mode=0o777, *, dir_fd=None) @@ -2781,7 +2801,7 @@ features: for :class:`bytes` paths on Windows. -.. function:: stat(path, *, dir_fd=None, follow_symlinks=True) +.. function:: stat(path, *, dir_fd=None, follow_symlinks=True, fast=False) Get the status of a file or a file descriptor. Perform the equivalent of a :c:func:`stat` system call on the given path. *path* may be specified as @@ -2806,6 +2826,13 @@ features: possible and call :func:`lstat` on the result. This does not apply to dangling symlinks or junction points, which will raise the usual exceptions. + Passing *fast* as ``True`` may omit some information on some platforms + for the sake of performance. These omissions are not guaranteed (that is, + the information may be returned anyway), and may change between Python + releases without a deprecation period or due to operating system updates + without warning. See :class:`stat_result` documentation for the fields + that are guaranteed to be present under this option. + ..
index:: module: stat Example:: @@ -2838,6 +2865,9 @@ features: returns the information for the original path as if ``follow_symlinks=False`` had been specified instead of raising an error. + .. versionchanged:: 3.12 + Added the *fast* parameter. + .. class:: stat_result @@ -2845,12 +2875,22 @@ features: :c:type:`stat` structure. It is used for the result of :func:`os.stat`, :func:`os.fstat` and :func:`os.lstat`. + When the *fast* argument to these functions is passed ``True``, some + information may be reduced or omitted. Those attributes that are + guaranteed to be valid, and those currently known to be omitted, are + marked in the documentation below. If not specified and you depend on + that field, explicitly pass *fast* as ``False`` to ensure it is + calculated. + Attributes: .. attribute:: st_mode File mode: file type and file mode bits (permissions). + When *fast* is ``True``, only the file type bits are guaranteed + to be valid (the mode bits may be zero). + .. attribute:: st_ino Platform dependent, but if non-zero, uniquely identifies the @@ -2865,6 +2905,8 @@ features: Identifier of the device on which this file resides. + On Windows, when *fast* is ``True``, this may be zero. + .. attribute:: st_nlink Number of hard links. @@ -2883,6 +2925,8 @@ features: The size of a symbolic link is the length of the pathname it contains, without a terminating null byte. + This field is guaranteed to be filled when specifying *fast*. + Timestamps: .. attribute:: st_atime @@ -2893,6 +2937,8 @@ features: Time of most recent content modification expressed in seconds. + This field is guaranteed to be filled when specifying *fast*. + .. attribute:: st_ctime Platform dependent: @@ -2909,6 +2955,9 @@ features: Time of most recent content modification expressed in nanoseconds as an integer. + This field is guaranteed to be filled when specifying *fast*, subject + to the note below. + .. 
attribute:: st_ctime_ns Platform dependent: @@ -2998,12 +3047,16 @@ features: :c:func:`GetFileInformationByHandle`. See the ``FILE_ATTRIBUTE_*`` constants in the :mod:`stat` module. + This field is guaranteed to be filled when specifying *fast*. + .. attribute:: st_reparse_tag When :attr:`st_file_attributes` has the ``FILE_ATTRIBUTE_REPARSE_POINT`` set, this field contains the tag identifying the type of reparse point. See the ``IO_REPARSE_TAG_*`` constants in the :mod:`stat` module. + This field is guaranteed to be filled when specifying *fast*. + The standard module :mod:`stat` defines functions and constants that are useful for extracting information from a :c:type:`stat` structure. (On Windows, some items are filled with dummy values.) @@ -3039,6 +3092,10 @@ features: files as :const:`S_IFCHR`, :const:`S_IFIFO` or :const:`S_IFBLK` as appropriate. + .. versionchanged:: 3.12 + Added the *fast* argument and defined the minimum set of returned + fields. + .. function:: statvfs(path) Perform a :c:func:`statvfs` system call on the given path. 
The return value is diff --git a/Include/internal/pycore_fileutils_windows.h b/Include/internal/pycore_fileutils_windows.h new file mode 100644 index 00000000000000..d1545eff30f51a --- /dev/null +++ b/Include/internal/pycore_fileutils_windows.h @@ -0,0 +1,93 @@ +#ifndef Py_INTERNAL_FILEUTILS_WINDOWS_H +#define Py_INTERNAL_FILEUTILS_WINDOWS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "Py_BUILD_CORE must be defined to include this header" +#endif + +#ifdef MS_WINDOWS + +/* Declared here only when NTDDI_WIN10_NI is undefined or NTDDI_VERSION is below it. */ +#if !defined(NTDDI_WIN10_NI) || !(NTDDI_VERSION >= NTDDI_WIN10_NI) +typedef struct _FILE_STAT_BASIC_INFORMATION { + LARGE_INTEGER FileId; + LARGE_INTEGER CreationTime; + LARGE_INTEGER LastAccessTime; + LARGE_INTEGER LastWriteTime; + LARGE_INTEGER ChangeTime; + LARGE_INTEGER AllocationSize; + LARGE_INTEGER EndOfFile; + ULONG FileAttributes; + ULONG ReparseTag; + ULONG NumberOfLinks; + ULONG DeviceType; + ULONG DeviceCharacteristics; +} FILE_STAT_BASIC_INFORMATION; + +typedef enum _FILE_INFO_BY_NAME_CLASS { + FileStatByNameInfo, + FileStatLxByNameInfo, + FileCaseSensitiveByNameInfo, + FileStatBasicByNameInfo, + MaximumFileInfoByNameClass +} FILE_INFO_BY_NAME_CLASS; +#endif + +/* Pointer type used for the dynamically resolved GetFileInformationByName. */ +typedef BOOL (WINAPI *PGetFileInformationByName)( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +); + +/* Call GetFileInformationByName through a function pointer resolved on + * first use from "api-ms-win-core-file-l2-1-4". + * + * Returns FALSE with the last error set to ERROR_NOT_SUPPORTED when the + * library or the entry point cannot be found; otherwise returns whatever + * the resolved function returns for the given arguments. + */ +static inline BOOL GetFileInformationByName( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +) { + static PGetFileInformationByName GetFileInformationByName_ptr = NULL; + static int GetFileInformationByName_init = -1; + + if (GetFileInformationByName_init < 0) { + /* Resolve into a local and publish the state flag last, so that a + * concurrent caller never sees the "unsupported" state (0) while + * the lookup is still in progress. */ + PGetFileInformationByName resolved = NULL; + HMODULE hMod = LoadLibraryW(L"api-ms-win-core-file-l2-1-4"); + if (hMod) { + resolved = (PGetFileInformationByName)GetProcAddress( + hMod, "GetFileInformationByName"); + if (!resolved) { + FreeLibrary(hMod); + } + } + GetFileInformationByName_ptr = resolved; + GetFileInformationByName_init = resolved ? 1 : 0; + } + + /* Also check the pointer itself in case the flag became visible first. */ + if (GetFileInformationByName_init <= 0 || !GetFileInformationByName_ptr) { + SetLastError(ERROR_NOT_SUPPORTED); + return FALSE; + } + return GetFileInformationByName_ptr(FileName, FileInformationClass, FileInfoBuffer, FileInfoBufferSize); +} + +#endif /* MS_WINDOWS */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FILEUTILS_WINDOWS_H */
diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 494bcf293cdb7b..379c00adf8d736 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -903,6 +903,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(false)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(family)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fanout)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fast)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd2)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fdel)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index b0cb8365933e77..d3d6a26f19026b 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -389,6 +389,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(false) STRUCT_FOR_ID(family) STRUCT_FOR_ID(fanout) + STRUCT_FOR_ID(fast) STRUCT_FOR_ID(fd) STRUCT_FOR_ID(fd2) STRUCT_FOR_ID(fdel) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 4b128da54555b7..b580e36a74e008 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -895,6 +895,7 @@ extern "C" { INIT_ID(false), \ INIT_ID(family), \ INIT_ID(fanout), \ + INIT_ID(fast), \ INIT_ID(fd), \ INIT_ID(fd2), \ INIT_ID(fdel), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 7ef1f7e94ddead..efb42da89413ab 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -684,6 +684,8 @@ _PyUnicode_InitStaticStrings(void) { PyUnicode_InternInPlace(&string); string = &_Py_ID(fanout); PyUnicode_InternInPlace(&string); + string = &_Py_ID(fast); + PyUnicode_InternInPlace(&string); string = &_Py_ID(fd); PyUnicode_InternInPlace(&string); string = &_Py_ID(fd2); diff --git a/Lib/asyncio/proactor_events.py b/Lib/asyncio/proactor_events.py index c6aab408fc7410..482e643ef57f71 100644 --- a/Lib/asyncio/proactor_events.py +++ b/Lib/asyncio/proactor_events.py @@ -734,7 +734,7 @@ async def _sock_sendfile_native(self, sock, file, offset, count): except (AttributeError, io.UnsupportedOperation) as err: raise exceptions.SendfileNotAvailableError("not a regular file") try: - fsize = os.fstat(fileno).st_size + fsize = os.fstat(fileno, fast=True).st_size except OSError: raise exceptions.SendfileNotAvailableError("not a regular file") blocksize = count if count else fsize diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py index b21e0394141bf4..40b296ed3db1bc 100644 --- a/Lib/asyncio/unix_events.py +++ b/Lib/asyncio/unix_events.py @@ -307,7 +307,7 @@ async def create_unix_server( # Check for abstract socket. `str` and `bytes` paths are supported. 
if path[0] not in (0, '\x00'): try: - if stat.S_ISSOCK(os.stat(path).st_mode): + if stat.S_ISSOCK(os.stat(path, fast=True).st_mode): os.remove(path) except FileNotFoundError: pass @@ -363,7 +363,7 @@ async def _sock_sendfile_native(self, sock, file, offset, count): except (AttributeError, io.UnsupportedOperation) as err: raise exceptions.SendfileNotAvailableError("not a regular file") try: - fsize = os.fstat(fileno).st_size + fsize = os.fstat(fileno, fast=True).st_size except OSError: raise exceptions.SendfileNotAvailableError("not a regular file") blocksize = count if count else fsize @@ -472,7 +472,7 @@ def __init__(self, loop, pipe, protocol, waiter=None, extra=None): self._closing = False self._paused = False - mode = os.fstat(self._fileno).st_mode + mode = os.fstat(self._fileno, fast=True).st_mode if not (stat.S_ISFIFO(mode) or stat.S_ISSOCK(mode) or stat.S_ISCHR(mode)): @@ -607,7 +607,7 @@ def __init__(self, loop, pipe, protocol, waiter=None, extra=None): self._conn_lost = 0 self._closing = False # Set when close() or write_eof() called. 
- mode = os.fstat(self._fileno).st_mode + mode = os.fstat(self._fileno, fast=True).st_mode is_char = stat.S_ISCHR(mode) is_fifo = stat.S_ISFIFO(mode) is_socket = stat.S_ISSOCK(mode) diff --git a/Lib/compileall.py b/Lib/compileall.py index 330a90786efc5f..c9249fb0871172 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -220,7 +220,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if tail == '.py': if not force: try: - mtime = int(os.stat(fullname).st_mtime) + mtime = int(os.stat(fullname, fast=True).st_mtime) expect = struct.pack('<4sLL', importlib.util.MAGIC_NUMBER, 0, mtime & 0xFFFF_FFFF) for cfile in opt_cfiles.values(): diff --git a/Lib/filecmp.py b/Lib/filecmp.py index 30bd900fa805aa..83a964b00f8fd2 100644 --- a/Lib/filecmp.py +++ b/Lib/filecmp.py @@ -50,8 +50,8 @@ def cmp(f1, f2, shallow=True): """ - s1 = _sig(os.stat(f1)) - s2 = _sig(os.stat(f2)) + s1 = _sig(os.stat(f1, fast=True)) + s2 = _sig(os.stat(f2, fast=True)) if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG: return False if shallow and s1 == s2: @@ -159,12 +159,12 @@ def phase2(self): # Distinguish files, directories, funnies ok = True try: - a_stat = os.stat(a_path) + a_stat = os.stat(a_path, fast=True) except OSError: # print('Can\'t stat', a_path, ':', why.args[1]) ok = False try: - b_stat = os.stat(b_path) + b_stat = os.stat(b_path, fast=True) except OSError: # print('Can\'t stat', b_path, ':', why.args[1]) ok = False diff --git a/Lib/glob.py b/Lib/glob.py index a7256422d520fb..dce8261f1f947a 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -193,7 +193,7 @@ def _lexists(pathname, dir_fd): if dir_fd is None: return os.path.lexists(pathname) try: - os.lstat(pathname, dir_fd=dir_fd) + os.lstat(pathname, dir_fd=dir_fd, fast=True) except (OSError, ValueError): return False else: @@ -204,7 +204,7 @@ def _isdir(pathname, dir_fd): if dir_fd is None: return os.path.isdir(pathname) try: - st = os.stat(pathname, dir_fd=dir_fd) + st = os.stat(pathname, dir_fd=dir_fd, fast=True) 
except (OSError, ValueError): return False else: diff --git a/Lib/http/server.py b/Lib/http/server.py index 8aee31bac2752a..3b66638db82cfb 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -722,7 +722,7 @@ def send_head(self): return None try: - fs = os.fstat(f.fileno()) + fs = os.fstat(f.fileno(), fast=True) # Use browser cache if possible if ("If-Modified-Since" in self.headers and "If-None-Match" not in self.headers): diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index f4dbbebcd224c8..78cb23122851e6 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -137,20 +137,20 @@ def _path_split(path): return path[:i], path[i + 1:] -def _path_stat(path): +def _path_stat(path, *, fast=False): """Stat the path. Made a separate function to make it easier to override in experiments (e.g. cache stat results). """ - return _os.stat(path) + return _os.stat(path, fast=fast) def _path_is_mode_type(path, mode): """Test whether the path is the specified mode type.""" try: - stat_info = _path_stat(path) + stat_info = _path_stat(path, fast=True) except OSError: return False return (stat_info.st_mode & 0o170000) == mode @@ -955,7 +955,7 @@ def find_spec(cls, fullname, path=None, target=None): if filepath is None: return None try: - _path_stat(filepath) + _path_stat(filepath, fast=True) except OSError: return None for loader, suffixes in _get_supported_file_loaders(): @@ -1212,7 +1212,7 @@ class SourceFileLoader(FileLoader, SourceLoader): def path_stats(self, path): """Return the metadata for the path.""" - st = _path_stat(path) + st = _path_stat(path, fast=True) return {'mtime': st.st_mtime, 'size': st.st_size} def _cache_bytecode(self, source_path, bytecode_path, data): @@ -1663,7 +1663,7 @@ def find_spec(self, fullname, target=None): is_namespace = False tail_module = fullname.rpartition('.')[2] try: - mtime = _path_stat(self.path or _os.getcwd()).st_mtime + mtime = _path_stat(self.path or 
_os.getcwd(), fast=True).st_mtime except OSError: mtime = -1 if mtime != self._path_mtime: diff --git a/Lib/logging/handlers.py b/Lib/logging/handlers.py index 9847104446eaf6..4c5af414f9f75b 100644 --- a/Lib/logging/handlers.py +++ b/Lib/logging/handlers.py @@ -263,7 +263,7 @@ def __init__(self, filename, when='h', interval=1, backupCount=0, # path object (see Issue #27493), but self.baseFilename will be a string filename = self.baseFilename if os.path.exists(filename): - t = os.stat(filename)[ST_MTIME] + t = os.stat(filename, fast=True)[ST_MTIME] else: t = int(time.time()) self.rolloverAt = self.computeRollover(t) diff --git a/Lib/multiprocessing/shared_memory.py b/Lib/multiprocessing/shared_memory.py index 881f2001dd5980..78a7fdf4baa611 100644 --- a/Lib/multiprocessing/shared_memory.py +++ b/Lib/multiprocessing/shared_memory.py @@ -110,7 +110,7 @@ def __init__(self, name=None, create=False, size=0): try: if create and size: os.ftruncate(self._fd, size) - stats = os.fstat(self._fd) + stats = os.fstat(self._fd, fast=True) size = stats.st_size self._mmap = mmap.mmap(self._fd, size) except OSError: diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 873c884c3bd934..3bb47bedf28b85 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -262,7 +262,7 @@ def islink(path): This will always return false for Windows prior to 6.0. """ try: - st = os.lstat(path) + st = os.lstat(path, fast=True) except (OSError, ValueError, AttributeError): return False return stat.S_ISLNK(st.st_mode) @@ -274,7 +274,7 @@ def islink(path): def isjunction(path): """Test whether a path is a junction""" try: - st = os.lstat(path) + st = os.lstat(path, fast=True) except (OSError, ValueError, AttributeError): return False return bool(st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT) @@ -290,7 +290,7 @@ def isjunction(path): def lexists(path): """Test whether a path exists. 
Returns True for broken symbolic links""" try: - st = os.lstat(path) + st = os.lstat(path, fast=True) except (OSError, ValueError): return False return True diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 737f8a5c156d81..64aef2a9dfe7a1 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -164,7 +164,7 @@ def dirname(p): def islink(path): """Test whether a path is a symbolic link""" try: - st = os.lstat(path) + st = os.lstat(path, fast=True) except (OSError, ValueError, AttributeError): return False return stat.S_ISLNK(st.st_mode) @@ -184,7 +184,7 @@ def isjunction(path): def lexists(path): """Test whether a path exists. Returns True for broken symbolic links""" try: - os.lstat(path) + os.lstat(path, fast=True) except (OSError, ValueError): return False return True @@ -196,7 +196,7 @@ def lexists(path): def ismount(path): """Test whether a path is a mount point""" try: - s1 = os.lstat(path) + s1 = os.lstat(path, fast=True) except (OSError, ValueError): # It doesn't exist -- so not a mount point. :-) return False @@ -216,6 +216,14 @@ def ismount(path): except (OSError, ValueError): return False + # No st_dev/ino? Get the full stat instead of the fast one + if not s1.st_dev or not s1.st_ino: + try: + s1 = os.lstat(path) + except (OSError, ValueError): + # The path can no longer be stat'ed -- so not a mount point. + return False + dev1 = s1.st_dev dev2 = s2.st_dev if dev1 != dev2: @@ -458,7 +466,7 @@ def _joinrealpath(path, rest, strict, seen): continue newpath = join(path, name) try: - st = os.lstat(newpath) + st = os.lstat(newpath, fast=True) except OSError: if strict: raise @@ -478,7 +486,7 @@ def _joinrealpath(path, rest, strict, seen): # The symlink is not resolved, so we must have a symlink loop. if strict: # Raise OSError(errno.ELOOP) - os.stat(newpath) + os.stat(newpath, fast=True) else: # Return already resolved part + rest of the path unchanged.
return join(newpath, rest), False diff --git a/Lib/shutil.py b/Lib/shutil.py index f372406a6c51a8..910ad7c98a6faa 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -130,7 +130,7 @@ def _fastcopy_sendfile(fsrc, fdst): # should not make any difference, also in case the file content # changes while being copied. try: - blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MiB + blocksize = max(os.fstat(infd, fast=True).st_size, 2 ** 23) # min 8MiB except OSError: blocksize = 2 ** 27 # 128MiB # On 32-bit architectures truncate to 1GiB to avoid OverflowError, @@ -567,7 +567,7 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, if hasattr(os.stat_result, 'st_file_attributes'): def _rmtree_islink(path): try: - st = os.lstat(path) + st = os.lstat(path, fast=True) return (stat.S_ISLNK(st.st_mode) or (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) diff --git a/Lib/socket.py b/Lib/socket.py index 1c8cef6ce65810..ff49ebd3d016ea 100644 --- a/Lib/socket.py +++ b/Lib/socket.py @@ -354,7 +354,7 @@ def _sendfile_use_sendfile(self, file, offset=0, count=None): except (AttributeError, io.UnsupportedOperation) as err: raise _GiveupOnSendfile(err) # not a regular file try: - fsize = os.fstat(fileno).st_size + fsize = os.fstat(fileno, fast=True).st_size except OSError as err: raise _GiveupOnSendfile(err) # not a regular file if not fsize: diff --git a/Lib/test/test_inspect.py b/Lib/test/test_inspect.py index 3f5c299ce681c5..65a54c3e891c91 100644 --- a/Lib/test/test_inspect.py +++ b/Lib/test/test_inspect.py @@ -1004,8 +1004,8 @@ def
test_getfullargspec_builtin_methods(self): self.assertFullArgSpecEquals( os.stat, args_e=['path'], - kwonlyargs_e=['dir_fd', 'follow_symlinks'], - kwonlydefaults_e={'dir_fd': None, 'follow_symlinks': True}) + kwonlyargs_e=['dir_fd', 'follow_symlinks', 'fast'], + kwonlydefaults_e={'dir_fd': None, 'follow_symlinks': True, 'fast': False}) @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 94db8bb7737acd..76c2a75e6813fb 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -613,6 +613,18 @@ def test_stat_result_pickle(self): unpickled = pickle.loads(p) self.assertEqual(result, unpickled) + def test_stat_result_fast(self): + # Minimum guaranteed fields when requesting incomplete info + result_1 = os.stat(self.fname, fast=True) + result_2 = os.stat(self.fname, fast=False) + result_3 = os.stat(self.fname) + self.assertEqual(stat.S_IFMT(result_1.st_mode), + stat.S_IFMT(result_2.st_mode)) + self.assertEqual(result_1.st_size, result_2.st_size) + self.assertEqual(result_1.st_mtime, result_2.st_mtime) + # Ensure the default matches fast=False + self.assertEqual(result_2, result_3) + @unittest.skipUnless(hasattr(os, 'statvfs'), 'test needs os.statvfs()') def test_statvfs_attributes(self): result = os.statvfs(self.fname) diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index 6c1c0f5577b7ec..10fd043d914797 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -209,7 +209,7 @@ def test_ismount_different_device(self): # Simulate the path being on a different device from its parent by # mocking out st_dev. save_lstat = os.lstat - def fake_lstat(path): + def fake_lstat(path, fast=False): st_ino = 0 st_dev = 0 if path == ABSTFN: @@ -227,7 +227,7 @@ def test_ismount_directory_not_readable(self): # issue #2466: Simulate ismount run on a directory that is not # readable, which used to return False. 
save_lstat = os.lstat - def fake_lstat(path): + def fake_lstat(path, fast=False): st_ino = 0 st_dev = 0 if path.startswith(ABSTFN) and path != ABSTFN: diff --git a/Lib/test/test_pydoc.py b/Lib/test/test_pydoc.py index cefc71cb5a7f54..2148a785a26571 100644 --- a/Lib/test/test_pydoc.py +++ b/Lib/test/test_pydoc.py @@ -1162,7 +1162,7 @@ def test_bound_builtin_method(self): @requires_docstrings def test_module_level_callable(self): self.assertEqual(self._get_summary_line(os.stat), - "stat(path, *, dir_fd=None, follow_symlinks=True)") + "stat(path, *, dir_fd=None, follow_symlinks=True, fast=False)") @requires_docstrings def test_staticmethod(self): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 278aa3a14bfeea..df4fdcf92d72f6 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1505,7 +1505,7 @@ def open_local_file(self, req): filename = req.selector localfile = url2pathname(filename) try: - stats = os.stat(localfile) + stats = os.stat(localfile, fast=True) size = stats.st_size modified = email.utils.formatdate(stats.st_mtime, usegmt=True) mtype = mimetypes.guess_type(filename)[0] @@ -2022,7 +2022,7 @@ def open_local_file(self, url): host, file = _splithost(url) localname = url2pathname(file) try: - stats = os.stat(localname) + stats = os.stat(localname, fast=True) except OSError as e: raise URLError(e.strerror, e.filename) size = stats.st_size diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 77b643caf9fc91..d0bf36958b816a 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2129,23 +2129,23 @@ def _compile(file, optimize=-1): if self._optimize == -1: # legacy mode: use whatever file is present if (os.path.isfile(file_pyc) and
- os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): + os.stat(file_pyc, fast=True).st_mtime >= os.stat(file_py, fast=True).st_mtime): # Use .pyc file. arcname = fname = file_pyc elif (os.path.isfile(pycache_opt0) and - os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): + os.stat(pycache_opt0, fast=True).st_mtime >= os.stat(file_py, fast=True).st_mtime): # Use the __pycache__/*.pyc file, but write it to the legacy pyc # file name in the archive. fname = pycache_opt0 arcname = file_pyc elif (os.path.isfile(pycache_opt1) and - os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): + os.stat(pycache_opt1, fast=True).st_mtime >= os.stat(file_py, fast=True).st_mtime): # Use the __pycache__/*.pyc file, but write it to the legacy pyc # file name in the archive. fname = pycache_opt1 arcname = file_pyc elif (os.path.isfile(pycache_opt2) and - os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): + os.stat(pycache_opt2, fast=True).st_mtime >= os.stat(file_py, fast=True).st_mtime): # Use the __pycache__/*.pyc file, but write it to the legacy pyc # file name in the archive. 
fname = pycache_opt2 @@ -2177,7 +2177,7 @@ def _compile(file, optimize=-1): msg = "invalid value for 'optimize': {!r}".format(self._optimize) raise ValueError(msg) if not (os.path.isfile(fname) and - os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): + os.stat(fname, fast=True).st_mtime >= os.stat(file_py, fast=True).st_mtime): if not _compile(file_py, optimize=self._optimize): fname = arcname = file_py archivename = os.path.split(arcname)[1] diff --git a/Misc/NEWS.d/next/Library/2022-11-23-15-15-59.gh-issue-99726.6m-YhG.rst b/Misc/NEWS.d/next/Library/2022-11-23-15-15-59.gh-issue-99726.6m-YhG.rst new file mode 100644 index 00000000000000..f07812ae4b2d19 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-23-15-15-59.gh-issue-99726.6m-YhG.rst @@ -0,0 +1,2 @@ +Adds ``fast`` argument to :func:`os.stat`, :func:`os.lstat` and :func:`os.fstat` +to enable performance optimizations by skipping some fields in the result. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index f9f6ca372ec6c7..113c1773d809e0 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -9,7 +9,7 @@ preserve PyDoc_STRVAR(os_stat__doc__, -"stat($module, /, path, *, dir_fd=None, follow_symlinks=True)\n" +"stat($module, /, path, *, dir_fd=None, follow_symlinks=True, fast=False)\n" "--\n" "\n" "Perform a stat system call on the given path.\n" @@ -25,6 +25,9 @@ PyDoc_STRVAR(os_stat__doc__, " If False, and the last element of the path is a symbolic link,\n" " stat will examine the symbolic link itself instead of the file\n" " the link points to.\n" +" fast\n" +" If True, certain data may be omitted on some platforms to\n" +" allow faster results. See the documentation for specific cases.\n" "\n" "dir_fd and follow_symlinks may not be implemented\n" " on your platform.
If they are unavailable, using them will raise a\n" @@ -37,7 +40,8 @@ PyDoc_STRVAR(os_stat__doc__, {"stat", _PyCFunction_CAST(os_stat), METH_FASTCALL|METH_KEYWORDS, os_stat__doc__}, static PyObject * -os_stat_impl(PyObject *module, path_t *path, int dir_fd, int follow_symlinks); +os_stat_impl(PyObject *module, path_t *path, int dir_fd, int follow_symlinks, + int fast); static PyObject * os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -45,14 +49,14 @@ os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD PyObject *ob_item[NUM_KEYWORDS]; } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(path), &_Py_ID(dir_fd), &_Py_ID(follow_symlinks), }, + .ob_item = { &_Py_ID(path), &_Py_ID(dir_fd), &_Py_ID(follow_symlinks), &_Py_ID(fast), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -61,18 +65,19 @@ os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"path", "dir_fd", "follow_symlinks", NULL}; + static const char * const _keywords[] = {"path", "dir_fd", "follow_symlinks", "fast", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "stat", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; path_t path = PATH_T_INITIALIZE("stat", "path", 0, 1); int dir_fd = DEFAULT_DIR_FD; int follow_symlinks = 1; + int fast = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { @@ -92,12 +97,21 @@ os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn goto skip_optional_kwonly; } } - follow_symlinks = PyObject_IsTrue(args[2]); - if (follow_symlinks < 0) { + if (args[2]) { + follow_symlinks = PyObject_IsTrue(args[2]); + if (follow_symlinks < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + fast = PyObject_IsTrue(args[3]); + if (fast < 0) { goto exit; } skip_optional_kwonly: - return_value = os_stat_impl(module, &path, dir_fd, follow_symlinks); + return_value = os_stat_impl(module, &path, dir_fd, follow_symlinks, fast); exit: /* Cleanup for path */ @@ -107,11 +121,15 @@ os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn } PyDoc_STRVAR(os_lstat__doc__, -"lstat($module, /, path, *, dir_fd=None)\n" +"lstat($module, /, path, *, dir_fd=None, fast=False)\n" "--\n" "\n" "Perform a stat system call on the given path, without following symbolic links.\n" "\n" +" fast\n" +" If True, certain data may be omitted on some platforms to\n" +" allow faster results. 
See the documentation for specific cases.\n" +"\n" "Like stat(), but do not follow symbolic links.\n" "Equivalent to stat(path, follow_symlinks=False)."); @@ -119,7 +137,7 @@ PyDoc_STRVAR(os_lstat__doc__, {"lstat", _PyCFunction_CAST(os_lstat), METH_FASTCALL|METH_KEYWORDS, os_lstat__doc__}, static PyObject * -os_lstat_impl(PyObject *module, path_t *path, int dir_fd); +os_lstat_impl(PyObject *module, path_t *path, int dir_fd, int fast); static PyObject * os_lstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -127,14 +145,14 @@ os_lstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD PyObject *ob_item[NUM_KEYWORDS]; } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(path), &_Py_ID(dir_fd), }, + .ob_item = { &_Py_ID(path), &_Py_ID(dir_fd), &_Py_ID(fast), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -143,17 +161,18 @@ os_lstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"path", "dir_fd", NULL}; + static const char * const _keywords[] = {"path", "dir_fd", "fast", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "lstat", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; path_t path = PATH_T_INITIALIZE("lstat", "path", 0, 0); int dir_fd = DEFAULT_DIR_FD; + int fast = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { @@ -165,11 +184,20 @@ os_lstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw if (!noptargs) { goto skip_optional_kwonly; } - if (!FSTATAT_DIR_FD_CONVERTER(args[1], &dir_fd)) { + if (args[1]) { + if (!FSTATAT_DIR_FD_CONVERTER(args[1], &dir_fd)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + fast = PyObject_IsTrue(args[2]); + if (fast < 0) { goto exit; } skip_optional_kwonly: - return_value = os_lstat_impl(module, &path, dir_fd); + return_value = os_lstat_impl(module, &path, dir_fd, fast); exit: /* Cleanup for path */ @@ -6728,19 +6756,23 @@ os__fcopyfile(PyObject *module, PyObject *const *args, Py_ssize_t nargs) #endif /* defined(__APPLE__) */ PyDoc_STRVAR(os_fstat__doc__, -"fstat($module, /, fd)\n" +"fstat($module, /, fd, *, fast=False)\n" "--\n" "\n" "Perform a stat system call on the given file descriptor.\n" "\n" +" fast\n" +" If True, certain data may be omitted on some platforms to\n" +" allow faster results. 
See the documentation for specific cases.\n" +"\n" "Like stat(), but for an open file descriptor.\n" -"Equivalent to os.stat(fd)."); +"Equivalent to os.stat(fd, fast=fast)."); #define OS_FSTAT_METHODDEF \ {"fstat", _PyCFunction_CAST(os_fstat), METH_FASTCALL|METH_KEYWORDS, os_fstat__doc__}, static PyObject * -os_fstat_impl(PyObject *module, int fd); +os_fstat_impl(PyObject *module, int fd, int fast); static PyObject * os_fstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -6748,14 +6780,14 @@ os_fstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 1 + #define NUM_KEYWORDS 2 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD PyObject *ob_item[NUM_KEYWORDS]; } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(fd), }, + .ob_item = { &_Py_ID(fd), &_Py_ID(fast), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -6764,15 +6796,17 @@ os_fstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"fd", NULL}; + static const char * const _keywords[] = {"fd", "fast", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "fstat", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[1]; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; int fd; + int fast = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { @@ -6782,7 +6816,15 @@ os_fstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw if (fd == -1 && PyErr_Occurred()) { goto exit; } - return_value = os_fstat_impl(module, fd); + if (!noptargs) { + goto skip_optional_kwonly; + } + fast = PyObject_IsTrue(args[1]); + if (fast < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = os_fstat_impl(module, fd, fast); exit: return return_value; @@ -11549,4 +11591,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=4192d8e09e216300 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8ac51554262db9eb input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 8185517b06b5dd..1bdcb72cdf0996 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -36,6 +36,7 @@ # include "posixmodule.h" #else # include "winreparse.h" +# include "pycore_fileutils_windows.h" // GetFileInformationByName() #endif #if !defined(EX_OK) && defined(EXIT_SUCCESS) @@ -664,6 +665,8 @@ PyOS_AfterFork(void) void _Py_time_t_to_FILE_TIME(time_t, int, FILETIME *); void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *, ULONG, struct _Py_stat_struct *); +void _Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *, + struct _Py_stat_struct *); #endif @@ -1834,17 +1837,112 @@ attributes_from_dir(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *re return TRUE; } +static void +win32_xstat_fixup_exec_mode(const wchar_t *path, + struct _Py_stat_struct *result) +{ + if (!(result->st_file_attributes & FILE_ATTRIBUTE_DIRECTORY)) { + /* Fix the file execute permissions. 
This hack sets S_IEXEC if + the filename has an extension that is commonly used by files + that CreateProcessW can execute. A real implementation calls + GetSecurityInfo, OpenThreadToken/OpenProcessToken, and + AccessCheck to check for generic read, write, and execute + access. */ + const wchar_t *fileExtension = wcsrchr(path, '.'); + if (fileExtension) { + if (_wcsicmp(fileExtension, L".exe") == 0 || + _wcsicmp(fileExtension, L".bat") == 0 || + _wcsicmp(fileExtension, L".cmd") == 0 || + _wcsicmp(fileExtension, L".com") == 0) { + result->st_mode |= 0111; + } + } + } +} + +static int +win32_xstat_get_st_dev(const wchar_t *path, + struct _Py_stat_struct *result) +{ + const wchar_t *rootEnd; + WCHAR rootBuffer[MAX_PATH]; + WCHAR *root = rootBuffer; + DWORD oldErrorMode; + DWORD vsn = 0; + int retval = 0; + + if (result->st_dev || !path || !path[0]) { + return 0; + } + + if (PathCchSkipRoot(path, &rootEnd) || + wcsncpy_s(rootBuffer, MAX_PATH, path, (rootEnd - path))) { + /* No root for the path, so let it use the current volume */ + root = NULL; + } + + /* Change the thread's error mode to avoid popping up dialogs for + "no disk in drive" situations. */ + if (!SetThreadErrorMode(SEM_FAILCRITICALERRORS, &oldErrorMode)) { + oldErrorMode = 0; + } + + if (GetVolumeInformationW(root, NULL, 0, &vsn, NULL, NULL, NULL, 0)) { + result->st_dev = vsn; + result->st_rdev = vsn; + } else { + retval = -1; + } + + SetThreadErrorMode(oldErrorMode, NULL); + return retval; +} + static int win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, - BOOL traverse) + BOOL traverse, BOOL fast) { HANDLE hFile; + FILE_STAT_BASIC_INFORMATION statInfo; BY_HANDLE_FILE_INFORMATION fileInfo; FILE_ATTRIBUTE_TAG_INFO tagInfo = { 0 }; - DWORD fileType, error; + DWORD fileType, error = 0; BOOL isUnhandledTag = FALSE; int retval = 0; + /* Try the fast path first. 
This is an lstat equivalent, but if we + don't find a symlink it'll be faster to try it first */ + if (GetFileInformationByName(path, FileStatBasicByNameInfo, + &statInfo, sizeof(statInfo))) { + /* Fast path succeeded. If the file isn't a reparse point or if it's + a name-surrogate reparse point and we're not traversing, then we + can use the fast stat. */ + if (!(statInfo.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) || + !traverse && IsReparseTagNameSurrogate(statInfo.ReparseTag)) { + _Py_stat_basic_info_to_stat(&statInfo, result); + if (!fast) { + win32_xstat_fixup_exec_mode(path, result); + /* st_dev is not included in FileStatBasicByName, + so we get it separately */ + if (win32_xstat_get_st_dev(path, result) != 0) { + return -1; + } + } + return 0; + } + // Continue to the slow path. For efficiency, set traverse to true. + traverse = TRUE; + } else { + /* Some errors aren't worth retrying with the slow path */ + switch(GetLastError()) { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + case ERROR_NOT_READY: + case ERROR_BAD_NET_NAME: + return -1; + } + } + DWORD access = FILE_READ_ATTRIBUTES; DWORD flags = FILE_FLAG_BACKUP_SEMANTICS; /* Allow opening directories. */ if (!traverse) { @@ -1969,7 +2067,7 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, for an unhandled tag. */ } else if (!isUnhandledTag) { CloseHandle(hFile); - return win32_xstat_impl(path, result, TRUE); + return win32_xstat_impl(path, result, TRUE, fast); } } } @@ -1991,23 +2089,8 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, } _Py_attribute_data_to_stat(&fileInfo, tagInfo.ReparseTag, result); - - if (!(fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { - /* Fix the file execute permissions. This hack sets S_IEXEC if - the filename has an extension that is commonly used by files - that CreateProcessW can execute. 
A real implementation calls - GetSecurityInfo, OpenThreadToken/OpenProcessToken, and - AccessCheck to check for generic read, write, and execute - access. */ - const wchar_t *fileExtension = wcsrchr(path, '.'); - if (fileExtension) { - if (_wcsicmp(fileExtension, L".exe") == 0 || - _wcsicmp(fileExtension, L".bat") == 0 || - _wcsicmp(fileExtension, L".cmd") == 0 || - _wcsicmp(fileExtension, L".com") == 0) { - result->st_mode |= 0111; - } - } + if (!fast) { + win32_xstat_fixup_exec_mode(path, result); } cleanup: @@ -2026,11 +2109,12 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, } static int -win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) +win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, + BOOL traverse, BOOL fast) { /* Protocol violation: we explicitly clear errno, instead of setting it to a POSIX error. Callers should use GetLastError. */ - int code = win32_xstat_impl(path, result, traverse); + int code = win32_xstat_impl(path, result, traverse, fast); errno = 0; return code; } @@ -2047,13 +2131,13 @@ win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) static int win32_lstat(const wchar_t* path, struct _Py_stat_struct *result) { - return win32_xstat(path, result, FALSE); + return win32_xstat(path, result, FALSE, TRUE); } static int win32_stat(const wchar_t* path, struct _Py_stat_struct *result) { - return win32_xstat(path, result, TRUE); + return win32_xstat(path, result, TRUE, TRUE); } #endif /* MS_WINDOWS */ @@ -2463,7 +2547,7 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) static PyObject * posix_do_stat(PyObject *module, const char *function_name, path_t *path, - int dir_fd, int follow_symlinks) + int dir_fd, int follow_symlinks, int fast) { STRUCT_STAT st; int result; @@ -2486,10 +2570,8 @@ posix_do_stat(PyObject *module, const char *function_name, path_t *path, if (path->fd != -1) result = FSTAT(path->fd, &st); #ifdef MS_WINDOWS - else if 
(follow_symlinks) - result = win32_stat(path->wide, &st); else - result = win32_lstat(path->wide, &st); + result = win32_xstat(path->wide, &st, follow_symlinks, fast); #else else #if defined(HAVE_LSTAT) @@ -2837,6 +2919,10 @@ os.stat stat will examine the symbolic link itself instead of the file the link points to. + fast: bool = False + If True, certain data may be omitted on some platforms to + allow faster results. See the documentation for specific cases. + Perform a stat system call on the given path. dir_fd and follow_symlinks may not be implemented @@ -2849,10 +2935,12 @@ It's an error to use dir_fd or follow_symlinks when specifying path as [clinic start generated code]*/ static PyObject * -os_stat_impl(PyObject *module, path_t *path, int dir_fd, int follow_symlinks) -/*[clinic end generated code: output=7d4976e6f18a59c5 input=01d362ebcc06996b]*/ +os_stat_impl(PyObject *module, path_t *path, int dir_fd, int follow_symlinks, + int fast) +/*[clinic end generated code: output=2657ee2ccb8586f6 input=ec99c0b72e50d965]*/ { - return posix_do_stat(module, "stat", path, dir_fd, follow_symlinks); + return posix_do_stat(module, "stat", path, dir_fd, + follow_symlinks, fast); } @@ -2865,6 +2953,10 @@ os.lstat dir_fd : dir_fd(requires='fstatat') = None + fast: bool = False + If True, certain data may be omitted on some platforms to + allow faster results. See the documentation for specific cases. + Perform a stat system call on the given path, without following symbolic links. Like stat(), but do not follow symbolic links. @@ -2872,11 +2964,12 @@ Equivalent to stat(path, follow_symlinks=False). 
[clinic start generated code]*/ static PyObject * -os_lstat_impl(PyObject *module, path_t *path, int dir_fd) -/*[clinic end generated code: output=ef82a5d35ce8ab37 input=0b7474765927b925]*/ +os_lstat_impl(PyObject *module, path_t *path, int dir_fd, int fast) +/*[clinic end generated code: output=e7fc00813e269d21 input=4311ddb7b2baed54]*/ { int follow_symlinks = 0; - return posix_do_stat(module, "lstat", path, dir_fd, follow_symlinks); + return posix_do_stat(module, "lstat", path, dir_fd, + follow_symlinks, fast); } @@ -10163,19 +10256,27 @@ os.fstat fd : int + * + + fast: bool = False + If True, certain data may be omitted on some platforms to + allow faster results. See the documentation for specific cases. + Perform a stat system call on the given file descriptor. Like stat(), but for an open file descriptor. -Equivalent to os.stat(fd). +Equivalent to os.stat(fd, fast=fast). [clinic start generated code]*/ static PyObject * -os_fstat_impl(PyObject *module, int fd) -/*[clinic end generated code: output=efc038cb5f654492 input=27e0e0ebbe5600c9]*/ +os_fstat_impl(PyObject *module, int fd, int fast) +/*[clinic end generated code: output=7bd835f9da58993a input=a21b5b699d3a18c7]*/ { STRUCT_STAT st; int res; int async_err = 0; + /* Currently we do not do anything with the fast option. */ + (void)fast; do { Py_BEGIN_ALLOW_THREADS diff --git a/Python/fileutils.c b/Python/fileutils.c index 244bd899b3bd24..5309d1379cf49c 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -9,6 +9,7 @@ # include # include # include // PathCchCombineEx +# include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION extern int winerror_to_errno(int); #endif @@ -1048,6 +1049,18 @@ FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out) *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); } +static void +LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out) +{ + /* XXX endianness. 
Shouldn't matter, as all Windows implementations are little-endian */ + /* Cannot simply cast and dereference in_ptr, + since it might not be aligned properly */ + __int64 in; + memcpy(&in, in_ptr, sizeof(in)); + *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */ + *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); +} + void _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr) { @@ -1104,6 +1117,51 @@ _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, } result->st_file_attributes = info->dwFileAttributes; } + +void +_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info, + struct _Py_stat_struct *result) +{ + memset(result, 0, sizeof(*result)); + result->st_mode = attributes_to_mode(info->FileAttributes); + result->st_size = info->EndOfFile.QuadPart; + result->st_dev = 0; + result->st_rdev = 0; + LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_ctime, &result->st_ctime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec); + result->st_nlink = info->NumberOfLinks; + result->st_ino = info->FileId.QuadPart; + /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will + open other name surrogate reparse points without traversing them. To + detect/handle these, check st_file_attributes and st_reparse_tag. 
*/ + result->st_reparse_tag = info->ReparseTag; + if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && + info->ReparseTag == IO_REPARSE_TAG_SYMLINK) { + /* set the bits that make this a symlink */ + result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK; + } + result->st_file_attributes = info->FileAttributes; + switch (info->DeviceType) { + case FILE_TYPE_DISK: + break; + case FILE_TYPE_CHAR: + /* \\.\nul */ + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR; + break; + case FILE_TYPE_PIPE: + /* \\.\pipe\spam */ + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO; + break; + default: + if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + /* \\.\pipe\ or \\.\mailslot\ */ + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR; + } + break; + } +} + #endif /* Return information about a file.