diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst index 9e088a598a6c08..a6e61795f66b0a 100644 --- a/Doc/library/importlib.rst +++ b/Doc/library/importlib.rst @@ -390,17 +390,22 @@ ABC hierarchy:: .. abstractmethod:: get_data(path) - An abstract method to return the bytes for the data located at *path*. - Loaders that have a file-like storage back-end - that allows storing arbitrary data - can implement this abstract method to give direct access - to the data stored. :exc:`OSError` is to be raised if the *path* cannot - be found. The *path* is expected to be constructed using a module's - :attr:`~module.__file__` attribute or an item from a package's - :attr:`~module.__path__`. + An abstract method to return the bytes for the data located at *path*. + Loaders that have a file-like storage back-end that allows storing + arbitrary data can implement this abstract method to give direct + access to the data stored. The *path* is expected to be constructed + using a module's :attr:`~module.__file__` attribute or an item from + a package's :attr:`~module.__path__` attribute. - .. versionchanged:: 3.4 - Raises :exc:`OSError` instead of :exc:`NotImplementedError`. + If the *path* cannot be handled, either :exc:`OSError` or :exc:`ValueError` + is to be raised. In most cases, these will be raised by the underlying + operating system interfaces rather than directly (e.g., :exc:`OSError` + would be raised when attempting to access a valid but nonexistent + filesystem path, while attempting to access a path containing a NUL + byte would raise :exc:`ValueError`). + + .. versionchanged:: 3.4 + Raise :exc:`OSError` by default instead of :exc:`NotImplementedError`. .. class:: InspectLoader @@ -530,6 +535,8 @@ ABC hierarchy:: Reads *path* as a binary file and returns the bytes from it. + If the *path* cannot be handled, this raises an :exc:`OSError` + or a :exc:`ValueError` depending on the reason. .. class:: SourceLoader @@ -561,12 +568,15 @@ ABC hierarchy:: - ``'size'`` (optional): the size in bytes of the source code. Any other keys in the dictionary are ignored, to allow for future - extensions. If the path cannot be handled, :exc:`OSError` is raised. + extensions. + + As for :meth:`ResourecLoader.get_data`, either :exc:`OSError` or + :exc:`ValueError` is to be raised if the *path* cannot be handled. .. versionadded:: 3.3 .. versionchanged:: 3.4 - Raise :exc:`OSError` instead of :exc:`NotImplementedError`. + Raise :exc:`OSError` by default instead of :exc:`NotImplementedError`. .. method:: path_mtime(path) @@ -576,10 +586,10 @@ ABC hierarchy:: .. deprecated:: 3.3 This method is deprecated in favour of :meth:`path_stats`. You don't have to implement it, but it is still available for compatibility - purposes. Raise :exc:`OSError` if the path cannot be handled. + purposes. .. versionchanged:: 3.4 - Raise :exc:`OSError` instead of :exc:`NotImplementedError`. + Raise :exc:`OSError` by default instead of :exc:`NotImplementedError`. .. method:: set_data(path, data) @@ -589,7 +599,7 @@ ABC hierarchy:: When writing to the path fails because the path is read-only (:const:`errno.EACCES`/:exc:`PermissionError`), do not propagate the - exception. + exception. Other exceptions should still be propagated. .. versionchanged:: 3.4 No longer raises :exc:`NotImplementedError` when called. diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 1b76328429f63a..19032a91aa7f50 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -148,6 +148,7 @@ def _path_stat(path): Made a separate function to make it easier to override in experiments (e.g. cache stat results). + This function may raise OSError or ValueError. """ return _os.stat(path) @@ -156,7 +157,7 @@ def _path_is_mode_type(path, mode): """Test whether the path is the specified mode type.""" try: stat_info = _path_stat(path) - except OSError: + except (OSError, ValueError): return False return (stat_info.st_mode & 0o170000) == mode @@ -211,7 +212,7 @@ def _write_atomic(path, data, mode=0o666): with _io.FileIO(fd, 'wb') as file: file.write(data) _os.replace(path_tmp, path) - except OSError: + except: try: _os.unlink(path_tmp) except OSError: @@ -381,7 +382,7 @@ def _calc_mode(path): """Calculate the mode permissions for a bytecode file.""" try: mode = _path_stat(path).st_mode - except OSError: + except (OSError, ValueError): mode = 0o666 # We always ensure write access so we can update cached files # later even when the source files are read-only on Windows (#6074) @@ -717,7 +718,7 @@ def find_spec(cls, fullname, path=None, target=None): return None try: _path_stat(filepath) - except OSError: + except (OSError, ValueError): return None for loader, suffixes in _get_supported_file_loaders(): if filepath.endswith(tuple(suffixes)): @@ -763,7 +764,7 @@ def path_mtime(self, path): """Optional method that returns the modification time (an int) for the specified path (a str). - Raises OSError when the path cannot be handled. + Raises OSError or ValueError when the path cannot be handled. """ raise OSError @@ -777,7 +778,7 @@ def path_stats(self, path): - 'size' (optional) is the size in bytes of the source code. Implementing this method allows the loader to read bytecode files. - Raises OSError when the path cannot be handled. + Raises OSError or ValueError when the path cannot be handled. """ return {'mtime': self.path_mtime(path)} @@ -803,7 +804,7 @@ def get_source(self, fullname): path = self.get_filename(fullname) try: source_bytes = self.get_data(path) - except OSError as exc: + except (OSError, ValueError) as exc: raise ImportError('source not available through get_data()', name=fullname) from exc return decode_source(source_bytes) @@ -836,13 +837,13 @@ def get_code(self, fullname): else: try: st = self.path_stats(source_path) - except OSError: + except (OSError, ValueError): pass else: source_mtime = int(st['mtime']) try: data = self.get_data(bytecode_path) - except OSError: + except (OSError, ValueError): pass else: exc_details = { @@ -938,7 +939,10 @@ def get_filename(self, fullname): return self.path def get_data(self, path): - """Return the data from path as raw bytes.""" + """Return the data from path as raw bytes. + + This may raise an OSError or a ValueError if the path is invalid. + """ if isinstance(self, (SourceLoader, ExtensionFileLoader)): with _io.open_code(str(path)) as file: return file.read() @@ -983,18 +987,34 @@ def set_data(self, path, data, *, _mode=0o666): # Probably another Python process already created the dir. continue except OSError as exc: - # Could be a permission error, read-only filesystem: just forget - # about writing the data. - _bootstrap._verbose_message('could not create {!r}: {!r}', - parent, exc) - return + # Could be a permission error or read-only filesystem (EROFS): + # just forget about writing the data. + from errno import EACCES, EROFS + + if ( + isinstance(exc, PermissionError) + or exc.errno in {EACCES, EROFS} + ): + _bootstrap._verbose_message('could not create {!r}: {!r}', + parent, exc) + return + raise try: _write_atomic(path, data, _mode) - _bootstrap._verbose_message('created {!r}', path) except OSError as exc: # Same as above: just don't write the bytecode. - _bootstrap._verbose_message('could not create {!r}: {!r}', path, - exc) + from errno import EACCES, EROFS + + if ( + isinstance(exc, PermissionError) + or exc.errno in {EACCES, EROFS} + ): + _bootstrap._verbose_message('could not create {!r}: {!r}', + path, exc) + return + raise + else: + _bootstrap._verbose_message('created {!r}', path) class SourcelessFileLoader(FileLoader, _LoaderBasics): @@ -1358,6 +1378,9 @@ def find_spec(self, fullname, target=None): mtime = _path_stat(self.path or _os.getcwd()).st_mtime except OSError: mtime = -1 + except ValueError: + # Invalid paths will never be usable even if mtime = -1. + return None if mtime != self._path_mtime: self._fill_cache() self._path_mtime = mtime diff --git a/Lib/test/test_importlib/import_/test_path.py b/Lib/test/test_importlib/import_/test_path.py index 89b52fbd1e1aff..30d5558e3f01ba 100644 --- a/Lib/test/test_importlib/import_/test_path.py +++ b/Lib/test/test_importlib/import_/test_path.py @@ -93,13 +93,30 @@ def test_path_importer_cache_empty_string(self): self.check_found(found, importer) self.assertIn(os.getcwd(), sys.path_importer_cache) - def test_None_on_sys_path(self): - # Putting None in sys.path[0] caused an import regression from Python - # 3.2: http://bugs.python.org/issue16514 + def test_invalid_names_in_sys_path(self): + for name, desc in [ + # Putting None in sys.path[0] caused an import regression from + # Python 3.2: http://bugs.python.org/issue16514 + (None, 'None in sys.path[0]'), + # embedded NUL characters raise ValueError in os.stat() + ('\x00', 'NUL bytes path'), + (f'Top{os.sep}Mid\x00', 'path with embedded NUL bytes'), + # A path with surrogate codes. A UnicodeEncodeError is raised + # by os.stat() upon querying, which is a subclass of ValueError. + ("\uD834\uDD1E", 'surrogate codes (MUSICAL SYMBOL G CLEF)'), + # For POSIX platforms, an OSError will be raised but for Windows + # platforms, a ValueError is raised due to the path_t converter. + # See: https://github.com/python/cpython/issues/122353 + ('a' * 1_000_000, 'very long path'), + ]: + with self.subTest(desc): + self._test_invalid_name_in_sys_path(name) + + def _test_invalid_name_in_sys_path(self, name): new_path = sys.path[:] - new_path.insert(0, None) + new_path.insert(0, name) new_path_importer_cache = sys.path_importer_cache.copy() - new_path_importer_cache.pop(None, None) + new_path_importer_cache.pop(name, None) new_path_hooks = [zipimport.zipimporter, self.machinery.FileFinder.path_hook( *self.importlib._bootstrap_external._get_supported_file_loaders())] diff --git a/Misc/NEWS.d/next/Library/2024-07-29-11-21-39.gh-issue-122353.ESrsyI.rst b/Misc/NEWS.d/next/Library/2024-07-29-11-21-39.gh-issue-122353.ESrsyI.rst new file mode 100644 index 00000000000000..f9a42c1c8a0509 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-29-11-21-39.gh-issue-122353.ESrsyI.rst @@ -0,0 +1,2 @@ +Handle :exc:`ValueError`\s raised by OS-related functions during import if +``sys.path`` contains invalid path names. Patch by Bénédikt Tran.