python · ronaldoussoren · Dec 24, 2023 · Dec 24, 2023 · Dec 25, 2023 · Dec 25, 2023
@@ -256,6 +256,9 @@ ABC hierarchy::
       :func:`importlib.util.spec_from_loader` may be useful for implementing
       concrete ``MetaPathFinders``.
 
+      *Fullname* must be normalized in NFKC to match the normalization
+      done by the Python parser.
+
       .. versionadded:: 3.4
 
    .. method:: invalidate_caches()
@@ -290,6 +293,9 @@ ABC hierarchy::
       guess about what spec to return. :func:`importlib.util.spec_from_loader`
       may be useful for implementing concrete ``PathEntryFinders``.
 
+      *Fullname* must be normalized in NFKC to match the normalization
+      done by the Python parser.
+
       .. versionadded:: 3.4
 
    .. method:: invalidate_caches()

@@ -26,6 +26,13 @@ def _object_name(obj):
     except AttributeError:
         return type(obj).__qualname__
 
+def _normalize(name):
+    """Normalize 'name' to NKFC form."""
+    global _unicodedata_normalize
+    if _unicodedata_normalize is None:
+        from unicodedata import normalize as _unicodedata_normalize
+    return _unicodedata_normalize('NFKC', name)
+
 # Bootstrap-related code ######################################################
 
 # Modules injected manually by _setup()
@@ -36,6 +43,8 @@ def _object_name(obj):
 # Import done by _install_external_importers()
 _bootstrap_external = None
 
+# Import done lazily as needed by _normalize as unicodedata is not built-in.
+_unicodedata_normalize = None
 
 def _wrap(new, old):
     """Simple substitute for functools.update_wrapper."""
@@ -1392,7 +1401,15 @@ def _gcd_import(name, package=None, level=0):
     the loader did not.
 
     """
+    global _unicodedata
     _sanity_check(name, package, level)
+
+    if not name.isascii():
+        name = _normalize(name)
+
+    if package is not None and not package.isascii():
+        package = _normalize(package)
+
     if level > 0:
         name = _resolve_name(name, package, level)
     return _find_and_load(name, _gcd_import)

@@ -1345,8 +1345,9 @@ def __init__(self, path, *loader_details):
         else:
             self.path = _path_abspath(path)
         self._path_mtime = -1
-        self._path_cache = set()
-        self._relaxed_path_cache = set()
+        self._path_cache = {}
+        self._relaxed_path_cache = {}
+        self._cache_is_normalized = False
 
     def invalidate_caches(self):
         """Invalidate the directory mtime."""
@@ -1372,15 +1373,21 @@ def find_spec(self, fullname, target=None):
             self._fill_cache()
             self._path_mtime = mtime
         # tail_module keeps the original casing, for __file__ and friends
+        if not tail_module.isascii() and not self._cache_is_normalized:
+            self._normalize_cache()
         if _relax_case():
             cache = self._relaxed_path_cache
             cache_module = tail_module.lower()
         else:
             cache = self._path_cache
             cache_module = tail_module
         # Check if the module is the name of a directory (and thus a package).
-        if cache_module in cache:
-            base_path = _path_join(self.path, tail_module)
+        try:
+            cache_path = cache[cache_module]
+        except KeyError:
+            pass
+        else:
+            base_path = _path_join(self.path, cache_path)
             for suffix, loader_class in self._loaders:
                 init_filename = '__init__' + suffix
                 full_path = _path_join(base_path, init_filename)
@@ -1392,15 +1399,21 @@ def find_spec(self, fullname, target=None):
                 is_namespace = _path_isdir(base_path)
         # Check for a file w/ a proper suffix exists.
         for suffix, loader_class in self._loaders:
+            # XXX: Why is ValueError caught here?
+            #try:
+            #    full_path = _path_join(self.path, tail_module + suffix)
+            #except ValueError:
+            #    return None
+            _bootstrap._verbose_message('trying {}{} in {}', cache_module, suffix, self.path, verbosity=2)
             try:
-                full_path = _path_join(self.path, tail_module + suffix)
-            except ValueError:
-                return None
-            _bootstrap._verbose_message('trying {}', full_path, verbosity=2)
-            if cache_module + suffix in cache:
+                cache_path = cache[cache_module + suffix]
+            except KeyError:
+                pass
+
+            else:
+                full_path = _path_join(self.path, cache_path)
                 if _path_isfile(full_path):
-                    return self._get_spec(loader_class, fullname, full_path,
-                                          None, target)
+                    return self._get_spec(loader_class, fullname, full_path, None, target)
         if is_namespace:
             _bootstrap._verbose_message('possible namespace for {}', base_path)
             spec = _bootstrap.ModuleSpec(fullname, None)
@@ -1420,24 +1433,35 @@ def _fill_cache(self):
         # We store two cached versions, to handle runtime changes of the
         # PYTHONCASEOK environment variable.
         if not sys.platform.startswith('win'):
-            self._path_cache = set(contents)
+            self._path_cache = { p: p for p in contents }
         else:
             # Windows users can import modules with case-insensitive file
             # suffixes (for legacy reasons). Make the suffix lowercase here
             # so it's done once instead of for every import. This is safe as
             # the specified suffixes to check against are always specified in a
             # case-sensitive manner.
-            lower_suffix_contents = set()
+            lower_suffix_contents = {}
             for item in contents:
                 name, dot, suffix = item.partition('.')
                 if dot:
                     new_name = f'{name}.{suffix.lower()}'
                 else:
                     new_name = name
-                lower_suffix_contents.add(new_name)
+                lower_suffix_contents[new_name] = item
             self._path_cache = lower_suffix_contents
         if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS):
-            self._relaxed_path_cache = {fn.lower() for fn in contents}
+            self._relaxed_path_cache = {fn.lower(): fn for fn in contents}
+
+        self._cache_is_normalized = False
+
+    def _normalize_cache(self):
+        """Normalize all entries in the caches to NFKC."""
+        from unicodedata import normalize
+
+        self._path_cache = { normalize('NFKC', p): p for p in self._path_cache }
+        self._relaxed_path_cache = { normalize('NFKC', p): p for p in self._relaxed_path_cache }
+        self._cache_is_normalized = True
+
 
     @classmethod
     def path_hook(cls, *loader_details):

@@ -23,6 +23,7 @@
 import threading
 import time
 import types
+import unicodedata
 import unittest
 from unittest import mock
 import _imp
@@ -3372,6 +3373,47 @@ def test_magic_number_endianness(self):
         start = 2900 + sys.version_info.minor * 50
         self.assertIn(magic_number, range(start, start + 50))
 
+class TestImportAccented(unittest.TestCase):
+    # XXX: There should be tests with PYTHONCASEOK as well
+    #      (for those platforms where this is relevant)
+    dir_name = os.path.abspath(TESTFN)
+
+    def setUp(self):
+        self.sys_path = sys.path[:]
+        os.mkdir(self.dir_name)
+        sys.path.insert(0, self.dir_name)
+        importlib.invalidate_caches()
+
+    def tearDown(self):
+        sys.path[:] = self.sys_path
+        importlib.invalidate_caches()
+        rmtree(self.dir_name)
+
+    def assert_importing_possible(self, name):
+        normalized = unicodedata.normalize('NFKC', name)
+        filename = os.path.join(self.dir_name, f"{name}.py")
+        with open(filename, "w") as stream:
+            stream.write("SPAM = 'spam'\n")
+
+        values = {}
+        exec(f"from {name} import SPAM", values, values)
+        try:
+            self.assertEqual(values["SPAM"], "spam")
+            self.assertIn(normalized, sys.modules)
+        finally:
+            del sys.modules[normalized]
+
+    def test_import_precomposed(self):
+        name = 'M\u00E4dchen'
+        self.assert_importing_possible(name)
+
+    def test_import_normalized(self):
+        name = 'M\u0061\u0308dchen'
+        self.assert_importing_possible(name)
+
+    def test_import_macos_input(self):
+        name = 'Mädchen'
+        self.assert_importing_possible(name)
 
 if __name__ == '__main__':
     # Test needs to be a package, so we can do relative imports.

@@ -59,7 +59,7 @@ def test_insensitive(self):
             self.assertIsNotNone(sensitive)
             self.assertIn(self.name, sensitive.get_filename(self.name))
             self.assertIsNotNone(insensitive)
-            self.assertIn(self.name, insensitive.get_filename(self.name))
+            self.assertIn(self.name.lower(), insensitive.get_filename(self.name))
 
 
 class CaseSensitivityTestPEP451(CaseSensitivityTest):

diff --git a/Misc/NEWS.d/next/Library/2023-12-29-13-09-44.gh-issue-84013.rUaxkr.rst b/Misc/NEWS.d/next/Library/2023-12-29-13-09-44.gh-issue-84013.rUaxkr.rst
@@ -0,0 +1,3 @@
+It is now possible to import modules from the filesystem regardless of how
+the name is normalized in the filesystem. This in particular affects
+importing modules with a name that contains accented latin characters.