From 7ff59b27d77941465226c4157614f5b210bf25de Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sat, 1 Jun 2019 17:33:27 +0300 Subject: [PATCH 01/31] Add _bootsrap.py as frozen module --- vm/src/frozen.rs | 2 + vm/src/importlib.rs | 1175 +++++++++++++++++++++++++++++++++++++++++++ vm/src/lib.rs | 1 + 3 files changed, 1178 insertions(+) create mode 100644 vm/src/importlib.rs diff --git a/vm/src/frozen.rs b/vm/src/frozen.rs index cc6eb85fe5..36fd08a4ad 100644 --- a/vm/src/frozen.rs +++ b/vm/src/frozen.rs @@ -1,3 +1,4 @@ +use crate::importlib::IMPORTLIB_BOOTSTRAP; use std::collections::hash_map::HashMap; const HELLO: &str = "initialized = True @@ -7,5 +8,6 @@ print(\"Hello world!\") pub fn get_module_inits() -> HashMap { let mut modules = HashMap::new(); modules.insert("__hello__".to_string(), HELLO); + modules.insert("_frozen_importlib".to_string(), IMPORTLIB_BOOTSTRAP); modules } diff --git a/vm/src/importlib.rs b/vm/src/importlib.rs new file mode 100644 index 0000000000..7b8b4e81ba --- /dev/null +++ b/vm/src/importlib.rs @@ -0,0 +1,1175 @@ +pub const IMPORTLIB_BOOTSTRAP: &str = r#" +"""Core implementation of import. + +This module is NOT meant to be directly imported! It has been designed such +that it can be bootstrapped into Python as the implementation of import. As +such it requires the injection of specific modules and attributes in order to +work. One should use importlib as the public-facing version of this module. + +""" +# +# IMPORTANT: Whenever making changes to this module, be sure to run a top-level +# `make regen-importlib` followed by `make` in order to get the frozen version +# of the module updated. Not doing so will result in the Makefile to fail for +# all others who don't have a ./python around to freeze the module +# in the early stages of compilation. +# + +# See importlib._setup() for what is injected into the global namespace. + +# When editing this code be aware that code executed at import time CANNOT +# reference any injected objects! This includes not only global code but also +# anything specified at the class level. + +# Bootstrap-related code ###################################################### + +_bootstrap_external = None + +def _wrap(new, old): + """Simple substitute for functools.update_wrapper.""" + for replace in ['__module__', '__name__', '__qualname__', '__doc__']: + if hasattr(old, replace): + setattr(new, replace, getattr(old, replace)) + new.__dict__.update(old.__dict__) + + +def _new_module(name): + return type(sys)(name) + + +# Module-level locking ######################################################## + +# A dict mapping module names to weakrefs of _ModuleLock instances +# Dictionary protected by the global import lock +_module_locks = {} +# A dict mapping thread ids to _ModuleLock instances +_blocking_on = {} + + +class _DeadlockError(RuntimeError): + pass + + +class _ModuleLock: + """A recursive lock implementation which is able to detect deadlocks + (e.g. thread 1 trying to take locks A then B, and thread 2 trying to + take locks B then A). + """ + + def __init__(self, name): + self.lock = _thread.allocate_lock() + self.wakeup = _thread.allocate_lock() + self.name = name + self.owner = None + self.count = 0 + self.waiters = 0 + + def has_deadlock(self): + # Deadlock avoidance for concurrent circular imports. + me = _thread.get_ident() + tid = self.owner + while True: + lock = _blocking_on.get(tid) + if lock is None: + return False + tid = lock.owner + if tid == me: + return True + + def acquire(self): + """ + Acquire the module lock. If a potential deadlock is detected, + a _DeadlockError is raised. + Otherwise, the lock is always acquired and True is returned. + """ + tid = _thread.get_ident() + _blocking_on[tid] = self + try: + while True: + with self.lock: + if self.count == 0 or self.owner == tid: + self.owner = tid + self.count += 1 + return True + if self.has_deadlock(): + raise _DeadlockError('deadlock detected by %r' % self) + if self.wakeup.acquire(False): + self.waiters += 1 + # Wait for a release() call + self.wakeup.acquire() + self.wakeup.release() + finally: + del _blocking_on[tid] + + def release(self): + tid = _thread.get_ident() + with self.lock: + if self.owner != tid: + raise RuntimeError('cannot release un-acquired lock') + assert self.count > 0 + self.count -= 1 + if self.count == 0: + self.owner = None + if self.waiters: + self.waiters -= 1 + self.wakeup.release() + + def __repr__(self): + return '_ModuleLock({!r}) at {}'.format(self.name, id(self)) + + +class _DummyModuleLock: + """A simple _ModuleLock equivalent for Python builds without + multi-threading support.""" + + def __init__(self, name): + self.name = name + self.count = 0 + + def acquire(self): + self.count += 1 + return True + + def release(self): + if self.count == 0: + raise RuntimeError('cannot release un-acquired lock') + self.count -= 1 + + def __repr__(self): + return '_DummyModuleLock({!r}) at {}'.format(self.name, id(self)) + + +class _ModuleLockManager: + + def __init__(self, name): + self._name = name + self._lock = None + + def __enter__(self): + self._lock = _get_module_lock(self._name) + self._lock.acquire() + + def __exit__(self, *args, **kwargs): + self._lock.release() + + +# The following two functions are for consumption by Python/import.c. + +def _get_module_lock(name): + """Get or create the module lock for a given module name. + + Acquire/release internally the global import lock to protect + _module_locks.""" + + _imp.acquire_lock() + try: + try: + lock = _module_locks[name]() + except KeyError: + lock = None + + if lock is None: + if _thread is None: + lock = _DummyModuleLock(name) + else: + lock = _ModuleLock(name) + + def cb(ref, name=name): + _imp.acquire_lock() + try: + # bpo-31070: Check if another thread created a new lock + # after the previous lock was destroyed + # but before the weakref callback was called. + if _module_locks.get(name) is ref: + del _module_locks[name] + finally: + _imp.release_lock() + + _module_locks[name] = _weakref.ref(lock, cb) + finally: + _imp.release_lock() + + return lock + + +def _lock_unlock_module(name): + """Acquires then releases the module lock for a given module name. + + This is used to ensure a module is completely initialized, in the + event it is being imported by another thread. + """ + lock = _get_module_lock(name) + try: + lock.acquire() + except _DeadlockError: + # Concurrent circular import, we'll accept a partially initialized + # module object. + pass + else: + lock.release() + +# Frame stripping magic ############################################### +def _call_with_frames_removed(f, *args, **kwds): + """remove_importlib_frames in import.c will always remove sequences + of importlib frames that end with a call to this function + + Use it instead of a normal call in places where including the importlib + frames introduces unwanted noise into the traceback (e.g. when executing + module code) + """ + return f(*args, **kwds) + + +def _verbose_message(message, *args, verbosity=1): + """Print the message to stderr if -v/PYTHONVERBOSE is turned on.""" + if sys.flags.verbose >= verbosity: + if not message.startswith(('#', 'import ')): + message = '# ' + message + print(message.format(*args), file=sys.stderr) + + +def _requires_builtin(fxn): + """Decorator to verify the named module is built-in.""" + def _requires_builtin_wrapper(self, fullname): + if fullname not in sys.builtin_module_names: + raise ImportError('{!r} is not a built-in module'.format(fullname), + name=fullname) + return fxn(self, fullname) + _wrap(_requires_builtin_wrapper, fxn) + return _requires_builtin_wrapper + + +def _requires_frozen(fxn): + """Decorator to verify the named module is frozen.""" + def _requires_frozen_wrapper(self, fullname): + if not _imp.is_frozen(fullname): + raise ImportError('{!r} is not a frozen module'.format(fullname), + name=fullname) + return fxn(self, fullname) + _wrap(_requires_frozen_wrapper, fxn) + return _requires_frozen_wrapper + + +# Typically used by loader classes as a method replacement. +def _load_module_shim(self, fullname): + """Load the specified module into sys.modules and return it. + + This method is deprecated. Use loader.exec_module instead. + + """ + spec = spec_from_loader(fullname, self) + if fullname in sys.modules: + module = sys.modules[fullname] + _exec(spec, module) + return sys.modules[fullname] + else: + return _load(spec) + +# Module specifications ####################################################### + +def _module_repr(module): + # The implementation of ModuleType.__repr__(). + loader = getattr(module, '__loader__', None) + if hasattr(loader, 'module_repr'): + # As soon as BuiltinImporter, FrozenImporter, and NamespaceLoader + # drop their implementations for module_repr. we can add a + # deprecation warning here. + try: + return loader.module_repr(module) + except Exception: + pass + try: + spec = module.__spec__ + except AttributeError: + pass + else: + if spec is not None: + return _module_repr_from_spec(spec) + + # We could use module.__class__.__name__ instead of 'module' in the + # various repr permutations. + try: + name = module.__name__ + except AttributeError: + name = '?' + try: + filename = module.__file__ + except AttributeError: + if loader is None: + return ''.format(name) + else: + return ''.format(name, loader) + else: + return ''.format(name, filename) + + +class ModuleSpec: + """The specification for a module, used for loading. + + A module's spec is the source for information about the module. For + data associated with the module, including source, use the spec's + loader. + + `name` is the absolute name of the module. `loader` is the loader + to use when loading the module. `parent` is the name of the + package the module is in. The parent is derived from the name. + + `is_package` determines if the module is considered a package or + not. On modules this is reflected by the `__path__` attribute. + + `origin` is the specific location used by the loader from which to + load the module, if that information is available. When filename is + set, origin will match. + + `has_location` indicates that a spec's "origin" reflects a location. + When this is True, `__file__` attribute of the module is set. + + `cached` is the location of the cached bytecode file, if any. It + corresponds to the `__cached__` attribute. + + `submodule_search_locations` is the sequence of path entries to + search when importing submodules. If set, is_package should be + True--and False otherwise. + + Packages are simply modules that (may) have submodules. If a spec + has a non-None value in `submodule_search_locations`, the import + system will consider modules loaded from the spec as packages. + + Only finders (see importlib.abc.MetaPathFinder and + importlib.abc.PathEntryFinder) should modify ModuleSpec instances. + + """ + + def __init__(self, name, loader, *, origin=None, loader_state=None, + is_package=None): + self.name = name + self.loader = loader + self.origin = origin + self.loader_state = loader_state + self.submodule_search_locations = [] if is_package else None + + # file-location attributes + self._set_fileattr = False + self._cached = None + + def __repr__(self): + args = ['name={!r}'.format(self.name), + 'loader={!r}'.format(self.loader)] + if self.origin is not None: + args.append('origin={!r}'.format(self.origin)) + if self.submodule_search_locations is not None: + args.append('submodule_search_locations={}' + .format(self.submodule_search_locations)) + return '{}({})'.format(self.__class__.__name__, ', '.join(args)) + + def __eq__(self, other): + smsl = self.submodule_search_locations + try: + return (self.name == other.name and + self.loader == other.loader and + self.origin == other.origin and + smsl == other.submodule_search_locations and + self.cached == other.cached and + self.has_location == other.has_location) + except AttributeError: + return False + + @property + def cached(self): + if self._cached is None: + if self.origin is not None and self._set_fileattr: + if _bootstrap_external is None: + raise NotImplementedError + self._cached = _bootstrap_external._get_cached(self.origin) + return self._cached + + @cached.setter + def cached(self, cached): + self._cached = cached + + @property + def parent(self): + """The name of the module's parent.""" + if self.submodule_search_locations is None: + return self.name.rpartition('.')[0] + else: + return self.name + + @property + def has_location(self): + return self._set_fileattr + + @has_location.setter + def has_location(self, value): + self._set_fileattr = bool(value) + + +def spec_from_loader(name, loader, *, origin=None, is_package=None): + """Return a module spec based on various loader methods.""" + if hasattr(loader, 'get_filename'): + if _bootstrap_external is None: + raise NotImplementedError + spec_from_file_location = _bootstrap_external.spec_from_file_location + + if is_package is None: + return spec_from_file_location(name, loader=loader) + search = [] if is_package else None + return spec_from_file_location(name, loader=loader, + submodule_search_locations=search) + + if is_package is None: + if hasattr(loader, 'is_package'): + try: + is_package = loader.is_package(name) + except ImportError: + is_package = None # aka, undefined + else: + # the default + is_package = False + + return ModuleSpec(name, loader, origin=origin, is_package=is_package) + + +def _spec_from_module(module, loader=None, origin=None): + # This function is meant for use in _setup(). + try: + spec = module.__spec__ + except AttributeError: + pass + else: + if spec is not None: + return spec + + name = module.__name__ + if loader is None: + try: + loader = module.__loader__ + except AttributeError: + # loader will stay None. + pass + try: + location = module.__file__ + except AttributeError: + location = None + if origin is None: + if location is None: + try: + origin = loader._ORIGIN + except AttributeError: + origin = None + else: + origin = location + try: + cached = module.__cached__ + except AttributeError: + cached = None + try: + submodule_search_locations = list(module.__path__) + except AttributeError: + submodule_search_locations = None + + spec = ModuleSpec(name, loader, origin=origin) + spec._set_fileattr = False if location is None else True + spec.cached = cached + spec.submodule_search_locations = submodule_search_locations + return spec + + +def _init_module_attrs(spec, module, *, override=False): + # The passed-in module may be not support attribute assignment, + # in which case we simply don't set the attributes. + # __name__ + if (override or getattr(module, '__name__', None) is None): + try: + module.__name__ = spec.name + except AttributeError: + pass + # __loader__ + if override or getattr(module, '__loader__', None) is None: + loader = spec.loader + if loader is None: + # A backward compatibility hack. + if spec.submodule_search_locations is not None: + if _bootstrap_external is None: + raise NotImplementedError + _NamespaceLoader = _bootstrap_external._NamespaceLoader + + loader = _NamespaceLoader.__new__(_NamespaceLoader) + loader._path = spec.submodule_search_locations + spec.loader = loader + # While the docs say that module.__file__ is not set for + # built-in modules, and the code below will avoid setting it if + # spec.has_location is false, this is incorrect for namespace + # packages. Namespace packages have no location, but their + # __spec__.origin is None, and thus their module.__file__ + # should also be None for consistency. While a bit of a hack, + # this is the best place to ensure this consistency. + # + # See # https://docs.python.org/3/library/importlib.html#importlib.abc.Loader.load_module + # and bpo-32305 + module.__file__ = None + try: + module.__loader__ = loader + except AttributeError: + pass + # __package__ + if override or getattr(module, '__package__', None) is None: + try: + module.__package__ = spec.parent + except AttributeError: + pass + # __spec__ + try: + module.__spec__ = spec + except AttributeError: + pass + # __path__ + if override or getattr(module, '__path__', None) is None: + if spec.submodule_search_locations is not None: + try: + module.__path__ = spec.submodule_search_locations + except AttributeError: + pass + # __file__/__cached__ + if spec.has_location: + if override or getattr(module, '__file__', None) is None: + try: + module.__file__ = spec.origin + except AttributeError: + pass + + if override or getattr(module, '__cached__', None) is None: + if spec.cached is not None: + try: + module.__cached__ = spec.cached + except AttributeError: + pass + return module + + +def module_from_spec(spec): + """Create a module based on the provided spec.""" + # Typically loaders will not implement create_module(). + module = None + if hasattr(spec.loader, 'create_module'): + # If create_module() returns `None` then it means default + # module creation should be used. + module = spec.loader.create_module(spec) + elif hasattr(spec.loader, 'exec_module'): + raise ImportError('loaders that define exec_module() ' + 'must also define create_module()') + if module is None: + module = _new_module(spec.name) + _init_module_attrs(spec, module) + return module + + +def _module_repr_from_spec(spec): + """Return the repr to use for the module.""" + # We mostly replicate _module_repr() using the spec attributes. + name = '?' if spec.name is None else spec.name + if spec.origin is None: + if spec.loader is None: + return ''.format(name) + else: + return ''.format(name, spec.loader) + else: + if spec.has_location: + return ''.format(name, spec.origin) + else: + return ''.format(spec.name, spec.origin) + + +# Used by importlib.reload() and _load_module_shim(). +def _exec(spec, module): + """Execute the spec's specified module in an existing module's namespace.""" + name = spec.name + with _ModuleLockManager(name): + if sys.modules.get(name) is not module: + msg = 'module {!r} not in sys.modules'.format(name) + raise ImportError(msg, name=name) + try: + if spec.loader is None: + if spec.submodule_search_locations is None: + raise ImportError('missing loader', name=spec.name) + # Namespace package. + _init_module_attrs(spec, module, override=True) + else: + _init_module_attrs(spec, module, override=True) + if not hasattr(spec.loader, 'exec_module'): + # (issue19713) Once BuiltinImporter and ExtensionFileLoader + # have exec_module() implemented, we can add a deprecation + # warning here. + spec.loader.load_module(name) + else: + spec.loader.exec_module(module) + finally: + # Update the order of insertion into sys.modules for module + # clean-up at shutdown. + module = sys.modules.pop(spec.name) + sys.modules[spec.name] = module + return module + + +def _load_backward_compatible(spec): + # (issue19713) Once BuiltinImporter and ExtensionFileLoader + # have exec_module() implemented, we can add a deprecation + # warning here. + try: + spec.loader.load_module(spec.name) + except: + if spec.name in sys.modules: + module = sys.modules.pop(spec.name) + sys.modules[spec.name] = module + raise + # The module must be in sys.modules at this point! + # Move it to the end of sys.modules. + module = sys.modules.pop(spec.name) + sys.modules[spec.name] = module + if getattr(module, '__loader__', None) is None: + try: + module.__loader__ = spec.loader + except AttributeError: + pass + if getattr(module, '__package__', None) is None: + try: + # Since module.__path__ may not line up with + # spec.submodule_search_paths, we can't necessarily rely + # on spec.parent here. + module.__package__ = module.__name__ + if not hasattr(module, '__path__'): + module.__package__ = spec.name.rpartition('.')[0] + except AttributeError: + pass + if getattr(module, '__spec__', None) is None: + try: + module.__spec__ = spec + except AttributeError: + pass + return module + +def _load_unlocked(spec): + # A helper for direct use by the import system. + if spec.loader is not None: + # Not a namespace package. + if not hasattr(spec.loader, 'exec_module'): + return _load_backward_compatible(spec) + + module = module_from_spec(spec) + + # This must be done before putting the module in sys.modules + # (otherwise an optimization shortcut in import.c becomes + # wrong). + spec._initializing = True + try: + sys.modules[spec.name] = module + try: + if spec.loader is None: + if spec.submodule_search_locations is None: + raise ImportError('missing loader', name=spec.name) + # A namespace package so do nothing. + else: + spec.loader.exec_module(module) + except: + try: + del sys.modules[spec.name] + except KeyError: + pass + raise + # Move the module to the end of sys.modules. + # We don't ensure that the import-related module attributes get + # set in the sys.modules replacement case. Such modules are on + # their own. + module = sys.modules.pop(spec.name) + sys.modules[spec.name] = module + _verbose_message('import {!r} # {!r}', spec.name, spec.loader) + finally: + spec._initializing = False + + return module + +# A method used during testing of _load_unlocked() and by +# _load_module_shim(). +def _load(spec): + """Return a new module object, loaded by the spec's loader. + + The module is not added to its parent. + + If a module is already in sys.modules, that existing module gets + clobbered. + + """ + with _ModuleLockManager(spec.name): + return _load_unlocked(spec) + + +# Loaders ##################################################################### + +class BuiltinImporter: + + """Meta path import for built-in modules. + + All methods are either class or static methods to avoid the need to + instantiate the class. + + """ + + @staticmethod + def module_repr(module): + """Return repr for the module. + + The method is deprecated. The import machinery does the job itself. + + """ + return ''.format(module.__name__) + + @classmethod + def find_spec(cls, fullname, path=None, target=None): + if path is not None: + return None + if _imp.is_builtin(fullname): + return spec_from_loader(fullname, cls, origin='built-in') + else: + return None + + @classmethod + def find_module(cls, fullname, path=None): + """Find the built-in module. + + If 'path' is ever specified then the search is considered a failure. + + This method is deprecated. Use find_spec() instead. + + """ + spec = cls.find_spec(fullname, path) + return spec.loader if spec is not None else None + + @classmethod + def create_module(self, spec): + """Create a built-in module""" + if spec.name not in sys.builtin_module_names: + raise ImportError('{!r} is not a built-in module'.format(spec.name), + name=spec.name) + return _call_with_frames_removed(_imp.create_builtin, spec) + + @classmethod + def exec_module(self, module): + """Exec a built-in module""" + _call_with_frames_removed(_imp.exec_builtin, module) + + @classmethod + @_requires_builtin + def get_code(cls, fullname): + """Return None as built-in modules do not have code objects.""" + return None + + @classmethod + @_requires_builtin + def get_source(cls, fullname): + """Return None as built-in modules do not have source code.""" + return None + + @classmethod + @_requires_builtin + def is_package(cls, fullname): + """Return False as built-in modules are never packages.""" + return False + + load_module = classmethod(_load_module_shim) + + +class FrozenImporter: + + """Meta path import for frozen modules. + + All methods are either class or static methods to avoid the need to + instantiate the class. + + """ + + _ORIGIN = "frozen" + + @staticmethod + def module_repr(m): + """Return repr for the module. + + The method is deprecated. The import machinery does the job itself. + + """ + return ''.format(m.__name__, FrozenImporter._ORIGIN) + + @classmethod + def find_spec(cls, fullname, path=None, target=None): + if _imp.is_frozen(fullname): + return spec_from_loader(fullname, cls, origin=cls._ORIGIN) + else: + return None + + @classmethod + def find_module(cls, fullname, path=None): + """Find a frozen module. + + This method is deprecated. Use find_spec() instead. + + """ + return cls if _imp.is_frozen(fullname) else None + + @classmethod + def create_module(cls, spec): + """Use default semantics for module creation.""" + + @staticmethod + def exec_module(module): + name = module.__spec__.name + if not _imp.is_frozen(name): + raise ImportError('{!r} is not a frozen module'.format(name), + name=name) + code = _call_with_frames_removed(_imp.get_frozen_object, name) + exec(code, module.__dict__) + + @classmethod + def load_module(cls, fullname): + """Load a frozen module. + + This method is deprecated. Use exec_module() instead. + + """ + return _load_module_shim(cls, fullname) + + @classmethod + @_requires_frozen + def get_code(cls, fullname): + """Return the code object for the frozen module.""" + return _imp.get_frozen_object(fullname) + + @classmethod + @_requires_frozen + def get_source(cls, fullname): + """Return None as frozen modules do not have source code.""" + return None + + @classmethod + @_requires_frozen + def is_package(cls, fullname): + """Return True if the frozen module is a package.""" + return _imp.is_frozen_package(fullname) + + +# Import itself ############################################################### + +class _ImportLockContext: + + """Context manager for the import lock.""" + + def __enter__(self): + """Acquire the import lock.""" + _imp.acquire_lock() + + def __exit__(self, exc_type, exc_value, exc_traceback): + """Release the import lock regardless of any raised exceptions.""" + _imp.release_lock() + + +def _resolve_name(name, package, level): + """Resolve a relative module name to an absolute one.""" + bits = package.rsplit('.', level - 1) + if len(bits) < level: + raise ValueError('attempted relative import beyond top-level package') + base = bits[0] + return '{}.{}'.format(base, name) if name else base + + +def _find_spec_legacy(finder, name, path): + # This would be a good place for a DeprecationWarning if + # we ended up going that route. + loader = finder.find_module(name, path) + if loader is None: + return None + return spec_from_loader(name, loader) + + +def _find_spec(name, path, target=None): + """Find a module's spec.""" + meta_path = sys.meta_path + if meta_path is None: + # PyImport_Cleanup() is running or has been called. + raise ImportError("sys.meta_path is None, Python is likely " + "shutting down") + + if not meta_path: + _warnings.warn('sys.meta_path is empty', ImportWarning) + + # We check sys.modules here for the reload case. While a passed-in + # target will usually indicate a reload there is no guarantee, whereas + # sys.modules provides one. + is_reload = name in sys.modules + for finder in meta_path: + with _ImportLockContext(): + try: + find_spec = finder.find_spec + except AttributeError: + spec = _find_spec_legacy(finder, name, path) + if spec is None: + continue + else: + spec = find_spec(name, path, target) + if spec is not None: + # The parent import may have already imported this module. + if not is_reload and name in sys.modules: + module = sys.modules[name] + try: + __spec__ = module.__spec__ + except AttributeError: + # We use the found spec since that is the one that + # we would have used if the parent module hadn't + # beaten us to the punch. + return spec + else: + if __spec__ is None: + return spec + else: + return __spec__ + else: + return spec + else: + return None + + +def _sanity_check(name, package, level): + """Verify arguments are "sane".""" + if not isinstance(name, str): + raise TypeError('module name must be str, not {}'.format(type(name))) + if level < 0: + raise ValueError('level must be >= 0') + if level > 0: + if not isinstance(package, str): + raise TypeError('__package__ not set to a string') + elif not package: + raise ImportError('attempted relative import with no known parent ' + 'package') + if not name and level == 0: + raise ValueError('Empty module name') + + +_ERR_MSG_PREFIX = 'No module named ' +_ERR_MSG = _ERR_MSG_PREFIX + '{!r}' + +def _find_and_load_unlocked(name, import_): + path = None + parent = name.rpartition('.')[0] + if parent: + if parent not in sys.modules: + _call_with_frames_removed(import_, parent) + # Crazy side-effects! + if name in sys.modules: + return sys.modules[name] + parent_module = sys.modules[parent] + try: + path = parent_module.__path__ + except AttributeError: + msg = (_ERR_MSG + '; {!r} is not a package').format(name, parent) + raise ModuleNotFoundError(msg, name=name) from None + spec = _find_spec(name, path) + if spec is None: + raise ModuleNotFoundError(_ERR_MSG.format(name), name=name) + else: + module = _load_unlocked(spec) + if parent: + # Set the module as an attribute on its parent. + parent_module = sys.modules[parent] + setattr(parent_module, name.rpartition('.')[2], module) + return module + + +_NEEDS_LOADING = object() + + +def _find_and_load(name, import_): + """Find and load the module.""" + with _ModuleLockManager(name): + module = sys.modules.get(name, _NEEDS_LOADING) + if module is _NEEDS_LOADING: + return _find_and_load_unlocked(name, import_) + + if module is None: + message = ('import of {} halted; ' + 'None in sys.modules'.format(name)) + raise ModuleNotFoundError(message, name=name) + + _lock_unlock_module(name) + return module + + +def _gcd_import(name, package=None, level=0): + """Import and return the module based on its name, the package the call is + being made from, and the level adjustment. + + This function represents the greatest common denominator of functionality + between import_module and __import__. This includes setting __package__ if + the loader did not. + + """ + _sanity_check(name, package, level) + if level > 0: + name = _resolve_name(name, package, level) + return _find_and_load(name, _gcd_import) + + +def _handle_fromlist(module, fromlist, import_, *, recursive=False): + """Figure out what __import__ should return. + + The import_ parameter is a callable which takes the name of module to + import. It is required to decouple the function from assuming importlib's + import implementation is desired. + + """ + # The hell that is fromlist ... + # If a package was imported, try to import stuff from fromlist. + for x in fromlist: + if not isinstance(x, str): + if recursive: + where = module.__name__ + '.__all__' + else: + where = "``from list''" + raise TypeError(f"Item in {where} must be str, " + f"not {type(x).__name__}") + elif x == '*': + if not recursive and hasattr(module, '__all__'): + _handle_fromlist(module, module.__all__, import_, + recursive=True) + elif not hasattr(module, x): + from_name = '{}.{}'.format(module.__name__, x) + try: + _call_with_frames_removed(import_, from_name) + except ModuleNotFoundError as exc: + # Backwards-compatibility dictates we ignore failed + # imports triggered by fromlist for modules that don't + # exist. + if (exc.name == from_name and + sys.modules.get(from_name, _NEEDS_LOADING) is not None): + continue + raise + return module + + +def _calc___package__(globals): + """Calculate what __package__ should be. + + __package__ is not guaranteed to be defined or could be set to None + to represent that its proper value is unknown. + + """ + package = globals.get('__package__') + spec = globals.get('__spec__') + if package is not None: + if spec is not None and package != spec.parent: + _warnings.warn("__package__ != __spec__.parent " + f"({package!r} != {spec.parent!r})", + ImportWarning, stacklevel=3) + return package + elif spec is not None: + return spec.parent + else: + _warnings.warn("can't resolve package from __spec__ or __package__, " + "falling back on __name__ and __path__", + ImportWarning, stacklevel=3) + package = globals['__name__'] + if '__path__' not in globals: + package = package.rpartition('.')[0] + return package + + +def __import__(name, globals=None, locals=None, fromlist=(), level=0): + """Import a module. + + The 'globals' argument is used to infer where the import is occurring from + to handle relative imports. The 'locals' argument is ignored. The + 'fromlist' argument specifies what should exist as attributes on the module + being imported (e.g. ``from module import ``). The 'level' + argument represents the package location to import from in a relative + import (e.g. ``from ..pkg import mod`` would have a 'level' of 2). + + """ + if level == 0: + module = _gcd_import(name) + else: + globals_ = globals if globals is not None else {} + package = _calc___package__(globals_) + module = _gcd_import(name, package, level) + if not fromlist: + # Return up to the first dot in 'name'. This is complicated by the fact + # that 'name' may be relative. + if level == 0: + return _gcd_import(name.partition('.')[0]) + elif not name: + return module + else: + # Figure out where to slice the module's name up to the first dot + # in 'name'. + cut_off = len(name) - len(name.partition('.')[0]) + # Slice end needs to be positive to alleviate need to special-case + # when ``'.' not in name``. + return sys.modules[module.__name__[:len(module.__name__)-cut_off]] + elif hasattr(module, '__path__'): + return _handle_fromlist(module, fromlist, _gcd_import) + else: + return module + + +def _builtin_from_name(name): + spec = BuiltinImporter.find_spec(name) + if spec is None: + raise ImportError('no built-in module named ' + name) + return _load_unlocked(spec) + + +def _setup(sys_module, _imp_module): + """Setup importlib by importing needed built-in modules and injecting them + into the global namespace. + + As sys is needed for sys.modules access and _imp is needed to load built-in + modules, those two modules must be explicitly passed in. + + """ + global _imp, sys + _imp = _imp_module + sys = sys_module + + # Set up the spec for existing builtin/frozen modules. + module_type = type(sys) + for name, module in sys.modules.items(): + if isinstance(module, module_type): + if name in sys.builtin_module_names: + loader = BuiltinImporter + elif _imp.is_frozen(name): + loader = FrozenImporter + else: + continue + spec = _spec_from_module(module, loader) + _init_module_attrs(spec, module) + + # Directly load built-in modules needed during bootstrap. + self_module = sys.modules[__name__] + for builtin_name in ('_thread', '_warnings', '_weakref'): + if builtin_name not in sys.modules: + builtin_module = _builtin_from_name(builtin_name) + else: + builtin_module = sys.modules[builtin_name] + setattr(self_module, builtin_name, builtin_module) + + +def _install(sys_module, _imp_module): + """Install importers for builtin and frozen modules""" + _setup(sys_module, _imp_module) + + sys.meta_path.append(BuiltinImporter) + sys.meta_path.append(FrozenImporter) + + +def _install_external_importers(): + """Install importers that require external filesystem access""" + global _bootstrap_external + import _frozen_importlib_external + _bootstrap_external = _frozen_importlib_external + _frozen_importlib_external._install(sys.modules[__name__]) +"#; diff --git a/vm/src/lib.rs b/vm/src/lib.rs index 24f28709ea..d52823c3de 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -48,6 +48,7 @@ pub mod frame; mod frozen; pub mod function; pub mod import; +mod importlib; pub mod obj; mod pyhash; pub mod pyobject; From e88d6ac1ef5f7e683f9a993b5a613bd16e868afe Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Wed, 5 Jun 2019 22:35:30 +0300 Subject: [PATCH 02/31] Add init_importlib --- src/main.rs | 3 +++ vm/src/import.rs | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index cd20462ecc..9b0711ce45 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,6 +54,9 @@ fn main() { // Construct vm: let vm = VirtualMachine::new(); + let res = import::init_importlib(&vm); + handle_exception(&vm, res); + // Figure out if a -c option was given: let result = if let Some(command) = matches.value_of("c") { run_command(&vm, command.to_string()) diff --git a/vm/src/import.rs b/vm/src/import.rs index 0e8b7ba535..9bfeb59361 100644 --- a/vm/src/import.rs +++ b/vm/src/import.rs @@ -11,6 +11,32 @@ use crate::pyobject::{ItemProtocol, PyResult}; use crate::util; use crate::vm::VirtualMachine; +pub fn init_importlib(vm: &VirtualMachine) -> PyResult { + let importlib = import_frozen(vm, "_frozen_importlib")?; + let impmod = import_builtin(vm, "_imp")?; + let install = vm.get_attribute(importlib, "_install")?; + vm.invoke(install, vec![vm.sys_module.clone(), impmod]) +} + +fn import_frozen(vm: &VirtualMachine, module_name: &str) -> PyResult { + if let Some(frozen) = vm.frozen.borrow().get(module_name) { + import_file(vm, module_name, "frozen".to_string(), frozen.to_string()) + } else { + Err(vm.new_import_error(format!("Cannot import frozen module {}", module_name))) + } +} + +fn import_builtin(vm: &VirtualMachine, module_name: &str) -> PyResult { + let sys_modules = vm.get_attribute(vm.sys_module.clone(), "modules").unwrap(); + if let Some(make_module_func) = vm.stdlib_inits.borrow().get(module_name) { + let module = make_module_func(vm); + sys_modules.set_item(module_name, module.clone(), vm)?; + Ok(module) + } else { + Err(vm.new_import_error(format!("Cannot import bultin module {}", module_name))) + } +} + pub fn import_module(vm: &VirtualMachine, current_path: PathBuf, module_name: &str) -> PyResult { // Cached modules: let sys_modules = vm.get_attribute(vm.sys_module.clone(), "modules").unwrap(); @@ -18,12 +44,10 @@ pub fn import_module(vm: &VirtualMachine, current_path: PathBuf, module_name: &s // First, see if we already loaded the module: if let Ok(module) = sys_modules.get_item(module_name.to_string(), vm) { Ok(module) - } else if let Some(frozen) = vm.frozen.borrow().get(module_name) { - import_file(vm, module_name, "frozen".to_string(), frozen.to_string()) - } else if let Some(make_module_func) = vm.stdlib_inits.borrow().get(module_name) { - let module = make_module_func(vm); - sys_modules.set_item(module_name, module.clone(), vm)?; - Ok(module) + } else if vm.frozen.borrow().contains_key(module_name) { + import_frozen(vm, module_name) + } else if vm.stdlib_inits.borrow().contains_key(module_name) { + import_builtin(vm, module_name) } else { let notfound_error = vm.context().exceptions.module_not_found_error.clone(); let import_error = vm.context().exceptions.import_error.clone(); From 698044b12822e43bf15cc65ed20cb4e325a821cf Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Wed, 5 Jun 2019 22:41:48 +0300 Subject: [PATCH 03/31] Add Module.__name__ --- tests/snippets/import.py | 2 ++ vm/src/obj/objmodule.rs | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/tests/snippets/import.py b/tests/snippets/import.py index 4f1164043a..99322a2ea9 100644 --- a/tests/snippets/import.py +++ b/tests/snippets/import.py @@ -7,6 +7,8 @@ assert import_target.X == import_target.func() assert import_target.X == func() +assert import_mutual1.__name__ == "import_mutual1" + assert import_target.Y == other_func() assert import_target.X == aliased.X diff --git a/vm/src/obj/objmodule.rs b/vm/src/obj/objmodule.rs index 904487e4d9..ae1563d3a6 100644 --- a/vm/src/obj/objmodule.rs +++ b/vm/src/obj/objmodule.rs @@ -23,10 +23,15 @@ impl PyModuleRef { panic!("Modules should definitely have a dict."); } } + + fn name(self: PyModuleRef, _vm: &VirtualMachine) -> String { + self.name.clone() + } } pub fn init(context: &PyContext) { extend_class!(&context, &context.module_type, { "__dir__" => context.new_rustfunc(PyModuleRef::dir), + "__name__" => context.new_rustfunc(PyModuleRef::name) }); } From f1af6b1f4056ff3523ee8d4e064febbedd2d4764 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 10:28:20 +0300 Subject: [PATCH 04/31] Include _bootstrap.py as str --- vm/src/importlib.rs => Lib/importlib/_bootstrap.py | 2 -- vm/src/frozen.rs | 13 ++++++++++++- vm/src/lib.rs | 1 - 3 files changed, 12 insertions(+), 4 deletions(-) rename vm/src/importlib.rs => Lib/importlib/_bootstrap.py (99%) diff --git a/vm/src/importlib.rs b/Lib/importlib/_bootstrap.py similarity index 99% rename from vm/src/importlib.rs rename to Lib/importlib/_bootstrap.py index 7b8b4e81ba..32deef10af 100644 --- a/vm/src/importlib.rs +++ b/Lib/importlib/_bootstrap.py @@ -1,4 +1,3 @@ -pub const IMPORTLIB_BOOTSTRAP: &str = r#" """Core implementation of import. This module is NOT meant to be directly imported! It has been designed such @@ -1172,4 +1171,3 @@ def _install_external_importers(): import _frozen_importlib_external _bootstrap_external = _frozen_importlib_external _frozen_importlib_external._install(sys.modules[__name__]) -"#; diff --git a/vm/src/frozen.rs b/vm/src/frozen.rs index 36fd08a4ad..41848001d7 100644 --- a/vm/src/frozen.rs +++ b/vm/src/frozen.rs @@ -1,10 +1,21 @@ -use crate::importlib::IMPORTLIB_BOOTSTRAP; use std::collections::hash_map::HashMap; const HELLO: &str = "initialized = True print(\"Hello world!\") "; +const IMPORTLIB_BOOTSTRAP: &'static str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/", + "..", + "/", + "Lib", + "/", + "importlib", + "/", + "_bootstrap.py" +)); + pub fn get_module_inits() -> HashMap { let mut modules = HashMap::new(); modules.insert("__hello__".to_string(), HELLO); diff --git a/vm/src/lib.rs b/vm/src/lib.rs index d52823c3de..24f28709ea 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -48,7 +48,6 @@ pub mod frame; mod frozen; pub mod function; pub mod import; -mod importlib; pub mod obj; mod pyhash; pub mod pyobject; From f2145880fa151325eab22f1c48698dfef9191a0a Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 11:09:31 +0300 Subject: [PATCH 05/31] Don't set __file__ for frozen modules --- vm/src/import.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vm/src/import.rs b/vm/src/import.rs index 9bfeb59361..33335583ef 100644 --- a/vm/src/import.rs +++ b/vm/src/import.rs @@ -80,7 +80,10 @@ pub fn import_file( let attrs = vm.ctx.new_dict(); attrs.set_item("__name__", vm.new_str(module_name.to_string()), vm)?; - attrs.set_item("__file__", vm.new_str(file_path), vm)?; + if file_path != "frozen".to_string() { + // TODO: Should be removed after precompiling frozen modules. + attrs.set_item("__file__", vm.new_str(file_path), vm)?; + } let module = vm.ctx.new_module(module_name, attrs.clone()); // Store module in cache to prevent infinite loop with mutual importing libs: From 0d9a066712998e6dd30366efaff4227645d75201 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 19:03:27 +0300 Subject: [PATCH 06/31] Add sys.meta_path --- vm/src/sysmodule.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vm/src/sysmodule.rs b/vm/src/sysmodule.rs index f1cc0afcb4..594a4f8a65 100644 --- a/vm/src/sysmodule.rs +++ b/vm/src/sysmodule.rs @@ -242,6 +242,7 @@ settrace() -- set the global debug tracing function "modules" => modules.clone(), "warnoptions" => ctx.new_list(vec![]), "platform" => ctx.new_str(platform), + "meta_path" => ctx.new_list(vec![]), }); modules.set_item("sys", module.clone(), vm).unwrap(); From 37b40c55dabc4997ed4e5bd558fbd7dc780611d1 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 19:15:16 +0300 Subject: [PATCH 07/31] Add frozen _bootstrap_external.py --- Lib/importlib/_bootstrap_external.py | 1616 ++++++++++++++++++++++++++ vm/src/frozen.rs | 16 + 2 files changed, 1632 insertions(+) create mode 100644 Lib/importlib/_bootstrap_external.py diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py new file mode 100644 index 0000000000..f8ff5f4f2c --- /dev/null +++ b/Lib/importlib/_bootstrap_external.py @@ -0,0 +1,1616 @@ +"""Core implementation of path-based import. + +This module is NOT meant to be directly imported! It has been designed such +that it can be bootstrapped into Python as the implementation of import. As +such it requires the injection of specific modules and attributes in order to +work. One should use importlib as the public-facing version of this module. + +""" +# IMPORTANT: Whenever making changes to this module, be sure to run a top-level +# `make regen-importlib` followed by `make` in order to get the frozen version +# of the module updated. Not doing so will result in the Makefile to fail for +# all others who don't have a ./python around to freeze the module in the early +# stages of compilation. +# + +# See importlib._setup() for what is injected into the global namespace. + +# When editing this code be aware that code executed at import time CANNOT +# reference any injected objects! This includes not only global code but also +# anything specified at the class level. + +# Bootstrap-related code ###################################################### +_CASE_INSENSITIVE_PLATFORMS_STR_KEY = 'win', +_CASE_INSENSITIVE_PLATFORMS_BYTES_KEY = 'cygwin', 'darwin' +_CASE_INSENSITIVE_PLATFORMS = (_CASE_INSENSITIVE_PLATFORMS_BYTES_KEY + + _CASE_INSENSITIVE_PLATFORMS_STR_KEY) + + +def _make_relax_case(): + if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS): + if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS_STR_KEY): + key = 'PYTHONCASEOK' + else: + key = b'PYTHONCASEOK' + + def _relax_case(): + """True if filenames must be checked case-insensitively.""" + return key in _os.environ + else: + def _relax_case(): + """True if filenames must be checked case-insensitively.""" + return False + return _relax_case + + +def _pack_uint32(x): + """Convert a 32-bit integer to little-endian.""" + return (int(x) & 0xFFFFFFFF).to_bytes(4, 'little') + + +def _unpack_uint32(data): + """Convert 4 bytes in little-endian to an integer.""" + assert len(data) == 4 + return int.from_bytes(data, 'little') + +def _unpack_uint16(data): + """Convert 2 bytes in little-endian to an integer.""" + assert len(data) == 2 + return int.from_bytes(data, 'little') + + +def _path_join(*path_parts): + """Replacement for os.path.join().""" + return path_sep.join([part.rstrip(path_separators) + for part in path_parts if part]) + + +def _path_split(path): + """Replacement for os.path.split().""" + if len(path_separators) == 1: + front, _, tail = path.rpartition(path_sep) + return front, tail + for x in reversed(path): + if x in path_separators: + front, tail = path.rsplit(x, maxsplit=1) + return front, tail + return '', path + + +def _path_stat(path): + """Stat the path. + + Made a separate function to make it easier to override in experiments + (e.g. cache stat results). + + """ + return _os.stat(path) + + +def _path_is_mode_type(path, mode): + """Test whether the path is the specified mode type.""" + try: + stat_info = _path_stat(path) + except OSError: + return False + return (stat_info.st_mode & 0o170000) == mode + + +def _path_isfile(path): + """Replacement for os.path.isfile.""" + return _path_is_mode_type(path, 0o100000) + + +def _path_isdir(path): + """Replacement for os.path.isdir.""" + if not path: + path = _os.getcwd() + return _path_is_mode_type(path, 0o040000) + + +def _path_isabs(path): + """Replacement for os.path.isabs. + + Considers a Windows drive-relative path (no drive, but starts with slash) to + still be "absolute". + """ + return path.startswith(path_separators) or path[1:3] in _pathseps_with_colon + + +def _write_atomic(path, data, mode=0o666): + """Best-effort function to write data to a path atomically. + Be prepared to handle a FileExistsError if concurrent writing of the + temporary file is attempted.""" + # id() is used to generate a pseudo-random filename. + path_tmp = '{}.{}'.format(path, id(path)) + fd = _os.open(path_tmp, + _os.O_EXCL | _os.O_CREAT | _os.O_WRONLY, mode & 0o666) + try: + # We first write data to a temporary file, and then use os.replace() to + # perform an atomic rename. + with _io.FileIO(fd, 'wb') as file: + file.write(data) + _os.replace(path_tmp, path) + except OSError: + try: + _os.unlink(path_tmp) + except OSError: + pass + raise + + +_code_type = type(_write_atomic.__code__) + + +# Finder/loader utility code ############################################### + +# Magic word to reject .pyc files generated by other Python versions. +# It should change for each incompatible change to the bytecode. +# +# The value of CR and LF is incorporated so if you ever read or write +# a .pyc file in text mode the magic number will be wrong; also, the +# Apple MPW compiler swaps their values, botching string constants. +# +# There were a variety of old schemes for setting the magic number. +# The current working scheme is to increment the previous value by +# 10. +# +# Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic +# number also includes a new "magic tag", i.e. a human readable string used +# to represent the magic number in __pycache__ directories. When you change +# the magic number, you must also set a new unique magic tag. Generally this +# can be named after the Python major version of the magic number bump, but +# it can really be anything, as long as it's different than anything else +# that's come before. The tags are included in the following table, starting +# with Python 3.2a0. +# +# Known values: +# Python 1.5: 20121 +# Python 1.5.1: 20121 +# Python 1.5.2: 20121 +# Python 1.6: 50428 +# Python 2.0: 50823 +# Python 2.0.1: 50823 +# Python 2.1: 60202 +# Python 2.1.1: 60202 +# Python 2.1.2: 60202 +# Python 2.2: 60717 +# Python 2.3a0: 62011 +# Python 2.3a0: 62021 +# Python 2.3a0: 62011 (!) +# Python 2.4a0: 62041 +# Python 2.4a3: 62051 +# Python 2.4b1: 62061 +# Python 2.5a0: 62071 +# Python 2.5a0: 62081 (ast-branch) +# Python 2.5a0: 62091 (with) +# Python 2.5a0: 62092 (changed WITH_CLEANUP opcode) +# Python 2.5b3: 62101 (fix wrong code: for x, in ...) +# Python 2.5b3: 62111 (fix wrong code: x += yield) +# Python 2.5c1: 62121 (fix wrong lnotab with for loops and +# storing constants that should have been removed) +# Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp) +# Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode) +# Python 2.6a1: 62161 (WITH_CLEANUP optimization) +# Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND) +# Python 2.7a0: 62181 (optimize conditional branches: +# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) +# Python 2.7a0 62191 (introduce SETUP_WITH) +# Python 2.7a0 62201 (introduce BUILD_SET) +# Python 2.7a0 62211 (introduce MAP_ADD and SET_ADD) +# Python 3000: 3000 +# 3010 (removed UNARY_CONVERT) +# 3020 (added BUILD_SET) +# 3030 (added keyword-only parameters) +# 3040 (added signature annotations) +# 3050 (print becomes a function) +# 3060 (PEP 3115 metaclass syntax) +# 3061 (string literals become unicode) +# 3071 (PEP 3109 raise changes) +# 3081 (PEP 3137 make __file__ and __name__ unicode) +# 3091 (kill str8 interning) +# 3101 (merge from 2.6a0, see 62151) +# 3103 (__file__ points to source file) +# Python 3.0a4: 3111 (WITH_CLEANUP optimization). +# Python 3.0b1: 3131 (lexical exception stacking, including POP_EXCEPT + #3021) +# Python 3.1a1: 3141 (optimize list, set and dict comprehensions: +# change LIST_APPEND and SET_ADD, add MAP_ADD #2183) +# Python 3.1a1: 3151 (optimize conditional branches: +# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE + #4715) +# Python 3.2a1: 3160 (add SETUP_WITH #6101) +# tag: cpython-32 +# Python 3.2a2: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR #9225) +# tag: cpython-32 +# Python 3.2a3 3180 (add DELETE_DEREF #4617) +# Python 3.3a1 3190 (__class__ super closure changed) +# Python 3.3a1 3200 (PEP 3155 __qualname__ added #13448) +# Python 3.3a1 3210 (added size modulo 2**32 to the pyc header #13645) +# Python 3.3a2 3220 (changed PEP 380 implementation #14230) +# Python 3.3a4 3230 (revert changes to implicit __class__ closure #14857) +# Python 3.4a1 3250 (evaluate positional default arguments before +# keyword-only defaults #16967) +# Python 3.4a1 3260 (add LOAD_CLASSDEREF; allow locals of class to override +# free vars #17853) +# Python 3.4a1 3270 (various tweaks to the __class__ closure #12370) +# Python 3.4a1 3280 (remove implicit class argument) +# Python 3.4a4 3290 (changes to __qualname__ computation #19301) +# Python 3.4a4 3300 (more changes to __qualname__ computation #19301) +# Python 3.4rc2 3310 (alter __qualname__ computation #20625) +# Python 3.5a1 3320 (PEP 465: Matrix multiplication operator #21176) +# Python 3.5b1 3330 (PEP 448: Additional Unpacking Generalizations #2292) +# Python 3.5b2 3340 (fix dictionary display evaluation order #11205) +# Python 3.5b3 3350 (add GET_YIELD_FROM_ITER opcode #24400) +# Python 3.5.2 3351 (fix BUILD_MAP_UNPACK_WITH_CALL opcode #27286) +# Python 3.6a0 3360 (add FORMAT_VALUE opcode #25483) +# Python 3.6a1 3361 (lineno delta of code.co_lnotab becomes signed #26107) +# Python 3.6a2 3370 (16 bit wordcode #26647) +# Python 3.6a2 3371 (add BUILD_CONST_KEY_MAP opcode #27140) +# Python 3.6a2 3372 (MAKE_FUNCTION simplification, remove MAKE_CLOSURE +# #27095) +# Python 3.6b1 3373 (add BUILD_STRING opcode #27078) +# Python 3.6b1 3375 (add SETUP_ANNOTATIONS and STORE_ANNOTATION opcodes +# #27985) +# Python 3.6b1 3376 (simplify CALL_FUNCTIONs & BUILD_MAP_UNPACK_WITH_CALL + #27213) +# Python 3.6b1 3377 (set __class__ cell from type.__new__ #23722) +# Python 3.6b2 3378 (add BUILD_TUPLE_UNPACK_WITH_CALL #28257) +# Python 3.6rc1 3379 (more thorough __class__ validation #23722) +# Python 3.7a1 3390 (add LOAD_METHOD and CALL_METHOD opcodes #26110) +# Python 3.7a2 3391 (update GET_AITER #31709) +# Python 3.7a4 3392 (PEP 552: Deterministic pycs #31650) +# Python 3.7b1 3393 (remove STORE_ANNOTATION opcode #32550) +# Python 3.7b5 3394 (restored docstring as the firts stmt in the body; +# this might affected the first line number #32911) +# Python 3.8a1 3400 (move frame block handling to compiler #17611) +# Python 3.8a1 3401 (add END_ASYNC_FOR #33041) +# Python 3.8a1 3410 (PEP570 Python Positional-Only Parameters #36540) +# +# MAGIC must change whenever the bytecode emitted by the compiler may no +# longer be understood by older implementations of the eval loop (usually +# due to the addition of new opcodes). +# +# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array +# in PC/launcher.c must also be updated. + +MAGIC_NUMBER = (3410).to_bytes(2, 'little') + b'\r\n' +_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c + +_PYCACHE = '__pycache__' +_OPT = 'opt-' + +SOURCE_SUFFIXES = ['.py'] # _setup() adds .pyw as needed. + +BYTECODE_SUFFIXES = ['.pyc'] +# Deprecated. +DEBUG_BYTECODE_SUFFIXES = OPTIMIZED_BYTECODE_SUFFIXES = BYTECODE_SUFFIXES + +def cache_from_source(path, debug_override=None, *, optimization=None): + """Given the path to a .py file, return the path to its .pyc file. + + The .py file does not need to exist; this simply returns the path to the + .pyc file calculated as if the .py file were imported. + + The 'optimization' parameter controls the presumed optimization level of + the bytecode file. If 'optimization' is not None, the string representation + of the argument is taken and verified to be alphanumeric (else ValueError + is raised). + + The debug_override parameter is deprecated. If debug_override is not None, + a True value is the same as setting 'optimization' to the empty string + while a False value is equivalent to setting 'optimization' to '1'. + + If sys.implementation.cache_tag is None then NotImplementedError is raised. + + """ + if debug_override is not None: + _warnings.warn('the debug_override parameter is deprecated; use ' + "'optimization' instead", DeprecationWarning) + if optimization is not None: + message = 'debug_override or optimization must be set to None' + raise TypeError(message) + optimization = '' if debug_override else 1 + path = _os.fspath(path) + head, tail = _path_split(path) + base, sep, rest = tail.rpartition('.') + tag = sys.implementation.cache_tag + if tag is None: + raise NotImplementedError('sys.implementation.cache_tag is None') + almost_filename = ''.join([(base if base else rest), sep, tag]) + if optimization is None: + if sys.flags.optimize == 0: + optimization = '' + else: + optimization = sys.flags.optimize + optimization = str(optimization) + if optimization != '': + if not optimization.isalnum(): + raise ValueError('{!r} is not alphanumeric'.format(optimization)) + almost_filename = '{}.{}{}'.format(almost_filename, _OPT, optimization) + filename = almost_filename + BYTECODE_SUFFIXES[0] + if sys.pycache_prefix is not None: + # We need an absolute path to the py file to avoid the possibility of + # collisions within sys.pycache_prefix, if someone has two different + # `foo/bar.py` on their system and they import both of them using the + # same sys.pycache_prefix. Let's say sys.pycache_prefix is + # `C:\Bytecode`; the idea here is that if we get `Foo\Bar`, we first + # make it absolute (`C:\Somewhere\Foo\Bar`), then make it root-relative + # (`Somewhere\Foo\Bar`), so we end up placing the bytecode file in an + # unambiguous `C:\Bytecode\Somewhere\Foo\Bar\`. + if not _path_isabs(head): + head = _path_join(_os.getcwd(), head) + + # Strip initial drive from a Windows path. We know we have an absolute + # path here, so the second part of the check rules out a POSIX path that + # happens to contain a colon at the second character. + if head[1] == ':' and head[0] not in path_separators: + head = head[2:] + + # Strip initial path separator from `head` to complete the conversion + # back to a root-relative path before joining. + return _path_join( + sys.pycache_prefix, + head.lstrip(path_separators), + filename, + ) + return _path_join(head, _PYCACHE, filename) + + +def source_from_cache(path): + """Given the path to a .pyc. file, return the path to its .py file. + + The .pyc file does not need to exist; this simply returns the path to + the .py file calculated to correspond to the .pyc file. If path does + not conform to PEP 3147/488 format, ValueError will be raised. If + sys.implementation.cache_tag is None then NotImplementedError is raised. + + """ + if sys.implementation.cache_tag is None: + raise NotImplementedError('sys.implementation.cache_tag is None') + path = _os.fspath(path) + head, pycache_filename = _path_split(path) + found_in_pycache_prefix = False + if sys.pycache_prefix is not None: + stripped_path = sys.pycache_prefix.rstrip(path_separators) + if head.startswith(stripped_path + path_sep): + head = head[len(stripped_path):] + found_in_pycache_prefix = True + if not found_in_pycache_prefix: + head, pycache = _path_split(head) + if pycache != _PYCACHE: + raise ValueError(f'{_PYCACHE} not bottom-level directory in ' + f'{path!r}') + dot_count = pycache_filename.count('.') + if dot_count not in {2, 3}: + raise ValueError(f'expected only 2 or 3 dots in {pycache_filename!r}') + elif dot_count == 3: + optimization = pycache_filename.rsplit('.', 2)[-2] + if not optimization.startswith(_OPT): + raise ValueError("optimization portion of filename does not start " + f"with {_OPT!r}") + opt_level = optimization[len(_OPT):] + if not opt_level.isalnum(): + raise ValueError(f"optimization level {optimization!r} is not an " + "alphanumeric value") + base_filename = pycache_filename.partition('.')[0] + return _path_join(head, base_filename + SOURCE_SUFFIXES[0]) + + +def _get_sourcefile(bytecode_path): + """Convert a bytecode file path to a source path (if possible). + + This function exists purely for backwards-compatibility for + PyImport_ExecCodeModuleWithFilenames() in the C API. + + """ + if len(bytecode_path) == 0: + return None + rest, _, extension = bytecode_path.rpartition('.') + if not rest or extension.lower()[-3:-1] != 'py': + return bytecode_path + try: + source_path = source_from_cache(bytecode_path) + except (NotImplementedError, ValueError): + source_path = bytecode_path[:-1] + return source_path if _path_isfile(source_path) else bytecode_path + + +def _get_cached(filename): + if filename.endswith(tuple(SOURCE_SUFFIXES)): + try: + return cache_from_source(filename) + except NotImplementedError: + pass + elif filename.endswith(tuple(BYTECODE_SUFFIXES)): + return filename + else: + return None + + +def _calc_mode(path): + """Calculate the mode permissions for a bytecode file.""" + try: + mode = _path_stat(path).st_mode + except OSError: + mode = 0o666 + # We always ensure write access so we can update cached files + # later even when the source files are read-only on Windows (#6074) + mode |= 0o200 + return mode + + +def _check_name(method): + """Decorator to verify that the module being requested matches the one the + loader can handle. + + The first argument (self) must define _name which the second argument is + compared against. If the comparison fails then ImportError is raised. + + """ + def _check_name_wrapper(self, name=None, *args, **kwargs): + if name is None: + name = self.name + elif self.name != name: + raise ImportError('loader for %s cannot handle %s' % + (self.name, name), name=name) + return method(self, name, *args, **kwargs) + try: + _wrap = _bootstrap._wrap + except NameError: + # XXX yuck + def _wrap(new, old): + for replace in ['__module__', '__name__', '__qualname__', '__doc__']: + if hasattr(old, replace): + setattr(new, replace, getattr(old, replace)) + new.__dict__.update(old.__dict__) + _wrap(_check_name_wrapper, method) + return _check_name_wrapper + + +def _find_module_shim(self, fullname): + """Try to find a loader for the specified module by delegating to + self.find_loader(). + + This method is deprecated in favor of finder.find_spec(). + + """ + # Call find_loader(). If it returns a string (indicating this + # is a namespace package portion), generate a warning and + # return None. + loader, portions = self.find_loader(fullname) + if loader is None and len(portions): + msg = 'Not importing directory {}: missing __init__' + _warnings.warn(msg.format(portions[0]), ImportWarning) + return loader + + +def _classify_pyc(data, name, exc_details): + """Perform basic validity checking of a pyc header and return the flags field, + which determines how the pyc should be further validated against the source. + + *data* is the contents of the pyc file. (Only the first 16 bytes are + required, though.) + + *name* is the name of the module being imported. It is used for logging. + + *exc_details* is a dictionary passed to ImportError if it raised for + improved debugging. + + ImportError is raised when the magic number is incorrect or when the flags + field is invalid. EOFError is raised when the data is found to be truncated. + + """ + magic = data[:4] + if magic != MAGIC_NUMBER: + message = f'bad magic number in {name!r}: {magic!r}' + _bootstrap._verbose_message('{}', message) + raise ImportError(message, **exc_details) + if len(data) < 16: + message = f'reached EOF while reading pyc header of {name!r}' + _bootstrap._verbose_message('{}', message) + raise EOFError(message) + flags = _unpack_uint32(data[4:8]) + # Only the first two flags are defined. + if flags & ~0b11: + message = f'invalid flags {flags!r} in {name!r}' + raise ImportError(message, **exc_details) + return flags + + +def _validate_timestamp_pyc(data, source_mtime, source_size, name, + exc_details): + """Validate a pyc against the source last-modified time. + + *data* is the contents of the pyc file. (Only the first 16 bytes are + required.) + + *source_mtime* is the last modified timestamp of the source file. + + *source_size* is None or the size of the source file in bytes. + + *name* is the name of the module being imported. It is used for logging. + + *exc_details* is a dictionary passed to ImportError if it raised for + improved debugging. + + An ImportError is raised if the bytecode is stale. + + """ + if _unpack_uint32(data[8:12]) != (source_mtime & 0xFFFFFFFF): + message = f'bytecode is stale for {name!r}' + _bootstrap._verbose_message('{}', message) + raise ImportError(message, **exc_details) + if (source_size is not None and + _unpack_uint32(data[12:16]) != (source_size & 0xFFFFFFFF)): + raise ImportError(f'bytecode is stale for {name!r}', **exc_details) + + +def _validate_hash_pyc(data, source_hash, name, exc_details): + """Validate a hash-based pyc by checking the real source hash against the one in + the pyc header. + + *data* is the contents of the pyc file. (Only the first 16 bytes are + required.) + + *source_hash* is the importlib.util.source_hash() of the source file. + + *name* is the name of the module being imported. It is used for logging. + + *exc_details* is a dictionary passed to ImportError if it raised for + improved debugging. + + An ImportError is raised if the bytecode is stale. + + """ + if data[8:16] != source_hash: + raise ImportError( + f'hash in bytecode doesn\'t match hash of source {name!r}', + **exc_details, + ) + + +def _compile_bytecode(data, name=None, bytecode_path=None, source_path=None): + """Compile bytecode as found in a pyc.""" + code = marshal.loads(data) + if isinstance(code, _code_type): + _bootstrap._verbose_message('code object from {!r}', bytecode_path) + if source_path is not None: + _imp._fix_co_filename(code, source_path) + return code + else: + raise ImportError('Non-code object in {!r}'.format(bytecode_path), + name=name, path=bytecode_path) + + +def _code_to_timestamp_pyc(code, mtime=0, source_size=0): + "Produce the data for a timestamp-based pyc." + data = bytearray(MAGIC_NUMBER) + data.extend(_pack_uint32(0)) + data.extend(_pack_uint32(mtime)) + data.extend(_pack_uint32(source_size)) + data.extend(marshal.dumps(code)) + return data + + +def _code_to_hash_pyc(code, source_hash, checked=True): + "Produce the data for a hash-based pyc." + data = bytearray(MAGIC_NUMBER) + flags = 0b1 | checked << 1 + data.extend(_pack_uint32(flags)) + assert len(source_hash) == 8 + data.extend(source_hash) + data.extend(marshal.dumps(code)) + return data + + +def decode_source(source_bytes): + """Decode bytes representing source code and return the string. + + Universal newline support is used in the decoding. + """ + import tokenize # To avoid bootstrap issues. + source_bytes_readline = _io.BytesIO(source_bytes).readline + encoding = tokenize.detect_encoding(source_bytes_readline) + newline_decoder = _io.IncrementalNewlineDecoder(None, True) + return newline_decoder.decode(source_bytes.decode(encoding[0])) + + +# Module specifications ####################################################### + +_POPULATE = object() + + +def spec_from_file_location(name, location=None, *, loader=None, + submodule_search_locations=_POPULATE): + """Return a module spec based on a file location. + + To indicate that the module is a package, set + submodule_search_locations to a list of directory paths. An + empty list is sufficient, though its not otherwise useful to the + import system. + + The loader must take a spec as its only __init__() arg. + + """ + if location is None: + # The caller may simply want a partially populated location- + # oriented spec. So we set the location to a bogus value and + # fill in as much as we can. + location = '' + if hasattr(loader, 'get_filename'): + # ExecutionLoader + try: + location = loader.get_filename(name) + except ImportError: + pass + else: + location = _os.fspath(location) + + # If the location is on the filesystem, but doesn't actually exist, + # we could return None here, indicating that the location is not + # valid. However, we don't have a good way of testing since an + # indirect location (e.g. a zip file or URL) will look like a + # non-existent file relative to the filesystem. + + spec = _bootstrap.ModuleSpec(name, loader, origin=location) + spec._set_fileattr = True + + # Pick a loader if one wasn't provided. + if loader is None: + for loader_class, suffixes in _get_supported_file_loaders(): + if location.endswith(tuple(suffixes)): + loader = loader_class(name, location) + spec.loader = loader + break + else: + return None + + # Set submodule_search_paths appropriately. + if submodule_search_locations is _POPULATE: + # Check the loader. + if hasattr(loader, 'is_package'): + try: + is_package = loader.is_package(name) + except ImportError: + pass + else: + if is_package: + spec.submodule_search_locations = [] + else: + spec.submodule_search_locations = submodule_search_locations + if spec.submodule_search_locations == []: + if location: + dirname = _path_split(location)[0] + spec.submodule_search_locations.append(dirname) + + return spec + + +# Loaders ##################################################################### + +class WindowsRegistryFinder: + + """Meta path finder for modules declared in the Windows registry.""" + + REGISTRY_KEY = ( + 'Software\\Python\\PythonCore\\{sys_version}' + '\\Modules\\{fullname}') + REGISTRY_KEY_DEBUG = ( + 'Software\\Python\\PythonCore\\{sys_version}' + '\\Modules\\{fullname}\\Debug') + DEBUG_BUILD = False # Changed in _setup() + + @classmethod + def _open_registry(cls, key): + try: + return _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, key) + except OSError: + return _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, key) + + @classmethod + def _search_registry(cls, fullname): + if cls.DEBUG_BUILD: + registry_key = cls.REGISTRY_KEY_DEBUG + else: + registry_key = cls.REGISTRY_KEY + key = registry_key.format(fullname=fullname, + sys_version='%d.%d' % sys.version_info[:2]) + try: + with cls._open_registry(key) as hkey: + filepath = _winreg.QueryValue(hkey, '') + except OSError: + return None + return filepath + + @classmethod + def find_spec(cls, fullname, path=None, target=None): + filepath = cls._search_registry(fullname) + if filepath is None: + return None + try: + _path_stat(filepath) + except OSError: + return None + for loader, suffixes in _get_supported_file_loaders(): + if filepath.endswith(tuple(suffixes)): + spec = _bootstrap.spec_from_loader(fullname, + loader(fullname, filepath), + origin=filepath) + return spec + + @classmethod + def find_module(cls, fullname, path=None): + """Find module named in the registry. + + This method is deprecated. Use exec_module() instead. + + """ + spec = cls.find_spec(fullname, path) + if spec is not None: + return spec.loader + else: + return None + + +class _LoaderBasics: + + """Base class of common code needed by both SourceLoader and + SourcelessFileLoader.""" + + def is_package(self, fullname): + """Concrete implementation of InspectLoader.is_package by checking if + the path returned by get_filename has a filename of '__init__.py'.""" + filename = _path_split(self.get_filename(fullname))[1] + filename_base = filename.rsplit('.', 1)[0] + tail_name = fullname.rpartition('.')[2] + return filename_base == '__init__' and tail_name != '__init__' + + def create_module(self, spec): + """Use default semantics for module creation.""" + + def exec_module(self, module): + """Execute the module.""" + code = self.get_code(module.__name__) + if code is None: + raise ImportError('cannot load module {!r} when get_code() ' + 'returns None'.format(module.__name__)) + _bootstrap._call_with_frames_removed(exec, code, module.__dict__) + + def load_module(self, fullname): + """This module is deprecated.""" + return _bootstrap._load_module_shim(self, fullname) + + +class SourceLoader(_LoaderBasics): + + def path_mtime(self, path): + """Optional method that returns the modification time (an int) for the + specified path (a str). + + Raises OSError when the path cannot be handled. + """ + raise OSError + + def path_stats(self, path): + """Optional method returning a metadata dict for the specified + path (a str). + + Possible keys: + - 'mtime' (mandatory) is the numeric timestamp of last source + code modification; + - 'size' (optional) is the size in bytes of the source code. + + Implementing this method allows the loader to read bytecode files. + Raises OSError when the path cannot be handled. + """ + return {'mtime': self.path_mtime(path)} + + def _cache_bytecode(self, source_path, cache_path, data): + """Optional method which writes data (bytes) to a file path (a str). + + Implementing this method allows for the writing of bytecode files. + + The source path is needed in order to correctly transfer permissions + """ + # For backwards compatibility, we delegate to set_data() + return self.set_data(cache_path, data) + + def set_data(self, path, data): + """Optional method which writes data (bytes) to a file path (a str). + + Implementing this method allows for the writing of bytecode files. + """ + + + def get_source(self, fullname): + """Concrete implementation of InspectLoader.get_source.""" + path = self.get_filename(fullname) + try: + source_bytes = self.get_data(path) + except OSError as exc: + raise ImportError('source not available through get_data()', + name=fullname) from exc + return decode_source(source_bytes) + + def source_to_code(self, data, path, *, _optimize=-1): + """Return the code object compiled from source. + + The 'data' argument can be any object type that compile() supports. + """ + return _bootstrap._call_with_frames_removed(compile, data, path, 'exec', + dont_inherit=True, optimize=_optimize) + + def get_code(self, fullname): + """Concrete implementation of InspectLoader.get_code. + + Reading of bytecode requires path_stats to be implemented. To write + bytecode, set_data must also be implemented. + + """ + source_path = self.get_filename(fullname) + source_mtime = None + source_bytes = None + source_hash = None + hash_based = False + check_source = True + try: + bytecode_path = cache_from_source(source_path) + except NotImplementedError: + bytecode_path = None + else: + try: + st = self.path_stats(source_path) + except OSError: + pass + else: + source_mtime = int(st['mtime']) + try: + data = self.get_data(bytecode_path) + except OSError: + pass + else: + exc_details = { + 'name': fullname, + 'path': bytecode_path, + } + try: + flags = _classify_pyc(data, fullname, exc_details) + bytes_data = memoryview(data)[16:] + hash_based = flags & 0b1 != 0 + if hash_based: + check_source = flags & 0b10 != 0 + if (_imp.check_hash_based_pycs != 'never' and + (check_source or + _imp.check_hash_based_pycs == 'always')): + source_bytes = self.get_data(source_path) + source_hash = _imp.source_hash( + _RAW_MAGIC_NUMBER, + source_bytes, + ) + _validate_hash_pyc(data, source_hash, fullname, + exc_details) + else: + _validate_timestamp_pyc( + data, + source_mtime, + st['size'], + fullname, + exc_details, + ) + except (ImportError, EOFError): + pass + else: + _bootstrap._verbose_message('{} matches {}', bytecode_path, + source_path) + return _compile_bytecode(bytes_data, name=fullname, + bytecode_path=bytecode_path, + source_path=source_path) + if source_bytes is None: + source_bytes = self.get_data(source_path) + code_object = self.source_to_code(source_bytes, source_path) + _bootstrap._verbose_message('code object from {}', source_path) + if (not sys.dont_write_bytecode and bytecode_path is not None and + source_mtime is not None): + if hash_based: + if source_hash is None: + source_hash = _imp.source_hash(source_bytes) + data = _code_to_hash_pyc(code_object, source_hash, check_source) + else: + data = _code_to_timestamp_pyc(code_object, source_mtime, + len(source_bytes)) + try: + self._cache_bytecode(source_path, bytecode_path, data) + except NotImplementedError: + pass + return code_object + + +class FileLoader: + + """Base file loader class which implements the loader protocol methods that + require file system usage.""" + + def __init__(self, fullname, path): + """Cache the module name and the path to the file found by the + finder.""" + self.name = fullname + self.path = path + + def __eq__(self, other): + return (self.__class__ == other.__class__ and + self.__dict__ == other.__dict__) + + def __hash__(self): + return hash(self.name) ^ hash(self.path) + + @_check_name + def load_module(self, fullname): + """Load a module from a file. + + This method is deprecated. Use exec_module() instead. + + """ + # The only reason for this method is for the name check. + # Issue #14857: Avoid the zero-argument form of super so the implementation + # of that form can be updated without breaking the frozen module + return super(FileLoader, self).load_module(fullname) + + @_check_name + def get_filename(self, fullname): + """Return the path to the source file as found by the finder.""" + return self.path + + def get_data(self, path): + """Return the data from path as raw bytes.""" + with _io.FileIO(path, 'r') as file: + return file.read() + + # ResourceReader ABC API. + + @_check_name + def get_resource_reader(self, module): + if self.is_package(module): + return self + return None + + def open_resource(self, resource): + path = _path_join(_path_split(self.path)[0], resource) + return _io.FileIO(path, 'r') + + def resource_path(self, resource): + if not self.is_resource(resource): + raise FileNotFoundError + path = _path_join(_path_split(self.path)[0], resource) + return path + + def is_resource(self, name): + if path_sep in name: + return False + path = _path_join(_path_split(self.path)[0], name) + return _path_isfile(path) + + def contents(self): + return iter(_os.listdir(_path_split(self.path)[0])) + + +class SourceFileLoader(FileLoader, SourceLoader): + + """Concrete implementation of SourceLoader using the file system.""" + + def path_stats(self, path): + """Return the metadata for the path.""" + st = _path_stat(path) + return {'mtime': st.st_mtime, 'size': st.st_size} + + def _cache_bytecode(self, source_path, bytecode_path, data): + # Adapt between the two APIs + mode = _calc_mode(source_path) + return self.set_data(bytecode_path, data, _mode=mode) + + def set_data(self, path, data, *, _mode=0o666): + """Write bytes data to a file.""" + parent, filename = _path_split(path) + path_parts = [] + # Figure out what directories are missing. + while parent and not _path_isdir(parent): + parent, part = _path_split(parent) + path_parts.append(part) + # Create needed directories. + for part in reversed(path_parts): + parent = _path_join(parent, part) + try: + _os.mkdir(parent) + except FileExistsError: + # Probably another Python process already created the dir. + continue + except OSError as exc: + # Could be a permission error, read-only filesystem: just forget + # about writing the data. + _bootstrap._verbose_message('could not create {!r}: {!r}', + parent, exc) + return + try: + _write_atomic(path, data, _mode) + _bootstrap._verbose_message('created {!r}', path) + except OSError as exc: + # Same as above: just don't write the bytecode. + _bootstrap._verbose_message('could not create {!r}: {!r}', path, + exc) + + +class SourcelessFileLoader(FileLoader, _LoaderBasics): + + """Loader which handles sourceless file imports.""" + + def get_code(self, fullname): + path = self.get_filename(fullname) + data = self.get_data(path) + # Call _classify_pyc to do basic validation of the pyc but ignore the + # result. There's no source to check against. + exc_details = { + 'name': fullname, + 'path': path, + } + _classify_pyc(data, fullname, exc_details) + return _compile_bytecode( + memoryview(data)[16:], + name=fullname, + bytecode_path=path, + ) + + def get_source(self, fullname): + """Return None as there is no source code.""" + return None + + +# Filled in by _setup(). +EXTENSION_SUFFIXES = [] + + +class ExtensionFileLoader(FileLoader, _LoaderBasics): + + """Loader for extension modules. + + The constructor is designed to work with FileFinder. + + """ + + def __init__(self, name, path): + self.name = name + self.path = path + + def __eq__(self, other): + return (self.__class__ == other.__class__ and + self.__dict__ == other.__dict__) + + def __hash__(self): + return hash(self.name) ^ hash(self.path) + + def create_module(self, spec): + """Create an unitialized extension module""" + module = _bootstrap._call_with_frames_removed( + _imp.create_dynamic, spec) + _bootstrap._verbose_message('extension module {!r} loaded from {!r}', + spec.name, self.path) + return module + + def exec_module(self, module): + """Initialize an extension module""" + _bootstrap._call_with_frames_removed(_imp.exec_dynamic, module) + _bootstrap._verbose_message('extension module {!r} executed from {!r}', + self.name, self.path) + + def is_package(self, fullname): + """Return True if the extension module is a package.""" + file_name = _path_split(self.path)[1] + return any(file_name == '__init__' + suffix + for suffix in EXTENSION_SUFFIXES) + + def get_code(self, fullname): + """Return None as an extension module cannot create a code object.""" + return None + + def get_source(self, fullname): + """Return None as extension modules have no source code.""" + return None + + @_check_name + def get_filename(self, fullname): + """Return the path to the source file as found by the finder.""" + return self.path + + +class _NamespacePath: + """Represents a namespace package's path. It uses the module name + to find its parent module, and from there it looks up the parent's + __path__. When this changes, the module's own path is recomputed, + using path_finder. For top-level modules, the parent module's path + is sys.path.""" + + def __init__(self, name, path, path_finder): + self._name = name + self._path = path + self._last_parent_path = tuple(self._get_parent_path()) + self._path_finder = path_finder + + def _find_parent_path_names(self): + """Returns a tuple of (parent-module-name, parent-path-attr-name)""" + parent, dot, me = self._name.rpartition('.') + if dot == '': + # This is a top-level module. sys.path contains the parent path. + return 'sys', 'path' + # Not a top-level module. parent-module.__path__ contains the + # parent path. + return parent, '__path__' + + def _get_parent_path(self): + parent_module_name, path_attr_name = self._find_parent_path_names() + return getattr(sys.modules[parent_module_name], path_attr_name) + + def _recalculate(self): + # If the parent's path has changed, recalculate _path + parent_path = tuple(self._get_parent_path()) # Make a copy + if parent_path != self._last_parent_path: + spec = self._path_finder(self._name, parent_path) + # Note that no changes are made if a loader is returned, but we + # do remember the new parent path + if spec is not None and spec.loader is None: + if spec.submodule_search_locations: + self._path = spec.submodule_search_locations + self._last_parent_path = parent_path # Save the copy + return self._path + + def __iter__(self): + return iter(self._recalculate()) + + def __getitem__(self, index): + return self._recalculate()[index] + + def __setitem__(self, index, path): + self._path[index] = path + + def __len__(self): + return len(self._recalculate()) + + def __repr__(self): + return '_NamespacePath({!r})'.format(self._path) + + def __contains__(self, item): + return item in self._recalculate() + + def append(self, item): + self._path.append(item) + + +# We use this exclusively in module_from_spec() for backward-compatibility. +class _NamespaceLoader: + def __init__(self, name, path, path_finder): + self._path = _NamespacePath(name, path, path_finder) + + @classmethod + def module_repr(cls, module): + """Return repr for the module. + + The method is deprecated. The import machinery does the job itself. + + """ + return ''.format(module.__name__) + + def is_package(self, fullname): + return True + + def get_source(self, fullname): + return '' + + def get_code(self, fullname): + return compile('', '', 'exec', dont_inherit=True) + + def create_module(self, spec): + """Use default semantics for module creation.""" + + def exec_module(self, module): + pass + + def load_module(self, fullname): + """Load a namespace module. + + This method is deprecated. Use exec_module() instead. + + """ + # The import system never calls this method. + _bootstrap._verbose_message('namespace module loaded with path {!r}', + self._path) + return _bootstrap._load_module_shim(self, fullname) + + +# Finders ##################################################################### + +class PathFinder: + + """Meta path finder for sys.path and package __path__ attributes.""" + + @classmethod + def invalidate_caches(cls): + """Call the invalidate_caches() method on all path entry finders + stored in sys.path_importer_caches (where implemented).""" + for name, finder in list(sys.path_importer_cache.items()): + if finder is None: + del sys.path_importer_cache[name] + elif hasattr(finder, 'invalidate_caches'): + finder.invalidate_caches() + + @classmethod + def _path_hooks(cls, path): + """Search sys.path_hooks for a finder for 'path'.""" + if sys.path_hooks is not None and not sys.path_hooks: + _warnings.warn('sys.path_hooks is empty', ImportWarning) + for hook in sys.path_hooks: + try: + return hook(path) + except ImportError: + continue + else: + return None + + @classmethod + def _path_importer_cache(cls, path): + """Get the finder for the path entry from sys.path_importer_cache. + + If the path entry is not in the cache, find the appropriate finder + and cache it. If no finder is available, store None. + + """ + if path == '': + try: + path = _os.getcwd() + except FileNotFoundError: + # Don't cache the failure as the cwd can easily change to + # a valid directory later on. + return None + try: + finder = sys.path_importer_cache[path] + except KeyError: + finder = cls._path_hooks(path) + sys.path_importer_cache[path] = finder + return finder + + @classmethod + def _legacy_get_spec(cls, fullname, finder): + # This would be a good place for a DeprecationWarning if + # we ended up going that route. + if hasattr(finder, 'find_loader'): + loader, portions = finder.find_loader(fullname) + else: + loader = finder.find_module(fullname) + portions = [] + if loader is not None: + return _bootstrap.spec_from_loader(fullname, loader) + spec = _bootstrap.ModuleSpec(fullname, None) + spec.submodule_search_locations = portions + return spec + + @classmethod + def _get_spec(cls, fullname, path, target=None): + """Find the loader or namespace_path for this module/package name.""" + # If this ends up being a namespace package, namespace_path is + # the list of paths that will become its __path__ + namespace_path = [] + for entry in path: + if not isinstance(entry, (str, bytes)): + continue + finder = cls._path_importer_cache(entry) + if finder is not None: + if hasattr(finder, 'find_spec'): + spec = finder.find_spec(fullname, target) + else: + spec = cls._legacy_get_spec(fullname, finder) + if spec is None: + continue + if spec.loader is not None: + return spec + portions = spec.submodule_search_locations + if portions is None: + raise ImportError('spec missing loader') + # This is possibly part of a namespace package. + # Remember these path entries (if any) for when we + # create a namespace package, and continue iterating + # on path. + namespace_path.extend(portions) + else: + spec = _bootstrap.ModuleSpec(fullname, None) + spec.submodule_search_locations = namespace_path + return spec + + @classmethod + def find_spec(cls, fullname, path=None, target=None): + """Try to find a spec for 'fullname' on sys.path or 'path'. + + The search is based on sys.path_hooks and sys.path_importer_cache. + """ + if path is None: + path = sys.path + spec = cls._get_spec(fullname, path, target) + if spec is None: + return None + elif spec.loader is None: + namespace_path = spec.submodule_search_locations + if namespace_path: + # We found at least one namespace path. Return a spec which + # can create the namespace package. + spec.origin = None + spec.submodule_search_locations = _NamespacePath(fullname, namespace_path, cls._get_spec) + return spec + else: + return None + else: + return spec + + @classmethod + def find_module(cls, fullname, path=None): + """find the module on sys.path or 'path' based on sys.path_hooks and + sys.path_importer_cache. + + This method is deprecated. Use find_spec() instead. + + """ + spec = cls.find_spec(fullname, path) + if spec is None: + return None + return spec.loader + + +class FileFinder: + + """File-based finder. + + Interactions with the file system are cached for performance, being + refreshed when the directory the finder is handling has been modified. + + """ + + def __init__(self, path, *loader_details): + """Initialize with the path to search on and a variable number of + 2-tuples containing the loader and the file suffixes the loader + recognizes.""" + loaders = [] + for loader, suffixes in loader_details: + loaders.extend((suffix, loader) for suffix in suffixes) + self._loaders = loaders + # Base (directory) path + self.path = path or '.' + self._path_mtime = -1 + self._path_cache = set() + self._relaxed_path_cache = set() + + def invalidate_caches(self): + """Invalidate the directory mtime.""" + self._path_mtime = -1 + + find_module = _find_module_shim + + def find_loader(self, fullname): + """Try to find a loader for the specified module, or the namespace + package portions. Returns (loader, list-of-portions). + + This method is deprecated. Use find_spec() instead. + + """ + spec = self.find_spec(fullname) + if spec is None: + return None, [] + return spec.loader, spec.submodule_search_locations or [] + + def _get_spec(self, loader_class, fullname, path, smsl, target): + loader = loader_class(fullname, path) + return spec_from_file_location(fullname, path, loader=loader, + submodule_search_locations=smsl) + + def find_spec(self, fullname, target=None): + """Try to find a spec for the specified module. + + Returns the matching spec, or None if not found. + """ + is_namespace = False + tail_module = fullname.rpartition('.')[2] + try: + mtime = _path_stat(self.path or _os.getcwd()).st_mtime + except OSError: + mtime = -1 + if mtime != self._path_mtime: + self._fill_cache() + self._path_mtime = mtime + # tail_module keeps the original casing, for __file__ and friends + if _relax_case(): + cache = self._relaxed_path_cache + cache_module = tail_module.lower() + else: + cache = self._path_cache + cache_module = tail_module + # Check if the module is the name of a directory (and thus a package). + if cache_module in cache: + base_path = _path_join(self.path, tail_module) + for suffix, loader_class in self._loaders: + init_filename = '__init__' + suffix + full_path = _path_join(base_path, init_filename) + if _path_isfile(full_path): + return self._get_spec(loader_class, fullname, full_path, [base_path], target) + else: + # If a namespace package, return the path if we don't + # find a module in the next section. + is_namespace = _path_isdir(base_path) + # Check for a file w/ a proper suffix exists. + for suffix, loader_class in self._loaders: + full_path = _path_join(self.path, tail_module + suffix) + _bootstrap._verbose_message('trying {}', full_path, verbosity=2) + if cache_module + suffix in cache: + if _path_isfile(full_path): + return self._get_spec(loader_class, fullname, full_path, + None, target) + if is_namespace: + _bootstrap._verbose_message('possible namespace for {}', base_path) + spec = _bootstrap.ModuleSpec(fullname, None) + spec.submodule_search_locations = [base_path] + return spec + return None + + def _fill_cache(self): + """Fill the cache of potential modules and packages for this directory.""" + path = self.path + try: + contents = _os.listdir(path or _os.getcwd()) + except (FileNotFoundError, PermissionError, NotADirectoryError): + # Directory has either been removed, turned into a file, or made + # unreadable. + contents = [] + # We store two cached versions, to handle runtime changes of the + # PYTHONCASEOK environment variable. + if not sys.platform.startswith('win'): + self._path_cache = set(contents) + else: + # Windows users can import modules with case-insensitive file + # suffixes (for legacy reasons). Make the suffix lowercase here + # so it's done once instead of for every import. This is safe as + # the specified suffixes to check against are always specified in a + # case-sensitive manner. + lower_suffix_contents = set() + for item in contents: + name, dot, suffix = item.partition('.') + if dot: + new_name = '{}.{}'.format(name, suffix.lower()) + else: + new_name = name + lower_suffix_contents.add(new_name) + self._path_cache = lower_suffix_contents + if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS): + self._relaxed_path_cache = {fn.lower() for fn in contents} + + @classmethod + def path_hook(cls, *loader_details): + """A class method which returns a closure to use on sys.path_hook + which will return an instance using the specified loaders and the path + called on the closure. + + If the path called on the closure is not a directory, ImportError is + raised. + + """ + def path_hook_for_FileFinder(path): + """Path hook for importlib.machinery.FileFinder.""" + if not _path_isdir(path): + raise ImportError('only directories are supported', path=path) + return cls(path, *loader_details) + + return path_hook_for_FileFinder + + def __repr__(self): + return 'FileFinder({!r})'.format(self.path) + + +# Import setup ############################################################### + +def _fix_up_module(ns, name, pathname, cpathname=None): + # This function is used by PyImport_ExecCodeModuleObject(). + loader = ns.get('__loader__') + spec = ns.get('__spec__') + if not loader: + if spec: + loader = spec.loader + elif pathname == cpathname: + loader = SourcelessFileLoader(name, pathname) + else: + loader = SourceFileLoader(name, pathname) + if not spec: + spec = spec_from_file_location(name, pathname, loader=loader) + try: + ns['__spec__'] = spec + ns['__loader__'] = loader + ns['__file__'] = pathname + ns['__cached__'] = cpathname + except Exception: + # Not important enough to report. + pass + + +def _get_supported_file_loaders(): + """Returns a list of file-based module loaders. + + Each item is a tuple (loader, suffixes). + """ + extensions = ExtensionFileLoader, _imp.extension_suffixes() + source = SourceFileLoader, SOURCE_SUFFIXES + bytecode = SourcelessFileLoader, BYTECODE_SUFFIXES + return [extensions, source, bytecode] + + +def _setup(_bootstrap_module): + """Setup the path-based importers for importlib by importing needed + built-in modules and injecting them into the global namespace. + + Other components are extracted from the core bootstrap module. + + """ + global sys, _imp, _bootstrap + _bootstrap = _bootstrap_module + sys = _bootstrap.sys + _imp = _bootstrap._imp + + # Directly load built-in modules needed during bootstrap. + self_module = sys.modules[__name__] + for builtin_name in ('_io', '_warnings', 'builtins', 'marshal'): + if builtin_name not in sys.modules: + builtin_module = _bootstrap._builtin_from_name(builtin_name) + else: + builtin_module = sys.modules[builtin_name] + setattr(self_module, builtin_name, builtin_module) + + # Directly load the os module (needed during bootstrap). + os_details = ('posix', ['/']), ('nt', ['\\', '/']) + for builtin_os, path_separators in os_details: + # Assumption made in _path_join() + assert all(len(sep) == 1 for sep in path_separators) + path_sep = path_separators[0] + if builtin_os in sys.modules: + os_module = sys.modules[builtin_os] + break + else: + try: + os_module = _bootstrap._builtin_from_name(builtin_os) + break + except ImportError: + continue + else: + raise ImportError('importlib requires posix or nt') + setattr(self_module, '_os', os_module) + setattr(self_module, 'path_sep', path_sep) + setattr(self_module, 'path_separators', ''.join(path_separators)) + setattr(self_module, '_pathseps_with_colon', {f':{s}' for s in path_separators}) + + # Directly load the _thread module (needed during bootstrap). + thread_module = _bootstrap._builtin_from_name('_thread') + setattr(self_module, '_thread', thread_module) + + # Directly load the _weakref module (needed during bootstrap). + weakref_module = _bootstrap._builtin_from_name('_weakref') + setattr(self_module, '_weakref', weakref_module) + + # Directly load the winreg module (needed during bootstrap). + if builtin_os == 'nt': + winreg_module = _bootstrap._builtin_from_name('winreg') + setattr(self_module, '_winreg', winreg_module) + + # Constants + setattr(self_module, '_relax_case', _make_relax_case()) + EXTENSION_SUFFIXES.extend(_imp.extension_suffixes()) + if builtin_os == 'nt': + SOURCE_SUFFIXES.append('.pyw') + if '_d.pyd' in EXTENSION_SUFFIXES: + WindowsRegistryFinder.DEBUG_BUILD = True + + +def _install(_bootstrap_module): + """Install the path-based import components.""" + _setup(_bootstrap_module) + supported_loaders = _get_supported_file_loaders() + sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)]) + sys.meta_path.append(PathFinder) diff --git a/vm/src/frozen.rs b/vm/src/frozen.rs index 41848001d7..92f272e1a7 100644 --- a/vm/src/frozen.rs +++ b/vm/src/frozen.rs @@ -16,9 +16,25 @@ const IMPORTLIB_BOOTSTRAP: &'static str = include_str!(concat!( "_bootstrap.py" )); +const IMPORTLIB_BOOTSTRAP_EXTERNAL: &'static str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/", + "..", + "/", + "Lib", + "/", + "importlib", + "/", + "_bootstrap_external.py" +)); + pub fn get_module_inits() -> HashMap { let mut modules = HashMap::new(); modules.insert("__hello__".to_string(), HELLO); modules.insert("_frozen_importlib".to_string(), IMPORTLIB_BOOTSTRAP); + modules.insert( + "_frozen_importlib_external".to_string(), + IMPORTLIB_BOOTSTRAP_EXTERNAL, + ); modules } From fbaff7fd50aaacdfa90a8b2bd7f4eba15ccf665a Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 19:16:12 +0300 Subject: [PATCH 08/31] Install external importers on init_importlib --- vm/src/import.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vm/src/import.rs b/vm/src/import.rs index 33335583ef..c91371de7d 100644 --- a/vm/src/import.rs +++ b/vm/src/import.rs @@ -14,8 +14,10 @@ use crate::vm::VirtualMachine; pub fn init_importlib(vm: &VirtualMachine) -> PyResult { let importlib = import_frozen(vm, "_frozen_importlib")?; let impmod = import_builtin(vm, "_imp")?; - let install = vm.get_attribute(importlib, "_install")?; - vm.invoke(install, vec![vm.sys_module.clone(), impmod]) + let install = vm.get_attribute(importlib.clone(), "_install")?; + vm.invoke(install, vec![vm.sys_module.clone(), impmod])?; + let install_external = vm.get_attribute(importlib, "_install_external_importers")?; + vm.invoke(install_external, vec![]) } fn import_frozen(vm: &VirtualMachine, module_name: &str) -> PyResult { From a57f38b07f4d8e2728ed0cd42e73b2d232730545 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 19:26:59 +0300 Subject: [PATCH 09/31] Rename builtin io to _io --- Lib/io.py | 1 + vm/src/stdlib/io.rs | 2 +- vm/src/stdlib/mod.rs | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 Lib/io.py diff --git a/Lib/io.py b/Lib/io.py new file mode 100644 index 0000000000..5536a308c3 --- /dev/null +++ b/Lib/io.py @@ -0,0 +1 @@ +from _io import * diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index da8bedf537..2dafc71377 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -471,7 +471,7 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { "getvalue" => ctx.new_rustfunc(bytes_io_getvalue) }); - py_module!(vm, "io", { + py_module!(vm, "_io", { "open" => ctx.new_rustfunc(io_open), "IOBase" => io_base, "RawIOBase" => raw_io_base, diff --git a/vm/src/stdlib/mod.rs b/vm/src/stdlib/mod.rs index 7c72a1b61d..75e554cd63 100644 --- a/vm/src/stdlib/mod.rs +++ b/vm/src/stdlib/mod.rs @@ -61,7 +61,7 @@ pub fn get_module_inits() -> HashMap { // disable some modules on WASM #[cfg(not(target_arch = "wasm32"))] { - modules.insert("io".to_string(), Box::new(io::make_module)); + modules.insert("_io".to_string(), Box::new(io::make_module)); modules.insert("_os".to_string(), Box::new(os::make_module)); modules.insert("socket".to_string(), Box::new(socket::make_module)); } From b0cccf35adc61b222a8ad327512e301435f0f1e9 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 19:32:02 +0300 Subject: [PATCH 10/31] Change os_details --- Lib/importlib/_bootstrap_external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index f8ff5f4f2c..11b8723a45 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -1565,7 +1565,7 @@ def _setup(_bootstrap_module): setattr(self_module, builtin_name, builtin_module) # Directly load the os module (needed during bootstrap). - os_details = ('posix', ['/']), ('nt', ['\\', '/']) + os_details = ('_os', ['/']), ('_os', ['\\', '/']) # Changed by palaviv to fit RustPython!!! for builtin_os, path_separators in os_details: # Assumption made in _path_join() assert all(len(sep) == 1 for sep in path_separators) From 58d9d9deebb2ae673eb1a18dbd887f2995614a72 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 19:32:58 +0300 Subject: [PATCH 11/31] Add sys.path_hooks --- vm/src/sysmodule.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vm/src/sysmodule.rs b/vm/src/sysmodule.rs index 594a4f8a65..a16b80745b 100644 --- a/vm/src/sysmodule.rs +++ b/vm/src/sysmodule.rs @@ -243,6 +243,7 @@ settrace() -- set the global debug tracing function "warnoptions" => ctx.new_list(vec![]), "platform" => ctx.new_str(platform), "meta_path" => ctx.new_list(vec![]), + "path_hooks" => ctx.new_list(vec![]), }); modules.set_item("sys", module.clone(), vm).unwrap(); From 7f61125866487a8426cf3dca88d65d7e2e075ba8 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 20:00:34 +0300 Subject: [PATCH 12/31] Use _bootstrap.py __import__ --- vm/src/builtins.rs | 22 +--------------------- vm/src/import.rs | 7 +++++-- vm/src/vm.rs | 3 +++ 3 files changed, 9 insertions(+), 23 deletions(-) diff --git a/vm/src/builtins.rs b/vm/src/builtins.rs index 9afcd0e2ca..28ab42bd20 100644 --- a/vm/src/builtins.rs +++ b/vm/src/builtins.rs @@ -732,27 +732,7 @@ fn builtin_sum(iterable: PyIterable, start: OptionalArg, vm: &VirtualMachine) -> // Should be renamed to builtin___import__? fn builtin_import(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { - arg_check!( - vm, - args, - required = [(name, Some(vm.ctx.str_type()))], - optional = [ - (_globals, Some(vm.ctx.dict_type())), - (_locals, Some(vm.ctx.dict_type())) - ] - ); - let current_path = { - match vm.current_frame() { - Some(frame) => { - let mut source_pathbuf = PathBuf::from(&frame.code.source_path); - source_pathbuf.pop(); - source_pathbuf - } - None => PathBuf::new(), - } - }; - - import_module(vm, current_path, &objstr::get_value(name)) + vm.invoke(vm.import_func.borrow().clone(), args) } // builtin_vars diff --git a/vm/src/import.rs b/vm/src/import.rs index c91371de7d..5c21123c01 100644 --- a/vm/src/import.rs +++ b/vm/src/import.rs @@ -16,8 +16,11 @@ pub fn init_importlib(vm: &VirtualMachine) -> PyResult { let impmod = import_builtin(vm, "_imp")?; let install = vm.get_attribute(importlib.clone(), "_install")?; vm.invoke(install, vec![vm.sys_module.clone(), impmod])?; - let install_external = vm.get_attribute(importlib, "_install_external_importers")?; - vm.invoke(install_external, vec![]) + vm.import_func + .replace(vm.get_attribute(importlib.clone(), "__import__")?); + let install_external = vm.get_attribute(importlib.clone(), "_install_external_importers")?; + vm.invoke(install_external, vec![])?; + Ok(vm.get_none()) } fn import_frozen(vm: &VirtualMachine, module_name: &str) -> PyResult { diff --git a/vm/src/vm.rs b/vm/src/vm.rs index eeba4a10d3..7efbd1c195 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -55,6 +55,7 @@ pub struct VirtualMachine { pub wasm_id: Option, pub exceptions: RefCell>, pub frozen: RefCell>, + pub import_func: RefCell, } impl VirtualMachine { @@ -68,6 +69,7 @@ impl VirtualMachine { let stdlib_inits = RefCell::new(stdlib::get_module_inits()); let frozen = RefCell::new(frozen::get_module_inits()); + let import_func = RefCell::new(ctx.none()); let vm = VirtualMachine { builtins: builtins.clone(), sys_module: sysmod.clone(), @@ -77,6 +79,7 @@ impl VirtualMachine { wasm_id: None, exceptions: RefCell::new(vec![]), frozen, + import_func, }; builtins::make_module(&vm, builtins.clone()); From 2817214c88ffc701e58d68f7522837d26ec950b3 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 20:20:12 +0300 Subject: [PATCH 13/31] Remove unused imports --- vm/src/builtins.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/vm/src/builtins.rs b/vm/src/builtins.rs index 28ab42bd20..04d83de4eb 100644 --- a/vm/src/builtins.rs +++ b/vm/src/builtins.rs @@ -4,13 +4,11 @@ use std::char; use std::io::{self, Write}; -use std::path::PathBuf; use num_bigint::Sign; use num_traits::{Signed, Zero}; use crate::compile; -use crate::import::import_module; use crate::obj::objbool; use crate::obj::objcode::PyCodeRef; use crate::obj::objdict::PyDictRef; From 0e76dbb749d4256fa1837bbe622ed15903f3297b Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 21:28:19 +0300 Subject: [PATCH 14/31] Add needed methods to _thread --- vm/src/stdlib/thread.rs | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/vm/src/stdlib/thread.rs b/vm/src/stdlib/thread.rs index 8de881893a..6155365490 100644 --- a/vm/src/stdlib/thread.rs +++ b/vm/src/stdlib/thread.rs @@ -2,8 +2,10 @@ /// support threading use super::super::pyobject::PyObjectRef; use crate::function::PyFuncArgs; +use crate::import; use crate::pyobject::PyResult; use crate::vm::VirtualMachine; +use std::path::PathBuf; fn rlock_acquire(vm: &VirtualMachine, _args: PyFuncArgs) -> PyResult { Ok(vm.get_none()) @@ -11,20 +13,49 @@ fn rlock_acquire(vm: &VirtualMachine, _args: PyFuncArgs) -> PyResult { fn rlock_release(_zelf: PyObjectRef, _vm: &VirtualMachine) {} +fn rlock_enter(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { + arg_check!(vm, args, required = [(instance, None)]); + Ok(instance.clone()) +} + +fn rlock_exit(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { + arg_check!( + vm, + args, + // The context manager protocol requires these, but we don't use them + required = [ + (_instance, None), + (_exception_type, None), + (_exception_value, None), + (_traceback, None) + ] + ); + Ok(vm.get_none()) +} + fn get_ident(_vm: &VirtualMachine) -> u32 { 1 } +fn allocate_lock(vm: &VirtualMachine) -> PyResult { + let module = import::import_module(vm, PathBuf::default(), "_thread")?; + let lock_class = vm.get_attribute(module.clone(), "RLock")?; + vm.invoke(lock_class, vec![]) +} + pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { let ctx = &vm.ctx; let rlock_type = py_class!(ctx, "_thread.RLock", ctx.object(), { "acquire" => ctx.new_rustfunc(rlock_acquire), "release" => ctx.new_rustfunc(rlock_release), + "__enter__" => ctx.new_rustfunc(rlock_enter), + "__exit__" => ctx.new_rustfunc(rlock_exit), }); py_module!(vm, "_thread", { "RLock" => rlock_type, - "get_ident" => ctx.new_rustfunc(get_ident) + "get_ident" => ctx.new_rustfunc(get_ident), + "allocate_lock" => ctx.new_rustfunc(allocate_lock), }) } From 5c53e585470c3f2ad0edfa0e49b426978b353e42 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Fri, 7 Jun 2019 21:58:43 +0300 Subject: [PATCH 15/31] Print frozen import file name in stacktrace --- vm/src/import.rs | 13 +++++++++---- vm/src/stdlib/imp.rs | 20 +++++++++++--------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/vm/src/import.rs b/vm/src/import.rs index 5c21123c01..554d5e15ee 100644 --- a/vm/src/import.rs +++ b/vm/src/import.rs @@ -23,15 +23,20 @@ pub fn init_importlib(vm: &VirtualMachine) -> PyResult { Ok(vm.get_none()) } -fn import_frozen(vm: &VirtualMachine, module_name: &str) -> PyResult { +pub fn import_frozen(vm: &VirtualMachine, module_name: &str) -> PyResult { if let Some(frozen) = vm.frozen.borrow().get(module_name) { - import_file(vm, module_name, "frozen".to_string(), frozen.to_string()) + import_file( + vm, + module_name, + format!("frozen {}", module_name), + frozen.to_string(), + ) } else { Err(vm.new_import_error(format!("Cannot import frozen module {}", module_name))) } } -fn import_builtin(vm: &VirtualMachine, module_name: &str) -> PyResult { +pub fn import_builtin(vm: &VirtualMachine, module_name: &str) -> PyResult { let sys_modules = vm.get_attribute(vm.sys_module.clone(), "modules").unwrap(); if let Some(make_module_func) = vm.stdlib_inits.borrow().get(module_name) { let module = make_module_func(vm); @@ -85,7 +90,7 @@ pub fn import_file( let attrs = vm.ctx.new_dict(); attrs.set_item("__name__", vm.new_str(module_name.to_string()), vm)?; - if file_path != "frozen".to_string() { + if !file_path.starts_with("frozen") { // TODO: Should be removed after precompiling frozen modules. attrs.set_item("__file__", vm.new_str(file_path), vm)?; } diff --git a/vm/src/stdlib/imp.rs b/vm/src/stdlib/imp.rs index ae40c16c53..fbf0247a05 100644 --- a/vm/src/stdlib/imp.rs +++ b/vm/src/stdlib/imp.rs @@ -1,5 +1,5 @@ use crate::compile; -use crate::import::import_file; +use crate::import; use crate::obj::objcode::PyCodeRef; use crate::obj::objmodule::PyModuleRef; use crate::obj::objstr; @@ -56,20 +56,22 @@ fn imp_exec_builtin(_mod: PyModuleRef, _vm: &VirtualMachine) -> i32 { } fn imp_get_frozen_object(name: PyStringRef, vm: &VirtualMachine) -> PyResult { - if let Some(frozen) = vm.frozen.borrow().get(name.as_str()) { - compile::compile(vm, frozen, &compile::Mode::Exec, "frozen".to_string()) - .map_err(|err| vm.new_syntax_error(&err)) + let name_str = name.as_str(); + if let Some(frozen) = vm.frozen.borrow().get(name_str) { + compile::compile( + vm, + frozen, + &compile::Mode::Exec, + format!("frozen {}", name_str), + ) + .map_err(|err| vm.new_syntax_error(&err)) } else { Err(vm.new_import_error(format!("No such frozen object named {}", name.as_str()))) } } fn imp_init_frozen(name: PyStringRef, vm: &VirtualMachine) -> PyResult { - if let Some(frozen) = vm.frozen.borrow().get(name.as_str()) { - import_file(vm, name.as_str(), "frozen".to_string(), frozen.to_string()) - } else { - Err(vm.new_import_error(format!("No such frozen object named {}", name.as_str()))) - } + import::import_frozen(vm, name.as_str()) } fn imp_is_frozen_package(_name: PyStringRef, _vm: &VirtualMachine) -> bool { From 6615e811c088c0c8369cf8e2fb98f0e7f27ec716 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sat, 8 Jun 2019 09:58:50 +0300 Subject: [PATCH 16/31] Collapse concat --- vm/src/frozen.rs | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/vm/src/frozen.rs b/vm/src/frozen.rs index 92f272e1a7..42b42c65b2 100644 --- a/vm/src/frozen.rs +++ b/vm/src/frozen.rs @@ -6,26 +6,11 @@ print(\"Hello world!\") const IMPORTLIB_BOOTSTRAP: &'static str = include_str!(concat!( env!("CARGO_MANIFEST_DIR"), - "/", - "..", - "/", - "Lib", - "/", - "importlib", - "/", - "_bootstrap.py" + "/../Lib/importlib/_bootstrap.py" )); - const IMPORTLIB_BOOTSTRAP_EXTERNAL: &'static str = include_str!(concat!( env!("CARGO_MANIFEST_DIR"), - "/", - "..", - "/", - "Lib", - "/", - "importlib", - "/", - "_bootstrap_external.py" + "/../Lib/importlib/_bootstrap_external.py" )); pub fn get_module_inits() -> HashMap { From fe0284aa05ae1de55bb49b808f4f4290bb0f90fb Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sat, 8 Jun 2019 10:49:02 +0300 Subject: [PATCH 17/31] Add new to objmodule and change __name__ to property --- vm/src/obj/objmodule.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/vm/src/obj/objmodule.rs b/vm/src/obj/objmodule.rs index ae1563d3a6..c6b1f3bd02 100644 --- a/vm/src/obj/objmodule.rs +++ b/vm/src/obj/objmodule.rs @@ -1,3 +1,5 @@ +use crate::obj::objproperty::PropertyBuilder; +use crate::obj::objstr::PyStringRef; use crate::obj::objtype::PyClassRef; use crate::pyobject::{PyContext, PyRef, PyResult, PyValue}; use crate::vm::VirtualMachine; @@ -15,6 +17,12 @@ impl PyValue for PyModule { } impl PyModuleRef { + fn new(cls: PyClassRef, name: PyStringRef, vm: &VirtualMachine) -> PyResult { + PyModule { + name: name.as_str().to_string(), + } + .into_ref_with_type(vm, cls) + } fn dir(self: PyModuleRef, vm: &VirtualMachine) -> PyResult { if let Some(dict) = &self.into_object().dict { let keys = dict.into_iter().map(|(k, _v)| k.clone()).collect(); @@ -24,7 +32,7 @@ impl PyModuleRef { } } - fn name(self: PyModuleRef, _vm: &VirtualMachine) -> String { + fn name(self, _vm: &VirtualMachine) -> String { self.name.clone() } } @@ -32,6 +40,9 @@ impl PyModuleRef { pub fn init(context: &PyContext) { extend_class!(&context, &context.module_type, { "__dir__" => context.new_rustfunc(PyModuleRef::dir), - "__name__" => context.new_rustfunc(PyModuleRef::name) + "__name__" => PropertyBuilder::new(context) + .add_getter(PyModuleRef::name) + .create(), + "__new__" => context.new_rustfunc(PyModuleRef::new), }); } From 375790e142af29a3bc501ea2d63e436234fb594c Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sat, 8 Jun 2019 11:05:45 +0300 Subject: [PATCH 18/31] objmodule should have a dict --- vm/src/obj/objmodule.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vm/src/obj/objmodule.rs b/vm/src/obj/objmodule.rs index c6b1f3bd02..69f5c8b1f4 100644 --- a/vm/src/obj/objmodule.rs +++ b/vm/src/obj/objmodule.rs @@ -11,6 +11,8 @@ pub struct PyModule { pub type PyModuleRef = PyRef; impl PyValue for PyModule { + const HAVE_DICT: bool = true; + fn class(vm: &VirtualMachine) -> PyClassRef { vm.ctx.module_type() } From c8248c321101067ea3c272bc3fee90dd0ff020be Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sun, 9 Jun 2019 10:49:23 +0300 Subject: [PATCH 19/31] Expose __name__ in __dict__ --- vm/src/obj/objmodule.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/vm/src/obj/objmodule.rs b/vm/src/obj/objmodule.rs index 69f5c8b1f4..183a096efc 100644 --- a/vm/src/obj/objmodule.rs +++ b/vm/src/obj/objmodule.rs @@ -1,7 +1,8 @@ +use crate::obj::objdict::PyDictRef; use crate::obj::objproperty::PropertyBuilder; use crate::obj::objstr::PyStringRef; use crate::obj::objtype::PyClassRef; -use crate::pyobject::{PyContext, PyRef, PyResult, PyValue}; +use crate::pyobject::{ItemProtocol, PyContext, PyRef, PyResult, PyValue}; use crate::vm::VirtualMachine; #[derive(Debug)] @@ -25,6 +26,7 @@ impl PyModuleRef { } .into_ref_with_type(vm, cls) } + fn dir(self: PyModuleRef, vm: &VirtualMachine) -> PyResult { if let Some(dict) = &self.into_object().dict { let keys = dict.into_iter().map(|(k, _v)| k.clone()).collect(); @@ -34,6 +36,17 @@ impl PyModuleRef { } } + fn dict(self, vm: &VirtualMachine) -> PyResult { + let name_obj = vm.new_str(self.name.clone()); + if let Some(ref dict) = &self.into_object().dict { + let mod_dict = dict.clone(); + mod_dict.set_item("__name__", name_obj, vm)?; + Ok(mod_dict) + } else { + panic!("Modules should definitely have a dict."); + } + } + fn name(self, _vm: &VirtualMachine) -> String { self.name.clone() } @@ -45,6 +58,10 @@ pub fn init(context: &PyContext) { "__name__" => PropertyBuilder::new(context) .add_getter(PyModuleRef::name) .create(), + "__dict__" => + PropertyBuilder::new(context) + .add_getter(PyModuleRef::dict) + .create(), "__new__" => context.new_rustfunc(PyModuleRef::new), }); } From 5584733cdaf9b91fb6bfcfe95816f9d6eec5272e Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sun, 9 Jun 2019 10:56:52 +0300 Subject: [PATCH 20/31] Add sys.path_importer_cache --- vm/src/sysmodule.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vm/src/sysmodule.rs b/vm/src/sysmodule.rs index a16b80745b..f1860c52be 100644 --- a/vm/src/sysmodule.rs +++ b/vm/src/sysmodule.rs @@ -244,6 +244,7 @@ settrace() -- set the global debug tracing function "platform" => ctx.new_str(platform), "meta_path" => ctx.new_list(vec![]), "path_hooks" => ctx.new_list(vec![]), + "path_importer_cache" => ctx.new_dict(), }); modules.set_item("sys", module.clone(), vm).unwrap(); From d9d0ea18341f282a26f76a5a515dd0ec3d1cd51d Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sun, 9 Jun 2019 22:15:00 +0300 Subject: [PATCH 21/31] Fix os to _os in class --- vm/src/stdlib/os.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vm/src/stdlib/os.rs b/vm/src/stdlib/os.rs index 0893c1e6da..14d720df6b 100644 --- a/vm/src/stdlib/os.rs +++ b/vm/src/stdlib/os.rs @@ -234,7 +234,7 @@ type DirEntryRef = PyRef; impl PyValue for DirEntry { fn class(vm: &VirtualMachine) -> PyClassRef { - vm.class("os", "DirEntry") + vm.class("_os", "DirEntry") } } @@ -316,7 +316,7 @@ struct ScandirIterator { impl PyValue for ScandirIterator { fn class(vm: &VirtualMachine) -> PyClassRef { - vm.class("os", "ScandirIter") + vm.class("_os", "ScandirIter") } } @@ -366,7 +366,7 @@ struct StatResult { impl PyValue for StatResult { fn class(vm: &VirtualMachine) -> PyClassRef { - vm.class("os", "stat_result") + vm.class("_os", "stat_result") } } From 5df05d4f95f26cf4edfa91b75d0cd03a6db79c1e Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sun, 9 Jun 2019 22:59:24 +0300 Subject: [PATCH 22/31] Add script dir to sys.path --- src/main.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main.rs b/src/main.rs index 9b0711ce45..84d2af51f3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -129,6 +129,10 @@ fn run_script(vm: &VirtualMachine, script_file: &str) -> PyResult { std::process::exit(1); }; + let dir = file_path.parent().unwrap().to_str().unwrap().to_string(); + let sys_path = vm.get_attribute(vm.sys_module.clone(), "path").unwrap(); + vm.call_method(&sys_path, "insert", vec![vm.new_int(0), vm.new_str(dir)])?; + match util::read_file(&file_path) { Ok(source) => _run_string(vm, &source, file_path.to_str().unwrap().to_string()), Err(err) => { From b567464378e0d4dafe608020d815e23676134e4c Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sun, 9 Jun 2019 22:59:53 +0300 Subject: [PATCH 23/31] Add ModuleNotFoundError to builtins --- vm/src/builtins.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vm/src/builtins.rs b/vm/src/builtins.rs index 04d83de4eb..cfdd6732b3 100644 --- a/vm/src/builtins.rs +++ b/vm/src/builtins.rs @@ -838,6 +838,7 @@ pub fn make_module(vm: &VirtualMachine, module: PyObjectRef) { "ZeroDivisionError" => ctx.exceptions.zero_division_error.clone(), "KeyError" => ctx.exceptions.key_error.clone(), "OSError" => ctx.exceptions.os_error.clone(), + "ModuleNotFoundError" => ctx.exceptions.module_not_found_error.clone(), // Warnings "Warning" => ctx.exceptions.warning.clone(), From 10828e01fb5f5e86be6a80a789cc8bee2ecd6e17 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sun, 9 Jun 2019 23:06:09 +0300 Subject: [PATCH 24/31] Set sys.pycache_prefix to None --- vm/src/sysmodule.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vm/src/sysmodule.rs b/vm/src/sysmodule.rs index f1860c52be..977f2dc93c 100644 --- a/vm/src/sysmodule.rs +++ b/vm/src/sysmodule.rs @@ -245,6 +245,7 @@ settrace() -- set the global debug tracing function "meta_path" => ctx.new_list(vec![]), "path_hooks" => ctx.new_list(vec![]), "path_importer_cache" => ctx.new_dict(), + "pycache_prefix" => vm.get_none(), }); modules.set_item("sys", module.clone(), vm).unwrap(); From 8dec522f96c9fabdea077d4bd7e825cddb9c9c69 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Sun, 9 Jun 2019 23:17:57 +0300 Subject: [PATCH 25/31] compile source may be bytes --- vm/src/builtins.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/vm/src/builtins.rs b/vm/src/builtins.rs index cfdd6732b3..d41eb58746 100644 --- a/vm/src/builtins.rs +++ b/vm/src/builtins.rs @@ -4,12 +4,14 @@ use std::char; use std::io::{self, Write}; +use std::str; use num_bigint::Sign; use num_traits::{Signed, Zero}; use crate::compile; use crate::obj::objbool; +use crate::obj::objbytes::PyBytesRef; use crate::obj::objcode::PyCodeRef; use crate::obj::objdict::PyDictRef; use crate::obj::objint::{self, PyIntRef}; @@ -20,7 +22,7 @@ use crate::obj::objtype::{self, PyClassRef}; use crate::frame::Scope; use crate::function::{single_or_tuple_any, Args, KwArgs, OptionalArg, PyFuncArgs}; use crate::pyobject::{ - IdProtocol, IntoPyObject, ItemProtocol, PyIterable, PyObjectRef, PyResult, PyValue, + Either, IdProtocol, IntoPyObject, ItemProtocol, PyIterable, PyObjectRef, PyResult, PyValue, TryFromObject, TypeProtocol, }; use crate::vm::VirtualMachine; @@ -78,13 +80,19 @@ fn builtin_chr(i: u32, vm: &VirtualMachine) -> PyResult { } fn builtin_compile( - source: PyStringRef, + source: Either, filename: PyStringRef, mode: PyStringRef, vm: &VirtualMachine, ) -> PyResult { + // TODO: compile::compile should probably get bytes + let source = match source { + Either::A(string) => string.value.to_string(), + Either::B(bytes) => str::from_utf8(&bytes).unwrap().to_string(), + }; + // TODO: fix this newline bug: - let source = format!("{}\n", &source.value); + let source = format!("{}\n", source); let mode = { let mode = &mode.value; From 03735a6d264ce82baa60b9f7c63600ca58ec5aa1 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Mon, 10 Jun 2019 19:07:27 +0300 Subject: [PATCH 26/31] Add optional parameters to compile --- vm/src/builtins.rs | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/vm/src/builtins.rs b/vm/src/builtins.rs index d41eb58746..87d39ff544 100644 --- a/vm/src/builtins.rs +++ b/vm/src/builtins.rs @@ -79,14 +79,26 @@ fn builtin_chr(i: u32, vm: &VirtualMachine) -> PyResult { } } -fn builtin_compile( +#[derive(FromArgs)] +#[allow(dead_code)] +struct CompileArgs { + #[pyarg(positional_only, optional = false)] source: Either, + #[pyarg(positional_only, optional = false)] filename: PyStringRef, + #[pyarg(positional_only, optional = false)] mode: PyStringRef, - vm: &VirtualMachine, -) -> PyResult { + #[pyarg(positional_or_keyword, optional = true)] + flags: OptionalArg, + #[pyarg(positional_or_keyword, optional = true)] + dont_inherit: OptionalArg, + #[pyarg(positional_or_keyword, optional = true)] + optimize: OptionalArg, +} + +fn builtin_compile(args: CompileArgs, vm: &VirtualMachine) -> PyResult { // TODO: compile::compile should probably get bytes - let source = match source { + let source = match args.source { Either::A(string) => string.value.to_string(), Either::B(bytes) => str::from_utf8(&bytes).unwrap().to_string(), }; @@ -95,7 +107,7 @@ fn builtin_compile( let source = format!("{}\n", source); let mode = { - let mode = &mode.value; + let mode = &args.mode.value; if mode == "exec" { compile::Mode::Exec } else if mode == "eval" { @@ -109,7 +121,7 @@ fn builtin_compile( } }; - compile::compile(vm, &source, &mode, filename.value.to_string()) + compile::compile(vm, &source, &mode, args.filename.value.to_string()) .map_err(|err| vm.new_syntax_error(&err)) } From e1472f2277c74f0db9ed4b4ec80dcc7a5516eed4 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Mon, 10 Jun 2019 19:09:09 +0300 Subject: [PATCH 27/31] Add sys.dont_write_bytecode --- vm/src/sysmodule.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vm/src/sysmodule.rs b/vm/src/sysmodule.rs index 977f2dc93c..3701da240c 100644 --- a/vm/src/sysmodule.rs +++ b/vm/src/sysmodule.rs @@ -246,6 +246,7 @@ settrace() -- set the global debug tracing function "path_hooks" => ctx.new_list(vec![]), "path_importer_cache" => ctx.new_dict(), "pycache_prefix" => vm.get_none(), + "dont_write_bytecode" => vm.new_bool(true), }); modules.set_item("sys", module.clone(), vm).unwrap(); From 603ef1ad0516b1d318e17f99e1da80adffea8291 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Mon, 10 Jun 2019 21:45:05 +0300 Subject: [PATCH 28/31] Support from_list --- vm/src/frame.rs | 8 ++++++-- vm/src/vm.rs | 16 ++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/vm/src/frame.rs b/vm/src/frame.rs index 88a1b31ada..2506aa4553 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -908,7 +908,11 @@ impl Frame { } fn import(&self, vm: &VirtualMachine, module: &str, symbol: &Option) -> FrameResult { - let module = vm.import(module)?; + let from_list = match symbol { + Some(symbol) => vm.ctx.new_tuple(vec![vm.ctx.new_str(symbol.to_string())]), + None => vm.ctx.new_tuple(vec![]), + }; + let module = vm.import(module, &from_list)?; // If we're importing a symbol, look it up and use it, otherwise construct a module and return // that @@ -926,7 +930,7 @@ impl Frame { } fn import_star(&self, vm: &VirtualMachine, module: &str) -> FrameResult { - let module = vm.import(module)?; + let module = vm.import(module, &vm.ctx.new_tuple(vec![]))?; // Grab all the names from the module and put them in the context if let Some(dict) = &module.dict { diff --git a/vm/src/vm.rs b/vm/src/vm.rs index 7efbd1c195..3356cc450d 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -137,7 +137,7 @@ impl VirtualMachine { pub fn try_class(&self, module: &str, class: &str) -> PyResult { let class = self - .get_attribute(self.import(module)?, class)? + .get_attribute(self.import(module, &self.ctx.new_tuple(vec![]))?, class)? .downcast() .expect("not a class"); Ok(class) @@ -145,7 +145,7 @@ impl VirtualMachine { pub fn class(&self, module: &str, class: &str) -> PyClassRef { let module = self - .import(module) + .import(module, &self.ctx.new_tuple(vec![])) .unwrap_or_else(|_| panic!("unable to import {}", module)); let class = self .get_attribute(module.clone(), class) @@ -303,9 +303,17 @@ impl VirtualMachine { TryFromObject::try_from_object(self, repr) } - pub fn import(&self, module: &str) -> PyResult { + pub fn import(&self, module: &str, from_list: &PyObjectRef) -> PyResult { match self.get_attribute(self.builtins.clone(), "__import__") { - Ok(func) => self.invoke(func, vec![self.ctx.new_str(module.to_string())]), + Ok(func) => self.invoke( + func, + vec![ + self.ctx.new_str(module.to_string()), + self.get_none(), + self.get_none(), + from_list.clone(), + ], + ), Err(_) => Err(self.new_exception( self.ctx.exceptions.import_error.clone(), "__import__ not found".to_string(), From 0e320f9af6ff363e07acb8afa56caabb40db8d42 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Mon, 10 Jun 2019 21:50:37 +0300 Subject: [PATCH 29/31] Change comment to XXX --- Lib/importlib/_bootstrap_external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 11b8723a45..124948ef48 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -1565,7 +1565,7 @@ def _setup(_bootstrap_module): setattr(self_module, builtin_name, builtin_module) # Directly load the os module (needed during bootstrap). - os_details = ('_os', ['/']), ('_os', ['\\', '/']) # Changed by palaviv to fit RustPython!!! + os_details = ('_os', ['/']), ('_os', ['\\', '/']) # XXX Changed to fit RustPython!!! for builtin_os, path_separators in os_details: # Assumption made in _path_join() assert all(len(sep) == 1 for sep in path_separators) From ea8e28026b474269c7f86f5e41fb1890fed1ee20 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Tue, 11 Jun 2019 13:10:29 +0300 Subject: [PATCH 30/31] Simplify objmodule --- vm/src/macros.rs | 1 + vm/src/obj/objmodule.rs | 46 +++++------------------------------------ vm/src/sysmodule.rs | 1 + 3 files changed, 7 insertions(+), 41 deletions(-) diff --git a/vm/src/macros.rs b/vm/src/macros.rs index c3267ff157..3e5d3262f0 100644 --- a/vm/src/macros.rs +++ b/vm/src/macros.rs @@ -118,6 +118,7 @@ macro_rules! no_kwargs { macro_rules! py_module { ( $vm:expr, $module_name:expr, { $($name:expr => $value:expr),* $(,)* }) => {{ let module = $vm.ctx.new_module($module_name, $vm.ctx.new_dict()); + $vm.set_attr(&module, "__name__", $vm.ctx.new_str($module_name.to_string())).unwrap(); $( $vm.set_attr(&module, $name, $value).unwrap(); )* diff --git a/vm/src/obj/objmodule.rs b/vm/src/obj/objmodule.rs index 183a096efc..99dff3b267 100644 --- a/vm/src/obj/objmodule.rs +++ b/vm/src/obj/objmodule.rs @@ -1,8 +1,6 @@ -use crate::obj::objdict::PyDictRef; -use crate::obj::objproperty::PropertyBuilder; use crate::obj::objstr::PyStringRef; use crate::obj::objtype::PyClassRef; -use crate::pyobject::{ItemProtocol, PyContext, PyRef, PyResult, PyValue}; +use crate::pyobject::{PyContext, PyRef, PyResult, PyValue}; use crate::vm::VirtualMachine; #[derive(Debug)] @@ -20,48 +18,14 @@ impl PyValue for PyModule { } impl PyModuleRef { - fn new(cls: PyClassRef, name: PyStringRef, vm: &VirtualMachine) -> PyResult { - PyModule { - name: name.as_str().to_string(), - } - .into_ref_with_type(vm, cls) - } - - fn dir(self: PyModuleRef, vm: &VirtualMachine) -> PyResult { - if let Some(dict) = &self.into_object().dict { - let keys = dict.into_iter().map(|(k, _v)| k.clone()).collect(); - Ok(vm.ctx.new_list(keys)) - } else { - panic!("Modules should definitely have a dict."); - } - } - - fn dict(self, vm: &VirtualMachine) -> PyResult { - let name_obj = vm.new_str(self.name.clone()); - if let Some(ref dict) = &self.into_object().dict { - let mod_dict = dict.clone(); - mod_dict.set_item("__name__", name_obj, vm)?; - Ok(mod_dict) - } else { - panic!("Modules should definitely have a dict."); - } - } - - fn name(self, _vm: &VirtualMachine) -> String { - self.name.clone() + fn init(self, name: PyStringRef, vm: &VirtualMachine) -> PyResult { + vm.set_attr(&self.into_object(), "__name__", name)?; + Ok(vm.get_none()) } } pub fn init(context: &PyContext) { extend_class!(&context, &context.module_type, { - "__dir__" => context.new_rustfunc(PyModuleRef::dir), - "__name__" => PropertyBuilder::new(context) - .add_getter(PyModuleRef::name) - .create(), - "__dict__" => - PropertyBuilder::new(context) - .add_getter(PyModuleRef::dict) - .create(), - "__new__" => context.new_rustfunc(PyModuleRef::new), + "__init__" => context.new_rustfunc(PyModuleRef::init), }); } diff --git a/vm/src/sysmodule.rs b/vm/src/sysmodule.rs index 3701da240c..8ce352d32d 100644 --- a/vm/src/sysmodule.rs +++ b/vm/src/sysmodule.rs @@ -224,6 +224,7 @@ settrace() -- set the global debug tracing function module_names.sort(); let modules = ctx.new_dict(); extend_module!(vm, module, { + "__name__" => ctx.new_str(String::from("sys")), "argv" => argv(ctx), "builtin_module_names" => ctx.new_tuple(module_names.iter().map(|v| v.into_pyobject(vm).unwrap()).collect()), "flags" => flags, From 1de9f73bd076b559189841de72446a96e1c7ffd2 Mon Sep 17 00:00:00 2001 From: Aviv Palivoda Date: Tue, 11 Jun 2019 22:54:54 +0300 Subject: [PATCH 31/31] Optimize already loaded modules --- vm/src/vm.rs | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/vm/src/vm.rs b/vm/src/vm.rs index 3356cc450d..96fb081f49 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -304,20 +304,27 @@ impl VirtualMachine { } pub fn import(&self, module: &str, from_list: &PyObjectRef) -> PyResult { - match self.get_attribute(self.builtins.clone(), "__import__") { - Ok(func) => self.invoke( - func, - vec![ - self.ctx.new_str(module.to_string()), - self.get_none(), - self.get_none(), - from_list.clone(), - ], - ), - Err(_) => Err(self.new_exception( - self.ctx.exceptions.import_error.clone(), - "__import__ not found".to_string(), - )), + let sys_modules = self + .get_attribute(self.sys_module.clone(), "modules") + .unwrap(); + if let Ok(module) = sys_modules.get_item(module.to_string(), self) { + Ok(module) + } else { + match self.get_attribute(self.builtins.clone(), "__import__") { + Ok(func) => self.invoke( + func, + vec![ + self.ctx.new_str(module.to_string()), + self.get_none(), + self.get_none(), + from_list.clone(), + ], + ), + Err(_) => Err(self.new_exception( + self.ctx.exceptions.import_error.clone(), + "__import__ not found".to_string(), + )), + } } }