From 21c8085f68823092c1545d9d57af63421024292c Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Thu, 1 Mar 2018 18:14:15 -0800 Subject: [PATCH 1/2] Don't modify the on disk cache in fine-grained mode This is a little subtle, because interface_hash still needs to be computed, as it is a major driver of the coarse-grained build process. Since metas are no longer computed for files that get rechecked during build, to avoid spuriously reprocessing them we need to find initial file state in cache mode as well. --- mypy/build.py | 28 +++++++++++++++++++--------- mypy/dmypy_server.py | 14 +++++++------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 633978617257..4d177dcb72fc 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1247,15 +1247,6 @@ def write_cache(id: str, path: str, tree: MypyFile, corresponding to the metadata that was written (the latter may be None if the cache could not be written). """ - # Obtain file paths - path = os.path.abspath(path) - meta_json, data_json = get_cache_names(id, path, manager) - manager.log('Writing {} {} {} {}'.format(id, path, meta_json, data_json)) - - # Make sure directory for cache files exists - parent = os.path.dirname(data_json) - assert os.path.dirname(meta_json) == parent - # Serialize data and analyze interface data = {'tree': tree.serialize(), 'fine_grained_deps': serialized_fine_grained_deps, @@ -1266,6 +1257,21 @@ def write_cache(id: str, path: str, tree: MypyFile, data_str = json.dumps(data, sort_keys=True) interface_hash = compute_hash(data_str) + # Don't make file system modifications in fine-grained mode + # We still need to return an interface_hash, because it determines + # what gets recomputed in the initial build. 
+ if manager.options.fine_grained_incremental: + return interface_hash, None + + # Obtain file paths + path = os.path.abspath(path) + meta_json, data_json = get_cache_names(id, path, manager) + manager.log('Writing {} {} {} {}'.format(id, path, meta_json, data_json)) + + # Make sure directory for cache files exists + parent = os.path.dirname(data_json) + assert os.path.dirname(meta_json) == parent + # Obtain and set up metadata try: os.makedirs(parent, exist_ok=True) @@ -1347,6 +1353,10 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None: This avoids inconsistent states with cache files from different mypy runs, see #4043 for an example. """ + # Don't make file system modifications in fine-grained mode + if manager.options.fine_grained_incremental: + return + path = os.path.abspath(path) meta_json, data_json = get_cache_names(id, path, manager) manager.log('Deleting {} {} {} {}'.format(id, path, meta_json, data_json)) diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py index a26a256fb011..27c233a97748 100644 --- a/mypy/dmypy_server.py +++ b/mypy/dmypy_server.py @@ -254,9 +254,8 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict self.fscache = FileSystemCache(self.options.python_version) self.fswatcher = FileSystemWatcher(self.fscache) self.update_sources(sources) - if not self.options.use_fine_grained_cache: - # Stores the initial state of sources as a side effect. - self.fswatcher.find_changed() + # Stores the initial state of sources as a side effect. 
+ self.fswatcher.find_changed() try: # TODO: alt_lib_path result = mypy.build.build(sources=sources, @@ -274,16 +273,17 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(manager, graph) self.fine_grained_initialized = True self.previous_sources = sources - self.fscache.flush() - # If we are using the fine-grained cache, build hasn't actually done - # the typechecking on the updated files yet. + # If we are using the fine-grained cache, build might not have + # actually done the typechecking on the updated files yet. # Run a fine-grained update starting from the cached data if self.options.use_fine_grained_cache: # Pull times and hashes out of the saved_cache and stick them into # the fswatcher, so we pick up the changes. for state in self.fine_grained_manager.graph.values(): meta = state.meta + # If there isn't a meta, that means the current + # version got checked in the initial build. 
if meta is None: continue assert state.path is not None self.fswatcher.set_file_data( @@ -294,8 +294,8 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict changed = self.find_changed(sources) if changed: messages = self.fine_grained_manager.update(changed) - self.fscache.flush() + self.fscache.flush() status = 1 if messages else 0 self.previous_messages = messages[:] return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status} From 8b3702d4fcb1c9bc6c05903d42d78189ec2d9c50 Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Mon, 5 Mar 2018 09:51:36 -0800 Subject: [PATCH 2/2] Tweak some of the logic for pulling info from the metas --- mypy/dmypy_server.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py index 27c233a97748..629f410beb79 100644 --- a/mypy/dmypy_server.py +++ b/mypy/dmypy_server.py @@ -281,10 +281,14 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict # Pull times and hashes out of the saved_cache and stick them into # the fswatcher, so we pick up the changes. for state in self.fine_grained_manager.graph.values(): + # Only grab hashes from modules that came from the + # cache. For modules that actually got parsed & + # typechecked we rely on the data from the actual disk + # cache, since we don't generate metas for those. + assert state.tree is not None + if not state.tree.is_cache_skeleton: continue meta = state.meta - # If there isn't a meta, that means the current - # version got checked in the initial build. - if meta is None: continue + assert meta is not None assert state.path is not None self.fswatcher.set_file_data( state.path,