Split fine_grained_deps out into its own .deps.json cache file #4906

Merged — 4 commits, Apr 17, 2018
105 changes: 76 additions & 29 deletions mypy/build.py
@@ -396,7 +396,11 @@ def default_lib_path(data_dir: str,
('hash', str),
('dependencies', List[str]), # names of imported modules
('data_mtime', int), # mtime of data_json
('deps_mtime', Optional[int]), # mtime of deps_json
('data_json', str), # path of <id>.data.json
# path of <id>.deps.json, which we use to store fine-grained
# dependency information for fine-grained mode
('deps_json', Optional[str]),
('suppressed', List[str]), # dependencies that weren't imported
('child_modules', List[str]), # all submodules of the given module
('options', Optional[Dict[str, object]]), # build options
@@ -413,7 +417,16 @@ def default_lib_path(data_dir: str,
# silent mode or simply not found.


def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta:
def cache_meta_from_dict(meta: Dict[str, Any],
data_json: str, deps_json: Optional[str]) -> CacheMeta:

Collaborator: Can you add a docstring and describe arguments, including deps_json?

"""Build a CacheMeta object from a json metadata dictionary

Args:
meta: JSON metadata read from the metadata cache file
data_json: Path to the .data.json file containing the AST trees
deps_json: Optionally, path to the .deps.json file containing
fine-grained dependency information.
"""
sentinel = None # type: Any # Values to be validated by the caller
return CacheMeta(
meta.get('id', sentinel),
@@ -423,7 +436,9 @@ def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta:
meta.get('hash', sentinel),
meta.get('dependencies', []),
int(meta['data_mtime']) if 'data_mtime' in meta else sentinel,
int(meta['deps_mtime']) if meta.get('deps_mtime') is not None else None,
data_json,
deps_json,
meta.get('suppressed', []),
meta.get('child_modules', []),
meta.get('options'),
@@ -962,7 +977,7 @@ def verify_module(fscache: FileSystemMetaCache, id: str, path: str) -> bool:
return True


def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str]:
def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str, Optional[str]]:
"""Return the file names for the cache files.

Args:
@@ -972,16 +987,20 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str
pyversion: Python version (major, minor)

Returns:
A tuple with the file names to be used for the meta JSON and the
data JSON, respectively.
A tuple with the file names to be used for the meta JSON, the
data JSON, and the fine-grained deps JSON, respectively.
"""
cache_dir = manager.options.cache_dir
pyversion = manager.options.python_version
prefix = os.path.join(cache_dir, '%d.%d' % pyversion, *id.split('.'))
is_package = os.path.basename(path).startswith('__init__.py')
if is_package:
prefix = os.path.join(prefix, '__init__')
return (prefix + '.meta.json', prefix + '.data.json')

deps_json = None
if manager.options.cache_fine_grained:
deps_json = prefix + '.deps.json'
return (prefix + '.meta.json', prefix + '.data.json', deps_json)


def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]:
@@ -997,7 +1016,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
valid; otherwise None.
"""
# TODO: May need to take more build options into account
meta_json, data_json = get_cache_names(id, path, manager)
meta_json, data_json, deps_json = get_cache_names(id, path, manager)
manager.trace('Looking for {} at {}'.format(id, meta_json))
try:
with open(meta_json, 'r') as f:
@@ -1011,11 +1030,12 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
manager.log('Could not load cache for {}: meta cache is not a dict: {}'
.format(id, repr(meta)))
return None
m = cache_meta_from_dict(meta, data_json)
m = cache_meta_from_dict(meta, data_json, deps_json)
# Don't check for path match, that is dealt with in validate_meta().
if (m.id != id or
m.mtime is None or m.size is None or
m.dependencies is None or m.data_mtime is None):
m.dependencies is None or m.data_mtime is None or
(manager.options.cache_fine_grained and m.deps_mtime is None)):
manager.log('Metadata abandoned for {}: attributes are missing'.format(id))
return None

@@ -1098,6 +1118,13 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
if data_mtime != meta.data_mtime:
manager.log('Metadata abandoned for {}: data cache is modified'.format(id))
return None
deps_mtime = None
if manager.options.cache_fine_grained:
assert meta.deps_json
deps_mtime = getmtime(meta.deps_json)

Collaborator: Use the file system cache?

Collaborator (author): We don't use the cache for meta files at all, currently

if deps_mtime != meta.deps_mtime:
manager.log('Metadata abandoned for {}: deps cache is modified'.format(id))
return None

path = os.path.abspath(path)
try:
@@ -1143,6 +1170,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
'size': size,
'hash': source_hash,
'data_mtime': data_mtime,
'deps_mtime': deps_mtime,
'dependencies': meta.dependencies,
'suppressed': meta.suppressed,
'child_modules': meta.child_modules,
@@ -1158,7 +1186,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
meta_str = json.dumps(meta_dict, indent=2, sort_keys=True)
else:
meta_str = json.dumps(meta_dict)
meta_json, _ = get_cache_names(id, path, manager)
meta_json, _, _2 = get_cache_names(id, path, manager)
manager.log('Updating mtime for {}: file {}, meta {}, mtime {}'
.format(id, path, meta_json, meta.mtime))
atomic_write(meta_json, meta_str, '\n') # Ignore errors, it's just an optimization.
@@ -1176,6 +1204,13 @@ def compute_hash(text: str) -> str:
return hashlib.md5(text.encode('utf-8')).hexdigest()


def json_dumps(obj: Any, debug_cache: bool) -> str:
if debug_cache:
return json.dumps(obj, indent=2, sort_keys=True)
else:
return json.dumps(obj, sort_keys=True)


def write_cache(id: str, path: str, tree: MypyFile,
serialized_fine_grained_deps: Dict[str, List[str]],
dependencies: List[str], suppressed: List[str],
@@ -1209,21 +1244,17 @@ def write_cache(id: str, path: str, tree: MypyFile,
"""
# Obtain file paths
path = os.path.abspath(path)
meta_json, data_json = get_cache_names(id, path, manager)
manager.log('Writing {} {} {} {}'.format(id, path, meta_json, data_json))
meta_json, data_json, deps_json = get_cache_names(id, path, manager)
manager.log('Writing {} {} {} {} {}'.format(
id, path, meta_json, data_json, deps_json))

# Make sure directory for cache files exists
parent = os.path.dirname(data_json)
assert os.path.dirname(meta_json) == parent

# Serialize data and analyze interface
data = {'tree': tree.serialize(),
'fine_grained_deps': serialized_fine_grained_deps,
}
if manager.options.debug_cache:
data_str = json.dumps(data, indent=2, sort_keys=True)
else:
data_str = json.dumps(data, sort_keys=True)
data = tree.serialize()
data_str = json_dumps(data, manager.options.debug_cache)
interface_hash = compute_hash(data_str)

# Obtain and set up metadata
@@ -1265,6 +1296,14 @@ def write_cache(id: str, path: str, tree: MypyFile,
return interface_hash, None
data_mtime = getmtime(data_json)

deps_mtime = None
if deps_json:
deps_str = json_dumps(serialized_fine_grained_deps, manager.options.debug_cache)
if not atomic_write(deps_json, deps_str, '\n'):
manager.log("Error writing deps JSON file {}".format(deps_json))
return interface_hash, None
deps_mtime = getmtime(deps_json)

mtime = int(st.st_mtime)
size = st.st_size
options = manager.options.clone_for_module(id)
@@ -1275,6 +1314,7 @@ def write_cache(id: str, path: str, tree: MypyFile,
'size': size,
'hash': source_hash,
'data_mtime': data_mtime,
'deps_mtime': deps_mtime,
'dependencies': dependencies,
'suppressed': suppressed,
'child_modules': child_modules,
@@ -1287,17 +1327,14 @@ def write_cache(id: str, path: str, tree: MypyFile,
}

# Write meta cache file
if manager.options.debug_cache:
meta_str = json.dumps(meta, indent=2, sort_keys=True)
else:
meta_str = json.dumps(meta)
meta_str = json_dumps(meta, manager.options.debug_cache)
if not atomic_write(meta_json, meta_str, '\n'):
# Most likely the error is the replace() call
# (see https://github.com/python/mypy/issues/3215).
# The next run will simply find the cache entry out of date.
manager.log("Error writing meta JSON file {}".format(meta_json))

return interface_hash, cache_meta_from_dict(meta, data_json)
return interface_hash, cache_meta_from_dict(meta, data_json, deps_json)


def delete_cache(id: str, path: str, manager: BuildManager) -> None:
Expand All @@ -1308,12 +1345,13 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None:
see #4043 for an example.
"""
path = os.path.abspath(path)
meta_json, data_json = get_cache_names(id, path, manager)
manager.log('Deleting {} {} {} {}'.format(id, path, meta_json, data_json))
cache_paths = get_cache_names(id, path, manager)
manager.log('Deleting {} {} {}'.format(id, path, " ".join(x for x in cache_paths if x)))

for filename in [data_json, meta_json]:
for filename in cache_paths:
try:
os.remove(filename)
if filename:
os.remove(filename)
except OSError as e:
if e.errno != errno.ENOENT:
manager.log("Error deleting cache file {}: {}".format(filename, e.strerror))
@@ -1657,15 +1695,22 @@ def wrap_context(self) -> Iterator[None]:
self.check_blockers()

# Methods for processing cached modules.
def load_fine_grained_deps(self) -> None:
assert self.meta is not None, "Internal error: this method must be called only" \
" for cached modules"
assert self.meta.deps_json
with open(self.meta.deps_json) as f:
deps = json.load(f)
# TODO: Assert deps file wasn't changed.
self.fine_grained_deps = {k: set(v) for k, v in deps.items()}

def load_tree(self) -> None:
assert self.meta is not None, "Internal error: this method must be called only" \
" for cached modules"
with open(self.meta.data_json) as f:
data = json.load(f)
# TODO: Assert data file wasn't changed.
self.tree = MypyFile.deserialize(data['tree'])
self.fine_grained_deps = {k: set(v) for k, v in data['fine_grained_deps'].items()}
self.tree = MypyFile.deserialize(data)

self.manager.modules[self.id] = self.tree
self.manager.add_stats(fresh_trees=1)
@@ -2517,6 +2562,8 @@ def process_fine_grained_cache_graph(graph: Graph, manager: BuildManager) -> None:
# Note that ascc is a set, and scc is a list.
scc = order_ascc(graph, ascc)
process_fresh_scc(graph, scc, manager)
for id in scc:
graph[id].load_fine_grained_deps()


def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]:
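For context (not part of the diff): with this change each module has up to three cache files — <id>.meta.json, <id>.data.json and, when the cache_fine_grained option is enabled, <id>.deps.json. A minimal sketch of reading the split deps file back, mirroring load_fine_grained_deps() above (read_fine_grained_deps is a hypothetical helper for illustration, not mypy's API):

import json
from typing import Dict, Set

def read_fine_grained_deps(deps_json_path: str) -> Dict[str, Set[str]]:
    # <id>.deps.json maps trigger names to lists of dependent targets;
    # turn the lists back into sets, as load_fine_grained_deps() does.
    with open(deps_json_path) as f:
        deps = json.load(f)
    return {trigger: set(targets) for trigger, targets in deps.items()}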
4 changes: 3 additions & 1 deletion mypy/dmypy_server.py
@@ -128,7 +128,9 @@ def __init__(self, options: Options,
options.fine_grained_incremental = True
options.show_traceback = True
if options.use_fine_grained_cache:
options.cache_fine_grained = True # set this so that cache options match
# Using fine_grained_cache implies generating and caring
# about the fine grained cache
options.cache_fine_grained = True
else:
options.cache_dir = os.devnull
# Fine-grained incremental doesn't support general partial types
2 changes: 1 addition & 1 deletion mypy/options.py
@@ -45,7 +45,7 @@ class Options:
}

OPTIONS_AFFECTING_CACHE = ((PER_MODULE_OPTIONS |
{"quick_and_dirty", "platform", "cache_fine_grained"})
{"quick_and_dirty", "platform"})
- {"debug_cache"})

def __init__(self) -> None:
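A note on the options.py change above (my reading, not stated in the diff): only the options listed in OPTIONS_AFFECTING_CACHE are snapshotted into the meta file and compared on the next run, so dropping "cache_fine_grained" from the set means a regular run can still validate cache files written by a fine-grained run. A rough sketch of that comparison (snapshot_for_cache is an illustrative name, not mypy's actual function):

# Only options whose change should invalidate the cache are snapshotted.
OPTIONS_AFFECTING_CACHE = {"quick_and_dirty", "platform"}  # per-module options omitted here

def snapshot_for_cache(options: dict) -> dict:
    return {k: v for k, v in options.items() if k in OPTIONS_AFFECTING_CACHE}

# Runs differing only in cache_fine_grained now produce identical snapshots,
# so meta files written by one run validate in the other.
assert snapshot_for_cache({"platform": "linux", "cache_fine_grained": True}) == \
       snapshot_for_cache({"platform": "linux", "cache_fine_grained": False})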
30 changes: 30 additions & 0 deletions test-data/unit/check-incremental.test
@@ -4235,3 +4235,33 @@ pass
[out2]
[out3]
tmp/a.py:1: note: unused 'type: ignore' comment

-- Test that a non cache_fine_grained run can use a fine-grained cache
[case testRegularUsesFgCache]
# flags: --config-file tmp/mypy.ini
import a
[file a.py]
x = 0
[file mypy.ini]
[[mypy]
cache_fine_grained = True
[file mypy.ini.2]
[[mypy]
cache_fine_grained = False
-- Nothing should get rechecked
[rechecked]
[stale]

[case testFgCacheNeedsFgCache]
# flags: --config-file tmp/mypy.ini
import a
[file a.py]
x = 0
[file mypy.ini]
[[mypy]
cache_fine_grained = False
[file mypy.ini.2]
[[mypy]
cache_fine_grained = True
[rechecked a, builtins]
[stale a, builtins]