From ef14a68048d9c230b6359c96ad5fcd8897f0a0f5 Mon Sep 17 00:00:00 2001
From: Ivan Levkivskyi
Date: Mon, 18 Aug 2025 13:26:24 +0100
Subject: [PATCH 1/2] Force all deserialized objects to the oldest generation

---
 mypy/build.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/mypy/build.py b/mypy/build.py
index 71575de9d877..9da345d41d80 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -3326,8 +3326,22 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
             #
             # TODO: see if it's possible to determine if we need to process only a
             # _subset_ of the past SCCs instead of having to process them all.
+            if platform.python_implementation() == "CPython":
+                # When deserializing the cache we create a huge number of new
+                # objects, so even with our generous GC thresholds the GC still
+                # does a lot of pointless work searching for garbage. We therefore
+                # temporarily disable it while processing fresh SCCs, then move all
+                # the new objects to the oldest generation with the freeze() and
+                # unfreeze() trick below. This is arguably a hack, but it gives
+                # huge performance wins for large third-party libraries, like torch.
+                gc.collect()
+                gc.disable()
             for prev_scc in fresh_scc_queue:
                 process_fresh_modules(graph, prev_scc, manager)
+            if platform.python_implementation() == "CPython":
+                gc.freeze()
+                gc.unfreeze()
+                gc.enable()
             fresh_scc_queue = []
         size = len(scc)
         if size == 1:

From b7051c33add8f9840f1c6abedad9d529631d6bcb Mon Sep 17 00:00:00 2001
From: Ivan Levkivskyi
Date: Tue, 19 Aug 2025 00:42:20 +0100
Subject: [PATCH 2/2] Limit the hack to just one use

---
 mypy/build.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index 9da345d41d80..883ae1f22f19 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -116,6 +116,8 @@
     "abc",
 }
 
+# We are being careful for now; we can increase this later if it proves safe and useful.
+MAX_GC_FREEZE_CYCLES = 1
 
 Graph: _TypeAlias = dict[str, "State"]
 
@@ -707,6 +709,8 @@ def __init__(
         # new file can be processed O(n**2) times. This cache
         # avoids most of this redundant work.
         self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo]]] = {}
+        # Number of times we have used the GC optimization hack for fresh SCCs.
+        self.gc_freeze_cycles = 0
 
     def dump_stats(self) -> None:
         if self.options.dump_build_stats:
@@ -3326,7 +3330,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
             #
             # TODO: see if it's possible to determine if we need to process only a
             # _subset_ of the past SCCs instead of having to process them all.
-            if platform.python_implementation() == "CPython":
+            if (
+                platform.python_implementation() == "CPython"
+                and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
+            ):
                 # When deserializing the cache we create a huge number of new
                 # objects, so even with our generous GC thresholds the GC still
                 # does a lot of pointless work searching for garbage. We therefore
@@ -3338,7 +3345,11 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
                 gc.disable()
             for prev_scc in fresh_scc_queue:
                 process_fresh_modules(graph, prev_scc, manager)
-            if platform.python_implementation() == "CPython":
+            if (
+                platform.python_implementation() == "CPython"
+                and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
+            ):
+                manager.gc_freeze_cycles += 1
                 gc.freeze()
                 gc.unfreeze()
                 gc.enable()
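
As a standalone illustration of the CPython pattern these patches rely on (gc.disable() during an allocation burst, then gc.freeze()/gc.unfreeze() to move the survivors to the oldest generation), here is a minimal sketch. The helper name deserialize_with_gc_pause, the module-level counter, and the toy workload are invented for the example; only the gc and platform calls are real standard-library APIs.

import gc
import platform

# Hypothetical cap mirroring MAX_GC_FREEZE_CYCLES in the patch.
MAX_GC_FREEZE_CYCLES = 1
_gc_freeze_cycles = 0


def deserialize_with_gc_pause(deserialize_all):
    """Run an allocation-heavy callable with the cyclic GC paused (sketch).

    Objects created while the GC is disabled are afterwards moved to the
    permanent generation by gc.freeze() and immediately returned to the
    oldest generation by gc.unfreeze(), so collections of the younger
    generations never traverse them again.
    """
    global _gc_freeze_cycles
    use_hack = (
        platform.python_implementation() == "CPython"
        and _gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
    )
    if use_hack:
        gc.collect()  # flush existing garbage so only live objects get promoted
        gc.disable()  # no automatic collections during the allocation burst
    try:
        return deserialize_all()
    finally:
        if use_hack:
            _gc_freeze_cycles += 1
            gc.freeze()    # move all tracked objects to the permanent generation
            gc.unfreeze()  # put them back into the oldest generation in one step
            gc.enable()


if __name__ == "__main__":
    # Toy stand-in for deserializing a large cache: many small objects.
    blobs = deserialize_with_gc_pause(lambda: [{"n": i} for i in range(100_000)])
    print(len(blobs))

The gc.collect() before gc.disable() matters: it clears pre-existing garbage so that the freeze()/unfreeze() pair promotes only live objects to the oldest generation, where any promoted garbage would otherwise linger until a rare full collection.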