From 60ca325da68f5958dfe095f7f68468a3e58a3c8c Mon Sep 17 00:00:00 2001 From: Monty Taylor Date: Wed, 8 Mar 2017 12:12:37 -0600 Subject: [PATCH 1/2] Add option to skip the GC cycle on Repo close In large environments with thousands of repos and frequent repo opens and closes, the cost of doing 2 full gc cycles on every Repo destruction can become crippling. --- git/repo/base.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/git/repo/base.py b/git/repo/base.py index 7820fd668..a4156edaf 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -90,7 +90,10 @@ class Repo(object): # Subclasses may easily bring in their own custom types by placing a constructor or type here GitCommandWrapperType = Git - def __init__(self, path=None, odbt=DefaultDBType, search_parent_directories=False): + def __init__( + self, path=None, odbt=DefaultDBType, + search_parent_directories=False, + gc_on_close=True): """Create a new Repo instance :param path: @@ -113,9 +116,15 @@ def __init__(self, path=None, odbt=DefaultDBType, search_parent_directories=Fals Please note that this was the default behaviour in older versions of GitPython, which is considered a bug though. + :param gc_on_close: + Whether to force gc collect and mmap collect on Repo destruction. + In some high-volume cases, doing frequent full gc cycles + is extremely expensive. While doing the gc is the safe bet, skipping + it can be useful in specific cases. Defaults to True. 
:raise InvalidGitRepositoryError: :raise NoSuchPathError: :return: git.Repo """ + self._gc_on_close = gc_on_close epath = path or os.getenv('GIT_DIR') if not epath: epath = os.getcwd() @@ -191,9 +200,10 @@ def __del__(self): def close(self): if self.git: self.git.clear_cache() - gc.collect() - gitdb.util.mman.collect() - gc.collect() + if self._gc_on_close: + gc.collect() + gitdb.util.mman.collect() + gc.collect() def __eq__(self, rhs): if isinstance(rhs, Repo): From 0533dd6e1dddfaa05549a4c16303ea4b4540c030 Mon Sep 17 00:00:00 2001 From: Monty Taylor Date: Wed, 8 Mar 2017 12:45:53 -0600 Subject: [PATCH 2/2] Perform the first gc.collect before clearing cache In issue #553 it was originally mentioned that the gc.collect was required before clearing the cache, but in the code the collect happens after the cache clear. Move the first gc.collect to before the cache clear to be consistent with the original description. --- git/repo/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/git/repo/base.py b/git/repo/base.py index a4156edaf..8dbdd6651 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -199,9 +199,10 @@ def __del__(self): def close(self): if self.git: - self.git.clear_cache() if self._gc_on_close: gc.collect() + self.git.clear_cache() + if self._gc_on_close: gitdb.util.mman.collect() gc.collect()