Skip to content

Commit 694f993

Browse files
committed
Add reachable heads for each revision to revision cache
Also switch to `git rev-list --topo-order` traversal in order to avoid a 2nd pass in Python.
1 parent 23afc8e commit 694f993

File tree

1 file changed

+71
-54
lines changed

1 file changed

+71
-54
lines changed

tracext/git/PyGIT.py

Lines changed: 71 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,12 @@ def getInstance(self):
138138
% (("","weak ")[is_weak], id(self.__inst), self.__repo))
139139
return self.__inst
140140

141+
141142
class Storage:
143+
"""
144+
High-level wrapper around GitCore with in-memory caching
145+
"""
146+
142147
__SREV_MIN = 4 # minimum short-rev length
143148

144149
@staticmethod
@@ -236,88 +241,96 @@ def __rev_cache_sync(self, youngest_rev=None):
236241
return need_update
237242

238243
def get_rev_cache(self):
244+
"""
245+
Retrieve revision cache
246+
247+
may rebuild cache on the fly if required
248+
249+
returns tuple(youngest_rev, oldest_rev, rev_dict, tag_dict, short_rev_dict, branch_dict)
250+
"""
251+
239252
with self.__rev_cache_lock:
240253
if self.__rev_cache is None: # can be cleared by Storage.__rev_cache_sync()
241254
self.logger.debug("triggered rebuild of commit tree db for %d" % id(self))
242-
new_db = {}
243-
new_sdb = {}
244-
new_tags = set([])
255+
245256
youngest = None
246257
oldest = None
247-
for revs in self.repo.rev_parse("--tags").splitlines():
248-
new_tags.add(revs.strip())
258+
new_db = {} # db
259+
new_sdb = {} # short_rev db
249260

250261
# helper for reusing strings
251262
__rev_seen = {}
252263
def __rev_reuse(rev):
253264
rev = str(rev)
254265
return __rev_seen.setdefault(rev, rev)
255266

256-
rev = ord_rev = 0
257-
for revs in self.repo.rev_list("--parents", "--all").splitlines():
258-
revs = revs.strip().split()
267+
new_tags = set(__rev_reuse(rev.strip()) for rev in self.repo.rev_parse("--tags").splitlines())
259268

260-
revs = map(__rev_reuse, revs)
269+
new_branches = [(k, __rev_reuse(v)) for k, v in self._get_branches()]
270+
head_revs = set(v for _, v in new_branches)
271+
272+
rev = ord_rev = 0
273+
for ord_rev, revs in enumerate(self.repo.rev_list("--parents",
274+
"--topo-order",
275+
"--all").splitlines()):
276+
revs = map(__rev_reuse, revs.strip().split())
261277

262278
rev = revs[0]
263279

280+
# first rev seen is assumed to be the youngest one
281+
if not ord_rev:
282+
youngest = rev
283+
264284
# shortrev "hash" map
265285
srev_key = self.__rev_key(rev)
266286
new_sdb.setdefault(srev_key, []).append(rev)
267287

288+
# parents
268289
parents = tuple(revs[1:])
269290

270-
ord_rev += 1
271-
272-
# first rev seen is assumed to be the youngest one (and has ord_rev=1)
273-
if not youngest:
274-
youngest = rev
275-
276-
# new_db[rev] = (children(rev), parents(rev), ordinal_id(rev))
277-
if new_db.has_key(rev):
278-
_children,_parents,_ord_rev = new_db[rev]
291+
# new_db[rev] = (children(rev), parents(rev), ordinal_id(rev), rheads(rev))
292+
if rev in new_db:
293+
# (incomplete) entry was already created by children
294+
_children, _parents, _ord_rev, _rheads = new_db[rev]
279295
assert _children
280296
assert not _parents
281297
assert _ord_rev == 0
282-
else:
298+
299+
if rev in head_revs and rev not in _rheads:
300+
_rheads.append(rev)
301+
302+
else: # new entry
283303
_children = []
304+
_rheads = [rev] if rev in head_revs else []
284305

285-
# create/update entry
286-
new_db[rev] = _children, parents, ord_rev
306+
# create/update entry -- transform lists into tuples since entry will be final
307+
new_db[rev] = tuple(_children), tuple(parents), ord_rev + 1, tuple(_rheads)
287308

288-
# update all parents(rev)'s children
309+
# update parents(rev)s
289310
for parent in parents:
290311
# by default, a dummy ordinal_id is used for the mean-time
291-
_children, _parents, _ord_rev = new_db.setdefault(parent, ([], [], 0))
312+
_children, _parents, _ord_rev, _rheads2 = new_db.setdefault(parent, ([], [], 0, []))
313+
314+
# update parent(rev)'s children
292315
if rev not in _children:
293316
_children.append(rev)
294317

318+
# update parent(rev)'s rheads
319+
for rev in _rheads:
320+
if rev not in _rheads2:
321+
_rheads2.append(rev)
322+
295323
# last rev seen is assumed to be the oldest one (with highest ord_rev)
296324
oldest = rev
297325

298326
__rev_seen = None
299327

300-
assert len(new_db) == ord_rev
301-
302-
# convert children lists to tuples
303-
tmp = {}
304-
try:
305-
while True:
306-
k,v = new_db.popitem()
307-
assert v[2] > 0
308-
tmp[k] = tuple(v[0]),v[1],v[2]
309-
except KeyError:
310-
pass
311-
312-
assert len(new_db) == 0
313-
new_db = tmp
314-
315328
# convert sdb either to dict or array depending on size
316329
tmp = [()]*(max(new_sdb.keys())+1) if len(new_sdb) > 5000 else {}
317330

318331
try:
319332
while True:
320-
k,v = new_sdb.popitem()
333+
k, v = new_sdb.popitem()
321334
tmp[k] = tuple(v)
322335
except KeyError:
323336
pass
@@ -326,17 +339,32 @@ def __rev_reuse(rev):
326339
new_sdb = tmp
327340

328341
# atomically update self.__rev_cache
329-
self.__rev_cache = youngest, oldest, new_db, new_tags, new_sdb
330-
self.logger.debug("rebuilt commit tree db for %d with %d entries" % (id(self),len(new_db)))
342+
self.__rev_cache = youngest, oldest, new_db, new_tags, new_sdb, new_branches
343+
self.logger.debug("rebuilt commit tree db for %d with %d entries" % (id(self), len(new_db)))
331344

332345
assert all(e is not None for e in self.__rev_cache) or not any(self.__rev_cache)
333346

334347
return self.__rev_cache
335348
# with self.__rev_cache_lock
336349

337-
# tuple: youngest_rev, oldest_rev, rev_dict, tag_dict, short_rev_dict
350+
# tuple: youngest_rev, oldest_rev, rev_dict, tag_dict, short_rev_dict, branch_dict
338351
rev_cache = property(get_rev_cache)
339352

353+
def _get_branches(self):
354+
"returns list of (local) branches, with active (= HEAD) one being the first item"
355+
result=[]
356+
for e in self.repo.branch("-v", "--no-abbrev").splitlines():
357+
(bname,bsha)=e[1:].strip().split()[:2]
358+
if e.startswith('*'):
359+
result.insert(0,(bname,bsha))
360+
else:
361+
result.append((bname,bsha))
362+
return result
363+
364+
def get_branches(self):
365+
"returns list of (local) branches, with active (= HEAD) one being the first item"
366+
return self.rev_cache[5]
367+
340368
def get_commits(self):
341369
return self.rev_cache[2]
342370

@@ -467,17 +495,6 @@ def fullrev(self, srev):
467495

468496
return None
469497

470-
def get_branches(self):
471-
"returns list of (local) branches, with active (= HEAD) one being the first item"
472-
result=[]
473-
for e in self.repo.branch("-v", "--no-abbrev").splitlines():
474-
(bname,bsha)=e[1:].strip().split()[:2]
475-
if e.startswith('*'):
476-
result.insert(0,(bname,bsha))
477-
else:
478-
result.append((bname,bsha))
479-
return result
480-
481498
def get_tags(self):
482499
return [e.strip() for e in self.repo.tag("-l").splitlines()]
483500

@@ -594,7 +611,7 @@ def all_revs(self):
594611
return self.get_commits().iterkeys()
595612

596613
def sync(self):
597-
rev = self.repo.rev_list("--max-count=1", "--all").strip()
614+
rev = self.repo.rev_list("--max-count=1", "--topo-order", "--all").strip()
598615
return self.__rev_cache_sync(rev)
599616

600617
def last_change(self, sha, path):

0 commit comments

Comments
 (0)