Skip to content

Commit 20a338f

Browse files
committed
Merge pull request #409 from nvie/add-incremental-blame-support
Add incremental blame support
2 parents 978eb5b + f533a68 commit 20a338f

File tree

4 files changed

+134
-3
lines changed

4 files changed

+134
-3
lines changed

git/compat.py

+15
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
# flake8: noqa
99

1010
import sys
11+
import six
1112

1213
from gitdb.utils.compat import (
1314
PY3,
@@ -46,6 +47,20 @@ def mviter(d):
4647
def mviter(d):
4748
return d.itervalues()
4849

50+
PRE_PY27 = sys.version_info < (2, 7)
51+
52+
53+
def safe_decode(s):
54+
"""Safely decodes a binary string to unicode"""
55+
if isinstance(s, six.text_type):
56+
return s
57+
elif isinstance(s, six.binary_type):
58+
if PRE_PY27:
59+
return s.decode(defenc) # pre-2.7 decode() takes no keyword arguments, so undecodable bytes raise UnicodeDecodeError here
60+
else:
61+
return s.decode(defenc, errors='replace')
62+
raise TypeError('Expected bytes or text, but got %r' % (s,))
63+
4964

5065
def with_metaclass(meta, *bases):
5166
"""copied from https://github.com/Byron/bcore/blob/master/src/python/butility/future.py#L15"""

git/repo/base.py

+65-3
Original file line number | Diff line number | Diff line change
@@ -52,12 +52,14 @@
5252
from git.compat import (
5353
text_type,
5454
defenc,
55-
PY3
55+
PY3,
56+
safe_decode,
5657
)
5758

5859
import os
5960
import sys
6061
import re
62+
from six.moves import range
6163

6264
DefaultDBType = GitCmdObjectDB
6365
if sys.version_info[:2] < (2, 5): # python 2.4 compatibility
@@ -655,7 +657,64 @@ def active_branch(self):
655657
:return: Head to the active branch"""
656658
return self.head.reference
657659

658-
def blame(self, rev, file):
660+
def blame_incremental(self, rev, file, **kwargs):
661+
"""Iterator for blame information for the given file at the given revision.
662+
663+
Unlike .blame(), this does not return the actual file's contents, only
664+
a stream of (commit, range) tuples.
665+
666+
:param rev: revision specifier, see git-rev-parse for viable options.
667+
:return: lazy iterator of (git.Commit, range) tuples, where the commit
668+
indicates the commit to blame for the line, and range
669+
indicates a span of line numbers in the resulting file.
670+
671+
If you combine all line number ranges outputted by this command, you
672+
should get a continuous range spanning all line numbers in the file.
673+
"""
674+
data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs)
675+
commits = dict()
676+
677+
stream = iter(data.splitlines())
678+
while True:
679+
line = next(stream) # when exhausted, raises StopIteration, ending this generator (NOTE: surfaces as RuntimeError on Python 3.7+ per PEP 479)
680+
681+
hexsha, _, lineno, num_lines = line.split()
682+
lineno = int(lineno)
683+
num_lines = int(num_lines)
684+
if hexsha not in commits:
685+
# Now read the next few lines and build up a dict of properties
686+
# for this commit
687+
props = dict()
688+
while True:
689+
line = next(stream)
690+
if line == b'boundary':
691+
# "boundary" indicates a root commit and occurs
692+
# instead of the "previous" tag
693+
continue
694+
695+
tag, value = line.split(b' ', 1)
696+
props[tag] = value
697+
if tag == b'filename':
698+
# "filename" formally terminates the entry for --incremental
699+
break
700+
701+
c = Commit(self, hex_to_bin(hexsha),
702+
author=Actor(safe_decode(props[b'author']),
703+
safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
704+
authored_date=int(props[b'author-time']),
705+
committer=Actor(safe_decode(props[b'committer']),
706+
safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
707+
committed_date=int(props[b'committer-time']),
708+
message=safe_decode(props[b'summary']))
709+
commits[hexsha] = c
710+
else:
711+
# Discard the next line (it's a filename end tag)
712+
line = next(stream)
713+
assert line.startswith(b'filename'), 'Unexpected git blame output'
714+
715+
yield commits[hexsha], range(lineno, lineno + num_lines)
716+
717+
def blame(self, rev, file, incremental=False, **kwargs):
659718
"""The blame information for the given file at the given revision.
660719
661720
:param rev: revision specifier, see git-rev-parse for viable options.
@@ -664,7 +723,10 @@ def blame(self, rev, file):
664723
A list of tuples associating a Commit object with a list of lines that
665724
changed within the given commit. The Commit objects will be given in order
666725
of appearance."""
667-
data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
726+
if incremental:
727+
return self.blame_incremental(rev, file, **kwargs)
728+
729+
data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False, **kwargs)
668730
commits = dict()
669731
blames = list()
670732
info = None

git/test/fixtures/blame_incremental

+30
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
82b8902e033430000481eb355733cd7065342037 2 2 1
2+
author Sebastian Thiel
3+
author-mail <byronimo@gmail.com>
4+
author-time 1270634931
5+
author-tz +0200
6+
committer Sebastian Thiel
7+
committer-mail <byronimo@gmail.com>
8+
committer-time 1270634931
9+
committer-tz +0200
10+
summary Used this release for a first beta of the 0.2 branch of development
11+
previous 501bf602abea7d21c3dbb409b435976e92033145 AUTHORS
12+
filename AUTHORS
13+
82b8902e033430000481eb355733cd7065342037 14 14 1
14+
filename AUTHORS
15+
c76852d0bff115720af3f27acdb084c59361e5f6 1 1 1
16+
author Michael Trier
17+
author-mail <mtrier@gmail.com>
18+
author-time 1232829627
19+
author-tz -0500
20+
committer Michael Trier
21+
committer-mail <mtrier@gmail.com>
22+
committer-time 1232829627
23+
committer-tz -0500
24+
summary Lots of spring cleaning and added in Sphinx documentation.
25+
previous bcd57e349c08bd7f076f8d6d2f39b702015358c1 AUTHORS
26+
filename AUTHORS
27+
c76852d0bff115720af3f27acdb084c59361e5f6 2 3 11
28+
filename AUTHORS
29+
c76852d0bff115720af3f27acdb084c59361e5f6 13 15 2
30+
filename AUTHORS

git/test/test_repo.py

+24
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,16 @@
5050
from nose import SkipTest
5151

5252

53+
def iter_flatten(lol):
54+
for items in lol:
55+
for item in items:
56+
yield item
57+
58+
59+
def flatten(lol):
60+
return list(iter_flatten(lol))
61+
62+
5363
class TestRepo(TestBase):
5464

5565
@raises(InvalidGitRepositoryError)
@@ -323,6 +333,20 @@ def test_blame_real(self):
323333
assert c, "Should have executed at least one blame command"
324334
assert nml, "There should at least be one blame commit that contains multiple lines"
325335

336+
@patch.object(Git, '_call_process')
337+
def test_blame_incremental(self, git):
338+
git.return_value = fixture('blame_incremental')
339+
blame_output = self.rorepo.blame_incremental('9debf6b0aafb6f7781ea9d1383c86939a1aacde3', 'AUTHORS')
340+
blame_output = list(blame_output)
341+
assert len(blame_output) == 5
342+
343+
# Check all outputted line numbers
344+
ranges = flatten([line_numbers for _, line_numbers in blame_output])
345+
assert ranges == flatten([range(2, 3), range(14, 15), range(1, 2), range(3, 14), range(15, 17)]), str(ranges)
346+
347+
commits = [c.hexsha[:7] for c, _ in blame_output]
348+
assert commits == ['82b8902', '82b8902', 'c76852d', 'c76852d', 'c76852d'], str(commits)
349+
326350
@patch.object(Git, '_call_process')
327351
def test_blame_complex_revision(self, git):
328352
git.return_value = fixture('blame_complex_revision')

0 commit comments

Comments
 (0)