Skip to content

Commit f5cd6a6

Browse files
authored
Implementation and tests required to ensure that command output encoding does not raise an exception (ruby-git#405)
Signed-off-by: James Couball <jcouball@yahoo.com>
1 parent 2402674 commit f5cd6a6

32 files changed

+763
-17
lines changed

git.gemspec

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ Gem::Specification.new do |s|
1515
s.required_rubygems_version = Gem::Requirement.new('>= 0') if s.respond_to?(:required_rubygems_version=)
1616
s.requirements = ['git 1.6.0.0, or greater']
1717

18+
s.add_runtime_dependency 'rchardet', '~> 1.8'
19+
1820
s.add_development_dependency 'rake'
1921
s.add_development_dependency 'rdoc'
2022
s.add_development_dependency 'test-unit', '>=2', '< 4'

lib/git/diff.rb

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,12 +127,7 @@ def process_full_diff
127127
}
128128
final = {}
129129
current_file = nil
130-
if @full_diff.encoding.name != "UTF-8"
131-
full_diff_utf8_encoded = @full_diff.encode("UTF-8", "binary", { :invalid => :replace, :undef => :replace })
132-
else
133-
full_diff_utf8_encoded = @full_diff
134-
end
135-
full_diff_utf8_encoded.split("\n").each do |line|
130+
@full_diff.split("\n").each do |line|
136131
if m = /^diff --git a\/(.*?) b\/(.*?)/.match(line)
137132
current_file = m[1]
138133
final[current_file] = defaults.merge({:patch => line, :path => current_file})

lib/git/lib.rb

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
require 'rchardet'
12
require 'tempfile'
23

34
module Git
@@ -900,16 +901,7 @@ def meets_required_version?
900901
ENV_VARIABLE_NAMES = ['GIT_DIR', 'GIT_WORK_TREE', 'GIT_INDEX_FILE', 'GIT_SSH']
901902

902903
def command_lines(cmd, opts = [], chdir = true, redirect = '')
903-
cmd_op = command(cmd, opts, chdir)
904-
if cmd_op.encoding.name != "UTF-8"
905-
op = cmd_op.encode("UTF-8", "binary", {
906-
:invalid => :replace,
907-
:undef => :replace
908-
})
909-
else
910-
op = cmd_op
911-
end
912-
op.split("\n")
904+
command(cmd, opts, chdir).lines.map(&:chomp)
913905
end
914906

915907
# Takes the current git's system ENV variables and store them.
@@ -1039,10 +1031,35 @@ def log_path_options(opts)
10391031
arr_opts
10401032
end
10411033

1034+
# Name of the encoding that command output is normalized to: the encoding
# of this source file.
#
# Returns a String encoding name (not an Encoding object).
def default_encoding
  __ENCODING__.name
end

# Name of the source encoding to assume when detection returns nothing.
#
# Returns a String encoding name.
def best_guess_encoding
  # Encoding::ASCII_8BIT.name
  Encoding::UTF_8.name
end

# Guess the encoding of +str+ with CharDet, falling back to
# best_guess_encoding when the detector reports no encoding.
#
# Returns a String encoding name.
def detected_encoding(str)
  CharDet.detect(str)['encoding'] || best_guess_encoding
end

# Options handed to String#encode: replace, rather than raise on, bytes
# that are invalid in the source encoding or undefined in the target.
def encoding_options
  { invalid: :replace, undef: :replace }
end

# Return +str+ converted to default_encoding.
#
# * A valid string already in the default encoding is returned as-is
#   (same object, no copy).
# * A valid string in another encoding is transcoded from its own encoding.
# * A string with an invalid byte sequence is transcoded from the encoding
#   guessed by detected_encoding.
#
# Unconvertible bytes are replaced as described by encoding_options.
def normalize_encoding(str)
  # default_encoding is a String, so compare against the encoding's name;
  # comparing the Encoding object itself to a String is never equal.
  return str if str.valid_encoding? && str.encoding.name == default_encoding

  # The options must be passed as keywords: a positional Hash is rejected
  # as a third argument by String#encode on Ruby 3.
  return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding?

  str.encode(default_encoding, detected_encoding(str), **encoding_options)
end
1058+
10421059
def run_command(git_cmd, &block)
10431060
return IO.popen(git_cmd, &block) if block_given?
10441061

1045-
`#{git_cmd}`.chomp
1062+
`#{git_cmd}`.chomp.lines.map { |l| normalize_encoding(l) }.join
10461063
end
10471064

10481065
def escape(s)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
A file with Japanese text

tests/files/encoding/dot_git/HEAD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ref: refs/heads/master

tests/files/encoding/dot_git/config

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[core]
2+
repositoryformatversion = 0
3+
filemode = true
4+
bare = false
5+
logallrefupdates = true
6+
ignorecase = true
7+
precomposeunicode = true
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Unnamed repository; edit this file 'description' to name the repository.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/sh
2+
#
3+
# An example hook script to check the commit log message taken by
4+
# applypatch from an e-mail message.
5+
#
6+
# The hook should exit with non-zero status after issuing an
7+
# appropriate message if it wants to stop the commit. The hook is
8+
# allowed to edit the commit message file.
9+
#
10+
# To enable this hook, rename this file to "applypatch-msg".
11+
12+
. git-sh-setup
13+
commitmsg="$(git rev-parse --git-path hooks/commit-msg)"
14+
test -x "$commitmsg" && exec "$commitmsg" ${1+"$@"}
15+
:
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/sh
2+
#
3+
# An example hook script to check the commit log message.
4+
# Called by "git commit" with one argument, the name of the file
5+
# that has the commit message. The hook should exit with non-zero
6+
# status after issuing an appropriate message if it wants to stop the
7+
# commit. The hook is allowed to edit the commit message file.
8+
#
9+
# To enable this hook, rename this file to "commit-msg".
10+
11+
# Uncomment the below to add a Signed-off-by line to the message.
12+
# Doing this in a hook is a bad idea in general, but the prepare-commit-msg
13+
# hook is more suited to it.
14+
#
15+
# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p')
16+
# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1"
17+
18+
# This example catches duplicate Signed-off-by lines.
19+
20+
test "" = "$(grep '^Signed-off-by: ' "$1" |
21+
sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || {
22+
echo >&2 Duplicate Signed-off-by lines.
23+
exit 1
24+
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#!/usr/bin/perl
2+
3+
use strict;
4+
use warnings;
5+
use IPC::Open2;
6+
7+
# An example hook script to integrate Watchman
8+
# (https://facebook.github.io/watchman/) with git to speed up detecting
9+
# new and modified files.
10+
#
11+
# The hook is passed a version (currently 1) and a time in nanoseconds
12+
# formatted as a string and outputs to stdout all files that have been
13+
# modified since the given time. Paths must be relative to the root of
14+
# the working tree and separated by a single NUL.
15+
#
16+
# To enable this hook, rename this file to "query-watchman" and set
17+
# 'git config core.fsmonitor .git/hooks/query-watchman'
18+
#
19+
my ($version, $time) = @ARGV;
20+
21+
# Check the hook interface version
22+
23+
if ($version == 1) {
24+
# convert nanoseconds to seconds
25+
$time = int $time / 1000000000;
26+
} else {
27+
die "Unsupported query-fsmonitor hook version '$version'.\n" .
28+
"Falling back to scanning...\n";
29+
}
30+
31+
my $git_work_tree;
32+
if ($^O =~ 'msys' || $^O =~ 'cygwin') {
33+
$git_work_tree = Win32::GetCwd();
34+
$git_work_tree =~ tr/\\/\//;
35+
} else {
36+
require Cwd;
37+
$git_work_tree = Cwd::cwd();
38+
}
39+
40+
my $retry = 1;
41+
42+
launch_watchman();
43+
44+
# Query Watchman for the files modified since $time and print their names
# to STDOUT separated by NUL.
#
# Reads the file-scoped $git_work_tree, $time and $retry.  When Watchman
# reports that the work tree is not watched yet, and $retry allows it, the
# tree is added to the watch list, "/" is printed so the caller treats
# everything as dirty, the query is re-run once and the script exits 0.
#
# Dies with a "Falling back to scanning..." message when Watchman cannot be
# started, returns no or non-JSON output, or reports an error.
sub launch_watchman {

    # open2() raises an exception on failure instead of returning false,
    # so trap it here; a plain "or die" would never run.
    my ($chld_out, $chld_in);
    my $pid = eval { open2($chld_out, $chld_in, 'watchman -j --no-pretty') };
    if (!$pid) {
        chomp(my $reason = $@ || "$!");
        die "open2() failed: $reason\n" .
            "Falling back to scanning...\n";
    }

    # In the query expression below we're asking for names of files that
    # changed since $time but were not transient (ie created after
    # $time but no longer exist).
    #
    # To accomplish this, we're using the "since" generator to use the
    # recency index to select candidate nodes and "fields" to limit the
    # output to file names only. Then we're using the "expression" term to
    # further constrain the results.
    #
    # The category of transient files that we want to ignore will have a
    # creation clock (cclock) newer than $time_t value and will also not
    # currently exist.

    my $query = <<"END";
["query", "$git_work_tree", {
    "since": $time,
    "fields": ["name"],
    "expression": ["not", ["allof", ["since", $time, "cclock"], ["not", "exists"]]]
}]
END

    print {$chld_in} $query;
    close $chld_in;
    my $response = do { local $/; <$chld_out> };
    close $chld_out;

    # Reap the child so it does not linger as a zombie.
    waitpid $pid, 0;

    # A failed read yields undef; treat it like empty output.
    $response = "" unless defined $response;

    die "Watchman: command returned no output.\n" .
        "Falling back to scanning...\n" if $response eq "";
    die "Watchman: command returned invalid output: $response\n" .
        "Falling back to scanning...\n" unless $response =~ /^\{/;

    # Prefer the faster JSON::XS and fall back to the core JSON::PP.
    my $json_pkg;
    eval {
        require JSON::XS;
        $json_pkg = "JSON::XS";
        1;
    } or do {
        require JSON::PP;
        $json_pkg = "JSON::PP";
    };

    my $o = $json_pkg->new->utf8->decode($response);

    if ($retry > 0 and $o->{error} and $o->{error} =~ m/unable to resolve root .* directory (.*) is not watched/) {
        print STDERR "Adding '$git_work_tree' to watchman's watch list.\n";
        $retry--;
        # qx// (not system) so watchman's stdout is captured and discarded
        # rather than mixed into this hook's own output.
        qx/watchman watch "$git_work_tree"/;
        die "Failed to make watchman watch '$git_work_tree'.\n" .
            "Falling back to scanning...\n" if $? != 0;

        # Watchman will always return all files on the first query so
        # return the fast "everything is dirty" flag to git and do the
        # Watchman query just to get it over with now so we won't pay
        # the cost in git to look up each individual file.
        print "/\0";
        eval { launch_watchman() };
        exit 0;
    }

    die "Watchman: $o->{error}.\n" .
        "Falling back to scanning...\n" if $o->{error};

    binmode STDOUT, ":utf8";
    local $, = "\0";
    print @{$o->{files}};
}

0 commit comments

Comments
 (0)