Skip to content

Commit 89c007d

Browse files
committed
Properly unescape diff paths
Signed-off-by: James Couball <jcouball@yahoo.com>
1 parent ea47044 commit 89c007d

10 files changed

+205
-44
lines changed

lib/git.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
require 'git/branches'
1010
require 'git/config'
1111
require 'git/diff'
12+
require 'git/encoding_utils'
13+
require 'git/escaped_path'
1214
require 'git/index'
1315
require 'git/lib'
1416
require 'git/log'

lib/git/base.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def self.init(directory = '.', options = {})
3636

3737
init_options = {
3838
:bare => options[:bare],
39-
:initial_branch => options[:initial_branch],
39+
:initial_branch => options[:initial_branch]
4040
}
4141

4242
directory = options[:bare] ? options[:repository] : options[:working_directory]

lib/git/diff.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,8 @@ def process_full_diff
129129
final = {}
130130
current_file = nil
131131
@full_diff.split("\n").each do |line|
132-
if m = /^diff --git a\/(.*?) b\/(.*?)/.match(line)
133-
current_file = m[1]
132+
if m = %r{\Adiff --git ("?)a/(.+?)\1 ("?)b/(.+?)\3\z}.match(line)
133+
current_file = Git::EscapedPath.new(m[2]).unescape
134134
final[current_file] = defaults.merge({:patch => line, :path => current_file})
135135
else
136136
if m = /^index ([0-9a-f]{4,40})\.\.([0-9a-f]{4,40})( ......)*/.match(line)

lib/git/encoding_utils.rb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# frozen_string_literal: true
2+
3+
require 'rchardet'
4+
5+
module Git
  # Helpers for detecting the encoding of a string and converting it to the
  # encoding used by this source file.
  module EncodingUtils
    class << self
      # Name of the encoding strings are normalized to (this file's source encoding).
      #
      # @return [String]
      def default_encoding
        __ENCODING__.name
      end

      # Encoding name assumed when detection yields nothing.
      # (Encoding::ASCII_8BIT.name was the alternative considered here.)
      #
      # @return [String]
      def best_guess_encoding
        Encoding::UTF_8.name
      end

      # Ask CharDet for the encoding of +str+, falling back to
      # {best_guess_encoding} when it reports none.
      #
      # @param str [String]
      # @return [String] an encoding name
      def detected_encoding(str)
        detection = CharDet.detect(str)
        detection['encoding'] || best_guess_encoding
      end

      # Options handed to String#encode: invalid and undefined sequences are replaced.
      #
      # @return [Hash]
      def encoding_options
        { invalid: :replace, undef: :replace }
      end

      # Return +str+ converted to {default_encoding}.
      #
      # A string that is already valid in the default encoding is returned as-is;
      # a valid string in another encoding is transcoded from its declared
      # encoding; an invalid string is transcoded from its detected encoding.
      #
      # @param str [String]
      # @return [String]
      def normalize_encoding(str)
        if str.valid_encoding?
          return str if str.encoding.name == default_encoding

          return str.encode(default_encoding, str.encoding, **encoding_options)
        end

        str.encode(default_encoding, detected_encoding(str), **encoding_options)
      end
    end
  end
end

lib/git/escaped_path.rb

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# frozen_string_literal: true
2+
3+
module Git
  # Represents an escaped Git path string
  #
  # Git commands that output paths (e.g. ls-files, diff) escape unusual
  # characters in the path with backslashes in the same way C escapes control
  # characters (e.g. \t for TAB, \n for LF, \\ for backslash), and write bytes
  # with values of 0x80 or larger as octal escapes (e.g. \302\265 for "micro"
  # in UTF-8).
  #
  # @example
  #   Git::EscapedPath.new('\302\265').unescape # => "µ"
  #
  class EscapedPath
    # Maps the character following a backslash to the byte it stands for
    UNESCAPES = {
      'a' => 0x07,
      'b' => 0x08,
      't' => 0x09,
      'n' => 0x0a,
      'v' => 0x0b,
      'f' => 0x0c,
      'r' => 0x0d,
      'e' => 0x1b,
      '\\' => 0x5c,
      '"' => 0x22,
      "'" => 0x27
    }.freeze

    # @return [String] the escaped path given to the constructor
    attr_reader :path

    # @param path [String] the escaped path, without surrounding quotes
    def initialize(path)
      @path = path
    end

    # Convert an escaped path to an unescaped path
    #
    # @return [String] the decoded bytes, tagged as UTF-8
    def unescape
      escaped_path_to_bytes(path).pack('C*').force_encoding(Encoding::UTF_8)
    end

    private

    # Decode the octal escape whose backslash is at +index+.
    #
    # At most three octal digits belong to the escape; reading more would
    # swallow a literal digit that follows it (e.g. "\2651").
    #
    # @return [Array(Array<Integer>, Integer)] decoded bytes and characters consumed
    def extract_octal(path, index)
      digits = path[index + 1, 3][/\A[0-7]+/]
      [[digits.to_i(8)], digits.length + 1]
    end

    # Decode the single-character escape whose backslash is at +index+.
    #
    # @return [Array(Array<Integer>, Integer)] decoded bytes and characters consumed
    def extract_escape(path, index)
      [[UNESCAPES[path[index + 1]]], 2]
    end

    # Pass the character at +index+ through unchanged.
    #
    # All of the character's bytes are kept so that an unescaped multi-byte
    # character is not truncated to a single byte when packed.
    #
    # @return [Array(Array<Integer>, Integer)] the character's bytes and characters consumed
    def extract_single_char(path, index)
      [path[index].bytes, 1]
    end

    # Decode whatever starts at +index+: an octal escape, a single-character
    # escape, or a literal character.
    #
    # @return [Array(Array<Integer>, Integer)] decoded bytes and characters consumed
    def next_bytes(path, index)
      # nil unless this is a backslash followed by another character, so a
      # trailing lone backslash falls through and is kept literally
      escaped = path[index + 1] if path[index] == '\\'

      if escaped&.between?('0', '7')
        extract_octal(path, index)
      elsif escaped && UNESCAPES.key?(escaped)
        extract_escape(path, index)
      else
        extract_single_char(path, index)
      end
    end

    # Walk +path+ left to right, collecting the decoded bytes.
    #
    # @return [Array<Integer>]
    def escaped_path_to_bytes(path)
      index = 0
      [].tap do |bytes|
        while index < path.length
          decoded, chars_used = next_bytes(path, index)
          bytes.concat(decoded)
          index += chars_used
        end
      end
    end
  end
end

lib/git/lib.rb

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
require 'rchardet'
21
require 'tempfile'
32
require 'zlib'
43

@@ -1085,7 +1084,8 @@ def command(cmd, *opts, &block)
10851084
global_opts = []
10861085
global_opts << "--git-dir=#{@git_dir}" if !@git_dir.nil?
10871086
global_opts << "--work-tree=#{@git_work_dir}" if !@git_work_dir.nil?
1088-
global_opts << ["-c", "color.ui=false"]
1087+
global_opts << %w[-c core.quotePath=true]
1088+
global_opts << %w[-c color.ui=false]
10891089

10901090
opts = [opts].flatten.map {|s| escape(s) }.join(' ')
10911091

@@ -1176,35 +1176,10 @@ def log_path_options(opts)
11761176
arr_opts
11771177
end
11781178

1179-
def default_encoding
1180-
__ENCODING__.name
1181-
end
1182-
1183-
def best_guess_encoding
1184-
# Encoding::ASCII_8BIT.name
1185-
Encoding::UTF_8.name
1186-
end
1187-
1188-
def detected_encoding(str)
1189-
CharDet.detect(str)['encoding'] || best_guess_encoding
1190-
end
1191-
1192-
def encoding_options
1193-
{ invalid: :replace, undef: :replace }
1194-
end
1195-
1196-
def normalize_encoding(str)
1197-
return str if str.valid_encoding? && str.encoding.name == default_encoding
1198-
1199-
return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding?
1200-
1201-
str.encode(default_encoding, detected_encoding(str), **encoding_options)
1202-
end
1203-
12041179
def run_command(git_cmd, &block)
12051180
return IO.popen(git_cmd, &block) if block_given?
12061181

1207-
`#{git_cmd}`.lines.map { |l| normalize_encoding(l) }.join
1182+
`#{git_cmd}`.lines.map { |l| Git::EncodingUtils.normalize_encoding(l) }.join
12081183
end
12091184

12101185
def escape(s)
@@ -1225,6 +1200,5 @@ def windows_platform?
12251200
win_platform_regex = /mingw|mswin/
12261201
RUBY_PLATFORM =~ win_platform_regex || RUBY_DESCRIPTION =~ win_platform_regex
12271202
end
1228-
12291203
end
12301204
end

tests/units/test_archive.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_archive
4545

4646
f = @git.object('v2.6').archive(tempfile, :format => 'tar', :prefix => 'test/', :path => 'ex_dir/')
4747
assert(File.exist?(f))
48-
48+
4949
lines = Minitar::Input.open(f).each.to_a.map(&:full_name)
5050
assert_match(%r{test/}, lines[1])
5151
assert_match(%r{test/ex_dir/ex\.txt}, lines[3])
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env ruby
2+
# encoding: utf-8
3+
4+
require File.dirname(__FILE__) + '/../test_helper'
5+
6+
# Test diff when the file path has to be quoted according to core.quotePath
7+
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
8+
#
9+
# End-to-end check that Git::Diff reports a non-ASCII file name in its
# unescaped form rather than as git's quoted/octal-escaped output.
class TestDiffWithEscapedPath < Test::Unit::TestCase
10+
# Commits a file whose name contains "☠", modifies it, and asserts that the
# working-tree diff lists exactly that path.
def test_diff_with_non_ascii_filename
11+
# in_temp_dir, create_file and update_file are helpers defined outside this
# file — presumably the block runs inside a fresh temporary directory; verify
# against test_helper.
in_temp_dir do |path|
12+
create_file('my_other_file_☠', "First Line\n")
13+
`git init`
14+
`git add .`
15+
# Only applied on Windows, before committing the LF-terminated file
`git config --local core.safecrlf false` if Gem.win_platform?
16+
`git commit -m "First Commit"`
17+
# Change the committed file so the diff below has one entry
update_file('my_other_file_☠', "Second Line\n")
18+
diff_paths = Git.open('.').diff.map(&:path)
19+
assert_equal(["my_other_file_☠"], diff_paths)
20+
end
21+
end
22+
end

tests/units/test_escaped_path.rb

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
4+
require "#{File.dirname(__FILE__)}/../test_helper"
5+
6+
# Test unescaping of paths that git has escaped according to core.quotePath
7+
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
8+
# See https://www.jvt.me/posts/2020/06/23/byte-array-to-string-ruby/
9+
# See https://stackoverflow.com/questions/54788845/how-can-i-convert-a-guid-into-a-byte-array-in-ruby
10+
#
11+
# Unit tests for Git::EscapedPath#unescape
class TestEscapedPath < Test::Unit::TestCase
  # A path without escapes is returned unchanged
  def test_simple_path
    path = 'my_other_file'
    expected_unescaped_path = 'my_other_file'
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end

  # Octal escapes are decoded into the UTF-8 bytes they represent
  def test_unicode_path
    path = 'my_other_file_\\342\\230\\240'
    expected_unescaped_path = 'my_other_file_☠'
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end

  # Every entry in UNESCAPES decodes to its mapped byte
  def test_single_char_escapes
    Git::EscapedPath::UNESCAPES.each_pair do |escape_char, expected_char|
      path = "\\#{escape_char}"
      assert_equal(expected_char.chr, Git::EscapedPath.new(path).unescape)
    end
  end

  # Literal characters, octal escapes and single-character escapes can be mixed.
  # The octal sequence \342\230\240 decodes to "☠", so it must appear in the
  # expected value.
  def test_compound_escape
    path = 'my_other_file_"\\342\\230\\240\\n"'
    expected_unescaped_path = "my_other_file_\"☠\n\""
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end
end

tests/units/test_logger.rb

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,32 +7,49 @@ class TestLogger < Test::Unit::TestCase
77
def setup
88
set_file_paths
99
end
10-
10+
11+
def missing_log_entry
12+
'Did not find expected log entry.'
13+
end
14+
15+
def unexpected_log_entry
16+
'Unexpected log entry found'
17+
end
18+
1119
def test_logger
1220
log = Tempfile.new('logfile')
1321
log.close
14-
22+
1523
logger = Logger.new(log.path)
1624
logger.level = Logger::DEBUG
17-
25+
1826
@git = Git.open(@wdir, :log => logger)
1927
@git.branches.size
20-
28+
2129
logc = File.read(log.path)
22-
assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc))
23-
assert(/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc))
2430

31+
expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/
32+
assert_match(expected_log_entry, logc, missing_log_entry)
33+
34+
expected_log_entry = /DEBUG -- : cherry/
35+
assert_match(expected_log_entry, logc, missing_log_entry)
36+
end
37+
38+
def test_logging_at_info_level_should_not_show_debug_messages
2539
log = Tempfile.new('logfile')
2640
log.close
2741
logger = Logger.new(log.path)
2842
logger.level = Logger::INFO
29-
43+
3044
@git = Git.open(@wdir, :log => logger)
3145
@git.branches.size
32-
46+
3347
logc = File.read(log.path)
34-
assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc))
35-
assert(!/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc))
48+
49+
expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/
50+
assert_match(expected_log_entry, logc, missing_log_entry)
51+
52+
expected_log_entry = /DEBUG -- : cherry/
53+
assert_not_match(expected_log_entry, logc, unexpected_log_entry)
3654
end
37-
3855
end

0 commit comments

Comments
 (0)