From c06eb4f8a85c141a591b2d584378784d5ce7ca1b Mon Sep 17 00:00:00 2001 From: Kaoru Shirai <475350+kaorukobo@users.noreply.github.com> Date: Sat, 16 Aug 2025 16:55:05 +0900 Subject: [PATCH 1/2] fix: properly parse UTF-8(multibyte) file paths in git output --- lib/git/lib.rb | 22 +++++++++++------ tests/units/test_status.rb | 50 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/lib/git/lib.rb b/lib/git/lib.rb index ac671df8..793557d1 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -643,7 +643,7 @@ def ls_tree(sha, opts = {}) args << opts[:path] if opts[:path] command_lines('ls-tree', *args).each do |line| - (info, filenm) = line.split("\t") + (info, filenm) = split_status_line(line) (mode, type, sha) = info.split data[type][filenm] = { mode: mode, sha: sha } end @@ -905,9 +905,9 @@ def ls_files(location = nil) location ||= '.' {}.tap do |files| command_lines('ls-files', '--stage', location).each do |line| - (info, file) = line.split("\t") + (info, file) = split_status_line(line) (mode, sha, stage) = info.split - files[unescape_quoted_path(file)] = { + files[file] = { path: file, mode_index: mode, sha_index: sha, stage: stage } end @@ -956,7 +956,9 @@ def ignored_files end def untracked_files - command_lines('ls-files', '--others', '--exclude-standard', chdir: @git_work_dir) + command_lines('ls-files', '--others', '--exclude-standard', chdir: @git_work_dir).map do |f| + unescape_quoted_path(f) + end end def config_remote(name) @@ -1602,7 +1604,7 @@ def self.warn_if_old_command(lib) # rubocop:disable Naming/PredicateMethod def parse_diff_path_status(args) command_lines('diff', *args).each_with_object({}) do |line, memo| - status, path = line.split("\t") + status, path = split_status_line(line) memo[path] = status end end @@ -1727,7 +1729,7 @@ def parse_diff_stats_output(lines) def parse_stat_lines(lines) lines.map do |line| - insertions_s, deletions_s, filename = line.split("\t") + insertions_s, deletions_s, filename = split_status_line(line) { filename: filename, insertions: insertions_s.to_i, @@ -1736,6 +1738,12 @@ def parse_stat_lines(lines) end end + def split_status_line(line) + parts = line.split("\t") + parts[-1] = unescape_quoted_path(parts[-1]) if parts.any? + parts + end + def build_final_stats_hash(file_stats) { total: build_total_stats(file_stats), @@ -1965,7 +1973,7 @@ def diff_as_hash(diff_command, opts = []) # update index before diffing to avoid spurious diffs command('status') command_lines(diff_command, *opts).each_with_object({}) do |line, memo| - info, file = line.split("\t") + info, file = split_status_line(line) mode_src, mode_dest, sha_src, sha_dest, type = info.split memo[file] = { diff --git a/tests/units/test_status.rb b/tests/units/test_status.rb index bdadf5d8..c655fd39 100644 --- a/tests/units/test_status.rb +++ b/tests/units/test_status.rb @@ -235,4 +235,54 @@ def test_changed_cache assert(!git.status.changed?('test_file_1')) end end + + def test_multibyte_path + # a name consisting of UTF-8 characters + multibyte_name = "\u30DE\u30EB\u30C1\u30D0\u30A4\u30C8\u6587\u5B57\u30D5\u30A1\u30A4\u30EB\u263A" + + in_temp_dir do |_path| + `git init` + + File.write('file1', 'contents1') + `git add file1` + `git commit -m "my message"` + + git = Git.open('.') + + # Test added + File.write("#{multibyte_name}_added.txt", 'contents_mb_added') + `git add #{multibyte_name}_added.txt` + + status = git.status + assert_equal(1, status.added.size) + assert_equal(["#{multibyte_name}_added.txt"], status.added.keys) + + # Test untracked + File.write("#{multibyte_name}_untracked.txt", 'contents_mb_untracked') + + status = git.status + assert_equal(1, status.untracked.size) + assert_equal(["#{multibyte_name}_untracked.txt"], status.untracked.keys) + + # Test changed + File.write("#{multibyte_name}_changed.txt", 'original_content') + `git add #{multibyte_name}_changed.txt` + `git commit -m "add multibyte file"` + File.write("#{multibyte_name}_changed.txt", 'modified_content') + + status = git.status + assert_equal(1, status.changed.size) + assert_equal(["#{multibyte_name}_changed.txt"], status.changed.keys) + + # Test deleted + File.write("#{multibyte_name}_deleted.txt", 'to_be_deleted') + `git add #{multibyte_name}_deleted.txt` + `git commit -m "add file to be deleted"` + File.delete("#{multibyte_name}_deleted.txt") + + status = git.status + assert_equal(1, status.deleted.size) + assert_equal(["#{multibyte_name}_deleted.txt"], status.deleted.keys) + end + end end From 4c4c8f94166c4d3711315bf459e34aec1bf9d846 Mon Sep 17 00:00:00 2001 From: Kaoru Shirai <475350+kaorukobo@users.noreply.github.com> Date: Sun, 17 Aug 2025 13:52:41 +0900 Subject: [PATCH 2/2] chore: update ClassLength max in .rubocop_todo.yml for CI passing --- .rubocop_todo.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 1333d28e..e501510f 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,6 +1,6 @@ # This configuration was generated by # `rubocop --auto-gen-config` -# on 2025-07-06 21:08:14 UTC using RuboCop version 1.77.0. +# on 2025-08-17 04:52:22 UTC using RuboCop version 1.79.2. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new @@ -9,4 +9,4 @@ # Offense count: 2 # Configuration parameters: CountComments, CountAsOne. Metrics/ClassLength: - Max: 1032 + Max: 1039