diff --git a/README.md b/README.md index 354b82da..90aeadc9 100644 --- a/README.md +++ b/README.md @@ -254,6 +254,14 @@ Zip.default_compression = Zlib::DEFAULT_COMPRESSION ``` It defaults to `Zlib::DEFAULT_COMPRESSION`. Possible values are `Zlib::BEST_COMPRESSION`, `Zlib::DEFAULT_COMPRESSION` and `Zlib::NO_COMPRESSION` +Sometimes file names inside a zip archive contain non-ASCII characters. If you can assume which encoding was used for those names and want to be able to find such entries using `find_entry`, then you can force the assumed encoding like so: + +```ruby +Zip.force_entry_names_encoding = 'UTF-8' +``` + +Allowed encoding names are the same as those accepted by `String#force_encoding`. + You can set multiple settings at the same time by using a block: ```ruby diff --git a/lib/zip.rb b/lib/zip.rb index bb44361a..9145207b 100644 --- a/lib/zip.rb +++ b/lib/zip.rb @@ -34,7 +34,15 @@ module Zip extend self - attr_accessor :unicode_names, :on_exists_proc, :continue_on_exists_proc, :sort_entries, :default_compression, :write_zip64_support, :warn_invalid_date, :case_insensitive_match + attr_accessor :unicode_names, + :on_exists_proc, + :continue_on_exists_proc, + :sort_entries, + :default_compression, + :write_zip64_support, + :warn_invalid_date, + :case_insensitive_match, + :force_entry_names_encoding def reset! 
@_ran_once = false diff --git a/lib/zip/entry.rb b/lib/zip/entry.rb index 6e1829ae..791ab32a 100644 --- a/lib/zip/entry.rb +++ b/lib/zip/entry.rb @@ -240,6 +240,9 @@ def read_local_entry(io) #:nodoc:all extra = io.read(@extra_length) @name.tr!('\\', '/') + if ::Zip.force_entry_names_encoding + @name.force_encoding(::Zip.force_entry_names_encoding) + end if extra && extra.bytesize != @extra_length raise ::Zip::Error, 'Truncated local zip entry header' @@ -364,6 +367,9 @@ def read_c_dir_entry(io) #:nodoc:all check_c_dir_entry_signature set_time(@last_mod_date, @last_mod_time) @name = io.read(@name_length) + if ::Zip.force_entry_names_encoding + @name.force_encoding(::Zip.force_entry_names_encoding) + end read_c_dir_extra_field(io) @comment = io.read(@comment_length) check_c_dir_entry_comment_size diff --git a/test/unicode_file_names_and_comments_test.rb b/test/unicode_file_names_and_comments_test.rb index b9b1967a..aac3e256 100644 --- a/test/unicode_file_names_and_comments_test.rb +++ b/test/unicode_file_names_and_comments_test.rb @@ -33,6 +33,18 @@ def test_unicode_file_name assert(filepath == entry_name) end end + + ::Zip.force_entry_names_encoding = 'UTF-8' + ::Zip::File.open(FILENAME) do |zip| + file_entrys.each do |filename| + refute_nil(zip.find_entry(filename)) + end + directory_entrys.each do |filepath| + refute_nil(zip.find_entry(filepath)) + end + end + ::Zip.force_entry_names_encoding = nil + ::File.unlink(FILENAME) end