diff --git a/CHANGES.md b/CHANGES.md index 202bf2d5..69762423 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,11 @@ # Changes +### 2025-03-12 (2.10.2) + +* Fix a potential crash in the C extension parser. +* Raise a ParserError on all incomplete unicode escape sequences. This was the behavior until `2.10.0` inadvertently changed it. +* Ensure document snippets that are included in parser errors don't include truncated multibyte characters. + ### 2025-02-10 (2.10.1) * Fix a compatibility issue with `MultiJson.dump(obj, pretty: true)`: `no implicit conversion of false into Proc (TypeError)`. diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index c21a5fda..d990612a 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -341,6 +341,44 @@ static void rvalue_stack_eagerly_release(VALUE handle) } } + +#ifndef HAVE_STRNLEN +static size_t strnlen(const char *s, size_t maxlen) +{ + char *p; + return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); +} +#endif + +#define PARSE_ERROR_FRAGMENT_LEN 32 +#ifdef RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_parse_error(const char *format, const char *start) +{ + unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; + + size_t len = start ? 
strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0; + const char *ptr = start; + + if (len == PARSE_ERROR_FRAGMENT_LEN) { + MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); + + while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte + len--; + } + + if (buffer[len - 1] >= 0xC0) { // multibyte character start + len--; + } + + buffer[len] = '\0'; + ptr = (const char *)buffer; + } + + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); +} + /* unicode */ static const signed char digit_values[256] = { @@ -362,21 +400,19 @@ static const signed char digit_values[256] = { static uint32_t unescape_unicode(const unsigned char *p) { - const uint32_t replacement_char = 0xFFFD; - signed char b; uint32_t result = 0; b = digit_values[p[0]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; b = digit_values[p[1]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; b = digit_values[p[2]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; b = digit_values[p[3]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; return result; } @@ -440,34 +476,6 @@ typedef struct JSON_ParserStateStruct { static const rb_data_type_t JSON_ParserConfig_type; -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) -{ - char *p; - return ((p = memchr(s, '\0', maxlen)) ? 
p - s : maxlen); -} -#endif - -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; - - size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0; - const char *ptr = start; - - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} - static const bool whitespace[256] = { [' '] = 1, ['\t'] = 1, @@ -600,7 +608,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c buffer = RSTRING_PTR(result); bufferStart = buffer; - while ((pe = memchr(pe, '\\', stringEnd - pe))) { + while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) { unescape = (char *) "?"; unescape_len = 1; if (pe > p) { diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index c46a1e47..85250920 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -5,6 +5,7 @@ */ package json.ext; +import org.jcodings.Encoding; import org.jcodings.specific.UTF8Encoding; import org.jruby.Ruby; import org.jruby.RubyArray; @@ -15,6 +16,7 @@ import org.jruby.RubyFixnum; import org.jruby.RubyFloat; import org.jruby.RubyHash; +import org.jruby.RubyIO; import org.jruby.RubyString; import org.jruby.RubySymbol; import org.jruby.runtime.Helpers; @@ -81,11 +83,41 @@ static RubyString generateJson(ThreadContext context, T return handler.generateNew(context, session, object); } - BufferedOutputStream buffer = new BufferedOutputStream(new IOOutputStream(io), IO_BUFFER_SIZE); + BufferedOutputStream buffer = + new BufferedOutputStream( + new PatchedIOOutputStream(io, UTF8Encoding.INSTANCE), + IO_BUFFER_SIZE); handler.generateToBuffer(context, session, object, buffer); return io; } + /** 
+ * A version of IOOutputStream hacked to avoid fast-path RubyIO calls when the target IO has an external encoding. + * + * All calls to the underlying IO will be done dynamically and all incoming bytes wrapped in RubyString instances. + * This avoids bugs in the fast-path logic in JRuby 9.4.12.0 and earlier that fails to properly handle writing bytes + * when the source and target destination are the same. + * + * See https://github.com/jruby/jruby/issues/8682 + */ + private static class PatchedIOOutputStream extends IOOutputStream { + public PatchedIOOutputStream(IRubyObject io, Encoding encoding) { + super(io, encoding); + } + + @Override + public RubyIO getRealIO(IRubyObject io) { + RubyIO realIO = super.getRealIO(io); + + // if the real IO has an external encoding, don't use fast path + if (realIO == null || realIO.getEnc() != null) { + return null; + } + + return realIO; + } + } + /** * Returns the best serialization handler for the given object. */ diff --git a/lib/json/common.rb b/lib/json/common.rb index 005bac5c..9094df00 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -152,10 +152,13 @@ def initialize(message, invalid_object = nil) end def detailed_message(...) + # Exception#detailed_message doesn't exist until Ruby 3.2 + super_message = defined?(super) ? super : message + if @invalid_object.nil? 
- super + super_message else - "#{super}\nInvalid object: #{@invalid_object.inspect}" + "#{super_message}\nInvalid object: #{@invalid_object.inspect}" end end end @@ -840,7 +843,7 @@ def dump(obj, anIO = nil, limit = nil, kwargs = nil) opts = JSON.dump_default_options opts = opts.merge(:max_nesting => limit) if limit - opts = merge_dump_options(opts, **kwargs) if kwargs + opts = opts.merge(kwargs) if kwargs begin State.generate(obj, opts, anIO) @@ -854,15 +857,6 @@ def self.iconv(to, from, string) string.encode(to, from) end - def merge_dump_options(opts, strict: NOT_SET) - opts = opts.merge(strict: strict) if NOT_SET != strict - opts - end - - class << self - private :merge_dump_options - end - # JSON::Coder holds a parser and generator configuration. # # module MyApp diff --git a/lib/json/version.rb b/lib/json/version.rb index 65bcb976..9c21f18b 100644 --- a/lib/json/version.rb +++ b/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.10.1' + VERSION = '2.10.2' end diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index c67cd334..4a92801f 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -410,6 +410,14 @@ def test_json_generate end end + def test_json_generate_error_detailed_message + error = assert_raise JSON::GeneratorError do + generate(["\xea"]) + end + + assert_not_nil(error.detailed_message) + end + def test_json_generate_unsupported_types assert_raise JSON::GeneratorError do generate(Object.new, strict: true) diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index d1f084bb..87b78fb0 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -311,6 +311,11 @@ def test_invalid_unicode_escape assert_raise(JSON::ParserError) { parse('"\uaa"') } assert_raise(JSON::ParserError) { parse('"\uaaa"') } assert_equal "\uaaaa", parse('"\uaaaa"') + + assert_raise(JSON::ParserError) { parse('"\u______"') } + 
assert_raise(JSON::ParserError) { parse('"\u1_____"') } + assert_raise(JSON::ParserError) { parse('"\u11____"') } + assert_raise(JSON::ParserError) { parse('"\u111___"') } end def test_parse_big_integers @@ -645,6 +650,22 @@ def test_parse_error_incomplete_hash end end + def test_parse_error_snippet + omit "C ext only test" unless RUBY_ENGINE == "ruby" + + error = assert_raise(JSON::ParserError) { JSON.parse("あああああああああああああああああああああああ") } + assert_equal "unexpected character: 'ああああああああああ'", error.message + + error = assert_raise(JSON::ParserError) { JSON.parse("aあああああああああああああああああああああああ") } + assert_equal "unexpected character: 'aああああああああああ'", error.message + + error = assert_raise(JSON::ParserError) { JSON.parse("abあああああああああああああああああああああああ") } + assert_equal "unexpected character: 'abあああああああああ'", error.message + + error = assert_raise(JSON::ParserError) { JSON.parse("abcあああああああああああああああああああああああ") } + assert_equal "unexpected character: 'abcあああああああああ'", error.message + end + def test_parse_leading_slash # ref: https://github.com/ruby/ruby/pull/12598 assert_raise(JSON::ParserError) do