From bea96e0a69cb53efbfc69e2a7134a184b39f957f Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Sat, 15 Feb 2025 11:58:15 +1300 Subject: [PATCH 1/7] Pass through all options if present. --- lib/json/common.rb | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/lib/json/common.rb b/lib/json/common.rb index 005bac5c..3f1e5ce9 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -840,7 +840,7 @@ def dump(obj, anIO = nil, limit = nil, kwargs = nil) opts = JSON.dump_default_options opts = opts.merge(:max_nesting => limit) if limit - opts = merge_dump_options(opts, **kwargs) if kwargs + opts = opts.merge(kwargs) if kwargs begin State.generate(obj, opts, anIO) @@ -854,15 +854,6 @@ def self.iconv(to, from, string) string.encode(to, from) end - def merge_dump_options(opts, strict: NOT_SET) - opts = opts.merge(strict: strict) if NOT_SET != strict - opts - end - - class << self - private :merge_dump_options - end - # JSON::Coder holds a parser and generator configuration. # # module MyApp From e144793b7226c2df75c414749d6f87ab7fcf4dce Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 25 Feb 2025 17:12:26 +0100 Subject: [PATCH 2/7] Ensure parser error snippets are valid UTF-8 Fix: https://github.com/ruby/json/issues/755 Error messages now include a snippet of the document that doesn't parse to help locate the issue, however the way it was done wasn't UTF-8 aware, and it could result in exception messages with truncated characters. It would be nice to go a bit farther and actually support codepoints, but it's a lot of complexity to do it in C, perhaps if we move that logic to Ruby given it's not a performance sensitive codepath. --- CHANGES.md | 2 ++ ext/json/ext/parser/parser.c | 15 ++++++++++++--- test/json/json_parser_test.rb | 16 ++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 202bf2d5..d2ed4052 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,7 @@ # Changes +* Ensure document snippets that are included in parser errors don't include truncated multibyte characters. + ### 2025-02-10 (2.10.1) * Fix a compatibility issue with `MultiJson.dump(obj, pretty: true)`: `no implicit conversion of false into Proc (TypeError)`. diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index c21a5fda..776eb916 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -454,15 +454,24 @@ RBIMPL_ATTR_NORETURN() #endif static void raise_parse_error(const char *format, const char *start) { - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; + unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0; const char *ptr = start; if (len == PARSE_ERROR_FRAGMENT_LEN) { MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; + + while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte + len--; + } + + if (buffer[len - 1] >= 0xC0) { // multibyte character start + len--; + } + + buffer[len] = '\0'; + ptr = (const char *)buffer; } rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index d1f084bb..ae0f285d 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -645,6 +645,22 @@ def test_parse_error_incomplete_hash end end + def test_parse_error_snippet + omit "C ext only test" unless RUBY_ENGINE == "ruby" + + error = assert_raise(JSON::ParserError) { JSON.parse("あああああああああああああああああああああああ") } + assert_equal "unexpected character: 'ああああああああああ'", error.message + + error = assert_raise(JSON::ParserError) { JSON.parse("aあああああああああああああああああああああああ") } + assert_equal "unexpected character: 'aああああああああああ'", error.message + + error = assert_raise(JSON::ParserError) { JSON.parse("abあああああああああああああああああああああああ") } + assert_equal "unexpected character: 'abあああああああああ'", error.message + + error = assert_raise(JSON::ParserError) { JSON.parse("abcあああああああああああああああああああああああ") } + assert_equal "unexpected character: 'abcあああああああああ'", error.message + end + def test_parse_leading_slash # ref: https://github.com/ruby/ruby/pull/12598 assert_raise(JSON::ParserError) do From 2e015ff839ed2044ead0fd27b63a912766270a1b Mon Sep 17 00:00:00 2001 From: Rahim Packir Saibo Date: Wed, 5 Mar 2025 18:03:03 +0000 Subject: [PATCH 3/7] Fix JSON::GeneratorError#detailed_message with Ruby < 3.2 --- lib/json/common.rb | 7 +++++-- test/json/json_generator_test.rb | 8 ++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/json/common.rb b/lib/json/common.rb index 3f1e5ce9..9094df00 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -152,10 +152,13 @@ def initialize(message, invalid_object = nil) end def detailed_message(...) + # Exception#detailed_message doesn't exist until Ruby 3.2 + super_message = defined?(super) ? super : message + if @invalid_object.nil? - super + super_message else - "#{super}\nInvalid object: #{@invalid_object.inspect}" + "#{super_message}\nInvalid object: #{@invalid_object.inspect}" end end end diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index c67cd334..4a92801f 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -410,6 +410,14 @@ def test_json_generate end end + def test_json_generate_error_detailed_message + error = assert_raise JSON::GeneratorError do + generate(["\xea"]) + end + + assert_not_nil(error.detailed_message) + end + def test_json_generate_unsupported_types assert_raise JSON::GeneratorError do generate(Object.new, strict: true) From c079793b7655b749a4d85f5c8e6bd2649fd31c0c Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 10 Mar 2025 04:34:57 -0500 Subject: [PATCH 4/7] Avoid fast-path IO writes when IO has ext enc This works around a bug in JRuby's IOOutputStream logic whereby an IO with an external encoding will always fail to write incoming bytes. We use base logic to detect if the target object is a "real IO" and if so and it has an external encoding, we drop the realIO and. This causes the rest of IOOutputStream to avoid the fast path and always use dyncall logic with RubyString wrappers. This works around the issue in jruby/jruby#8682. This should be temporary, since it will definitely degrade direct writes to such IO objects, but a longer-term fix for the encoding issues spelled out in jruby/jruby#8682 will need to come first, or else json will have to be modified to not use IOOutputStream at all. --- java/src/json/ext/Generator.java | 34 +++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index c46a1e47..85250920 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -5,6 +5,7 @@ */ package json.ext; +import org.jcodings.Encoding; import org.jcodings.specific.UTF8Encoding; import org.jruby.Ruby; import org.jruby.RubyArray; @@ -15,6 +16,7 @@ import org.jruby.RubyFixnum; import org.jruby.RubyFloat; import org.jruby.RubyHash; +import org.jruby.RubyIO; import org.jruby.RubyString; import org.jruby.RubySymbol; import org.jruby.runtime.Helpers; @@ -81,11 +83,41 @@ static RubyString generateJson(ThreadContext context, T return handler.generateNew(context, session, object); } - BufferedOutputStream buffer = new BufferedOutputStream(new IOOutputStream(io), IO_BUFFER_SIZE); + BufferedOutputStream buffer = + new BufferedOutputStream( + new PatchedIOOutputStream(io, UTF8Encoding.INSTANCE), + IO_BUFFER_SIZE); handler.generateToBuffer(context, session, object, buffer); return io; } + /** + * A version of IOOutputStream hacked to avoid fast-path RubyIO calls when the target IO has an external encoding. + * + * All calls to the underlying IO will be done dynamically and all incoming bytes wrapped in RubyString instances. + * This avoids bugs in the fast-path logic in JRuby 9.4.12.0 and earlier that fails to properly handle writing bytes + * when the source and target destination are the same. + * + * See https://github.com/jruby/jruby/issues/8682 + */ + private static class PatchedIOOutputStream extends IOOutputStream { + public PatchedIOOutputStream(IRubyObject io, Encoding encoding) { + super(io, encoding); + } + + @Override + public RubyIO getRealIO(IRubyObject io) { + RubyIO realIO = super.getRealIO(io); + + // if the real IO has an external encoding, don't use fast path + if (realIO == null || realIO.getEnc() != null) { + return null; + } + + return realIO; + } + } + /** * Returns the best serialization handler for the given object. */ From 7d0637b9e6e0269c88418b142cb9a1ef2799587d Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 11 Mar 2025 20:50:26 +0100 Subject: [PATCH 5/7] Raise a ParserError on all incomplete unicode escape sequence. This was the behavior until `2.10.0` unadvertently changed it. `"\u1"` would raise, but `"\u1zzz"` wouldn't. --- CHANGES.md | 1 + ext/json/ext/parser/parser.c | 85 +++++++++++++++++------------------ test/json/json_parser_test.rb | 5 +++ 3 files changed, 48 insertions(+), 43 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d2ed4052..f57dd36a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,6 @@ # Changes +* Raise a ParserError on all incomplete unicode escape sequence. This was the behavior until `2.10.0` unadvertently changed it. * Ensure document snippets that are included in parser errors don't include truncated multibyte characters. ### 2025-02-10 (2.10.1) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 776eb916..0a1d9375 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -341,6 +341,44 @@ static void rvalue_stack_eagerly_release(VALUE handle) } } + +#ifndef HAVE_STRNLEN +static size_t strnlen(const char *s, size_t maxlen) +{ + char *p; + return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); +} +#endif + +#define PARSE_ERROR_FRAGMENT_LEN 32 +#ifdef RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_parse_error(const char *format, const char *start) +{ + unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; + + size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0; + const char *ptr = start; + + if (len == PARSE_ERROR_FRAGMENT_LEN) { + MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); + + while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte + len--; + } + + if (buffer[len - 1] >= 0xC0) { // multibyte character start + len--; + } + + buffer[len] = '\0'; + ptr = (const char *)buffer; + } + + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); +} + /* unicode */ static const signed char digit_values[256] = { @@ -362,21 +400,19 @@ static const signed char digit_values[256] = { static uint32_t unescape_unicode(const unsigned char *p) { - const uint32_t replacement_char = 0xFFFD; - signed char b; uint32_t result = 0; b = digit_values[p[0]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; b = digit_values[p[1]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; b = digit_values[p[2]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; b = digit_values[p[3]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; return result; } @@ -440,43 +476,6 @@ typedef struct JSON_ParserStateStruct { static const rb_data_type_t JSON_ParserConfig_type; -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) -{ - char *p; - return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); -} -#endif - -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; - - size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0; - const char *ptr = start; - - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - - while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte - len--; - } - - if (buffer[len - 1] >= 0xC0) { // multibyte character start - len--; - } - - buffer[len] = '\0'; - ptr = (const char *)buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} - static const bool whitespace[256] = { [' '] = 1, ['\t'] = 1, diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index ae0f285d..87b78fb0 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -311,6 +311,11 @@ def test_invalid_unicode_escape assert_raise(JSON::ParserError) { parse('"\uaa"') } assert_raise(JSON::ParserError) { parse('"\uaaa"') } assert_equal "\uaaaa", parse('"\uaaaa"') + + assert_raise(JSON::ParserError) { parse('"\u______"') } + assert_raise(JSON::ParserError) { parse('"\u1_____"') } + assert_raise(JSON::ParserError) { parse('"\u11____"') } + assert_raise(JSON::ParserError) { parse('"\u111___"') } end def test_parse_big_integers From cf242d89a0523bacd5238a59c77b33411b8c3208 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 11 Mar 2025 18:49:14 +0100 Subject: [PATCH 6/7] Fix potential out of bound read in `json_string_unescape`. --- CHANGES.md | 1 + ext/json/ext/parser/parser.c | 2 +- test/json/json_parser_test.rb | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index f57dd36a..bae88d71 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,6 @@ # Changes +* Fix a potential crash in the C extension parser. * Raise a ParserError on all incomplete unicode escape sequence. This was the behavior until `2.10.0` unadvertently changed it. * Ensure document snippets that are included in parser errors don't include truncated multibyte characters. diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 0a1d9375..d990612a 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -608,7 +608,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c buffer = RSTRING_PTR(result); bufferStart = buffer; - while ((pe = memchr(pe, '\\', stringEnd - pe))) { + while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) { unescape = (char *) "?"; unescape_len = 1; if (pe > p) { diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 87b78fb0..5519d4c5 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -673,6 +673,13 @@ def test_parse_leading_slash end end + def test_parse_malformated_unicode_escapes + assert_equal "�|", JSON.parse('"\\u1gef|"') + assert_equal "�|", JSON.parse('"\\u12ge|"') + assert_equal "�|", JSON.parse('"\\u123g|"') + assert_equal '�\\\\"', JSON.parse('"\\u1\\\\\\\\\\\\\\\\"') + end + private def string_deduplication_available? From 350c1fd154eaf7840f696c623362478a9148166c Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 12 Mar 2025 14:06:14 +0100 Subject: [PATCH 7/7] Release 2.10.2 --- CHANGES.md | 2 ++ lib/json/version.rb | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index bae88d71..69762423 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,7 @@ # Changes +### 2025-03-12 (2.10.2) + * Fix a potential crash in the C extension parser. * Raise a ParserError on all incomplete unicode escape sequence. This was the behavior until `2.10.0` unadvertently changed it. * Ensure document snippets that are included in parser errors don't include truncated multibyte characters. diff --git a/lib/json/version.rb b/lib/json/version.rb index 65bcb976..9c21f18b 100644 --- a/lib/json/version.rb +++ b/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.10.1' + VERSION = '2.10.2' end