From bea96e0a69cb53efbfc69e2a7134a184b39f957f Mon Sep 17 00:00:00 2001
From: Samuel Williams <samuel.williams@oriontransfer.co.nz>
Date: Sat, 15 Feb 2025 11:58:15 +1300
Subject: [PATCH 1/7] Pass through all options if present.

---
 lib/json/common.rb | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/lib/json/common.rb b/lib/json/common.rb
index 005bac5c..3f1e5ce9 100644
--- a/lib/json/common.rb
+++ b/lib/json/common.rb
@@ -840,7 +840,7 @@ def dump(obj, anIO = nil, limit = nil, kwargs = nil)
 
     opts = JSON.dump_default_options
     opts = opts.merge(:max_nesting => limit) if limit
-    opts = merge_dump_options(opts, **kwargs) if kwargs
+    opts = opts.merge(kwargs) if kwargs
 
     begin
       State.generate(obj, opts, anIO)
@@ -854,15 +854,6 @@ def self.iconv(to, from, string)
     string.encode(to, from)
   end
 
-  def merge_dump_options(opts, strict: NOT_SET)
-    opts = opts.merge(strict: strict) if NOT_SET != strict
-    opts
-  end
-
-  class << self
-    private :merge_dump_options
-  end
-
   # JSON::Coder holds a parser and generator configuration.
   #
   #   module MyApp

From e144793b7226c2df75c414749d6f87ab7fcf4dce Mon Sep 17 00:00:00 2001
From: Jean Boussier <jean.boussier@gmail.com>
Date: Tue, 25 Feb 2025 17:12:26 +0100
Subject: [PATCH 2/7] Ensure parser error snippets are valid UTF-8

Fix: https://github.com/ruby/json/issues/755

Error messages now include a snippet of the document
that doesn't parse to help locate the issue, however
the way it was done wasn't UTF-8 aware, and it could
result in exception messages with truncated characters.

It would be nice to go a bit farther and actually support
codepoints, but it's a lot of complexity to do it in C,
perhaps if we move that logic to Ruby given it's not a
performance sensitive codepath.
---
 CHANGES.md                    |  2 ++
 ext/json/ext/parser/parser.c  | 15 ++++++++++++---
 test/json/json_parser_test.rb | 16 ++++++++++++++++
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 202bf2d5..d2ed4052 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,7 @@
 # Changes
 
+* Ensure document snippets that are included in parser errors don't include truncated multibyte characters.
+
 ### 2025-02-10 (2.10.1)
 
 * Fix a compatibility issue with `MultiJson.dump(obj, pretty: true)`: `no implicit conversion of false into Proc (TypeError)`.
diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
index c21a5fda..776eb916 100644
--- a/ext/json/ext/parser/parser.c
+++ b/ext/json/ext/parser/parser.c
@@ -454,15 +454,24 @@ RBIMPL_ATTR_NORETURN()
 #endif
 static void raise_parse_error(const char *format, const char *start)
 {
-    char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
+    unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
 
     size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
     const char *ptr = start;
 
     if (len == PARSE_ERROR_FRAGMENT_LEN) {
         MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
-        buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
-        ptr = buffer;
+
+        while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
+            len--;
+        }
+
+        if (buffer[len - 1] >= 0xC0) { // multibyte character start
+            len--;
+        }
+
+        buffer[len] = '\0';
+        ptr = (const char *)buffer;
     }
 
     rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb
index d1f084bb..ae0f285d 100644
--- a/test/json/json_parser_test.rb
+++ b/test/json/json_parser_test.rb
@@ -645,6 +645,22 @@ def test_parse_error_incomplete_hash
     end
   end
 
+  def test_parse_error_snippet
+    omit "C ext only test" unless RUBY_ENGINE == "ruby"
+
+    error = assert_raise(JSON::ParserError) { JSON.parse("あああああああああああああああああああああああ") }
+    assert_equal "unexpected character: 'ああああああああああ'", error.message
+
+    error = assert_raise(JSON::ParserError) { JSON.parse("aあああああああああああああああああああああああ") }
+    assert_equal "unexpected character: 'aああああああああああ'", error.message
+
+    error = assert_raise(JSON::ParserError) { JSON.parse("abあああああああああああああああああああああああ") }
+    assert_equal "unexpected character: 'abあああああああああ'", error.message
+
+    error = assert_raise(JSON::ParserError) { JSON.parse("abcあああああああああああああああああああああああ") }
+    assert_equal "unexpected character: 'abcあああああああああ'", error.message
+  end
+
   def test_parse_leading_slash
     # ref: https://github.com/ruby/ruby/pull/12598
     assert_raise(JSON::ParserError) do

From 2e015ff839ed2044ead0fd27b63a912766270a1b Mon Sep 17 00:00:00 2001
From: Rahim Packir Saibo <i.am@rah.im>
Date: Wed, 5 Mar 2025 18:03:03 +0000
Subject: [PATCH 3/7] Fix JSON::GeneratorError#detailed_message with Ruby < 3.2

---
 lib/json/common.rb               | 7 +++++--
 test/json/json_generator_test.rb | 8 ++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/lib/json/common.rb b/lib/json/common.rb
index 3f1e5ce9..9094df00 100644
--- a/lib/json/common.rb
+++ b/lib/json/common.rb
@@ -152,10 +152,13 @@ def initialize(message, invalid_object = nil)
     end
 
     def detailed_message(...)
+      # Exception#detailed_message doesn't exist until Ruby 3.2
+      super_message = defined?(super) ? super : message
+
       if @invalid_object.nil?
-        super
+        super_message
       else
-        "#{super}\nInvalid object: #{@invalid_object.inspect}"
+        "#{super_message}\nInvalid object: #{@invalid_object.inspect}"
       end
     end
   end
diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb
index c67cd334..4a92801f 100755
--- a/test/json/json_generator_test.rb
+++ b/test/json/json_generator_test.rb
@@ -410,6 +410,14 @@ def test_json_generate
     end
   end
 
+  def test_json_generate_error_detailed_message
+    error = assert_raise JSON::GeneratorError do
+      generate(["\xea"])
+    end
+
+    assert_not_nil(error.detailed_message)
+  end
+
   def test_json_generate_unsupported_types
     assert_raise JSON::GeneratorError do
       generate(Object.new, strict: true)

From c079793b7655b749a4d85f5c8e6bd2649fd31c0c Mon Sep 17 00:00:00 2001
From: Charles Oliver Nutter <headius@headius.com>
Date: Mon, 10 Mar 2025 04:34:57 -0500
Subject: [PATCH 4/7] Avoid fast-path IO writes when IO has ext enc

This works around a bug in JRuby's IOOutputStream logic whereby an
IO with an external encoding will always fail to write incoming
bytes. We use base logic to detect if the target object is a "real
IO" and if so and it has an external encoding, we drop the realIO
and. This causes the rest of IOOutputStream to avoid the fast path
and always use dyncall logic with RubyString wrappers.

This works around the issue in jruby/jruby#8682.

This should be temporary, since it will definitely degrade direct
writes to such IO objects, but a longer-term fix for the encoding
issues spelled out in jruby/jruby#8682 will need to come first,
or else json will have to be modified to not use IOOutputStream at
all.
---
 java/src/json/ext/Generator.java | 34 +++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java
index c46a1e47..85250920 100644
--- a/java/src/json/ext/Generator.java
+++ b/java/src/json/ext/Generator.java
@@ -5,6 +5,7 @@
  */
 package json.ext;
 
+import org.jcodings.Encoding;
 import org.jcodings.specific.UTF8Encoding;
 import org.jruby.Ruby;
 import org.jruby.RubyArray;
@@ -15,6 +16,7 @@
 import org.jruby.RubyFixnum;
 import org.jruby.RubyFloat;
 import org.jruby.RubyHash;
+import org.jruby.RubyIO;
 import org.jruby.RubyString;
 import org.jruby.RubySymbol;
 import org.jruby.runtime.Helpers;
@@ -81,11 +83,41 @@ static <T extends IRubyObject> RubyString generateJson(ThreadContext context, T
             return handler.generateNew(context, session, object);
         }
 
-        BufferedOutputStream buffer = new BufferedOutputStream(new IOOutputStream(io), IO_BUFFER_SIZE);
+        BufferedOutputStream buffer =
+                new BufferedOutputStream(
+                        new PatchedIOOutputStream(io, UTF8Encoding.INSTANCE),
+                        IO_BUFFER_SIZE);
         handler.generateToBuffer(context, session, object, buffer);
         return io;
     }
 
+    /**
+     * A version of IOOutputStream hacked to avoid fast-path RubyIO calls when the target IO has an external encoding.
+     *
+     * All calls to the underlying IO will be done dynamically and all incoming bytes wrapped in RubyString instances.
+     * This avoids bugs in the fast-path logic in JRuby 9.4.12.0 and earlier that fails to properly handle writing bytes
+     * when the source and target destination are the same.
+     *
+     * See https://github.com/jruby/jruby/issues/8682
+     */
+    private static class PatchedIOOutputStream extends IOOutputStream {
+        public PatchedIOOutputStream(IRubyObject io, Encoding encoding) {
+            super(io, encoding);
+        }
+
+        @Override
+        public RubyIO getRealIO(IRubyObject io) {
+            RubyIO realIO = super.getRealIO(io);
+
+            // if the real IO has an external encoding, don't use fast path
+            if (realIO == null || realIO.getEnc() != null) {
+                return null;
+            }
+
+            return realIO;
+        }
+    }
+
     /**
      * Returns the best serialization handler for the given object.
      */

From 7d0637b9e6e0269c88418b142cb9a1ef2799587d Mon Sep 17 00:00:00 2001
From: Jean Boussier <jean.boussier@gmail.com>
Date: Tue, 11 Mar 2025 20:50:26 +0100
Subject: [PATCH 5/7] Raise a ParserError on all incomplete unicode escape
 sequence.

This was the behavior until `2.10.0` unadvertently changed it.

`"\u1"` would raise, but `"\u1zzz"` wouldn't.
---
 CHANGES.md                    |  1 +
 ext/json/ext/parser/parser.c  | 85 +++++++++++++++++------------------
 test/json/json_parser_test.rb |  5 +++
 3 files changed, 48 insertions(+), 43 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index d2ed4052..f57dd36a 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,6 @@
 # Changes
 
+* Raise a ParserError on all incomplete unicode escape sequence. This was the behavior until `2.10.0` unadvertently changed it.
 * Ensure document snippets that are included in parser errors don't include truncated multibyte characters.
 
 ### 2025-02-10 (2.10.1)
diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
index 776eb916..0a1d9375 100644
--- a/ext/json/ext/parser/parser.c
+++ b/ext/json/ext/parser/parser.c
@@ -341,6 +341,44 @@ static void rvalue_stack_eagerly_release(VALUE handle)
     }
 }
 
+
+#ifndef HAVE_STRNLEN
+static size_t strnlen(const char *s, size_t maxlen)
+{
+    char *p;
+    return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
+}
+#endif
+
+#define PARSE_ERROR_FRAGMENT_LEN 32
+#ifdef RBIMPL_ATTR_NORETURN
+RBIMPL_ATTR_NORETURN()
+#endif
+static void raise_parse_error(const char *format, const char *start)
+{
+    unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
+
+    size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
+    const char *ptr = start;
+
+    if (len == PARSE_ERROR_FRAGMENT_LEN) {
+        MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
+
+        while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
+            len--;
+        }
+
+        if (buffer[len - 1] >= 0xC0) { // multibyte character start
+            len--;
+        }
+
+        buffer[len] = '\0';
+        ptr = (const char *)buffer;
+    }
+
+    rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
+}
+
 /* unicode */
 
 static const signed char digit_values[256] = {
@@ -362,21 +400,19 @@ static const signed char digit_values[256] = {
 
 static uint32_t unescape_unicode(const unsigned char *p)
 {
-    const uint32_t replacement_char = 0xFFFD;
-
     signed char b;
     uint32_t result = 0;
     b = digit_values[p[0]];
-    if (b < 0) return replacement_char;
+    if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
     result = (result << 4) | (unsigned char)b;
     b = digit_values[p[1]];
-    if (b < 0) return replacement_char;
+    if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
     result = (result << 4) | (unsigned char)b;
     b = digit_values[p[2]];
-    if (b < 0) return replacement_char;
+    if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
     result = (result << 4) | (unsigned char)b;
     b = digit_values[p[3]];
-    if (b < 0) return replacement_char;
+    if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
     result = (result << 4) | (unsigned char)b;
     return result;
 }
@@ -440,43 +476,6 @@ typedef struct JSON_ParserStateStruct {
 
 static const rb_data_type_t JSON_ParserConfig_type;
 
-#ifndef HAVE_STRNLEN
-static size_t strnlen(const char *s, size_t maxlen)
-{
-    char *p;
-    return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
-}
-#endif
-
-#define PARSE_ERROR_FRAGMENT_LEN 32
-#ifdef RBIMPL_ATTR_NORETURN
-RBIMPL_ATTR_NORETURN()
-#endif
-static void raise_parse_error(const char *format, const char *start)
-{
-    unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
-
-    size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
-    const char *ptr = start;
-
-    if (len == PARSE_ERROR_FRAGMENT_LEN) {
-        MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
-
-        while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
-            len--;
-        }
-
-        if (buffer[len - 1] >= 0xC0) { // multibyte character start
-            len--;
-        }
-
-        buffer[len] = '\0';
-        ptr = (const char *)buffer;
-    }
-
-    rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
-}
-
 static const bool whitespace[256] = {
     [' '] = 1,
     ['\t'] = 1,
diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb
index ae0f285d..87b78fb0 100644
--- a/test/json/json_parser_test.rb
+++ b/test/json/json_parser_test.rb
@@ -311,6 +311,11 @@ def test_invalid_unicode_escape
     assert_raise(JSON::ParserError) { parse('"\uaa"') }
     assert_raise(JSON::ParserError) { parse('"\uaaa"') }
     assert_equal "\uaaaa", parse('"\uaaaa"')
+
+    assert_raise(JSON::ParserError) { parse('"\u______"') }
+    assert_raise(JSON::ParserError) { parse('"\u1_____"') }
+    assert_raise(JSON::ParserError) { parse('"\u11____"') }
+    assert_raise(JSON::ParserError) { parse('"\u111___"') }
   end
 
   def test_parse_big_integers

From cf242d89a0523bacd5238a59c77b33411b8c3208 Mon Sep 17 00:00:00 2001
From: Jean Boussier <jean.boussier@gmail.com>
Date: Tue, 11 Mar 2025 18:49:14 +0100
Subject: [PATCH 6/7] Fix potential out of bound read in
 `json_string_unescape`.

---
 CHANGES.md                    | 1 +
 ext/json/ext/parser/parser.c  | 2 +-
 test/json/json_parser_test.rb | 7 +++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index f57dd36a..bae88d71 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,6 @@
 # Changes
 
+* Fix a potential crash in the C extension parser.
 * Raise a ParserError on all incomplete unicode escape sequence. This was the behavior until `2.10.0` unadvertently changed it.
 * Ensure document snippets that are included in parser errors don't include truncated multibyte characters.
 
diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
index 0a1d9375..d990612a 100644
--- a/ext/json/ext/parser/parser.c
+++ b/ext/json/ext/parser/parser.c
@@ -608,7 +608,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
     buffer = RSTRING_PTR(result);
     bufferStart = buffer;
 
-    while ((pe = memchr(pe, '\\', stringEnd - pe))) {
+    while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
         unescape = (char *) "?";
         unescape_len = 1;
         if (pe > p) {
diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb
index 87b78fb0..5519d4c5 100644
--- a/test/json/json_parser_test.rb
+++ b/test/json/json_parser_test.rb
@@ -673,6 +673,13 @@ def test_parse_leading_slash
     end
   end
 
+  def test_parse_malformated_unicode_escapes
+    assert_equal "�|", JSON.parse('"\\u1gef|"')
+    assert_equal "�|", JSON.parse('"\\u12ge|"')
+    assert_equal "�|", JSON.parse('"\\u123g|"')
+    assert_equal '�\\\\"', JSON.parse('"\\u1\\\\\\\\\\\\\\\\"')
+  end
+
   private
 
   def string_deduplication_available?

From 350c1fd154eaf7840f696c623362478a9148166c Mon Sep 17 00:00:00 2001
From: Jean Boussier <jean.boussier@gmail.com>
Date: Wed, 12 Mar 2025 14:06:14 +0100
Subject: [PATCH 7/7] Release 2.10.2

---
 CHANGES.md          | 2 ++
 lib/json/version.rb | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index bae88d71..69762423 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,7 @@
 # Changes
 
+### 2025-03-12 (2.10.2)
+
 * Fix a potential crash in the C extension parser.
 * Raise a ParserError on all incomplete unicode escape sequence. This was the behavior until `2.10.0` unadvertently changed it.
 * Ensure document snippets that are included in parser errors don't include truncated multibyte characters.
diff --git a/lib/json/version.rb b/lib/json/version.rb
index 65bcb976..9c21f18b 100644
--- a/lib/json/version.rb
+++ b/lib/json/version.rb
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module JSON
-  VERSION = '2.10.1'
+  VERSION = '2.10.2'
 end