From 937c8d2e65f4b0a0654626dcdecb695fd13fcd23 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 24 Oct 2024 10:46:10 +0200 Subject: [PATCH 01/75] Start 2.8.0 development --- lib/json/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/json/version.rb b/lib/json/version.rb index d3d621b75..ee1312c7d 100644 --- a/lib/json/version.rb +++ b/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.7.3' + VERSION = '2.8.0.alpha1' end From 8abed74ce20fc84a734b343e87d1ba55262e1921 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 24 Oct 2024 16:27:11 +0900 Subject: [PATCH 02/75] CI: Drop old platforms --- .github/workflows/ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index afc4ae294..418c10e4b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,9 +20,7 @@ jobs: fail-fast: false matrix: os: - - ubuntu-20.04 - ubuntu-22.04 - - macos-12 - macos-13 - macos-14 - windows-latest From 1d1fbd3ebbf066ba7c78b9f384af1de20871165b Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 24 Oct 2024 16:35:54 +0900 Subject: [PATCH 03/75] CI: Drop in-between versions --- .github/workflows/ci.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 418c10e4b..b0ff83016 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: uses: ruby/actions/.github/workflows/ruby_versions.yml@master with: engine: cruby-jruby - min_version: 2.3 + min_version: 2.7 host: needs: ruby-versions @@ -27,14 +27,10 @@ jobs: ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }} include: - { os: windows-latest , ruby: mswin } # ruby/ruby windows CI - - { os: ubuntu-latest , ruby: jruby-9.3 } # Ruby 2.7 - { os: ubuntu-latest , ruby: jruby-9.4 } # Ruby 3.1 - { os: macos-latest , ruby: truffleruby-head } - { os: ubuntu-latest , ruby: 
truffleruby-head } exclude: - - { os: macos-14, ruby: 2.3 } - - { os: macos-14, ruby: 2.4 } - - { os: macos-14, ruby: 2.5 } - { os: windows-latest, ruby: jruby } - { os: windows-latest, ruby: jruby-head } @@ -47,8 +43,6 @@ jobs: ruby-version: ${{ matrix.ruby }} apt-get: ragel brew: ragel - # only needed for Ruby 2.3 - mingw: ragel - run: | bundle config --without benchmark From 2c43b7ea32a95412691aaa34da353d94a74522e3 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 24 Oct 2024 10:48:44 +0200 Subject: [PATCH 04/75] Set Ruby 2.7 as the required version --- CHANGES.md | 2 + json.gemspec | 4 +- json_pure.gemspec | 78 +++++++++++++++++++------------------- lib/json/pure/generator.rb | 27 +++++-------- lib/json/pure/parser.rb | 9 +---- 5 files changed, 57 insertions(+), 63 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 665635bb3..5b3fbf071 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,7 @@ # Changes +* Bump required_ruby_version to 2.7. + ### 2024-10-24 (2.7.3) * Numerous performance optimizations in `JSON.generate` and `JSON.dump` (up to 2 times faster). diff --git a/json.gemspec b/json.gemspec index 96721f909..c6aa82d36 100644 --- a/json.gemspec +++ b/json.gemspec @@ -1,3 +1,5 @@ +# frozen_string_literal: true + version = File.foreach(File.join(__dir__, "lib/json/version.rb")) do |line| /^\s*VERSION\s*=\s*'(.*)'/ =~ line and break $1 end rescue nil @@ -19,7 +21,7 @@ spec = Gem::Specification.new do |s| 'wiki_uri' => 'https://github.com/ruby/json/wiki' } - s.required_ruby_version = Gem::Requirement.new(">= 2.3") + s.required_ruby_version = Gem::Requirement.new(">= 2.7") if java_ext s.description = "A JSON implementation as a JRuby extension." 
diff --git a/json_pure.gemspec b/json_pure.gemspec index 231f672c6..37b437c4a 100644 --- a/json_pure.gemspec +++ b/json_pure.gemspec @@ -1,50 +1,52 @@ +# frozen_string_literal: true + version = File.foreach(File.join(__dir__, "lib/json/version.rb")) do |line| /^\s*VERSION\s*=\s*'(.*)'/ =~ line and break $1 end rescue nil Gem::Specification.new do |s| - s.name = "json_pure".freeze + s.name = "json_pure" s.version = version - s.summary = "JSON Implementation for Ruby".freeze - s.description = "This is a JSON implementation in pure Ruby.".freeze - s.licenses = ["Ruby".freeze] - s.authors = ["Florian Frank".freeze] - s.email = "flori@ping.de".freeze + s.summary = "JSON Implementation for Ruby" + s.description = "This is a JSON implementation in pure Ruby." + s.licenses = ["Ruby"] + s.authors = ["Florian Frank"] + s.email = "flori@ping.de" - s.extra_rdoc_files = ["README.md".freeze] - s.rdoc_options = ["--title".freeze, "JSON implementation for ruby".freeze, "--main".freeze, "README.md".freeze] + s.extra_rdoc_files = ["README.md"] + s.rdoc_options = ["--title", "JSON implementation for ruby", "--main", "README.md"] s.files = [ - "CHANGES.md".freeze, - "COPYING".freeze, - "BSDL".freeze, - "LEGAL".freeze, - "README.md".freeze, - "json_pure.gemspec".freeze, - "lib/json.rb".freeze, - "lib/json/add/bigdecimal.rb".freeze, - "lib/json/add/complex.rb".freeze, - "lib/json/add/core.rb".freeze, - "lib/json/add/date.rb".freeze, - "lib/json/add/date_time.rb".freeze, - "lib/json/add/exception.rb".freeze, - "lib/json/add/ostruct.rb".freeze, - "lib/json/add/range.rb".freeze, - "lib/json/add/rational.rb".freeze, - "lib/json/add/regexp.rb".freeze, - "lib/json/add/set.rb".freeze, - "lib/json/add/struct.rb".freeze, - "lib/json/add/symbol.rb".freeze, - "lib/json/add/time.rb".freeze, - "lib/json/common.rb".freeze, - "lib/json/ext.rb".freeze, - "lib/json/generic_object.rb".freeze, - "lib/json/pure.rb".freeze, - "lib/json/pure/generator.rb".freeze, - "lib/json/pure/parser.rb".freeze, - 
"lib/json/version.rb".freeze, + "CHANGES.md", + "COPYING", + "BSDL", + "LEGAL", + "README.md", + "json_pure.gemspec", + "lib/json.rb", + "lib/json/add/bigdecimal.rb", + "lib/json/add/complex.rb", + "lib/json/add/core.rb", + "lib/json/add/date.rb", + "lib/json/add/date_time.rb", + "lib/json/add/exception.rb", + "lib/json/add/ostruct.rb", + "lib/json/add/range.rb", + "lib/json/add/rational.rb", + "lib/json/add/regexp.rb", + "lib/json/add/set.rb", + "lib/json/add/struct.rb", + "lib/json/add/symbol.rb", + "lib/json/add/time.rb", + "lib/json/common.rb", + "lib/json/ext.rb", + "lib/json/generic_object.rb", + "lib/json/pure.rb", + "lib/json/pure/generator.rb", + "lib/json/pure/parser.rb", + "lib/json/version.rb", ] - s.homepage = "https://ruby.github.io/json".freeze + s.homepage = "https://ruby.github.io/json" s.metadata = { 'bug_tracker_uri' => 'https://github.com/ruby/json/issues', 'changelog_uri' => 'https://github.com/ruby/json/blob/master/CHANGES.md', @@ -54,5 +56,5 @@ Gem::Specification.new do |s| 'wiki_uri' => 'https://github.com/ruby/json/wiki' } - s.required_ruby_version = Gem::Requirement.new(">= 2.3".freeze) + s.required_ruby_version = Gem::Requirement.new(">= 2.7") end diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index c2268cc36..fe481685e 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -333,24 +333,17 @@ def generate(obj) end # Assumes !@ascii_only, !@script_safe - if Regexp.method_defined?(:match?) - private def fast_serialize_string(string, buf) # :nodoc: - buf << '"' - string = string.encode(::Encoding::UTF_8) unless string.encoding == ::Encoding::UTF_8 - raise GeneratorError, "source sequence is illegal/malformed utf-8" unless string.valid_encoding? 
- - if /["\\\x0-\x1f]/n.match?(string) - buf << string.gsub(/["\\\x0-\x1f]/n, MAP) - else - buf << string - end - buf << '"' - end - else - # Ruby 2.3 compatibility - private def fast_serialize_string(string, buf) # :nodoc: - buf << string.to_json(self) + private def fast_serialize_string(string, buf) # :nodoc: + buf << '"' + string = string.encode(::Encoding::UTF_8) unless string.encoding == ::Encoding::UTF_8 + raise GeneratorError, "source sequence is illegal/malformed utf-8" unless string.valid_encoding? + + if /["\\\x0-\x1f]/n.match?(string) + buf << string.gsub(/["\\\x0-\x1f]/n, MAP) + else + buf << string end + buf << '"' end # Return the value returned by method +name+. diff --git a/lib/json/pure/parser.rb b/lib/json/pure/parser.rb index 3dafe8309..1afd8e466 100644 --- a/lib/json/pure/parser.rb +++ b/lib/json/pure/parser.rb @@ -161,11 +161,10 @@ def convert_encoding(source) ?u => nil, }) - STR_UMINUS = ''.respond_to?(:-@) def parse_string if scan(STRING) return '' if self[1].empty? 
- string = self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c| + string = self[1].gsub(%r{(?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff])}n) do |c| if u = UNESCAPE_MAP[$&[1]] u else # \uXXXX @@ -181,11 +180,7 @@ def parse_string string.force_encoding(::Encoding::UTF_8) if @freeze - if STR_UMINUS - string = -string - else - string.freeze - end + string = -string end if @create_additions and @match_string From b2c4480f2ea5d24af7fa14ab570fb903b6e6a717 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 18 Oct 2024 15:36:57 +0200 Subject: [PATCH 05/75] pretty_generate: don't apply object_nl / array_nl for empty containers Fix: https://github.com/ruby/json/issues/437 Before: ```json { "foo": { }, "bar": [ ] } ``` After: ```json { "foo": {}, "bar": [] } ``` --- ext/json/ext/generator/generator.c | 14 ++++++++++++++ java/src/json/ext/Generator.java | 12 ++++++++++++ lib/json/pure/generator.rb | 17 +++++++++++++++-- test/json/json_generator_test.rb | 7 +++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index c35e86d9b..fbfa2c724 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -681,6 +681,13 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S if (max_nesting != 0 && depth > max_nesting) { rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); } + + if (RHASH_SIZE(obj) == 0) { + fbuffer_append(buffer, "{}", 2); + --state->depth; + return; + } + fbuffer_append_char(buffer, '{'); arg.buffer = buffer; @@ -709,6 +716,13 @@ static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_St if (max_nesting != 0 && depth > max_nesting) { rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); } + + if (RARRAY_LEN(obj) == 0) { + fbuffer_append(buffer, "[]", 2); + --state->depth; + return; + } + fbuffer_append_char(buffer, '['); 
if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len); for(i = 0; i < RARRAY_LEN(obj); i++) { diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 5a296c8f9..d0ac44586 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -269,6 +269,12 @@ void generate(Session session, RubyArray object, ByteList buffer) { GeneratorState state = session.getState(); int depth = state.increaseDepth(); + if (object.isEmpty()) { + buffer.append("[]".getBytes()); + state.decreaseDepth(); + return; + } + ByteList indentUnit = state.getIndent(); byte[] shift = Utils.repeat(indentUnit, depth); @@ -327,6 +333,12 @@ void generate(final Session session, RubyHash object, final GeneratorState state = session.getState(); final int depth = state.increaseDepth(); + if (object.isEmpty()) { + buffer.append("{}".getBytes()); + state.decreaseDepth(); + return; + } + final ByteList objectNl = state.getObjectNl(); final byte[] indent = Utils.repeat(state.getIndent(), depth); final ByteList spaceBefore = state.getSpaceBefore(); diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index fe481685e..8410013ff 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -399,9 +399,15 @@ def json_shift(state) end def json_transform(state) + depth = state.depth += 1 + + if empty? + state.depth -= 1 + return '{}' + end + delim = ",#{state.object_nl}" result = +"{#{state.object_nl}" - depth = state.depth += 1 first = true indent = !state.object_nl.empty? each { |key, value| @@ -441,6 +447,13 @@ def to_json(state = nil, *) private def json_transform(state) + depth = state.depth += 1 + + if empty? + state.depth -= 1 + return '[]' + end + result = '['.dup if state.array_nl.empty? delim = "," @@ -448,7 +461,7 @@ def json_transform(state) result << state.array_nl delim = ",#{state.array_nl}" end - depth = state.depth += 1 + first = true indent = !state.array_nl.empty? 
each { |value| diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 7dc45e3a5..32712c834 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -90,10 +90,17 @@ def test_dump_strict def test_generate_pretty json = pretty_generate({}) + assert_equal('{}', json) + + json = pretty_generate({1=>{}, 2=>[], 3=>4}) assert_equal(<<'EOT'.chomp, json) { + "1": {}, + "2": [], + "3": 4 } EOT + json = pretty_generate(@hash) # hashes aren't (insertion) ordered on every ruby implementation # assert_equal(@json3, json) From fb25e94aeab510ec67be44ab61ae664aa67e62dd Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 24 Oct 2024 11:42:02 +0200 Subject: [PATCH 06/75] Modernize heredocs --- lib/json.rb | 28 ++++++------ lib/json/common.rb | 44 +++++++++--------- test/json/json_generator_test.rb | 76 ++++++++++++++++---------------- test/json/json_parser_test.rb | 76 ++++++++++++++++---------------- 4 files changed, 112 insertions(+), 112 deletions(-) diff --git a/lib/json.rb b/lib/json.rb index 8b1673d53..c28e853e1 100644 --- a/lib/json.rb +++ b/lib/json.rb @@ -378,13 +378,13 @@ # json1 = JSON.generate(ruby) # ruby1 = JSON.parse(json1, create_additions: true) # # Make a nice display. -# display = < {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} # @@ -445,17 +445,17 @@ class << self # parse(source, opts); see #parse. 
# # Source for following examples: - # source = <<-EOT - # { - # "name": "Dave", - # "age" :40, - # "hats": [ - # "Cattleman's", - # "Panama", - # "Tophat" - # ] - # } - # EOT + # source = <<~JSON + # { + # "name": "Dave", + # "age" :40, + # "hats": [ + # "Cattleman's", + # "Panama", + # "Tophat" + # ] + # } + # JSON # # Load a \String: # ruby = JSON.load(source) diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 32712c834..01e412c96 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -19,24 +19,24 @@ def setup } @json2 = '{"a":2,"b":3.141,"c":"c","d":[1,"b",3.14],"e":{"foo":"bar"},' + '"g":"\\"\\u0000\\u001f","h":1000.0,"i":0.001}' - @json3 = <<'EOT'.chomp -{ - "a": 2, - "b": 3.141, - "c": "c", - "d": [ - 1, - "b", - 3.14 - ], - "e": { - "foo": "bar" - }, - "g": "\"\u0000\u001f", - "h": 1000.0, - "i": 0.001 -} -EOT + @json3 = <<~'JSON'.chomp + { + "a": 2, + "b": 3.141, + "c": "c", + "d": [ + 1, + "b", + 3.14 + ], + "e": { + "foo": "bar" + }, + "g": "\"\u0000\u001f", + "h": 1000.0, + "i": 0.001 + } + JSON end def silence @@ -93,13 +93,13 @@ def test_generate_pretty assert_equal('{}', json) json = pretty_generate({1=>{}, 2=>[], 3=>4}) - assert_equal(<<'EOT'.chomp, json) -{ - "1": {}, - "2": [], - "3": 4 -} -EOT + assert_equal(<<~'JSON'.chomp, json) + { + "1": {}, + "2": [], + "3": 4 + } + JSON json = pretty_generate(@hash) # hashes aren't (insertion) ordered on every ruby implementation @@ -108,11 +108,11 @@ def test_generate_pretty parsed_json = parse(json) assert_equal(@hash, parsed_json) json = pretty_generate({1=>2}) - assert_equal(<<'EOT'.chomp, json) -{ - "1": 2 -} -EOT + assert_equal(<<~'JSON'.chomp, json) + { + "1": 2 + } + JSON parsed_json = parse(json) assert_equal({"1"=>2}, parsed_json) assert_equal '666', pretty_generate(666) @@ -121,14 +121,14 @@ def test_generate_pretty def test_generate_custom state = State.new(:space_before => " ", :space => " ", :indent => "", :object_nl => 
"\n", :array_nl => "") json = generate({1=>{2=>3,4=>[5,6]}}, state) - assert_equal(<<'EOT'.chomp, json) -{ -"1" : { -"2" : 3, -"4" : [5,6] -} -} -EOT + assert_equal(<<~'JSON'.chomp, json) + { + "1" : { + "2" : 3, + "4" : [5,6] + } + } + JSON end def test_fast_generate diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 8d3c0c17c..ffc67ef6f 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -247,50 +247,50 @@ def test_freeze end def test_parse_comments - json = < "value1", "key2" => "value2", "key3" => "value3" }, parse(json)) - json = < "value1" }, parse(json)) - json = < "value1" }, parse(json)) end From 6a444816e9a3b6cfd25f03924ae0b6d611aa41a4 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 24 Oct 2024 16:04:33 +0200 Subject: [PATCH 07/75] Add ostruct to Gemfile --- Gemfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Gemfile b/Gemfile index 50249b0c7..b5e84b43d 100644 --- a/Gemfile +++ b/Gemfile @@ -7,6 +7,7 @@ else end group :development do + gem "ostruct" gem "rake" gem "rake-compiler" gem "test-unit" From cecf04fdfc0f39cb593095e32a412cc98cc0b3e8 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 24 Oct 2024 16:10:44 +0200 Subject: [PATCH 08/75] Cleaner .encode / .force_encoding --- test/json/json_parser_test.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index ffc67ef6f..59cfcfa6e 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -19,14 +19,14 @@ def test_construction assert_equal 'test', parser.source end - def test_argument_encoding - source = "{}".encode("UTF-16") + def test_argument_encoding_unmodified + source = "{}".encode(Encoding::UTF_16) JSON::Parser.new(source) assert_equal Encoding::UTF_16, source.encoding end - def test_argument_encoding_for_binary - source = "{}".encode("ASCII-8BIT") + def test_argument_encoding_for_binary_unmodified + source = 
"{}".b JSON::Parser.new(source) assert_equal Encoding::ASCII_8BIT, source.encoding end From 7f079b25beb3acb76a46519ee3feb65c1820caff Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 24 Oct 2024 16:51:14 +0200 Subject: [PATCH 09/75] Use smaller types for JSON_Parser boolean fields --- ext/json/ext/parser/parser.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/json/ext/parser/parser.h b/ext/json/ext/parser/parser.h index 9c7f0e7d2..59c502b22 100644 --- a/ext/json/ext/parser/parser.h +++ b/ext/json/ext/parser/parser.h @@ -15,17 +15,17 @@ typedef struct JSON_ParserStruct { long len; char *memo; VALUE create_id; - int max_nesting; - int allow_nan; - int parsing_name; - int symbolize_names; - int freeze; VALUE object_class; VALUE array_class; VALUE decimal_class; - int create_additions; VALUE match_string; FBuffer *fbuffer; + int max_nesting; + char allow_nan; + char parsing_name; + char symbolize_names; + char freeze; + char create_additions; } JSON_Parser; #define GET_PARSER \ From a3df007601515bc0e261112f3cd704dc5fb6fb53 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 25 Oct 2024 12:35:29 +0200 Subject: [PATCH 10/75] json_pure: fix ractor compatibility This actually never worked, because the test was always testing the ext version from the stdlib, never the pure version nor the current ext version. --- lib/json/pure/parser.rb | 26 +++++++++++++------------- test/json/ractor_test.rb | 28 +++++++++++++++++++--------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/lib/json/pure/parser.rb b/lib/json/pure/parser.rb index 1afd8e466..0c53eb79a 100644 --- a/lib/json/pure/parser.rb +++ b/lib/json/pure/parser.rb @@ -148,24 +148,24 @@ def convert_encoding(source) end # Unescape characters in strings. - UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr } - UNESCAPE_MAP.update({ - ?" 
=> '"', - ?\\ => '\\', - ?/ => '/', - ?b => "\b", - ?f => "\f", - ?n => "\n", - ?r => "\r", - ?t => "\t", - ?u => nil, - }) + UNESCAPE_MAP = { + '"' => '"', + '\\' => '\\', + '/' => '/', + 'b' => "\b", + 'f' => "\f", + 'n' => "\n", + 'r' => "\r", + 't' => "\t", + 'u' => nil, + }.freeze def parse_string if scan(STRING) return '' if self[1].empty? string = self[1].gsub(%r{(?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff])}n) do |c| - if u = UNESCAPE_MAP[$&[1]] + k = $&[1] + if u = UNESCAPE_MAP.fetch(k) { k.chr } u else # \uXXXX bytes = ''.b diff --git a/test/json/ractor_test.rb b/test/json/ractor_test.rb index e0116400f..f857c9a8b 100644 --- a/test/json/ractor_test.rb +++ b/test/json/ractor_test.rb @@ -9,10 +9,7 @@ class JSONInRactorTest < Test::Unit::TestCase def test_generate - assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}", ignore_stderr: true) - begin; - $VERBOSE = nil - require "json" + pid = fork do r = Ractor.new do json = JSON.generate({ 'a' => 2, @@ -26,9 +23,22 @@ def test_generate }) JSON.parse(json) end - expected_json = '{"a":2,"b":3.141,"c":"c","d":[1,"b",3.14],"e":{"foo":"bar"},' + - '"g":"\\"\\u0000\\u001f","h":1000.0,"i":0.001}' - assert_equal(JSON.parse(expected_json), r.take) - end; + expected_json = JSON.parse('{"a":2,"b":3.141,"c":"c","d":[1,"b",3.14],"e":{"foo":"bar"},' + + '"g":"\\"\\u0000\\u001f","h":1000.0,"i":0.001}') + actual_json = r.take + + if expected_json == actual_json + exit 0 + else + puts "Expected:" + puts expected_json + puts "Acutual:" + puts actual_json + puts + exit 1 + end + end + _, status = Process.waitpid2(pid) + assert_predicate status, :success? 
end -end if defined?(Ractor) +end if defined?(Ractor) && Process.respond_to?(:fork) From 49de571dd80788b422e2132788d703e3d231473f Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 25 Oct 2024 12:36:53 +0200 Subject: [PATCH 11/75] Cleanup test_helper.rb --- test/json/test_helper.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/json/test_helper.rb b/test/json/test_helper.rb index 4955a02c9..e8bba16f8 100644 --- a/test/json/test_helper.rb +++ b/test/json/test_helper.rb @@ -1,12 +1,12 @@ case ENV['JSON'] when 'pure' - $:.unshift File.join(__dir__, '../../lib') + $LOAD_PATH.unshift(File.expand_path('../../../lib', __FILE__)) require 'json/pure' when 'ext' - $:.unshift File.join(__dir__, '../../ext'), File.join(__dir__, '../../lib') + $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) require 'json/ext' else - $:.unshift File.join(__dir__, '../../ext'), File.join(__dir__, '../../lib') + $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) require 'json' end From 1f5e849fe00945c377a64f7e86b59137f76116c3 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 25 Oct 2024 09:55:25 +0200 Subject: [PATCH 12/75] Workaround rubygems $LOAD_PATH bug Ref: https://github.com/ruby/json/issues/647 Ref: https://github.com/rubygems/rubygems/pull/6490 Older rubygems are executing `extconf.rb` with a broken `$LOAD_PATH` causing the `json` gem native extension to be loaded with the stdlib version of the `.rb` files. This fails with ``` json/common.rb:82:in `initialize': wrong number of arguments (given 1, expected 0) (ArgumentError) ``` Since this is just for `extconf.rb` we can probably just accept that extra argument and ignore it. 
The bug was fixed in rubygems 3.4.9 / 2023-03-20 --- ext/json/ext/generator/generator.c | 9 +++++++++ test/json/json_generator_test.rb | 24 ++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index fbfa2c724..8a94e2337 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -977,6 +977,12 @@ static VALUE cState_generate(VALUE self, VALUE obj) return result; } +static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) +{ + rb_warn("The json gem extension was loaded with the stdlib ruby code. You should upgrade rubygems with `gem update --system`"); + return self; +} + /* * call-seq: initialize_copy(orig) * @@ -1422,6 +1428,9 @@ void Init_generator(void) cState = rb_define_class_under(mGenerator, "State", rb_cObject); rb_define_alloc_func(cState, cState_s_allocate); rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1); + rb_define_method(cState, "initialize", cState_initialize, -1); + rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings + rb_define_method(cState, "initialize_copy", cState_init_copy, 1); rb_define_method(cState, "indent", cState_indent, 0); rb_define_method(cState, "indent=", cState_indent_set, 1); diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 01e412c96..1670445a7 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -268,19 +268,19 @@ def test_buffer_initial_length end def test_gc - if respond_to?(:assert_in_out_err) && !(RUBY_PLATFORM =~ /java/) - assert_in_out_err(%w[-rjson -Ilib -Iext], <<-EOS, [], []) - bignum_too_long_to_embed_as_string = 1234567890123456789012345 - expect = bignum_too_long_to_embed_as_string.to_s - GC.stress = true - - 10.times do |i| - tmp = bignum_too_long_to_embed_as_string.to_json - raise "'\#{expect}' is expected, but '\#{tmp}'" unless tmp == 
expect - end - EOS + pid = fork do + bignum_too_long_to_embed_as_string = 1234567890123456789012345 + expect = bignum_too_long_to_embed_as_string.to_s + GC.stress = true + + 10.times do |i| + tmp = bignum_too_long_to_embed_as_string.to_json + raise "#{expect}' is expected, but '#{tmp}'" unless tmp == expect + end end - end if GC.respond_to?(:stress=) + _, status = Process.waitpid2(pid) + assert_predicate status, :success? + end if GC.respond_to?(:stress=) && Process.respond_to?(:fork) def test_configure_using_configure_and_merge numbered_state = { From dfdd4acf36d75609a82371f69c9e205a208f1938 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 25 Oct 2024 12:00:21 +0200 Subject: [PATCH 13/75] Workaround being loaded alongside a different `json_pure` version Fix: https://github.com/ruby/json/issues/646 Since both `json` and `json_pure` expose the same files, if the versions don't match, the native extension may be loaded with Ruby code that don't match and is incompatible. By doing the `require json/ext/generator/state` from C we ensure we're at least loading that. But this is a dirty workaround for the 2.7.x branch, we should find a better way to fully isolate the two gems. 
--- ext/json/ext/generator/generator.c | 2 ++ lib/json/ext.rb | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 8a94e2337..3f2d91ffa 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -1521,4 +1521,6 @@ void Init_generator(void) usascii_encindex = rb_usascii_encindex(); utf8_encindex = rb_utf8_encindex(); binary_encindex = rb_ascii8bit_encindex(); + + rb_require("json/ext/generator/state"); } diff --git a/lib/json/ext.rb b/lib/json/ext.rb index 775e28a96..92ef61eae 100644 --- a/lib/json/ext.rb +++ b/lib/json/ext.rb @@ -15,9 +15,6 @@ module Ext else require 'json/ext/parser' require 'json/ext/generator' - unless RUBY_ENGINE == 'jruby' - require 'json/ext/generator/state' - end $DEBUG and warn "Using Ext extension for JSON." JSON.parser = Parser JSON.generator = Generator From 472db78e6b726af885605598f451c64168dfd47a Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 25 Oct 2024 12:46:50 +0200 Subject: [PATCH 14/75] Update CHANGES --- CHANGES.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 5b3fbf071..fffd3d249 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,12 @@ * Bump required_ruby_version to 2.7. +### 2024-10-25 (2.7.4) + +* Workaround a bug in 3.4.8 and older https://github.com/rubygems/rubygems/pull/6490. +* Workaround different versions of `json` and `json_pure` being loaded (not officially supported). +* Make `json_pure` Ractor compatible. + ### 2024-10-24 (2.7.3) * Numerous performance optimizations in `JSON.generate` and `JSON.dump` (up to 2 times faster). 
From 01d2aac0bb829f3dc3bdaed03324bde8aa4b8e58 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 25 Oct 2024 12:55:49 +0200 Subject: [PATCH 15/75] Release 2.7.4 --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index fffd3d249..4d6f7fecf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,6 +5,7 @@ ### 2024-10-25 (2.7.4) * Workaround a bug in 3.4.8 and older https://github.com/rubygems/rubygems/pull/6490. + This bug would cause some gems with native extension to fail during compilation. * Workaround different versions of `json` and `json_pure` being loaded (not officially supported). * Make `json_pure` Ractor compatible. From 4d9dc988179aa6720ed86fa3ce325eb564ccc535 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 25 Oct 2024 19:20:00 +0200 Subject: [PATCH 16/75] Instantiate Parser with a kwsplat Prior to 2.7.3, `JSON::Ext::Parser` would only take kwargs. So if json_pure 2.7.4 is loaded with `json <= 2.7.2` (or stdlib) it blows up. Ref: https://github.com/ruby/json/issues/650 Fix: https://github.com/ruby/json/issues/651 --- CHANGES.md | 4 ++++ lib/json/common.rb | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 4d6f7fecf..bea1f5168 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,10 @@ * Bump required_ruby_version to 2.7. +### UNRELEASED (2.7.5) + +* Workaround another issue caused by conflicting versions of both `json_pure` and `json` being loaded. + ### 2024-10-25 (2.7.4) * Workaround a bug in 3.4.8 and older https://github.com/rubygems/rubygems/pull/6490. diff --git a/lib/json/common.rb b/lib/json/common.rb index 182bb1c0e..5f86360e0 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -219,7 +219,12 @@ def parse(source, opts = nil) if opts.nil? 
Parser.new(source).parse else - Parser.new(source, opts).parse + # NB: The ** shouldn't be required, but we have to deal with + # different versions of the `json` and `json_pure` gems being + # loaded concurrently. + # Prior to 2.7.3, `JSON::Ext::Parser` would only take kwargs. + # Ref: https://github.com/ruby/json/issues/650 + Parser.new(source, **opts).parse end end From 844a6633ab7718eb228ca0b2ce3445709cefc292 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 28 Oct 2024 08:34:43 +0100 Subject: [PATCH 17/75] Handle all formatting configs potentially being `nil`. Fix: https://github.com/ruby/json/issues/653 I don't think this was really fully supported in the past, but it kinda worked with some of the implementations. --- CHANGES.md | 1 + lib/json/ext/generator/state.rb | 10 +++++----- lib/json/pure/generator.rb | 14 +++++++------- test/json/json_generator_test.rb | 21 +++++++++++++++++++++ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index bea1f5168..713d127da 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,7 @@ ### UNRELEASED (2.7.5) +* Gracefully handle formatting configs being set to `nil` instead of `""`. * Workaround another issue caused by conflicting versions of both `json_pure` and `json` being loaded. 
### 2024-10-25 (2.7.4) diff --git a/lib/json/ext/generator/state.rb b/lib/json/ext/generator/state.rb index 4f9675d7b..29688142f 100644 --- a/lib/json/ext/generator/state.rb +++ b/lib/json/ext/generator/state.rb @@ -46,15 +46,15 @@ def configure(opts) opts.each do |key, value| case key when :indent - self.indent = value + self.indent = value || '' when :space - self.space = value + self.space = value || '' when :space_before - self.space_before = value + self.space_before = value || '' when :array_nl - self.array_nl = value + self.array_nl = value || '' when :object_nl - self.object_nl = value + self.object_nl = value || '' when :max_nesting self.max_nesting = value || 0 when :depth diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index 8410013ff..59424d300 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -239,13 +239,13 @@ def configure(opts) end # NOTE: If adding new instance variables here, check whether #generate should check them for #generate_json - @indent = opts[:indent] if opts.key?(:indent) - @space = opts[:space] if opts.key?(:space) - @space_before = opts[:space_before] if opts.key?(:space_before) - @object_nl = opts[:object_nl] if opts.key?(:object_nl) - @array_nl = opts[:array_nl] if opts.key?(:array_nl) - @allow_nan = !!opts[:allow_nan] if opts.key?(:allow_nan) - @ascii_only = opts[:ascii_only] if opts.key?(:ascii_only) + @indent = opts[:indent] || '' if opts.key?(:indent) + @space = opts[:space] || '' if opts.key?(:space) + @space_before = opts[:space_before] || '' if opts.key?(:space_before) + @object_nl = opts[:object_nl] || '' if opts.key?(:object_nl) + @array_nl = opts[:array_nl] || '' if opts.key?(:array_nl) + @allow_nan = !!opts[:allow_nan] if opts.key?(:allow_nan) + @ascii_only = opts[:ascii_only] if opts.key?(:ascii_only) @depth = opts[:depth] || 0 @buffer_initial_length ||= opts[:buffer_initial_length] diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 
1670445a7..57c4e6cee 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -174,6 +174,27 @@ def test_states assert s[:check_circular?] end + def test_falsy_state + object = { foo: [1, 2], bar: { egg: :spam }} + expected_json = JSON.generate( + object, + array_nl: "", + indent: "", + object_nl: "", + space: "", + space_before: "", + ) + + assert_equal expected_json, JSON.generate( + object, + array_nl: nil, + indent: nil, + object_nl: nil, + space: nil, + space_before: nil, + ) + end + def test_pretty_state state = JSON.create_pretty_state assert_equal({ From 0655b58d144c076f6c215b70a1231dace16a66d5 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 28 Oct 2024 21:08:26 +0100 Subject: [PATCH 18/75] Optimize `fbuffer_append_long` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ref: https://github.com/ruby/json/issues/655 Rather than to write the number backward, and then reverse the buffer, we can start from the back of the buffer and write the number in the proper direction. 
Before: ``` == Encoding integers (8009 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 8.606k i/100ms oj 9.598k i/100ms Calculating ------------------------------------- json 86.059k (± 0.8%) i/s (11.62 μs/i) - 430.300k in 5.000416s oj 97.409k (± 0.6%) i/s (10.27 μs/i) - 489.498k in 5.025360s Comparison: json: 86058.8 i/s oj: 97408.8 i/s - 1.13x faster ``` After: ``` == Encoding integers (8009 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 9.500k i/100ms json 9.359k i/100ms oj 9.722k i/100ms Calculating ------------------------------------- json (reuse) 96.270k (± 0.4%) i/s (10.39 μs/i) - 484.500k in 5.032777s json 94.800k (± 2.2%) i/s (10.55 μs/i) - 477.309k in 5.037495s oj 97.131k (± 0.7%) i/s (10.30 μs/i) - 486.100k in 5.004822s Comparison: json (reuse): 96270.1 i/s oj: 97130.5 i/s - same-ish: difference falls within error json: 94799.9 i/s - same-ish: difference falls within error ``` --- benchmark/encoder.rb | 1 + ext/json/ext/fbuffer/fbuffer.h | 26 +++++++++----------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index 662b1c3e7..f1e318268 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -64,6 +64,7 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" } # On these benchmarks we perform well. 
Either on par or very closely faster/slower +benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state) benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state) benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state) benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state) diff --git a/ext/json/ext/fbuffer/fbuffer.h b/ext/json/ext/fbuffer/fbuffer.h index 76bd6ce12..4dec50734 100644 --- a/ext/json/ext/fbuffer/fbuffer.h +++ b/ext/json/ext/fbuffer/fbuffer.h @@ -107,33 +107,25 @@ static void fbuffer_append_char(FBuffer *fb, char newchr) } #ifdef JSON_GENERATOR -static void freverse(char *start, char *end) -{ - char c; - - while (end > start) { - c = *end, *end-- = *start, *start++ = c; - } -} - static long fltoa(long number, char *buf) { - static char digits[] = "0123456789"; + static const char digits[] = "0123456789"; long sign = number; char* tmp = buf; if (sign < 0) number = -number; - do *tmp++ = digits[number % 10]; while (number /= 10); - if (sign < 0) *tmp++ = '-'; - freverse(buf, tmp - 1); - return tmp - buf; + do *tmp-- = digits[number % 10]; while (number /= 10); + if (sign < 0) *tmp-- = '-'; + return buf - tmp; } +#define LONG_BUFFER_SIZE 20 static void fbuffer_append_long(FBuffer *fb, long number) { - char buf[20]; - unsigned long len = fltoa(number, buf); - fbuffer_append(fb, buf, len); + char buf[LONG_BUFFER_SIZE]; + char *buffer_end = buf + LONG_BUFFER_SIZE; + long len = fltoa(number, buffer_end - 1); + fbuffer_append(fb, buffer_end - len, len); } static VALUE fbuffer_to_s(FBuffer *fb) From 72110f7992c28c4a73e144b7b63b9ecded0f812e Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 29 Oct 2024 10:23:25 +0100 Subject: [PATCH 19/75] Allocate the FBuffer struct on the stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ref: https://github.com/ruby/json/issues/655 The 
actual buffer is still on the heap, but this saves a pair of malloc/free. This helps a lot on micro-benchmarks Before: ``` ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- Oj 531.598k i/100ms JSON reuse 417.666k i/100ms Calculating ------------------------------------- Oj 5.735M (± 1.3%) i/s (174.35 ns/i) - 28.706M in 5.005900s JSON reuse 4.604M (± 1.4%) i/s (217.18 ns/i) - 23.389M in 5.080779s Comparison: Oj: 5735475.6 i/s JSON reuse: 4604380.3 i/s - 1.25x slower ``` After: ``` ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- Oj 518.700k i/100ms JSON reuse 483.370k i/100ms Calculating ------------------------------------- Oj 5.722M (± 1.8%) i/s (174.76 ns/i) - 29.047M in 5.077823s JSON reuse 5.278M (± 1.5%) i/s (189.46 ns/i) - 26.585M in 5.038172s Comparison: Oj: 5722283.8 i/s JSON reuse: 5278061.7 i/s - 1.08x slower ``` Bench: ```ruby require 'benchmark/ips' require 'oj' require 'json' json_encoder = JSON::State.new(JSON.dump_default_options) test_data = [1, "string", { a: 1, b: 2 }, [3, 4, 5]] Oj.default_options = Oj.default_options.merge(mode: :compat) Benchmark.ips do |x| x.config(time: 5, warmup: 2) x.report("Oj") do Oj.dump(test_data) end x.report("JSON reuse") do json_encoder.generate(test_data) end x.compare!(order: :baseline) end ``` --- ext/json/ext/fbuffer/fbuffer.h | 17 +++++------------ ext/json/ext/generator/generator.c | 17 +++++------------ ext/json/ext/generator/generator.h | 9 ++++----- ext/json/ext/parser/parser.c | 24 ++++++++++++------------ ext/json/ext/parser/parser.h | 2 +- ext/json/ext/parser/parser.rl | 24 ++++++++++++------------ 6 files changed, 39 insertions(+), 54 deletions(-) diff --git a/ext/json/ext/fbuffer/fbuffer.h b/ext/json/ext/fbuffer/fbuffer.h index 4dec50734..dc09dde9c 100644 --- a/ext/json/ext/fbuffer/fbuffer.h +++ b/ext/json/ext/fbuffer/fbuffer.h @@ -13,12 +13,11 @@ typedef struct 
FBufferStruct { #define FBUFFER_INITIAL_LENGTH_DEFAULT 1024 -#define FBUFFER_PTR(fb) (fb->ptr) -#define FBUFFER_LEN(fb) (fb->len) -#define FBUFFER_CAPA(fb) (fb->capa) +#define FBUFFER_PTR(fb) ((fb)->ptr) +#define FBUFFER_LEN(fb) ((fb)->len) +#define FBUFFER_CAPA(fb) ((fb)->capa) #define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb) -static FBuffer *fbuffer_alloc(unsigned long initial_length); static void fbuffer_free(FBuffer *fb); #ifndef JSON_GENERATOR static void fbuffer_clear(FBuffer *fb); @@ -36,20 +35,14 @@ static VALUE fbuffer_to_s(FBuffer *fb); #define RB_UNLIKELY(expr) expr #endif -static FBuffer *fbuffer_alloc(unsigned long initial_length) +static void fbuffer_init(FBuffer *fb, unsigned long initial_length) { - FBuffer *fb; - if (initial_length <= 0) initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - fb = ALLOC(FBuffer); - memset((void *) fb, 0, sizeof(FBuffer)); - fb->initial_length = initial_length; - return fb; + fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; } static void fbuffer_free(FBuffer *fb) { if (fb->ptr) ruby_xfree(fb->ptr); - ruby_xfree(fb); } #ifndef JSON_GENERATOR diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 3f2d91ffa..0d70b1217 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -911,15 +911,6 @@ static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *s } } -static FBuffer *cState_prepare_buffer(VALUE self) -{ - FBuffer *buffer; - GET_STATE(self); - buffer = fbuffer_alloc(state->buffer_initial_length); - - return buffer; -} - struct generate_json_data { FBuffer *buffer; VALUE vstate; @@ -948,18 +939,20 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc) static VALUE cState_partial_generate(VALUE self, VALUE obj) { - FBuffer *buffer = cState_prepare_buffer(self); GET_STATE(self); + FBuffer buffer = {0}; + fbuffer_init(&buffer, state->buffer_initial_length); + struct 
generate_json_data data = { - .buffer = buffer, + .buffer = &buffer, .vstate = self, .state = state, .obj = obj }; rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); - return fbuffer_to_s(buffer); + return fbuffer_to_s(&buffer); } /* diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h index 3710ce7c2..230c08a64 100644 --- a/ext/json/ext/generator/generator.h +++ b/ext/json/ext/generator/generator.h @@ -55,16 +55,16 @@ typedef struct JSON_Generator_StateStruct { GET_STATE_TO(self, state) #define GENERATE_JSON(type) \ - FBuffer *buffer; \ VALUE Vstate; \ JSON_Generator_State *state; \ \ rb_scan_args(argc, argv, "01", &Vstate); \ Vstate = cState_from_state_s(cState, Vstate); \ TypedData_Get_Struct(Vstate, JSON_Generator_State, &JSON_Generator_State_type, state); \ - buffer = cState_prepare_buffer(Vstate); \ - generate_json_##type(buffer, Vstate, state, self); \ - return fbuffer_to_s(buffer) + FBuffer buffer = {0}; \ + fbuffer_init(&buffer, state->buffer_initial_length); \ + generate_json_##type(&buffer, Vstate, state, self); \ + return fbuffer_to_s(&buffer) static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); @@ -122,7 +122,6 @@ static VALUE cState_script_safe(VALUE self); static VALUE cState_script_safe_set(VALUE self, VALUE depth); static VALUE cState_strict(VALUE self); static VALUE cState_strict_set(VALUE self, VALUE strict); -static FBuffer *cState_prepare_buffer(VALUE self); static const rb_data_type_t JSON_Generator_State_type; diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index cf0b3cefa..ce40ddd44 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -972,10 +972,10 @@ case 5: if (cs >= JSON_integer_first_final) { long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result 
= rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); return p + 1; } else { return NULL; @@ -1167,15 +1167,15 @@ case 7: } long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer)); + VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); *result = rb_funcallv(mod, method_id, 1, &text); } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); + *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); } return p + 1; @@ -2138,14 +2138,14 @@ static void JSON_mark(void *ptr) static void JSON_free(void *ptr) { JSON_Parser *json = ptr; - fbuffer_free(json->fbuffer); + fbuffer_free(&json->fbuffer); ruby_xfree(json); } static size_t JSON_memsize(const void *ptr) { const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(json->fbuffer); + return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); } static const rb_data_type_t JSON_Parser_type = { @@ -2159,7 +2159,7 @@ static VALUE cJSON_parser_s_allocate(VALUE klass) { JSON_Parser *json; VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - json->fbuffer = fbuffer_alloc(0); + fbuffer_init(&json->fbuffer, 0); return obj; } diff --git a/ext/json/ext/parser/parser.h b/ext/json/ext/parser/parser.h index 59c502b22..2a5b13856 100644 --- a/ext/json/ext/parser/parser.h +++ b/ext/json/ext/parser/parser.h @@ -19,7 +19,7 @@ typedef struct JSON_ParserStruct { VALUE array_class; VALUE decimal_class; VALUE match_string; - FBuffer *fbuffer; + FBuffer fbuffer; int max_nesting; char allow_nan; char 
parsing_name; diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 73f81341a..430f3c45b 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -324,10 +324,10 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res if (cs >= JSON_integer_first_final) { long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); return p + 1; } else { return NULL; @@ -388,15 +388,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul } long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer)); + VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); *result = rb_funcallv(mod, method_id, 1, &text); } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); + *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); } return p + 1; @@ -898,14 +898,14 @@ static void JSON_mark(void *ptr) static void JSON_free(void *ptr) { JSON_Parser *json = ptr; - fbuffer_free(json->fbuffer); + fbuffer_free(&json->fbuffer); ruby_xfree(json); } static size_t JSON_memsize(const void *ptr) { const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(json->fbuffer); + return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); } static const rb_data_type_t JSON_Parser_type = { @@ -919,7 +919,7 @@ static VALUE cJSON_parser_s_allocate(VALUE 
klass) { JSON_Parser *json; VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - json->fbuffer = fbuffer_alloc(0); + fbuffer_init(&json->fbuffer, 0); return obj; } From d54063a790d1dfcbdd828f81c52d00c31d90a2ea Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 29 Oct 2024 12:47:57 +0100 Subject: [PATCH 20/75] Remove dead cases from convert_UTF8_to_* functions --- ext/json/ext/generator/generator.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 0d70b1217..2d0620d71 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -44,9 +44,6 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca if (RB_UNLIKELY(ch_len)) { switch (ch_len) { - case 0: - pos++; - break; case 1: { FLUSH_POS(1); switch (ch) { @@ -217,9 +214,6 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, cons if (RB_UNLIKELY(ch_len)) { switch (ch_len) { - case 0: - pos++; - break; case 1: { FLUSH_POS(1); switch (ch) { From d227d225ca51870263d372d30d23bde06c9dffa6 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 29 Oct 2024 18:55:22 +0100 Subject: [PATCH 21/75] Fix a memory leak in #to_json methods Fix: https://github.com/ruby/json/issues/460 The various `to_json` methods must rescue exceptions to free the buffer. 
``` require 'json' data = 10_000.times.to_a << BasicObject.new 20.times do 100.times do begin data.to_json rescue NoMethodError end end puts `ps -o rss= -p #{$$}` end ``` ``` 20128 24992 29920 34672 39600 44336 49136 53936 58816 63616 68416 73232 78032 82896 87696 92528 97408 102208 107008 111808 ``` --- ext/json/ext/generator/generator.c | 50 +++++++++++++++++++++--------- ext/json/ext/generator/generator.h | 14 +-------- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 2d0620d71..4e1c0e314 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -397,7 +397,9 @@ static char *fstrndup(const char *ptr, unsigned long len) { */ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(object); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_object); } /* @@ -409,7 +411,9 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) * produced JSON string output further. */ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(array); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_array); } #ifdef RUBY_INTEGER_UNIFICATION @@ -420,7 +424,9 @@ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { */ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(integer); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? 
argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_integer); } #else @@ -431,7 +437,9 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(fixnum); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_fixnum); } /* @@ -441,7 +449,9 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(bignum); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_bignum); } #endif @@ -452,7 +462,9 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(float); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_float); } /* @@ -475,7 +487,9 @@ static VALUE mString_included_s(VALUE self, VALUE modul) { */ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(string); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? 
argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_string); } /* @@ -530,7 +544,8 @@ static VALUE mString_Extend_json_create(VALUE self, VALUE o) */ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(true); + rb_check_arity(argc, 0, 1); + return rb_utf8_str_new("true", 4); } /* @@ -540,7 +555,8 @@ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(false); + rb_check_arity(argc, 0, 1); + return rb_utf8_str_new("false", 5); } /* @@ -550,7 +566,8 @@ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(null); + rb_check_arity(argc, 0, 1); + return rb_utf8_str_new("null", 4); } /* @@ -567,7 +584,7 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) rb_scan_args(argc, argv, "01", &state); Check_Type(string, T_STRING); state = cState_from_state_s(cState, state); - return cState_partial_generate(state, string); + return cState_partial_generate(state, string, generate_json_string); } static void State_free(void *ptr) @@ -834,6 +851,7 @@ static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_ generate_json_bignum(buffer, Vstate, state, obj); } #endif + static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) { double value = RFLOAT_VALUE(obj); @@ -910,13 +928,14 @@ struct generate_json_data { VALUE vstate; JSON_Generator_State *state; VALUE obj; + void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); }; static VALUE generate_json_try(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; - generate_json(data->buffer, data->vstate, data->state, data->obj); + data->func(data->buffer, data->vstate, data->state, data->obj); return Qnil; } @@ -931,7 +950,7 @@ static VALUE 
generate_json_rescue(VALUE d, VALUE exc) return Qundef; } -static VALUE cState_partial_generate(VALUE self, VALUE obj) +static VALUE cState_partial_generate(VALUE self, VALUE obj, void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)) { GET_STATE(self); @@ -942,7 +961,8 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj) .buffer = &buffer, .vstate = self, .state = state, - .obj = obj + .obj = obj, + .func = func }; rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); @@ -958,7 +978,7 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj) */ static VALUE cState_generate(VALUE self, VALUE obj) { - VALUE result = cState_partial_generate(self, obj); + VALUE result = cState_partial_generate(self, obj, generate_json); GET_STATE(self); (void)state; return result; diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h index 230c08a64..05e8f0d80 100644 --- a/ext/json/ext/generator/generator.h +++ b/ext/json/ext/generator/generator.h @@ -54,18 +54,6 @@ typedef struct JSON_Generator_StateStruct { JSON_Generator_State *state; \ GET_STATE_TO(self, state) -#define GENERATE_JSON(type) \ - VALUE Vstate; \ - JSON_Generator_State *state; \ - \ - rb_scan_args(argc, argv, "01", &Vstate); \ - Vstate = cState_from_state_s(cState, Vstate); \ - TypedData_Get_Struct(Vstate, JSON_Generator_State, &JSON_Generator_State_type, state); \ - FBuffer buffer = {0}; \ - fbuffer_init(&buffer, state->buffer_initial_length); \ - generate_json_##type(&buffer, Vstate, state, self); \ - return fbuffer_to_s(&buffer) - static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); #ifdef RUBY_INTEGER_UNIFICATION @@ -99,7 +87,7 @@ static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_ static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); static void 
generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static VALUE cState_partial_generate(VALUE self, VALUE obj); +static VALUE cState_partial_generate(VALUE self, VALUE obj, void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)); static VALUE cState_generate(VALUE self, VALUE obj); static VALUE cState_from_state_s(VALUE self, VALUE opts); static VALUE cState_indent(VALUE self); From f6d6ca3c17d7974328cf5f69ba219b19e95fb4ce Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 29 Oct 2024 15:21:38 -0400 Subject: [PATCH 22/75] Remove double semicolon at end of line in parser --- ext/json/ext/parser/parser.c | 2 +- ext/json/ext/parser/parser.rl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index ce40ddd44..368d539bc 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1940,7 +1940,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) source = convert_encoding(StringValue(source)); StringValue(source); json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source);; + json->source = RSTRING_PTR(source); json->Vsource = source; return self; } diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 430f3c45b..c62701459 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -835,7 +835,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) source = convert_encoding(StringValue(source)); StringValue(source); json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source);; + json->source = RSTRING_PTR(source); json->Vsource = source; return self; } From fe607f4806ac1d448c1ea5ae7324fdbab183d2ca Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 29 Oct 2024 11:00:16 +0100 Subject: [PATCH 
23/75] Allocate the initial generator buffer on the stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ref: https://github.com/ruby/json/issues/655 Followup: https://github.com/ruby/json/issues/657 Assuming the generator might be used for fairly small documents we can start with a reasonable buffer size on the stack, and if we outgrow it, we can spill on the heap. In a way this is optimizing for micro-benchmarks, but there are valid use cases for fairly small JSON documents in actual real world scenarios, so thrashing the GC less in such cases makes sense. Before: ``` ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- Oj 518.700k i/100ms JSON reuse 483.370k i/100ms Calculating ------------------------------------- Oj 5.722M (± 1.8%) i/s (174.76 ns/i) - 29.047M in 5.077823s JSON reuse 5.278M (± 1.5%) i/s (189.46 ns/i) - 26.585M in 5.038172s Comparison: Oj: 5722283.8 i/s JSON reuse: 5278061.7 i/s - 1.08x slower ``` After: ``` ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- Oj 517.837k i/100ms JSON reuse 548.871k i/100ms Calculating ------------------------------------- Oj 5.693M (± 1.6%) i/s (175.65 ns/i) - 28.481M in 5.004056s JSON reuse 5.855M (± 1.2%) i/s (170.80 ns/i) - 29.639M in 5.063004s Comparison: Oj: 5692985.6 i/s JSON reuse: 5854857.9 i/s - 1.03x faster ``` --- ext/json/ext/fbuffer/fbuffer.h | 29 +++++++++++++++++++++++++---- ext/json/ext/generator/generator.c | 3 ++- ext/json/ext/generator/generator.h | 1 + ext/json/ext/parser/parser.c | 2 +- ext/json/ext/parser/parser.rl | 2 +- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/ext/json/ext/fbuffer/fbuffer.h b/ext/json/ext/fbuffer/fbuffer.h index dc09dde9c..55fc0bba9 100644 --- a/ext/json/ext/fbuffer/fbuffer.h +++ b/ext/json/ext/fbuffer/fbuffer.h @@ -4,13 +4,20 @@ #include "ruby.h" #include "ruby/encoding.h" +enum 
fbuffer_type { + HEAP = 0, + STACK = 1, +}; + typedef struct FBufferStruct { + enum fbuffer_type type; unsigned long initial_length; - char *ptr; unsigned long len; unsigned long capa; + char *ptr; } FBuffer; +#define FBUFFER_STACK_SIZE 512 #define FBUFFER_INITIAL_LENGTH_DEFAULT 1024 #define FBUFFER_PTR(fb) ((fb)->ptr) @@ -35,14 +42,21 @@ static VALUE fbuffer_to_s(FBuffer *fb); #define RB_UNLIKELY(expr) expr #endif -static void fbuffer_init(FBuffer *fb, unsigned long initial_length) +static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size) { fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; + if (stack_buffer) { + fb->type = STACK; + fb->ptr = stack_buffer; + fb->capa = stack_buffer_size; + } } static void fbuffer_free(FBuffer *fb) { - if (fb->ptr) ruby_xfree(fb->ptr); + if (fb->ptr && fb->type == HEAP) { + ruby_xfree(fb->ptr); + } } #ifndef JSON_GENERATOR @@ -65,7 +79,14 @@ static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) for (required = fb->capa; requested > required - fb->len; required <<= 1); if (required > fb->capa) { - REALLOC_N(fb->ptr, char, required); + if (fb->type == STACK) { + const char *old_buffer = fb->ptr; + fb->ptr = ALLOC_N(char, required); + fb->type = HEAP; + MEMCPY(fb->ptr, old_buffer, char, fb->len); + } else { + REALLOC_N(fb->ptr, char, required); + } fb->capa = required; } } diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 4e1c0e314..fc002ff73 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -954,8 +954,9 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, void (*func)(FBuffer { GET_STATE(self); + char stack_buffer[FBUFFER_STACK_SIZE]; FBuffer buffer = {0}; - fbuffer_init(&buffer, state->buffer_initial_length); + fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); struct 
generate_json_data data = { .buffer = &buffer, diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h index 05e8f0d80..b6898c955 100644 --- a/ext/json/ext/generator/generator.h +++ b/ext/json/ext/generator/generator.h @@ -54,6 +54,7 @@ typedef struct JSON_Generator_StateStruct { JSON_Generator_State *state; \ GET_STATE_TO(self, state) + static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); #ifdef RUBY_INTEGER_UNIFICATION diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 368d539bc..a6d8ff2cc 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -2159,7 +2159,7 @@ static VALUE cJSON_parser_s_allocate(VALUE klass) { JSON_Parser *json; VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - fbuffer_init(&json->fbuffer, 0); + fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); return obj; } diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index c62701459..499cc693a 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -919,7 +919,7 @@ static VALUE cJSON_parser_s_allocate(VALUE klass) { JSON_Parser *json; VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - fbuffer_init(&json->fbuffer, 0); + fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); return obj; } From 89f816e868bb9352d76addb2f44c8e284bf5927e Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 29 Oct 2024 12:13:44 +0100 Subject: [PATCH 24/75] Make fbuffer_inc_capa easier to inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the extra logic added for stack allocation, and especially the memcpy, it became harder for compilers to inline. This doesn't fully reclaim the speed lost with the stack allocation, but it's getting closer. 
Before: ``` == Encoding twitter.json (466906 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 160.000 i/100ms oj 225.000 i/100ms Calculating ------------------------------------- json 1.577k (± 2.0%) i/s (634.20 μs/i) - 8.000k in 5.075561s oj 2.264k (± 2.3%) i/s (441.79 μs/i) - 11.475k in 5.072205s Comparison: json: 1576.8 i/s oj: 2263.5 i/s - 1.44x faster == Encoding citm_catalog.json (500298 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 101.000 i/100ms oj 123.000 i/100ms Calculating ------------------------------------- json 1.033k (± 2.6%) i/s (968.06 μs/i) - 5.252k in 5.087617s oj 1.257k (± 2.2%) i/s (795.54 μs/i) - 6.396k in 5.090830s Comparison: json: 1033.0 i/s oj: 1257.0 i/s - 1.22x faster ``` After: ``` == Encoding twitter.json (466906 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23] Warming up -------------------------------------- json 213.000 i/100ms oj 230.000 i/100ms Calculating ------------------------------------- json 2.064k (± 3.6%) i/s (484.44 μs/i) - 10.437k in 5.063685s oj 2.246k (± 0.7%) i/s (445.19 μs/i) - 11.270k in 5.017541s Comparison: json: 2064.2 i/s oj: 2246.2 i/s - 1.09x faster == Encoding citm_catalog.json (500298 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23] Warming up -------------------------------------- json 133.000 i/100ms oj 132.000 i/100ms Calculating ------------------------------------- json 1.327k (± 1.7%) i/s (753.69 μs/i) - 6.650k in 5.013565s oj 1.305k (± 2.2%) i/s (766.40 μs/i) - 6.600k in 5.061089s Comparison: json: 1326.8 i/s oj: 1304.8 i/s - same-ish: difference falls within error ``` --- ext/json/ext/fbuffer/fbuffer.h | 47 +++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/ext/json/ext/fbuffer/fbuffer.h b/ext/json/ext/fbuffer/fbuffer.h index 55fc0bba9..9bbfeed3c 100644 
--- a/ext/json/ext/fbuffer/fbuffer.h +++ b/ext/json/ext/fbuffer/fbuffer.h @@ -33,7 +33,7 @@ static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len); #ifdef JSON_GENERATOR static void fbuffer_append_long(FBuffer *fb, long number); #endif -static void fbuffer_append_char(FBuffer *fb, char newchr); +static inline void fbuffer_append_char(FBuffer *fb, char newchr); #ifdef JSON_GENERATOR static VALUE fbuffer_to_s(FBuffer *fb); #endif @@ -66,29 +66,34 @@ static void fbuffer_clear(FBuffer *fb) } #endif -static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) +static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) { - if (RB_UNLIKELY(requested > fb->capa - fb->len)) { - unsigned long required; + unsigned long required; - if (RB_UNLIKELY(!fb->ptr)) { - fb->ptr = ALLOC_N(char, fb->initial_length); - fb->capa = fb->initial_length; - } + if (RB_UNLIKELY(!fb->ptr)) { + fb->ptr = ALLOC_N(char, fb->initial_length); + fb->capa = fb->initial_length; + } + + for (required = fb->capa; requested > required - fb->len; required <<= 1); - for (required = fb->capa; requested > required - fb->len; required <<= 1); - - if (required > fb->capa) { - if (fb->type == STACK) { - const char *old_buffer = fb->ptr; - fb->ptr = ALLOC_N(char, required); - fb->type = HEAP; - MEMCPY(fb->ptr, old_buffer, char, fb->len); - } else { - REALLOC_N(fb->ptr, char, required); - } - fb->capa = required; + if (required > fb->capa) { + if (fb->type == STACK) { + const char *old_buffer = fb->ptr; + fb->ptr = ALLOC_N(char, required); + fb->type = HEAP; + MEMCPY(fb->ptr, old_buffer, char, fb->len); + } else { + REALLOC_N(fb->ptr, char, required); } + fb->capa = required; + } +} + +static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) +{ + if (RB_UNLIKELY(requested > fb->capa - fb->len)) { + fbuffer_do_inc_capa(fb, requested); } } @@ -113,7 +118,7 @@ static void fbuffer_append_str(FBuffer *fb, VALUE str) } #endif -static void 
fbuffer_append_char(FBuffer *fb, char newchr) +static inline void fbuffer_append_char(FBuffer *fb, char newchr) { fbuffer_inc_capa(fb, 1); *(fb->ptr + fb->len) = newchr; From 97b61edce196a0b5eace0d3f51b4ca9ab9e605c5 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 30 Oct 2024 10:21:33 +0100 Subject: [PATCH 25/75] Use JSON.generate instead of JSON.dump for benchmarking --- benchmark/encoder.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index f1e318268..7b59309a7 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -18,7 +18,7 @@ def implementations(ruby_obj) state = JSON::State.new(JSON.dump_default_options) { json_state: ["json (reuse)", proc { state.generate(ruby_obj) }], - json: ["json", proc { JSON.dump(ruby_obj) }], + json: ["json", proc { JSON.generate(ruby_obj) }], oj: ["oj", proc { Oj.dump(ruby_obj) }], } end From 6382c231b0b84abe28cc3a979729a29dd7dba27d Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 29 Oct 2024 15:03:42 +0100 Subject: [PATCH 26/75] generator.c: store pretty strings in VALUE Given we expect these to almost always be null, we might as well keep them in RString. And even when provided, assuming we're passed frozen strings we'll save on copying them. This also reduces the size of the struct from 112B to 72B.
--- ext/json/ext/generator/generator.c | 159 ++++++++++++----------------- ext/json/ext/generator/generator.h | 27 ++--- 2 files changed, 74 insertions(+), 112 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index fc002ff73..59ecb5356 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -297,14 +297,6 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, cons RB_GC_GUARD(str); } -static char *fstrndup(const char *ptr, unsigned long len) { - char *result; - if (len <= 0) return NULL; - result = ALLOC_N(char, len); - memcpy(result, ptr, len); - return result; -} - /* * Document-module: JSON::Ext::Generator * @@ -587,27 +579,35 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) return cState_partial_generate(state, string, generate_json_string); } +static void State_mark(void *ptr) +{ + JSON_Generator_State *state = ptr; + rb_gc_mark_movable(state->indent); + rb_gc_mark_movable(state->space); + rb_gc_mark_movable(state->space_before); + rb_gc_mark_movable(state->object_nl); + rb_gc_mark_movable(state->array_nl); +} + +static void State_compact(void *ptr) +{ + JSON_Generator_State *state = ptr; + state->indent = rb_gc_location(state->indent); + state->space = rb_gc_location(state->space); + state->space_before = rb_gc_location(state->space_before); + state->object_nl = rb_gc_location(state->object_nl); + state->array_nl = rb_gc_location(state->array_nl); +} + static void State_free(void *ptr) { JSON_Generator_State *state = ptr; - if (state->indent) ruby_xfree(state->indent); - if (state->space) ruby_xfree(state->space); - if (state->space_before) ruby_xfree(state->space_before); - if (state->object_nl) ruby_xfree(state->object_nl); - if (state->array_nl) ruby_xfree(state->array_nl); ruby_xfree(state); } static size_t State_memsize(const void *ptr) { - const JSON_Generator_State *state = ptr; - size_t size = sizeof(*state); - if 
(state->indent) size += state->indent_len + 1; - if (state->space) size += state->space_len + 1; - if (state->space_before) size += state->space_before_len + 1; - if (state->object_nl) size += state->object_nl_len + 1; - if (state->array_nl) size += state->array_nl_len + 1; - return size; + return sizeof(JSON_Generator_State); } #ifndef HAVE_RB_EXT_RACTOR_SAFE @@ -617,7 +617,12 @@ static size_t State_memsize(const void *ptr) static const rb_data_type_t JSON_Generator_State_type = { "JSON/Generator/State", - {NULL, State_free, State_memsize,}, + { + .dmark = State_mark, + .dfree = State_free, + .dsize = State_memsize, + .dcompact = State_compact, + }, 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE, }; @@ -651,11 +656,11 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) if (arg->iter > 0) fbuffer_append_char(buffer, ','); if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append(buffer, state->object_nl, state->object_nl_len); + fbuffer_append_str(buffer, state->object_nl); } if (RB_UNLIKELY(state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append(buffer, state->indent, state->indent_len); + fbuffer_append_str(buffer, state->indent); } } @@ -673,9 +678,9 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) } generate_json_string(buffer, Vstate, state, key_to_s); - if (RB_UNLIKELY(state->space_before)) fbuffer_append(buffer, state->space_before, state->space_before_len); + if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before); fbuffer_append_char(buffer, ':'); - if (RB_UNLIKELY(state->space)) fbuffer_append(buffer, state->space, state->space_len); + if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space); generate_json(buffer, Vstate, state, val); arg->iter++; @@ -709,10 +714,10 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S depth = --state->depth; if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append(buffer, state->object_nl, 
state->object_nl_len); + fbuffer_append_str(buffer, state->object_nl); if (RB_UNLIKELY(state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append(buffer, state->indent, state->indent_len); + fbuffer_append_str(buffer, state->indent); } } } @@ -735,25 +740,25 @@ static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_St } fbuffer_append_char(buffer, '['); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len); + if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl); for(i = 0; i < RARRAY_LEN(obj); i++) { if (i > 0) { fbuffer_append_char(buffer, ','); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len); + if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl); } if (RB_UNLIKELY(state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append(buffer, state->indent, state->indent_len); + fbuffer_append_str(buffer, state->indent); } } generate_json(buffer, Vstate, state, RARRAY_AREF(obj, i)); } state->depth = --depth; if (RB_UNLIKELY(state->array_nl)) { - fbuffer_append(buffer, state->array_nl, state->array_nl_len); + fbuffer_append_str(buffer, state->array_nl); if (RB_UNLIKELY(state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append(buffer, state->indent, state->indent_len); + fbuffer_append_str(buffer, state->indent); } } } @@ -1007,11 +1012,11 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig) if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State"); MEMCPY(objState, origState, JSON_Generator_State, 1); - objState->indent = fstrndup(origState->indent, origState->indent_len); - objState->space = fstrndup(origState->space, origState->space_len); - objState->space_before = fstrndup(origState->space_before, origState->space_before_len); - objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len); - objState->array_nl = fstrndup(origState->array_nl, 
origState->array_nl_len); + objState->indent = origState->indent; + objState->space = origState->space; + objState->space_before = origState->space_before; + objState->object_nl = origState->object_nl; + objState->array_nl = origState->array_nl; return obj; } @@ -1041,7 +1046,7 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts) static VALUE cState_indent(VALUE self) { GET_STATE(self); - return state->indent ? rb_str_new(state->indent, state->indent_len) : rb_str_new2(""); + return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1051,20 +1056,12 @@ static VALUE cState_indent(VALUE self) */ static VALUE cState_indent_set(VALUE self, VALUE indent) { - unsigned long len; GET_STATE(self); Check_Type(indent, T_STRING); - len = RSTRING_LEN(indent); - if (len == 0) { - if (state->indent) { - ruby_xfree(state->indent); - state->indent = NULL; - state->indent_len = 0; - } + if (RSTRING_LEN(indent)) { + state->indent = RB_OBJ_FROZEN(indent) ? indent : rb_str_freeze(rb_str_dup(indent)); } else { - if (state->indent) ruby_xfree(state->indent); - state->indent = fstrndup(RSTRING_PTR(indent), len); - state->indent_len = len; + state->indent = Qfalse; } return Qnil; } @@ -1078,7 +1075,7 @@ static VALUE cState_indent_set(VALUE self, VALUE indent) static VALUE cState_space(VALUE self) { GET_STATE(self); - return state->space ? rb_str_new(state->space, state->space_len) : rb_str_new2(""); + return state->space ? state->space : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1089,20 +1086,12 @@ static VALUE cState_space(VALUE self) */ static VALUE cState_space_set(VALUE self, VALUE space) { - unsigned long len; GET_STATE(self); Check_Type(space, T_STRING); - len = RSTRING_LEN(space); - if (len == 0) { - if (state->space) { - ruby_xfree(state->space); - state->space = NULL; - state->space_len = 0; - } + if (RSTRING_LEN(space)) { + state->space = RB_OBJ_FROZEN(space) ? 
space : rb_str_freeze(rb_str_dup(space)); } else { - if (state->space) ruby_xfree(state->space); - state->space = fstrndup(RSTRING_PTR(space), len); - state->space_len = len; + state->space = Qfalse; } return Qnil; } @@ -1115,7 +1104,7 @@ static VALUE cState_space_set(VALUE self, VALUE space) static VALUE cState_space_before(VALUE self) { GET_STATE(self); - return state->space_before ? rb_str_new(state->space_before, state->space_before_len) : rb_str_new2(""); + return state->space_before ? state->space_before : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1125,20 +1114,12 @@ static VALUE cState_space_before(VALUE self) */ static VALUE cState_space_before_set(VALUE self, VALUE space_before) { - unsigned long len; GET_STATE(self); Check_Type(space_before, T_STRING); - len = RSTRING_LEN(space_before); - if (len == 0) { - if (state->space_before) { - ruby_xfree(state->space_before); - state->space_before = NULL; - state->space_before_len = 0; - } + if (RSTRING_LEN(space_before)) { + state->space_before = RB_OBJ_FROZEN(space_before) ? space_before : rb_str_freeze(rb_str_dup(space_before)); } else { - if (state->space_before) ruby_xfree(state->space_before); - state->space_before = fstrndup(RSTRING_PTR(space_before), len); - state->space_before_len = len; + state->space_before = Qfalse; } return Qnil; } @@ -1152,7 +1133,7 @@ static VALUE cState_space_before_set(VALUE self, VALUE space_before) static VALUE cState_object_nl(VALUE self) { GET_STATE(self); - return state->object_nl ? rb_str_new(state->object_nl, state->object_nl_len) : rb_str_new2(""); + return state->object_nl ? 
state->object_nl : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1163,19 +1144,12 @@ static VALUE cState_object_nl(VALUE self) */ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) { - unsigned long len; GET_STATE(self); Check_Type(object_nl, T_STRING); - len = RSTRING_LEN(object_nl); - if (len == 0) { - if (state->object_nl) { - ruby_xfree(state->object_nl); - state->object_nl = NULL; - } + if (RSTRING_LEN(object_nl)) { + state->object_nl = RB_OBJ_FROZEN(object_nl) ? object_nl : rb_str_freeze(rb_str_dup(object_nl)); } else { - if (state->object_nl) ruby_xfree(state->object_nl); - state->object_nl = fstrndup(RSTRING_PTR(object_nl), len); - state->object_nl_len = len; + state->object_nl = Qfalse; } return Qnil; } @@ -1188,7 +1162,7 @@ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) static VALUE cState_array_nl(VALUE self) { GET_STATE(self); - return state->array_nl ? rb_str_new(state->array_nl, state->array_nl_len) : rb_str_new2(""); + return state->array_nl ? state->array_nl : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1198,19 +1172,12 @@ static VALUE cState_array_nl(VALUE self) */ static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) { - unsigned long len; GET_STATE(self); Check_Type(array_nl, T_STRING); - len = RSTRING_LEN(array_nl); - if (len == 0) { - if (state->array_nl) { - ruby_xfree(state->array_nl); - state->array_nl = NULL; - } + if (RSTRING_LEN(array_nl)) { + state->array_nl = RB_OBJ_FROZEN(array_nl) ? 
array_nl : rb_str_freeze(rb_str_dup(array_nl)); } else { - if (state->array_nl) ruby_xfree(state->array_nl); - state->array_nl = fstrndup(RSTRING_PTR(array_nl), len); - state->array_nl_len = len; + state->array_nl = Qfalse; } return Qnil; } diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h index b6898c955..e74af24a2 100644 --- a/ext/json/ext/generator/generator.h +++ b/ext/json/ext/generator/generator.h @@ -23,28 +23,23 @@ typedef unsigned char _Bool; #endif #endif -static char *fstrndup(const char *ptr, unsigned long len); - /* ruby api and some helpers */ typedef struct JSON_Generator_StateStruct { - char *indent; - long indent_len; - char *space; - long space_len; - char *space_before; - long space_before_len; - char *object_nl; - long object_nl_len; - char *array_nl; - long array_nl_len; + VALUE indent; + VALUE space; + VALUE space_before; + VALUE object_nl; + VALUE array_nl; + long max_nesting; - char allow_nan; - char ascii_only; - char script_safe; - char strict; long depth; long buffer_initial_length; + + bool allow_nan; + bool ascii_only; + bool script_safe; + bool strict; } JSON_Generator_State; #define GET_STATE_TO(self, state) \ From 5c0d428d4c518e651edd3d57dca83ab601944505 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 29 Oct 2024 17:36:04 +0100 Subject: [PATCH 27/75] Move State#configure back into C While less nice, this opens the door to eliding the State object allocation when possible.
--- ext/json/ext/generator/generator.c | 130 ++++++++++++++++++++--------- lib/json/ext/generator/state.rb | 32 +------ 2 files changed, 91 insertions(+), 71 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 59ecb5356..aa9f1c9de 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -8,6 +8,8 @@ static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8; static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode; +static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, + sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict; /* Converts in_string to a JSON string (without the wrapping '"' * characters) in FBuffer out_buffer. @@ -1049,6 +1051,17 @@ static VALUE cState_indent(VALUE self) return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0)); } +static VALUE string_config(VALUE config) +{ + if (RTEST(config)) { + Check_Type(config, T_STRING); + if (RSTRING_LEN(config)) { + return RB_OBJ_FROZEN(config) ? config : rb_str_freeze(rb_str_dup(config)); + } + } + return Qfalse; +} + /* * call-seq: indent=(indent) * @@ -1057,12 +1070,7 @@ static VALUE cState_indent(VALUE self) static VALUE cState_indent_set(VALUE self, VALUE indent) { GET_STATE(self); - Check_Type(indent, T_STRING); - if (RSTRING_LEN(indent)) { - state->indent = RB_OBJ_FROZEN(indent) ? indent : rb_str_freeze(rb_str_dup(indent)); - } else { - state->indent = Qfalse; - } + state->indent = string_config(indent); return Qnil; } @@ -1087,12 +1095,7 @@ static VALUE cState_space(VALUE self) static VALUE cState_space_set(VALUE self, VALUE space) { GET_STATE(self); - Check_Type(space, T_STRING); - if (RSTRING_LEN(space)) { - state->space = RB_OBJ_FROZEN(space) ? 
space : rb_str_freeze(rb_str_dup(space)); - } else { - state->space = Qfalse; - } + state->space = string_config(space); return Qnil; } @@ -1115,12 +1118,7 @@ static VALUE cState_space_before(VALUE self) static VALUE cState_space_before_set(VALUE self, VALUE space_before) { GET_STATE(self); - Check_Type(space_before, T_STRING); - if (RSTRING_LEN(space_before)) { - state->space_before = RB_OBJ_FROZEN(space_before) ? space_before : rb_str_freeze(rb_str_dup(space_before)); - } else { - state->space_before = Qfalse; - } + state->space_before = string_config(space_before); return Qnil; } @@ -1145,12 +1143,7 @@ static VALUE cState_object_nl(VALUE self) static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) { GET_STATE(self); - Check_Type(object_nl, T_STRING); - if (RSTRING_LEN(object_nl)) { - state->object_nl = RB_OBJ_FROZEN(object_nl) ? object_nl : rb_str_freeze(rb_str_dup(object_nl)); - } else { - state->object_nl = Qfalse; - } + state->object_nl = string_config(object_nl); return Qnil; } @@ -1173,12 +1166,7 @@ static VALUE cState_array_nl(VALUE self) static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) { GET_STATE(self); - Check_Type(array_nl, T_STRING); - if (RSTRING_LEN(array_nl)) { - state->array_nl = RB_OBJ_FROZEN(array_nl) ? array_nl : rb_str_freeze(rb_str_dup(array_nl)); - } else { - state->array_nl = Qfalse; - } + state->array_nl = string_config(array_nl); return Qnil; } @@ -1207,6 +1195,11 @@ static VALUE cState_max_nesting(VALUE self) return LONG2FIX(state->max_nesting); } +static long long_config(VALUE num) +{ + return RTEST(num) ? 
FIX2LONG(num) : 0; +} + /* * call-seq: max_nesting=(depth) * @@ -1216,8 +1209,7 @@ static VALUE cState_max_nesting(VALUE self) static VALUE cState_max_nesting_set(VALUE self, VALUE depth) { GET_STATE(self); - Check_Type(depth, T_FIXNUM); - state->max_nesting = FIX2LONG(depth); + state->max_nesting = long_config(depth); return Qnil; } @@ -1345,8 +1337,7 @@ static VALUE cState_depth(VALUE self) static VALUE cState_depth_set(VALUE self, VALUE depth) { GET_STATE(self); - Check_Type(depth, T_FIXNUM); - state->depth = FIX2LONG(depth); + state->depth = long_config(depth); return Qnil; } @@ -1361,6 +1352,15 @@ static VALUE cState_buffer_initial_length(VALUE self) return LONG2FIX(state->buffer_initial_length); } +static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_initial_length) +{ + Check_Type(buffer_initial_length, T_FIXNUM); + long initial_length = FIX2LONG(buffer_initial_length); + if (initial_length > 0) { + state->buffer_initial_length = initial_length; + } +} + /* * call-seq: buffer_initial_length=(length) * @@ -1369,16 +1369,51 @@ static VALUE cState_buffer_initial_length(VALUE self) */ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length) { - long initial_length; GET_STATE(self); - Check_Type(buffer_initial_length, T_FIXNUM); - initial_length = FIX2LONG(buffer_initial_length); - if (initial_length > 0) { - state->buffer_initial_length = initial_length; - } + buffer_initial_length_set(state, buffer_initial_length); return Qnil; } +static int configure_state_i(VALUE key, VALUE val, VALUE _arg) +{ + JSON_Generator_State *state = (JSON_Generator_State *)_arg; + + if (key == sym_indent) { state->indent = string_config(val); } + else if (key == sym_space) { state->space = string_config(val); } + else if (key == sym_space_before) { state->space_before = string_config(val); } + else if (key == sym_object_nl) { state->object_nl = string_config(val); } + else if (key == sym_array_nl) { state->array_nl = 
string_config(val); } + else if (key == sym_max_nesting) { state->max_nesting = long_config(val); } + else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); } + else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); } + else if (key == sym_depth) { state->depth = long_config(val); } + else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); } + else if (key == sym_script_safe) { state->script_safe = RTEST(val); } + else if (key == sym_escape_slash) { state->script_safe = RTEST(val); } + else if (key == sym_strict) { state->strict = RTEST(val); } + return ST_CONTINUE; +} + +static void configure_state(JSON_Generator_State *state, VALUE config) +{ + if (!RTEST(config)) return; + + Check_Type(config, T_HASH); + + if (!RHASH_SIZE(config)) return; + + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(config, configure_state_i, (VALUE)state); +} + +static VALUE cState_configure(VALUE self, VALUE opts) +{ + GET_STATE(self); + configure_state(state, opts); + return self; +} + /* * */ @@ -1405,6 +1440,7 @@ void Init_generator(void) rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1); rb_define_method(cState, "initialize", cState_initialize, -1); rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings + rb_define_private_method(cState, "_configure", cState_configure, 1); rb_define_method(cState, "initialize_copy", cState_init_copy, 1); rb_define_method(cState, "indent", cState_indent, 0); @@ -1493,6 +1529,20 @@ void Init_generator(void) i_extend = rb_intern("extend"); i_encode = rb_intern("encode"); + sym_indent = ID2SYM(rb_intern("indent")); + sym_space = ID2SYM(rb_intern("space")); + sym_space_before = ID2SYM(rb_intern("space_before")); + sym_object_nl = ID2SYM(rb_intern("object_nl")); + sym_array_nl = ID2SYM(rb_intern("array_nl")); + sym_max_nesting = 
ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_ascii_only = ID2SYM(rb_intern("ascii_only")); + sym_depth = ID2SYM(rb_intern("depth")); + sym_buffer_initial_length = ID2SYM(rb_intern("buffer_initial_length")); + sym_script_safe = ID2SYM(rb_intern("script_safe")); + sym_escape_slash = ID2SYM(rb_intern("escape_slash")); + sym_strict = ID2SYM(rb_intern("strict")); + usascii_encindex = rb_usascii_encindex(); utf8_encindex = rb_utf8_encindex(); binary_encindex = rb_ascii8bit_encindex(); diff --git a/lib/json/ext/generator/state.rb b/lib/json/ext/generator/state.rb index 29688142f..6cd9496e6 100644 --- a/lib/json/ext/generator/state.rb +++ b/lib/json/ext/generator/state.rb @@ -42,37 +42,7 @@ def configure(opts) raise TypeError, "can't convert #{opts.class} into Hash" end end - - opts.each do |key, value| - case key - when :indent - self.indent = value || '' - when :space - self.space = value || '' - when :space_before - self.space_before = value || '' - when :array_nl - self.array_nl = value || '' - when :object_nl - self.object_nl = value || '' - when :max_nesting - self.max_nesting = value || 0 - when :depth - self.depth = value - when :buffer_initial_length - self.buffer_initial_length = value - when :allow_nan - self.allow_nan = value - when :ascii_only - self.ascii_only = value - when :script_safe, :escape_slash - self.script_safe = value - when :strict - self.strict = value - end - end - - self + _configure(opts) end alias_method :merge, :configure From 5009e7889b5b49e392648bfe354156858ed9999a Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 29 Oct 2024 18:14:12 +0100 Subject: [PATCH 28/75] Elide Generator::State allocation until a `to_json` method has to be called MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: https://github.com/ruby/json/issues/655 For very small documents, the biggest performance gap with alternatives is that the API imposes that we allocate the
`State` object. In a real world app this doesn't make much of a difference, but when running in a micro-benchmark this doubles the allocations, causing twice the amount of GC runs, making us look bad. However, unless we have to call a `to_json` method, the `State` object isn't visible, so with some refactoring, we can elide that allocation entirely. Instead we allocate the State internal struct on the stack, and if we need to call a `to_json` method, we allocate the `State` and spill the struct on the heap. As a result, `JSON.generate` is now as fast as re-using a `State` instance, as long as only primitives are generated. Before: ``` == Encoding small mixed (34 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 598.654k i/100ms json 400.542k i/100ms oj 533.353k i/100ms Calculating ------------------------------------- json (reuse) 6.371M (± 8.6%) i/s (156.96 ns/i) - 31.729M in 5.059195s json 4.120M (± 6.6%) i/s (242.72 ns/i) - 20.828M in 5.090549s oj 5.622M (± 6.4%) i/s (177.86 ns/i) - 28.268M in 5.061473s Comparison: json (reuse): 6371126.6 i/s oj: 5622452.0 i/s - same-ish: difference falls within error json: 4119991.1 i/s - 1.55x slower == Encoding small nested array (121 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 248.125k i/100ms json 215.255k i/100ms oj 217.531k i/100ms Calculating ------------------------------------- json (reuse) 2.628M (± 6.1%) i/s (380.55 ns/i) - 13.151M in 5.030281s json 2.185M (± 6.7%) i/s (457.74 ns/i) - 10.978M in 5.057655s oj 2.217M (± 6.7%) i/s (451.10 ns/i) - 11.094M in 5.044844s Comparison: json (reuse): 2627799.4 i/s oj: 2216824.8 i/s - 1.19x slower json: 2184669.5 i/s - 1.20x slower == Encoding small hash (65 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 641.334k
i/100ms json 322.745k i/100ms oj 642.450k i/100ms Calculating ------------------------------------- json (reuse) 7.133M (± 6.5%) i/s (140.19 ns/i) - 35.915M in 5.068201s json 4.615M (± 7.0%) i/s (216.70 ns/i) - 22.915M in 5.003718s oj 6.912M (± 6.4%) i/s (144.68 ns/i) - 34.692M in 5.047690s Comparison: json (reuse): 7133123.3 i/s oj: 6911977.1 i/s - same-ish: difference falls within error json: 4614696.6 i/s - 1.55x slower ``` After: ``` == Encoding small mixed (34 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 572.751k i/100ms json 457.741k i/100ms oj 512.247k i/100ms Calculating ------------------------------------- json (reuse) 6.324M (± 6.9%) i/s (158.12 ns/i) - 31.501M in 5.023093s json 6.263M (± 6.9%) i/s (159.66 ns/i) - 31.126M in 5.017086s oj 5.569M (± 6.6%) i/s (179.56 ns/i) - 27.661M in 5.003739s Comparison: json (reuse): 6324183.5 i/s json: 6263204.9 i/s - same-ish: difference falls within error oj: 5569049.2 i/s - same-ish: difference falls within error == Encoding small nested array (121 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 258.505k i/100ms json 242.335k i/100ms oj 220.678k i/100ms Calculating ------------------------------------- json (reuse) 2.589M (± 9.6%) i/s (386.17 ns/i) - 12.925M in 5.071853s json 2.594M (± 6.6%) i/s (385.46 ns/i) - 13.086M in 5.083035s oj 2.250M (± 2.3%) i/s (444.43 ns/i) - 11.255M in 5.004707s Comparison: json (reuse): 2589499.6 i/s json: 2594321.0 i/s - same-ish: difference falls within error oj: 2250064.0 i/s - 1.15x slower == Encoding small hash (65 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 656.373k i/100ms json 644.135k i/100ms oj 650.283k i/100ms Calculating ------------------------------------- json (reuse) 7.202M (± 7.1%) i/s (138.84 ns/i) 
- 36.101M in 5.051438s json 7.278M (± 1.7%) i/s (137.40 ns/i) - 36.716M in 5.046300s oj 7.036M (± 1.7%) i/s (142.12 ns/i) - 35.766M in 5.084729s Comparison: json (reuse): 7202447.9 i/s json: 7277883.0 i/s - same-ish: difference falls within error oj: 7036115.2 i/s - same-ish: difference falls within error ``` --- benchmark/encoder.rb | 1 + ext/json/ext/generator/generator.c | 173 ++++++++++++++++++-------- ext/json/ext/generator/generator.h | 23 +--- java/src/json/ext/GeneratorState.java | 14 ++- lib/json/common.rb | 5 +- lib/json/pure/generator.rb | 8 +- test/json/json_generator_test.rb | 15 +++ 7 files changed, 160 insertions(+), 79 deletions(-) diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index 7b59309a7..9569f02e4 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -60,6 +60,7 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ # The performance difference is mostly more time spent in GC because of this extra pressure. # If we re-use the same `JSON::State` instance, we're faster than Oj on the array benchmark, and much closer # on the Hash one. 
+benchmark_encoding "small mixed", [1, "string", { a: 1, b: 2 }, [3, 4, 5]] benchmark_encoding "small nested array", [[1,2,3,4,5]]*10 benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" } diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index aa9f1c9de..8118e1392 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -11,6 +11,41 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_e static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict; + +#define GET_STATE_TO(self, state) \ + TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state) + +#define GET_STATE(self) \ + JSON_Generator_State *state; \ + GET_STATE_TO(self, state) + +struct generate_json_data; + +typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); + +struct generate_json_data { + FBuffer *buffer; + VALUE vstate; + JSON_Generator_State *state; + VALUE obj; + generator_func func; +}; + +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func); +static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void 
generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +#ifdef RUBY_INTEGER_UNIFICATION +static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +#endif +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); + /* Converts in_string to a JSON string (without the wrapping '"' * characters) in FBuffer out_buffer. * @@ -629,19 +664,39 @@ static const rb_data_type_t JSON_Generator_State_type = { RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE, }; +static void state_init(JSON_Generator_State *state) +{ + state->max_nesting = 100; + state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; +} + static VALUE cState_s_allocate(VALUE klass) { JSON_Generator_State *state; VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state); - state->max_nesting = 100; - state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + state_init(state); return obj; } +static void vstate_spill(struct generate_json_data *data) +{ + VALUE vstate = cState_s_allocate(cState); + GET_STATE(vstate); + MEMCPY(state, data->state, JSON_Generator_State, 1); + data->state = state; + data->vstate = vstate; +} + +static inline VALUE vstate_get(struct generate_json_data *data) +{ + if (RB_UNLIKELY(!data->vstate)) { + vstate_spill(data); + } + return data->vstate; +} + struct hash_foreach_arg { - FBuffer *buffer; - JSON_Generator_State *state; - VALUE Vstate; + struct 
generate_json_data *data; int iter; }; @@ -649,9 +704,10 @@ static int json_object_i(VALUE key, VALUE val, VALUE _arg) { struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg; - FBuffer *buffer = arg->buffer; - JSON_Generator_State *state = arg->state; - VALUE Vstate = arg->Vstate; + struct generate_json_data *data = arg->data; + + FBuffer *buffer = data->buffer; + JSON_Generator_State *state = data->state; long depth = state->depth; int j; @@ -679,22 +735,21 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) break; } - generate_json_string(buffer, Vstate, state, key_to_s); + generate_json_string(buffer, data, state, key_to_s); if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before); fbuffer_append_char(buffer, ':'); if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space); - generate_json(buffer, Vstate, state, val); + generate_json(buffer, data, state, val); arg->iter++; return ST_CONTINUE; } -static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { long max_nesting = state->max_nesting; long depth = ++state->depth; int j; - struct hash_foreach_arg arg; if (max_nesting != 0 && depth > max_nesting) { rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); @@ -708,10 +763,10 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S fbuffer_append_char(buffer, '{'); - arg.buffer = buffer; - arg.state = state; - arg.Vstate = Vstate; - arg.iter = 0; + struct hash_foreach_arg arg = { + .data = data, + .iter = 0, + }; rb_hash_foreach(obj, json_object_i, (VALUE)&arg); depth = --state->depth; @@ -726,7 +781,7 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S fbuffer_append_char(buffer, '}'); } -static void generate_json_array(FBuffer *buffer, VALUE Vstate, 
JSON_Generator_State *state, VALUE obj) +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { long max_nesting = state->max_nesting; long depth = ++state->depth; @@ -753,7 +808,7 @@ static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_St fbuffer_append_str(buffer, state->indent); } } - generate_json(buffer, Vstate, state, RARRAY_AREF(obj, i)); + generate_json(buffer, data, state, RARRAY_AREF(obj, i)); } state->depth = --depth; if (RB_UNLIKELY(state->array_nl)) { @@ -799,7 +854,7 @@ static inline VALUE ensure_valid_encoding(VALUE str) return str; } -static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { obj = ensure_valid_encoding(obj); @@ -823,43 +878,43 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S fbuffer_append_char(buffer, '"'); } -static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append(buffer, "null", 4); } -static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append(buffer, "false", 5); } -static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append(buffer, "true", 4); } -static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_fixnum(FBuffer 
*buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append_long(buffer, FIX2LONG(obj)); } -static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { VALUE tmp = rb_funcall(obj, i_to_s, 0); fbuffer_append_str(buffer, tmp); } #ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { if (FIXNUM_P(obj)) - generate_json_fixnum(buffer, Vstate, state, obj); + generate_json_fixnum(buffer, data, state, obj); else - generate_json_bignum(buffer, Vstate, state, obj); + generate_json_bignum(buffer, data, state, obj); } #endif -static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { double value = RFLOAT_VALUE(obj); char allow_nan = state->allow_nan; @@ -874,20 +929,20 @@ static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St fbuffer_append_str(buffer, tmp); } -static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { VALUE tmp; if (obj == Qnil) { - generate_json_null(buffer, Vstate, state, obj); + generate_json_null(buffer, data, state, obj); } else if (obj == Qfalse) { - generate_json_false(buffer, Vstate, state, obj); + generate_json_false(buffer, data, state, obj); } else if (obj == Qtrue) { - generate_json_true(buffer, Vstate, state, obj); + generate_json_true(buffer, data, state, obj); } else if 
(RB_SPECIAL_CONST_P(obj)) { if (RB_FIXNUM_P(obj)) { - generate_json_fixnum(buffer, Vstate, state, obj); + generate_json_fixnum(buffer, data, state, obj); } else if (RB_FLONUM_P(obj)) { - generate_json_float(buffer, Vstate, state, obj); + generate_json_float(buffer, data, state, obj); } else { goto general; } @@ -895,54 +950,46 @@ static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *s VALUE klass = RBASIC_CLASS(obj); switch (RB_BUILTIN_TYPE(obj)) { case T_BIGNUM: - generate_json_bignum(buffer, Vstate, state, obj); + generate_json_bignum(buffer, data, state, obj); break; case T_HASH: if (klass != rb_cHash) goto general; - generate_json_object(buffer, Vstate, state, obj); + generate_json_object(buffer, data, state, obj); break; case T_ARRAY: if (klass != rb_cArray) goto general; - generate_json_array(buffer, Vstate, state, obj); + generate_json_array(buffer, data, state, obj); break; case T_STRING: if (klass != rb_cString) goto general; - generate_json_string(buffer, Vstate, state, obj); + generate_json_string(buffer, data, state, obj); break; case T_FLOAT: if (klass != rb_cFloat) goto general; - generate_json_float(buffer, Vstate, state, obj); + generate_json_float(buffer, data, state, obj); break; default: general: if (state->strict) { rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); } else if (rb_respond_to(obj, i_to_json)) { - tmp = rb_funcall(obj, i_to_json, 1, Vstate); + tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data)); Check_Type(tmp, T_STRING); fbuffer_append_str(buffer, tmp); } else { tmp = rb_funcall(obj, i_to_s, 0); Check_Type(tmp, T_STRING); - generate_json_string(buffer, Vstate, state, tmp); + generate_json_string(buffer, data, state, tmp); } } } } -struct generate_json_data { - FBuffer *buffer; - VALUE vstate; - JSON_Generator_State *state; - VALUE obj; - void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -}; - static VALUE generate_json_try(VALUE d) { struct 
generate_json_data *data = (struct generate_json_data *)d; - data->func(data->buffer, data->vstate, data->state, data->obj); + data->func(data->buffer, data, data->state, data->obj); return Qnil; } @@ -957,7 +1004,7 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc) return Qundef; } -static VALUE cState_partial_generate(VALUE self, VALUE obj, void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)) +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func) { GET_STATE(self); @@ -1414,6 +1461,28 @@ static VALUE cState_configure(VALUE self, VALUE opts) return self; } +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts) +{ + JSON_Generator_State state = {0}; + state_init(&state); + configure_state(&state, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + FBuffer buffer = {0}; + fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); + + struct generate_json_data data = { + .buffer = &buffer, + .vstate = Qfalse, + .state = &state, + .obj = obj, + .func = generate_json, + }; + rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); + + return fbuffer_to_s(&buffer); +} + /* * */ @@ -1475,6 +1544,8 @@ void Init_generator(void) rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); rb_define_method(cState, "generate", cState_generate, 1); + rb_define_singleton_method(cState, "generate", cState_m_generate, 2); + VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object"); diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h index e74af24a2..749a627a1 100644 --- a/ext/json/ext/generator/generator.h +++ b/ext/json/ext/generator/generator.h @@ -42,14 +42,6 @@ typedef struct JSON_Generator_StateStruct { bool strict; } JSON_Generator_State; -#define GET_STATE_TO(self, state) \ - 
TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state) - -#define GET_STATE(self) \ - JSON_Generator_State *state; \ - GET_STATE_TO(self, state) - - static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); #ifdef RUBY_INTEGER_UNIFICATION @@ -70,20 +62,7 @@ static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self); static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self); static void State_free(void *state); static VALUE cState_s_allocate(VALUE klass); -static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -#endif -static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static VALUE cState_partial_generate(VALUE self, VALUE obj, void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)); + static VALUE cState_generate(VALUE self, VALUE obj); static VALUE 
cState_from_state_s(VALUE self, VALUE opts); static VALUE cState_indent(VALUE self); diff --git a/java/src/json/ext/GeneratorState.java b/java/src/json/ext/GeneratorState.java index 909f1a56e..1600b04aa 100644 --- a/java/src/json/ext/GeneratorState.java +++ b/java/src/json/ext/GeneratorState.java @@ -5,6 +5,8 @@ */ package json.ext; +import org.jcodings.specific.UTF8Encoding; + import org.jruby.Ruby; import org.jruby.RubyBoolean; import org.jruby.RubyClass; @@ -136,6 +138,11 @@ public static IRubyObject from_state(ThreadContext context, return fromState(context, opts); } + @JRubyMethod(meta=true) + public static IRubyObject generate(ThreadContext context, IRubyObject klass, IRubyObject obj, IRubyObject opts) { + return fromState(context, opts).generate(context, obj); + } + static GeneratorState fromState(ThreadContext context, IRubyObject opts) { return fromState(context, RuntimeInfo.forRuntime(context.getRuntime()), opts); } @@ -225,7 +232,12 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) { public IRubyObject generate(ThreadContext context, IRubyObject obj) { RubyString result = Generator.generateJson(context, obj, this); RuntimeInfo info = RuntimeInfo.forRuntime(context.getRuntime()); - result.force_encoding(context, info.utf8.get()); + if (result.getEncoding() != UTF8Encoding.INSTANCE) { + if (result.isFrozen()) { + result = result.strDup(context.getRuntime()); + } + result.force_encoding(context, info.utf8.get()); + } return result; } diff --git a/lib/json/common.rb b/lib/json/common.rb index 5f86360e0..015a3c578 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -307,11 +307,10 @@ def load_file!(filespec, opts = {}) # def generate(obj, opts = nil) if State === opts - state = opts + opts.generate(obj) else - state = State.new(opts) + State.generate(obj, opts) end - state.generate(obj) end # :stopdoc: diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index 59424d300..8df1692d0 100644 --- 
a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -96,6 +96,10 @@ module Generator # This class is used to create State instances, that are use to hold data # while generating a JSON text from a Ruby data structure. class State + def self.generate(obj, opts = nil) + new(opts).generate(obj) + end + # Creates a State object from _opts_, which ought to be Hash to create # a new State instance configured by _opts_, something else to create # an unconfigured instance. If _opts_ is a State object, it is just @@ -130,7 +134,7 @@ def self.from_state(opts) # * *allow_nan*: true if NaN, Infinity, and -Infinity should be # generated, otherwise an exception is thrown, if these values are # encountered. This options defaults to false. - def initialize(opts = {}) + def initialize(opts = nil) @indent = '' @space = '' @space_before = '' @@ -141,7 +145,7 @@ def initialize(opts = {}) @script_safe = false @strict = false @buffer_initial_length = 1024 - configure opts + configure(opts || {}) end # This string is used to indent levels in the JSON text. 
diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 57c4e6cee..2b1d48b61 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -486,6 +486,21 @@ def test_invalid_encoding_string end end + def test_to_json_called_with_state_object + object = Object.new + called = false + argument = nil + object.singleton_class.define_method(:to_json) do |state| + called = true + argument = state + "" + end + + assert_equal "", JSON.dump(object) + assert called, "#to_json wasn't called" + assert_instance_of JSON::State, argument + end + if defined?(JSON::Ext::Generator) and RUBY_PLATFORM != "java" def test_valid_utf8_in_different_encoding utf8_string = "€™" From 08635312e5fbec000d46746c98fabcacbc8e18e5 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 30 Oct 2024 11:21:59 +0100 Subject: [PATCH 29/75] Setup ruby_memcheck Hoping it might find the leak reported in https://github.com/ruby/json/issues/460 --- .github/workflows/ci.yml | 23 +++++++++++++++++++++++ Gemfile | 1 + Rakefile | 10 ++++++++++ test/json/test_helper.rb | 3 +++ 4 files changed, 37 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b0ff83016..c9422e0d7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,3 +56,26 @@ jobs: - run: gem install pkg/*.gem if: ${{ matrix.ruby != '3.2' }} + + valgrind: + name: Ruby memcheck + runs-on: ubuntu-latest + strategy: + fail-fast: false + + steps: + - uses: actions/checkout@v3 + + - name: Set up Ruby + uses: ruby/setup-ruby-pkgs@v1 + with: + ruby-version: "3.3" + apt-get: ragel valgrind + + - run: | + bundle config --without benchmark + bundle install + + - run: rake compile + + - run: rake valgrind diff --git a/Gemfile b/Gemfile index b5e84b43d..ef2cf7fa0 100644 --- a/Gemfile +++ b/Gemfile @@ -7,6 +7,7 @@ else end group :development do + gem "ruby_memcheck" if RUBY_PLATFORM =~ /linux/i gem "ostruct" gem "rake" gem "rake-compiler" diff --git a/Rakefile 
b/Rakefile index e22a3ddd1..7a013eb0d 100644 --- a/Rakefile +++ b/Rakefile @@ -250,6 +250,16 @@ else t.options = '-v' end + begin + require "ruby_memcheck" + RubyMemcheck::TestTask.new(valgrind: [ :set_env_ext, :check_env, :compile, :do_test_ext ]) do |t| + t.test_files = FileList['test/json/*_test.rb'] + t.verbose = true + t.options = '-v' + end + rescue LoadError + end + desc "Update the tags file" task :tags do system 'ctags', *Dir['**/*.{rb,c,h,java}'] diff --git a/test/json/test_helper.rb b/test/json/test_helper.rb index e8bba16f8..7bff9b339 100644 --- a/test/json/test_helper.rb +++ b/test/json/test_helper.rb @@ -1,12 +1,15 @@ case ENV['JSON'] when 'pure' $LOAD_PATH.unshift(File.expand_path('../../../lib', __FILE__)) + $stderr.puts("Testing JSON::Pure") require 'json/pure' when 'ext' + $stderr.puts("Testing JSON::Ext") $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) require 'json/ext' else $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) + $stderr.puts("Testing JSON") require 'json' end From 72b92cf57df513c2346f7d26312f72a1bbd30528 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 24 Oct 2024 16:51:25 +0200 Subject: [PATCH 30/75] Deprecate unsafe default options of `JSON.load` [Feature #19528] Ref: https://bugs.ruby-lang.org/issues/19528 `load` is understood as the default method for serializer kind of libraries, and the default options of `JSON.load` has caused many security vulnerabilities over the years. The plan is to do like YAML/Psych, deprecate these default options and direct users toward using `JSON.unsafe_load` so at least it's obvious it should be used against untrusted data. 
--- ext/json/ext/parser/parser.c | 135 ++++++++++++----------- ext/json/ext/parser/parser.h | 1 + ext/json/ext/parser/parser.rl | 15 ++- java/src/json/ext/Parser.java | 164 ++++++++++++++++------------ java/src/json/ext/Parser.rl | 22 +++- lib/json/common.rb | 187 +++++++++++++++++++++++++++++--- lib/json/pure/parser.rb | 25 +++-- test/json/json_addition_test.rb | 6 + 8 files changed, 394 insertions(+), 161 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index a6d8ff2cc..38c546b46 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -474,6 +474,9 @@ case 26: if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { + if (json->deprecated_create_additions) { + rb_warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } *result = rb_funcall(klass, i_json_create, 1, *result); } } @@ -486,7 +489,7 @@ case 26: -#line 490 "parser.c" +#line 493 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -494,7 +497,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 287 "parser.rl" +#line 290 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -502,14 +505,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 506 "parser.c" +#line 509 "parser.c" { cs = JSON_value_start; } -#line 294 "parser.rl" +#line 297 "parser.rl" -#line 513 "parser.c" +#line 516 "parser.c" { if ( p == pe ) goto _test_eof; @@ -543,14 +546,14 @@ case 1: cs = 0; goto _out; tr2: -#line 239 "parser.rl" +#line 242 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } 
goto st29; tr3: -#line 244 "parser.rl" +#line 247 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -570,7 +573,7 @@ cs = 0; } goto st29; tr7: -#line 262 "parser.rl" +#line 265 "parser.rl" { char *np; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -578,7 +581,7 @@ cs = 0; } goto st29; tr11: -#line 268 "parser.rl" +#line 271 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -586,7 +589,7 @@ cs = 0; } goto st29; tr25: -#line 232 "parser.rl" +#line 235 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -596,7 +599,7 @@ cs = 0; } goto st29; tr27: -#line 225 "parser.rl" +#line 228 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -606,19 +609,19 @@ cs = 0; } goto st29; tr31: -#line 219 "parser.rl" +#line 222 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 216 "parser.rl" +#line 219 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 222 "parser.rl" +#line 225 "parser.rl" { *result = Qtrue; } @@ -627,9 +630,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 274 "parser.rl" +#line 277 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 633 "parser.c" +#line 636 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -870,7 +873,7 @@ case 28: _out: {} } -#line 295 "parser.rl" +#line 298 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -884,7 +887,7 @@ case 28: } -#line 888 "parser.c" +#line 891 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -892,7 +895,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 315 "parser.rl" +#line 318 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -900,15 +903,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 904 "parser.c" +#line 907 "parser.c" { cs = JSON_integer_start; } -#line 322 
"parser.rl" +#line 325 "parser.rl" json->memo = p; -#line 912 "parser.c" +#line 915 "parser.c" { if ( p == pe ) goto _test_eof; @@ -942,14 +945,14 @@ case 3: goto st0; goto tr4; tr4: -#line 312 "parser.rl" +#line 315 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 953 "parser.c" +#line 956 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -968,7 +971,7 @@ case 5: _out: {} } -#line 324 "parser.rl" +#line 327 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -983,7 +986,7 @@ case 5: } -#line 987 "parser.c" +#line 990 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -991,7 +994,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 349 "parser.rl" +#line 352 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -999,15 +1002,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1003 "parser.c" +#line 1006 "parser.c" { cs = JSON_float_start; } -#line 356 "parser.rl" +#line 359 "parser.rl" json->memo = p; -#line 1011 "parser.c" +#line 1014 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1065,14 +1068,14 @@ case 8: goto st0; goto tr9; tr9: -#line 343 "parser.rl" +#line 346 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1076 "parser.c" +#line 1079 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1133,7 +1136,7 @@ case 7: _out: {} } -#line 358 "parser.rl" +#line 361 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; @@ -1186,7 +1189,7 @@ case 7: -#line 1190 "parser.c" +#line 1193 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 17}; enum {JSON_array_error = 0}; @@ -1194,7 +1197,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 438 "parser.rl" +#line 441 "parser.rl" static char 
*JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1208,14 +1211,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); -#line 1212 "parser.c" +#line 1215 "parser.c" { cs = JSON_array_start; } -#line 451 "parser.rl" +#line 454 "parser.rl" -#line 1219 "parser.c" +#line 1222 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1254,7 +1257,7 @@ case 2: goto st2; goto st0; tr2: -#line 415 "parser.rl" +#line 418 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1274,7 +1277,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1278 "parser.c" +#line 1281 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -1374,14 +1377,14 @@ case 12: goto st3; goto st12; tr4: -#line 430 "parser.rl" +#line 433 "parser.rl" { p--; {p++; cs = 17; goto _out;} } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 1385 "parser.c" +#line 1388 "parser.c" goto st0; st13: if ( ++p == pe ) @@ -1437,7 +1440,7 @@ case 16: _out: {} } -#line 452 "parser.rl" +#line 455 "parser.rl" if(cs >= JSON_array_first_final) { return p + 1; @@ -1598,7 +1601,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int } -#line 1602 "parser.c" +#line 1605 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1606,7 +1609,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 630 "parser.rl" +#line 633 "parser.rl" static int @@ -1627,15 +1630,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1631 "parser.c" +#line 1634 "parser.c" { cs = JSON_string_start; } -#line 650 "parser.rl" +#line 653 "parser.rl" json->memo = p; -#line 1639 "parser.c" +#line 1642 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1660,7 
+1663,7 @@ case 2: goto st0; goto st2; tr2: -#line 617 "parser.rl" +#line 620 "parser.rl" { *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { @@ -1670,14 +1673,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 627 "parser.rl" +#line 630 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1681 "parser.c" +#line 1684 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1753,7 +1756,7 @@ case 7: _out: {} } -#line 652 "parser.rl" +#line 655 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1888,10 +1891,16 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } tmp = ID2SYM(i_create_additions); if (option_given_p(opts, tmp)) { - json->create_additions = RTEST(rb_hash_aref(opts, tmp)); - } else { - json->create_additions = 0; + tmp = rb_hash_aref(opts, tmp); + if (NIL_P(tmp)) { + json->create_additions = 1; + json->deprecated_create_additions = 1; + } else { + json->create_additions = RTEST(tmp); + json->deprecated_create_additions = 0; + } } + if (json->symbolize_names && json->create_additions) { rb_raise(rb_eArgError, "options :symbolize_names and :create_additions cannot be " @@ -1946,7 +1955,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1950 "parser.c" +#line 1959 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1954,7 +1963,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 858 "parser.rl" +#line 867 "parser.rl" /* @@ -1972,16 +1981,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 1976 "parser.c" +#line 1985 "parser.c" { cs = JSON_start; } -#line 875 "parser.rl" +#line 884 "parser.rl" p = json->source; pe = p + json->len; -#line 1985 "parser.c" +#line 1994 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2015,7 +2024,7 @@ case 1: cs = 0; goto _out; 
tr2: -#line 850 "parser.rl" +#line 859 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2025,7 +2034,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2029 "parser.c" +#line 2038 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2114,7 +2123,7 @@ case 9: _out: {} } -#line 878 "parser.rl" +#line 887 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/ext/json/ext/parser/parser.h b/ext/json/ext/parser/parser.h index 2a5b13856..6bf21aab5 100644 --- a/ext/json/ext/parser/parser.h +++ b/ext/json/ext/parser/parser.h @@ -26,6 +26,7 @@ typedef struct JSON_ParserStruct { char symbolize_names; char freeze; char create_additions; + char deprecated_create_additions; } JSON_Parser; #define GET_PARSER \ diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 499cc693a..441e58e7b 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -196,6 +196,9 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { + if (json->deprecated_create_additions) { + rb_warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } *result = rb_funcall(klass, i_json_create, 1, *result); } } @@ -783,10 +786,16 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } tmp = ID2SYM(i_create_additions); if (option_given_p(opts, tmp)) { - json->create_additions = RTEST(rb_hash_aref(opts, tmp)); - } else { - json->create_additions = 0; + tmp = rb_hash_aref(opts, tmp); + if (NIL_P(tmp)) { + json->create_additions = 1; + json->deprecated_create_additions = 1; + } else { + json->create_additions = 
RTEST(tmp); + json->deprecated_create_additions = 0; + } } + if (json->symbolize_names && json->create_additions) { rb_raise(rb_eArgError, "options :symbolize_names and :create_additions cannot be " diff --git a/java/src/json/ext/Parser.java b/java/src/json/ext/Parser.java index 4234fd21f..92001b3e0 100644 --- a/java/src/json/ext/Parser.java +++ b/java/src/json/ext/Parser.java @@ -51,6 +51,7 @@ public class Parser extends RubyObject { private RubyString vSource; private RubyString createId; private boolean createAdditions; + private boolean deprecatedCreateAdditions; private int maxNesting; private boolean allowNaN; private boolean symbolizeNames; @@ -171,7 +172,20 @@ public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { this.symbolizeNames = opts.getBool("symbolize_names", false); this.freeze = opts.getBool("freeze", false); this.createId = opts.getString("create_id", getCreateId(context)); - this.createAdditions = opts.getBool("create_additions", false); + + IRubyObject additions = opts.get("create_additions"); + this.createAdditions = false; + this.deprecatedCreateAdditions = false; + + if (additions != null) { + if (additions.isNil()) { + this.createAdditions = true; + this.deprecatedCreateAdditions = true; + } else { + this.createAdditions = opts.getBool("create_additions", false); + } + } + this.objectClass = opts.getClass("object_class", runtime.getHash()); this.arrayClass = opts.getClass("array_class", runtime.getArray()); this.decimalClass = opts.getClass("decimal_class", null); @@ -342,11 +356,11 @@ private Ruby getRuntime() { } -// line 368 "Parser.rl" +// line 382 "Parser.rl" -// line 350 "Parser.java" +// line 364 "Parser.java" private static byte[] init__JSON_value_actions_0() { return new byte [] { @@ -460,7 +474,7 @@ private static byte[] init__JSON_value_from_state_actions_0() static final int JSON_value_en_main = 1; -// line 474 "Parser.rl" +// line 488 "Parser.rl" void parseValue(ParserResult res, int p, int pe) { @@ -468,14 
+482,14 @@ void parseValue(ParserResult res, int p, int pe) { IRubyObject result = null; -// line 472 "Parser.java" +// line 486 "Parser.java" { cs = JSON_value_start; } -// line 481 "Parser.rl" +// line 495 "Parser.rl" -// line 479 "Parser.java" +// line 493 "Parser.java" { int _klen; int _trans = 0; @@ -501,13 +515,13 @@ void parseValue(ParserResult res, int p, int pe) { while ( _nacts-- > 0 ) { switch ( _JSON_value_actions[_acts++] ) { case 9: -// line 459 "Parser.rl" +// line 473 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 511 "Parser.java" +// line 525 "Parser.java" } } @@ -570,25 +584,25 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) switch ( _JSON_value_actions[_acts++] ) { case 0: -// line 376 "Parser.rl" +// line 390 "Parser.rl" { result = getRuntime().getNil(); } break; case 1: -// line 379 "Parser.rl" +// line 393 "Parser.rl" { result = getRuntime().getFalse(); } break; case 2: -// line 382 "Parser.rl" +// line 396 "Parser.rl" { result = getRuntime().getTrue(); } break; case 3: -// line 385 "Parser.rl" +// line 399 "Parser.rl" { if (parser.allowNaN) { result = getConstant(CONST_NAN); @@ -598,7 +612,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 4: -// line 392 "Parser.rl" +// line 406 "Parser.rl" { if (parser.allowNaN) { result = getConstant(CONST_INFINITY); @@ -608,7 +622,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 5: -// line 399 "Parser.rl" +// line 413 "Parser.rl" { if (pe > p + 8 && absSubSequence(p, p + 9).equals(JSON_MINUS_INFINITY)) { @@ -637,7 +651,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 6: -// line 425 "Parser.rl" +// line 439 "Parser.rl" { parseString(res, p, pe); if (res.result == null) { @@ -650,7 +664,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 7: -// line 435 "Parser.rl" +// line 449 "Parser.rl" { currentNesting++; parseArray(res, p, pe); @@ -665,7 +679,7 @@ else if ( 
data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 8: -// line 447 "Parser.rl" +// line 461 "Parser.rl" { currentNesting++; parseObject(res, p, pe); @@ -679,7 +693,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } } break; -// line 683 "Parser.java" +// line 697 "Parser.java" } } } @@ -699,7 +713,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) break; } } -// line 482 "Parser.rl" +// line 496 "Parser.rl" if (cs >= JSON_value_first_final && result != null) { if (parser.freeze) { @@ -712,7 +726,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } -// line 716 "Parser.java" +// line 730 "Parser.java" private static byte[] init__JSON_integer_actions_0() { return new byte [] { @@ -811,7 +825,7 @@ private static byte[] init__JSON_integer_trans_actions_0() static final int JSON_integer_en_main = 1; -// line 504 "Parser.rl" +// line 518 "Parser.rl" void parseInteger(ParserResult res, int p, int pe) { @@ -829,15 +843,15 @@ int parseIntegerInternal(int p, int pe) { int cs = EVIL; -// line 833 "Parser.java" +// line 847 "Parser.java" { cs = JSON_integer_start; } -// line 521 "Parser.rl" +// line 535 "Parser.rl" int memo = p; -// line 841 "Parser.java" +// line 855 "Parser.java" { int _klen; int _trans = 0; @@ -918,13 +932,13 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) switch ( _JSON_integer_actions[_acts++] ) { case 0: -// line 498 "Parser.rl" +// line 512 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 928 "Parser.java" +// line 942 "Parser.java" } } } @@ -944,7 +958,7 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) break; } } -// line 523 "Parser.rl" +// line 537 "Parser.rl" if (cs < JSON_integer_first_final) { return -1; @@ -964,7 +978,7 @@ RubyInteger bytesToInum(Ruby runtime, ByteList num) { } -// line 968 "Parser.java" +// line 982 "Parser.java" private static byte[] init__JSON_float_actions_0() { return new byte [] { @@ -1066,7 +1080,7 @@ private static byte[] 
init__JSON_float_trans_actions_0() static final int JSON_float_en_main = 1; -// line 556 "Parser.rl" +// line 570 "Parser.rl" void parseFloat(ParserResult res, int p, int pe) { @@ -1085,15 +1099,15 @@ int parseFloatInternal(int p, int pe) { int cs = EVIL; -// line 1089 "Parser.java" +// line 1103 "Parser.java" { cs = JSON_float_start; } -// line 574 "Parser.rl" +// line 588 "Parser.rl" int memo = p; -// line 1097 "Parser.java" +// line 1111 "Parser.java" { int _klen; int _trans = 0; @@ -1174,13 +1188,13 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) switch ( _JSON_float_actions[_acts++] ) { case 0: -// line 547 "Parser.rl" +// line 561 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1184 "Parser.java" +// line 1198 "Parser.java" } } } @@ -1200,7 +1214,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) break; } } -// line 576 "Parser.rl" +// line 590 "Parser.rl" if (cs < JSON_float_first_final) { return -1; @@ -1210,7 +1224,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) } -// line 1214 "Parser.java" +// line 1228 "Parser.java" private static byte[] init__JSON_string_actions_0() { return new byte [] { @@ -1312,7 +1326,7 @@ private static byte[] init__JSON_string_trans_actions_0() static final int JSON_string_en_main = 1; -// line 615 "Parser.rl" +// line 629 "Parser.rl" void parseString(ParserResult res, int p, int pe) { @@ -1320,15 +1334,15 @@ void parseString(ParserResult res, int p, int pe) { IRubyObject result = null; -// line 1324 "Parser.java" +// line 1338 "Parser.java" { cs = JSON_string_start; } -// line 622 "Parser.rl" +// line 636 "Parser.rl" int memo = p; -// line 1332 "Parser.java" +// line 1346 "Parser.java" { int _klen; int _trans = 0; @@ -1409,7 +1423,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) switch ( _JSON_string_actions[_acts++] ) { case 0: -// line 590 "Parser.rl" +// line 604 "Parser.rl" { int offset = byteList.begin(); ByteList decoded = decoder.decode(byteList, 
memo + 1 - offset, @@ -1424,13 +1438,13 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) } break; case 1: -// line 603 "Parser.rl" +// line 617 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1434 "Parser.java" +// line 1448 "Parser.java" } } } @@ -1450,7 +1464,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) break; } } -// line 624 "Parser.rl" +// line 638 "Parser.rl" if (parser.createAdditions) { RubyHash matchString = parser.match_string; @@ -1471,6 +1485,9 @@ public void visit(IRubyObject pattern, IRubyObject klass) { RubyClass klass = (RubyClass) memoArray[1]; if (klass.respondsTo("json_creatable?") && klass.callMethod(context, "json_creatable?").isTrue()) { + if (parser.deprecatedCreateAdditions) { + klass.getRuntime().getWarnings().warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } result = klass.callMethod(context, "json_create", result); } } @@ -1495,7 +1512,7 @@ public void visit(IRubyObject pattern, IRubyObject klass) { } -// line 1499 "Parser.java" +// line 1516 "Parser.java" private static byte[] init__JSON_array_actions_0() { return new byte [] { @@ -1608,7 +1625,7 @@ private static byte[] init__JSON_array_trans_actions_0() static final int JSON_array_en_main = 1; -// line 704 "Parser.rl" +// line 721 "Parser.rl" void parseArray(ParserResult res, int p, int pe) { @@ -1628,14 +1645,14 @@ void parseArray(ParserResult res, int p, int pe) { } -// line 1632 "Parser.java" +// line 1649 "Parser.java" { cs = JSON_array_start; } -// line 723 "Parser.rl" +// line 740 "Parser.rl" -// line 1639 "Parser.java" +// line 1656 "Parser.java" { int _klen; int _trans = 0; @@ -1716,7 +1733,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) switch ( _JSON_array_actions[_acts++] ) { case 0: -// line 673 "Parser.rl" +// line 690 "Parser.rl" { parseValue(res, p, pe); if (res.result == 
null) { @@ -1733,13 +1750,13 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) } break; case 1: -// line 688 "Parser.rl" +// line 705 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1743 "Parser.java" +// line 1760 "Parser.java" } } } @@ -1759,7 +1776,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) break; } } -// line 724 "Parser.rl" +// line 741 "Parser.rl" if (cs >= JSON_array_first_final) { res.update(result, p + 1); @@ -1769,7 +1786,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) } -// line 1773 "Parser.java" +// line 1790 "Parser.java" private static byte[] init__JSON_object_actions_0() { return new byte [] { @@ -1892,7 +1909,7 @@ private static byte[] init__JSON_object_trans_actions_0() static final int JSON_object_en_main = 1; -// line 781 "Parser.rl" +// line 798 "Parser.rl" void parseObject(ParserResult res, int p, int pe) { @@ -1917,14 +1934,14 @@ void parseObject(ParserResult res, int p, int pe) { } -// line 1921 "Parser.java" +// line 1938 "Parser.java" { cs = JSON_object_start; } -// line 805 "Parser.rl" +// line 822 "Parser.rl" -// line 1928 "Parser.java" +// line 1945 "Parser.java" { int _klen; int _trans = 0; @@ -2005,7 +2022,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) switch ( _JSON_object_actions[_acts++] ) { case 0: -// line 738 "Parser.rl" +// line 755 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -2022,7 +2039,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } break; case 1: -// line 753 "Parser.rl" +// line 770 "Parser.rl" { parseString(res, p, pe); if (res.result == null) { @@ -2040,13 +2057,13 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } break; case 2: -// line 769 "Parser.rl" +// line 786 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 2050 "Parser.java" +// line 2067 "Parser.java" } } } @@ -2066,7 +2083,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) break; } } 
-// line 806 "Parser.rl" +// line 823 "Parser.rl" if (cs < JSON_object_first_final) { res.update(null, p + 1); @@ -2090,6 +2107,9 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) callMethod(context, "deep_const_get", vKlassName); if (klass.respondsTo("json_creatable?") && klass.callMethod(context, "json_creatable?").isTrue()) { + if (parser.deprecatedCreateAdditions) { + klass.getRuntime().getWarnings().warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } returnedResult = klass.callMethod(context, "json_create", result); } @@ -2099,7 +2119,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } -// line 2103 "Parser.java" +// line 2123 "Parser.java" private static byte[] init__JSON_actions_0() { return new byte [] { @@ -2202,7 +2222,7 @@ private static byte[] init__JSON_trans_actions_0() static final int JSON_en_main = 1; -// line 857 "Parser.rl" +// line 877 "Parser.rl" public IRubyObject parseImplemetation() { @@ -2212,16 +2232,16 @@ public IRubyObject parseImplemetation() { ParserResult res = new ParserResult(); -// line 2216 "Parser.java" +// line 2236 "Parser.java" { cs = JSON_start; } -// line 866 "Parser.rl" +// line 886 "Parser.rl" p = byteList.begin(); pe = p + byteList.length(); -// line 2225 "Parser.java" +// line 2245 "Parser.java" { int _klen; int _trans = 0; @@ -2302,7 +2322,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) switch ( _JSON_actions[_acts++] ) { case 0: -// line 843 "Parser.rl" +// line 863 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -2314,7 +2334,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) } } break; -// line 2318 "Parser.java" +// line 2338 "Parser.java" } } } @@ -2334,7 +2354,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) break; } } -// line 869 "Parser.rl" +// line 889 "Parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git 
a/java/src/json/ext/Parser.rl b/java/src/json/ext/Parser.rl index 0fd6d7968..2173105f9 100644 --- a/java/src/json/ext/Parser.rl +++ b/java/src/json/ext/Parser.rl @@ -49,6 +49,7 @@ public class Parser extends RubyObject { private RubyString vSource; private RubyString createId; private boolean createAdditions; + private boolean deprecatedCreateAdditions; private int maxNesting; private boolean allowNaN; private boolean symbolizeNames; @@ -169,7 +170,20 @@ public class Parser extends RubyObject { this.symbolizeNames = opts.getBool("symbolize_names", false); this.freeze = opts.getBool("freeze", false); this.createId = opts.getString("create_id", getCreateId(context)); - this.createAdditions = opts.getBool("create_additions", false); + + IRubyObject additions = opts.get("create_additions"); + this.createAdditions = false; + this.deprecatedCreateAdditions = false; + + if (additions != null) { + if (additions.isNil()) { + this.createAdditions = true; + this.deprecatedCreateAdditions = true; + } else { + this.createAdditions = opts.getBool("create_additions", false); + } + } + this.objectClass = opts.getClass("object_class", runtime.getHash()); this.arrayClass = opts.getClass("array_class", runtime.getArray()); this.decimalClass = opts.getClass("decimal_class", null); @@ -641,6 +655,9 @@ public class Parser extends RubyObject { RubyClass klass = (RubyClass) memoArray[1]; if (klass.respondsTo("json_creatable?") && klass.callMethod(context, "json_creatable?").isTrue()) { + if (parser.deprecatedCreateAdditions) { + klass.getRuntime().getWarnings().warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } result = klass.callMethod(context, "json_create", result); } } @@ -826,6 +843,9 @@ public class Parser extends RubyObject { callMethod(context, "deep_const_get", vKlassName); if (klass.respondsTo("json_creatable?") && klass.callMethod(context, 
"json_creatable?").isTrue()) { + if (parser.deprecatedCreateAdditions) { + klass.getRuntime().getWarnings().warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } returnedResult = klass.callMethod(context, "json_create", result); } diff --git a/lib/json/common.rb b/lib/json/common.rb index 015a3c578..546b6ec80 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -49,18 +49,9 @@ def parser=(parser) # :nodoc: # level (absolute namespace path?). If there doesn't exist a constant at # the given path, an ArgumentError is raised. def deep_const_get(path) # :nodoc: - path.to_s.split(/::/).inject(Object) do |p, c| - case - when c.empty? then p - when p.const_defined?(c, true) then p.const_get(c) - else - begin - p.const_missing(c) - rescue NameError => e - raise ArgumentError, "can't get const #{path}: #{e}" - end - end - end + Object.const_get(path) + rescue NameError => e + raise ArgumentError, "can't get const #{path}: #{e}" end # Set the module _generator_ to be used by JSON. @@ -69,7 +60,7 @@ def generator=(generator) # :nodoc: @generator = generator generator_methods = generator::GeneratorMethods for const in generator_methods.constants - klass = deep_const_get(const) + klass = const_get(const) modul = generator_methods.const_get(const) klass.class_eval do instance_methods(false).each do |m| @@ -403,6 +394,20 @@ def pretty_generate(obj, opts = nil) module_function :pretty_unparse # :startdoc: + class << self + # Sets or returns default options for the JSON.unsafe_load method. 
+ # Initially: + # opts = JSON.load_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} + attr_accessor :unsafe_load_default_options + end + self.unsafe_load_default_options = { + :max_nesting => false, + :allow_nan => true, + :allow_blank => true, + :create_additions => true, + } + class << self # Sets or returns default options for the JSON.load method. # Initially: @@ -411,11 +416,162 @@ class << self attr_accessor :load_default_options end self.load_default_options = { - :max_nesting => false, :allow_nan => true, :allow_blank => true, - :create_additions => true, + :create_additions => nil, } + # :call-seq: + # JSON.unsafe_load(source, proc = nil, options = {}) -> object + # + # Returns the Ruby objects created by parsing the given +source+. + # + # - Argument +source+ must be, or be convertible to, a \String: + # - If +source+ responds to instance method +to_str+, + # source.to_str becomes the source. + # - If +source+ responds to instance method +to_io+, + # source.to_io.read becomes the source. + # - If +source+ responds to instance method +read+, + # source.read becomes the source. + # - If both of the following are true, source becomes the \String 'null': + # - Option +allow_blank+ specifies a truthy value. + # - The source, as defined above, is +nil+ or the empty \String ''. + # - Otherwise, +source+ remains the source. + # - Argument +proc+, if given, must be a \Proc that accepts one argument. + # It will be called recursively with each result (depth-first order). + # See details below. + # BEWARE: This method is meant to serialise data from trusted user input, + # like from your own database server or clients under your control, it could + # be dangerous to allow untrusted users to pass JSON sources into it. + # - Argument +opts+, if given, contains a \Hash of options for the parsing. + # See {Parsing Options}[#module-JSON-label-Parsing+Options]. 
+ # The default options can be changed via method JSON.unsafe_load_default_options=. + # + # --- + # + # When no +proc+ is given, modifies +source+ as above and returns the result of + # parse(source, opts); see #parse. + # + # Source for following examples: + # source = <<~JSON + # { + # "name": "Dave", + # "age" :40, + # "hats": [ + # "Cattleman's", + # "Panama", + # "Tophat" + # ] + # } + # JSON + # + # Load a \String: + # ruby = JSON.unsafe_load(source) + # ruby # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # Load an \IO object: + # require 'stringio' + # object = JSON.unsafe_load(StringIO.new(source)) + # object # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # Load a \File object: + # path = 't.json' + # File.write(path, source) + # File.open(path) do |file| + # JSON.unsafe_load(file) + # end # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # --- + # + # When +proc+ is given: + # - Modifies +source+ as above. + # - Gets the +result+ from calling parse(source, opts). + # - Recursively calls proc(result). + # - Returns the final result. + # + # Example: + # require 'json' + # + # # Some classes for the example. + # class Base + # def initialize(attributes) + # @attributes = attributes + # end + # end + # class User < Base; end + # class Account < Base; end + # class Admin < Base; end + # # The JSON source. + # json = <<-EOF + # { + # "users": [ + # {"type": "User", "username": "jane", "email": "jane@example.com"}, + # {"type": "User", "username": "john", "email": "john@example.com"} + # ], + # "accounts": [ + # {"account": {"type": "Account", "paid": true, "account_id": "1234"}}, + # {"account": {"type": "Account", "paid": false, "account_id": "1235"}} + # ], + # "admins": {"type": "Admin", "password": "0wn3d"} + # } + # EOF + # # Deserializer method. 
+ # def deserialize_obj(obj, safe_types = %w(User Account Admin)) + # type = obj.is_a?(Hash) && obj["type"] + # safe_types.include?(type) ? Object.const_get(type).new(obj) : obj + # end + # # Call to JSON.unsafe_load + # ruby = JSON.unsafe_load(json, proc {|obj| + # case obj + # when Hash + # obj.each {|k, v| obj[k] = deserialize_obj v } + # when Array + # obj.map! {|v| deserialize_obj v } + # end + # }) + # pp ruby + # Output: + # {"users"=> + # [#"User", "username"=>"jane", "email"=>"jane@example.com"}>, + # #"User", "username"=>"john", "email"=>"john@example.com"}>], + # "accounts"=> + # [{"account"=> + # #"Account", "paid"=>true, "account_id"=>"1234"}>}, + # {"account"=> + # #"Account", "paid"=>false, "account_id"=>"1235"}>}], + # "admins"=> + # #"Admin", "password"=>"0wn3d"}>} + # + def unsafe_load(source, proc = nil, options = nil) + opts = if options.nil? + unsafe_load_default_options + else + unsafe_load_default_options.merge(options) + end + + unless source.is_a?(String) + if source.respond_to? :to_str + source = source.to_str + elsif source.respond_to? :to_io + source = source.to_io.read + elsif source.respond_to?(:read) + source = source.read + end + end + + if opts[:allow_blank] && (source.nil? || source.empty?) + source = 'null' + end + result = parse(source, opts) + recurse_proc(result, &proc) if proc + result + end # :call-seq: # JSON.load(source, proc = nil, options = {}) -> object @@ -439,6 +595,7 @@ class << self # BEWARE: This method is meant to serialise data from trusted user input, # like from your own database server or clients under your control, it could # be dangerous to allow untrusted users to pass JSON sources into it. + # If you must use it, use JSON.unsafe_load instead to make it clear. # - Argument +opts+, if given, contains a \Hash of options for the parsing. # See {Parsing Options}[#module-JSON-label-Parsing+Options]. # The default options can be changed via method JSON.load_default_options=. 
diff --git a/lib/json/pure/parser.rb b/lib/json/pure/parser.rb index 0c53eb79a..699f89c25 100644 --- a/lib/json/pure/parser.rb +++ b/lib/json/pure/parser.rb @@ -90,11 +90,14 @@ def initialize(source, opts = nil) @allow_nan = !!opts[:allow_nan] @symbolize_names = !!opts[:symbolize_names] @freeze = !!opts[:freeze] - if opts.key?(:create_additions) - @create_additions = !!opts[:create_additions] - else - @create_additions = false + + @deprecated_create_additions = false + @create_additions = opts.fetch(:create_additions, false) + if @create_additions.nil? + @create_additions = true + @deprecated_create_additions = true end + @symbolize_names && @create_additions and raise ArgumentError, 'options :symbolize_names and :create_additions cannot be used '\ 'in conjunction' @@ -185,8 +188,13 @@ def parse_string if @create_additions and @match_string for (regexp, klass) in @match_string - klass.json_creatable? or next - string =~ regexp and return klass.json_create(string) + if klass.json_creatable? and string.match?(regexp) + if @deprecated_create_additions + warn "JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`" + end + + return klass.json_create(string) + end end end string @@ -308,8 +316,11 @@ def parse_object raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!" end if @create_additions and klassname = result[@create_id] - klass = JSON.deep_const_get klassname + klass = JSON.deep_const_get(klassname) break unless klass and klass.json_creatable? 
+ if @deprecated_create_additions + warn "JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`" + end result = klass.json_create(result) end break diff --git a/test/json/json_addition_test.rb b/test/json/json_addition_test.rb index 8c3fbda56..d78ae55c3 100644 --- a/test/json/json_addition_test.rb +++ b/test/json/json_addition_test.rb @@ -162,6 +162,12 @@ def test_core assert_equal(/foo/i, JSON(JSON(/foo/i), :create_additions => true)) end + def test_deprecated_load_create_additions + assert_warning(/use JSON\.unsafe_load/) do + JSON.load(JSON.dump(Time.now)) + end + end + def test_utc_datetime now = Time.now d = DateTime.parse(now.to_s) # usual case From 42402fc13f12ab7043718c3e54aa511df1593358 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 24 Oct 2024 16:04:50 +0200 Subject: [PATCH 31/75] Emit warnings when dumping binary strings Because of its Ruby 1.8 heritage, the C extension doesn't care much about string encodings. We should get stricter over time. 
--- ext/json/ext/generator/generator.c | 13 +++++++------ ext/json/ext/parser/parser.c | 25 ++++++++++++++----------- ext/json/ext/parser/parser.rl | 9 ++++++--- lib/json/add/bigdecimal.rb | 2 +- test/json/json_generator_test.rb | 9 +++++++-- 5 files changed, 35 insertions(+), 23 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 8118e1392..00d9ffda0 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -46,6 +46,8 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static int usascii_encindex, utf8_encindex, binary_encindex; + /* Converts in_string to a JSON string (without the wrapping '"' * characters) in FBuffer out_buffer. * @@ -535,7 +537,7 @@ static VALUE mString_to_json_raw_object(VALUE self) VALUE result = rb_hash_new(); rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); - rb_hash_aset(result, rb_str_new2("raw"), ary); + rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary); return result; } @@ -822,8 +824,6 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data fbuffer_append_char(buffer, ']'); } -static int usascii_encindex, utf8_encindex, binary_encindex; - static inline int enc_utf8_compatible_p(int enc_idx) { if (enc_idx == usascii_encindex) return 1; @@ -837,13 +837,14 @@ static inline VALUE ensure_valid_encoding(VALUE str) VALUE utf8_string; if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) { if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. 
- // TODO: Deprecate in 2.8.0 - // TODO: Remove in 3.0.0 utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); switch (rb_enc_str_coderange(utf8_string)) { case ENC_CODERANGE_7BIT: + return utf8_string; case ENC_CODERANGE_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. + // TODO: Raise in 3.0.0 + rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); return utf8_string; break; } diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 38c546b46..9d2c1a5ea 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1798,9 +1798,12 @@ static VALUE convert_encoding(VALUE source) if (encindex == binary_encindex) { // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Deprecate in 2.8.0 - // TODO: Remove in 3.0.0 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + switch (rb_enc_str_coderange(utf8_string)) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + return utf8_string; + } } return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding()); @@ -1955,7 +1958,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1959 "parser.c" +#line 1962 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1963,7 +1966,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 867 "parser.rl" +#line 870 "parser.rl" /* @@ -1981,16 +1984,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 1985 "parser.c" +#line 1988 "parser.c" { cs = JSON_start; } -#line 884 "parser.rl" +#line 887 "parser.rl" p = json->source; pe = p + json->len; -#line 1994 "parser.c" +#line 1997 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2024,7 +2027,7 @@ case 1: cs = 0; goto _out; tr2: -#line 859 
"parser.rl" +#line 862 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2034,7 +2037,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2038 "parser.c" +#line 2041 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2123,7 +2126,7 @@ case 9: _out: {} } -#line 887 "parser.rl" +#line 890 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 441e58e7b..b03a1d592 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -693,9 +693,12 @@ static VALUE convert_encoding(VALUE source) if (encindex == binary_encindex) { // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Deprecate in 2.8.0 - // TODO: Remove in 3.0.0 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + switch (rb_enc_str_coderange(utf8_string)) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + return utf8_string; + } } return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding()); diff --git a/lib/json/add/bigdecimal.rb b/lib/json/add/bigdecimal.rb index b8d0bb468..5dbc12c07 100644 --- a/lib/json/add/bigdecimal.rb +++ b/lib/json/add/bigdecimal.rb @@ -35,7 +35,7 @@ def self.json_create(object) def as_json(*) { JSON.create_id => self.class.name, - 'b' => _dump, + 'b' => _dump.force_encoding(Encoding::UTF_8), } end diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 2b1d48b61..288cbbbb3 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -507,8 +507,13 @@ def test_valid_utf8_in_different_encoding wrong_encoding_string = utf8_string.b # This behavior is historical. Not necessary desirable. We should deprecated it. 
# The pure and java version of the gem already don't behave this way. - assert_equal utf8_string.to_json, wrong_encoding_string.to_json - assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string) + assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do + assert_equal utf8_string.to_json, wrong_encoding_string.to_json + end + + assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do + assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string) + end end def test_string_ext_included_calls_super From f4862ce5caa43017cd50258c05fb203b157f4d33 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 30 Oct 2024 12:31:00 +0100 Subject: [PATCH 32/75] Update CHANGES.md --- CHANGES.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 713d127da..071251825 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,9 +1,16 @@ # Changes +### UNRELEASED + +* Emit a deprecation warning when `JSON.load` create custom types without the `create_additions` option being explictly enabled. + * Prefer to use `JSON.unsafe_load(string)` or `JSON.load(string, create_additions: true)`. +* Emit a deprecation warning when serializing valid UTF-8 strings encoded in `ASCII_8BIT` aka `BINARY`. * Bump required_ruby_version to 2.7. +* More performance improvments to `JSON.dump` and `JSON.generate`. -### UNRELEASED (2.7.5) +### 2024-10-25 (2.7.5) +* Fix a memory leak when `#to_json` methods raise an exception. * Gracefully handle formatting configs being set to `nil` instead of `""`. * Workaround another issue caused by conflicting versions of both `json_pure` and `json` being loaded. From 90e6df34e61227aaf86509c0cba49338dc2905f9 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 30 Oct 2024 12:37:00 +0100 Subject: [PATCH 33/75] Update README - Remove reference to old school Kernel methods that are questionable. - Make it clear `json/add` is unsafe. 
--- README.md | 49 +++++-------------------------------------------- 1 file changed, 5 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 94a35d7bd..65f284249 100644 --- a/README.md +++ b/README.md @@ -82,50 +82,11 @@ You can also use the `pretty_generate` method (which formats the output more verbosely and nicely) or `fast_generate` (which doesn't do any of the security checks generate performs, e. g. nesting deepness checks). -There are also the JSON and JSON[] methods which use parse on a String or -generate a JSON document from an array or hash: +## Handling arbitrary types -```ruby -document = JSON 'test' => 23 # => "{\"test\":23}" -document = JSON['test' => 23] # => "{\"test\":23}" -``` - -and - -```ruby -data = JSON '{"test":23}' # => {"test"=>23} -data = JSON['{"test":23}'] # => {"test"=>23} -``` - -You can choose to load a set of common additions to ruby core's objects if -you - -```ruby -require 'json/add/core' -``` - -After requiring this you can, e. g., serialise/deserialise Ruby ranges: - -```ruby -JSON JSON(1..10) # => 1..10 -``` - -To find out how to add JSON support to other or your own classes, read the -section "More Examples" below. - -## Serializing exceptions - -The JSON module doesn't extend `Exception` by default. If you convert an `Exception` -object to JSON, it will by default only include the exception message. - -To include the full details, you must either load the `json/add/core` mentioned -above, or specifically load the exception addition: - -```ruby -require 'json/add/exception' -``` - -## More Examples +> [!CAUTION] +> You should never use `JSON.unsafe_load` nor `JSON.parse(str, create_additions: true)` to parse untrusted user input, +> as it can lead to remove code execution vulnerabilities. 
To create a JSON document from a ruby data structure, you can call `JSON.generate` like that: @@ -191,7 +152,7 @@ JSON.parse json # => [1, 2, {"a"=>3.141}, false, true, nil, 4..10] json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10] # => "[1,2,{\"a\":3.141},false,true,null,{\"json_class\":\"Range\",\"data\":[4,10,false]}]" -JSON.parse json, :create_additions => true +JSON.unsafe_load json # => [1, 2, {"a"=>3.141}, false, true, nil, 4..10] ``` From 3eef3d38cf91721870413ae34eb6b8efde33106e Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 30 Oct 2024 15:20:01 +0100 Subject: [PATCH 34/75] Skip calling configure if there are no options * Set all defaults in the constructor. * Order the instance variables in the same order as in #configure. --- lib/json/pure/generator.rb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index 8df1692d0..42491bfd8 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -142,10 +142,12 @@ def initialize(opts = nil) @array_nl = '' @allow_nan = false @ascii_only = false - @script_safe = false - @strict = false + @depth = 0 @buffer_initial_length = 1024 - configure(opts || {}) + @script_safe = false + @strict = false + @max_nesting = 100 + configure(opts) if opts end # This string is used to indent levels in the JSON text. 
From 7af612b6d2156d674e2d716b9ffe06b64d9e2f1e Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 30 Oct 2024 21:38:33 +0100 Subject: [PATCH 35/75] Use Encoding::BINARY for clarity in lib/json/pure --- lib/json/pure/generator.rb | 2 +- lib/json/pure/parser.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index 42491bfd8..47a032a5d 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -74,7 +74,7 @@ def utf8_to_json_ascii(string, script_safe = false) # :nodoc: )/nx) { |c| c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'" s = c.encode(::Encoding::UTF_16BE, ::Encoding::UTF_8).unpack('H*')[0] - s.force_encoding(::Encoding::ASCII_8BIT) + s.force_encoding(::Encoding::BINARY) s.gsub!(/.{4}/n, '\\\\u\&') s.force_encoding(::Encoding::UTF_8) } diff --git a/lib/json/pure/parser.rb b/lib/json/pure/parser.rb index 699f89c25..ba38f5d25 100644 --- a/lib/json/pure/parser.rb +++ b/lib/json/pure/parser.rb @@ -143,9 +143,9 @@ def convert_encoding(source) raise TypeError, "#{source.inspect} is not like a string" end - if source.encoding != ::Encoding::ASCII_8BIT + if source.encoding != ::Encoding::BINARY source = source.encode(::Encoding::UTF_8) - source.force_encoding(::Encoding::ASCII_8BIT) + source.force_encoding(::Encoding::BINARY) end source end From 850bd077c45cf42b0a828c1ed91af512f56dc283 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 30 Oct 2024 21:39:01 +0100 Subject: [PATCH 36/75] Add test for parsing broken strings --- test/json/json_parser_test.rb | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 59cfcfa6e..2e09ff7bf 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -196,6 +196,22 @@ def test_parse_some_strings ) end + def test_parse_broken_string + # https://github.com/ruby/json/issues/138 + s = parse(%{["\x80"]})[0] + 
assert_equal("\x80", s) + assert_equal Encoding::UTF_8, s.encoding + assert_equal false, s.valid_encoding? + + s = parse(%{["\x80"]}.b)[0] + assert_equal("\x80", s) + assert_equal Encoding::UTF_8, s.encoding + assert_equal false, s.valid_encoding? + + input = %{["\x80"]}.dup.force_encoding(Encoding::US_ASCII) + assert_raise(Encoding::InvalidByteSequenceError) { parse(input) } + end + def test_parse_big_integers json1 = JSON(orig = (1 << 31) - 1) assert_equal orig, parse(json1) From 1344ad6f668be21b268f7f86c0abd298a196b1cc Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 30 Oct 2024 21:39:24 +0100 Subject: [PATCH 37/75] Use String#encode instead of rb_str_conv_enc() * rb_str_conv_enc() returns the source string unmodified if the conversion did not work. But we should be consistent with the generator here and only accept BINARY or convertible to UTF-8. --- ext/json/ext/parser/parser.c | 35 +++++++++++++++++------------------ ext/json/ext/parser/parser.rl | 19 +++++++++---------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 9d2c1a5ea..1c42ee386 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -89,14 +89,14 @@ static void raise_parse_error(const char *format, const char *start) rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr); } -static VALUE mJSON, mExt, cParser, eNestingError; +static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_object_class, i_array_class, i_decimal_class, i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_freeze, i_uminus; + i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode; static int binary_encindex; static int utf8_encindex; @@ -1797,16 
+1797,11 @@ static VALUE convert_encoding(VALUE source) } if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - switch (rb_enc_str_coderange(utf8_string)) { - case ENC_CODERANGE_7BIT: - case ENC_CODERANGE_VALID: - return utf8_string; - } + // For historical reason, we silently reinterpret binary strings as UTF-8 + return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); } - return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding()); + return rb_funcall(source, i_encode, 1, Encoding_UTF_8); } /* @@ -1958,7 +1953,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1962 "parser.c" +#line 1957 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1966,7 +1961,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 870 "parser.rl" +#line 865 "parser.rl" /* @@ -1984,16 +1979,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 1988 "parser.c" +#line 1983 "parser.c" { cs = JSON_start; } -#line 887 "parser.rl" +#line 882 "parser.rl" p = json->source; pe = p + json->len; -#line 1997 "parser.c" +#line 1992 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2027,7 +2022,7 @@ case 1: cs = 0; goto _out; tr2: -#line 862 "parser.rl" +#line 857 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2037,7 +2032,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2041 "parser.c" +#line 2036 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2126,7 +2121,7 @@ case 9: _out: {} } -#line 890 "parser.rl" +#line 885 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -2214,6 +2209,9 @@ void Init_parser(void) CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); 
rb_gc_register_mark_object(CMinusInfinity); + rb_global_variable(&Encoding_UTF_8); + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); i_create_id = rb_intern("create_id"); @@ -2235,6 +2233,7 @@ void Init_parser(void) i_try_convert = rb_intern("try_convert"); i_freeze = rb_intern("freeze"); i_uminus = rb_intern("-@"); + i_encode = rb_intern("encode"); binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index b03a1d592..3c42afa25 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -87,14 +87,14 @@ static void raise_parse_error(const char *format, const char *start) rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr); } -static VALUE mJSON, mExt, cParser, eNestingError; +static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_object_class, i_array_class, i_decimal_class, i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_freeze, i_uminus; + i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode; static int binary_encindex; static int utf8_encindex; @@ -692,16 +692,11 @@ static VALUE convert_encoding(VALUE source) } if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. 
- VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - switch (rb_enc_str_coderange(utf8_string)) { - case ENC_CODERANGE_7BIT: - case ENC_CODERANGE_VALID: - return utf8_string; - } + // For historical reason, we silently reinterpret binary strings as UTF-8 + return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); } - return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding()); + return rb_funcall(source, i_encode, 1, Encoding_UTF_8); } /* @@ -974,6 +969,9 @@ void Init_parser(void) CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); rb_gc_register_mark_object(CMinusInfinity); + rb_global_variable(&Encoding_UTF_8); + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); i_create_id = rb_intern("create_id"); @@ -995,6 +993,7 @@ void Init_parser(void) i_try_convert = rb_intern("try_convert"); i_freeze = rb_intern("freeze"); i_uminus = rb_intern("-@"); + i_encode = rb_intern("encode"); binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); From 0f0b16b3f5147228e752e1d7b613c5f977d00293 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 30 Oct 2024 22:10:43 +0100 Subject: [PATCH 38/75] Skip test failing on JRuby --- test/json/json_parser_test.rb | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 2e09ff7bf..6d8456c79 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -196,20 +196,21 @@ def test_parse_some_strings ) end - def test_parse_broken_string - # https://github.com/ruby/json/issues/138 - s = parse(%{["\x80"]})[0] - assert_equal("\x80", s) - assert_equal Encoding::UTF_8, s.encoding - assert_equal false, s.valid_encoding? 
- - s = parse(%{["\x80"]}.b)[0] - assert_equal("\x80", s) - assert_equal Encoding::UTF_8, s.encoding - assert_equal false, s.valid_encoding? - - input = %{["\x80"]}.dup.force_encoding(Encoding::US_ASCII) - assert_raise(Encoding::InvalidByteSequenceError) { parse(input) } + if RUBY_ENGINE != "jruby" # https://github.com/ruby/json/issues/138 + def test_parse_broken_string + s = parse(%{["\x80"]})[0] + assert_equal("\x80", s) + assert_equal Encoding::UTF_8, s.encoding + assert_equal false, s.valid_encoding? + + s = parse(%{["\x80"]}.b)[0] + assert_equal("\x80", s) + assert_equal Encoding::UTF_8, s.encoding + assert_equal false, s.valid_encoding? + + input = %{["\x80"]}.dup.force_encoding(Encoding::US_ASCII) + assert_raise(Encoding::InvalidByteSequenceError) { parse(input) } + end end def test_parse_big_integers From 9d47305c4852c829184ea51604ea570f7aaa9766 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 31 Oct 2024 08:52:19 +0100 Subject: [PATCH 39/75] JSON.generate: call to_json on String subclasses Fix: https://github.com/ruby/json/issues/667 This is yet another behavior on which the various implementations differed, but the C implementation used to call `to_json` on String subclasses used as keys. This was optimized out in e125072130229e54a651f7b11d7d5a782ae7fb65 but there is an Active Support test case for it, so it's best to make all 3 implementation respect this behavior. 
--- ext/json/ext/fbuffer/fbuffer.h | 4 ++++ ext/json/ext/generator/generator.c | 6 ++++- java/src/json/ext/Generator.java | 9 +++++++- lib/json/pure/generator.rb | 33 ++++++++++++++++++++++------ test/json/json_generator_test.rb | 35 ++++++++++++++++++++++++++++++ 5 files changed, 78 insertions(+), 9 deletions(-) diff --git a/ext/json/ext/fbuffer/fbuffer.h b/ext/json/ext/fbuffer/fbuffer.h index 9bbfeed3c..367ebd89f 100644 --- a/ext/json/ext/fbuffer/fbuffer.h +++ b/ext/json/ext/fbuffer/fbuffer.h @@ -42,6 +42,10 @@ static VALUE fbuffer_to_s(FBuffer *fb); #define RB_UNLIKELY(expr) expr #endif +#ifndef RB_LIKELY +#define RB_LIKELY(expr) expr +#endif + static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size) { fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 00d9ffda0..8f0ef207d 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -737,7 +737,11 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) break; } - generate_json_string(buffer, data, state, key_to_s); + if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) { + generate_json_string(buffer, data, state, key_to_s); + } else { + generate_json(buffer, data, state, key_to_s); + } if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before); fbuffer_append_char(buffer, ':'); if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space); diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index d0ac44586..b149feb36 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -359,7 +359,14 @@ public void visit(IRubyObject key, IRubyObject value) { } if (objectNl.length() != 0) buffer.append(indent); - STRING_HANDLER.generate(session, key.asString(), buffer); + IRubyObject keyStr = 
key.callMethod(context, "to_s"); + if (keyStr.getMetaClass() == runtime.getString()) { + STRING_HANDLER.generate(session, (RubyString)keyStr, buffer); + } else { + Utils.ensureString(keyStr); + Handler keyHandler = (Handler) getHandlerFor(runtime, keyStr); + keyHandler.generate(session, keyStr, buffer); + } session.infectBy(key); buffer.append(spaceBefore); diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index 8df1692d0..3087ae1d9 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -305,19 +305,30 @@ def generate(obj) # Handles @allow_nan, @buffer_initial_length, other ivars must be the default value (see above) private def generate_json(obj, buf) - case obj - when Hash + klass = obj.class + if klass == Hash buf << '{' first = true obj.each_pair do |k,v| buf << ',' unless first - fast_serialize_string(k.to_s, buf) + + key_str = k.to_s + if key_str.is_a?(::String) + if key_str.class == ::String + fast_serialize_string(key_str, buf) + else + generate_json(key_str, buf) + end + else + raise TypeError, "#{k.class}#to_s returns an instance of #{key_str.class}, expected a String" + end + buf << ':' generate_json(v, buf) first = false end buf << '}' - when Array + elsif klass == Array buf << '[' first = true obj.each do |e| @@ -326,9 +337,9 @@ def generate(obj) first = false end buf << ']' - when String + elsif klass == String fast_serialize_string(obj, buf) - when Integer + elsif klass == Integer buf << obj.to_s else # Note: Float is handled this way since Float#to_s is slow anyway @@ -417,7 +428,15 @@ def json_transform(state) each { |key, value| result << delim unless first result << state.indent * depth if indent - result = +"#{result}#{key.to_s.to_json(state)}#{state.space_before}:#{state.space}" + + key_str = key.to_s + key_json = if key_str.is_a?(::String) + key_str = key_str.to_json(state) + else + raise TypeError, "#{key.class}#to_s returns an instance of #{key_str.class}, expected a String" + end + + result = 
+"#{result}#{key_json}#{state.space_before}:#{state.space}" if state.strict? && !(false == value || true == value || nil == value || String === value || Array === value || Hash === value || Integer === value || Float === value) raise GeneratorError, "#{value.class} not allowed in JSON" elsif value.respond_to?(:to_json) diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 288cbbbb3..6716eb82f 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -486,6 +486,41 @@ def test_invalid_encoding_string end end + class MyCustomString < String + def to_json(_state = nil) + '"my_custom_key"' + end + + def to_s + self + end + end + + def test_string_subclass_as_keys + # Ref: https://github.com/ruby/json/issues/667 + # if key.to_s doesn't return a bare string, we call `to_json` on it. + key = MyCustomString.new("won't be used") + assert_equal '{"my_custom_key":1}', JSON.generate(key => 1) + end + + class FakeString + def to_json(_state = nil) + raise "Shouldn't be called" + end + + def to_s + self + end + end + + def test_custom_object_as_keys + key = FakeString.new + error = assert_raise(TypeError) do + JSON.generate(key => 1) + end + assert_match "FakeString", error.message + end + def test_to_json_called_with_state_object object = Object.new called = false From 0c797b4a11811142e510a71d005d2e2a734fab23 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 31 Oct 2024 11:40:05 +0100 Subject: [PATCH 40/75] Trigger write barrier when setting Generator::State configs Followup: 6382c231b0b84abe28cc3a979729a29dd7dba27d --- ext/json/ext/generator/generator.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 8f0ef207d..362eff7f7 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -687,6 +687,11 @@ static void vstate_spill(struct generate_json_data *data) 
MEMCPY(state, data->state, JSON_Generator_State, 1); data->state = state; data->vstate = vstate; + RB_OBJ_WRITTEN(vstate, Qundef, state->indent); + RB_OBJ_WRITTEN(vstate, Qundef, state->space); + RB_OBJ_WRITTEN(vstate, Qundef, state->space_before); + RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl); + RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl); } static inline VALUE vstate_get(struct generate_json_data *data) @@ -1122,7 +1127,7 @@ static VALUE string_config(VALUE config) static VALUE cState_indent_set(VALUE self, VALUE indent) { GET_STATE(self); - state->indent = string_config(indent); + RB_OBJ_WRITE(self, &state->indent, string_config(indent)); return Qnil; } @@ -1147,7 +1152,7 @@ static VALUE cState_space(VALUE self) static VALUE cState_space_set(VALUE self, VALUE space) { GET_STATE(self); - state->space = string_config(space); + RB_OBJ_WRITE(self, &state->space, string_config(space)); return Qnil; } @@ -1170,7 +1175,7 @@ static VALUE cState_space_before(VALUE self) static VALUE cState_space_before_set(VALUE self, VALUE space_before) { GET_STATE(self); - state->space_before = string_config(space_before); + RB_OBJ_WRITE(self, &state->space_before, string_config(space_before)); return Qnil; } @@ -1195,7 +1200,7 @@ static VALUE cState_object_nl(VALUE self) static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) { GET_STATE(self); - state->object_nl = string_config(object_nl); + RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl)); return Qnil; } @@ -1218,7 +1223,7 @@ static VALUE cState_array_nl(VALUE self) static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) { GET_STATE(self); - state->array_nl = string_config(array_nl); + RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl)); return Qnil; } From 7e557ee291dd9bb5bd540f557893aad47e82bda6 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 31 Oct 2024 14:43:19 +0100 Subject: [PATCH 41/75] parser.rl: extract `build_string` --- ext/json/ext/parser/parser.c | 107 
++++++++++++++++++---------------- ext/json/ext/parser/parser.rl | 73 ++++++++++++----------- 2 files changed, 97 insertions(+), 83 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 1c42ee386..62f290332 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1450,6 +1450,43 @@ case 16: } } +static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize) +{ + if (symbolize) { + intern = true; + } + VALUE result; +# ifdef HAVE_RB_ENC_INTERNED_STR + if (intern) { + result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); + } else { + result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + } +# else + result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + if (intern) { + # if STR_UMINUS_DEDUPE_FROZEN + // Starting from MRI 3.0 it is preferable to freeze the string + // before deduplication so that it can be interned directly + // otherwise it would be duplicated first which is wasteful. + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + # elif STR_UMINUS_DEDUPE + // MRI 2.5 and older do not deduplicate strings that are already + // frozen. 
+ result = rb_funcall(result, i_uminus, 0); + # else + result = rb_str_freeze(result); + # endif + } +# endif + + if (symbolize) { + result = rb_str_intern(result); + } + + return result; +} + static const size_t MAX_STACK_BUFFER_SIZE = 128; static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize) { @@ -1561,47 +1598,17 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int buffer += pe - p; } -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); - } else { - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - } - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } -# else - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } + result = build_string(buffer, bufferStart, intern, symbolize); - if (intern) { - # if STR_UMINUS_DEDUPE_FROZEN - // Starting from MRI 2.8 it is preferable to freeze the string - // before deduplication so that it can be interned directly - // otherwise it would be duplicated first which is wasteful. - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - # elif STR_UMINUS_DEDUPE - // MRI 2.5 and older do not deduplicate strings that are already - // frozen. 
- result = rb_funcall(result, i_uminus, 0); - # else - result = rb_str_freeze(result); - # endif - } -# endif - - if (symbolize) { - result = rb_str_intern(result); + if (bufferSize > MAX_STACK_BUFFER_SIZE) { + ruby_xfree(bufferStart); } return result; } -#line 1605 "parser.c" +#line 1612 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1609,7 +1616,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 633 "parser.rl" +#line 640 "parser.rl" static int @@ -1630,15 +1637,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1634 "parser.c" +#line 1641 "parser.c" { cs = JSON_string_start; } -#line 653 "parser.rl" +#line 660 "parser.rl" json->memo = p; -#line 1642 "parser.c" +#line 1649 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1663,7 +1670,7 @@ case 2: goto st0; goto st2; tr2: -#line 620 "parser.rl" +#line 627 "parser.rl" { *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { @@ -1673,14 +1680,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 630 "parser.rl" +#line 637 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1684 "parser.c" +#line 1691 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1756,7 +1763,7 @@ case 7: _out: {} } -#line 655 "parser.rl" +#line 662 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1953,7 +1960,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1957 "parser.c" +#line 1964 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1961,7 +1968,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 865 "parser.rl" +#line 872 "parser.rl" /* @@ -1979,16 +1986,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 
1983 "parser.c" +#line 1990 "parser.c" { cs = JSON_start; } -#line 882 "parser.rl" +#line 889 "parser.rl" p = json->source; pe = p + json->len; -#line 1992 "parser.c" +#line 1999 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2022,7 +2029,7 @@ case 1: cs = 0; goto _out; tr2: -#line 857 "parser.rl" +#line 864 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2032,7 +2039,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2036 "parser.c" +#line 2043 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2121,7 +2128,7 @@ case 9: _out: {} } -#line 885 "parser.rl" +#line 892 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 3c42afa25..35a9766dd 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -461,6 +461,43 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul } } +static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize) +{ + if (symbolize) { + intern = true; + } + VALUE result; +# ifdef HAVE_RB_ENC_INTERNED_STR + if (intern) { + result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); + } else { + result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + } +# else + result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + if (intern) { + # if STR_UMINUS_DEDUPE_FROZEN + // Starting from MRI 3.0 it is preferable to freeze the string + // before deduplication so that it can be interned directly + // otherwise it would be duplicated first which is wasteful. + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + # elif STR_UMINUS_DEDUPE + // MRI 2.5 and older do not deduplicate strings that are already + // frozen. 
+ result = rb_funcall(result, i_uminus, 0); + # else + result = rb_str_freeze(result); + # endif + } +# endif + + if (symbolize) { + result = rb_str_intern(result); + } + + return result; +} + static const size_t MAX_STACK_BUFFER_SIZE = 128; static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize) { @@ -572,40 +609,10 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int buffer += pe - p; } -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); - } else { - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - } - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } -# else - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } + result = build_string(buffer, bufferStart, intern, symbolize); - if (intern) { - # if STR_UMINUS_DEDUPE_FROZEN - // Starting from MRI 2.8 it is preferable to freeze the string - // before deduplication so that it can be interned directly - // otherwise it would be duplicated first which is wasteful. - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - # elif STR_UMINUS_DEDUPE - // MRI 2.5 and older do not deduplicate strings that are already - // frozen. 
- result = rb_funcall(result, i_uminus, 0); - # else - result = rb_str_freeze(result); - # endif - } -# endif - - if (symbolize) { - result = rb_str_intern(result); + if (bufferSize > MAX_STACK_BUFFER_SIZE) { + ruby_xfree(bufferStart); } return result; From 7e0f66546a53d99439db6ac30bdbcf6bebc7d801 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 31 Oct 2024 14:47:17 +0100 Subject: [PATCH 42/75] json_string_unescape: assume the string doesn't need escaping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If that assumption holds true, then we don't need to copy the string into a buffer to unescape it. For small strings it just saves copying, but for large ones it also saves a malloc/free combo. Before: ``` == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 52.000 i/100ms oj 61.000 i/100ms oj strict 70.000 i/100ms Oj::Parser 71.000 i/100ms rapidjson 55.000 i/100ms Calculating ------------------------------------- json 510.111 (± 2.9%) i/s (1.96 ms/i) - 2.548k in 5.000029s oj 610.232 (± 3.1%) i/s (1.64 ms/i) - 3.050k in 5.003725s oj strict 713.231 (± 3.2%) i/s (1.40 ms/i) - 3.570k in 5.010902s Oj::Parser 762.598 (± 3.0%) i/s (1.31 ms/i) - 3.834k in 5.033130s rapidjson 553.029 (± 7.4%) i/s (1.81 ms/i) - 2.750k in 5.022630s Comparison: json: 510.1 i/s Oj::Parser: 762.6 i/s - 1.49x faster oj strict: 713.2 i/s - 1.40x faster oj: 610.2 i/s - 1.20x faster rapidjson: 553.0 i/s - same-ish: difference falls within error == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 28.000 i/100ms oj 33.000 i/100ms oj strict 37.000 i/100ms Oj::Parser 43.000 i/100ms rapidjson 38.000 i/100ms Calculating ------------------------------------- json 303.853 (± 3.6%) i/s (3.29 ms/i) - 1.540k in 5.076079s oj 348.009 (± 2.0%) i/s 
(2.87 ms/i) - 1.749k in 5.027738s oj strict 396.679 (± 3.3%) i/s (2.52 ms/i) - 1.998k in 5.042271s Oj::Parser 406.699 (± 2.2%) i/s (2.46 ms/i) - 2.064k in 5.077587s rapidjson 393.463 (± 3.3%) i/s (2.54 ms/i) - 1.976k in 5.028501s Comparison: json: 303.9 i/s Oj::Parser: 406.7 i/s - 1.34x faster oj strict: 396.7 i/s - 1.31x faster rapidjson: 393.5 i/s - 1.29x faster oj: 348.0 i/s - 1.15x faster ``` After: ``` == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 56.000 i/100ms oj 62.000 i/100ms oj strict 72.000 i/100ms Oj::Parser 77.000 i/100ms rapidjson 55.000 i/100ms Calculating ------------------------------------- json 568.025 (± 2.1%) i/s (1.76 ms/i) - 2.856k in 5.030272s oj 630.936 (± 1.4%) i/s (1.58 ms/i) - 3.162k in 5.012630s oj strict 705.784 (±11.2%) i/s (1.42 ms/i) - 3.456k in 5.006706s Oj::Parser 783.989 (± 1.7%) i/s (1.28 ms/i) - 3.927k in 5.010343s rapidjson 557.630 (± 2.0%) i/s (1.79 ms/i) - 2.805k in 5.032388s Comparison: json: 568.0 i/s Oj::Parser: 784.0 i/s - 1.38x faster oj strict: 705.8 i/s - 1.24x faster oj: 630.9 i/s - 1.11x faster rapidjson: 557.6 i/s - same-ish: difference falls within error == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 29.000 i/100ms oj 33.000 i/100ms oj strict 38.000 i/100ms Oj::Parser 43.000 i/100ms rapidjson 37.000 i/100ms Calculating ------------------------------------- json 319.271 (± 3.1%) i/s (3.13 ms/i) - 1.595k in 5.001128s oj 347.946 (± 1.7%) i/s (2.87 ms/i) - 1.749k in 5.028395s oj strict 396.914 (± 3.0%) i/s (2.52 ms/i) - 2.014k in 5.079645s Oj::Parser 409.311 (± 2.7%) i/s (2.44 ms/i) - 2.064k in 5.046626s rapidjson 394.752 (± 1.5%) i/s (2.53 ms/i) - 1.998k in 5.062776s Comparison: json: 319.3 i/s Oj::Parser: 409.3 i/s - 1.28x faster oj strict: 396.9 i/s - 1.24x faster rapidjson: 394.8 i/s 
- 1.24x faster oj: 347.9 i/s - 1.09x faster ``` --- ext/json/ext/parser/parser.c | 51 +++++++++++++++++++---------------- ext/json/ext/parser/parser.h | 19 ++++++++++++- ext/json/ext/parser/parser.rl | 17 +++++++----- 3 files changed, 57 insertions(+), 30 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 62f290332..e1ad1d02f 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1450,7 +1450,7 @@ case 16: } } -static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize) +static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) { if (symbolize) { intern = true; @@ -1458,12 +1458,12 @@ static inline VALUE build_string(const char *buffer, const char *bufferStart, bo VALUE result; # ifdef HAVE_RB_ENC_INTERNED_STR if (intern) { - result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); + result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding()); } else { - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + result = rb_utf8_str_new(start, (long)(end - start)); } # else - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + result = rb_utf8_str_new(start, (long)(end - start)); if (intern) { # if STR_UMINUS_DEDUPE_FROZEN // Starting from MRI 3.0 it is preferable to freeze the string @@ -1488,7 +1488,7 @@ static inline VALUE build_string(const char *buffer, const char *bufferStart, bo } static const size_t MAX_STACK_BUFFER_SIZE = 128; -static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize) +static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize) { VALUE result = Qnil; size_t bufferSize = stringEnd - string; @@ -1496,6 +1496,11 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int int unescape_len; char buf[4]; + pe = memchr(p, 
'\\', bufferSize); + if (RB_LIKELY(pe == NULL)) { + return build_string(string, stringEnd, intern, symbolize); + } + if (bufferSize > MAX_STACK_BUFFER_SIZE) { # ifdef HAVE_RB_ENC_INTERNED_STR bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1); @@ -1598,7 +1603,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int buffer += pe - p; } - result = build_string(buffer, bufferStart, intern, symbolize); + result = build_string(bufferStart, buffer, intern, symbolize); if (bufferSize > MAX_STACK_BUFFER_SIZE) { ruby_xfree(bufferStart); @@ -1608,7 +1613,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int } -#line 1612 "parser.c" +#line 1617 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1616,7 +1621,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 640 "parser.rl" +#line 645 "parser.rl" static int @@ -1637,15 +1642,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1641 "parser.c" +#line 1646 "parser.c" { cs = JSON_string_start; } -#line 660 "parser.rl" +#line 665 "parser.rl" json->memo = p; -#line 1649 "parser.c" +#line 1654 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1670,7 +1675,7 @@ case 2: goto st0; goto st2; tr2: -#line 627 "parser.rl" +#line 632 "parser.rl" { *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { @@ -1680,14 +1685,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 637 "parser.rl" +#line 642 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1691 "parser.c" +#line 1696 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1763,7 +1768,7 @@ case 7: _out: {} } -#line 662 "parser.rl" +#line 667 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { 
VALUE klass; @@ -1960,7 +1965,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1964 "parser.c" +#line 1969 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1968,7 +1973,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 872 "parser.rl" +#line 877 "parser.rl" /* @@ -1986,16 +1991,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 1990 "parser.c" +#line 1995 "parser.c" { cs = JSON_start; } -#line 889 "parser.rl" +#line 894 "parser.rl" p = json->source; pe = p + json->len; -#line 1999 "parser.c" +#line 2004 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2029,7 +2034,7 @@ case 1: cs = 0; goto _out; tr2: -#line 864 "parser.rl" +#line 869 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2039,7 +2044,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2043 "parser.c" +#line 2048 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2128,7 +2133,7 @@ case 9: _out: {} } -#line 892 "parser.rl" +#line 897 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/ext/json/ext/parser/parser.h b/ext/json/ext/parser/parser.h index 6bf21aab5..d1863a2b9 100644 --- a/ext/json/ext/parser/parser.h +++ b/ext/json/ext/parser/parser.h @@ -3,6 +3,23 @@ #include "ruby.h" +/* This is the fallback definition from Ruby 3.4 */ +#ifndef RBIMPL_STDBOOL_H +#if defined(__cplusplus) +# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) +# include +# endif +#elif defined(HAVE_STDBOOL_H) +# include +#elif !defined(HAVE__BOOL) +typedef unsigned char _Bool; +# define bool _Bool +# define true ((_Bool)+1) +# define false ((_Bool)+0) +# define __bool_true_false_are_defined +#endif +#endif + #ifndef MAYBE_UNUSED # define MAYBE_UNUSED(x) x #endif @@ -46,7 +63,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul static char 
*JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize); +static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize); static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); static VALUE convert_encoding(VALUE source); static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 35a9766dd..808e55748 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -461,7 +461,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul } } -static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize) +static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) { if (symbolize) { intern = true; @@ -469,12 +469,12 @@ static inline VALUE build_string(const char *buffer, const char *bufferStart, bo VALUE result; # ifdef HAVE_RB_ENC_INTERNED_STR if (intern) { - result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); + result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding()); } else { - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + result = rb_utf8_str_new(start, (long)(end - start)); } # else - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + result = rb_utf8_str_new(start, (long)(end - start)); if (intern) { # if STR_UMINUS_DEDUPE_FROZEN // Starting from MRI 3.0 it is preferable to freeze the string @@ -499,7 +499,7 @@ static inline VALUE build_string(const char *buffer, const char *bufferStart, bo } 
static const size_t MAX_STACK_BUFFER_SIZE = 128; -static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize) +static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize) { VALUE result = Qnil; size_t bufferSize = stringEnd - string; @@ -507,6 +507,11 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int int unescape_len; char buf[4]; + pe = memchr(p, '\\', bufferSize); + if (RB_LIKELY(pe == NULL)) { + return build_string(string, stringEnd, intern, symbolize); + } + if (bufferSize > MAX_STACK_BUFFER_SIZE) { # ifdef HAVE_RB_ENC_INTERNED_STR bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1); @@ -609,7 +614,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int buffer += pe - p; } - result = build_string(buffer, bufferStart, intern, symbolize); + result = build_string(bufferStart, buffer, intern, symbolize); if (bufferSize > MAX_STACK_BUFFER_SIZE) { ruby_xfree(bufferStart); From 35cf2b84e06ef0ba99c721686b7145b2e37b8a21 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 31 Oct 2024 15:07:40 +0100 Subject: [PATCH 43/75] Remove String#-@ check in extconf.rb Now that older rubies have been dropped, we no longer need to check for all that. --- ext/json/ext/parser/extconf.rb | 25 ----------------- ext/json/ext/parser/parser.c | 49 +++++++++++++--------------------- ext/json/ext/parser/parser.rl | 15 ++--------- 3 files changed, 21 insertions(+), 68 deletions(-) diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb index bd06f2782..870588412 100644 --- a/ext/json/ext/parser/extconf.rb +++ b/ext/json/ext/parser/extconf.rb @@ -4,31 +4,6 @@ have_func("rb_enc_raise", "ruby.h") have_func("rb_enc_interned_str", "ruby.h") -# checking if String#-@ (str_uminus) dedupes... 
' -begin - a = -(%w(t e s t).join) - b = -(%w(t e s t).join) - if a.equal?(b) - $CFLAGS << ' -DSTR_UMINUS_DEDUPE=1 ' - else - $CFLAGS << ' -DSTR_UMINUS_DEDUPE=0 ' - end -rescue NoMethodError - $CFLAGS << ' -DSTR_UMINUS_DEDUPE=0 ' -end - -# checking if String#-@ (str_uminus) directly interns frozen strings... ' -begin - s = rand.to_s.freeze - if (-s).equal?(s) && (-s.dup).equal?(s) - $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=1 ' - else - $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=0 ' - end -rescue NoMethodError - $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=0 ' -end - append_cflags("-std=c99") create_makefile 'json/ext/parser' diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index e1ad1d02f..a4e49681f 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1465,19 +1465,8 @@ static inline VALUE build_string(const char *start, const char *end, bool intern # else result = rb_utf8_str_new(start, (long)(end - start)); if (intern) { - # if STR_UMINUS_DEDUPE_FROZEN - // Starting from MRI 3.0 it is preferable to freeze the string - // before deduplication so that it can be interned directly - // otherwise it would be duplicated first which is wasteful. - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - # elif STR_UMINUS_DEDUPE - // MRI 2.5 and older do not deduplicate strings that are already - // frozen. 
- result = rb_funcall(result, i_uminus, 0); - # else - result = rb_str_freeze(result); - # endif - } + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + } # endif if (symbolize) { @@ -1613,7 +1602,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo } -#line 1617 "parser.c" +#line 1606 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1621,7 +1610,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 645 "parser.rl" +#line 634 "parser.rl" static int @@ -1642,15 +1631,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1646 "parser.c" +#line 1635 "parser.c" { cs = JSON_string_start; } -#line 665 "parser.rl" +#line 654 "parser.rl" json->memo = p; -#line 1654 "parser.c" +#line 1643 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1675,7 +1664,7 @@ case 2: goto st0; goto st2; tr2: -#line 632 "parser.rl" +#line 621 "parser.rl" { *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { @@ -1685,14 +1674,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 642 "parser.rl" +#line 631 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1696 "parser.c" +#line 1685 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1768,7 +1757,7 @@ case 7: _out: {} } -#line 667 "parser.rl" +#line 656 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1965,7 +1954,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1969 "parser.c" +#line 1958 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1973,7 +1962,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 877 "parser.rl" +#line 866 "parser.rl" /* @@ -1991,16 +1980,16 @@ static VALUE 
cParser_parse(VALUE self) GET_PARSER; -#line 1995 "parser.c" +#line 1984 "parser.c" { cs = JSON_start; } -#line 894 "parser.rl" +#line 883 "parser.rl" p = json->source; pe = p + json->len; -#line 2004 "parser.c" +#line 1993 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2034,7 +2023,7 @@ case 1: cs = 0; goto _out; tr2: -#line 869 "parser.rl" +#line 858 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2044,7 +2033,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2048 "parser.c" +#line 2037 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2133,7 +2122,7 @@ case 9: _out: {} } -#line 897 "parser.rl" +#line 886 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 808e55748..ef83aaec7 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -476,19 +476,8 @@ static inline VALUE build_string(const char *start, const char *end, bool intern # else result = rb_utf8_str_new(start, (long)(end - start)); if (intern) { - # if STR_UMINUS_DEDUPE_FROZEN - // Starting from MRI 3.0 it is preferable to freeze the string - // before deduplication so that it can be interned directly - // otherwise it would be duplicated first which is wasteful. - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - # elif STR_UMINUS_DEDUPE - // MRI 2.5 and older do not deduplicate strings that are already - // frozen. 
- result = rb_funcall(result, i_uminus, 0); - # else - result = rb_str_freeze(result); - # endif - } + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + } # endif if (symbolize) { From 5e1ec4a2689ee8c7d3f09420f2d10d5a1d27c93e Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 31 Oct 2024 15:22:38 +0100 Subject: [PATCH 44/75] json_string_unescape: Use the returned RString as buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than to copy into a buffer to unescape and then copy that buffer into the final string, we can directly copy into the final string. The downside is that if the string contains a lot of escaping, we end up returning a string that's larger than strictly necessary, but it's probably fine. Before: ``` == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 56.000 i/100ms oj 58.000 i/100ms oj strict 74.000 i/100ms Oj::Parser 76.000 i/100ms rapidjson 52.000 i/100ms Calculating ------------------------------------- json 556.659 (± 2.9%) i/s (1.80 ms/i) - 2.800k in 5.034719s oj 604.077 (± 3.8%) i/s (1.66 ms/i) - 3.016k in 5.001546s oj strict 706.942 (± 3.5%) i/s (1.41 ms/i) - 3.552k in 5.030954s Oj::Parser 752.917 (± 3.2%) i/s (1.33 ms/i) - 3.800k in 5.052707s rapidjson 546.470 (± 3.5%) i/s (1.83 ms/i) - 2.756k in 5.049855s Comparison: json: 556.7 i/s Oj::Parser: 752.9 i/s - 1.35x faster oj strict: 706.9 i/s - 1.27x faster oj: 604.1 i/s - 1.09x faster rapidjson: 546.5 i/s - same-ish: difference falls within error == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 29.000 i/100ms oj 32.000 i/100ms oj strict 38.000 i/100ms Oj::Parser 42.000 i/100ms rapidjson 38.000 i/100ms Calculating ------------------------------------- json 317.858 (± 3.1%) i/s (3.15 ms/i) - 1.595k in 
5.023245s oj 348.168 (± 2.6%) i/s (2.87 ms/i) - 1.760k in 5.058431s oj strict 394.599 (± 2.8%) i/s (2.53 ms/i) - 1.976k in 5.012073s Oj::Parser 403.771 (± 3.0%) i/s (2.48 ms/i) - 2.058k in 5.101578s rapidjson 383.441 (± 3.7%) i/s (2.61 ms/i) - 1.938k in 5.061355s Comparison: json: 317.9 i/s Oj::Parser: 403.8 i/s - 1.27x faster oj strict: 394.6 i/s - 1.24x faster rapidjson: 383.4 i/s - 1.21x faster oj: 348.2 i/s - 1.10x faster ``` After: ``` == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 56.000 i/100ms oj 62.000 i/100ms oj strict 73.000 i/100ms Oj::Parser 76.000 i/100ms rapidjson 54.000 i/100ms Calculating ------------------------------------- json 561.009 (± 7.5%) i/s (1.78 ms/i) - 2.800k in 5.039548s oj 601.124 (± 4.3%) i/s (1.66 ms/i) - 3.038k in 5.064686s oj strict 707.455 (± 3.4%) i/s (1.41 ms/i) - 3.577k in 5.062540s Oj::Parser 751.799 (± 3.1%) i/s (1.33 ms/i) - 3.800k in 5.059509s rapidjson 535.641 (± 3.2%) i/s (1.87 ms/i) - 2.700k in 5.045816s Comparison: json: 561.0 i/s Oj::Parser: 751.8 i/s - 1.34x faster oj strict: 707.5 i/s - 1.26x faster oj: 601.1 i/s - same-ish: difference falls within error rapidjson: 535.6 i/s - same-ish: difference falls within error == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 30.000 i/100ms oj 32.000 i/100ms oj strict 36.000 i/100ms Oj::Parser 42.000 i/100ms rapidjson 39.000 i/100ms Calculating ------------------------------------- json 313.248 (± 7.3%) i/s (3.19 ms/i) - 1.560k in 5.014118s oj 341.977 (± 4.1%) i/s (2.92 ms/i) - 1.728k in 5.063332s oj strict 387.062 (± 6.2%) i/s (2.58 ms/i) - 1.944k in 5.045961s Oj::Parser 400.423 (± 4.0%) i/s (2.50 ms/i) - 2.016k in 5.044513s rapidjson 379.046 (± 6.1%) i/s (2.64 ms/i) - 1.911k in 5.064461s Comparison: json: 313.2 i/s Oj::Parser: 400.4 i/s - 1.28x 
faster oj strict: 387.1 i/s - 1.24x faster rapidjson: 379.0 i/s - 1.21x faster oj: 342.0 i/s - same-ish: difference falls within error ``` --- ext/json/ext/parser/parser.c | 67 +++++++++++++---------------------- ext/json/ext/parser/parser.rl | 33 +++++------------ 2 files changed, 33 insertions(+), 67 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index a4e49681f..cdf8983a7 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1476,10 +1476,8 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } -static const size_t MAX_STACK_BUFFER_SIZE = 128; static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize) { - VALUE result = Qnil; size_t bufferSize = stringEnd - string; char *p = string, *pe = string, *unescape, *bufferStart, *buffer; int unescape_len; @@ -1490,19 +1488,9 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo return build_string(string, stringEnd, intern, symbolize); } - if (bufferSize > MAX_STACK_BUFFER_SIZE) { -# ifdef HAVE_RB_ENC_INTERNED_STR - bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1); -# else - bufferStart = buffer = ALLOC_N(char, bufferSize); -# endif - } else { -# ifdef HAVE_RB_ENC_INTERNED_STR - bufferStart = buffer = ALLOCA_N(char, bufferSize ? 
bufferSize : 1); -# else - bufferStart = buffer = ALLOCA_N(char, bufferSize); -# endif - } + VALUE result = rb_str_buf_new(bufferSize); + rb_enc_associate_index(result, utf8_encindex); + buffer = bufferStart = RSTRING_PTR(result); while (pe < stringEnd) { if (*pe == '\\') { @@ -1536,9 +1524,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo break; case 'u': if (pe > stringEnd - 4) { - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } raise_parse_error("incomplete unicode character escape sequence at '%s'", p); } else { uint32_t ch = unescape_unicode((unsigned char *) ++pe); @@ -1556,9 +1541,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo if ((ch & 0xFC00) == 0xD800) { pe++; if (pe > stringEnd - 6) { - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } raise_parse_error("incomplete surrogate pair at '%s'", p); } if (pe[0] == '\\' && pe[1] == 'u') { @@ -1591,18 +1573,19 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo MEMCPY(buffer, p, char, pe - p); buffer += pe - p; } + rb_str_set_len(result, buffer - bufferStart); - result = build_string(bufferStart, buffer, intern, symbolize); - - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); + if (symbolize) { + result = rb_str_intern(result); + } else if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); } return result; } -#line 1606 "parser.c" +#line 1589 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1610,7 +1593,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 634 "parser.rl" +#line 617 "parser.rl" static int @@ -1631,15 +1614,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1635 "parser.c" +#line 1618 "parser.c" { cs = JSON_string_start; } -#line 654 "parser.rl" +#line 637 
"parser.rl" json->memo = p; -#line 1643 "parser.c" +#line 1626 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1664,7 +1647,7 @@ case 2: goto st0; goto st2; tr2: -#line 621 "parser.rl" +#line 604 "parser.rl" { *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { @@ -1674,14 +1657,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 631 "parser.rl" +#line 614 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1685 "parser.c" +#line 1668 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1757,7 +1740,7 @@ case 7: _out: {} } -#line 656 "parser.rl" +#line 639 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1954,7 +1937,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1958 "parser.c" +#line 1941 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1962,7 +1945,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 866 "parser.rl" +#line 849 "parser.rl" /* @@ -1980,16 +1963,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 1984 "parser.c" +#line 1967 "parser.c" { cs = JSON_start; } -#line 883 "parser.rl" +#line 866 "parser.rl" p = json->source; pe = p + json->len; -#line 1993 "parser.c" +#line 1976 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2023,7 +2006,7 @@ case 1: cs = 0; goto _out; tr2: -#line 858 "parser.rl" +#line 841 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2033,7 +2016,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2037 "parser.c" +#line 2020 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2122,7 +2105,7 @@ case 9: _out: {} } -#line 886 "parser.rl" +#line 869 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; 
diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index ef83aaec7..3301f1608 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -487,10 +487,8 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } -static const size_t MAX_STACK_BUFFER_SIZE = 128; static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize) { - VALUE result = Qnil; size_t bufferSize = stringEnd - string; char *p = string, *pe = string, *unescape, *bufferStart, *buffer; int unescape_len; @@ -501,19 +499,9 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo return build_string(string, stringEnd, intern, symbolize); } - if (bufferSize > MAX_STACK_BUFFER_SIZE) { -# ifdef HAVE_RB_ENC_INTERNED_STR - bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1); -# else - bufferStart = buffer = ALLOC_N(char, bufferSize); -# endif - } else { -# ifdef HAVE_RB_ENC_INTERNED_STR - bufferStart = buffer = ALLOCA_N(char, bufferSize ? 
bufferSize : 1); -# else - bufferStart = buffer = ALLOCA_N(char, bufferSize); -# endif - } + VALUE result = rb_str_buf_new(bufferSize); + rb_enc_associate_index(result, utf8_encindex); + buffer = bufferStart = RSTRING_PTR(result); while (pe < stringEnd) { if (*pe == '\\') { @@ -547,9 +535,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo break; case 'u': if (pe > stringEnd - 4) { - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } raise_parse_error("incomplete unicode character escape sequence at '%s'", p); } else { uint32_t ch = unescape_unicode((unsigned char *) ++pe); @@ -567,9 +552,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo if ((ch & 0xFC00) == 0xD800) { pe++; if (pe > stringEnd - 6) { - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } raise_parse_error("incomplete surrogate pair at '%s'", p); } if (pe[0] == '\\' && pe[1] == 'u') { @@ -602,11 +584,12 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo MEMCPY(buffer, p, char, pe - p); buffer += pe - p; } + rb_str_set_len(result, buffer - bufferStart); - result = build_string(bufferStart, buffer, intern, symbolize); - - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); + if (symbolize) { + result = rb_str_intern(result); + } else if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); } return result; From bfb779794ce4b2f3ab43f2d919cced3a1f81df86 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 1 Nov 2024 17:05:22 +0100 Subject: [PATCH 45/75] Elide JSON::Parser allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to https://github.com/ruby/json/pull/662, but here we don't even need to spill on the heap, because the parser is never exposed. 
Before: ``` == Parsing small hash (65 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 188.233k i/100ms oj 213.985k i/100ms oj strict 242.564k i/100ms Oj::Parser 448.682k i/100ms rapidjson 291.925k i/100ms Calculating ------------------------------------- json 1.983M (± 0.5%) i/s (504.32 ns/i) - 9.976M in 5.031352s oj 2.334M (± 0.2%) i/s (428.48 ns/i) - 11.769M in 5.042839s oj strict 2.689M (± 0.2%) i/s (371.85 ns/i) - 13.584M in 5.051044s Oj::Parser 4.662M (± 1.2%) i/s (214.50 ns/i) - 23.331M in 5.005414s rapidjson 3.110M (± 0.7%) i/s (321.57 ns/i) - 15.764M in 5.069531s Comparison: json: 1982878.1 i/s Oj::Parser: 4661924.8 i/s - 2.35x faster rapidjson: 3109722.2 i/s - 1.57x faster oj strict: 2689277.0 i/s - 1.36x faster oj: 2333852.9 i/s - 1.18x faster ``` After: ``` == Parsing small hash (65 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 223.083k i/100ms oj 214.400k i/100ms oj strict 243.519k i/100ms Oj::Parser 445.445k i/100ms rapidjson 293.936k i/100ms Calculating ------------------------------------- json 2.279M (± 4.5%) i/s (438.71 ns/i) - 11.377M in 5.002132s oj 2.315M (± 0.3%) i/s (431.96 ns/i) - 11.578M in 5.001141s oj strict 2.665M (± 0.9%) i/s (375.19 ns/i) - 13.394M in 5.025562s Oj::Parser 4.703M (± 0.3%) i/s (212.63 ns/i) - 23.609M in 5.019913s rapidjson 3.129M (± 0.4%) i/s (319.55 ns/i) - 15.873M in 5.072213s Comparison: json: 2279385.2 i/s Oj::Parser: 4703032.3 i/s - 2.06x faster rapidjson: 3129356.1 i/s - 1.37x faster oj strict: 2665318.3 i/s - 1.17x faster oj: 2315009.3 i/s - same-ish: difference falls within error ``` --- ext/json/ext/parser/parser.c | 408 +++++++++++++++++++++++----------- ext/json/ext/parser/parser.rl | 254 +++++++++++---------- java/src/json/ext/Parser.java | 150 +++++++------ java/src/json/ext/Parser.rl | 8 + lib/json/common.rb | 11 +- lib/json/pure/parser.rb | 15 ++ 
6 files changed, 520 insertions(+), 326 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index cdf8983a7..710834ebc 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -128,7 +128,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); + *result = object_class ? rb_class_new_instance(0, 0, object_class) :rb_hash_new(); #line 135 "parser.c" @@ -253,11 +253,11 @@ case 8: if (np == NULL) { p--; {p++; cs = 9; goto _out;} } else { - if (NIL_P(json->object_class)) { + if (json->object_class) { + rb_funcall(*result, i_aset, 2, last_name, v); + } else { OBJ_FREEZE(last_name); rb_hash_aset(*result, last_name, v); - } else { - rb_funcall(*result, i_aset, 2, last_name, v); } {p = (( np))-1;} } @@ -466,10 +466,10 @@ case 26: if (cs >= JSON_object_first_final) { if (json->create_additions) { VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); + if (json->object_class) { + klassname = rb_funcall(*result, i_aref, 1, json->create_id); } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); + klassname = rb_hash_aref(*result, json->create_id); } if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); @@ -1141,7 +1141,7 @@ case 7: if (cs >= JSON_float_first_final) { VALUE mod = Qnil; ID method_id = 0; - if (!NIL_P(json->decimal_class)) { + if (json->decimal_class) { if (rb_respond_to(json->decimal_class, i_try_convert)) { mod = json->decimal_class; method_id = i_try_convert; @@ -1208,7 +1208,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = 
NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); + *result = array_class ? rb_class_new_instance(0, 0, array_class) : rb_ary_new(); #line 1215 "parser.c" @@ -1264,10 +1264,10 @@ case 2: if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { - if (NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { + if (json->array_class) { rb_funcall(*result, i_leftshift, 1, v); + } else { + rb_ary_push(*result, v); } {p = (( np))-1;} } @@ -1788,6 +1788,103 @@ static VALUE convert_encoding(VALUE source) return rb_funcall(source, i_encode, 1, Encoding_UTF_8); } +static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) +{ + if (json->Vsource) { + rb_raise(rb_eTypeError, "already initialized instance"); + } + + json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + json->max_nesting = 100; + + if (!NIL_P(opts)) { + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) > 0) { + VALUE tmp = ID2SYM(i_max_nesting); + if (option_given_p(opts, tmp)) { + VALUE max_nesting = rb_hash_aref(opts, tmp); + if (RTEST(max_nesting)) { + Check_Type(max_nesting, T_FIXNUM); + json->max_nesting = FIX2INT(max_nesting); + } else { + json->max_nesting = 0; + } + } else { + json->max_nesting = 100; + } + tmp = ID2SYM(i_allow_nan); + if (option_given_p(opts, tmp)) { + json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; + } else { + json->allow_nan = 0; + } + tmp = ID2SYM(i_symbolize_names); + if (option_given_p(opts, tmp)) { + json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; + } else { + json->symbolize_names = 0; + } + tmp = ID2SYM(i_freeze); + if (option_given_p(opts, tmp)) { + json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; + } else { + json->freeze = 0; + } + tmp = ID2SYM(i_create_additions); + if (option_given_p(opts, tmp)) { + tmp = rb_hash_aref(opts, tmp); + if (NIL_P(tmp)) { + json->create_additions = 1; + json->deprecated_create_additions = 1; + } else { + json->create_additions = RTEST(tmp); + json->deprecated_create_additions = 0; + } + } + + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } + tmp = ID2SYM(i_create_id); + if (option_given_p(opts, tmp)) { + json->create_id = rb_hash_aref(opts, tmp); + } else { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + tmp = ID2SYM(i_object_class); + if (option_given_p(opts, tmp)) { + json->object_class = rb_hash_aref(opts, tmp); + if (NIL_P(json->object_class)) json->object_class = Qfalse; + } + tmp = ID2SYM(i_array_class); + if (option_given_p(opts, tmp)) { + json->array_class = rb_hash_aref(opts, tmp); + if (NIL_P(json->array_class)) json->array_class = Qfalse; + } + + tmp = ID2SYM(i_decimal_class); + if (option_given_p(opts, tmp)) { + json->decimal_class = rb_hash_aref(opts, tmp); + if (NIL_P(json->decimal_class)) json->decimal_class = Qfalse; + } + + tmp = ID2SYM(i_match_string); + if (option_given_p(opts, tmp)) { + VALUE match_string = rb_hash_aref(opts, tmp); + json->match_string = RTEST(match_string) ? 
match_string : Qfalse; + } + } + } + + source = convert_encoding(StringValue(source)); + StringValue(source); + json->len = RSTRING_LEN(source); + json->source = RSTRING_PTR(source); + json->Vsource = source; +} + /* * call-seq: new(source, opts => {}) * @@ -1822,122 +1919,16 @@ static VALUE convert_encoding(VALUE source) */ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) { - VALUE source, opts; GET_PARSER_INIT; - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - rb_check_arity(argc, 1, 2); - source = argv[0]; - opts = Qnil; - if (argc == 2) { - opts = argv[1]; - Check_Type(argv[1], T_HASH); - if (RHASH_SIZE(argv[1]) > 0) { - opts = argv[1]; - } - } - if (!NIL_P(opts)) { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_freeze); - if (option_given_p(opts, tmp)) { - json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; - } else { - json->freeze = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - tmp = rb_hash_aref(opts, tmp); - if (NIL_P(tmp)) { - json->create_additions = 1; - json->deprecated_create_additions = 1; - } else { - json->create_additions = RTEST(tmp); - json->deprecated_create_additions = 0; - } - } - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - } else { - json->decimal_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? match_string : Qnil; - } else { - json->match_string = Qnil; - } - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 0; - json->create_id = Qnil; - json->object_class = Qnil; - json->array_class = Qnil; - json->decimal_class = Qnil; - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source); - json->Vsource = source; + parser_init(json, argv[0], argc == 2 ? 
argv[1] : Qnil); return self; } -#line 1941 "parser.c" +#line 1932 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1945,7 +1936,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 849 "parser.rl" +#line 840 "parser.rl" /* @@ -1963,16 +1954,179 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; +#line 1958 "parser.c" + { + cs = JSON_start; + } + +#line 857 "parser.rl" + p = json->source; + pe = p + json->len; + #line 1967 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +st1: + if ( ++p == pe ) + goto _test_eof1; +case 1: + switch( (*p) ) { + case 13: goto st1; + case 32: goto st1; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st6; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st1; + goto st0; +st0: +cs = 0; + goto _out; +tr2: +#line 832 "parser.rl" + { + char *np = JSON_parse_value(json, p, pe, &result, 0); + if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} + } + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 2011 "parser.c" + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 47: goto st2; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 42: goto st3; + case 47: goto st5; + } + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 42 ) + goto st4; + goto st3; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st4; + case 47: goto st10; + } + goto st3; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 10 ) + goto st10; + goto st5; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st7; + case 47: goto 
st9; + } + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 42 ) + goto st8; + goto st7; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 42: goto st8; + case 47: goto st1; + } + goto st7; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + if ( (*p) == 10 ) + goto st1; + goto st9; + } + _test_eof1: cs = 1; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 860 "parser.rl" + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + raise_parse_error("unexpected token at '%s'", p); + return Qnil; + } +} + +static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + + JSON_Parser parser = {0}; + JSON_Parser *json = &parser; + parser_init(json, source, opts); + + +#line 2121 "parser.c" { cs = JSON_start; } -#line 866 "parser.rl" +#line 880 "parser.rl" p = json->source; pe = p + json->len; -#line 1976 "parser.c" +#line 2130 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2006,7 +2160,7 @@ case 1: cs = 0; goto _out; tr2: -#line 841 "parser.rl" +#line 832 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2016,7 +2170,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2020 "parser.c" +#line 2174 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2105,7 +2259,7 @@ case 9: _out: {} } -#line 869 "parser.rl" +#line 883 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -2184,6 +2338,8 @@ void Init_parser(void) rb_define_method(cParser, 
"parse", cParser_parse, 0); rb_define_method(cParser, "source", cParser_source, 0); + rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); + CNaN = rb_const_get(mJSON, rb_intern("NaN")); rb_gc_register_mark_object(CNaN); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 3301f1608..0f34c46bd 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -140,11 +140,11 @@ static int utf8_encindex; if (np == NULL) { fhold; fbreak; } else { - if (NIL_P(json->object_class)) { + if (json->object_class) { + rb_funcall(*result, i_aset, 2, last_name, v); + } else { OBJ_FREEZE(last_name); rb_hash_aset(*result, last_name, v); - } else { - rb_funcall(*result, i_aset, 2, last_name, v); } fexec np; } @@ -180,7 +180,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); + *result = object_class ? 
rb_class_new_instance(0, 0, object_class) :rb_hash_new(); %% write init; %% write exec; @@ -188,10 +188,10 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu if (cs >= JSON_object_first_final) { if (json->create_additions) { VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); + if (json->object_class) { + klassname = rb_funcall(*result, i_aref, 1, json->create_id); } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); + klassname = rb_hash_aref(*result, json->create_id); } if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); @@ -362,7 +362,7 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul if (cs >= JSON_float_first_final) { VALUE mod = Qnil; ID method_id = 0; - if (!NIL_P(json->decimal_class)) { + if (json->decimal_class) { if (rb_respond_to(json->decimal_class, i_try_convert)) { mod = json->decimal_class; method_id = i_try_convert; @@ -421,10 +421,10 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul if (np == NULL) { fhold; fbreak; } else { - if (NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { + if (json->array_class) { rb_funcall(*result, i_leftshift, 1, v); + } else { + rb_ary_push(*result, v); } fexec np; } @@ -448,7 +448,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); + *result = array_class ? 
rb_class_new_instance(0, 0, array_class) : rb_ary_new(); %% write init; %% write exec; @@ -683,6 +683,103 @@ static VALUE convert_encoding(VALUE source) return rb_funcall(source, i_encode, 1, Encoding_UTF_8); } +static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) +{ + if (json->Vsource) { + rb_raise(rb_eTypeError, "already initialized instance"); + } + + json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + json->max_nesting = 100; + + if (!NIL_P(opts)) { + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) > 0) { + VALUE tmp = ID2SYM(i_max_nesting); + if (option_given_p(opts, tmp)) { + VALUE max_nesting = rb_hash_aref(opts, tmp); + if (RTEST(max_nesting)) { + Check_Type(max_nesting, T_FIXNUM); + json->max_nesting = FIX2INT(max_nesting); + } else { + json->max_nesting = 0; + } + } else { + json->max_nesting = 100; + } + tmp = ID2SYM(i_allow_nan); + if (option_given_p(opts, tmp)) { + json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; + } else { + json->allow_nan = 0; + } + tmp = ID2SYM(i_symbolize_names); + if (option_given_p(opts, tmp)) { + json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; + } else { + json->symbolize_names = 0; + } + tmp = ID2SYM(i_freeze); + if (option_given_p(opts, tmp)) { + json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; + } else { + json->freeze = 0; + } + tmp = ID2SYM(i_create_additions); + if (option_given_p(opts, tmp)) { + tmp = rb_hash_aref(opts, tmp); + if (NIL_P(tmp)) { + json->create_additions = 1; + json->deprecated_create_additions = 1; + } else { + json->create_additions = RTEST(tmp); + json->deprecated_create_additions = 0; + } + } + + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } + tmp = ID2SYM(i_create_id); + if (option_given_p(opts, tmp)) { + json->create_id = rb_hash_aref(opts, tmp); + } else { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + tmp = ID2SYM(i_object_class); + if (option_given_p(opts, tmp)) { + json->object_class = rb_hash_aref(opts, tmp); + if (NIL_P(json->object_class)) json->object_class = Qfalse; + } + tmp = ID2SYM(i_array_class); + if (option_given_p(opts, tmp)) { + json->array_class = rb_hash_aref(opts, tmp); + if (NIL_P(json->array_class)) json->array_class = Qfalse; + } + + tmp = ID2SYM(i_decimal_class); + if (option_given_p(opts, tmp)) { + json->decimal_class = rb_hash_aref(opts, tmp); + if (NIL_P(json->decimal_class)) json->decimal_class = Qfalse; + } + + tmp = ID2SYM(i_match_string); + if (option_given_p(opts, tmp)) { + VALUE match_string = rb_hash_aref(opts, tmp); + json->match_string = RTEST(match_string) ? 
match_string : Qfalse; + } + } + } + + source = convert_encoding(StringValue(source)); + StringValue(source); + json->len = RSTRING_LEN(source); + json->source = RSTRING_PTR(source); + json->Vsource = source; +} + /* * call-seq: new(source, opts => {}) * @@ -717,117 +814,11 @@ static VALUE convert_encoding(VALUE source) */ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) { - VALUE source, opts; GET_PARSER_INIT; - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - rb_check_arity(argc, 1, 2); - source = argv[0]; - opts = Qnil; - if (argc == 2) { - opts = argv[1]; - Check_Type(argv[1], T_HASH); - if (RHASH_SIZE(argv[1]) > 0) { - opts = argv[1]; - } - } - if (!NIL_P(opts)) { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_freeze); - if (option_given_p(opts, tmp)) { - json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; - } else { - json->freeze = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - tmp = rb_hash_aref(opts, tmp); - if (NIL_P(tmp)) { - json->create_additions = 1; - json->deprecated_create_additions = 1; - } else { - json->create_additions = RTEST(tmp); - json->deprecated_create_additions = 0; - } - } - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - } else { - json->decimal_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? match_string : Qnil; - } else { - json->match_string = Qnil; - } - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 0; - json->create_id = Qnil; - json->object_class = Qnil; - json->array_class = Qnil; - json->decimal_class = Qnil; - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source); - json->Vsource = source; + parser_init(json, argv[0], argc == 2 ? 
argv[1] : Qnil); return self; } @@ -875,6 +866,29 @@ static VALUE cParser_parse(VALUE self) } } +static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + + JSON_Parser parser = {0}; + JSON_Parser *json = &parser; + parser_init(json, source, opts); + + %% write init; + p = json->source; + pe = p + json->len; + %% write exec; + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + raise_parse_error("unexpected token at '%s'", p); + return Qnil; + } +} + static void JSON_mark(void *ptr) { JSON_Parser *json = ptr; @@ -944,6 +958,8 @@ void Init_parser(void) rb_define_method(cParser, "parse", cParser_parse, 0); rb_define_method(cParser, "source", cParser_source, 0); + rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); + CNaN = rb_const_get(mJSON, rb_intern("NaN")); rb_gc_register_mark_object(CNaN); diff --git a/java/src/json/ext/Parser.java b/java/src/json/ext/Parser.java index 92001b3e0..1e8832908 100644 --- a/java/src/json/ext/Parser.java +++ b/java/src/json/ext/Parser.java @@ -159,6 +159,14 @@ public static IRubyObject newInstance(IRubyObject clazz, IRubyObject[] args, Blo return parser; } + @JRubyMethod(meta=true) + public static IRubyObject parse(ThreadContext context, IRubyObject clazz, IRubyObject source, IRubyObject opts) { + IRubyObject[] args = new IRubyObject[] {source, opts}; + Parser parser = (Parser)((RubyClass)clazz).allocate(); + parser.callInit(args, null); + return parser.parse(context); + } + @JRubyMethod(required = 1, optional = 1, visibility = Visibility.PRIVATE) public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { Ruby runtime = context.getRuntime(); @@ -356,11 +364,11 @@ private Ruby getRuntime() { } -// line 382 "Parser.rl" +// line 390 "Parser.rl" -// line 364 "Parser.java" +// line 372 "Parser.java" private static byte[] init__JSON_value_actions_0() { return new byte [] { @@ -474,7 +482,7 @@ private static byte[] 
init__JSON_value_from_state_actions_0() static final int JSON_value_en_main = 1; -// line 488 "Parser.rl" +// line 496 "Parser.rl" void parseValue(ParserResult res, int p, int pe) { @@ -482,14 +490,14 @@ void parseValue(ParserResult res, int p, int pe) { IRubyObject result = null; -// line 486 "Parser.java" +// line 494 "Parser.java" { cs = JSON_value_start; } -// line 495 "Parser.rl" +// line 503 "Parser.rl" -// line 493 "Parser.java" +// line 501 "Parser.java" { int _klen; int _trans = 0; @@ -515,13 +523,13 @@ void parseValue(ParserResult res, int p, int pe) { while ( _nacts-- > 0 ) { switch ( _JSON_value_actions[_acts++] ) { case 9: -// line 473 "Parser.rl" +// line 481 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 525 "Parser.java" +// line 533 "Parser.java" } } @@ -584,25 +592,25 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) switch ( _JSON_value_actions[_acts++] ) { case 0: -// line 390 "Parser.rl" +// line 398 "Parser.rl" { result = getRuntime().getNil(); } break; case 1: -// line 393 "Parser.rl" +// line 401 "Parser.rl" { result = getRuntime().getFalse(); } break; case 2: -// line 396 "Parser.rl" +// line 404 "Parser.rl" { result = getRuntime().getTrue(); } break; case 3: -// line 399 "Parser.rl" +// line 407 "Parser.rl" { if (parser.allowNaN) { result = getConstant(CONST_NAN); @@ -612,7 +620,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 4: -// line 406 "Parser.rl" +// line 414 "Parser.rl" { if (parser.allowNaN) { result = getConstant(CONST_INFINITY); @@ -622,7 +630,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 5: -// line 413 "Parser.rl" +// line 421 "Parser.rl" { if (pe > p + 8 && absSubSequence(p, p + 9).equals(JSON_MINUS_INFINITY)) { @@ -651,7 +659,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 6: -// line 439 "Parser.rl" +// line 447 "Parser.rl" { parseString(res, p, pe); if (res.result == null) { @@ -664,7 +672,7 @@ else if 
( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 7: -// line 449 "Parser.rl" +// line 457 "Parser.rl" { currentNesting++; parseArray(res, p, pe); @@ -679,7 +687,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 8: -// line 461 "Parser.rl" +// line 469 "Parser.rl" { currentNesting++; parseObject(res, p, pe); @@ -693,7 +701,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } } break; -// line 697 "Parser.java" +// line 705 "Parser.java" } } } @@ -713,7 +721,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) break; } } -// line 496 "Parser.rl" +// line 504 "Parser.rl" if (cs >= JSON_value_first_final && result != null) { if (parser.freeze) { @@ -726,7 +734,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } -// line 730 "Parser.java" +// line 738 "Parser.java" private static byte[] init__JSON_integer_actions_0() { return new byte [] { @@ -825,7 +833,7 @@ private static byte[] init__JSON_integer_trans_actions_0() static final int JSON_integer_en_main = 1; -// line 518 "Parser.rl" +// line 526 "Parser.rl" void parseInteger(ParserResult res, int p, int pe) { @@ -843,15 +851,15 @@ int parseIntegerInternal(int p, int pe) { int cs = EVIL; -// line 847 "Parser.java" +// line 855 "Parser.java" { cs = JSON_integer_start; } -// line 535 "Parser.rl" +// line 543 "Parser.rl" int memo = p; -// line 855 "Parser.java" +// line 863 "Parser.java" { int _klen; int _trans = 0; @@ -932,13 +940,13 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) switch ( _JSON_integer_actions[_acts++] ) { case 0: -// line 512 "Parser.rl" +// line 520 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 942 "Parser.java" +// line 950 "Parser.java" } } } @@ -958,7 +966,7 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) break; } } -// line 537 "Parser.rl" +// line 545 "Parser.rl" if (cs < JSON_integer_first_final) { return -1; @@ -978,7 +986,7 @@ RubyInteger bytesToInum(Ruby runtime, ByteList num) { 
} -// line 982 "Parser.java" +// line 990 "Parser.java" private static byte[] init__JSON_float_actions_0() { return new byte [] { @@ -1080,7 +1088,7 @@ private static byte[] init__JSON_float_trans_actions_0() static final int JSON_float_en_main = 1; -// line 570 "Parser.rl" +// line 578 "Parser.rl" void parseFloat(ParserResult res, int p, int pe) { @@ -1099,15 +1107,15 @@ int parseFloatInternal(int p, int pe) { int cs = EVIL; -// line 1103 "Parser.java" +// line 1111 "Parser.java" { cs = JSON_float_start; } -// line 588 "Parser.rl" +// line 596 "Parser.rl" int memo = p; -// line 1111 "Parser.java" +// line 1119 "Parser.java" { int _klen; int _trans = 0; @@ -1188,13 +1196,13 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) switch ( _JSON_float_actions[_acts++] ) { case 0: -// line 561 "Parser.rl" +// line 569 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1198 "Parser.java" +// line 1206 "Parser.java" } } } @@ -1214,7 +1222,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) break; } } -// line 590 "Parser.rl" +// line 598 "Parser.rl" if (cs < JSON_float_first_final) { return -1; @@ -1224,7 +1232,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) } -// line 1228 "Parser.java" +// line 1236 "Parser.java" private static byte[] init__JSON_string_actions_0() { return new byte [] { @@ -1326,7 +1334,7 @@ private static byte[] init__JSON_string_trans_actions_0() static final int JSON_string_en_main = 1; -// line 629 "Parser.rl" +// line 637 "Parser.rl" void parseString(ParserResult res, int p, int pe) { @@ -1334,15 +1342,15 @@ void parseString(ParserResult res, int p, int pe) { IRubyObject result = null; -// line 1338 "Parser.java" +// line 1346 "Parser.java" { cs = JSON_string_start; } -// line 636 "Parser.rl" +// line 644 "Parser.rl" int memo = p; -// line 1346 "Parser.java" +// line 1354 "Parser.java" { int _klen; int _trans = 0; @@ -1423,7 +1431,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) 
switch ( _JSON_string_actions[_acts++] ) { case 0: -// line 604 "Parser.rl" +// line 612 "Parser.rl" { int offset = byteList.begin(); ByteList decoded = decoder.decode(byteList, memo + 1 - offset, @@ -1438,13 +1446,13 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) } break; case 1: -// line 617 "Parser.rl" +// line 625 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1448 "Parser.java" +// line 1456 "Parser.java" } } } @@ -1464,7 +1472,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) break; } } -// line 638 "Parser.rl" +// line 646 "Parser.rl" if (parser.createAdditions) { RubyHash matchString = parser.match_string; @@ -1512,7 +1520,7 @@ public void visit(IRubyObject pattern, IRubyObject klass) { } -// line 1516 "Parser.java" +// line 1524 "Parser.java" private static byte[] init__JSON_array_actions_0() { return new byte [] { @@ -1625,7 +1633,7 @@ private static byte[] init__JSON_array_trans_actions_0() static final int JSON_array_en_main = 1; -// line 721 "Parser.rl" +// line 729 "Parser.rl" void parseArray(ParserResult res, int p, int pe) { @@ -1645,14 +1653,14 @@ void parseArray(ParserResult res, int p, int pe) { } -// line 1649 "Parser.java" +// line 1657 "Parser.java" { cs = JSON_array_start; } -// line 740 "Parser.rl" +// line 748 "Parser.rl" -// line 1656 "Parser.java" +// line 1664 "Parser.java" { int _klen; int _trans = 0; @@ -1733,7 +1741,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) switch ( _JSON_array_actions[_acts++] ) { case 0: -// line 690 "Parser.rl" +// line 698 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -1750,13 +1758,13 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) } break; case 1: -// line 705 "Parser.rl" +// line 713 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1760 "Parser.java" +// line 1768 "Parser.java" } } } @@ -1776,7 +1784,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) break; } 
} -// line 741 "Parser.rl" +// line 749 "Parser.rl" if (cs >= JSON_array_first_final) { res.update(result, p + 1); @@ -1786,7 +1794,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) } -// line 1790 "Parser.java" +// line 1798 "Parser.java" private static byte[] init__JSON_object_actions_0() { return new byte [] { @@ -1909,7 +1917,7 @@ private static byte[] init__JSON_object_trans_actions_0() static final int JSON_object_en_main = 1; -// line 798 "Parser.rl" +// line 806 "Parser.rl" void parseObject(ParserResult res, int p, int pe) { @@ -1934,14 +1942,14 @@ void parseObject(ParserResult res, int p, int pe) { } -// line 1938 "Parser.java" +// line 1946 "Parser.java" { cs = JSON_object_start; } -// line 822 "Parser.rl" +// line 830 "Parser.rl" -// line 1945 "Parser.java" +// line 1953 "Parser.java" { int _klen; int _trans = 0; @@ -2022,7 +2030,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) switch ( _JSON_object_actions[_acts++] ) { case 0: -// line 755 "Parser.rl" +// line 763 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -2039,7 +2047,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } break; case 1: -// line 770 "Parser.rl" +// line 778 "Parser.rl" { parseString(res, p, pe); if (res.result == null) { @@ -2057,13 +2065,13 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } break; case 2: -// line 786 "Parser.rl" +// line 794 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 2067 "Parser.java" +// line 2075 "Parser.java" } } } @@ -2083,7 +2091,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) break; } } -// line 823 "Parser.rl" +// line 831 "Parser.rl" if (cs < JSON_object_first_final) { res.update(null, p + 1); @@ -2119,7 +2127,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } -// line 2123 "Parser.java" +// line 2131 "Parser.java" private static byte[] init__JSON_actions_0() { return new byte [] { @@ -2222,7 +2230,7 @@ private static byte[] 
init__JSON_trans_actions_0() static final int JSON_en_main = 1; -// line 877 "Parser.rl" +// line 885 "Parser.rl" public IRubyObject parseImplemetation() { @@ -2232,16 +2240,16 @@ public IRubyObject parseImplemetation() { ParserResult res = new ParserResult(); -// line 2236 "Parser.java" +// line 2244 "Parser.java" { cs = JSON_start; } -// line 886 "Parser.rl" +// line 894 "Parser.rl" p = byteList.begin(); pe = p + byteList.length(); -// line 2245 "Parser.java" +// line 2253 "Parser.java" { int _klen; int _trans = 0; @@ -2322,7 +2330,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) switch ( _JSON_actions[_acts++] ) { case 0: -// line 863 "Parser.rl" +// line 871 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -2334,7 +2342,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) } } break; -// line 2338 "Parser.java" +// line 2346 "Parser.java" } } } @@ -2354,7 +2362,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) break; } } -// line 889 "Parser.rl" +// line 897 "Parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/java/src/json/ext/Parser.rl b/java/src/json/ext/Parser.rl index 2173105f9..8102bc93c 100644 --- a/java/src/json/ext/Parser.rl +++ b/java/src/json/ext/Parser.rl @@ -157,6 +157,14 @@ public class Parser extends RubyObject { return parser; } + @JRubyMethod(meta=true) + public static IRubyObject parse(ThreadContext context, IRubyObject clazz, IRubyObject source, IRubyObject opts) { + IRubyObject[] args = new IRubyObject[] {source, opts}; + Parser parser = (Parser)((RubyClass)clazz).allocate(); + parser.callInit(args, null); + return parser.parse(context); + } + @JRubyMethod(required = 1, optional = 1, visibility = Visibility.PRIVATE) public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { Ruby runtime = context.getRuntime(); diff --git a/lib/json/common.rb b/lib/json/common.rb index 546b6ec80..84f2a57c8 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -207,16 +207,7 @@ class 
MissingUnicodeSupport < JSONError; end # JSON.parse('') # def parse(source, opts = nil) - if opts.nil? - Parser.new(source).parse - else - # NB: The ** shouldn't be required, but we have to deal with - # different versions of the `json` and `json_pure` gems being - # loaded concurrently. - # Prior to 2.7.3, `JSON::Ext::Parser` would only take kwargs. - # Ref: https://github.com/ruby/json/issues/650 - Parser.new(source, **opts).parse - end + Parser.parse(source, opts) end # :call-seq: diff --git a/lib/json/pure/parser.rb b/lib/json/pure/parser.rb index ba38f5d25..33a441073 100644 --- a/lib/json/pure/parser.rb +++ b/lib/json/pure/parser.rb @@ -48,6 +48,21 @@ class Parser < StringScanner UNPARSED = Object.new.freeze + class << self + def parse(source, opts = nil) + if opts.nil? + new(source).parse + else + # NB: The ** shouldn't be required, but we have to deal with + # different versions of the `json` and `json_pure` gems being + # loaded concurrently. + # Prior to 2.7.3, `JSON::Ext::Parser` would only take kwargs. + # Ref: https://github.com/ruby/json/issues/650 + new(source, **opts).parse + end + end + end + # Creates a new JSON::Pure::Parser instance for the string _source_. # # It will be configured by the _opts_ hash. 
_opts_ can have the following From f55235167782d049472dae928fd3ef5bd92b6c6c Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 1 Nov 2024 17:11:36 +0100 Subject: [PATCH 46/75] Use stdbool --- ext/json/ext/parser/parser.c | 16 ++++++++-------- ext/json/ext/parser/parser.h | 12 ++++++------ ext/json/ext/parser/parser.rl | 16 ++++++++-------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 710834ebc..ee947c749 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -169,9 +169,9 @@ case 2: #line 153 "parser.rl" { char *np; - json->parsing_name = 1; + json->parsing_name = true; np = JSON_parse_string(json, p, pe, &last_name); - json->parsing_name = 0; + json->parsing_name = false; if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} } goto st3; @@ -1816,29 +1816,29 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) if (option_given_p(opts, tmp)) { json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; } else { - json->allow_nan = 0; + json->allow_nan = false; } tmp = ID2SYM(i_symbolize_names); if (option_given_p(opts, tmp)) { json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; } else { - json->symbolize_names = 0; + json->symbolize_names = false; } tmp = ID2SYM(i_freeze); if (option_given_p(opts, tmp)) { json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; } else { - json->freeze = 0; + json->freeze = false; } tmp = ID2SYM(i_create_additions); if (option_given_p(opts, tmp)) { tmp = rb_hash_aref(opts, tmp); if (NIL_P(tmp)) { - json->create_additions = 1; - json->deprecated_create_additions = 1; + json->create_additions = true; + json->deprecated_create_additions = true; } else { json->create_additions = RTEST(tmp); - json->deprecated_create_additions = 0; + json->deprecated_create_additions = false; } } diff --git a/ext/json/ext/parser/parser.h b/ext/json/ext/parser/parser.h index d1863a2b9..021784eb8 100644 --- a/ext/json/ext/parser/parser.h +++ b/ext/json/ext/parser/parser.h @@ -38,12 +38,12 @@ typedef struct JSON_ParserStruct { VALUE match_string; FBuffer fbuffer; int max_nesting; - char allow_nan; - char parsing_name; - char symbolize_names; - char freeze; - char create_additions; - char deprecated_create_additions; + bool allow_nan; + bool parsing_name; + bool symbolize_names; + bool freeze; + bool create_additions; + bool deprecated_create_additions; } JSON_Parser; #define GET_PARSER \ diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 0f34c46bd..1923c0e5c 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -152,9 +152,9 @@ static int utf8_encindex; action parse_name { char *np; - json->parsing_name = 1; + json->parsing_name = true; np = JSON_parse_string(json, fpc, pe, &last_name); - json->parsing_name = 0; + json->parsing_name = false; if (np == NULL) { fhold; fbreak; } else fexec np; } @@ -711,29 +711,29 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) if (option_given_p(opts, tmp)) { json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; } else { - json->allow_nan = 0; + json->allow_nan = false; } tmp = ID2SYM(i_symbolize_names); if (option_given_p(opts, tmp)) { json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; } else { - json->symbolize_names = 0; + json->symbolize_names = false; } tmp = ID2SYM(i_freeze); if (option_given_p(opts, tmp)) { json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; } else { - json->freeze = 0; + json->freeze = false; } tmp = ID2SYM(i_create_additions); if (option_given_p(opts, tmp)) { tmp = rb_hash_aref(opts, tmp); if (NIL_P(tmp)) { - json->create_additions = 1; - json->deprecated_create_additions = 1; + json->create_additions = true; + json->deprecated_create_additions = true; } else { json->create_additions = RTEST(tmp); - json->deprecated_create_additions = 0; + json->deprecated_create_additions = false; } } From 278f105bde5c05daac15ed159eea30995910aa99 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 1 Nov 2024 17:45:19 +0100 Subject: [PATCH 47/75] Assume most parsing options aren't set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same strategy used for the generator, if we assume only a couple options are passed at most, we might as well traverse the option hash rather than to check all possible keys. 
``` == Parsing small hash (65 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23] Warming up -------------------------------------- json 229.732k i/100ms oj 221.571k i/100ms oj strict 255.080k i/100ms Oj::Parser 427.514k i/100ms rapidjson 282.252k i/100ms Calculating ------------------------------------- json 2.185M (± 3.3%) i/s (457.68 ns/i) - 11.027M in 5.052670s oj 2.227M (± 0.4%) i/s (449.10 ns/i) - 11.300M in 5.074920s oj strict 2.532M (± 1.4%) i/s (394.97 ns/i) - 12.754M in 5.038527s Oj::Parser 4.309M (± 0.5%) i/s (232.10 ns/i) - 21.803M in 5.060621s rapidjson 2.811M (± 0.2%) i/s (355.78 ns/i) - 14.113M in 5.020940s Comparison: json: 2184913.9 i/s Oj::Parser: 4308534.8 i/s - 1.97x faster rapidjson: 2810757.1 i/s - 1.29x faster oj strict: 2531841.6 i/s - 1.16x faster oj: 2226694.4 i/s - same-ish: difference falls within error ``` --- ext/json/ext/parser/parser.c | 294 +++++++++++++++------------------- ext/json/ext/parser/parser.h | 2 - ext/json/ext/parser/parser.rl | 136 ++++++---------- 3 files changed, 183 insertions(+), 249 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index ee947c749..983b7dbdf 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -92,22 +92,24 @@ static void raise_parse_error(const char *format, const char *start) static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, - i_object_class, i_array_class, i_decimal_class, - i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode; +static ID i_json_creatable_p, i_json_create, i_create_id, + i_chr, i_deep_const_get, i_match, i_aset, i_aref, + i_leftshift, i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, 
sym_symbolize_names, sym_freeze, + sym_create_additions, sym_create_id, sym_object_class, sym_array_class, + sym_decimal_class, sym_match_string; static int binary_encindex; static int utf8_encindex; -#line 129 "parser.rl" +#line 131 "parser.rl" -#line 111 "parser.c" +#line 113 "parser.c" enum {JSON_object_start = 1}; enum {JSON_object_first_final = 27}; enum {JSON_object_error = 0}; @@ -115,7 +117,7 @@ enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 171 "parser.rl" +#line 173 "parser.rl" static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -131,14 +133,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu *result = object_class ? rb_class_new_instance(0, 0, object_class) :rb_hash_new(); -#line 135 "parser.c" +#line 137 "parser.c" { cs = JSON_object_start; } -#line 186 "parser.rl" +#line 188 "parser.rl" -#line 142 "parser.c" +#line 144 "parser.c" { if ( p == pe ) goto _test_eof; @@ -166,7 +168,7 @@ case 2: goto st2; goto st0; tr2: -#line 153 "parser.rl" +#line 155 "parser.rl" { char *np; json->parsing_name = true; @@ -179,7 +181,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 183 "parser.c" +#line 185 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -246,7 +248,7 @@ case 8: goto st8; goto st0; tr11: -#line 137 "parser.rl" +#line 139 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -267,7 +269,7 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 271 "parser.c" +#line 273 "parser.c" switch( (*p) ) { case 13: goto st9; case 32: goto st9; @@ -356,14 +358,14 @@ case 18: goto st9; goto st18; tr4: -#line 161 "parser.rl" +#line 163 "parser.rl" { p--; {p++; cs = 27; goto _out;} } goto st27; st27: if ( ++p == pe ) goto _test_eof27; case 27: -#line 367 "parser.c" +#line 369 "parser.c" goto st0; st19: if ( ++p == pe ) @@ -461,7 +463,7 @@ case 26: _out: {} } -#line 187 "parser.rl" 
+#line 189 "parser.rl" if (cs >= JSON_object_first_final) { if (json->create_additions) { @@ -489,7 +491,7 @@ case 26: -#line 493 "parser.c" +#line 495 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -497,7 +499,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 290 "parser.rl" +#line 292 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -505,14 +507,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 509 "parser.c" +#line 511 "parser.c" { cs = JSON_value_start; } -#line 297 "parser.rl" +#line 299 "parser.rl" -#line 516 "parser.c" +#line 518 "parser.c" { if ( p == pe ) goto _test_eof; @@ -546,14 +548,14 @@ case 1: cs = 0; goto _out; tr2: -#line 242 "parser.rl" +#line 244 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } goto st29; tr3: -#line 247 "parser.rl" +#line 249 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -573,7 +575,7 @@ cs = 0; } goto st29; tr7: -#line 265 "parser.rl" +#line 267 "parser.rl" { char *np; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -581,7 +583,7 @@ cs = 0; } goto st29; tr11: -#line 271 "parser.rl" +#line 273 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -589,7 +591,7 @@ cs = 0; } goto st29; tr25: -#line 235 "parser.rl" +#line 237 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -599,7 +601,7 @@ cs = 0; } goto st29; tr27: -#line 228 "parser.rl" +#line 230 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -609,19 +611,19 @@ cs = 0; } goto st29; tr31: -#line 222 "parser.rl" +#line 224 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 219 "parser.rl" +#line 221 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 225 
"parser.rl" +#line 227 "parser.rl" { *result = Qtrue; } @@ -630,9 +632,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 277 "parser.rl" +#line 279 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 636 "parser.c" +#line 638 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -873,7 +875,7 @@ case 28: _out: {} } -#line 298 "parser.rl" +#line 300 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -887,7 +889,7 @@ case 28: } -#line 891 "parser.c" +#line 893 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -895,7 +897,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 318 "parser.rl" +#line 320 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -903,15 +905,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 907 "parser.c" +#line 909 "parser.c" { cs = JSON_integer_start; } -#line 325 "parser.rl" +#line 327 "parser.rl" json->memo = p; -#line 915 "parser.c" +#line 917 "parser.c" { if ( p == pe ) goto _test_eof; @@ -945,14 +947,14 @@ case 3: goto st0; goto tr4; tr4: -#line 315 "parser.rl" +#line 317 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 956 "parser.c" +#line 958 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -971,7 +973,7 @@ case 5: _out: {} } -#line 327 "parser.rl" +#line 329 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -986,7 +988,7 @@ case 5: } -#line 990 "parser.c" +#line 992 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -994,7 +996,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 352 "parser.rl" +#line 354 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1002,15 +1004,15 @@ static char 
*JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1006 "parser.c" +#line 1008 "parser.c" { cs = JSON_float_start; } -#line 359 "parser.rl" +#line 361 "parser.rl" json->memo = p; -#line 1014 "parser.c" +#line 1016 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1068,14 +1070,14 @@ case 8: goto st0; goto tr9; tr9: -#line 346 "parser.rl" +#line 348 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1079 "parser.c" +#line 1081 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1136,7 +1138,7 @@ case 7: _out: {} } -#line 361 "parser.rl" +#line 363 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; @@ -1189,7 +1191,7 @@ case 7: -#line 1193 "parser.c" +#line 1195 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 17}; enum {JSON_array_error = 0}; @@ -1197,7 +1199,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 441 "parser.rl" +#line 443 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1211,14 +1213,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul *result = array_class ? 
rb_class_new_instance(0, 0, array_class) : rb_ary_new(); -#line 1215 "parser.c" +#line 1217 "parser.c" { cs = JSON_array_start; } -#line 454 "parser.rl" +#line 456 "parser.rl" -#line 1222 "parser.c" +#line 1224 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1257,7 +1259,7 @@ case 2: goto st2; goto st0; tr2: -#line 418 "parser.rl" +#line 420 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1277,7 +1279,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1281 "parser.c" +#line 1283 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -1377,14 +1379,14 @@ case 12: goto st3; goto st12; tr4: -#line 433 "parser.rl" +#line 435 "parser.rl" { p--; {p++; cs = 17; goto _out;} } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 1388 "parser.c" +#line 1390 "parser.c" goto st0; st13: if ( ++p == pe ) @@ -1440,7 +1442,7 @@ case 16: _out: {} } -#line 455 "parser.rl" +#line 457 "parser.rl" if(cs >= JSON_array_first_final) { return p + 1; @@ -1585,7 +1587,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo } -#line 1589 "parser.c" +#line 1591 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1593,7 +1595,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 617 "parser.rl" +#line 619 "parser.rl" static int @@ -1614,15 +1616,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1618 "parser.c" +#line 1620 "parser.c" { cs = JSON_string_start; } -#line 637 "parser.rl" +#line 639 "parser.rl" json->memo = p; -#line 1626 "parser.c" +#line 1628 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1647,7 +1649,7 @@ case 2: goto st0; goto st2; tr2: -#line 604 "parser.rl" +#line 606 "parser.rl" { *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if 
(NIL_P(*result)) { @@ -1657,14 +1659,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 614 "parser.rl" +#line 616 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1668 "parser.c" +#line 1670 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1740,7 +1742,7 @@ case 7: _out: {} } -#line 639 "parser.rl" +#line 641 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1776,7 +1778,7 @@ static VALUE convert_encoding(VALUE source) { int encindex = RB_ENCODING_GET(source); - if (encindex == utf8_encindex) { + if (RB_LIKELY(encindex == utf8_encindex)) { return source; } @@ -1788,6 +1790,32 @@ static VALUE convert_encoding(VALUE source) return rb_funcall(source, i_encode, 1, Encoding_UTF_8); } +static int configure_parser_i(VALUE key, VALUE val, VALUE data) +{ + JSON_Parser *json = (JSON_Parser *)data; + + if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } + else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { json->freeze = RTEST(val); } + else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } + else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_match_string) { json->match_string = RTEST(val) ? 
val : Qfalse; } + else if (key == sym_create_additions) { + if (NIL_P(val)) { + json->create_additions = true; + json->deprecated_create_additions = true; + } else { + json->create_additions = RTEST(val); + json->deprecated_create_additions = false; + } + } + + return ST_CONTINUE; +} + static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) { if (json->Vsource) { @@ -1800,84 +1828,22 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) if (!NIL_P(opts)) { Check_Type(opts, T_HASH); if (RHASH_SIZE(opts) > 0) { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = false; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = false; - } - tmp = ID2SYM(i_freeze); - if (option_given_p(opts, tmp)) { - json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->freeze = false; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - tmp = rb_hash_aref(opts, tmp); - if (NIL_P(tmp)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(tmp); - json->deprecated_create_additions = false; - } - } + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. 
+ rb_hash_foreach(opts, configure_parser_i, (VALUE)json); if (json->symbolize_names && json->create_additions) { rb_raise(rb_eArgError, "options :symbolize_names and :create_additions cannot be " " used in conjunction"); } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - if (NIL_P(json->object_class)) json->object_class = Qfalse; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - if (NIL_P(json->array_class)) json->array_class = Qfalse; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - if (NIL_P(json->decimal_class)) json->decimal_class = Qfalse; - } - - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? 
match_string : Qfalse; + if (json->create_additions && !json->create_id) { + json->create_id = rb_funcall(mJSON, i_create_id, 0); } } - } + } source = convert_encoding(StringValue(source)); StringValue(source); json->len = RSTRING_LEN(source); @@ -1928,7 +1894,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1932 "parser.c" +#line 1898 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1936,7 +1902,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 840 "parser.rl" +#line 806 "parser.rl" /* @@ -1954,16 +1920,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 1958 "parser.c" +#line 1924 "parser.c" { cs = JSON_start; } -#line 857 "parser.rl" +#line 823 "parser.rl" p = json->source; pe = p + json->len; -#line 1967 "parser.c" +#line 1933 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1997,7 +1963,7 @@ case 1: cs = 0; goto _out; tr2: -#line 832 "parser.rl" +#line 798 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2007,7 +1973,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2011 "parser.c" +#line 1977 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2096,7 +2062,7 @@ case 9: _out: {} } -#line 860 "parser.rl" +#line 826 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -2117,16 +2083,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) parser_init(json, source, opts); -#line 2121 "parser.c" +#line 2087 "parser.c" { cs = JSON_start; } -#line 880 "parser.rl" +#line 846 "parser.rl" p = json->source; pe = p + json->len; -#line 2130 "parser.c" +#line 2096 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2160,7 +2126,7 @@ case 1: cs = 0; goto _out; tr2: -#line 832 "parser.rl" +#line 798 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else 
{p = (( np))-1;} @@ -2170,7 +2136,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2174 "parser.c" +#line 2140 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2259,7 +2225,7 @@ case 9: _out: {} } -#line 883 "parser.rl" +#line 849 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -2352,26 +2318,28 @@ void Init_parser(void) rb_global_variable(&Encoding_UTF_8); Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); + sym_freeze = ID2SYM(rb_intern("freeze")); + sym_create_additions = ID2SYM(rb_intern("create_additions")); + sym_create_id = ID2SYM(rb_intern("create_id")); + sym_object_class = ID2SYM(rb_intern("object_class")); + sym_array_class = ID2SYM(rb_intern("array_class")); + sym_decimal_class = ID2SYM(rb_intern("decimal_class")); + sym_match_string = ID2SYM(rb_intern("match_string")); + + i_create_id = rb_intern("create_id"); i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_decimal_class = rb_intern("decimal_class"); i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); i_deep_const_get = rb_intern("deep_const_get"); i_aset = rb_intern("[]="); i_aref = rb_intern("[]"); i_leftshift = rb_intern("<<"); i_new = rb_intern("new"); i_try_convert = rb_intern("try_convert"); - i_freeze = rb_intern("freeze"); i_uminus = rb_intern("-@"); i_encode = rb_intern("encode"); diff --git a/ext/json/ext/parser/parser.h 
b/ext/json/ext/parser/parser.h index 021784eb8..073bc90f0 100644 --- a/ext/json/ext/parser/parser.h +++ b/ext/json/ext/parser/parser.h @@ -24,8 +24,6 @@ typedef unsigned char _Bool; # define MAYBE_UNUSED(x) x #endif -#define option_given_p(opts, key) (rb_hash_lookup2(opts, key, Qundef) != Qundef) - typedef struct JSON_ParserStruct { VALUE Vsource; char *source; diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 1923c0e5c..bb24dd43e 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -90,11 +90,13 @@ static void raise_parse_error(const char *format, const char *start) static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, - i_object_class, i_array_class, i_decimal_class, - i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode; +static ID i_json_creatable_p, i_json_create, i_create_id, + i_chr, i_deep_const_get, i_match, i_aset, i_aref, + i_leftshift, i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, sym_symbolize_names, sym_freeze, + sym_create_additions, sym_create_id, sym_object_class, sym_array_class, + sym_decimal_class, sym_match_string; static int binary_encindex; static int utf8_encindex; @@ -671,7 +673,7 @@ static VALUE convert_encoding(VALUE source) { int encindex = RB_ENCODING_GET(source); - if (encindex == utf8_encindex) { + if (RB_LIKELY(encindex == utf8_encindex)) { return source; } @@ -683,6 +685,32 @@ static VALUE convert_encoding(VALUE source) return rb_funcall(source, i_encode, 1, Encoding_UTF_8); } +static int configure_parser_i(VALUE key, VALUE val, VALUE data) +{ + JSON_Parser *json = (JSON_Parser *)data; + + if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? 
FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } + else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { json->freeze = RTEST(val); } + else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } + else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } + else if (key == sym_create_additions) { + if (NIL_P(val)) { + json->create_additions = true; + json->deprecated_create_additions = true; + } else { + json->create_additions = RTEST(val); + json->deprecated_create_additions = false; + } + } + + return ST_CONTINUE; +} + static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) { if (json->Vsource) { @@ -695,84 +723,22 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) if (!NIL_P(opts)) { Check_Type(opts, T_HASH); if (RHASH_SIZE(opts) > 0) { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = false; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = false; - } - tmp = ID2SYM(i_freeze); - if (option_given_p(opts, tmp)) { - json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; - } else { - json->freeze = false; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - tmp = rb_hash_aref(opts, tmp); - if (NIL_P(tmp)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(tmp); - json->deprecated_create_additions = false; - } - } + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(opts, configure_parser_i, (VALUE)json); if (json->symbolize_names && json->create_additions) { rb_raise(rb_eArgError, "options :symbolize_names and :create_additions cannot be " " used in conjunction"); } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - if (NIL_P(json->object_class)) json->object_class = Qfalse; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - if (NIL_P(json->array_class)) json->array_class = Qfalse; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - if (NIL_P(json->decimal_class)) json->decimal_class = Qfalse; - } - - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? 
match_string : Qfalse; + if (json->create_additions && !json->create_id) { + json->create_id = rb_funcall(mJSON, i_create_id, 0); } } - } + } source = convert_encoding(StringValue(source)); StringValue(source); json->len = RSTRING_LEN(source); @@ -972,26 +938,28 @@ void Init_parser(void) rb_global_variable(&Encoding_UTF_8); Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); + sym_freeze = ID2SYM(rb_intern("freeze")); + sym_create_additions = ID2SYM(rb_intern("create_additions")); + sym_create_id = ID2SYM(rb_intern("create_id")); + sym_object_class = ID2SYM(rb_intern("object_class")); + sym_array_class = ID2SYM(rb_intern("array_class")); + sym_decimal_class = ID2SYM(rb_intern("decimal_class")); + sym_match_string = ID2SYM(rb_intern("match_string")); + + i_create_id = rb_intern("create_id"); i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_decimal_class = rb_intern("decimal_class"); i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); i_deep_const_get = rb_intern("deep_const_get"); i_aset = rb_intern("[]="); i_aref = rb_intern("[]"); i_leftshift = rb_intern("<<"); i_new = rb_intern("new"); i_try_convert = rb_intern("try_convert"); - i_freeze = rb_intern("freeze"); i_uminus = rb_intern("-@"); i_encode = rb_intern("encode"); From e660b61bc770e5ec13f09cdcd0893c269d5b162b Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 1 Nov 2024 17:55:01 +0100 Subject: [PATCH 
48/75] parser.rl: initialize the buffer with 512B on the stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This very significantly reduce the overhead on smaller benchmarks ``` == Parsing small hash (65 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23] Warming up -------------------------------------- json 304.417k i/100ms oj 219.431k i/100ms oj strict 254.532k i/100ms Oj::Parser 431.309k i/100ms rapidjson 281.703k i/100ms Calculating ------------------------------------- json 3.046M (± 0.1%) i/s (328.25 ns/i) - 15.525M in 5.096243s oj 2.225M (± 0.2%) i/s (449.50 ns/i) - 11.191M in 5.030429s oj strict 2.553M (± 0.5%) i/s (391.75 ns/i) - 12.981M in 5.085538s Oj::Parser 4.280M (± 0.8%) i/s (233.64 ns/i) - 21.565M in 5.038834s rapidjson 2.826M (± 0.3%) i/s (353.83 ns/i) - 14.367M in 5.083480s Comparison: json: 3046420.8 i/s Oj::Parser: 4280132.7 i/s - 1.40x faster rapidjson: 2826209.4 i/s - 1.08x slower oj strict: 2552619.7 i/s - 1.19x slower oj: 2224670.7 i/s - 1.37x slower ``` --- ext/json/ext/parser/parser.c | 26 ++++++++++++++++---------- ext/json/ext/parser/parser.rl | 6 ++++++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 983b7dbdf..2d107dd3d 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1919,17 +1919,20 @@ static VALUE cParser_parse(VALUE self) VALUE result = Qnil; GET_PARSER; + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); -#line 1924 "parser.c" + +#line 1927 "parser.c" { cs = JSON_start; } -#line 823 "parser.rl" +#line 826 "parser.rl" p = json->source; pe = p + json->len; -#line 1933 "parser.c" +#line 1936 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1973,7 +1976,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 1977 "parser.c" +#line 1980 "parser.c" switch( (*p) ) { 
case 13: goto st10; case 32: goto st10; @@ -2062,7 +2065,7 @@ case 9: _out: {} } -#line 826 "parser.rl" +#line 829 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -2082,17 +2085,20 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) JSON_Parser *json = &parser; parser_init(json, source, opts); + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + -#line 2087 "parser.c" +#line 2093 "parser.c" { cs = JSON_start; } -#line 846 "parser.rl" +#line 852 "parser.rl" p = json->source; pe = p + json->len; -#line 2096 "parser.c" +#line 2102 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2136,7 +2142,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2140 "parser.c" +#line 2146 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2225,7 +2231,7 @@ case 9: _out: {} } -#line 849 "parser.rl" +#line 855 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index bb24dd43e..ab8e9b825 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -819,6 +819,9 @@ static VALUE cParser_parse(VALUE self) VALUE result = Qnil; GET_PARSER; + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + %% write init; p = json->source; pe = p + json->len; @@ -842,6 +845,9 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) JSON_Parser *json = &parser; parser_init(json, source, opts); + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + %% write init; p = json->source; pe = p + json->len; From 7ea4bdb363644f42a3da935bb0769adee832d1c7 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 2 Nov 2024 08:25:32 +0100 Subject: 
[PATCH 49/75] Annotate benchmarks --- benchmark/parser.rb | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/benchmark/parser.rb b/benchmark/parser.rb index 1c26ed8d4..63b057e8a 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -19,7 +19,6 @@ def benchmark_parsing(name, json_output) Benchmark.ips do |x| x.report("json") { JSON.parse(json_output) } if RUN[:json] x.report("oj") { Oj.load(json_output) } if RUN[:oj] - x.report("oj strict") { Oj.strict_load(json_output) } if RUN[:oj] x.report("Oj::Parser") { Oj::Parser.usual.parse(json_output) } if RUN[:oj] x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson] x.compare!(order: :baseline) @@ -27,7 +26,13 @@ def benchmark_parsing(name, json_output) puts end +# Oj::Parser is very significantly faster (2.70x) on the nested array benchmark +# thanks to its stack implementation that saves resizing arrays. benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10) + +# Oj::Parser is significantly faster (~1.5x) on the next 4 benchmarks +# in large part thanks to its string caching. +# Other than that we're either a bit slower or a bit faster than regular `Oj.load`. benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }) benchmark_parsing "test from oj", < Date: Sat, 2 Nov 2024 08:52:35 +0100 Subject: [PATCH 50/75] Update benchmark notes And add a new activitypub (mastodon) benchmark. 
--- benchmark/data/activitypub.json | 1 + benchmark/encoder.rb | 27 +++++++++++++++++++-------- benchmark/parser.rb | 14 +++++++++++--- 3 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 benchmark/data/activitypub.json diff --git a/benchmark/data/activitypub.json b/benchmark/data/activitypub.json new file mode 100644 index 000000000..cd1d7bb3a --- /dev/null +++ b/benchmark/data/activitypub.json @@ -0,0 +1 @@ +{"@context":["https://www.w3.org/ns/activitystreams",{"ostatus":"http://ostatus.org#","atomUri":"ostatus:atomUri","inReplyToAtomUri":"ostatus:inReplyToAtomUri","conversation":"ostatus:conversation","sensitive":"as:sensitive","toot":"http://joinmastodon.org/ns#","votersCount":"toot:votersCount","blurhash":"toot:blurhash","focalPoint":{"@container":"@list","@id":"toot:focalPoint"},"Hashtag":"as:Hashtag"}],"id":"https://ruby.social/users/byroot/outbox?page=true","type":"OrderedCollectionPage","next":"https://ruby.social/users/byroot/outbox?max_id=112610149145350336\u0026page=true","prev":"https://ruby.social/users/byroot/outbox?min_id=113395951825326098\u0026page=true","partOf":"https://ruby.social/users/byroot/outbox","orderedItems":[{"id":"https://ruby.social/users/byroot/statuses/113395951825326098/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-30T10:41:49Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"object":{"id":"https://ruby.social/users/byroot/statuses/113395951825326098","type":"Note","summary":null,"inReplyTo":null,"published":"2024-10-30T10:41:49Z","url":"https://ruby.social/@byroot/113395951825326098","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113395951825326098","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-10-30:objectId=57644998:ob
jectType=Conversation","content":"\u003cp\u003eHere\u0026#39;s the post I teased last week.\u003c/p\u003e\u003cp\u003eTL;DR; Average latency: -5%, p99 latency -10%\u003c/p\u003e\u003cp\u003e\u003ca href=\"https://railsatscale.com/2024-10-23-next-generation-oob-gc/\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003erailsatscale.com/2024-10-23-ne\u003c/span\u003e\u003cspan class=\"invisible\"\u003ext-generation-oob-gc/\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e","contentMap":{"en":"\u003cp\u003eHere\u0026#39;s the post I teased last week.\u003c/p\u003e\u003cp\u003eTL;DR; Average latency: -5%, p99 latency -10%\u003c/p\u003e\u003cp\u003e\u003ca href=\"https://railsatscale.com/2024-10-23-next-generation-oob-gc/\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003erailsatscale.com/2024-10-23-ne\u003c/span\u003e\u003cspan 
class=\"invisible\"\u003ext-generation-oob-gc/\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e"},"attachment":[],"tag":[],"replies":{"id":"https://ruby.social/users/byroot/statuses/113395951825326098/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113395951825326098/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113395951825326098/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113395951825326098/likes","type":"Collection","totalItems":7},"shares":{"id":"https://ruby.social/users/byroot/statuses/113395951825326098/shares","type":"Collection","totalItems":5}}},{"id":"https://ruby.social/users/byroot/statuses/113364382912445498/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-24T20:53:26Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://oisaur.com/users/renchap"],"object":{"id":"https://ruby.social/users/byroot/statuses/113364382912445498","type":"Note","summary":null,"inReplyTo":"https://oisaur.com/users/renchap/statuses/113364257424669603","published":"2024-10-24T20:53:26Z","url":"https://ruby.social/@byroot/113364382912445498","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://oisaur.com/users/renchap"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113364382912445498","inReplyToAtomUri":"https://oisaur.com/users/renchap/statuses/113364257424669603","conversation":"tag:ruby.social,2024-10-24:objectId=57167969:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://oisaur.com/@renchap\" class=\"u-url mention\"\u003e@\u003cspan\u003erenchap\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e for JSON generation 
yes.\u003c/p\u003e\u003cp\u003eIf you can turn that into an easy to run benchmark of some sort and somehow Oj is faster, I\u0026#39;m happy to take a look.\u003c/p\u003e\u003cp\u003eAnd even if it\u0026#39;s not faster, I\u0026#39;d also be happy to see if I can squeeze some more perf out of `json`.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://oisaur.com/@renchap\" class=\"u-url mention\"\u003e@\u003cspan\u003erenchap\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e for JSON generation yes.\u003c/p\u003e\u003cp\u003eIf you can turn that into an easy to run benchmark of some sort and somehow Oj is faster, I\u0026#39;m happy to take a look.\u003c/p\u003e\u003cp\u003eAnd even if it\u0026#39;s not faster, I\u0026#39;d also be happy to see if I can squeeze some more perf out of `json`.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://oisaur.com/users/renchap","name":"@renchap@oisaur.com"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113364382912445498/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113364382912445498/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113364382912445498/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113364382912445498/likes","type":"Collection","totalItems":2},"shares":{"id":"https://ruby.social/users/byroot/statuses/113364382912445498/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113361790648929484/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-24T09:54:11Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"object":{"id":"https://ruby.social/users/byroot/statuses/113361790648929484","type":"Note","summary":null,"inReplyTo":null,"publish
ed":"2024-10-24T09:54:11Z","url":"https://ruby.social/@byroot/113361790648929484","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113361790648929484","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-10-24:objectId=57167969:objectType=Conversation","content":"\u003cp\u003eI\u0026#39;ve just released json 2.7.3 with some bug fixes and lots of performance improvements: \u003ca href=\"https://github.com/ruby/json/releases/tag/v2.7.3\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/ruby/json/releases/\u003c/span\u003e\u003cspan class=\"invisible\"\u003etag/v2.7.3\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eThis is my first release after being made maintainer two weeks ago.\u003c/p\u003e\u003cp\u003eIf you got some realistic benchmarks in which JSON.dump is significantly slower than an alternative gem, please let me know.\u003c/p\u003e\u003cp\u003eAs long as it\u0026#39;s not the result of the alternative doing something incorrect, I\u0026#39;ll consider it as a bug.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003eI\u0026#39;ve just released json 2.7.3 with some bug fixes and lots of performance improvements: \u003ca href=\"https://github.com/ruby/json/releases/tag/v2.7.3\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/ruby/json/releases/\u003c/span\u003e\u003cspan class=\"invisible\"\u003etag/v2.7.3\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eThis is my first release after being made maintainer two weeks ago.\u003c/p\u003e\u003cp\u003eIf you got some realistic benchmarks in which 
JSON.dump is significantly slower than an alternative gem, please let me know.\u003c/p\u003e\u003cp\u003eAs long as it\u0026#39;s not the result of the alternative doing something incorrect, I\u0026#39;ll consider it as a bug.\u003c/p\u003e"},"attachment":[{"type":"Document","mediaType":"image/png","url":"https://cdn.masto.host/rubysocial/media_attachments/files/113/361/788/681/127/110/original/5f0931fc4b8fe796.png","name":null,"blurhash":"UASF-ENLo#xY_4RkM_xu9Yoea#V@_MITWB%g","width":1870,"height":960}],"tag":[],"replies":{"id":"https://ruby.social/users/byroot/statuses/113361790648929484/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113361790648929484/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113361790648929484/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113361790648929484/likes","type":"Collection","totalItems":36},"shares":{"id":"https://ruby.social/users/byroot/statuses/113361790648929484/shares","type":"Collection","totalItems":16}}},{"id":"https://ruby.social/users/byroot/statuses/113356365626792579/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-23T10:54:31Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"object":{"id":"https://ruby.social/users/byroot/statuses/113356365626792579","type":"Note","summary":null,"inReplyTo":null,"published":"2024-10-23T10:54:31Z","url":"https://ruby.social/@byroot/113356365626792579","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113356365626792579","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-10-23:objectId=57090061:objectType=Conversation","content":"\u003cp\u003eI\u0026#39;m 
working on a blog post about our new out of band GC. Sneak peak:\u003c/p\u003e","contentMap":{"en":"\u003cp\u003eI\u0026#39;m working on a blog post about our new out of band GC. Sneak peak:\u003c/p\u003e"},"attachment":[{"type":"Document","mediaType":"image/png","url":"https://cdn.masto.host/rubysocial/media_attachments/files/113/356/364/818/271/017/original/9df7ff5c6469cfbd.png","name":null,"blurhash":"UJRfqJ-=xct5yXjEayWV-rIoRjoL~Wt7oLkC","width":4096,"height":988}],"tag":[],"replies":{"id":"https://ruby.social/users/byroot/statuses/113356365626792579/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113356365626792579/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113356365626792579/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113356365626792579/likes","type":"Collection","totalItems":8},"shares":{"id":"https://ruby.social/users/byroot/statuses/113356365626792579/shares","type":"Collection","totalItems":3}}},{"id":"https://ruby.social/users/byroot/statuses/113350171031468562/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-22T08:39:09Z","to":["https://ruby.social/users/byroot/followers"],"cc":["https://www.w3.org/ns/activitystreams#Public","https://hachyderm.io/users/baweaver"],"object":{"id":"https://ruby.social/users/byroot/statuses/113350171031468562","type":"Note","summary":null,"inReplyTo":"https://hachyderm.io/users/baweaver/statuses/113350162298585353","published":"2024-10-22T08:39:09Z","url":"https://ruby.social/@byroot/113350171031468562","attributedTo":"https://ruby.social/users/byroot","to":["https://ruby.social/users/byroot/followers"],"cc":["https://www.w3.org/ns/activitystreams#Public","https://hachyderm.io/users/baweaver"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113350171031468562","inReplyToAtomUri":"https://hachyderm.io/users/b
aweaver/statuses/113350162298585353","conversation":"tag:hachyderm.io,2024-10-22:objectId=202069168:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e how would that have been different without CD? \u003c/p\u003e\u003cp\u003eWhatever bug that caused the outage would have been deployed just the same, just as part of a bigger batch.\u003c/p\u003e\u003cp\u003eYou take make 0 sense to me.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e how would that have been different without CD? \u003c/p\u003e\u003cp\u003eWhatever bug that caused the outage would have been deployed just the same, just as part of a bigger batch.\u003c/p\u003e\u003cp\u003eYou take make 0 sense to 
me.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://hachyderm.io/users/baweaver","name":"@baweaver@hachyderm.io"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113350171031468562/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113350171031468562/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113350171031468562/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113350171031468562/likes","type":"Collection","totalItems":0},"shares":{"id":"https://ruby.social/users/byroot/statuses/113350171031468562/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113350159628598017/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-22T08:36:15Z","to":["https://ruby.social/users/byroot/followers"],"cc":["https://www.w3.org/ns/activitystreams#Public","https://hachyderm.io/users/baweaver"],"object":{"id":"https://ruby.social/users/byroot/statuses/113350159628598017","type":"Note","summary":null,"inReplyTo":"https://hachyderm.io/users/baweaver/statuses/113350151359487275","published":"2024-10-22T08:36:15Z","url":"https://ruby.social/@byroot/113350159628598017","attributedTo":"https://ruby.social/users/byroot","to":["https://ruby.social/users/byroot/followers"],"cc":["https://www.w3.org/ns/activitystreams#Public","https://hachyderm.io/users/baweaver"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113350159628598017","inReplyToAtomUri":"https://hachyderm.io/users/baweaver/statuses/113350151359487275","conversation":"tag:hachyderm.io,2024-10-22:objectId=202069168:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url 
mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e that doesn\u0026#39;t answer my point.\u003c/p\u003e\u003cp\u003eWhy do you think (all things being equal) releasing less often is better?\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e that doesn\u0026#39;t answer my point.\u003c/p\u003e\u003cp\u003eWhy do you think (all things being equal) releasing less often is better?\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://hachyderm.io/users/baweaver","name":"@baweaver@hachyderm.io"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113350159628598017/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113350159628598017/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113350159628598017/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113350159628598017/likes","type":"Collection","totalItems":0},"shares":{"id":"https://ruby.social/users/byroot/statuses/113350159628598017/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113350126777528742/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-22T08:27:54Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://hachyderm.io/users/baweaver"],"object":{"id":"https://ruby.social/users/byroot/statuses/113350126777528742","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/byroot/statuses/113350126084035477","published":"2024-10-22T08:27:54Z","url":"https://ruby.social/@byroot/113350126777528742","attributedTo":"https://ruby.social/users/byroot","to":["https://ww
w.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://hachyderm.io/users/baweaver"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113350126777528742","inReplyToAtomUri":"https://ruby.social/users/byroot/statuses/113350126084035477","conversation":"tag:hachyderm.io,2024-10-22:objectId=202069168:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003c/p\u003e\u003cp\u003eI get the monitoring etc argument, but CD doesn\u0026#39;t necessarily means merge and go get a coffee, while things deploy automatically.\u003c/p\u003e\u003cp\u003eIf you don\u0026#39;t trust your monitoring and don\u0026#39;t have automatic rollbacks when an anomaly is detected, you can perfectly enforce that whoever has a change deploying must be around and check everything is going well.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003c/p\u003e\u003cp\u003eI get the monitoring etc argument, but CD doesn\u0026#39;t necessarily means merge and go get a coffee, while things deploy automatically.\u003c/p\u003e\u003cp\u003eIf you don\u0026#39;t trust your monitoring and don\u0026#39;t have automatic rollbacks when an anomaly is detected, you can perfectly enforce that whoever has a change deploying must be around and check everything is going 
well.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://hachyderm.io/users/baweaver","name":"@baweaver@hachyderm.io"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113350126777528742/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113350126777528742/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113350126777528742/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113350126777528742/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/113350126777528742/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113350126084035477/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-22T08:27:44Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://hachyderm.io/users/baweaver"],"object":{"id":"https://ruby.social/users/byroot/statuses/113350126084035477","type":"Note","summary":null,"inReplyTo":"https://hachyderm.io/users/baweaver/statuses/113348847094225477","published":"2024-10-22T08:27:44Z","url":"https://ruby.social/@byroot/113350126084035477","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://hachyderm.io/users/baweaver"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113350126084035477","inReplyToAtomUri":"https://hachyderm.io/users/baweaver/statuses/113348847094225477","conversation":"tag:hachyderm.io,2024-10-22:objectId=202069168:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url 
mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003c/p\u003e\u003cp\u003eCan\u0026#39;t disagree more. All things equal, CD reduce the turn around time to deal with a bad release.\u003c/p\u003e\u003cp\u003eAssuming your devs ship 100 PR per week, and 1% of these have a bug, if you ship say, once a day when you hit that bug you need to figure out which of the 20 PRs in the batch is responsible.\u003c/p\u003e\u003cp\u003eIf you ship 20 times a day, so each PR independently, it becomes extremely obvious which PR need to be rolled back and reverted.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003c/p\u003e\u003cp\u003eCan\u0026#39;t disagree more. All things equal, CD reduce the turn around time to deal with a bad release.\u003c/p\u003e\u003cp\u003eAssuming your devs ship 100 PR per week, and 1% of these have a bug, if you ship say, once a day when you hit that bug you need to figure out which of the 20 PRs in the batch is responsible.\u003c/p\u003e\u003cp\u003eIf you ship 20 times a day, so each PR independently, it becomes extremely obvious which PR need to be rolled back and 
reverted.\u003c/p\u003e"},"updated":"2024-10-22T08:28:50Z","attachment":[],"tag":[{"type":"Mention","href":"https://hachyderm.io/users/baweaver","name":"@baweaver@hachyderm.io"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113350126084035477/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113350126084035477/replies?min_id=113350126777528742\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113350126084035477/replies","items":["https://ruby.social/users/byroot/statuses/113350126777528742"]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113350126084035477/likes","type":"Collection","totalItems":2},"shares":{"id":"https://ruby.social/users/byroot/statuses/113350126084035477/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113287895679614930/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-11T08:41:43Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/postmodern"],"object":{"id":"https://ruby.social/users/byroot/statuses/113287895679614930","type":"Note","summary":null,"inReplyTo":null,"published":"2024-10-11T08:41:43Z","url":"https://ruby.social/@byroot/113287895679614930","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/postmodern"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113287895679614930","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-10-11:objectId=56178826:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@postmodern\" class=\"u-url mention\"\u003e@\u003cspan\u003epostmodern\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e 
it\u0026#39;s a big company, so the experience can vary a ton depending on where in the the org you are.\u003c/p\u003e\u003cp\u003eFeel free to DM details and questions if you want a more targeted answer.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@postmodern\" class=\"u-url mention\"\u003e@\u003cspan\u003epostmodern\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e it\u0026#39;s a big company, so the experience can vary a ton depending on where in the the org you are.\u003c/p\u003e\u003cp\u003eFeel free to DM details and questions if you want a more targeted answer.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/postmodern","name":"@postmodern"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113287895679614930/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113287895679614930/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113287895679614930/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113287895679614930/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/113287895679614930/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113057980213460794/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-08-31T18:11:11Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/flavorjones"],"object":{"id":"https://ruby.social/users/byroot/statuses/113057980213460794","type":"Note","summary":null,"inReplyTo":null,"published":"2024-08-31T18:11:11Z","url":"https://ruby.social/@byroot/113057980213460794","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activity
streams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/flavorjones"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113057980213460794","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-08-31:objectId=53204977:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@flavorjones\" class=\"u-url mention\"\u003e@\u003cspan\u003eflavorjones\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Exceptionally, I was listening.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@flavorjones\" class=\"u-url mention\"\u003e@\u003cspan\u003eflavorjones\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Exceptionally, I was listening.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/flavorjones","name":"@flavorjones"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113057980213460794/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113057980213460794/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113057980213460794/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113057980213460794/likes","type":"Collection","totalItems":2},"shares":{"id":"https://ruby.social/users/byroot/statuses/113057980213460794/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113032547130202524/activity","type":"Announce","actor":"https://ruby.social/users/byroot","published":"2024-08-27T06:23:13Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/pushcx","https://ruby.social/users/byroot/followers"],"object":"https://ruby.social/users/pushcx/statuses/113030799240706907"},{"id":"https://ruby.social/users/
byroot/statuses/112929088810252037/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-08-08T23:52:25Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://sfba.social/users/soaproot","https://ruby.social/users/flavorjones"],"object":{"id":"https://ruby.social/users/byroot/statuses/112929088810252037","type":"Note","summary":null,"inReplyTo":"https://sfba.social/users/soaproot/statuses/112928555487199451","published":"2024-08-08T23:52:25Z","url":"https://ruby.social/@byroot/112929088810252037","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://sfba.social/users/soaproot","https://ruby.social/users/flavorjones"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112929088810252037","inReplyToAtomUri":"https://sfba.social/users/soaproot/statuses/112928555487199451","conversation":"tag:ruby.social,2024-08-08:objectId=51576086:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://sfba.social/@soaproot\" class=\"u-url mention\"\u003e@\u003cspan\u003esoaproot\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@flavorjones\" class=\"u-url mention\"\u003e@\u003cspan\u003eflavorjones\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e this is open source. 
Your dependencies are your code too.\u003c/p\u003e\u003cp\u003eWhich means open a PR to fix it there if there isn\u0026#39;t one yet, or at the very least ensure there is an open issue.\u003c/p\u003e\u003cp\u003eThen there are mechanisms to silence specific warnings, such as \u003ca href=\"https://rubygems.org/gems/warning\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"\"\u003erubygems.org/gems/warning\u003c/span\u003e\u003cspan class=\"invisible\"\u003e\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://sfba.social/@soaproot\" class=\"u-url mention\"\u003e@\u003cspan\u003esoaproot\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@flavorjones\" class=\"u-url mention\"\u003e@\u003cspan\u003eflavorjones\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e this is open source. 
Your dependencies are your code too.\u003c/p\u003e\u003cp\u003eWhich means open a PR to fix it there if there isn\u0026#39;t one yet, or at the very least ensure there is an open issue.\u003c/p\u003e\u003cp\u003eThen there are mechanisms to silence specific warnings, such as \u003ca href=\"https://rubygems.org/gems/warning\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"\"\u003erubygems.org/gems/warning\u003c/span\u003e\u003cspan class=\"invisible\"\u003e\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://sfba.social/users/soaproot","name":"@soaproot@sfba.social"},{"type":"Mention","href":"https://ruby.social/users/flavorjones","name":"@flavorjones"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112929088810252037/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112929088810252037/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112929088810252037/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112929088810252037/likes","type":"Collection","totalItems":4},"shares":{"id":"https://ruby.social/users/byroot/statuses/112929088810252037/shares","type":"Collection","totalItems":1}}},{"id":"https://ruby.social/users/byroot/statuses/112733460786406967/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-07-05T10:41:38Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/floehopper"],"object":{"id":"https://ruby.social/users/byroot/statuses/112733460786406967","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/floehopper/statuses/112732960288445318","published":"2024-07-05T10:41:38Z","url":"https://ruby.social/@byroot/1127334607
86406967","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/floehopper"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112733460786406967","inReplyToAtomUri":"https://ruby.social/users/floehopper/statuses/112732960288445318","conversation":"tag:ruby.social,2024-07-05:objectId=49065856:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@floehopper\" class=\"u-url mention\"\u003e@\u003cspan\u003efloehopper\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e while the French system isn\u0026#39;t strictly FPTP, in practice it is similar enough that historically it almost always gave an absolute majority to minority formations.\u003c/p\u003e\u003cp\u003eIt only failed to do so the last two years.\u003c/p\u003e\u003cp\u003eIt\u0026#39;s was designed this way on purpose for the sake of \u0026quot;stability\u0026quot;...\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@floehopper\" class=\"u-url mention\"\u003e@\u003cspan\u003efloehopper\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e while the French system isn\u0026#39;t strictly FPTP, in practice it is similar enough that historically it almost always gave an absolute majority to minority formations.\u003c/p\u003e\u003cp\u003eIt only failed to do so the last two years.\u003c/p\u003e\u003cp\u003eIt\u0026#39;s was designed this way on purpose for the sake of 
\u0026quot;stability\u0026quot;...\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/floehopper","name":"@floehopper"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112733460786406967/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112733460786406967/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112733460786406967/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112733460786406967/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/112733460786406967/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112682265852412256/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-26T09:42:05Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://status.pointless.one/users/pointlessone","https://ruby.social/users/codefolio"],"object":{"id":"https://ruby.social/users/byroot/statuses/112682265852412256","type":"Note","summary":null,"inReplyTo":"https://status.pointless.one/users/pointlessone/statuses/112682262345182185","published":"2024-06-26T09:42:05Z","url":"https://ruby.social/@byroot/112682265852412256","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://status.pointless.one/users/pointlessone","https://ruby.social/users/codefolio"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112682265852412256","inReplyToAtomUri":"https://status.pointless.one/users/pointlessone/statuses/112682262345182185","conversation":"tag:ruby.social,2024-06-26:objectId=48431832:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" 
translate=\"no\"\u003e\u003ca href=\"https://status.pointless.one/@pointlessone\" class=\"u-url mention\"\u003e@\u003cspan\u003epointlessone\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e the trailing comma is the number one reason I can\u0026#39;t agree with standardrb.\u003c/p\u003e\u003cp\u003eTotally agree on minimizing diff noise.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://status.pointless.one/@pointlessone\" class=\"u-url mention\"\u003e@\u003cspan\u003epointlessone\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e the trailing comma is the number one reason I can\u0026#39;t agree with standardrb.\u003c/p\u003e\u003cp\u003eTotally agree on minimizing diff 
noise.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://status.pointless.one/users/pointlessone","name":"@pointlessone@status.pointless.one"},{"type":"Mention","href":"https://ruby.social/users/codefolio","name":"@codefolio"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112682265852412256/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112682265852412256/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112682265852412256/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112682265852412256/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/112682265852412256/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112682089780639728/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-26T08:57:19Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/codefolio"],"object":{"id":"https://ruby.social/users/byroot/statuses/112682089780639728","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/codefolio/statuses/112682077769593093","published":"2024-06-26T08:57:19Z","url":"https://ruby.social/@byroot/112682089780639728","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/codefolio"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112682089780639728","inReplyToAtomUri":"https://ruby.social/users/codefolio/statuses/112682077769593093","conversation":"tag:ruby.social,2024-06-26:objectId=48431832:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca 
href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e I still do, because I think there is a baseline of cops that save a lot of time during review when people submit PRs.\u003c/p\u003e\u003cp\u003eBut the default config is really puzzling, always have to disable tons of cops: \u003ca href=\"https://github.com/redis-rb/redis-client/blob/master/.rubocop.yml\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/redis-rb/redis-clie\u003c/span\u003e\u003cspan class=\"invisible\"\u003ent/blob/master/.rubocop.yml\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eWish I could use `standard`, but while it\u0026#39;s less annoying, I just plain can\u0026#39;t agree with some of the choices they made, and they don\u0026#39;t allow to deviate so...\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e I still do, because I think there is a baseline of cops that save a lot of time during review when people submit PRs.\u003c/p\u003e\u003cp\u003eBut the default config is really puzzling, always have to disable tons of cops: \u003ca href=\"https://github.com/redis-rb/redis-client/blob/master/.rubocop.yml\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/redis-rb/redis-clie\u003c/span\u003e\u003cspan class=\"invisible\"\u003ent/blob/master/.rubocop.yml\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eWish I could use `standard`, but while it\u0026#39;s less annoying, I just plain can\u0026#39;t agree with some of the 
choices they made, and they don\u0026#39;t allow to deviate so...\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/codefolio","name":"@codefolio"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112682089780639728/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112682089780639728/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112682089780639728/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112682089780639728/likes","type":"Collection","totalItems":4},"shares":{"id":"https://ruby.social/users/byroot/statuses/112682089780639728/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112682075988473643/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-26T08:53:48Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/codefolio"],"object":{"id":"https://ruby.social/users/byroot/statuses/112682075988473643","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/codefolio/statuses/112682069742608695","published":"2024-06-26T08:53:48Z","url":"https://ruby.social/@byroot/112682075988473643","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/codefolio"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112682075988473643","inReplyToAtomUri":"https://ruby.social/users/codefolio/statuses/112682069742608695","conversation":"tag:ruby.social,2024-06-26:objectId=48431832:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url 
mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Yeah. I started a project a few hours ago, already had to disable 6 cops that I consider plain wrong.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Yeah. I started a project a few hours ago, already had to disable 6 cops that I consider plain wrong.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/codefolio","name":"@codefolio"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112682075988473643/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112682075988473643/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112682075988473643/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112682075988473643/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/112682075988473643/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112682051957106547/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-26T08:47:41Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"object":{"id":"https://ruby.social/users/byroot/statuses/112682051957106547","type":"Note","summary":null,"inReplyTo":null,"published":"2024-06-26T08:47:41Z","url":"https://ruby.social/@byroot/112682051957106547","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112682051957106547",
"inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-06-26:objectId=48431832:objectType=Conversation","content":"\u003cp\u003eI just discovered this rubocop rule while working on a new project \u003ca href=\"https://www.rubydoc.info/gems/rubocop/RuboCop/Cop/Naming/RescuedExceptionsVariableName\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://www.\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003erubydoc.info/gems/rubocop/Rubo\u003c/span\u003e\u003cspan class=\"invisible\"\u003eCop/Cop/Naming/RescuedExceptionsVariableName\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eI mean seriously? Why should a single name be enforced for all rescued exceptions? If in context there\u0026#39;s something more meaningful than `error`, then I\u0026#39;m gonna use it.\u003c/p\u003e\u003cp\u003eAlso why on earth would you enforce a single letter as variable name? \u003ca href=\"https://ruby.social/tags/notmyruby\" class=\"mention hashtag\" rel=\"tag\"\u003e#\u003cspan\u003enotmyruby\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e","contentMap":{"en":"\u003cp\u003eI just discovered this rubocop rule while working on a new project \u003ca href=\"https://www.rubydoc.info/gems/rubocop/RuboCop/Cop/Naming/RescuedExceptionsVariableName\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://www.\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003erubydoc.info/gems/rubocop/Rubo\u003c/span\u003e\u003cspan class=\"invisible\"\u003eCop/Cop/Naming/RescuedExceptionsVariableName\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eI mean seriously? Why should a single name be enforced for all rescued exceptions? If in context there\u0026#39;s something more meaningful than `error`, then I\u0026#39;m gonna use it.\u003c/p\u003e\u003cp\u003eAlso why on earth would you enforce a single letter as variable name? 
\u003ca href=\"https://ruby.social/tags/notmyruby\" class=\"mention hashtag\" rel=\"tag\"\u003e#\u003cspan\u003enotmyruby\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e"},"updated":"2024-06-26T08:54:27Z","attachment":[],"tag":[{"type":"Hashtag","href":"https://ruby.social/tags/notmyruby","name":"#notmyruby"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112682051957106547/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112682051957106547/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112682051957106547/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112682051957106547/likes","type":"Collection","totalItems":8},"shares":{"id":"https://ruby.social/users/byroot/statuses/112682051957106547/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112671530887629815/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-24T12:12:03Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/nony","https://ruby.social/users/eileencodes"],"object":{"id":"https://ruby.social/users/byroot/statuses/112671530887629815","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/nony/statuses/112671249287497537","published":"2024-06-24T12:12:03Z","url":"https://ruby.social/@byroot/112671530887629815","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/nony","https://ruby.social/users/eileencodes"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112671530887629815","inReplyToAtomUri":"https://ruby.social/users/nony/statuses/112671249287497537","conversation":"tag:ruby.social,2024-06-24:objectId=48293543:objectType=Conve
rsation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@nony\" class=\"u-url mention\"\u003e@\u003cspan\u003enony\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@eileencodes\" class=\"u-url mention\"\u003e@\u003cspan\u003eeileencodes\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e no particular feeling no.\u003c/p\u003e\u003cp\u003eAlso I don\u0026#39;t think Eileen is very active around here, so best to discuss this on GitHub or something.\u003c/p\u003e\u003cp\u003eIf your planned refactoring isn\u0026#39;t a ton of work, the simpler is to discuss over a PR. Otherwise you can start a GitHub discussion and tag Eileen: \u003ca href=\"https://github.com/rails/rails/discussions\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/rails/rails/discuss\u003c/span\u003e\u003cspan class=\"invisible\"\u003eions\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@nony\" class=\"u-url mention\"\u003e@\u003cspan\u003enony\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@eileencodes\" class=\"u-url mention\"\u003e@\u003cspan\u003eeileencodes\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e no particular feeling no.\u003c/p\u003e\u003cp\u003eAlso I don\u0026#39;t think Eileen is very active around here, so best to discuss this on GitHub or something.\u003c/p\u003e\u003cp\u003eIf your planned refactoring isn\u0026#39;t a ton of work, the simpler is to discuss over a PR. 
Otherwise you can start a GitHub discussion and tag Eileen: \u003ca href=\"https://github.com/rails/rails/discussions\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/rails/rails/discuss\u003c/span\u003e\u003cspan class=\"invisible\"\u003eions\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/nony","name":"@nony"},{"type":"Mention","href":"https://ruby.social/users/eileencodes","name":"@eileencodes"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112671530887629815/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112671530887629815/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112671530887629815/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112671530887629815/likes","type":"Collection","totalItems":0},"shares":{"id":"https://ruby.social/users/byroot/statuses/112671530887629815/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112671245079603460/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-24T10:59:22Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/nony","https://ruby.social/users/eileencodes"],"object":{"id":"https://ruby.social/users/byroot/statuses/112671245079603460","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/nony/statuses/112671240096770145","published":"2024-06-24T10:59:22Z","url":"https://ruby.social/@byroot/112671245079603460","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.soc
ial/users/nony","https://ruby.social/users/eileencodes"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112671245079603460","inReplyToAtomUri":"https://ruby.social/users/nony/statuses/112671240096770145","conversation":"tag:ruby.social,2024-06-24:objectId=48293543:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@nony\" class=\"u-url mention\"\u003e@\u003cspan\u003enony\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e That\u0026#39;s more \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@eileencodes\" class=\"u-url mention\"\u003e@\u003cspan\u003eeileencodes\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u0026#39;s area of expertise.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@nony\" class=\"u-url mention\"\u003e@\u003cspan\u003enony\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e That\u0026#39;s more \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@eileencodes\" class=\"u-url mention\"\u003e@\u003cspan\u003eeileencodes\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u0026#39;s area of 
expertise.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/nony","name":"@nony"},{"type":"Mention","href":"https://ruby.social/users/eileencodes","name":"@eileencodes"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112671245079603460/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112671245079603460/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112671245079603460/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112671245079603460/likes","type":"Collection","totalItems":0},"shares":{"id":"https://ruby.social/users/byroot/statuses/112671245079603460/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112610149145350336/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-13T16:01:52Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/samsaffron"],"object":{"id":"https://ruby.social/users/byroot/statuses/112610149145350336","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/byroot/statuses/112608230956169882","published":"2024-06-13T16:01:52Z","url":"https://ruby.social/@byroot/112610149145350336","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/samsaffron"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112610149145350336","inReplyToAtomUri":"https://ruby.social/users/byroot/statuses/112608230956169882","conversation":"tag:ruby.social,2024-06-13:objectId=47554849:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@samsaffron\" class=\"u-url 
mention\"\u003e@\u003cspan\u003esamsaffron\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Thanks for the repro. It\u0026#39;s fixed an backported to 7.1.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@samsaffron\" class=\"u-url mention\"\u003e@\u003cspan\u003esamsaffron\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Thanks for the repro. It\u0026#39;s fixed an backported to 7.1.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/samsaffron","name":"@samsaffron"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112610149145350336/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112610149145350336/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112610149145350336/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112610149145350336/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/112610149145350336/shares","type":"Collection","totalItems":0}}}]} \ No newline at end of file diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index 9569f02e4..39d3e9061 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -55,22 +55,31 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ puts end -# On the first two micro benchmarks, the limitting factor is that we have to create a Generator::State object for every -# call to `JSON.dump`, so we cause 2 allocations per call where alternatives only do one allocation. -# The performance difference is mostly more time spent in GC because of this extra pressure. -# If we re-use the same `JSON::State` instance, we're faster than Oj on the array benchmark, and much closer -# on the Hash one. 
+# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] + +# On the first two micro benchmarks, the limitting factor is the fixed cost of initializing the +# generator state. Since `JSON.generate` now lazily allocate the `State` object we're now ~10% faster +# than `Oj.dump`. benchmark_encoding "small mixed", [1, "string", { a: 1, b: 2 }, [3, 4, 5]] benchmark_encoding "small nested array", [[1,2,3,4,5]]*10 + +# On small hash specifically, we're just on par with `Oj.dump`. Would be worth investigating why +# Hash serialization doesn't perform as well as other types. benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" } -# On these benchmarks we perform well. Either on par or very closely faster/slower -benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state) +# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~10% slower when dealing +# with mostly multi-byte characters. This is a tradeoff. benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state) benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state) -benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state) + +# On these benchmarks we perform well, we're on par or better. +benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state) +benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json"), except: %i(json_state) benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state) +# On twitter.json we're still about 10% slower, this is worth investigating. +benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state) + # This benchmark spent the overwhelming majority of its time in `ruby_dtoa`. 
We rely on Ruby's implementation # which uses a relatively old version of dtoa.c from David M. Gay. # Oj in `compat` mode is ~10% slower than `json`, but in its default mode is noticeably faster here because @@ -82,4 +91,6 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ # Oj speed without losing precision. benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false, except: %i(json_state) +# We're about 10% faster when `to_json` calls are involved, but this wasn't particularly optimized, there might be +# opportunities here. benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20, except: %i(json_state) diff --git a/benchmark/parser.rb b/benchmark/parser.rb index 63b057e8a..bee94f32d 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -26,12 +26,15 @@ def benchmark_parsing(name, json_output) puts end +# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] + # Oj::Parser is very significanly faster (2.70x) on the nested array benchmark # thanks to its stack implementation that saves resizing arrays. +# But we're on par with `Oj.dumo` benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10) -# Oj::Parser is significanly faster (~1.5x) on the next 4 benchmarks -# in large part thanks to its string caching. +# Oj::Parser is significanly faster (~1.5x) on the next 4 benchmarks in large part thanks to its string caching. + # Other than that we're either a bit slower or a bit faster than regular `Oj.load`. 
benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }) @@ -39,11 +42,16 @@ def benchmark_parsing(name, json_output) {"a":"Alpha","b":true,"c":12345,"d":[true,[false,[-123456789,null],3.9676,["Something else.",false],null]],"e":{"zero":null,"one":1,"two":2,"three":[3],"four":[0,1,2,3,4]},"f":null,"h":{"a":{"b":{"c":{"d":{"e":{"f":{"g":null}}}}}}},"i":[[[[[[[null]]]]]]]} JSON +# On these two more realistic benchmarks, still significanlty slower than alternatives. +# Caching of keys is likely required to be able to match performance. +# On the twitter and activitypub payloads the difference isn't that big (~10%) +# but on citm_catalog it's up to a 50% difference. +benchmark_parsing "activitypub.json", File.read("#{__dir__}/data/activitypub.json") benchmark_parsing "twitter.json", File.read("#{__dir__}/data/twitter.json") benchmark_parsing "citm_catalog.json", File.read("#{__dir__}/data/citm_catalog.json") # rapidjson is 8x faster thanks to it's much more performant float parser. # Unfortunately, there isn't a lot of existing fast float parsers in pure C, # and including C++ is problematic. -# Aside from that, we're faster than other alternatives here. +# Aside from that, we're much faster than other alternatives here. benchmark_parsing "float parsing", File.read("#{__dir__}/data/canada.json") From 6d9694eef080b0bfef5f448cfc51a6b814d55fff Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 2 Nov 2024 22:37:22 +0100 Subject: [PATCH 51/75] Cache parsed object names (#675) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cache parsed object names Looking up the fstring table isn't that cheap, if we assume most object names are likely to re-appear, it makes sense to keep a small cache of them on the stack. e.g. 
`[{"foo": 1, "bar": 2}, {"foo": 3, "bar": 4}]` In term of implementation, we use a simple sorted array with binary search as it's the most compact and guarantee a decent `O(log n)`, and the comparison is first done on the string length, an then fallback to `memcmp`. Compactness is important here because we're allocating on the stack. There is also some simple heuristic to avoid caching names that aren't likely to be repeated, such as numerical names or very long names. This helps quite a bit on `activitypub`, `twitter` and `ctim_catalog` macro-benchmarks. Before: ``` == Parsing activitypub.json (58160 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 714.000 i/100ms oj 801.000 i/100ms Oj::Parser 943.000 i/100ms rapidjson 632.000 i/100ms Calculating ------------------------------------- json 7.135k (± 0.7%) i/s (140.16 μs/i) - 35.700k in 5.003978s oj 7.991k (± 0.2%) i/s (125.14 μs/i) - 40.050k in 5.012044s Oj::Parser 9.611k (± 0.2%) i/s (104.04 μs/i) - 48.093k in 5.003723s rapidjson 6.318k (± 0.2%) i/s (158.29 μs/i) - 31.600k in 5.001896s Comparison: json: 7134.7 i/s Oj::Parser: 9611.5 i/s - 1.35x faster oj: 7990.8 i/s - 1.12x faster rapidjson: 6317.6 i/s - 1.13x slower == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 57.000 i/100ms oj 62.000 i/100ms Oj::Parser 78.000 i/100ms rapidjson 56.000 i/100ms Calculating ------------------------------------- json 573.527 (± 1.6%) i/s (1.74 ms/i) - 2.907k in 5.070094s oj 619.368 (± 1.6%) i/s (1.61 ms/i) - 3.100k in 5.006550s Oj::Parser 770.095 (± 0.9%) i/s (1.30 ms/i) - 3.900k in 5.064768s rapidjson 560.601 (± 0.4%) i/s (1.78 ms/i) - 2.856k in 5.094597s Comparison: json: 573.5 i/s Oj::Parser: 770.1 i/s - 1.34x faster oj: 619.4 i/s - 1.08x faster rapidjson: 560.6 i/s - 1.02x slower == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 
(2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 31.000 i/100ms oj 34.000 i/100ms Oj::Parser 46.000 i/100ms rapidjson 38.000 i/100ms Calculating ------------------------------------- json 319.842 (± 0.3%) i/s (3.13 ms/i) - 1.612k in 5.040026s oj 329.315 (± 2.4%) i/s (3.04 ms/i) - 1.666k in 5.061887s Oj::Parser 452.725 (± 1.1%) i/s (2.21 ms/i) - 2.300k in 5.080996s rapidjson 358.160 (± 0.8%) i/s (2.79 ms/i) - 1.824k in 5.093054s Comparison: json: 319.8 i/s Oj::Parser: 452.7 i/s - 1.42x faster rapidjson: 358.2 i/s - 1.12x faster oj: 329.3 i/s - 1.03x faster ``` After: ``` == Parsing activitypub.json (58160 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 832.000 i/100ms oj 799.000 i/100ms Oj::Parser 969.000 i/100ms rapidjson 636.000 i/100ms Calculating ------------------------------------- json 8.020k (± 0.3%) i/s (124.70 μs/i) - 40.768k in 5.083607s oj 7.942k (± 1.7%) i/s (125.92 μs/i) - 39.950k in 5.031909s Oj::Parser 9.515k (± 4.4%) i/s (105.10 μs/i) - 47.481k in 5.001202s rapidjson 6.282k (± 2.1%) i/s (159.20 μs/i) - 31.800k in 5.064719s Comparison: json: 8019.6 i/s Oj::Parser: 9514.5 i/s - 1.19x faster oj: 7941.9 i/s - same-ish: difference falls within error rapidjson: 6281.6 i/s - 1.28x slower == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 67.000 i/100ms oj 62.000 i/100ms Oj::Parser 79.000 i/100ms rapidjson 55.000 i/100ms Calculating ------------------------------------- json 670.935 (± 2.7%) i/s (1.49 ms/i) - 3.417k in 5.096850s oj 618.937 (± 3.2%) i/s (1.62 ms/i) - 3.100k in 5.014800s Oj::Parser 768.894 (± 1.7%) i/s (1.30 ms/i) - 3.871k in 5.036093s rapidjson 556.882 (± 2.7%) i/s (1.80 ms/i) - 2.805k in 5.040970s Comparison: json: 670.9 i/s Oj::Parser: 768.9 i/s - 1.15x faster oj: 618.9 i/s - 1.08x slower 
rapidjson: 556.9 i/s - 1.20x slower == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 35.000 i/100ms oj 32.000 i/100ms Oj::Parser 43.000 i/100ms rapidjson 39.000 i/100ms Calculating ------------------------------------- json 382.262 (± 2.6%) i/s (2.62 ms/i) - 1.925k in 5.039336s oj 348.721 (± 0.6%) i/s (2.87 ms/i) - 1.760k in 5.047265s Oj::Parser 478.294 (± 0.6%) i/s (2.09 ms/i) - 2.408k in 5.034798s rapidjson 398.740 (± 0.8%) i/s (2.51 ms/i) - 2.028k in 5.086365s Comparison: json: 382.3 i/s Oj::Parser: 478.3 i/s - 1.25x faster rapidjson: 398.7 i/s - 1.04x faster oj: 348.7 i/s - 1.10x slower ``` --- ext/json/ext/parser/parser.c | 353 ++++++++++++++++++++++++---------- ext/json/ext/parser/parser.h | 16 +- ext/json/ext/parser/parser.rl | 195 +++++++++++++++++-- 3 files changed, 448 insertions(+), 116 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 2d107dd3d..44ff5618b 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -3,6 +3,165 @@ #include "../fbuffer/fbuffer.h" #include "parser.h" +static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_json_creatable_p, i_json_create, i_create_id, + i_chr, i_deep_const_get, i_match, i_aset, i_aref, + i_leftshift, i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, sym_symbolize_names, sym_freeze, + sym_create_additions, sym_create_id, sym_object_class, sym_array_class, + sym_decimal_class, sym_match_string; + +static int binary_encindex; +static int utf8_encindex; + +/* name cache */ + +#include +#include + +static rb_encoding *enc_utf8; + +#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, 
length, enc_utf8); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); +# endif +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + long rstring_length = RSTRING_LEN(rstring); + if (length == rstring_length) { + return memcmp(str, RSTRING_PTR(rstring), length); + } else { + return (int)(length - rstring_length); + } +} + +static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) / 2; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, entry); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. 
+ return Qfalse; + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) / 2; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. 
+ return Qfalse; + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rsymbol); + } + return rsymbol; +} + /* unicode */ static const signed char digit_values[256] = { @@ -86,30 +245,16 @@ static void raise_parse_error(const char *format, const char *start) ptr = buffer; } - rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr); + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); } -static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; -static VALUE sym_max_nesting, sym_allow_nan, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; -static int binary_encindex; -static int utf8_encindex; +#line 276 "parser.rl" -#line 131 "parser.rl" - - - -#line 113 "parser.c" +#line 258 "parser.c" enum {JSON_object_start = 1}; enum {JSON_object_first_final = 27}; enum {JSON_object_error = 0}; @@ -117,7 +262,7 @@ enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 173 "parser.rl" +#line 318 "parser.rl" static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -133,14 +278,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu *result = object_class ? 
rb_class_new_instance(0, 0, object_class) :rb_hash_new(); -#line 137 "parser.c" +#line 282 "parser.c" { cs = JSON_object_start; } -#line 188 "parser.rl" +#line 333 "parser.rl" -#line 144 "parser.c" +#line 289 "parser.c" { if ( p == pe ) goto _test_eof; @@ -168,7 +313,7 @@ case 2: goto st2; goto st0; tr2: -#line 155 "parser.rl" +#line 300 "parser.rl" { char *np; json->parsing_name = true; @@ -181,7 +326,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 185 "parser.c" +#line 330 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -248,7 +393,7 @@ case 8: goto st8; goto st0; tr11: -#line 139 "parser.rl" +#line 284 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -269,7 +414,7 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 273 "parser.c" +#line 418 "parser.c" switch( (*p) ) { case 13: goto st9; case 32: goto st9; @@ -358,14 +503,14 @@ case 18: goto st9; goto st18; tr4: -#line 163 "parser.rl" +#line 308 "parser.rl" { p--; {p++; cs = 27; goto _out;} } goto st27; st27: if ( ++p == pe ) goto _test_eof27; case 27: -#line 369 "parser.c" +#line 514 "parser.c" goto st0; st19: if ( ++p == pe ) @@ -463,7 +608,7 @@ case 26: _out: {} } -#line 189 "parser.rl" +#line 334 "parser.rl" if (cs >= JSON_object_first_final) { if (json->create_additions) { @@ -491,7 +636,7 @@ case 26: -#line 495 "parser.c" +#line 640 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -499,7 +644,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 292 "parser.rl" +#line 437 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -507,14 +652,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 511 "parser.c" +#line 656 "parser.c" { cs = JSON_value_start; } -#line 299 "parser.rl" +#line 444 "parser.rl" -#line 518 "parser.c" 
+#line 663 "parser.c" { if ( p == pe ) goto _test_eof; @@ -548,14 +693,14 @@ case 1: cs = 0; goto _out; tr2: -#line 244 "parser.rl" +#line 389 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } goto st29; tr3: -#line 249 "parser.rl" +#line 394 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -575,7 +720,7 @@ cs = 0; } goto st29; tr7: -#line 267 "parser.rl" +#line 412 "parser.rl" { char *np; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -583,7 +728,7 @@ cs = 0; } goto st29; tr11: -#line 273 "parser.rl" +#line 418 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -591,7 +736,7 @@ cs = 0; } goto st29; tr25: -#line 237 "parser.rl" +#line 382 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -601,7 +746,7 @@ cs = 0; } goto st29; tr27: -#line 230 "parser.rl" +#line 375 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -611,19 +756,19 @@ cs = 0; } goto st29; tr31: -#line 224 "parser.rl" +#line 369 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 221 "parser.rl" +#line 366 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 227 "parser.rl" +#line 372 "parser.rl" { *result = Qtrue; } @@ -632,9 +777,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 279 "parser.rl" +#line 424 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 638 "parser.c" +#line 783 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -875,7 +1020,7 @@ case 28: _out: {} } -#line 300 "parser.rl" +#line 445 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -889,7 +1034,7 @@ case 28: } -#line 893 "parser.c" +#line 1038 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -897,7 +1042,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 320 "parser.rl" +#line 465 "parser.rl" 
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -905,15 +1050,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 909 "parser.c" +#line 1054 "parser.c" { cs = JSON_integer_start; } -#line 327 "parser.rl" +#line 472 "parser.rl" json->memo = p; -#line 917 "parser.c" +#line 1062 "parser.c" { if ( p == pe ) goto _test_eof; @@ -947,14 +1092,14 @@ case 3: goto st0; goto tr4; tr4: -#line 317 "parser.rl" +#line 462 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 958 "parser.c" +#line 1103 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -973,7 +1118,7 @@ case 5: _out: {} } -#line 329 "parser.rl" +#line 474 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -988,7 +1133,7 @@ case 5: } -#line 992 "parser.c" +#line 1137 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -996,7 +1141,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 354 "parser.rl" +#line 499 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1004,15 +1149,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1008 "parser.c" +#line 1153 "parser.c" { cs = JSON_float_start; } -#line 361 "parser.rl" +#line 506 "parser.rl" json->memo = p; -#line 1016 "parser.c" +#line 1161 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1070,14 +1215,14 @@ case 8: goto st0; goto tr9; tr9: -#line 348 "parser.rl" +#line 493 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1081 "parser.c" +#line 1226 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1138,7 +1283,7 @@ case 7: _out: {} } -#line 363 "parser.rl" +#line 508 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; @@ -1191,7 +1336,7 @@ case 7: 
-#line 1195 "parser.c" +#line 1340 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 17}; enum {JSON_array_error = 0}; @@ -1199,7 +1344,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 443 "parser.rl" +#line 588 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1213,14 +1358,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul *result = array_class ? rb_class_new_instance(0, 0, array_class) : rb_ary_new(); -#line 1217 "parser.c" +#line 1362 "parser.c" { cs = JSON_array_start; } -#line 456 "parser.rl" +#line 601 "parser.rl" -#line 1224 "parser.c" +#line 1369 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1259,7 +1404,7 @@ case 2: goto st2; goto st0; tr2: -#line 420 "parser.rl" +#line 565 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1279,7 +1424,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1283 "parser.c" +#line 1428 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -1379,14 +1524,14 @@ case 12: goto st3; goto st12; tr4: -#line 435 "parser.rl" +#line 580 "parser.rl" { p--; {p++; cs = 17; goto _out;} } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 1390 "parser.c" +#line 1535 "parser.c" goto st0; st13: if ( ++p == pe ) @@ -1442,7 +1587,7 @@ case 16: _out: {} } -#line 457 "parser.rl" +#line 602 "parser.rl" if(cs >= JSON_array_first_final) { return p + 1; @@ -1460,7 +1605,7 @@ static inline VALUE build_string(const char *start, const char *end, bool intern VALUE result; # ifdef HAVE_RB_ENC_INTERNED_STR if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding()); + result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); } else { result = rb_utf8_str_new(start, (long)(end - start)); } @@ -1478,13 +1623,26 @@ static inline VALUE build_string(const char *start, const char 
*end, bool intern return result; } -static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize) +static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { size_t bufferSize = stringEnd - string; char *p = string, *pe = string, *unescape, *bufferStart, *buffer; int unescape_len; char buf[4]; + if (is_name) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + pe = memchr(p, '\\', bufferSize); if (RB_LIKELY(pe == NULL)) { return build_string(string, stringEnd, intern, symbolize); @@ -1587,7 +1745,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo } -#line 1591 "parser.c" +#line 1749 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1595,7 +1753,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 619 "parser.rl" +#line 777 "parser.rl" static int @@ -1616,15 +1774,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1620 "parser.c" +#line 1778 "parser.c" { cs = JSON_string_start; } -#line 639 "parser.rl" +#line 797 "parser.rl" json->memo = p; -#line 1628 "parser.c" +#line 1786 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1649,9 +1807,9 @@ case 2: goto st0; goto st2; tr2: -#line 606 "parser.rl" +#line 764 "parser.rl" { - *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { p--; {p++; cs = 8; goto _out;} @@ 
-1659,14 +1817,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 616 "parser.rl" +#line 774 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1670 "parser.c" +#line 1828 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1742,7 +1900,7 @@ case 7: _out: {} } -#line 641 "parser.rl" +#line 799 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1894,7 +2052,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1898 "parser.c" +#line 2056 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1902,7 +2060,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 806 "parser.rl" +#line 964 "parser.rl" /* @@ -1923,16 +2081,16 @@ static VALUE cParser_parse(VALUE self) fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); -#line 1927 "parser.c" +#line 2085 "parser.c" { cs = JSON_start; } -#line 826 "parser.rl" +#line 984 "parser.rl" p = json->source; pe = p + json->len; -#line 1936 "parser.c" +#line 2094 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1966,7 +2124,7 @@ case 1: cs = 0; goto _out; tr2: -#line 798 "parser.rl" +#line 956 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -1976,7 +2134,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 1980 "parser.c" +#line 2138 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2065,7 +2223,7 @@ case 9: _out: {} } -#line 829 "parser.rl" +#line 987 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -2089,16 +2247,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); -#line 2093 "parser.c" +#line 2251 "parser.c" { cs = JSON_start; } -#line 852 
"parser.rl" +#line 1010 "parser.rl" p = json->source; pe = p + json->len; -#line 2102 "parser.c" +#line 2260 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2132,7 +2290,7 @@ case 1: cs = 0; goto _out; tr2: -#line 798 "parser.rl" +#line 956 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2142,7 +2300,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2146 "parser.c" +#line 2304 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2231,7 +2389,7 @@ case 9: _out: {} } -#line 855 "parser.rl" +#line 1013 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -2351,6 +2509,7 @@ void Init_parser(void) binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); + enc_utf8 = rb_utf8_encoding(); } /* diff --git a/ext/json/ext/parser/parser.h b/ext/json/ext/parser/parser.h index 073bc90f0..5c1aeaf1a 100644 --- a/ext/json/ext/parser/parser.h +++ b/ext/json/ext/parser/parser.h @@ -24,6 +24,19 @@ typedef unsigned char _Bool; # define MAYBE_UNUSED(x) x #endif +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. 
+#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + typedef struct JSON_ParserStruct { VALUE Vsource; char *source; @@ -42,6 +55,7 @@ typedef struct JSON_ParserStruct { bool freeze; bool create_additions; bool deprecated_create_additions; + rvalue_cache name_cache; } JSON_Parser; #define GET_PARSER \ @@ -61,7 +75,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize); +static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize); static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); static VALUE convert_encoding(VALUE source); static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index ab8e9b825..5b0c207f8 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -1,6 +1,165 @@ #include "../fbuffer/fbuffer.h" #include "parser.h" +static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_json_creatable_p, i_json_create, i_create_id, + i_chr, i_deep_const_get, i_match, i_aset, i_aref, + i_leftshift, i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, sym_symbolize_names, sym_freeze, + sym_create_additions, sym_create_id, sym_object_class, sym_array_class, + sym_decimal_class, sym_match_string; + +static int binary_encindex; +static int utf8_encindex; + +/* name cache 
*/ + +#include +#include + +static rb_encoding *enc_utf8; + +#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, length, enc_utf8); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); +# endif +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + long rstring_length = RSTRING_LEN(rstring); + if (length == rstring_length) { + return memcmp(str, RSTRING_PTR(rstring), length); + } else { + return (int)(length - rstring_length); + } +} + +static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. 
+ return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) / 2; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, entry); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) / 2; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. 
+ return Qfalse; + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rsymbol); + } + return rsymbol; +} + /* unicode */ static const signed char digit_values[256] = { @@ -84,23 +243,9 @@ static void raise_parse_error(const char *format, const char *start) ptr = buffer; } - rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr); + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); } -static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; - -static VALUE sym_max_nesting, sym_allow_nan, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; - -static int binary_encindex; -static int utf8_encindex; - %%{ machine JSON_common; @@ -471,7 +616,7 @@ static inline VALUE build_string(const char *start, const char *end, bool intern VALUE result; # ifdef HAVE_RB_ENC_INTERNED_STR if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding()); + result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); } else { result = rb_utf8_str_new(start, (long)(end - start)); } @@ -489,13 +634,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } -static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize) +static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { size_t bufferSize = stringEnd - string; char *p = string, *pe = string, *unescape, *bufferStart, *buffer; int unescape_len; char buf[4]; + if (is_name) { 
+ VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + pe = memchr(p, '\\', bufferSize); if (RB_LIKELY(pe == NULL)) { return build_string(string, stringEnd, intern, symbolize); @@ -604,7 +762,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo write data; action parse_string { - *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { fhold; fbreak; @@ -971,6 +1129,7 @@ void Init_parser(void) binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); + enc_utf8 = rb_utf8_encoding(); } /* From 8bf74a977ba2cb02fc885b3525d43a571b613574 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sun, 3 Nov 2024 09:27:59 +0100 Subject: [PATCH 52/75] JSON::Ext::Parser mark the name cache entries when not on the heap This is somewhat dead code as unless you are using `JSON::Parser.new` directly we never allocate `JSON::Ext::Parser` anymore. But still, we should mark all its references in case some code out there uses that. 
Followup: #675 --- ext/json/ext/parser/extconf.rb | 5 ++--- ext/json/ext/parser/parser.c | 15 +++++++++++++++ ext/json/ext/parser/parser.rl | 15 +++++++++++++++ test/json/json_ext_parser_test.rb | 20 ++++++++++++++++++++ test/json/test_helper.rb | 18 ++++++++++++++++++ 5 files changed, 70 insertions(+), 3 deletions(-) diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb index 870588412..c3c23d2cb 100644 --- a/ext/json/ext/parser/extconf.rb +++ b/ext/json/ext/parser/extconf.rb @@ -1,9 +1,8 @@ # frozen_string_literal: true require 'mkmf' -have_func("rb_enc_raise", "ruby.h") -have_func("rb_enc_interned_str", "ruby.h") - +have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 +have_func("rb_gc_mark_locations") # Missing on TruffleRuby append_cflags("-std=c99") create_makefile 'json/ext/parser' diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 44ff5618b..3f6d891c4 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -2399,6 +2399,19 @@ case 9: } } +#ifndef HAVE_RB_GC_MARK_LOCATIONS +// For TruffleRuby +void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +{ + VALUE *value = start; + + while (value < end) { + rb_gc_mark(*value); + value++; + } +} +#endif + static void JSON_mark(void *ptr) { JSON_Parser *json = ptr; @@ -2408,6 +2421,8 @@ static void JSON_mark(void *ptr) rb_gc_mark(json->array_class); rb_gc_mark(json->decimal_class); rb_gc_mark(json->match_string); + const VALUE *name_cache_entries = &json->name_cache.entries[0]; + rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length); } static void JSON_free(void *ptr) diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 5b0c207f8..fd1bc41e2 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -1019,6 +1019,19 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) } } +#ifndef HAVE_RB_GC_MARK_LOCATIONS +// For 
TruffleRuby +void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +{ + VALUE *value = start; + + while (value < end) { + rb_gc_mark(*value); + value++; + } +} +#endif + static void JSON_mark(void *ptr) { JSON_Parser *json = ptr; @@ -1028,6 +1041,8 @@ static void JSON_mark(void *ptr) rb_gc_mark(json->array_class); rb_gc_mark(json->decimal_class); rb_gc_mark(json->match_string); + const VALUE *name_cache_entries = &json->name_cache.entries[0]; + rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length); } static void JSON_free(void *ptr) diff --git a/test/json/json_ext_parser_test.rb b/test/json/json_ext_parser_test.rb index ff6598f49..9db8ae772 100644 --- a/test/json/json_ext_parser_test.rb +++ b/test/json/json_ext_parser_test.rb @@ -27,6 +27,26 @@ def test_error_messages assert_equal "unexpected token at 'NaN'", ex.message end + if GC.respond_to?(:stress=) + def test_gc_stress_parser_new + payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) + + previous_stress = GC.stress + JSON::Parser.new(payload).parse + ensure + GC.stress = previous_stress + end + + def test_gc_stress + payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) + + previous_stress = GC.stress + JSON.parse(payload) + ensure + GC.stress = previous_stress + end + end + def parse(json) JSON::Ext::Parser.new(json).parse end diff --git a/test/json/test_helper.rb b/test/json/test_helper.rb index 7bff9b339..6fcb76edf 100644 --- a/test/json/test_helper.rb +++ b/test/json/test_helper.rb @@ -19,6 +19,24 @@ rescue LoadError end +if GC.respond_to?(:verify_compaction_references) + # This method was added in Ruby 3.0.0. Calling it this way asks the GC to + # move objects around, helping to find object movement bugs. 
+ begin + GC.verify_compaction_references(double_heap: true, toward: :empty) + rescue NotImplementedError + # Some platforms don't support compaction + end +end + +if GC.respond_to?(:auto_compact=) + begin + GC.auto_compact = true + rescue NotImplementedError + # Some platforms don't support compaction + end +end + unless defined?(Test::Unit::CoreAssertions) require "core_assertions" Test::Unit::TestCase.include Test::Unit::CoreAssertions From 90c8aaaa6a1a5d5b5f6c08335df855fdb8991e33 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sun, 3 Nov 2024 10:22:20 +0100 Subject: [PATCH 53/75] Use rb_str_new_frozen --- ext/json/ext/generator/generator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 362eff7f7..1213f4249 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -1113,7 +1113,7 @@ static VALUE string_config(VALUE config) if (RTEST(config)) { Check_Type(config, T_STRING); if (RSTRING_LEN(config)) { - return RB_OBJ_FROZEN(config) ? config : rb_str_freeze(rb_str_dup(config)); + return rb_str_new_frozen(config); } } return Qfalse; From ca49deefabc8c2e59d4c1ff26804362a21ce78b9 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sun, 3 Nov 2024 10:41:37 +0100 Subject: [PATCH 54/75] Micro-optimize binary search Since we know `high + low` is always positive, we can use a bitshift instead of a division. 
--- ext/json/ext/parser/parser.c | 4 ++-- ext/json/ext/parser/parser.rl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 3f6d891c4..5016ac7a8 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -79,7 +79,7 @@ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const lon int last_cmp = 0; while (low <= high) { - mid = (high + low) / 2; + mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; last_cmp = rstring_cache_cmp(str, length, entry); @@ -131,7 +131,7 @@ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const lon int last_cmp = 0; while (low <= high) { - mid = (high + low) / 2; + mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index fd1bc41e2..4508738e7 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -77,7 +77,7 @@ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const lon int last_cmp = 0; while (low <= high) { - mid = (high + low) / 2; + mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; last_cmp = rstring_cache_cmp(str, length, entry); @@ -129,7 +129,7 @@ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const lon int last_cmp = 0; while (low <= high) { - mid = (high + low) / 2; + mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); From 72b35740919539347f4989b962c8488b682a2a81 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sun, 3 Nov 2024 10:55:33 +0100 Subject: [PATCH 55/75] Get rid of the .h files They're not particularly helpful and also the Rakefile doesn't handle them well currently (doesn't recompile on changes in .h). 
--- ext/json/ext/fbuffer/fbuffer.h | 39 ++++-- ext/json/ext/generator/depend | 2 +- ext/json/ext/generator/generator.c | 25 +++- ext/json/ext/generator/generator.h | 91 ------------ ext/json/ext/parser/depend | 2 +- ext/json/ext/parser/parser.c | 214 ++++++++++++++++++----------- ext/json/ext/parser/parser.h | 90 ------------ ext/json/ext/parser/parser.rl | 56 +++++++- 8 files changed, 246 insertions(+), 273 deletions(-) delete mode 100644 ext/json/ext/generator/generator.h delete mode 100644 ext/json/ext/parser/parser.h diff --git a/ext/json/ext/fbuffer/fbuffer.h b/ext/json/ext/fbuffer/fbuffer.h index 367ebd89f..d7a2b9549 100644 --- a/ext/json/ext/fbuffer/fbuffer.h +++ b/ext/json/ext/fbuffer/fbuffer.h @@ -4,6 +4,37 @@ #include "ruby.h" #include "ruby/encoding.h" +/* shims */ +/* This is the fallback definition from Ruby 3.4 */ + +#ifndef RBIMPL_STDBOOL_H +#if defined(__cplusplus) +# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) +# include +# endif +#elif defined(HAVE_STDBOOL_H) +# include +#elif !defined(HAVE__BOOL) +typedef unsigned char _Bool; +# define bool _Bool +# define true ((_Bool)+1) +# define false ((_Bool)+0) +# define __bool_true_false_are_defined +#endif +#endif + +#ifndef RB_UNLIKELY +#define RB_UNLIKELY(expr) expr +#endif + +#ifndef RB_LIKELY +#define RB_LIKELY(expr) expr +#endif + +#ifndef MAYBE_UNUSED +# define MAYBE_UNUSED(x) x +#endif + enum fbuffer_type { HEAP = 0, STACK = 1, @@ -38,14 +69,6 @@ static inline void fbuffer_append_char(FBuffer *fb, char newchr); static VALUE fbuffer_to_s(FBuffer *fb); #endif -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(expr) expr -#endif - -#ifndef RB_LIKELY -#define RB_LIKELY(expr) expr -#endif - static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size) { fb->initial_length = (initial_length > 0) ? 
initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; diff --git a/ext/json/ext/generator/depend b/ext/json/ext/generator/depend index 1a042a250..967aa7659 100644 --- a/ext/json/ext/generator/depend +++ b/ext/json/ext/generator/depend @@ -1 +1 @@ -generator.o: generator.c generator.h $(srcdir)/../fbuffer/fbuffer.h +generator.o: generator.c $(srcdir)/../fbuffer/fbuffer.h diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 1213f4249..df15288a1 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -1,5 +1,27 @@ +#include "ruby.h" #include "../fbuffer/fbuffer.h" -#include "generator.h" + +#include +#include + +/* ruby api and some helpers */ + +typedef struct JSON_Generator_StateStruct { + VALUE indent; + VALUE space; + VALUE space_before; + VALUE object_nl; + VALUE array_nl; + + long max_nesting; + long depth; + long buffer_initial_length; + + bool allow_nan; + bool ascii_only; + bool script_safe; + bool strict; +} JSON_Generator_State; #ifndef RB_UNLIKELY #define RB_UNLIKELY(cond) (cond) @@ -31,6 +53,7 @@ struct generate_json_data { generator_func func; }; +static VALUE cState_from_state_s(VALUE self, VALUE opts); static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func); static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h deleted file mode 100644 index 749a627a1..000000000 --- a/ext/json/ext/generator/generator.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef _GENERATOR_H_ -#define _GENERATOR_H_ - -#include -#include - -#include "ruby.h" - -/* This is the fallback definition from Ruby 3.4 */ -#ifndef RBIMPL_STDBOOL_H -#if defined(__cplusplus) -# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) -# include -# 
endif -#elif defined(HAVE_STDBOOL_H) -# include -#elif !defined(HAVE__BOOL) -typedef unsigned char _Bool; -# define bool _Bool -# define true ((_Bool)+1) -# define false ((_Bool)+0) -# define __bool_true_false_are_defined -#endif -#endif - -/* ruby api and some helpers */ - -typedef struct JSON_Generator_StateStruct { - VALUE indent; - VALUE space; - VALUE space_before; - VALUE object_nl; - VALUE array_nl; - - long max_nesting; - long depth; - long buffer_initial_length; - - bool allow_nan; - bool ascii_only; - bool script_safe; - bool strict; -} JSON_Generator_State; - -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); -#ifdef RUBY_INTEGER_UNIFICATION -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self); -#else -static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self); -#endif -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_included_s(VALUE self, VALUE modul); -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_to_json_raw_object(VALUE self); -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self); -static VALUE mString_Extend_json_create(VALUE self, VALUE o); -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self); -static void State_free(void *state); -static VALUE cState_s_allocate(VALUE klass); - -static VALUE cState_generate(VALUE self, VALUE obj); -static VALUE cState_from_state_s(VALUE self, VALUE opts); -static VALUE cState_indent(VALUE self); -static VALUE cState_indent_set(VALUE self, VALUE indent); -static VALUE cState_space(VALUE self); -static VALUE cState_space_set(VALUE self, VALUE space); 
-static VALUE cState_space_before(VALUE self); -static VALUE cState_space_before_set(VALUE self, VALUE space_before); -static VALUE cState_object_nl(VALUE self); -static VALUE cState_object_nl_set(VALUE self, VALUE object_nl); -static VALUE cState_array_nl(VALUE self); -static VALUE cState_array_nl_set(VALUE self, VALUE array_nl); -static VALUE cState_max_nesting(VALUE self); -static VALUE cState_max_nesting_set(VALUE self, VALUE depth); -static VALUE cState_allow_nan_p(VALUE self); -static VALUE cState_ascii_only_p(VALUE self); -static VALUE cState_depth(VALUE self); -static VALUE cState_depth_set(VALUE self, VALUE depth); -static VALUE cState_script_safe(VALUE self); -static VALUE cState_script_safe_set(VALUE self, VALUE depth); -static VALUE cState_strict(VALUE self); -static VALUE cState_strict_set(VALUE self, VALUE strict); - -static const rb_data_type_t JSON_Generator_State_type; - -#endif diff --git a/ext/json/ext/parser/depend b/ext/json/ext/parser/depend index 498ffa964..c051a244f 100644 --- a/ext/json/ext/parser/depend +++ b/ext/json/ext/parser/depend @@ -1 +1 @@ -parser.o: parser.c parser.h $(srcdir)/../fbuffer/fbuffer.h +parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 5016ac7a8..f3fb8db2f 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1,7 +1,7 @@ /* This file is automatically generated from parser.rl by using ragel */ #line 1 "parser.rl" +#include "ruby.h" #include "../fbuffer/fbuffer.h" -#include "parser.h" static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; @@ -22,6 +22,19 @@ static int utf8_encindex; #include #include +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. 
+// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. +#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + static rb_encoding *enc_utf8; #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 @@ -228,6 +241,47 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + VALUE object_class; + VALUE array_class; + VALUE decimal_class; + VALUE match_string; + FBuffer fbuffer; + int max_nesting; + bool allow_nan; + bool parsing_name; + bool symbolize_names; + bool freeze; + bool create_additions; + bool deprecated_create_additions; + rvalue_cache name_cache; +} JSON_Parser; + +#define GET_PARSER \ + GET_PARSER_INIT; \ + if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") + +#define GET_PARSER_INIT \ + JSON_Parser *json; \ + TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) + +#define MinusInfinity "-Infinity" +#define EVIL 0x666 + +static const rb_data_type_t JSON_Parser_type; +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); + + #define PARSE_ERROR_FRAGMENT_LEN 32 #ifdef RBIMPL_ATTR_NORETURN RBIMPL_ATTR_NORETURN() @@ 
-250,11 +304,11 @@ static void raise_parse_error(const char *format, const char *start) -#line 276 "parser.rl" +#line 330 "parser.rl" -#line 258 "parser.c" +#line 312 "parser.c" enum {JSON_object_start = 1}; enum {JSON_object_first_final = 27}; enum {JSON_object_error = 0}; @@ -262,7 +316,7 @@ enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 318 "parser.rl" +#line 372 "parser.rl" static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -278,14 +332,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu *result = object_class ? rb_class_new_instance(0, 0, object_class) :rb_hash_new(); -#line 282 "parser.c" +#line 336 "parser.c" { cs = JSON_object_start; } -#line 333 "parser.rl" +#line 387 "parser.rl" -#line 289 "parser.c" +#line 343 "parser.c" { if ( p == pe ) goto _test_eof; @@ -313,7 +367,7 @@ case 2: goto st2; goto st0; tr2: -#line 300 "parser.rl" +#line 354 "parser.rl" { char *np; json->parsing_name = true; @@ -326,7 +380,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 330 "parser.c" +#line 384 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -393,7 +447,7 @@ case 8: goto st8; goto st0; tr11: -#line 284 "parser.rl" +#line 338 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -414,7 +468,7 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 418 "parser.c" +#line 472 "parser.c" switch( (*p) ) { case 13: goto st9; case 32: goto st9; @@ -503,14 +557,14 @@ case 18: goto st9; goto st18; tr4: -#line 308 "parser.rl" +#line 362 "parser.rl" { p--; {p++; cs = 27; goto _out;} } goto st27; st27: if ( ++p == pe ) goto _test_eof27; case 27: -#line 514 "parser.c" +#line 568 "parser.c" goto st0; st19: if ( ++p == pe ) @@ -608,7 +662,7 @@ case 26: _out: {} } -#line 334 "parser.rl" +#line 388 "parser.rl" if (cs >= JSON_object_first_final) { if (json->create_additions) { @@ -636,7 +690,7 @@ 
case 26: -#line 640 "parser.c" +#line 694 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -644,7 +698,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 437 "parser.rl" +#line 491 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -652,14 +706,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 656 "parser.c" +#line 710 "parser.c" { cs = JSON_value_start; } -#line 444 "parser.rl" +#line 498 "parser.rl" -#line 663 "parser.c" +#line 717 "parser.c" { if ( p == pe ) goto _test_eof; @@ -693,14 +747,14 @@ case 1: cs = 0; goto _out; tr2: -#line 389 "parser.rl" +#line 443 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } goto st29; tr3: -#line 394 "parser.rl" +#line 448 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -720,7 +774,7 @@ cs = 0; } goto st29; tr7: -#line 412 "parser.rl" +#line 466 "parser.rl" { char *np; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -728,7 +782,7 @@ cs = 0; } goto st29; tr11: -#line 418 "parser.rl" +#line 472 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -736,7 +790,7 @@ cs = 0; } goto st29; tr25: -#line 382 "parser.rl" +#line 436 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -746,7 +800,7 @@ cs = 0; } goto st29; tr27: -#line 375 "parser.rl" +#line 429 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -756,19 +810,19 @@ cs = 0; } goto st29; tr31: -#line 369 "parser.rl" +#line 423 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 366 "parser.rl" +#line 420 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 372 "parser.rl" +#line 426 "parser.rl" { *result = Qtrue; } @@ -777,9 +831,9 @@ cs = 0; if ( ++p == pe ) goto 
_test_eof29; case 29: -#line 424 "parser.rl" +#line 478 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 783 "parser.c" +#line 837 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -1020,7 +1074,7 @@ case 28: _out: {} } -#line 445 "parser.rl" +#line 499 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -1034,7 +1088,7 @@ case 28: } -#line 1038 "parser.c" +#line 1092 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -1042,7 +1096,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 465 "parser.rl" +#line 519 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1050,15 +1104,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 1054 "parser.c" +#line 1108 "parser.c" { cs = JSON_integer_start; } -#line 472 "parser.rl" +#line 526 "parser.rl" json->memo = p; -#line 1062 "parser.c" +#line 1116 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1092,14 +1146,14 @@ case 3: goto st0; goto tr4; tr4: -#line 462 "parser.rl" +#line 516 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 1103 "parser.c" +#line 1157 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1118,7 +1172,7 @@ case 5: _out: {} } -#line 474 "parser.rl" +#line 528 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -1133,7 +1187,7 @@ case 5: } -#line 1137 "parser.c" +#line 1191 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -1141,7 +1195,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 499 "parser.rl" +#line 553 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1149,15 +1203,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 
1153 "parser.c" +#line 1207 "parser.c" { cs = JSON_float_start; } -#line 506 "parser.rl" +#line 560 "parser.rl" json->memo = p; -#line 1161 "parser.c" +#line 1215 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1215,14 +1269,14 @@ case 8: goto st0; goto tr9; tr9: -#line 493 "parser.rl" +#line 547 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1226 "parser.c" +#line 1280 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1283,7 +1337,7 @@ case 7: _out: {} } -#line 508 "parser.rl" +#line 562 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; @@ -1336,7 +1390,7 @@ case 7: -#line 1340 "parser.c" +#line 1394 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 17}; enum {JSON_array_error = 0}; @@ -1344,7 +1398,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 588 "parser.rl" +#line 642 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1358,14 +1412,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul *result = array_class ? 
rb_class_new_instance(0, 0, array_class) : rb_ary_new(); -#line 1362 "parser.c" +#line 1416 "parser.c" { cs = JSON_array_start; } -#line 601 "parser.rl" +#line 655 "parser.rl" -#line 1369 "parser.c" +#line 1423 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1404,7 +1458,7 @@ case 2: goto st2; goto st0; tr2: -#line 565 "parser.rl" +#line 619 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1424,7 +1478,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1428 "parser.c" +#line 1482 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -1524,14 +1578,14 @@ case 12: goto st3; goto st12; tr4: -#line 580 "parser.rl" +#line 634 "parser.rl" { p--; {p++; cs = 17; goto _out;} } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 1535 "parser.c" +#line 1589 "parser.c" goto st0; st13: if ( ++p == pe ) @@ -1587,7 +1641,7 @@ case 16: _out: {} } -#line 602 "parser.rl" +#line 656 "parser.rl" if(cs >= JSON_array_first_final) { return p + 1; @@ -1745,7 +1799,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 1749 "parser.c" +#line 1803 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1753,7 +1807,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 777 "parser.rl" +#line 831 "parser.rl" static int @@ -1774,15 +1828,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1778 "parser.c" +#line 1832 "parser.c" { cs = JSON_string_start; } -#line 797 "parser.rl" +#line 851 "parser.rl" json->memo = p; -#line 1786 "parser.c" +#line 1840 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1807,7 +1861,7 @@ case 2: goto st0; goto st2; tr2: -#line 764 "parser.rl" +#line 818 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && 
json->symbolize_names); if (NIL_P(*result)) { @@ -1817,14 +1871,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 774 "parser.rl" +#line 828 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1828 "parser.c" +#line 1882 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1900,7 +1954,7 @@ case 7: _out: {} } -#line 799 "parser.rl" +#line 853 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2052,7 +2106,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2056 "parser.c" +#line 2110 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2060,7 +2114,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 964 "parser.rl" +#line 1018 "parser.rl" /* @@ -2081,16 +2135,16 @@ static VALUE cParser_parse(VALUE self) fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); -#line 2085 "parser.c" +#line 2139 "parser.c" { cs = JSON_start; } -#line 984 "parser.rl" +#line 1038 "parser.rl" p = json->source; pe = p + json->len; -#line 2094 "parser.c" +#line 2148 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2124,7 +2178,7 @@ case 1: cs = 0; goto _out; tr2: -#line 956 "parser.rl" +#line 1010 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2134,7 +2188,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2138 "parser.c" +#line 2192 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2223,7 +2277,7 @@ case 9: _out: {} } -#line 987 "parser.rl" +#line 1041 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -2247,16 +2301,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); -#line 2251 "parser.c" +#line 
2305 "parser.c" { cs = JSON_start; } -#line 1010 "parser.rl" +#line 1064 "parser.rl" p = json->source; pe = p + json->len; -#line 2260 "parser.c" +#line 2314 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2290,7 +2344,7 @@ case 1: cs = 0; goto _out; tr2: -#line 956 "parser.rl" +#line 1010 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2300,7 +2354,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2304 "parser.c" +#line 2358 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2389,7 +2443,7 @@ case 9: _out: {} } -#line 1013 "parser.rl" +#line 1067 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/ext/json/ext/parser/parser.h b/ext/json/ext/parser/parser.h deleted file mode 100644 index 5c1aeaf1a..000000000 --- a/ext/json/ext/parser/parser.h +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#include "ruby.h" - -/* This is the fallback definition from Ruby 3.4 */ -#ifndef RBIMPL_STDBOOL_H -#if defined(__cplusplus) -# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) -# include -# endif -#elif defined(HAVE_STDBOOL_H) -# include -#elif !defined(HAVE__BOOL) -typedef unsigned char _Bool; -# define bool _Bool -# define true ((_Bool)+1) -# define false ((_Bool)+0) -# define __bool_true_false_are_defined -#endif -#endif - -#ifndef MAYBE_UNUSED -# define MAYBE_UNUSED(x) x -#endif - -// Object names are likely to be repeated, and are frozen. -// As such we can re-use them if we keep a cache of the ones we've seen so far, -// and save much more expensive lookups into the global fstring table. -// This cache implementation is deliberately simple, as we're optimizing for compactness, -// to be able to fit safely on the stack. -// As such, binary search into a sorted array gives a good tradeoff between compactness and -// performance. 
-#define JSON_RVALUE_CACHE_CAPA 63 -typedef struct rvalue_cache_struct { - int length; - VALUE entries[JSON_RVALUE_CACHE_CAPA]; -} rvalue_cache; - -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - VALUE match_string; - FBuffer fbuffer; - int max_nesting; - bool allow_nan; - bool parsing_name; - bool symbolize_names; - bool freeze; - bool create_additions; - bool deprecated_create_additions; - rvalue_cache name_cache; -} JSON_Parser; - -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static uint32_t unescape_unicode(const unsigned char *p); -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize); -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static VALUE convert_encoding(VALUE source); -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self); -static VALUE cParser_parse(VALUE self); -static void JSON_mark(void *json); -static void JSON_free(void *json); -static VALUE cJSON_parser_s_allocate(VALUE klass); -static VALUE 
cParser_source(VALUE self); - -static const rb_data_type_t JSON_Parser_type; - -#endif diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 4508738e7..15e88e2be 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -1,5 +1,5 @@ +#include "ruby.h" #include "../fbuffer/fbuffer.h" -#include "parser.h" static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; @@ -20,6 +20,19 @@ static int utf8_encindex; #include #include +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. +#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + static rb_encoding *enc_utf8; #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 @@ -226,6 +239,47 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + VALUE object_class; + VALUE array_class; + VALUE decimal_class; + VALUE match_string; + FBuffer fbuffer; + int max_nesting; + bool allow_nan; + bool parsing_name; + bool symbolize_names; + bool freeze; + bool create_additions; + bool deprecated_create_additions; + rvalue_cache name_cache; +} JSON_Parser; + +#define GET_PARSER \ + GET_PARSER_INIT; \ + if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") + +#define GET_PARSER_INIT \ + JSON_Parser *json; \ + TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) + +#define MinusInfinity "-Infinity" 
+#define EVIL 0x666 + +static const rb_data_type_t JSON_Parser_type; +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); + + #define PARSE_ERROR_FRAGMENT_LEN 32 #ifdef RBIMPL_ATTR_NORETURN RBIMPL_ATTR_NORETURN() From d0d4c1db258a8aa6f96996bae961629ce6300161 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sun, 3 Nov 2024 13:26:34 +0100 Subject: [PATCH 56/75] Use batch APIs to create Array and Hash objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Naively appending elements into RArray or RHash is inefficient because it might cause multiple reallocations and rehashing. So it's preferable to accumulate all the elements onto a stack, and then use batch APIs to directly create right-sized containers. 
Before: ``` == Parsing activitypub.json (58160 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 779.000 i/100ms oj 799.000 i/100ms Oj::Parser 953.000 i/100ms rapidjson 630.000 i/100ms Calculating ------------------------------------- json 7.989k (± 0.7%) i/s (125.17 μs/i) - 40.508k in 5.070571s oj 7.931k (± 1.8%) i/s (126.09 μs/i) - 39.950k in 5.039171s Oj::Parser 9.624k (± 0.7%) i/s (103.91 μs/i) - 48.603k in 5.050694s rapidjson 6.287k (± 0.3%) i/s (159.05 μs/i) - 31.500k in 5.010181s Comparison: json: 7989.2 i/s Oj::Parser: 9623.6 i/s - 1.20x faster oj: 7930.8 i/s - same-ish: difference falls within error rapidjson: 6287.3 i/s - 1.27x slower == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 66.000 i/100ms oj 62.000 i/100ms Oj::Parser 78.000 i/100ms rapidjson 55.000 i/100ms Calculating ------------------------------------- json 673.530 (± 0.7%) i/s (1.48 ms/i) - 3.432k in 5.095837s oj 620.473 (± 0.5%) i/s (1.61 ms/i) - 3.162k in 5.096259s Oj::Parser 767.687 (± 0.9%) i/s (1.30 ms/i) - 3.900k in 5.080601s rapidjson 553.048 (± 1.1%) i/s (1.81 ms/i) - 2.805k in 5.072525s Comparison: json: 673.5 i/s Oj::Parser: 767.7 i/s - 1.14x faster oj: 620.5 i/s - 1.09x slower rapidjson: 553.0 i/s - 1.22x slower == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 38.000 i/100ms oj 34.000 i/100ms Oj::Parser 47.000 i/100ms rapidjson 38.000 i/100ms Calculating ------------------------------------- json 381.312 (± 0.5%) i/s (2.62 ms/i) - 1.938k in 5.082614s oj 328.735 (± 2.1%) i/s (3.04 ms/i) - 1.666k in 5.070407s Oj::Parser 458.938 (± 0.9%) i/s (2.18 ms/i) - 2.303k in 5.018529s rapidjson 376.744 (± 1.3%) i/s (2.65 ms/i) - 1.900k in 5.044113s Comparison: json: 381.3 i/s Oj::Parser: 
458.9 i/s - 1.20x faster rapidjson: 376.7 i/s - same-ish: difference falls within error oj: 328.7 i/s - 1.16x slower ``` After: ``` == Parsing activitypub.json (58160 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 960.000 i/100ms oj 796.000 i/100ms Oj::Parser 969.000 i/100ms rapidjson 636.000 i/100ms Calculating ------------------------------------- json 8.957k (± 0.5%) i/s (111.65 μs/i) - 45.120k in 5.037777s oj 7.966k (± 0.5%) i/s (125.53 μs/i) - 40.596k in 5.096207s Oj::Parser 9.579k (± 0.3%) i/s (104.39 μs/i) - 48.450k in 5.057822s rapidjson 6.261k (± 8.9%) i/s (159.73 μs/i) - 31.800k in 5.182342s Comparison: json: 8956.5 i/s Oj::Parser: 9579.3 i/s - 1.07x faster oj: 7966.2 i/s - 1.12x slower rapidjson: 6260.6 i/s - 1.43x slower == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 82.000 i/100ms oj 62.000 i/100ms Oj::Parser 77.000 i/100ms rapidjson 55.000 i/100ms Calculating ------------------------------------- json 803.998 (± 0.6%) i/s (1.24 ms/i) - 4.100k in 5.099692s oj 608.292 (± 0.8%) i/s (1.64 ms/i) - 3.100k in 5.096566s Oj::Parser 760.206 (± 0.5%) i/s (1.32 ms/i) - 3.850k in 5.064529s rapidjson 549.562 (± 0.5%) i/s (1.82 ms/i) - 2.750k in 5.004166s Comparison: json: 804.0 i/s Oj::Parser: 760.2 i/s - 1.06x slower oj: 608.3 i/s - 1.32x slower rapidjson: 549.6 i/s - 1.46x slower == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 43.000 i/100ms oj 34.000 i/100ms Oj::Parser 47.000 i/100ms rapidjson 36.000 i/100ms Calculating ------------------------------------- json 447.336 (± 0.9%) i/s (2.24 ms/i) - 2.279k in 5.094945s oj 336.266 (± 2.4%) i/s (2.97 ms/i) - 1.700k in 5.058625s Oj::Parser 466.559 (± 1.3%) i/s (2.14 ms/i) - 2.350k in 5.037637s rapidjson 
392.039 (± 0.8%) i/s (2.55 ms/i) - 1.980k in 5.050826s Comparison: json: 447.3 i/s Oj::Parser: 466.6 i/s - 1.04x faster rapidjson: 392.0 i/s - 1.14x slower oj: 336.3 i/s - 1.33x slower ``` --- ext/json/ext/fbuffer/fbuffer.h | 12 +- ext/json/ext/parser/extconf.rb | 5 +- ext/json/ext/parser/parser.c | 423 +++++++++++++++++++++++---------- ext/json/ext/parser/parser.rl | 265 +++++++++++++++++---- 4 files changed, 539 insertions(+), 166 deletions(-) diff --git a/ext/json/ext/fbuffer/fbuffer.h b/ext/json/ext/fbuffer/fbuffer.h index d7a2b9549..3e154a5fa 100644 --- a/ext/json/ext/fbuffer/fbuffer.h +++ b/ext/json/ext/fbuffer/fbuffer.h @@ -36,8 +36,8 @@ typedef unsigned char _Bool; #endif enum fbuffer_type { - HEAP = 0, - STACK = 1, + FBUFFER_HEAP_ALLOCATED = 0, + FBUFFER_STACK_ALLOCATED = 1, }; typedef struct FBufferStruct { @@ -73,7 +73,7 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char * { fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; if (stack_buffer) { - fb->type = STACK; + fb->type = FBUFFER_STACK_ALLOCATED; fb->ptr = stack_buffer; fb->capa = stack_buffer_size; } @@ -81,7 +81,7 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char * static void fbuffer_free(FBuffer *fb) { - if (fb->ptr && fb->type == HEAP) { + if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) { ruby_xfree(fb->ptr); } } @@ -105,10 +105,10 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) for (required = fb->capa; requested > required - fb->len; required <<= 1); if (required > fb->capa) { - if (fb->type == STACK) { + if (fb->type == FBUFFER_STACK_ALLOCATED) { const char *old_buffer = fb->ptr; fb->ptr = ALLOC_N(char, required); - fb->type = HEAP; + fb->type = FBUFFER_HEAP_ALLOCATED; MEMCPY(fb->ptr, old_buffer, char, fb->len); } else { REALLOC_N(fb->ptr, char, required); diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb index c3c23d2cb..f9104de12 
100644 --- a/ext/json/ext/parser/extconf.rb +++ b/ext/json/ext/parser/extconf.rb @@ -2,7 +2,10 @@ require 'mkmf' have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 -have_func("rb_gc_mark_locations") # Missing on TruffleRuby +have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 +have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby +have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby + append_cflags("-std=c99") create_makefile 'json/ext/parser' diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index f3fb8db2f..8208e310a 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -17,6 +17,33 @@ static VALUE sym_max_nesting, sym_allow_nan, sym_symbolize_names, sym_freeze, static int binary_encindex; static int utf8_encindex; +#ifndef HAVE_RB_GC_MARK_LOCATIONS +// For TruffleRuby +void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +{ + VALUE *value = start; + + while (value < end) { + rb_gc_mark(*value); + value++; + } +} +#endif + +#ifndef HAVE_RB_HASH_BULK_INSERT +// For TruffleRuby +void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +{ + long index = 0; + while (index < count) { + VALUE name = pairs[index++]; + VALUE value = pairs[index++]; + rb_hash_aset(hash, name, value); + } + RB_GC_GUARD(hash); +} +#endif + /* name cache */ #include @@ -175,6 +202,110 @@ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const lon return rsymbol; } +/* rvalue stack */ + +#define RVALUE_STACK_INITIAL_CAPA 128 + +enum rvalue_stack_type { + RVALUE_STACK_HEAP_ALLOCATED = 0, + RVALUE_STACK_STACK_ALLOCATED = 1, +}; + +typedef struct rvalue_stack_struct { + enum rvalue_stack_type type; + long capa; + long head; + VALUE *ptr; +} rvalue_stack; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); + +static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack 
**stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = rvalue_stack_spill(stack, handle, stack_ref); + } else { + REALLOC_N(stack->ptr, VALUE, required); + stack->capa = required; + } + return stack; +} + +static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +{ + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = rvalue_stack_grow(stack, handle, stack_ref); + } + stack->ptr[stack->head] = value; + stack->head++; +} + +static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) +{ + return stack->ptr + (stack->head - count); +} + +static inline void rvalue_stack_pop(rvalue_stack *stack, long count) +{ + stack->head -= count; +} + +static void rvalue_stack_mark(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + rb_gc_mark_locations(stack->ptr, stack->ptr + stack->head); +} + +static void rvalue_stack_free(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + if (stack) { + ruby_xfree(stack->ptr); + ruby_xfree(stack); + } +} + +static size_t rvalue_stack_memsize(const void *ptr) +{ + const rvalue_stack *stack = (const rvalue_stack *)ptr; + return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_rvalue_stack_type = { + "JSON::Ext::Parser/rvalue_stack", + { + .dmark = rvalue_stack_mark, + .dfree = rvalue_stack_free, + .dsize = rvalue_stack_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) +{ + rvalue_stack *stack; + *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, rvalue_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(VALUE, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); + 
return stack; +} + +static void rvalue_stack_eagerly_release(VALUE handle) +{ + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + RTYPEDDATA_DATA(handle) = NULL; + rvalue_stack_free(stack); +} + /* unicode */ static const signed char digit_values[256] = { @@ -260,6 +391,8 @@ typedef struct JSON_ParserStruct { bool create_additions; bool deprecated_create_additions; rvalue_cache name_cache; + rvalue_stack *stack; + VALUE stack_handle; } JSON_Parser; #define GET_PARSER \ @@ -304,11 +437,11 @@ static void raise_parse_error(const char *format, const char *start) -#line 330 "parser.rl" +#line 463 "parser.rl" -#line 312 "parser.c" +#line 445 "parser.c" enum {JSON_object_start = 1}; enum {JSON_object_first_final = 27}; enum {JSON_object_error = 0}; @@ -316,30 +449,30 @@ enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 372 "parser.rl" +#line 501 "parser.rl" +#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) + static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = object_class ? 
rb_class_new_instance(0, 0, object_class) :rb_hash_new(); + long stack_head = json->stack->head; -#line 336 "parser.c" +#line 469 "parser.c" { cs = JSON_object_start; } -#line 387 "parser.rl" +#line 516 "parser.rl" -#line 343 "parser.c" +#line 476 "parser.c" { if ( p == pe ) goto _test_eof; @@ -367,20 +500,23 @@ case 2: goto st2; goto st0; tr2: -#line 354 "parser.rl" +#line 480 "parser.rl" { char *np; json->parsing_name = true; - np = JSON_parse_string(json, p, pe, &last_name); + np = JSON_parse_string(json, p, pe, result); json->parsing_name = false; - if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} + if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { + PUSH(*result); + {p = (( np))-1;} + } } goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 384 "parser.c" +#line 520 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -447,19 +583,12 @@ case 8: goto st8; goto st0; tr11: -#line 338 "parser.rl" +#line 471 "parser.rl" { - VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v, current_nesting); + char *np = JSON_parse_value(json, p, pe, result, current_nesting); if (np == NULL) { p--; {p++; cs = 9; goto _out;} } else { - if (json->object_class) { - rb_funcall(*result, i_aset, 2, last_name, v); - } else { - OBJ_FREEZE(last_name); - rb_hash_aset(*result, last_name, v); - } {p = (( np))-1;} } } @@ -468,7 +597,7 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 472 "parser.c" +#line 601 "parser.c" switch( (*p) ) { case 13: goto st9; case 32: goto st9; @@ -557,14 +686,14 @@ case 18: goto st9; goto st18; tr4: -#line 362 "parser.rl" +#line 491 "parser.rl" { p--; {p++; cs = 27; goto _out;} } goto st27; st27: if ( ++p == pe ) goto _test_eof27; case 27: -#line 568 "parser.c" +#line 697 "parser.c" goto st0; st19: if ( ++p == pe ) @@ -662,10 +791,34 @@ case 26: _out: {} } -#line 388 "parser.rl" +#line 517 "parser.rl" if (cs >= JSON_object_first_final) { - if (json->create_additions) { + long count = 
json->stack->head - stack_head; + + if (RB_UNLIKELY(json->object_class)) { + VALUE object = rb_class_new_instance(0, 0, json->object_class); + long index = 0; + VALUE *items = rvalue_stack_peek(json->stack, count); + while (index < count) { + VALUE name = items[index++]; + VALUE value = items[index++]; + rb_funcall(object, i_aset, 2, name, value); + } + *result = object; + } else { + VALUE hash; +#ifdef HAVE_RB_HASH_NEW_CAPA + hash = rb_hash_new_capa(count >> 1); +#else + hash = rb_hash_new(); +#endif + rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); + *result = hash; + } + rvalue_stack_pop(json->stack, count); + + if (RB_UNLIKELY(json->create_additions)) { VALUE klassname; if (json->object_class) { klassname = rb_funcall(*result, i_aref, 1, json->create_id); @@ -689,8 +842,7 @@ case 26: } - -#line 694 "parser.c" +#line 846 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -698,7 +850,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 491 "parser.rl" +#line 652 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -706,14 +858,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 710 "parser.c" +#line 862 "parser.c" { cs = JSON_value_start; } -#line 498 "parser.rl" +#line 659 "parser.rl" -#line 717 "parser.c" +#line 869 "parser.c" { if ( p == pe ) goto _test_eof; @@ -747,14 +899,19 @@ case 1: cs = 0; goto _out; tr2: -#line 443 "parser.rl" +#line 595 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} + if (np == NULL) { + p--; + {p++; cs = 29; goto _out;} + } else { + {p = (( np))-1;} + } } goto st29; tr3: -#line 448 "parser.rl" +#line 605 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -767,14 +924,18 @@ cs = 0; } } np = 
JSON_parse_float(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} + if (np != NULL) { + {p = (( np))-1;} + } np = JSON_parse_integer(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} + if (np != NULL) { + {p = (( np))-1;} + } p--; {p++; cs = 29; goto _out;} } goto st29; tr7: -#line 466 "parser.rl" +#line 627 "parser.rl" { char *np; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -782,7 +943,7 @@ cs = 0; } goto st29; tr11: -#line 472 "parser.rl" +#line 633 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -790,7 +951,7 @@ cs = 0; } goto st29; tr25: -#line 436 "parser.rl" +#line 588 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -800,7 +961,7 @@ cs = 0; } goto st29; tr27: -#line 429 "parser.rl" +#line 581 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -810,19 +971,19 @@ cs = 0; } goto st29; tr31: -#line 423 "parser.rl" +#line 575 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 420 "parser.rl" +#line 572 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 426 "parser.rl" +#line 578 "parser.rl" { *result = Qtrue; } @@ -831,9 +992,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 478 "parser.rl" +#line 639 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 837 "parser.c" +#line 998 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -1074,13 +1235,14 @@ case 28: _out: {} } -#line 499 "parser.rl" +#line 660 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); } if (cs >= JSON_value_first_final) { + PUSH(*result); return p; } else { return NULL; @@ -1088,7 +1250,7 @@ case 28: } -#line 1092 "parser.c" +#line 1254 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -1096,7 +1258,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 519 "parser.rl" +#line 681 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, 
VALUE *result) @@ -1104,15 +1266,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 1108 "parser.c" +#line 1270 "parser.c" { cs = JSON_integer_start; } -#line 526 "parser.rl" +#line 688 "parser.rl" json->memo = p; -#line 1116 "parser.c" +#line 1278 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1146,14 +1308,14 @@ case 3: goto st0; goto tr4; tr4: -#line 516 "parser.rl" +#line 678 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 1157 "parser.c" +#line 1319 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1172,7 +1334,7 @@ case 5: _out: {} } -#line 528 "parser.rl" +#line 690 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -1187,7 +1349,7 @@ case 5: } -#line 1191 "parser.c" +#line 1353 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -1195,7 +1357,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 553 "parser.rl" +#line 715 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1203,15 +1365,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1207 "parser.c" +#line 1369 "parser.c" { cs = JSON_float_start; } -#line 560 "parser.rl" +#line 722 "parser.rl" json->memo = p; -#line 1215 "parser.c" +#line 1377 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1269,14 +1431,14 @@ case 8: goto st0; goto tr9; tr9: -#line 547 "parser.rl" +#line 709 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1280 "parser.c" +#line 1442 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1337,7 +1499,7 @@ case 7: _out: {} } -#line 562 "parser.rl" +#line 724 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; @@ -1390,7 +1552,7 @@ case 7: -#line 1394 "parser.c" +#line 1556 "parser.c" enum 
{JSON_array_start = 1}; enum {JSON_array_first_final = 17}; enum {JSON_array_error = 0}; @@ -1398,28 +1560,27 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 642 "parser.rl" +#line 799 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE array_class = json->array_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = array_class ? rb_class_new_instance(0, 0, array_class) : rb_ary_new(); + long stack_head = json->stack->head; -#line 1416 "parser.c" +#line 1577 "parser.c" { cs = JSON_array_start; } -#line 655 "parser.rl" +#line 811 "parser.rl" -#line 1423 "parser.c" +#line 1584 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1458,18 +1619,13 @@ case 2: goto st2; goto st0; tr2: -#line 619 "parser.rl" +#line 781 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { - if (json->array_class) { - rb_funcall(*result, i_leftshift, 1, v); - } else { - rb_ary_push(*result, v); - } {p = (( np))-1;} } } @@ -1478,7 +1634,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1482 "parser.c" +#line 1638 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -1578,14 +1734,14 @@ case 12: goto st3; goto st12; tr4: -#line 634 "parser.rl" +#line 791 "parser.rl" { p--; {p++; cs = 17; goto _out;} } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 1589 "parser.c" +#line 1745 "parser.c" goto st0; st13: if ( ++p == pe ) @@ -1641,9 +1797,25 @@ case 16: _out: {} } -#line 656 "parser.rl" +#line 812 "parser.rl" if(cs >= JSON_array_first_final) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->array_class)) { + VALUE array = rb_class_new_instance(0, 0, json->array_class); + VALUE *items = rvalue_stack_peek(json->stack, count); 
+ long index; + for (index = 0; index < count; index++) { + rb_funcall(array, i_leftshift, 1, items[index]); + } + *result = array; + } else { + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); + *result = array; + } + rvalue_stack_pop(json->stack, count); + return p + 1; } else { raise_parse_error("unexpected token at '%s'", p); @@ -1799,7 +1971,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 1803 "parser.c" +#line 1975 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1807,7 +1979,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 831 "parser.rl" +#line 1003 "parser.rl" static int @@ -1828,15 +2000,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1832 "parser.c" +#line 2004 "parser.c" { cs = JSON_string_start; } -#line 851 "parser.rl" +#line 1023 "parser.rl" json->memo = p; -#line 1840 "parser.c" +#line 2012 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1861,7 +2033,7 @@ case 2: goto st0; goto st2; tr2: -#line 818 "parser.rl" +#line 990 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { @@ -1871,14 +2043,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 828 "parser.rl" +#line 1000 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1882 "parser.c" +#line 2054 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1954,7 +2126,7 @@ case 7: _out: {} } -#line 853 "parser.rl" +#line 1025 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2106,7 +2278,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2110 "parser.c" +#line 2282 "parser.c" enum {JSON_start = 1}; enum 
{JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2114,7 +2286,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 1018 "parser.rl" +#line 1190 "parser.rl" /* @@ -2134,17 +2306,25 @@ static VALUE cParser_parse(VALUE self) char stack_buffer[FBUFFER_STACK_SIZE]; fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + -#line 2139 "parser.c" +#line 2319 "parser.c" { cs = JSON_start; } -#line 1038 "parser.rl" +#line 1218 "parser.rl" p = json->source; pe = p + json->len; -#line 2148 "parser.c" +#line 2328 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2178,7 +2358,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1010 "parser.rl" +#line 1182 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2188,7 +2368,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2192 "parser.c" +#line 2372 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2277,7 +2457,11 @@ case 9: _out: {} } -#line 1041 "parser.rl" +#line 1221 "parser.rl" + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } if (cs >= JSON_first_final && p == pe) { return result; @@ -2293,24 +2477,32 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) int cs = EVIL; VALUE result = Qnil; - JSON_Parser parser = {0}; - JSON_Parser *json = &parser; + JSON_Parser _parser = {0}; + JSON_Parser *json = &_parser; parser_init(json, source, opts); char stack_buffer[FBUFFER_STACK_SIZE]; fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = 
RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; -#line 2305 "parser.c" + +#line 2497 "parser.c" { cs = JSON_start; } -#line 1064 "parser.rl" +#line 1256 "parser.rl" p = json->source; pe = p + json->len; -#line 2314 "parser.c" +#line 2506 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2344,7 +2536,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1010 "parser.rl" +#line 1182 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2354,7 +2546,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2358 "parser.c" +#line 2550 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2443,7 +2635,11 @@ case 9: _out: {} } -#line 1067 "parser.rl" +#line 1259 "parser.rl" + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } if (cs >= JSON_first_final && p == pe) { return result; @@ -2453,19 +2649,6 @@ case 9: } } -#ifndef HAVE_RB_GC_MARK_LOCATIONS -// For TruffleRuby -void rb_gc_mark_locations(const VALUE *start, const VALUE *end) -{ - VALUE *value = start; - - while (value < end) { - rb_gc_mark(*value); - value++; - } -} -#endif - static void JSON_mark(void *ptr) { JSON_Parser *json = ptr; @@ -2475,6 +2658,8 @@ static void JSON_mark(void *ptr) rb_gc_mark(json->array_class); rb_gc_mark(json->decimal_class); rb_gc_mark(json->match_string); + rb_gc_mark(json->stack_handle); + const VALUE *name_cache_entries = &json->name_cache.entries[0]; rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length); } diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 15e88e2be..9a588b00a 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -15,6 +15,33 @@ static VALUE sym_max_nesting, sym_allow_nan, sym_symbolize_names, sym_freeze, static int binary_encindex; static int utf8_encindex; 
+#ifndef HAVE_RB_GC_MARK_LOCATIONS +// For TruffleRuby +void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +{ + VALUE *value = start; + + while (value < end) { + rb_gc_mark(*value); + value++; + } +} +#endif + +#ifndef HAVE_RB_HASH_BULK_INSERT +// For TruffleRuby +void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +{ + long index = 0; + while (index < count) { + VALUE name = pairs[index++]; + VALUE value = pairs[index++]; + rb_hash_aset(hash, name, value); + } + RB_GC_GUARD(hash); +} +#endif + /* name cache */ #include @@ -173,6 +200,110 @@ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const lon return rsymbol; } +/* rvalue stack */ + +#define RVALUE_STACK_INITIAL_CAPA 128 + +enum rvalue_stack_type { + RVALUE_STACK_HEAP_ALLOCATED = 0, + RVALUE_STACK_STACK_ALLOCATED = 1, +}; + +typedef struct rvalue_stack_struct { + enum rvalue_stack_type type; + long capa; + long head; + VALUE *ptr; +} rvalue_stack; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); + +static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = rvalue_stack_spill(stack, handle, stack_ref); + } else { + REALLOC_N(stack->ptr, VALUE, required); + stack->capa = required; + } + return stack; +} + +static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +{ + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = rvalue_stack_grow(stack, handle, stack_ref); + } + stack->ptr[stack->head] = value; + stack->head++; +} + +static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) +{ + return stack->ptr + (stack->head - count); +} + +static inline void rvalue_stack_pop(rvalue_stack *stack, long count) +{ + stack->head -= count; +} + +static void rvalue_stack_mark(void *ptr) +{ + rvalue_stack 
*stack = (rvalue_stack *)ptr; + rb_gc_mark_locations(stack->ptr, stack->ptr + stack->head); +} + +static void rvalue_stack_free(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + if (stack) { + ruby_xfree(stack->ptr); + ruby_xfree(stack); + } +} + +static size_t rvalue_stack_memsize(const void *ptr) +{ + const rvalue_stack *stack = (const rvalue_stack *)ptr; + return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_rvalue_stack_type = { + "JSON::Ext::Parser/rvalue_stack", + { + .dmark = rvalue_stack_mark, + .dfree = rvalue_stack_free, + .dsize = rvalue_stack_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) +{ + rvalue_stack *stack; + *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, rvalue_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(VALUE, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); + return stack; +} + +static void rvalue_stack_eagerly_release(VALUE handle) +{ + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + RTYPEDDATA_DATA(handle) = NULL; + rvalue_stack_free(stack); +} + /* unicode */ static const signed char digit_values[256] = { @@ -258,6 +389,8 @@ typedef struct JSON_ParserStruct { bool create_additions; bool deprecated_create_additions; rvalue_cache name_cache; + rvalue_stack *stack; + VALUE stack_handle; } JSON_Parser; #define GET_PARSER \ @@ -336,17 +469,10 @@ static void raise_parse_error(const char *format, const char *start) write data; action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting); + char *np = JSON_parse_value(json, fpc, pe, result, current_nesting); if (np == NULL) { 
fhold; fbreak; } else { - if (json->object_class) { - rb_funcall(*result, i_aset, 2, last_name, v); - } else { - OBJ_FREEZE(last_name); - rb_hash_aset(*result, last_name, v); - } fexec np; } } @@ -354,9 +480,12 @@ static void raise_parse_error(const char *format, const char *start) action parse_name { char *np; json->parsing_name = true; - np = JSON_parse_string(json, fpc, pe, &last_name); + np = JSON_parse_string(json, fpc, pe, result); json->parsing_name = false; - if (np == NULL) { fhold; fbreak; } else fexec np; + if (np == NULL) { fhold; fbreak; } else { + PUSH(*result); + fexec np; + } } action exit { fhold; fbreak; } @@ -371,23 +500,47 @@ static void raise_parse_error(const char *format, const char *start) ) @exit; }%% +#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) + static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = object_class ? 
rb_class_new_instance(0, 0, object_class) :rb_hash_new(); + long stack_head = json->stack->head; %% write init; %% write exec; if (cs >= JSON_object_first_final) { - if (json->create_additions) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->object_class)) { + VALUE object = rb_class_new_instance(0, 0, json->object_class); + long index = 0; + VALUE *items = rvalue_stack_peek(json->stack, count); + while (index < count) { + VALUE name = items[index++]; + VALUE value = items[index++]; + rb_funcall(object, i_aset, 2, name, value); + } + *result = object; + } else { + VALUE hash; +#ifdef HAVE_RB_HASH_NEW_CAPA + hash = rb_hash_new_capa(count >> 1); +#else + hash = rb_hash_new(); +#endif + rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); + *result = hash; + } + rvalue_stack_pop(json->stack, count); + + if (RB_UNLIKELY(json->create_additions)) { VALUE klassname; if (json->object_class) { klassname = rb_funcall(*result, i_aref, 1, json->create_id); @@ -410,7 +563,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } } - %%{ machine JSON_value; include JSON_common; @@ -442,7 +594,12 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } action parse_string { char *np = JSON_parse_string(json, fpc, pe, result); - if (np == NULL) { fhold; fbreak; } else fexec np; + if (np == NULL) { + fhold; + fbreak; + } else { + fexec np; + } } action parse_number { @@ -457,9 +614,13 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } } np = JSON_parse_float(json, fpc, pe, result); - if (np != NULL) fexec np; + if (np != NULL) { + fexec np; + } np = JSON_parse_integer(json, fpc, pe, result); - if (np != NULL) fexec np; + if (np != NULL) { + fexec np; + } fhold; fbreak; } @@ -502,6 +663,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul } if (cs >= JSON_value_first_final) { + PUSH(*result); return p; } else { 
return NULL; @@ -622,11 +784,6 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul if (np == NULL) { fhold; fbreak; } else { - if (json->array_class) { - rb_funcall(*result, i_leftshift, 1, v); - } else { - rb_ary_push(*result, v); - } fexec np; } } @@ -644,17 +801,32 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE array_class = json->array_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = array_class ? rb_class_new_instance(0, 0, array_class) : rb_ary_new(); + long stack_head = json->stack->head; %% write init; %% write exec; if(cs >= JSON_array_first_final) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->array_class)) { + VALUE array = rb_class_new_instance(0, 0, json->array_class); + VALUE *items = rvalue_stack_peek(json->stack, count); + long index; + for (index = 0; index < count; index++) { + rb_funcall(array, i_leftshift, 1, items[index]); + } + *result = array; + } else { + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); + *result = array; + } + rvalue_stack_pop(json->stack, count); + return p + 1; } else { raise_parse_error("unexpected token at '%s'", p); @@ -1034,11 +1206,23 @@ static VALUE cParser_parse(VALUE self) char stack_buffer[FBUFFER_STACK_SIZE]; fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + %% write init; p = json->source; pe = p + json->len; %% write exec; + if (json->stack_handle) { + 
rvalue_stack_eagerly_release(json->stack_handle); + } + if (cs >= JSON_first_final && p == pe) { return result; } else { @@ -1053,18 +1237,30 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) int cs = EVIL; VALUE result = Qnil; - JSON_Parser parser = {0}; - JSON_Parser *json = &parser; + JSON_Parser _parser = {0}; + JSON_Parser *json = &_parser; parser_init(json, source, opts); char stack_buffer[FBUFFER_STACK_SIZE]; fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + %% write init; p = json->source; pe = p + json->len; %% write exec; + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } + if (cs >= JSON_first_final && p == pe) { return result; } else { @@ -1073,19 +1269,6 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) } } -#ifndef HAVE_RB_GC_MARK_LOCATIONS -// For TruffleRuby -void rb_gc_mark_locations(const VALUE *start, const VALUE *end) -{ - VALUE *value = start; - - while (value < end) { - rb_gc_mark(*value); - value++; - } -} -#endif - static void JSON_mark(void *ptr) { JSON_Parser *json = ptr; @@ -1095,6 +1278,8 @@ static void JSON_mark(void *ptr) rb_gc_mark(json->array_class); rb_gc_mark(json->decimal_class); rb_gc_mark(json->match_string); + rb_gc_mark(json->stack_handle); + const VALUE *name_cache_entries = &json->name_cache.entries[0]; rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length); } From 233391251c61a676ff24e727655dbdaf680d6871 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 4 Nov 2024 09:19:18 +0100 Subject: [PATCH 57/75] Update benchmark notes --- benchmark/parser.rb | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git 
a/benchmark/parser.rb b/benchmark/parser.rb index bee94f32d..4425728a4 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -28,24 +28,21 @@ def benchmark_parsing(name, json_output) # NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] -# Oj::Parser is very significanly faster (2.70x) on the nested array benchmark -# thanks to its stack implementation that saves resizing arrays. -# But we're on par with `Oj.dumo` +# Oj::Parser is very significanly faster (1.80x) on the nested array benchmark. benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10) -# Oj::Parser is significanly faster (~1.5x) on the next 4 benchmarks in large part thanks to its string caching. - -# Other than that we're either a bit slower or a bit faster than regular `Oj.load`. +# Oj::Parser is significanly faster (~1.5x) on the next 4 benchmarks in large part because its +# cache is persisted across calls. That's not something we can do with the current API, we'd +# need to expose a stateful API as well, but that's no really desirable. +# Other than that we're faster than regular `Oj.load` by a good margin. benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }) benchmark_parsing "test from oj", < Date: Mon, 4 Nov 2024 13:15:20 +0100 Subject: [PATCH 58/75] Add tests for the behavior of JSON.generate with base types subclasses Ref: https://github.com/ruby/json/pull/674 Ref: https://github.com/ruby/json/pull/668 The behavior in such cases is quite unclear; the goal here is to figure out what the behavior was in the Cext version of `json 2.7.0` and get all implementations to converge. We can then decide to make them all behave differently if we so wish. 
--- ext/json/ext/generator/generator.c | 6 ++- test/json/json_generator_test.rb | 66 ++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index df15288a1..645500d04 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -755,7 +755,11 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) VALUE key_to_s; switch(rb_type(key)) { case T_STRING: - key_to_s = key; + if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) { + key_to_s = key; + } else { + key_to_s = rb_funcall(key, i_to_s, 0); + } break; case T_SYMBOL: key_to_s = rb_sym2str(key); diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 6716eb82f..112c03b22 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -536,6 +536,72 @@ def test_to_json_called_with_state_object assert_instance_of JSON::State, argument end + module CustomToJSON + def to_json(*) + %{"#{self.class.name}#to_json"} + end + end + + module CustomToS + def to_s + "#{self.class.name}#to_s" + end + end + + class ArrayWithToJSON < Array + include CustomToJSON + end + + def test_array_subclass_with_to_json + assert_equal '["JSONGeneratorTest::ArrayWithToJSON#to_json"]', JSON.generate([ArrayWithToJSON.new]) + assert_equal '{"[]":1}', JSON.generate(ArrayWithToJSON.new => 1) + end + + class ArrayWithToS < Array + include CustomToS + end + + def test_array_subclass_with_to_s + assert_equal '[[]]', JSON.generate([ArrayWithToS.new]) + assert_equal '{"JSONGeneratorTest::ArrayWithToS#to_s":1}', JSON.generate(ArrayWithToS.new => 1) + end + + class HashWithToJSON < Hash + include CustomToJSON + end + + def test_hash_subclass_with_to_json + assert_equal '["JSONGeneratorTest::HashWithToJSON#to_json"]', JSON.generate([HashWithToJSON.new]) + assert_equal '{"{}":1}', JSON.generate(HashWithToJSON.new => 1) + end + + class HashWithToS < Hash + include CustomToS + 
end + + def test_hash_subclass_with_to_s + assert_equal '[{}]', JSON.generate([HashWithToS.new]) + assert_equal '{"JSONGeneratorTest::HashWithToS#to_s":1}', JSON.generate(HashWithToS.new => 1) + end + + class StringWithToJSON < String + include CustomToJSON + end + + def test_string_subclass_with_to_json + assert_equal '["JSONGeneratorTest::StringWithToJSON#to_json"]', JSON.generate([StringWithToJSON.new]) + assert_equal '{"":1}', JSON.generate(StringWithToJSON.new => 1) + end + + class StringWithToS < String + include CustomToS + end + + def test_string_subclass_with_to_s + assert_equal '[""]', JSON.generate([StringWithToS.new]) + assert_equal '{"JSONGeneratorTest::StringWithToS#to_s":1}', JSON.generate(StringWithToS.new => 1) + end + if defined?(JSON::Ext::Generator) and RUBY_PLATFORM != "java" def test_valid_utf8_in_different_encoding utf8_string = "€™" From 1da49556247506727a32d260fec68851259cf3b4 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Fri, 1 Nov 2024 20:07:57 +0100 Subject: [PATCH 59/75] Speedup #generate_json by using case/when/end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * if/elsif comparing `obj.class` is significantly slower: https://github.com/ruby/json/pull/668#issuecomment-2450747190 * The only case where an exact class check is needed so far is for String (https://github.com/ruby/json/issues/667). 
* Before: $ ruby -Ilib:ext benchmark/standalone.rb dump pure JSON::Pure::Generator truffleruby 24.2.0-dev-07b978e4, like ruby 3.2.4, Oracle GraalVM JVM [x86_64-linux] Calculating ------------------------------------- JSON.dump(obj) 6.426k (± 5.9%) i/s (155.62 μs/i) - 32.395k in 5.064479s JSON.dump(obj) 6.380k (± 7.4%) i/s (156.73 μs/i) - 31.806k in 5.021304s JSON.dump(obj) 6.276k (±10.5%) i/s (159.33 μs/i) - 31.217k in 5.060762s JSON.dump(obj) 6.450k (± 7.0%) i/s (155.05 μs/i) - 32.395k in 5.059538s JSON.dump(obj) 6.413k (± 6.2%) i/s (155.93 μs/i) - 32.395k in 5.081573s * After: $ ruby -Ilib:ext benchmark/standalone.rb dump pure JSON::Pure::Generator truffleruby 24.2.0-dev-07b978e4, like ruby 3.2.4, Oracle GraalVM JVM [x86_64-linux] Calculating ------------------------------------- JSON.dump(obj) 8.237k (± 5.0%) i/s (121.41 μs/i) - 41.600k in 5.069507s JSON.dump(obj) 8.179k (± 5.1%) i/s (122.26 μs/i) - 40.768k in 5.002035s JSON.dump(obj) 8.147k (± 7.9%) i/s (122.74 μs/i) - 40.768k in 5.044840s JSON.dump(obj) 8.137k (± 6.9%) i/s (122.90 μs/i) - 40.768k in 5.048690s JSON.dump(obj) 8.112k (±10.2%) i/s (123.27 μs/i) - 39.936k in 5.023502s --- lib/json/pure/generator.rb | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index d6cca92ca..21e765467 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -307,20 +307,18 @@ def generate(obj) # Handles @allow_nan, @buffer_initial_length, other ivars must be the default value (see above) private def generate_json(obj, buf) - klass = obj.class - if klass == Hash + case obj + when Hash buf << '{' first = true obj.each_pair do |k,v| buf << ',' unless first key_str = k.to_s - if key_str.is_a?(::String) - if key_str.class == ::String - fast_serialize_string(key_str, buf) - else - generate_json(key_str, buf) - end + if key_str.class == String + fast_serialize_string(key_str, buf) + elsif key_str.is_a?(String) 
+ generate_json(key_str, buf) else raise TypeError, "#{k.class}#to_s returns an instance of #{key_str.class}, expected a String" end @@ -330,7 +328,7 @@ def generate(obj) first = false end buf << '}' - elsif klass == Array + when Array buf << '[' first = true obj.each do |e| @@ -339,9 +337,13 @@ def generate(obj) first = false end buf << ']' - elsif klass == String - fast_serialize_string(obj, buf) - elsif klass == Integer + when String + if obj.class == String + fast_serialize_string(obj, buf) + else + buf << obj.to_json(self) + end + when Integer buf << obj.to_s else # Note: Float is handled this way since Float#to_s is slow anyway @@ -432,8 +434,8 @@ def json_transform(state) result << state.indent * depth if indent key_str = key.to_s - key_json = if key_str.is_a?(::String) - key_str = key_str.to_json(state) + if key_str.is_a?(String) + key_json = key_str.to_json(state) else raise TypeError, "#{key.class}#to_s returns an instance of #{key_str.class}, expected a String" end From 82d21f01c5c20da40e932216da2a91c05d9165a4 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Fri, 1 Nov 2024 20:22:42 +0100 Subject: [PATCH 60/75] Re-enable passing test --- test/json/json_parser_test.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 6d8456c79..adff91674 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -32,8 +32,6 @@ def test_argument_encoding_for_binary_unmodified end def test_error_message_encoding - pend if RUBY_ENGINE == 'truffleruby' - bug10705 = '[ruby-core:67386] [Bug #10705]' json = ".\"\xE2\x88\x9A\"" assert_equal(Encoding::UTF_8, json.encoding) From ef91aea7d2f2ce1f4ee44b353c9f4909e37370c3 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 4 Nov 2024 16:13:08 +0100 Subject: [PATCH 61/75] Add option :allow_trailing_comma to JSON#parse Fix: https://github.com/ruby/json/pull/401 Since we already accept comments without even a flag to turn it off, it makes 
sense to go just one small bit farther and optionally allow trailing comma so we have decent support for what is generally defined as "JSONC" and frequently used for configuration files. Co-Authored-By: Jan-Joost Spanjers --- ext/json/ext/parser/parser.c | 910 +++++++++++++++++++++++++--------- ext/json/ext/parser/parser.rl | 33 +- java/src/json/ext/Parser.java | 455 +++++++++++------ java/src/json/ext/Parser.rl | 17 +- lib/json/pure/parser.rb | 8 +- test/json/fixtures/fail4.json | 1 - test/json/fixtures/fail9.json | 1 - test/json/json_parser_test.rb | 88 +++- 8 files changed, 1122 insertions(+), 391 deletions(-) delete mode 100644 test/json/fixtures/fail4.json delete mode 100644 test/json/fixtures/fail9.json diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 8208e310a..382e21e12 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -10,7 +10,7 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_chr, i_deep_const_get, i_match, i_aset, i_aref, i_leftshift, i_new, i_try_convert, i_uminus, i_encode; -static VALUE sym_max_nesting, sym_allow_nan, sym_symbolize_names, sym_freeze, +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, sym_create_additions, sym_create_id, sym_object_class, sym_array_class, sym_decimal_class, sym_match_string; @@ -385,6 +385,7 @@ typedef struct JSON_ParserStruct { FBuffer fbuffer; int max_nesting; bool allow_nan; + bool allow_trailing_comma; bool parsing_name; bool symbolize_names; bool freeze; @@ -437,19 +438,19 @@ static void raise_parse_error(const char *format, const char *start) -#line 463 "parser.rl" +#line 464 "parser.rl" -#line 445 "parser.c" +#line 446 "parser.c" enum {JSON_object_start = 1}; -enum {JSON_object_first_final = 27}; +enum {JSON_object_first_final = 32}; enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 501 "parser.rl" +#line 504 "parser.rl" #define PUSH(result) 
rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) @@ -465,15 +466,16 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu long stack_head = json->stack->head; -#line 469 "parser.c" +#line 470 "parser.c" { cs = JSON_object_start; } -#line 516 "parser.rl" +#line 519 "parser.rl" -#line 476 "parser.c" +#line 477 "parser.c" { + short _widec; if ( p == pe ) goto _test_eof; switch ( cs ) @@ -493,14 +495,14 @@ case 2: case 13: goto st2; case 32: goto st2; case 34: goto tr2; - case 47: goto st23; + case 47: goto st28; case 125: goto tr4; } if ( 9 <= (*p) && (*p) <= 10 ) goto st2; goto st0; tr2: -#line 480 "parser.rl" +#line 483 "parser.rl" { char *np; json->parsing_name = true; @@ -516,7 +518,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 520 "parser.c" +#line 522 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -567,7 +569,7 @@ case 8: case 32: goto st8; case 34: goto tr11; case 45: goto tr11; - case 47: goto st19; + case 47: goto st24; case 73: goto tr11; case 78: goto tr11; case 91: goto tr11; @@ -583,7 +585,7 @@ case 8: goto st8; goto st0; tr11: -#line 471 "parser.rl" +#line 472 "parser.rl" { char *np = JSON_parse_value(json, p, pe, result, current_nesting); if (np == NULL) { @@ -597,16 +599,75 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 601 "parser.c" - switch( (*p) ) { - case 13: goto st9; - case 32: goto st9; - case 44: goto st10; - case 47: goto st15; +#line 603 "parser.c" + _widec = (*p); + if ( (*p) < 13 ) { + if ( (*p) > 9 ) { + if ( 10 <= (*p) && (*p) <= 10 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 13 ) { + if ( (*p) < 44 ) { + if ( 32 <= (*p) && (*p) <= 32 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( 
+#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 44 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st9; + case 269: goto st10; + case 288: goto st10; + case 300: goto st11; + case 303: goto st16; + case 525: goto st9; + case 544: goto st9; + case 556: goto st2; + case 559: goto st20; + } + if ( _widec > 266 ) { + if ( 521 <= _widec && _widec <= 522 ) + goto st9; + } else if ( _widec >= 265 ) + goto st10; + goto st0; +tr4: +#line 494 "parser.rl" + { p--; {p++; cs = 32; goto _out;} } + goto st32; +st32: + if ( ++p == pe ) + goto _test_eof32; +case 32: +#line 671 "parser.c" goto st0; st10: if ( ++p == pe ) @@ -615,8 +676,9 @@ case 10: switch( (*p) ) { case 13: goto st10; case 32: goto st10; - case 34: goto tr2; - case 47: goto st11; + case 44: goto st11; + case 47: goto st16; + case 125: goto tr4; } if ( 9 <= (*p) && (*p) <= 10 ) goto st10; @@ -626,139 +688,288 @@ case 10: goto _test_eof11; case 11: switch( (*p) ) { - case 42: goto st12; - case 47: goto st14; + case 13: goto st11; + case 32: goto st11; + case 34: goto tr2; + case 47: goto st12; } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st11; goto st0; st12: if ( ++p == pe ) goto _test_eof12; case 12: - if ( (*p) == 42 ) - goto st13; - goto st12; + switch( (*p) ) { + case 42: goto st13; + case 47: goto st15; + } + goto st0; st13: if ( ++p == pe ) goto _test_eof13; case 13: - switch( (*p) ) { - case 42: goto st13; - case 47: goto st10; - } - goto st12; + if ( (*p) == 42 ) + goto st14; + goto st13; st14: if ( ++p == pe ) 
goto _test_eof14; case 14: - if ( (*p) == 10 ) - goto st10; - goto st14; + switch( (*p) ) { + case 42: goto st14; + case 47: goto st11; + } + goto st13; st15: if ( ++p == pe ) goto _test_eof15; case 15: - switch( (*p) ) { - case 42: goto st16; - case 47: goto st18; - } - goto st0; + if ( (*p) == 10 ) + goto st11; + goto st15; st16: if ( ++p == pe ) goto _test_eof16; case 16: - if ( (*p) == 42 ) - goto st17; - goto st16; + switch( (*p) ) { + case 42: goto st17; + case 47: goto st19; + } + goto st0; st17: if ( ++p == pe ) goto _test_eof17; case 17: - switch( (*p) ) { - case 42: goto st17; - case 47: goto st9; - } - goto st16; + if ( (*p) == 42 ) + goto st18; + goto st17; st18: if ( ++p == pe ) goto _test_eof18; case 18: - if ( (*p) == 10 ) - goto st9; - goto st18; -tr4: -#line 491 "parser.rl" - { p--; {p++; cs = 27; goto _out;} } - goto st27; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: -#line 697 "parser.c" - goto st0; + switch( (*p) ) { + case 42: goto st18; + case 47: goto st10; + } + goto st17; st19: if ( ++p == pe ) goto _test_eof19; case 19: - switch( (*p) ) { - case 42: goto st20; - case 47: goto st22; - } - goto st0; + if ( (*p) == 10 ) + goto st10; + goto st19; st20: if ( ++p == pe ) goto _test_eof20; case 20: - if ( (*p) == 42 ) - goto st21; - goto st20; + _widec = (*p); + if ( (*p) > 42 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st17; + case 303: goto st19; + case 554: goto st21; + case 559: goto st23; + } + goto st0; st21: if ( ++p == pe ) goto _test_eof21; case 21: - switch( (*p) ) { - case 42: goto st21; - case 47: goto st8; - } - goto st20; + _widec = (*p); + if ( (*p) < 42 ) { + if ( (*p) <= 41 ) { + _widec = (short)(128 + ((*p) - -128)); + 
if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 42 ) { + if ( 43 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st18; + case 554: goto st22; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st21; + } else if ( _widec >= 128 ) + goto st17; + goto st0; st22: if ( ++p == pe ) goto _test_eof22; case 22: - if ( (*p) == 10 ) - goto st8; - goto st22; + _widec = (*p); + if ( (*p) < 43 ) { + if ( (*p) > 41 ) { + if ( 42 <= (*p) && (*p) <= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 46 ) { + if ( (*p) > 47 ) { + if ( 48 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st18; + case 303: goto st10; + case 554: goto st22; + case 559: goto st9; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st21; + } else if ( _widec >= 128 ) + goto st17; + goto st0; st23: if ( ++p == pe ) goto _test_eof23; case 23: - switch( (*p) ) { - case 42: goto st24; - case 47: goto st26; - } + _widec = (*p); + if ( (*p) < 10 ) { + if ( (*p) <= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + 
json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 10 ) { + if ( 11 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 266: goto st10; + case 522: goto st9; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st23; + } else if ( _widec >= 128 ) + goto st19; goto st0; st24: if ( ++p == pe ) goto _test_eof24; case 24: - if ( (*p) == 42 ) - goto st25; - goto st24; + switch( (*p) ) { + case 42: goto st25; + case 47: goto st27; + } + goto st0; st25: if ( ++p == pe ) goto _test_eof25; case 25: - switch( (*p) ) { - case 42: goto st25; - case 47: goto st2; - } - goto st24; + if ( (*p) == 42 ) + goto st26; + goto st25; st26: if ( ++p == pe ) goto _test_eof26; case 26: + switch( (*p) ) { + case 42: goto st26; + case 47: goto st8; + } + goto st25; +st27: + if ( ++p == pe ) + goto _test_eof27; +case 27: + if ( (*p) == 10 ) + goto st8; + goto st27; +st28: + if ( ++p == pe ) + goto _test_eof28; +case 28: + switch( (*p) ) { + case 42: goto st29; + case 47: goto st31; + } + goto st0; +st29: + if ( ++p == pe ) + goto _test_eof29; +case 29: + if ( (*p) == 42 ) + goto st30; + goto st29; +st30: + if ( ++p == pe ) + goto _test_eof30; +case 30: + switch( (*p) ) { + case 42: goto st30; + case 47: goto st2; + } + goto st29; +st31: + if ( ++p == pe ) + goto _test_eof31; +case 31: if ( (*p) == 10 ) goto st2; - goto st26; + goto st31; } _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; @@ -768,6 +979,7 @@ case 26: _test_eof7: cs = 7; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; + _test_eof32: cs = 32; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; @@ -777,7 +989,6 @@ 
case 26: _test_eof16: cs = 16; goto _test_eof; _test_eof17: cs = 17; goto _test_eof; _test_eof18: cs = 18; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; _test_eof19: cs = 19; goto _test_eof; _test_eof20: cs = 20; goto _test_eof; _test_eof21: cs = 21; goto _test_eof; @@ -786,12 +997,17 @@ case 26: _test_eof24: cs = 24; goto _test_eof; _test_eof25: cs = 25; goto _test_eof; _test_eof26: cs = 26; goto _test_eof; + _test_eof27: cs = 27; goto _test_eof; + _test_eof28: cs = 28; goto _test_eof; + _test_eof29: cs = 29; goto _test_eof; + _test_eof30: cs = 30; goto _test_eof; + _test_eof31: cs = 31; goto _test_eof; _test_eof: {} _out: {} } -#line 517 "parser.rl" +#line 520 "parser.rl" if (cs >= JSON_object_first_final) { long count = json->stack->head - stack_head; @@ -842,7 +1058,7 @@ case 26: } -#line 846 "parser.c" +#line 1062 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -850,7 +1066,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 652 "parser.rl" +#line 655 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -858,14 +1074,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 862 "parser.c" +#line 1078 "parser.c" { cs = JSON_value_start; } -#line 659 "parser.rl" +#line 662 "parser.rl" -#line 869 "parser.c" +#line 1085 "parser.c" { if ( p == pe ) goto _test_eof; @@ -899,7 +1115,7 @@ case 1: cs = 0; goto _out; tr2: -#line 595 "parser.rl" +#line 598 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { @@ -911,7 +1127,7 @@ cs = 0; } goto st29; tr3: -#line 605 "parser.rl" +#line 608 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -935,7 +1151,7 @@ cs = 0; } goto st29; tr7: -#line 627 "parser.rl" +#line 630 "parser.rl" { char *np; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -943,7 
+1159,7 @@ cs = 0; } goto st29; tr11: -#line 633 "parser.rl" +#line 636 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -951,7 +1167,7 @@ cs = 0; } goto st29; tr25: -#line 588 "parser.rl" +#line 591 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -961,7 +1177,7 @@ cs = 0; } goto st29; tr27: -#line 581 "parser.rl" +#line 584 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -971,19 +1187,19 @@ cs = 0; } goto st29; tr31: -#line 575 "parser.rl" +#line 578 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 572 "parser.rl" +#line 575 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 578 "parser.rl" +#line 581 "parser.rl" { *result = Qtrue; } @@ -992,9 +1208,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 639 "parser.rl" +#line 642 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 998 "parser.c" +#line 1214 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -1235,7 +1451,7 @@ case 28: _out: {} } -#line 660 "parser.rl" +#line 663 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -1250,7 +1466,7 @@ case 28: } -#line 1254 "parser.c" +#line 1470 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -1258,7 +1474,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 681 "parser.rl" +#line 684 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1266,15 +1482,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 1270 "parser.c" +#line 1486 "parser.c" { cs = JSON_integer_start; } -#line 688 "parser.rl" +#line 691 "parser.rl" json->memo = p; -#line 1278 "parser.c" +#line 1494 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1308,14 +1524,14 @@ case 3: goto st0; goto tr4; tr4: -#line 678 "parser.rl" +#line 681 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( 
++p == pe ) goto _test_eof4; case 4: -#line 1319 "parser.c" +#line 1535 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1334,7 +1550,7 @@ case 5: _out: {} } -#line 690 "parser.rl" +#line 693 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -1349,7 +1565,7 @@ case 5: } -#line 1353 "parser.c" +#line 1569 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -1357,7 +1573,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 715 "parser.rl" +#line 718 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1365,15 +1581,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1369 "parser.c" +#line 1585 "parser.c" { cs = JSON_float_start; } -#line 722 "parser.rl" +#line 725 "parser.rl" json->memo = p; -#line 1377 "parser.c" +#line 1593 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1431,14 +1647,14 @@ case 8: goto st0; goto tr9; tr9: -#line 709 "parser.rl" +#line 712 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1442 "parser.c" +#line 1658 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1499,7 +1715,7 @@ case 7: _out: {} } -#line 724 "parser.rl" +#line 727 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; @@ -1552,15 +1768,15 @@ case 7: -#line 1556 "parser.c" +#line 1772 "parser.c" enum {JSON_array_start = 1}; -enum {JSON_array_first_final = 17}; +enum {JSON_array_first_final = 22}; enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 799 "parser.rl" +#line 804 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1573,15 +1789,16 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul long stack_head = json->stack->head; -#line 1577 "parser.c" +#line 1793 "parser.c" { cs = 
JSON_array_start; } -#line 811 "parser.rl" +#line 816 "parser.rl" -#line 1584 "parser.c" +#line 1800 "parser.c" { + short _widec; if ( p == pe ) goto _test_eof; switch ( cs ) @@ -1602,7 +1819,7 @@ case 2: case 32: goto st2; case 34: goto tr2; case 45: goto tr2; - case 47: goto st13; + case 47: goto st18; case 73: goto tr2; case 78: goto tr2; case 91: goto tr2; @@ -1619,7 +1836,7 @@ case 2: goto st2; goto st0; tr2: -#line 781 "parser.rl" +#line 784 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1634,15 +1851,23 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1638 "parser.c" - switch( (*p) ) { +#line 1855 "parser.c" + _widec = (*p); + if ( 44 <= (*p) && (*p) <= 44 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { case 13: goto st3; case 32: goto st3; - case 44: goto st4; - case 47: goto st9; + case 47: goto st4; case 93: goto tr4; + case 300: goto st8; + case 556: goto st13; } - if ( 9 <= (*p) && (*p) <= 10 ) + if ( 9 <= _widec && _widec <= 10 ) goto st3; goto st0; st4: @@ -1650,57 +1875,67 @@ case 3: goto _test_eof4; case 4: switch( (*p) ) { - case 13: goto st4; - case 32: goto st4; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st5; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; + case 42: goto st5; + case 47: goto st7; } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st4; goto st0; st5: if ( ++p == pe ) goto _test_eof5; case 5: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st8; - } - goto st0; + if ( (*p) == 42 ) + goto st6; + goto st5; st6: if ( ++p == pe ) goto _test_eof6; case 6: - if ( (*p) == 42 ) - goto st7; - goto st6; + switch( (*p) ) { + case 42: goto st6; + case 47: goto st3; + } + goto st5; st7: if ( ++p == pe ) goto 
_test_eof7; case 7: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st4; - } - goto st6; + if ( (*p) == 10 ) + goto st3; + goto st7; +tr4: +#line 796 "parser.rl" + { p--; {p++; cs = 22; goto _out;} } + goto st22; +st22: + if ( ++p == pe ) + goto _test_eof22; +case 22: +#line 1914 "parser.c" + goto st0; st8: if ( ++p == pe ) goto _test_eof8; case 8: - if ( (*p) == 10 ) - goto st4; - goto st8; + switch( (*p) ) { + case 13: goto st8; + case 32: goto st8; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st9; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st8; + goto st0; st9: if ( ++p == pe ) goto _test_eof9; @@ -1723,7 +1958,7 @@ case 10: case 11: switch( (*p) ) { case 42: goto st11; - case 47: goto st3; + case 47: goto st8; } goto st10; st12: @@ -1731,50 +1966,252 @@ case 11: goto _test_eof12; case 12: if ( (*p) == 10 ) - goto st3; + goto st8; goto st12; -tr4: -#line 791 "parser.rl" - { p--; {p++; cs = 17; goto _out;} } - goto st17; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: -#line 1745 "parser.c" - goto st0; st13: if ( ++p == pe ) goto _test_eof13; case 13: - switch( (*p) ) { - case 42: goto st14; - case 47: goto st16; - } + _widec = (*p); + if ( (*p) < 13 ) { + if ( (*p) > 9 ) { + if ( 10 <= (*p) && (*p) <= 10 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 13 ) { + if ( (*p) > 32 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 32 ) { + _widec = (short)(128 
+ ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 34: goto tr2; + case 45: goto tr2; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 93: goto tr4; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + case 269: goto st8; + case 288: goto st8; + case 303: goto st9; + case 525: goto st13; + case 544: goto st13; + case 559: goto st14; + } + if ( _widec < 265 ) { + if ( 48 <= _widec && _widec <= 57 ) + goto tr2; + } else if ( _widec > 266 ) { + if ( 521 <= _widec && _widec <= 522 ) + goto st13; + } else + goto st8; goto st0; st14: if ( ++p == pe ) goto _test_eof14; case 14: - if ( (*p) == 42 ) - goto st15; - goto st14; + _widec = (*p); + if ( (*p) > 42 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st10; + case 303: goto st12; + case 554: goto st15; + case 559: goto st17; + } + goto st0; st15: if ( ++p == pe ) goto _test_eof15; case 15: - switch( (*p) ) { - case 42: goto st15; - case 47: goto st2; - } - goto st14; + _widec = (*p); + if ( (*p) < 42 ) { + if ( (*p) <= 41 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 42 ) { + if ( 43 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + 
case 298: goto st11; + case 554: goto st16; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st15; + } else if ( _widec >= 128 ) + goto st10; + goto st0; st16: if ( ++p == pe ) goto _test_eof16; case 16: + _widec = (*p); + if ( (*p) < 43 ) { + if ( (*p) > 41 ) { + if ( 42 <= (*p) && (*p) <= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 46 ) { + if ( (*p) > 47 ) { + if ( 48 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st11; + case 303: goto st8; + case 554: goto st16; + case 559: goto st13; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st15; + } else if ( _widec >= 128 ) + goto st10; + goto st0; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + _widec = (*p); + if ( (*p) < 10 ) { + if ( (*p) <= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 10 ) { + if ( 11 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 266: goto st8; + case 522: goto st13; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st17; + } else if ( 
_widec >= 128 ) + goto st12; + goto st0; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + switch( (*p) ) { + case 42: goto st19; + case 47: goto st21; + } + goto st0; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + if ( (*p) == 42 ) + goto st20; + goto st19; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + switch( (*p) ) { + case 42: goto st20; + case 47: goto st2; + } + goto st19; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: if ( (*p) == 10 ) goto st2; - goto st16; + goto st21; } _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; @@ -1782,22 +2219,27 @@ case 16: _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; + _test_eof22: cs = 22; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; _test_eof13: cs = 13; goto _test_eof; _test_eof14: cs = 14; goto _test_eof; _test_eof15: cs = 15; goto _test_eof; _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + _test_eof21: cs = 21; goto _test_eof; _test_eof: {} _out: {} } -#line 812 "parser.rl" +#line 817 "parser.rl" if(cs >= JSON_array_first_final) { long count = json->stack->head - stack_head; @@ -1971,7 +2413,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 1975 "parser.c" +#line 2417 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1979,7 +2421,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 1003 "parser.rl" +#line 1008 "parser.rl" static int @@ -2000,15 +2442,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE 
*resu VALUE match_string; -#line 2004 "parser.c" +#line 2446 "parser.c" { cs = JSON_string_start; } -#line 1023 "parser.rl" +#line 1028 "parser.rl" json->memo = p; -#line 2012 "parser.c" +#line 2454 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2033,7 +2475,7 @@ case 2: goto st0; goto st2; tr2: -#line 990 "parser.rl" +#line 995 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { @@ -2043,14 +2485,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 1000 "parser.rl" +#line 1005 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 2054 "parser.c" +#line 2496 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -2126,7 +2568,7 @@ case 7: _out: {} } -#line 1025 "parser.rl" +#line 1030 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2178,16 +2620,17 @@ static int configure_parser_i(VALUE key, VALUE val, VALUE data) { JSON_Parser *json = (JSON_Parser *)data; - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } - else if (key == sym_create_additions) { + if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? 
FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } + else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { json->freeze = RTEST(val); } + else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } + else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } + else if (key == sym_create_additions) { if (NIL_P(val)) { json->create_additions = true; json->deprecated_create_additions = true; @@ -2278,7 +2721,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2282 "parser.c" +#line 2725 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2286,7 +2729,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 1190 "parser.rl" +#line 1196 "parser.rl" /* @@ -2315,16 +2758,16 @@ static VALUE cParser_parse(VALUE self) json->stack = &stack; -#line 2319 "parser.c" +#line 2762 "parser.c" { cs = JSON_start; } -#line 1218 "parser.rl" +#line 1224 "parser.rl" p = json->source; pe = p + json->len; -#line 2328 "parser.c" +#line 2771 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2358,7 +2801,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1182 "parser.rl" +#line 1188 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2368,7 +2811,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2372 "parser.c" +#line 2815 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2457,7 +2900,7 @@ case 9: _out: {} } -#line 1221 
"parser.rl" +#line 1227 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); @@ -2493,16 +2936,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) json->stack = &stack; -#line 2497 "parser.c" +#line 2940 "parser.c" { cs = JSON_start; } -#line 1256 "parser.rl" +#line 1262 "parser.rl" p = json->source; pe = p + json->len; -#line 2506 "parser.c" +#line 2949 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2536,7 +2979,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1182 "parser.rl" +#line 1188 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2546,7 +2989,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2550 "parser.c" +#line 2993 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2635,7 +3078,7 @@ case 9: _out: {} } -#line 1259 "parser.rl" +#line 1265 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); @@ -2738,6 +3181,7 @@ void Init_parser(void) sym_max_nesting = ID2SYM(rb_intern("max_nesting")); sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); sym_freeze = ID2SYM(rb_intern("freeze")); sym_create_additions = ID2SYM(rb_intern("create_additions")); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 9a588b00a..6d4cc7a5b 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -8,7 +8,7 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_chr, i_deep_const_get, i_match, i_aset, i_aref, i_leftshift, i_new, i_try_convert, i_uminus, i_encode; -static VALUE sym_max_nesting, sym_allow_nan, sym_symbolize_names, sym_freeze, +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, sym_create_additions, sym_create_id, 
sym_object_class, sym_array_class, sym_decimal_class, sym_match_string; @@ -383,6 +383,7 @@ typedef struct JSON_ParserStruct { FBuffer fbuffer; int max_nesting; bool allow_nan; + bool allow_trailing_comma; bool parsing_name; bool symbolize_names; bool freeze; @@ -477,6 +478,8 @@ static void raise_parse_error(const char *format, const char *start) } } + action allow_trailing_comma { json->allow_trailing_comma } + action parse_name { char *np; json->parsing_name = true; @@ -495,7 +498,7 @@ static void raise_parse_error(const char *format, const char *start) main := ( begin_object - (pair (next_pair)*)? ignore* + (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore* end_object ) @exit; }%% @@ -788,13 +791,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul } } + action allow_trailing_comma { json->allow_trailing_comma } + action exit { fhold; fbreak; } next_element = value_separator ignore* begin_value >parse_value; main := begin_array ignore* ((begin_value >parse_value ignore*) - (ignore* next_element ignore*)*)? + (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)? end_array @exit; }%% @@ -1073,16 +1078,17 @@ static int configure_parser_i(VALUE key, VALUE val, VALUE data) { JSON_Parser *json = (JSON_Parser *)data; - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? 
val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } - else if (key == sym_create_additions) { + if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } + else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { json->freeze = RTEST(val); } + else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } + else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } + else if (key == sym_create_additions) { if (NIL_P(val)) { json->create_additions = true; json->deprecated_create_additions = true; @@ -1358,6 +1364,7 @@ void Init_parser(void) sym_max_nesting = ID2SYM(rb_intern("max_nesting")); sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); sym_freeze = ID2SYM(rb_intern("freeze")); sym_create_additions = ID2SYM(rb_intern("create_additions")); diff --git a/java/src/json/ext/Parser.java b/java/src/json/ext/Parser.java index 1e8832908..74037d375 100644 --- a/java/src/json/ext/Parser.java +++ b/java/src/json/ext/Parser.java @@ -54,6 +54,7 @@ public class Parser extends RubyObject { private boolean deprecatedCreateAdditions; private int maxNesting; private boolean allowNaN; + private boolean allowTrailingComma; private boolean symbolizeNames; private boolean freeze; private RubyClass objectClass; @@ -124,6 +125,11 @@ public Parser(Ruby runtime, RubyClass 
metaClass) { * Infinity and -Infinity in defiance of RFC 4627 * to be parsed by the Parser. This option defaults to false. * + *
:allow_trailing_comma + *
If set to true, allow arrays and objects with a trailing + * comma in defiance of RFC 4627 to be parsed by the Parser. + * This option defaults to false. + * *
:symbolize_names *
If set to true, returns symbols for the names (keys) in * a JSON object. Otherwise strings are returned, which is also the default. @@ -177,6 +183,7 @@ public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { OptionsReader opts = new OptionsReader(context, args.length > 1 ? args[1] : null); this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); this.allowNaN = opts.getBool("allow_nan", false); + this.allowTrailingComma = opts.getBool("allow_trailing_comma", false); this.symbolizeNames = opts.getBool("symbolize_names", false); this.freeze = opts.getBool("freeze", false); this.createId = opts.getString("create_id", getCreateId(context)); @@ -364,11 +371,11 @@ private Ruby getRuntime() { } -// line 390 "Parser.rl" +// line 397 "Parser.rl" -// line 372 "Parser.java" +// line 379 "Parser.java" private static byte[] init__JSON_value_actions_0() { return new byte [] { @@ -482,7 +489,7 @@ private static byte[] init__JSON_value_from_state_actions_0() static final int JSON_value_en_main = 1; -// line 496 "Parser.rl" +// line 503 "Parser.rl" void parseValue(ParserResult res, int p, int pe) { @@ -490,14 +497,14 @@ void parseValue(ParserResult res, int p, int pe) { IRubyObject result = null; -// line 494 "Parser.java" +// line 501 "Parser.java" { cs = JSON_value_start; } -// line 503 "Parser.rl" +// line 510 "Parser.rl" -// line 501 "Parser.java" +// line 508 "Parser.java" { int _klen; int _trans = 0; @@ -523,13 +530,13 @@ void parseValue(ParserResult res, int p, int pe) { while ( _nacts-- > 0 ) { switch ( _JSON_value_actions[_acts++] ) { case 9: -// line 481 "Parser.rl" +// line 488 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 533 "Parser.java" +// line 540 "Parser.java" } } @@ -592,25 +599,25 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) switch ( _JSON_value_actions[_acts++] ) { case 0: -// line 398 "Parser.rl" +// line 405 "Parser.rl" { result = getRuntime().getNil(); } break; case 1: -// 
line 401 "Parser.rl" +// line 408 "Parser.rl" { result = getRuntime().getFalse(); } break; case 2: -// line 404 "Parser.rl" +// line 411 "Parser.rl" { result = getRuntime().getTrue(); } break; case 3: -// line 407 "Parser.rl" +// line 414 "Parser.rl" { if (parser.allowNaN) { result = getConstant(CONST_NAN); @@ -620,7 +627,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 4: -// line 414 "Parser.rl" +// line 421 "Parser.rl" { if (parser.allowNaN) { result = getConstant(CONST_INFINITY); @@ -630,7 +637,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 5: -// line 421 "Parser.rl" +// line 428 "Parser.rl" { if (pe > p + 8 && absSubSequence(p, p + 9).equals(JSON_MINUS_INFINITY)) { @@ -659,7 +666,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 6: -// line 447 "Parser.rl" +// line 454 "Parser.rl" { parseString(res, p, pe); if (res.result == null) { @@ -672,7 +679,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 7: -// line 457 "Parser.rl" +// line 464 "Parser.rl" { currentNesting++; parseArray(res, p, pe); @@ -687,7 +694,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 8: -// line 469 "Parser.rl" +// line 476 "Parser.rl" { currentNesting++; parseObject(res, p, pe); @@ -701,7 +708,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } } break; -// line 705 "Parser.java" +// line 712 "Parser.java" } } } @@ -721,7 +728,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) break; } } -// line 504 "Parser.rl" +// line 511 "Parser.rl" if (cs >= JSON_value_first_final && result != null) { if (parser.freeze) { @@ -734,7 +741,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } -// line 738 "Parser.java" +// line 745 "Parser.java" private static byte[] init__JSON_integer_actions_0() { return new byte [] { @@ -833,7 +840,7 @@ private static byte[] init__JSON_integer_trans_actions_0() static final int JSON_integer_en_main = 1; -// line 526 "Parser.rl" +// 
line 533 "Parser.rl" void parseInteger(ParserResult res, int p, int pe) { @@ -851,15 +858,15 @@ int parseIntegerInternal(int p, int pe) { int cs = EVIL; -// line 855 "Parser.java" +// line 862 "Parser.java" { cs = JSON_integer_start; } -// line 543 "Parser.rl" +// line 550 "Parser.rl" int memo = p; -// line 863 "Parser.java" +// line 870 "Parser.java" { int _klen; int _trans = 0; @@ -940,13 +947,13 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) switch ( _JSON_integer_actions[_acts++] ) { case 0: -// line 520 "Parser.rl" +// line 527 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 950 "Parser.java" +// line 957 "Parser.java" } } } @@ -966,7 +973,7 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) break; } } -// line 545 "Parser.rl" +// line 552 "Parser.rl" if (cs < JSON_integer_first_final) { return -1; @@ -986,7 +993,7 @@ RubyInteger bytesToInum(Ruby runtime, ByteList num) { } -// line 990 "Parser.java" +// line 997 "Parser.java" private static byte[] init__JSON_float_actions_0() { return new byte [] { @@ -1088,7 +1095,7 @@ private static byte[] init__JSON_float_trans_actions_0() static final int JSON_float_en_main = 1; -// line 578 "Parser.rl" +// line 585 "Parser.rl" void parseFloat(ParserResult res, int p, int pe) { @@ -1107,15 +1114,15 @@ int parseFloatInternal(int p, int pe) { int cs = EVIL; -// line 1111 "Parser.java" +// line 1118 "Parser.java" { cs = JSON_float_start; } -// line 596 "Parser.rl" +// line 603 "Parser.rl" int memo = p; -// line 1119 "Parser.java" +// line 1126 "Parser.java" { int _klen; int _trans = 0; @@ -1196,13 +1203,13 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) switch ( _JSON_float_actions[_acts++] ) { case 0: -// line 569 "Parser.rl" +// line 576 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1206 "Parser.java" +// line 1213 "Parser.java" } } } @@ -1222,7 +1229,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) 
break; } } -// line 598 "Parser.rl" +// line 605 "Parser.rl" if (cs < JSON_float_first_final) { return -1; @@ -1232,7 +1239,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) } -// line 1236 "Parser.java" +// line 1243 "Parser.java" private static byte[] init__JSON_string_actions_0() { return new byte [] { @@ -1334,7 +1341,7 @@ private static byte[] init__JSON_string_trans_actions_0() static final int JSON_string_en_main = 1; -// line 637 "Parser.rl" +// line 644 "Parser.rl" void parseString(ParserResult res, int p, int pe) { @@ -1342,15 +1349,15 @@ void parseString(ParserResult res, int p, int pe) { IRubyObject result = null; -// line 1346 "Parser.java" +// line 1353 "Parser.java" { cs = JSON_string_start; } -// line 644 "Parser.rl" +// line 651 "Parser.rl" int memo = p; -// line 1354 "Parser.java" +// line 1361 "Parser.java" { int _klen; int _trans = 0; @@ -1431,7 +1438,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) switch ( _JSON_string_actions[_acts++] ) { case 0: -// line 612 "Parser.rl" +// line 619 "Parser.rl" { int offset = byteList.begin(); ByteList decoded = decoder.decode(byteList, memo + 1 - offset, @@ -1446,13 +1453,13 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) } break; case 1: -// line 625 "Parser.rl" +// line 632 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1456 "Parser.java" +// line 1463 "Parser.java" } } } @@ -1472,7 +1479,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) break; } } -// line 646 "Parser.rl" +// line 653 "Parser.rl" if (parser.createAdditions) { RubyHash matchString = parser.match_string; @@ -1520,7 +1527,7 @@ public void visit(IRubyObject pattern, IRubyObject klass) { } -// line 1524 "Parser.java" +// line 1531 "Parser.java" private static byte[] init__JSON_array_actions_0() { return new byte [] { @@ -1531,36 +1538,86 @@ private static byte[] init__JSON_array_actions_0() private static final byte _JSON_array_actions[] = 
init__JSON_array_actions_0(); +private static byte[] init__JSON_array_cond_offsets_0() +{ + return new byte [] { + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 6, 6, 6, 6, 6, 8, 11, 16, 19 + }; +} + +private static final byte _JSON_array_cond_offsets[] = init__JSON_array_cond_offsets_0(); + + +private static byte[] init__JSON_array_cond_lengths_0() +{ + return new byte [] { + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 0, 0, 0, 0, 2, 3, 5, 3, 0 + }; +} + +private static final byte _JSON_array_cond_lengths[] = init__JSON_array_cond_lengths_0(); + + +private static int[] init__JSON_array_cond_keys_0() +{ + return new int [] { + 44, 44, 9, 9, 10, 10, 13, 13, 32, 32, 47, 47, + 42, 42, 47, 47, 0, 41, 42, 42, 43,65535, 0, 41, + 42, 42, 43, 46, 47, 47, 48,65535, 0, 9, 10, 10, + 11,65535, 0 + }; +} + +private static final int _JSON_array_cond_keys[] = init__JSON_array_cond_keys_0(); + + +private static byte[] init__JSON_array_cond_spaces_0() +{ + return new byte [] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }; +} + +private static final byte _JSON_array_cond_spaces[] = init__JSON_array_cond_spaces_0(); + + private static byte[] init__JSON_array_key_offsets_0() { return new byte [] { - 0, 0, 1, 18, 25, 41, 43, 44, 46, 47, 49, 50, - 52, 53, 55, 56, 58, 59 + 0, 0, 1, 18, 26, 28, 29, 31, 32, 48, 50, 51, + 53, 54, 76, 78, 79, 81, 82, 86, 92, 100, 106 }; } private static final byte _JSON_array_key_offsets[] = init__JSON_array_key_offsets_0(); -private static char[] init__JSON_array_trans_keys_0() +private static int[] init__JSON_array_trans_keys_0() { - return new char [] { + return new int [] { 91, 13, 32, 34, 45, 47, 73, 78, 91, 93, 102, 110, - 116, 123, 9, 10, 48, 57, 13, 32, 44, 47, 93, 9, - 10, 13, 32, 34, 45, 47, 73, 78, 91, 102, 110, 116, - 123, 9, 10, 48, 57, 42, 47, 42, 42, 47, 10, 42, - 47, 42, 42, 47, 10, 42, 47, 42, 42, 47, 10, 0 + 116, 123, 9, 10, 48, 57, 13, 32, 47, 93,65580,131116, + 9, 10, 42, 47, 42, 42, 47, 10, 13, 32, 34, 45, + 47, 73, 
78, 91, 102, 110, 116, 123, 9, 10, 48, 57, + 42, 47, 42, 42, 47, 10, 34, 45, 73, 78, 91, 93, + 102, 110, 116, 123,65549,65568,65583,131085,131104,131119, 48, 57, + 65545,65546,131081,131082, 42, 47, 42, 42, 47, 10,65578,65583, + 131114,131119,65578,131114,65536,131071,131072,196607,65578,65583,131114,131119, + 65536,131071,131072,196607,65546,131082,65536,131071,131072,196607, 0 }; } -private static final char _JSON_array_trans_keys[] = init__JSON_array_trans_keys_0(); +private static final int _JSON_array_trans_keys[] = init__JSON_array_trans_keys_0(); private static byte[] init__JSON_array_single_lengths_0() { return new byte [] { - 0, 1, 13, 5, 12, 2, 1, 2, 1, 2, 1, 2, - 1, 2, 1, 2, 1, 0 + 0, 1, 13, 6, 2, 1, 2, 1, 12, 2, 1, 2, + 1, 16, 2, 1, 2, 1, 4, 2, 4, 2, 0 }; } @@ -1570,34 +1627,38 @@ private static byte[] init__JSON_array_single_lengths_0() private static byte[] init__JSON_array_range_lengths_0() { return new byte [] { - 0, 0, 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0 + 0, 0, 2, 1, 0, 0, 0, 0, 2, 0, 0, 0, + 0, 3, 0, 0, 0, 0, 0, 2, 2, 2, 0 }; } private static final byte _JSON_array_range_lengths[] = init__JSON_array_range_lengths_0(); -private static byte[] init__JSON_array_index_offsets_0() +private static short[] init__JSON_array_index_offsets_0() { - return new byte [] { - 0, 0, 2, 18, 25, 40, 43, 45, 48, 50, 53, 55, - 58, 60, 63, 65, 68, 70 + return new short [] { + 0, 0, 2, 18, 26, 29, 31, 34, 36, 51, 54, 56, + 59, 61, 81, 84, 86, 89, 91, 96, 101, 108, 113 }; } -private static final byte _JSON_array_index_offsets[] = init__JSON_array_index_offsets_0(); +private static final short _JSON_array_index_offsets[] = init__JSON_array_index_offsets_0(); private static byte[] init__JSON_array_indicies_0() { return new byte [] { 0, 1, 0, 0, 2, 2, 3, 2, 2, 2, 4, 2, - 2, 2, 2, 0, 2, 1, 5, 5, 6, 7, 4, 5, - 1, 6, 6, 2, 2, 8, 2, 2, 2, 2, 2, 2, - 2, 6, 2, 1, 9, 10, 1, 11, 9, 11, 6, 9, - 6, 10, 12, 13, 1, 14, 12, 14, 5, 12, 5, 13, - 15, 16, 1, 17, 15, 17, 0, 15, 
0, 16, 1, 0 + 2, 2, 2, 0, 2, 1, 5, 5, 6, 4, 7, 8, + 5, 1, 9, 10, 1, 11, 9, 11, 5, 9, 5, 10, + 7, 7, 2, 2, 12, 2, 2, 2, 2, 2, 2, 2, + 7, 2, 1, 13, 14, 1, 15, 13, 15, 7, 13, 7, + 14, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 0, + 0, 3, 8, 8, 16, 2, 0, 8, 1, 17, 18, 1, + 19, 17, 19, 0, 17, 0, 18, 17, 18, 20, 21, 1, + 19, 22, 17, 20, 1, 19, 0, 22, 8, 17, 20, 1, + 0, 8, 18, 21, 1, 1, 0 }; } @@ -1607,8 +1668,8 @@ private static byte[] init__JSON_array_indicies_0() private static byte[] init__JSON_array_trans_targs_0() { return new byte [] { - 2, 0, 3, 13, 17, 3, 4, 9, 5, 6, 8, 7, - 10, 12, 11, 14, 16, 15 + 2, 0, 3, 14, 22, 3, 4, 8, 13, 5, 7, 6, + 9, 10, 12, 11, 18, 15, 17, 16, 19, 21, 20 }; } @@ -1619,7 +1680,7 @@ private static byte[] init__JSON_array_trans_actions_0() { return new byte [] { 0, 0, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; } @@ -1627,13 +1688,13 @@ private static byte[] init__JSON_array_trans_actions_0() static final int JSON_array_start = 1; -static final int JSON_array_first_final = 17; +static final int JSON_array_first_final = 22; static final int JSON_array_error = 0; static final int JSON_array_en_main = 1; -// line 729 "Parser.rl" +// line 738 "Parser.rl" void parseArray(ParserResult res, int p, int pe) { @@ -1653,17 +1714,18 @@ void parseArray(ParserResult res, int p, int pe) { } -// line 1657 "Parser.java" +// line 1718 "Parser.java" { cs = JSON_array_start; } -// line 748 "Parser.rl" +// line 757 "Parser.rl" -// line 1664 "Parser.java" +// line 1725 "Parser.java" { int _klen; int _trans = 0; + int _widec; int _acts; int _nacts; int _keys; @@ -1681,6 +1743,37 @@ void parseArray(ParserResult res, int p, int pe) { continue _goto; } case 1: + _widec = data[p]; + _keys = _JSON_array_cond_offsets[cs]*2 +; _klen = _JSON_array_cond_lengths[cs]; + if ( _klen > 0 ) { + int _lower = _keys +; int _mid; + int _upper = _keys + (_klen<<1) - 2; + while (true) { + if ( _upper < _lower ) + break; + + _mid = _lower + 
(((_upper-_lower) >> 1) & ~1); + if ( _widec < _JSON_array_cond_keys[_mid] ) + _upper = _mid - 2; + else if ( _widec > _JSON_array_cond_keys[_mid+1] ) + _lower = _mid + 2; + else { + switch ( _JSON_array_cond_spaces[_JSON_array_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { + case 0: { + _widec = 65536 + (data[p] - 0); + if ( +// line 705 "Parser.rl" + parser.allowTrailingComma ) _widec += 65536; + break; + } + } + break; + } + } + } + _match: do { _keys = _JSON_array_key_offsets[cs]; _trans = _JSON_array_index_offsets[cs]; @@ -1694,9 +1787,9 @@ void parseArray(ParserResult res, int p, int pe) { break; _mid = _lower + ((_upper-_lower) >> 1); - if ( data[p] < _JSON_array_trans_keys[_mid] ) + if ( _widec < _JSON_array_trans_keys[_mid] ) _upper = _mid - 1; - else if ( data[p] > _JSON_array_trans_keys[_mid] ) + else if ( _widec > _JSON_array_trans_keys[_mid] ) _lower = _mid + 1; else { _trans += (_mid - _keys); @@ -1717,9 +1810,9 @@ else if ( data[p] > _JSON_array_trans_keys[_mid] ) break; _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( data[p] < _JSON_array_trans_keys[_mid] ) + if ( _widec < _JSON_array_trans_keys[_mid] ) _upper = _mid - 2; - else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) + else if ( _widec > _JSON_array_trans_keys[_mid+1] ) _lower = _mid + 2; else { _trans += ((_mid - _keys)>>1); @@ -1741,7 +1834,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) switch ( _JSON_array_actions[_acts++] ) { case 0: -// line 698 "Parser.rl" +// line 707 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -1758,13 +1851,13 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) } break; case 1: -// line 713 "Parser.rl" +// line 722 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1768 "Parser.java" +// line 1861 "Parser.java" } } } @@ -1784,7 +1877,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) break; } } -// line 749 "Parser.rl" +// line 758 "Parser.rl" if (cs >= JSON_array_first_final) { 
res.update(result, p + 1); @@ -1794,7 +1887,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) } -// line 1798 "Parser.java" +// line 1891 "Parser.java" private static byte[] init__JSON_object_actions_0() { return new byte [] { @@ -1805,40 +1898,91 @@ private static byte[] init__JSON_object_actions_0() private static final byte _JSON_object_actions[] = init__JSON_object_actions_0(); +private static byte[] init__JSON_object_cond_offsets_0() +{ + return new byte [] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 11, 16, + 19, 19, 19, 19, 19, 19, 19, 19, 19 + }; +} + +private static final byte _JSON_object_cond_offsets[] = init__JSON_object_cond_offsets_0(); + + +private static byte[] init__JSON_object_cond_lengths_0() +{ + return new byte [] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; +} + +private static final byte _JSON_object_cond_lengths[] = init__JSON_object_cond_lengths_0(); + + +private static int[] init__JSON_object_cond_keys_0() +{ + return new int [] { + 9, 9, 10, 10, 13, 13, 32, 32, 44, 44, 47, 47, + 42, 42, 47, 47, 0, 41, 42, 42, 43,65535, 0, 41, + 42, 42, 43, 46, 47, 47, 48,65535, 0, 9, 10, 10, + 11,65535, 0 + }; +} + +private static final int _JSON_object_cond_keys[] = init__JSON_object_cond_keys_0(); + + +private static byte[] init__JSON_object_cond_spaces_0() +{ + return new byte [] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }; +} + +private static final byte _JSON_object_cond_spaces[] = init__JSON_object_cond_spaces_0(); + + private static byte[] init__JSON_object_key_offsets_0() { return new byte [] { - 0, 0, 1, 8, 14, 16, 17, 19, 20, 36, 43, 49, - 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, - 69, 70, 72, 73 + 0, 0, 1, 8, 14, 16, 17, 19, 20, 36, 49, 56, + 62, 64, 65, 67, 68, 70, 71, 73, 74, 78, 84, 92, + 98, 100, 101, 103, 104, 106, 107, 109, 110 }; } private static final byte _JSON_object_key_offsets[] = 
init__JSON_object_key_offsets_0(); -private static char[] init__JSON_object_trans_keys_0() +private static int[] init__JSON_object_trans_keys_0() { - return new char [] { + return new int [] { 123, 13, 32, 34, 47, 125, 9, 10, 13, 32, 47, 58, 9, 10, 42, 47, 42, 42, 47, 10, 13, 32, 34, 45, 47, 73, 78, 91, 102, 110, 116, 123, 9, 10, 48, 57, - 13, 32, 44, 47, 125, 9, 10, 13, 32, 34, 47, 9, - 10, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, 47, - 10, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, 47, - 10, 0 + 125,65549,65568,65580,65583,131085,131104,131116,131119,65545,65546,131081, + 131082, 13, 32, 44, 47, 125, 9, 10, 13, 32, 34, 47, + 9, 10, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, + 47, 10,65578,65583,131114,131119,65578,131114,65536,131071,131072,196607, + 65578,65583,131114,131119,65536,131071,131072,196607,65546,131082,65536,131071, + 131072,196607, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, + 47, 10, 0 }; } -private static final char _JSON_object_trans_keys[] = init__JSON_object_trans_keys_0(); +private static final int _JSON_object_trans_keys[] = init__JSON_object_trans_keys_0(); private static byte[] init__JSON_object_single_lengths_0() { return new byte [] { - 0, 1, 5, 4, 2, 1, 2, 1, 12, 5, 4, 2, - 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, - 1, 2, 1, 0 + 0, 1, 5, 4, 2, 1, 2, 1, 12, 9, 5, 4, + 2, 1, 2, 1, 2, 1, 2, 1, 4, 2, 4, 2, + 2, 1, 2, 1, 2, 1, 2, 1, 0 }; } @@ -1848,25 +1992,25 @@ private static byte[] init__JSON_object_single_lengths_0() private static byte[] init__JSON_object_range_lengths_0() { return new byte [] { - 0, 0, 1, 1, 0, 0, 0, 0, 2, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0 + 0, 0, 1, 1, 0, 0, 0, 0, 2, 2, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0 }; } private static final byte _JSON_object_range_lengths[] = init__JSON_object_range_lengths_0(); -private static byte[] init__JSON_object_index_offsets_0() +private static short[] init__JSON_object_index_offsets_0() { - return new byte [] { - 0, 0, 2, 9, 15, 18, 20, 23, 
25, 40, 47, 53, - 56, 58, 61, 63, 66, 68, 71, 73, 76, 78, 81, 83, - 86, 88, 91, 93 + return new short [] { + 0, 0, 2, 9, 15, 18, 20, 23, 25, 40, 52, 59, + 65, 68, 70, 73, 75, 78, 80, 83, 85, 90, 95, 102, + 107, 110, 112, 115, 117, 120, 122, 125, 127 }; } -private static final byte _JSON_object_index_offsets[] = init__JSON_object_index_offsets_0(); +private static final short _JSON_object_index_offsets[] = init__JSON_object_index_offsets_0(); private static byte[] init__JSON_object_indicies_0() @@ -1875,11 +2019,14 @@ private static byte[] init__JSON_object_indicies_0() 0, 1, 0, 0, 2, 3, 4, 0, 1, 5, 5, 6, 7, 5, 1, 8, 9, 1, 10, 8, 10, 5, 8, 5, 9, 7, 7, 11, 11, 12, 11, 11, 11, 11, 11, 11, - 11, 7, 11, 1, 13, 13, 14, 15, 4, 13, 1, 14, - 14, 2, 16, 14, 1, 17, 18, 1, 19, 17, 19, 14, - 17, 14, 18, 20, 21, 1, 22, 20, 22, 13, 20, 13, - 21, 23, 24, 1, 25, 23, 25, 7, 23, 7, 24, 26, - 27, 1, 28, 26, 28, 0, 26, 0, 27, 1, 0 + 11, 7, 11, 1, 4, 13, 13, 14, 15, 16, 16, 0, + 17, 13, 16, 1, 13, 13, 14, 15, 4, 13, 1, 14, + 14, 2, 18, 14, 1, 19, 20, 1, 21, 19, 21, 14, + 19, 14, 20, 22, 23, 1, 24, 22, 24, 13, 22, 13, + 23, 22, 23, 25, 26, 1, 24, 27, 22, 25, 1, 24, + 13, 27, 16, 22, 25, 1, 13, 16, 23, 26, 1, 28, + 29, 1, 30, 28, 30, 7, 28, 7, 29, 31, 32, 1, + 33, 31, 33, 0, 31, 0, 32, 1, 0 }; } @@ -1889,9 +2036,9 @@ private static byte[] init__JSON_object_indicies_0() private static byte[] init__JSON_object_trans_targs_0() { return new byte [] { - 2, 0, 3, 23, 27, 3, 4, 8, 5, 7, 6, 9, - 19, 9, 10, 15, 11, 12, 14, 13, 16, 18, 17, 20, - 22, 21, 24, 26, 25 + 2, 0, 3, 28, 32, 3, 4, 8, 5, 7, 6, 9, + 24, 10, 11, 16, 9, 20, 12, 13, 15, 14, 17, 19, + 18, 21, 23, 22, 25, 27, 26, 29, 31, 30 }; } @@ -1903,7 +2050,7 @@ private static byte[] init__JSON_object_trans_actions_0() return new byte [] { 0, 0, 3, 0, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; } @@ -1911,13 +2058,13 @@ private static byte[] init__JSON_object_trans_actions_0() 
static final int JSON_object_start = 1; -static final int JSON_object_first_final = 27; +static final int JSON_object_first_final = 32; static final int JSON_object_error = 0; static final int JSON_object_en_main = 1; -// line 806 "Parser.rl" +// line 819 "Parser.rl" void parseObject(ParserResult res, int p, int pe) { @@ -1942,17 +2089,18 @@ void parseObject(ParserResult res, int p, int pe) { } -// line 1946 "Parser.java" +// line 2093 "Parser.java" { cs = JSON_object_start; } -// line 830 "Parser.rl" +// line 843 "Parser.rl" -// line 1953 "Parser.java" +// line 2100 "Parser.java" { int _klen; int _trans = 0; + int _widec; int _acts; int _nacts; int _keys; @@ -1970,6 +2118,37 @@ void parseObject(ParserResult res, int p, int pe) { continue _goto; } case 1: + _widec = data[p]; + _keys = _JSON_object_cond_offsets[cs]*2 +; _klen = _JSON_object_cond_lengths[cs]; + if ( _klen > 0 ) { + int _lower = _keys +; int _mid; + int _upper = _keys + (_klen<<1) - 2; + while (true) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( _widec < _JSON_object_cond_keys[_mid] ) + _upper = _mid - 2; + else if ( _widec > _JSON_object_cond_keys[_mid+1] ) + _lower = _mid + 2; + else { + switch ( _JSON_object_cond_spaces[_JSON_object_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { + case 0: { + _widec = 65536 + (data[p] - 0); + if ( +// line 772 "Parser.rl" + parser.allowTrailingComma ) _widec += 65536; + break; + } + } + break; + } + } + } + _match: do { _keys = _JSON_object_key_offsets[cs]; _trans = _JSON_object_index_offsets[cs]; @@ -1983,9 +2162,9 @@ void parseObject(ParserResult res, int p, int pe) { break; _mid = _lower + ((_upper-_lower) >> 1); - if ( data[p] < _JSON_object_trans_keys[_mid] ) + if ( _widec < _JSON_object_trans_keys[_mid] ) _upper = _mid - 1; - else if ( data[p] > _JSON_object_trans_keys[_mid] ) + else if ( _widec > _JSON_object_trans_keys[_mid] ) _lower = _mid + 1; else { _trans += (_mid - _keys); @@ -2006,9 +2185,9 @@ else if ( 
data[p] > _JSON_object_trans_keys[_mid] ) break; _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( data[p] < _JSON_object_trans_keys[_mid] ) + if ( _widec < _JSON_object_trans_keys[_mid] ) _upper = _mid - 2; - else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) + else if ( _widec > _JSON_object_trans_keys[_mid+1] ) _lower = _mid + 2; else { _trans += ((_mid - _keys)>>1); @@ -2030,7 +2209,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) switch ( _JSON_object_actions[_acts++] ) { case 0: -// line 763 "Parser.rl" +// line 774 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -2047,7 +2226,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } break; case 1: -// line 778 "Parser.rl" +// line 789 "Parser.rl" { parseString(res, p, pe); if (res.result == null) { @@ -2065,13 +2244,13 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } break; case 2: -// line 794 "Parser.rl" +// line 805 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 2075 "Parser.java" +// line 2254 "Parser.java" } } } @@ -2091,7 +2270,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) break; } } -// line 831 "Parser.rl" +// line 844 "Parser.rl" if (cs < JSON_object_first_final) { res.update(null, p + 1); @@ -2127,7 +2306,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } -// line 2131 "Parser.java" +// line 2310 "Parser.java" private static byte[] init__JSON_actions_0() { return new byte [] { @@ -2230,7 +2409,7 @@ private static byte[] init__JSON_trans_actions_0() static final int JSON_en_main = 1; -// line 885 "Parser.rl" +// line 898 "Parser.rl" public IRubyObject parseImplemetation() { @@ -2240,16 +2419,16 @@ public IRubyObject parseImplemetation() { ParserResult res = new ParserResult(); -// line 2244 "Parser.java" +// line 2423 "Parser.java" { cs = JSON_start; } -// line 894 "Parser.rl" +// line 907 "Parser.rl" p = byteList.begin(); pe = p + byteList.length(); -// line 2253 "Parser.java" +// line 
2432 "Parser.java" { int _klen; int _trans = 0; @@ -2330,7 +2509,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) switch ( _JSON_actions[_acts++] ) { case 0: -// line 871 "Parser.rl" +// line 884 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -2342,7 +2521,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) } } break; -// line 2346 "Parser.java" +// line 2525 "Parser.java" } } } @@ -2362,7 +2541,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) break; } } -// line 897 "Parser.rl" +// line 910 "Parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/java/src/json/ext/Parser.rl b/java/src/json/ext/Parser.rl index 8102bc93c..9d2b96d61 100644 --- a/java/src/json/ext/Parser.rl +++ b/java/src/json/ext/Parser.rl @@ -52,6 +52,7 @@ public class Parser extends RubyObject { private boolean deprecatedCreateAdditions; private int maxNesting; private boolean allowNaN; + private boolean allowTrailingComma; private boolean symbolizeNames; private boolean freeze; private RubyClass objectClass; @@ -122,6 +123,11 @@ public class Parser extends RubyObject { * Infinity and -Infinity in defiance of RFC 4627 * to be parsed by the Parser. This option defaults to false. * + *
:allow_trailing_comma + *
If set to true, allow arrays and objects with a trailing + * comma in defiance of RFC 4627 to be parsed by the Parser. + * This option defaults to false. + * *
:symbolize_names *
If set to true, returns symbols for the names (keys) in * a JSON object. Otherwise strings are returned, which is also the default. @@ -175,6 +181,7 @@ public class Parser extends RubyObject { OptionsReader opts = new OptionsReader(context, args.length > 1 ? args[1] : null); this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); this.allowNaN = opts.getBool("allow_nan", false); + this.allowTrailingComma = opts.getBool("allow_trailing_comma", false); this.symbolizeNames = opts.getBool("symbolize_names", false); this.freeze = opts.getBool("freeze", false); this.createId = opts.getString("create_id", getCreateId(context)); @@ -695,6 +702,8 @@ public class Parser extends RubyObject { write data; + action allow_trailing_comma { parser.allowTrailingComma } + action parse_value { parseValue(res, fpc, pe); if (res.result == null) { @@ -723,7 +732,7 @@ public class Parser extends RubyObject { ignore* ) ( ignore* next_element - ignore* )* )? + ignore* )* ( (value_separator ignore*) when allow_trailing_comma )? )? ignore* end_array @exit; }%% @@ -760,6 +769,8 @@ public class Parser extends RubyObject { write data; + action allow_trailing_comma { parser.allowTrailingComma } + action parse_value { parseValue(res, fpc, pe); if (res.result == null) { @@ -801,7 +812,9 @@ public class Parser extends RubyObject { next_pair = ignore* value_separator pair; main := ( - begin_object (pair (next_pair)*)? ignore* end_object + begin_object + (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore* + end_object ) @exit; }%% diff --git a/lib/json/pure/parser.rb b/lib/json/pure/parser.rb index 33a441073..36ef75ca5 100644 --- a/lib/json/pure/parser.rb +++ b/lib/json/pure/parser.rb @@ -73,6 +73,9 @@ def parse(source, opts = nil) # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in # defiance of RFC 7159 to be parsed by the Parser. This option defaults # to false. 
+ # * *allow_trailing_comma*: If set to true, allow arrays and objects with a + # trailing comma in defiance of RFC 7159 to be parsed by the Parser. + # This option defaults to false. # * *freeze*: If set to true, all parsed objects will be frozen. Parsed # string will be deduplicated if possible. # * *symbolize_names*: If set to true, returns symbols for the names @@ -103,6 +106,7 @@ def initialize(source, opts = nil) @max_nesting = 0 end @allow_nan = !!opts[:allow_nan] + @allow_trailing_comma = !!opts[:allow_trailing_comma] @symbolize_names = !!opts[:symbolize_names] @freeze = !!opts[:freeze] @@ -284,7 +288,7 @@ def parse_array raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!" end when scan(ARRAY_CLOSE) - if delim + if delim && !@allow_trailing_comma raise ParserError, "expected next element in array at '#{peek(20)}'!" end break @@ -327,7 +331,7 @@ def parse_object raise ParserError, "expected value in object at '#{peek(20)}'!" end when scan(OBJECT_CLOSE) - if delim + if delim && !@allow_trailing_comma raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!" end if @create_additions and klassname = result[@create_id] diff --git a/test/json/fixtures/fail4.json b/test/json/fixtures/fail4.json deleted file mode 100644 index 9de168bf3..000000000 --- a/test/json/fixtures/fail4.json +++ /dev/null @@ -1 +0,0 @@ -["extra comma",] \ No newline at end of file diff --git a/test/json/fixtures/fail9.json b/test/json/fixtures/fail9.json deleted file mode 100644 index 5815574f3..000000000 --- a/test/json/fixtures/fail9.json +++ /dev/null @@ -1 +0,0 @@ -{"Extra comma": true,} \ No newline at end of file diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index adff91674..9c9b44424 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -180,9 +180,95 @@ def test_parse_json_primitive_values assert parse('NaN', :allow_nan => true).nan? assert parse('Infinity', :allow_nan => true).infinite? 
assert parse('-Infinity', :allow_nan => true).infinite? - assert_raise(JSON::ParserError) { parse('[ 1, ]') } end + def test_parse_arrays_with_allow_trailing_comma + assert_equal([], parse('[]', allow_trailing_comma: true)) + assert_equal([], parse('[]', allow_trailing_comma: false)) + assert_raise(JSON::ParserError) { parse('[,]', allow_trailing_comma: true) } + assert_raise(JSON::ParserError) { parse('[,]', allow_trailing_comma: false) } + + assert_equal([1], parse('[1]', allow_trailing_comma: true)) + assert_equal([1], parse('[1]', allow_trailing_comma: false)) + assert_equal([1], parse('[1,]', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('[1,]', allow_trailing_comma: false) } + + assert_equal([1, 2, 3], parse('[1,2,3]', allow_trailing_comma: true)) + assert_equal([1, 2, 3], parse('[1,2,3]', allow_trailing_comma: false)) + assert_equal([1, 2, 3], parse('[1,2,3,]', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('[1,2,3,]', allow_trailing_comma: false) } + + assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 ]', allow_trailing_comma: true)) + assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 ]', allow_trailing_comma: false)) + assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 , ]', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('[ 1 , 2 , 3 , ]', allow_trailing_comma: false) } + + assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3] }', allow_trailing_comma: true)) + assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3] }', allow_trailing_comma: false)) + assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3,] }', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('{ "foo": [1,2,3,] }', allow_trailing_comma: false) } + end + + def test_parse_object_with_allow_trailing_comma + assert_equal({}, parse('{}', allow_trailing_comma: true)) + assert_equal({}, parse('{}', allow_trailing_comma: false)) + assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: true) } + 
assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: false) } + + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: true)) + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: false)) + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar",}', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('{"foo":"bar",}', allow_trailing_comma: false) } + + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: true) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: false) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: false) + } + + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: true) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: false) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: false) + } + + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: true) + ) + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: false) + ) + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + 
parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: false) + } + end + def test_parse_some_strings assert_equal([""], parse('[""]')) assert_equal(["\\"], parse('["\\\\"]')) From f38fde68de8b4915fb610eb34503890f10ebaa4d Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 5 Nov 2024 09:52:17 +0100 Subject: [PATCH 62/75] Fix some warnings in the test suite --- test/json/json_parser_test.rb | 116 +++++++++++++++++----------------- test/json/test_helper.rb | 4 +- 2 files changed, 60 insertions(+), 60 deletions(-) diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 9c9b44424..9cbaa42f5 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -210,64 +210,64 @@ def test_parse_arrays_with_allow_trailing_comma end def test_parse_object_with_allow_trailing_comma - assert_equal({}, parse('{}', allow_trailing_comma: true)) - assert_equal({}, parse('{}', allow_trailing_comma: false)) - assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: true) } - assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: false) } - - assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: true)) - assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: false)) - assert_equal({'foo'=>'bar'}, parse('{"foo":"bar",}', allow_trailing_comma: true)) - assert_raise(JSON::ParserError) { parse('{"foo":"bar",}', allow_trailing_comma: false) } - - assert_equal( - {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, - parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: true) - ) - assert_equal( - {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, - parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: false) - ) - assert_equal( - {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, - 
parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: true) - ) - assert_raise(JSON::ParserError) { - parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: false) - } - - assert_equal( - {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, - parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: true) - ) - assert_equal( - {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, - parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: false) - ) - assert_equal( - {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, - parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: true) - ) - assert_raise(JSON::ParserError) { - parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: false) - } - - assert_equal( - [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], - parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: true) - ) - assert_equal( - [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], - parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: false) - ) - assert_equal( - [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], - parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: true) - ) - assert_raise(JSON::ParserError) { - parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: false) - } - end + assert_equal({}, parse('{}', allow_trailing_comma: true)) + assert_equal({}, parse('{}', allow_trailing_comma: false)) + assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: true) } + assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: false) } + + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: true)) + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: false)) + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar",}', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { 
parse('{"foo":"bar",}', allow_trailing_comma: false) } + + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: true) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: false) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: false) + } + + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: true) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: false) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: false) + } + + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: true) + ) + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: false) + ) + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: false) + } + end def test_parse_some_strings assert_equal([""], parse('[""]')) diff --git a/test/json/test_helper.rb b/test/json/test_helper.rb index 6fcb76edf..f81eeec10 
100644 --- a/test/json/test_helper.rb +++ b/test/json/test_helper.rb @@ -23,8 +23,8 @@ # This method was added in Ruby 3.0.0. Calling it this way asks the GC to # move objects around, helping to find object movement bugs. begin - GC.verify_compaction_references(double_heap: true, toward: :empty) - rescue NotImplementedError + GC.verify_compaction_references(expand_heap: true, toward: :empty) + rescue NotImplementedError, ArgumentError # Some platforms don't support compaction end end From 1c44851225958dd1d07a69f4e7175ad6bba71797 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 5 Nov 2024 12:59:58 +0100 Subject: [PATCH 63/75] Raise JSON::GeneratorError instead of Encoding::UndefinedConversionError Followup: https://github.com/ruby/json/pull/633 That's what was raised historically. You could argue that this new exception is more precise, but I've encountered some real production code that expected the old behavior and that was broken by this change. --- ext/json/ext/generator/generator.c | 4 ++++ java/src/json/ext/Generator.java | 20 +++++++++++++------- lib/json/pure/generator.rb | 10 +++++++++- test/json/json_generator_test.rb | 14 +++++++++++--- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 645500d04..80539af6c 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -1036,6 +1036,10 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc) struct generate_json_data *data = (struct generate_json_data *)d; fbuffer_free(data->buffer); + if (RBASIC_CLASS(exc) == rb_path2class("Encoding::UndefinedConversionError")) { + exc = rb_exc_new_str(eGeneratorError, rb_funcall(exc, rb_intern("message"), 0)); + } + rb_exc_raise(exc); return Qundef; diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index b149feb36..f76dcb383 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ 
-17,6 +17,7 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; +import org.jruby.exceptions.RaiseException; public final class Generator { private Generator() { @@ -402,14 +403,19 @@ void generate(Session session, RubyString object, ByteList buffer) { RuntimeInfo info = session.getInfo(); RubyString src; - if (object.encoding(session.getContext()) != info.utf8.get()) { - src = (RubyString)object.encode(session.getContext(), - info.utf8.get()); - } else { - src = object; - } + try { + if (object.encoding(session.getContext()) != info.utf8.get()) { + src = (RubyString)object.encode(session.getContext(), + info.utf8.get()); + } else { + src = object; + } - session.getStringEncoder().encode(src.getByteList(), buffer); + session.getStringEncoder().encode(src.getByteList(), buffer); + } catch (RaiseException re) { + throw Utils.newException(session.getContext(), Utils.M_GENERATOR_ERROR, + re.getMessage()); + } } }; diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index 21e765467..5b4c83255 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -354,7 +354,13 @@ def generate(obj) # Assumes !@ascii_only, !@script_safe private def fast_serialize_string(string, buf) # :nodoc: buf << '"' - string = string.encode(::Encoding::UTF_8) unless string.encoding == ::Encoding::UTF_8 + unless string.encoding == ::Encoding::UTF_8 + begin + string = string.encode(::Encoding::UTF_8) + rescue Encoding::UndefinedConversionError => error + raise GeneratorError, error.message + end + end raise GeneratorError, "source sequence is illegal/malformed utf-8" unless string.valid_encoding? 
if /["\\\x0-\x1f]/n.match?(string) @@ -557,6 +563,8 @@ def to_json(state = nil, *args) else %("#{JSON.utf8_to_json(string, state.script_safe)}") end + rescue Encoding::UndefinedConversionError => error + raise ::JSON::GeneratorError, error.message end # Module that holds the extending methods if, the String module is diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 112c03b22..3bc7eb80a 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -477,12 +477,20 @@ def test_invalid_encoding_string end assert_includes error.message, "source sequence is illegal/malformed utf-8" - assert_raise(Encoding::UndefinedConversionError) do + assert_raise(JSON::GeneratorError) do + JSON.dump("\x82\xAC\xEF".b) + end + + assert_raise(JSON::GeneratorError) do "\x82\xAC\xEF".b.to_json end - assert_raise(Encoding::UndefinedConversionError) do - JSON.dump("\x82\xAC\xEF".b) + assert_raise(JSON::GeneratorError) do + ["\x82\xAC\xEF".b].to_json + end + + assert_raise(JSON::GeneratorError) do + { foo: "\x82\xAC\xEF".b }.to_json end end From 987f6becde70e109be1370992986fa538e9c2151 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 5 Nov 2024 15:41:10 +0100 Subject: [PATCH 64/75] Turn `json_pure` into an empty gem Fix: https://github.com/ruby/json/issues/650 Closes: https://github.com/ruby/json/pull/682 --- Gemfile | 6 +- README.md | 12 +- Rakefile | 51 +-- json_pure.gemspec | 23 +- lib/json.rb | 7 +- lib/json/common.rb | 12 +- lib/json/ext.rb | 6 +- lib/json/pure.rb | 16 +- lib/json/pure/parser.rb | 356 ------------------- lib/json/{pure => truffle_ruby}/generator.rb | 186 +++++----- test/json/json_common_interface_test.rb | 4 +- test/json/json_ext_parser_test.rb | 74 ++-- test/json/json_generator_test.rb | 34 +- test/json/json_parser_test.rb | 4 +- test/json/test_helper.rb | 20 +- 15 files changed, 175 insertions(+), 636 deletions(-) delete mode 100644 lib/json/pure/parser.rb rename lib/json/{pure => 
truffle_ruby}/generator.rb (85%) diff --git a/Gemfile b/Gemfile index ef2cf7fa0..4a76a6f91 100644 --- a/Gemfile +++ b/Gemfile @@ -1,10 +1,6 @@ source 'https://rubygems.org' -if ENV['JSON'] == 'pure' - gemspec name: 'json_pure' -else - gemspec name: 'json' -end +gemspec name: 'json' group :development do gem "ruby_memcheck" if RUBY_PLATFORM =~ /linux/i diff --git a/README.md b/README.md index 65f284249..29624e8c2 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,10 @@ ## Description This is an implementation of the JSON specification according to RFC 7159 -http://www.ietf.org/rfc/rfc7159.txt . There is two variants available: +http://www.ietf.org/rfc/rfc7159.txt . -* A pure ruby variant, that relies on the `strscan` extensions, which is - part of the ruby standard library. -* The quite a bit faster native extension variant, which is in parts - implemented in C or Java and comes with a parser generated by the [Ragel] - state machine compiler. - -Both variants of the JSON generator generate UTF-8 character sequences by -default. If an :ascii\_only option with a true value is given, they escape all +The JSON generator generate UTF-8 character sequences by default. +If an :ascii\_only option with a true value is given, they escape all non-ASCII and control characters with \uXXXX escape sequences, and support UTF-16 surrogate pairs in order to be able to generate the whole range of unicode code points. 
diff --git a/Rakefile b/Rakefile index 7a013eb0d..e16fc8d3d 100644 --- a/Rakefile +++ b/Rakefile @@ -56,12 +56,8 @@ else RAGEL_DOTGEN = %w[rlgen-dot rlgen-cd ragel].find(&which) end -desc "Installing library (pure)" -task :install_pure do - ruby 'install.rb' -end - -task :install_ext_really do +desc "Installing library (extension)" +task :install => [ :compile ] do sitearchdir = CONFIG["sitearchdir"] cd 'ext' do for file in Dir["json/ext/*.#{CONFIG['DLEXT']}"] @@ -73,30 +69,6 @@ task :install_ext_really do end end -desc "Installing library (extension)" -task :install_ext => [ :compile, :install_pure, :install_ext_really ] - -desc "Installing library (extension)" -task :install => :install_ext - -task :check_env do - ENV.key?('JSON') or fail "JSON env var is required" -end - -desc "Testing library (pure ruby)" -task :test_pure => [ :set_env_pure, :check_env, :do_test_pure ] -task(:set_env_pure) { ENV['JSON'] = 'pure' } - -UndocumentedTestTask.new do |t| - t.name = 'do_test_pure' - t.test_files = FileList['test/json/*_test.rb'] - t.verbose = true - t.options = '-v' -end - -desc "Testing library (pure ruby and extension)" -task :test => [ :test_pure, :test_ext ] - namespace :gems do desc 'Install all development gems' task :install do @@ -177,16 +149,14 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' sh "gem build -o pkg/json-#{PKG_VERSION}-java.gem json.gemspec" end - desc "Testing library (jruby)" - task :test_ext => [ :set_env_ext, :create_jar, :check_env, :do_test_ext ] - task(:set_env_ext) { ENV['JSON'] = 'ext' } - UndocumentedTestTask.new do |t| - t.name = 'do_test_ext' + t.name = :test t.test_files = FileList['test/json/*_test.rb'] t.verbose = true t.options = '-v' end + desc "Testing library (jruby)" + task :test => [:create_jar ] file JRUBY_PARSER_JAR => :compile do cd 'java/src' do @@ -239,20 +209,19 @@ else task :compile => [ :ragel, EXT_PARSER_DL, EXT_GENERATOR_DL ] end - desc "Testing library (extension)" - task :test_ext => [ :set_env_ext, 
:check_env, :compile, :do_test_ext ] - task(:set_env_ext) { ENV['JSON'] = 'ext' } - UndocumentedTestTask.new do |t| - t.name = 'do_test_ext' + t.name = :test t.test_files = FileList['test/json/*_test.rb'] t.verbose = true t.options = '-v' end + desc "Testing library (extension)" + task :test => [ :compile ] + begin require "ruby_memcheck" - RubyMemcheck::TestTask.new(valgrind: [ :set_env_ext, :check_env, :compile, :do_test_ext ]) do |t| + RubyMemcheck::TestTask.new(valgrind: [ :compile, :test ]) do |t| t.test_files = FileList['test/json/*_test.rb'] t.verbose = true t.options = '-v' diff --git a/json_pure.gemspec b/json_pure.gemspec index 37b437c4a..21d39d024 100644 --- a/json_pure.gemspec +++ b/json_pure.gemspec @@ -23,28 +23,7 @@ Gem::Specification.new do |s| "LEGAL", "README.md", "json_pure.gemspec", - "lib/json.rb", - "lib/json/add/bigdecimal.rb", - "lib/json/add/complex.rb", - "lib/json/add/core.rb", - "lib/json/add/date.rb", - "lib/json/add/date_time.rb", - "lib/json/add/exception.rb", - "lib/json/add/ostruct.rb", - "lib/json/add/range.rb", - "lib/json/add/rational.rb", - "lib/json/add/regexp.rb", - "lib/json/add/set.rb", - "lib/json/add/struct.rb", - "lib/json/add/symbol.rb", - "lib/json/add/time.rb", - "lib/json/common.rb", - "lib/json/ext.rb", - "lib/json/generic_object.rb", "lib/json/pure.rb", - "lib/json/pure/generator.rb", - "lib/json/pure/parser.rb", - "lib/json/version.rb", ] s.homepage = "https://ruby.github.io/json" s.metadata = { @@ -56,5 +35,7 @@ Gem::Specification.new do |s| 'wiki_uri' => 'https://github.com/ruby/json/wiki' } + s.add_dependency "json" + s.required_ruby_version = Gem::Requirement.new(">= 2.7") end diff --git a/lib/json.rb b/lib/json.rb index c28e853e1..dfd9b7dfc 100644 --- a/lib/json.rb +++ b/lib/json.rb @@ -583,10 +583,5 @@ # module JSON require 'json/version' - - begin - require 'json/ext' - rescue LoadError - require 'json/pure' - end + require 'json/ext' end diff --git a/lib/json/common.rb b/lib/json/common.rb index 
84f2a57c8..2269896ba 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -32,9 +32,7 @@ def [](object, opts = {}) JSON.generate(object, opts) end - # Returns the JSON parser class that is used by JSON. This is either - # JSON::Ext::Parser or JSON::Pure::Parser: - # JSON.parser # => JSON::Ext::Parser + # Returns the JSON parser class that is used by JSON. attr_reader :parser # Set the JSON parser class _parser_ to be used by JSON. @@ -97,14 +95,10 @@ def create_pretty_state ) end - # Returns the JSON generator module that is used by JSON. This is - # either JSON::Ext::Generator or JSON::Pure::Generator: - # JSON.generator # => JSON::Ext::Generator + # Returns the JSON generator module that is used by JSON. attr_reader :generator - # Sets or Returns the JSON generator state class that is used by JSON. This is - # either JSON::Ext::Generator::State or JSON::Pure::Generator::State: - # JSON.state # => JSON::Ext::Generator::State + # Sets or Returns the JSON generator state class that is used by JSON. attr_accessor :state end diff --git a/lib/json/ext.rb b/lib/json/ext.rb index 92ef61eae..2082cae68 100644 --- a/lib/json/ext.rb +++ b/lib/json/ext.rb @@ -8,14 +8,12 @@ module JSON module Ext if RUBY_ENGINE == 'truffleruby' require 'json/ext/parser' - require 'json/pure' - $DEBUG and warn "Using Ext extension for JSON parser and Pure library for JSON generator." + require 'json/truffle_ruby/generator' JSON.parser = Parser - JSON.generator = JSON::Pure::Generator + JSON.generator = ::JSON::TruffleRuby::Generator else require 'json/ext/parser' require 'json/ext/generator' - $DEBUG and warn "Using Ext extension for JSON." 
JSON.parser = Parser JSON.generator = Generator end diff --git a/lib/json/pure.rb b/lib/json/pure.rb index 69d2256d1..78a6d9dce 100644 --- a/lib/json/pure.rb +++ b/lib/json/pure.rb @@ -1,16 +1,4 @@ # frozen_string_literal: true -require 'json/common' -module JSON - # This module holds all the modules/classes that implement JSON's - # functionality in pure ruby. - module Pure - require 'json/pure/parser' - require 'json/pure/generator' - $DEBUG and warn "Using Pure library for JSON." - JSON.parser = Parser - JSON.generator = Generator - end - - JSON_LOADED = true unless defined?(::JSON::JSON_LOADED) -end +warn "`json_pure` is deprecated and has no effect, just use `json`" +require "json" diff --git a/lib/json/pure/parser.rb b/lib/json/pure/parser.rb deleted file mode 100644 index 36ef75ca5..000000000 --- a/lib/json/pure/parser.rb +++ /dev/null @@ -1,356 +0,0 @@ -#frozen_string_literal: true -require 'strscan' - -module JSON - module Pure - # This class implements the JSON parser that is used to parse a JSON string - # into a Ruby data structure. - class Parser < StringScanner - STRING = /" ((?:[^\x0-\x1f"\\] | - # escaped special characters: - \\["\\\/bfnrt] | - \\u[0-9a-fA-F]{4} | - # match all but escaped special characters: - \\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*) - "/nx - INTEGER = /(-?0|-?[1-9]\d*)/ - FLOAT = /(-? - (?:0|[1-9]\d*) - (?: - \.\d+(?i:e[+-]?\d+) | - \.\d+ | - (?i:e[+-]?\d+) - ) - )/x - NAN = /NaN/ - INFINITY = /Infinity/ - MINUS_INFINITY = /-Infinity/ - OBJECT_OPEN = /\{/ - OBJECT_CLOSE = /\}/ - ARRAY_OPEN = /\[/ - ARRAY_CLOSE = /\]/ - PAIR_DELIMITER = /:/ - COLLECTION_DELIMITER = /,/ - TRUE = /true/ - FALSE = /false/ - NULL = /null/ - IGNORE = %r( - (?: - //[^\n\r]*[\n\r]| # line comments - /\* # c-style comments - (?: - [\s\S]*? 
# any char, repeated lazily - ) - \*/ # the End of this comment - |[ \t\r\n]+ # whitespaces: space, horizontal tab, lf, cr - )+ - )mx - - UNPARSED = Object.new.freeze - - class << self - def parse(source, opts = nil) - if opts.nil? - new(source).parse - else - # NB: The ** shouldn't be required, but we have to deal with - # different versions of the `json` and `json_pure` gems being - # loaded concurrently. - # Prior to 2.7.3, `JSON::Ext::Parser` would only take kwargs. - # Ref: https://github.com/ruby/json/issues/650 - new(source, **opts).parse - end - end - end - - # Creates a new JSON::Pure::Parser instance for the string _source_. - # - # It will be configured by the _opts_ hash. _opts_ can have the following - # keys: - # * *max_nesting*: The maximum depth of nesting allowed in the parsed data - # structures. Disable depth checking with :max_nesting => false|nil|0, - # it defaults to 100. - # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - # defiance of RFC 7159 to be parsed by the Parser. This option defaults - # to false. - # * *allow_trailing_comma*: If set to true, allow arrays and objects with a - # trailing comma in defiance of RFC 7159 to be parsed by the Parser. - # This option defaults to false. - # * *freeze*: If set to true, all parsed objects will be frozen. Parsed - # string will be deduplicated if possible. - # * *symbolize_names*: If set to true, returns symbols for the names - # (keys) in a JSON object. Otherwise strings are returned, which is - # also the default. It's not possible to use this option in - # conjunction with the *create_additions* option. - # * *create_additions*: If set to true, the Parser creates - # additions when a matching class and create_id are found. This - # option defaults to false. - # * *object_class*: Defaults to Hash. If another type is provided, it will be used - # instead of Hash to represent JSON objects. 
The type must respond to - # +new+ without arguments, and return an object that respond to +[]=+. - # * *array_class*: Defaults to Array If another type is provided, it will be used - # instead of Hash to represent JSON arrays. The type must respond to - # +new+ without arguments, and return an object that respond to +<<+. - # * *decimal_class*: Specifies which class to use instead of the default - # (Float) when parsing decimal numbers. This class must accept a single - # string argument in its constructor. - def initialize(source, opts = nil) - opts ||= {} - source = convert_encoding source - super source - if !opts.key?(:max_nesting) # defaults to 100 - @max_nesting = 100 - elsif opts[:max_nesting] - @max_nesting = opts[:max_nesting] - else - @max_nesting = 0 - end - @allow_nan = !!opts[:allow_nan] - @allow_trailing_comma = !!opts[:allow_trailing_comma] - @symbolize_names = !!opts[:symbolize_names] - @freeze = !!opts[:freeze] - - @deprecated_create_additions = false - @create_additions = opts.fetch(:create_additions, false) - if @create_additions.nil? - @create_additions = true - @deprecated_create_additions = true - end - - @symbolize_names && @create_additions and raise ArgumentError, - 'options :symbolize_names and :create_additions cannot be used '\ - 'in conjunction' - @create_id = @create_additions ? JSON.create_id : nil - @object_class = opts[:object_class] || Hash - @array_class = opts[:array_class] || Array - @decimal_class = opts[:decimal_class] - @match_string = opts[:match_string] - end - - alias source string - - def reset - super - @current_nesting = 0 - end - - # Parses the current JSON string _source_ and returns the - # complete data structure as a result. - def parse - reset - obj = nil - while !eos? && skip(IGNORE) do end - if eos? - raise ParserError, "source is not valid JSON!" - else - obj = parse_value - UNPARSED.equal?(obj) and raise ParserError, - "source is not valid JSON!" - obj.freeze if @freeze - end - while !eos? 
&& skip(IGNORE) do end - eos? or raise ParserError, "source is not valid JSON!" - obj - end - - private - - def convert_encoding(source) - if source.respond_to?(:to_str) - source = source.to_str - else - raise TypeError, - "#{source.inspect} is not like a string" - end - if source.encoding != ::Encoding::BINARY - source = source.encode(::Encoding::UTF_8) - source.force_encoding(::Encoding::BINARY) - end - source - end - - # Unescape characters in strings. - UNESCAPE_MAP = { - '"' => '"', - '\\' => '\\', - '/' => '/', - 'b' => "\b", - 'f' => "\f", - 'n' => "\n", - 'r' => "\r", - 't' => "\t", - 'u' => nil, - }.freeze - - def parse_string - if scan(STRING) - return '' if self[1].empty? - string = self[1].gsub(%r{(?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff])}n) do |c| - k = $&[1] - if u = UNESCAPE_MAP.fetch(k) { k.chr } - u - else # \uXXXX - bytes = ''.b - i = 0 - while c[6 * i] == ?\\ && c[6 * i + 1] == ?u - bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16) - i += 1 - end - bytes.encode(Encoding::UTF_8, Encoding::UTF_16BE).force_encoding(::Encoding::BINARY) - end - end - string.force_encoding(::Encoding::UTF_8) - - if @freeze - string = -string - end - - if @create_additions and @match_string - for (regexp, klass) in @match_string - if klass.json_creatable? 
and string.match?(regexp) - if @deprecated_create_additions - warn "JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`" - end - - return klass.json_create(string) - end - end - end - string - else - UNPARSED - end - rescue => e - raise ParserError, "Caught #{e.class} at '#{peek(20)}': #{e}" - end - - def parse_value - case - when scan(FLOAT) - if @decimal_class then - if @decimal_class == BigDecimal then - BigDecimal(self[1]) - else - @decimal_class.new(self[1]) || Float(self[1]) - end - else - Float(self[1]) - end - when scan(INTEGER) - Integer(self[1]) - when scan(TRUE) - true - when scan(FALSE) - false - when scan(NULL) - nil - when !UNPARSED.equal?(string = parse_string) - string - when scan(ARRAY_OPEN) - @current_nesting += 1 - ary = parse_array - @current_nesting -= 1 - ary - when scan(OBJECT_OPEN) - @current_nesting += 1 - obj = parse_object - @current_nesting -= 1 - obj - when @allow_nan && scan(NAN) - NaN - when @allow_nan && scan(INFINITY) - Infinity - when @allow_nan && scan(MINUS_INFINITY) - MinusInfinity - else - UNPARSED - end - end - - def parse_array - raise NestingError, "nesting of #@current_nesting is too deep" if - @max_nesting.nonzero? && @current_nesting > @max_nesting - result = @array_class.new - delim = false - loop do - case - when eos? - raise ParserError, "unexpected end of string while parsing array" - when !UNPARSED.equal?(value = parse_value) - delim = false - result << value - skip(IGNORE) - if scan(COLLECTION_DELIMITER) - delim = true - elsif match?(ARRAY_CLOSE) - ; - else - raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!" - end - when scan(ARRAY_CLOSE) - if delim && !@allow_trailing_comma - raise ParserError, "expected next element in array at '#{peek(20)}'!" - end - break - when skip(IGNORE) - ; - else - raise ParserError, "unexpected token in array at '#{peek(20)}'!" 
- end - end - result - end - - def parse_object - raise NestingError, "nesting of #@current_nesting is too deep" if - @max_nesting.nonzero? && @current_nesting > @max_nesting - result = @object_class.new - delim = false - loop do - case - when eos? - raise ParserError, "unexpected end of string while parsing object" - when !UNPARSED.equal?(string = parse_string) - skip(IGNORE) - unless scan(PAIR_DELIMITER) - raise ParserError, "expected ':' in object at '#{peek(20)}'!" - end - skip(IGNORE) - unless UNPARSED.equal?(value = parse_value) - result[@symbolize_names ? string.to_sym : string] = value - delim = false - skip(IGNORE) - if scan(COLLECTION_DELIMITER) - delim = true - elsif match?(OBJECT_CLOSE) - ; - else - raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!" - end - else - raise ParserError, "expected value in object at '#{peek(20)}'!" - end - when scan(OBJECT_CLOSE) - if delim && !@allow_trailing_comma - raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!" - end - if @create_additions and klassname = result[@create_id] - klass = JSON.deep_const_get(klassname) - break unless klass and klass.json_creatable? - if @deprecated_create_additions - warn "JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`" - end - result = klass.json_create(result) - end - break - when skip(IGNORE) - ; - else - raise ParserError, "unexpected token in object at '#{peek(20)}'!" 
- end - end - result - end - end - end -end diff --git a/lib/json/pure/generator.rb b/lib/json/truffle_ruby/generator.rb similarity index 85% rename from lib/json/pure/generator.rb rename to lib/json/truffle_ruby/generator.rb index 5b4c83255..b0f3e4209 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/truffle_ruby/generator.rb @@ -1,98 +1,98 @@ # frozen_string_literal: true module JSON - MAP = { - "\x0" => '\u0000', - "\x1" => '\u0001', - "\x2" => '\u0002', - "\x3" => '\u0003', - "\x4" => '\u0004', - "\x5" => '\u0005', - "\x6" => '\u0006', - "\x7" => '\u0007', - "\b" => '\b', - "\t" => '\t', - "\n" => '\n', - "\xb" => '\u000b', - "\f" => '\f', - "\r" => '\r', - "\xe" => '\u000e', - "\xf" => '\u000f', - "\x10" => '\u0010', - "\x11" => '\u0011', - "\x12" => '\u0012', - "\x13" => '\u0013', - "\x14" => '\u0014', - "\x15" => '\u0015', - "\x16" => '\u0016', - "\x17" => '\u0017', - "\x18" => '\u0018', - "\x19" => '\u0019', - "\x1a" => '\u001a', - "\x1b" => '\u001b', - "\x1c" => '\u001c', - "\x1d" => '\u001d', - "\x1e" => '\u001e', - "\x1f" => '\u001f', - '"' => '\"', - '\\' => '\\\\', - }.freeze # :nodoc: - - ESCAPE_PATTERN = /[\/"\\\x0-\x1f]/n # :nodoc: - - SCRIPT_SAFE_MAP = MAP.merge( - '/' => '\\/', - "\u2028".b => '\u2028', - "\u2029".b => '\u2029', - ).freeze - - SCRIPT_SAFE_ESCAPE_PATTERN = Regexp.union(ESCAPE_PATTERN, "\u2028".b, "\u2029".b) - - # Convert a UTF8 encoded Ruby string _string_ to a JSON string, encoded with - # UTF16 big endian characters as \u????, and return it. 
- def utf8_to_json(string, script_safe = false) # :nodoc: - string = string.b - if script_safe - string.gsub!(SCRIPT_SAFE_ESCAPE_PATTERN) { SCRIPT_SAFE_MAP[$&] || $& } - else - string.gsub!(ESCAPE_PATTERN) { MAP[$&] || $& } - end - string.force_encoding(::Encoding::UTF_8) - string - end + module TruffleRuby + module Generator + MAP = { + "\x0" => '\u0000', + "\x1" => '\u0001', + "\x2" => '\u0002', + "\x3" => '\u0003', + "\x4" => '\u0004', + "\x5" => '\u0005', + "\x6" => '\u0006', + "\x7" => '\u0007', + "\b" => '\b', + "\t" => '\t', + "\n" => '\n', + "\xb" => '\u000b', + "\f" => '\f', + "\r" => '\r', + "\xe" => '\u000e', + "\xf" => '\u000f', + "\x10" => '\u0010', + "\x11" => '\u0011', + "\x12" => '\u0012', + "\x13" => '\u0013', + "\x14" => '\u0014', + "\x15" => '\u0015', + "\x16" => '\u0016', + "\x17" => '\u0017', + "\x18" => '\u0018', + "\x19" => '\u0019', + "\x1a" => '\u001a', + "\x1b" => '\u001b', + "\x1c" => '\u001c', + "\x1d" => '\u001d', + "\x1e" => '\u001e', + "\x1f" => '\u001f', + '"' => '\"', + '\\' => '\\\\', + }.freeze # :nodoc: + + ESCAPE_PATTERN = /[\/"\\\x0-\x1f]/n # :nodoc: + + SCRIPT_SAFE_MAP = MAP.merge( + '/' => '\\/', + "\u2028".b => '\u2028', + "\u2029".b => '\u2029', + ).freeze + + SCRIPT_SAFE_ESCAPE_PATTERN = Regexp.union(ESCAPE_PATTERN, "\u2028".b, "\u2029".b) + + # Convert a UTF8 encoded Ruby string _string_ to a JSON string, encoded with + # UTF16 big endian characters as \u????, and return it. + def utf8_to_json(string, script_safe = false) # :nodoc: + string = string.b + if script_safe + string.gsub!(SCRIPT_SAFE_ESCAPE_PATTERN) { SCRIPT_SAFE_MAP[$&] || $& } + else + string.gsub!(ESCAPE_PATTERN) { MAP[$&] || $& } + end + string.force_encoding(::Encoding::UTF_8) + string + end - def utf8_to_json_ascii(string, script_safe = false) # :nodoc: - string = string.b - map = script_safe ? 
SCRIPT_SAFE_MAP : MAP - string.gsub!(/[\/"\\\x0-\x1f]/n) { map[$&] || $& } - string.gsub!(/( - (?: - [\xc2-\xdf][\x80-\xbf] | - [\xe0-\xef][\x80-\xbf]{2} | - [\xf0-\xf4][\x80-\xbf]{3} - )+ | - [\x80-\xc1\xf5-\xff] # invalid - )/nx) { |c| - c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'" - s = c.encode(::Encoding::UTF_16BE, ::Encoding::UTF_8).unpack('H*')[0] - s.force_encoding(::Encoding::BINARY) - s.gsub!(/.{4}/n, '\\\\u\&') - s.force_encoding(::Encoding::UTF_8) - } - string.force_encoding(::Encoding::UTF_8) - string - rescue => e - raise GeneratorError.wrap(e) - end + def utf8_to_json_ascii(string, script_safe = false) # :nodoc: + string = string.b + map = script_safe ? SCRIPT_SAFE_MAP : MAP + string.gsub!(/[\/"\\\x0-\x1f]/n) { map[$&] || $& } + string.gsub!(/( + (?: + [\xc2-\xdf][\x80-\xbf] | + [\xe0-\xef][\x80-\xbf]{2} | + [\xf0-\xf4][\x80-\xbf]{3} + )+ | + [\x80-\xc1\xf5-\xff] # invalid + )/nx) { |c| + c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'" + s = c.encode(::Encoding::UTF_16BE, ::Encoding::UTF_8).unpack('H*')[0] + s.force_encoding(::Encoding::BINARY) + s.gsub!(/.{4}/n, '\\\\u\&') + s.force_encoding(::Encoding::UTF_8) + } + string.force_encoding(::Encoding::UTF_8) + string + rescue => e + raise GeneratorError.wrap(e) + end - def valid_utf8?(string) - encoding = string.encoding - (encoding == Encoding::UTF_8 || encoding == Encoding::ASCII) && - string.valid_encoding? - end - module_function :utf8_to_json, :utf8_to_json_ascii, :valid_utf8? + def valid_utf8?(string) + encoding = string.encoding + (encoding == Encoding::UTF_8 || encoding == Encoding::ASCII) && + string.valid_encoding? + end + module_function :utf8_to_json, :utf8_to_json_ascii, :valid_utf8? - module Pure - module Generator # This class is used to create State instances, that are use to hold data # while generating a JSON text from a Ruby data structure. 
class State @@ -300,7 +300,7 @@ def generate(obj) else result = obj.to_json(self) end - JSON.valid_utf8?(result) or raise GeneratorError, + JSON::TruffleRuby::Generator.valid_utf8?(result) or raise GeneratorError, "source sequence #{result.inspect} is illegal/malformed utf-8" result end @@ -559,9 +559,9 @@ def to_json(state = nil, *args) string = encode(::Encoding::UTF_8) end if state.ascii_only? - %("#{JSON.utf8_to_json_ascii(string, state.script_safe)}") + %("#{JSON::TruffleRuby::Generator.utf8_to_json_ascii(string, state.script_safe)}") else - %("#{JSON.utf8_to_json(string, state.script_safe)}") + %("#{JSON::TruffleRuby::Generator.utf8_to_json(string, state.script_safe)}") end rescue Encoding::UndefinedConversionError => error raise ::JSON::GeneratorError, error.message diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index e552412bf..6165cc041 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -52,11 +52,11 @@ def test_parser end def test_generator - assert_match(/::Generator\z/, JSON.generator.name) + assert_match(/::(TruffleRuby)?Generator\z/, JSON.generator.name) end def test_state - assert_match(/::Generator::State\z/, JSON.state.name) + assert_match(/::(TruffleRuby)?Generator::State\z/, JSON.state.name) end def test_create_id diff --git a/test/json/json_ext_parser_test.rb b/test/json/json_ext_parser_test.rb index 9db8ae772..da6150498 100644 --- a/test/json/json_ext_parser_test.rb +++ b/test/json/json_ext_parser_test.rb @@ -2,53 +2,51 @@ require_relative 'test_helper' class JSONExtParserTest < Test::Unit::TestCase - if defined?(JSON::Ext::Parser) - include JSON - - def test_allocate - parser = JSON::Ext::Parser.new("{}") - assert_raise(TypeError, '[ruby-core:35079]') do - parser.__send__(:initialize, "{}") - end - parser = JSON::Ext::Parser.allocate - assert_raise(TypeError, '[ruby-core:35079]') { parser.source } - end + include JSON - def test_error_messages - 
ex = assert_raise(ParserError) { parse('Infinity') } - assert_equal "unexpected token at 'Infinity'", ex.message + def test_allocate + parser = JSON::Ext::Parser.new("{}") + assert_raise(TypeError, '[ruby-core:35079]') do + parser.__send__(:initialize, "{}") + end + parser = JSON::Ext::Parser.allocate + assert_raise(TypeError, '[ruby-core:35079]') { parser.source } + end - unless RUBY_PLATFORM =~ /java/ - ex = assert_raise(ParserError) { parse('-Infinity') } - assert_equal "unexpected token at '-Infinity'", ex.message - end + def test_error_messages + ex = assert_raise(ParserError) { parse('Infinity') } + assert_equal "unexpected token at 'Infinity'", ex.message - ex = assert_raise(ParserError) { parse('NaN') } - assert_equal "unexpected token at 'NaN'", ex.message + unless RUBY_PLATFORM =~ /java/ + ex = assert_raise(ParserError) { parse('-Infinity') } + assert_equal "unexpected token at '-Infinity'", ex.message end - if GC.respond_to?(:stress=) - def test_gc_stress_parser_new - payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) - - previous_stress = GC.stress - JSON::Parser.new(payload).parse - ensure - GC.stress = previous_stress - end + ex = assert_raise(ParserError) { parse('NaN') } + assert_equal "unexpected token at 'NaN'", ex.message + end - def test_gc_stress - payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) + if GC.respond_to?(:stress=) + def test_gc_stress_parser_new + payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) - previous_stress = GC.stress - JSON.parse(payload) - ensure - GC.stress = previous_stress - end + previous_stress = GC.stress + JSON::Parser.new(payload).parse + ensure + GC.stress = previous_stress end - def parse(json) - JSON::Ext::Parser.new(json).parse + def test_gc_stress + payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) + + previous_stress = GC.stress + JSON.parse(payload) + ensure + GC.stress = previous_stress end end + + def parse(json) + 
JSON::Ext::Parser.new(json).parse + end end diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 3bc7eb80a..700220a15 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -343,27 +343,25 @@ def foo.to_h assert_equal '2', state.indent end - if defined?(JSON::Ext::Generator) - def test_broken_bignum # [ruby-core:38867] - pid = fork do - x = 1 << 64 - x.class.class_eval do - def to_s - end - end - begin - JSON::Ext::Generator::State.new.generate(x) - exit 1 - rescue TypeError - exit 0 + def test_broken_bignum # [ruby-core:38867] + pid = fork do + x = 1 << 64 + x.class.class_eval do + def to_s end end - _, status = Process.waitpid2(pid) - assert status.success? - rescue NotImplementedError - # forking to avoid modifying core class of a parent process and - # introducing race conditions of tests are run in parallel + begin + JSON::Ext::Generator::State.new.generate(x) + exit 1 + rescue TypeError + exit 0 + end end + _, status = Process.waitpid2(pid) + assert status.success? 
+ rescue NotImplementedError + # forking to avoid modifying core class of a parent process and + # introducing race conditions of tests are run in parallel end def test_hash_likeness_set_symbol diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 9cbaa42f5..8759ccd26 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -40,7 +40,7 @@ def test_error_message_encoding } assert_equal(Encoding::UTF_8, e.message.encoding, bug10705) assert_include(e.message, json, bug10705) - end if defined?(JSON::Ext::Parser) + end def test_parsing parser = JSON::Parser.new('"test"') @@ -619,7 +619,7 @@ def test_parse_error_incomplete_hash error = assert_raise(JSON::ParserError) do JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"bob@example.com"}') end - if RUBY_ENGINE == "ruby" && defined?(JSON::Ext) + if RUBY_ENGINE == "ruby" assert_equal %(unexpected token at '{"input":{"firstName":"Bob","las'), error.message end end diff --git a/test/json/test_helper.rb b/test/json/test_helper.rb index f81eeec10..11bb8ba8c 100644 --- a/test/json/test_helper.rb +++ b/test/json/test_helper.rb @@ -1,23 +1,7 @@ -case ENV['JSON'] -when 'pure' - $LOAD_PATH.unshift(File.expand_path('../../../lib', __FILE__)) - $stderr.puts("Testing JSON::Pure") - require 'json/pure' -when 'ext' - $stderr.puts("Testing JSON::Ext") - $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) - require 'json/ext' -else - $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) - $stderr.puts("Testing JSON") - require 'json' -end +$LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) +require 'json' require 'test/unit' -begin - require 'byebug' -rescue LoadError -end if GC.respond_to?(:verify_compaction_references) # This method was added in Ruby 3.0.0. 
Calling it this way asks the GC to From 5672fee51f4c5e85ce40cf72bce3c63f6023bc2b Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 5 Nov 2024 16:30:35 +0100 Subject: [PATCH 65/75] Remove final references to json_pure --- Gemfile | 2 +- README-json-jruby.md | 1 - README.md | 24 ------------------------ Rakefile | 4 +--- json_pure.gemspec | 41 ----------------------------------------- lib/json/pure.rb | 4 ---- 6 files changed, 2 insertions(+), 74 deletions(-) delete mode 100644 json_pure.gemspec delete mode 100644 lib/json/pure.rb diff --git a/Gemfile b/Gemfile index 4a76a6f91..98956c7e2 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,6 @@ source 'https://rubygems.org' -gemspec name: 'json' +gemspec group :development do gem "ruby_memcheck" if RUBY_PLATFORM =~ /linux/i diff --git a/README-json-jruby.md b/README-json-jruby.md index 7ea4f0f81..a66ebd621 100644 --- a/README-json-jruby.md +++ b/README-json-jruby.md @@ -3,7 +3,6 @@ JSON-JRuby JSON-JRuby is a port of Florian Frank's native [`json` library](http://json.rubyforge.org/) to JRuby. -It aims to be a perfect drop-in replacement for `json_pure`. Development version diff --git a/README.md b/README.md index 29624e8c2..d76e98996 100644 --- a/README.md +++ b/README.md @@ -21,10 +21,6 @@ endpoint. ## Installation -It's recommended to use the extension variant of JSON, because it's faster than -the pure ruby variant. If you cannot build it on your system, you can settle -for the latter. - Install the gem and add to the application's Gemfile by executing: $ bundle add json @@ -33,12 +29,6 @@ If bundler is not being used to manage dependencies, install the gem by executin $ gem install json - -There is also a pure ruby json only variant of the gem, that can be installed -with: - - $ gem install json_pure - ## Usage To use JSON you can @@ -47,20 +37,6 @@ To use JSON you can require 'json' ``` -to load the installed variant (either the extension `'json'` or the pure -variant `'json_pure'`). 
If you have installed the extension variant, you can -pick either the extension variant or the pure variant by typing - -```ruby -require 'json/ext' -``` - -or - -```ruby -require 'json/pure' -``` - Now you can parse a JSON document into a ruby data structure by calling ```ruby diff --git a/Rakefile b/Rakefile index e16fc8d3d..c5b518a1c 100644 --- a/Rakefile +++ b/Rakefile @@ -284,13 +284,11 @@ else desc "Create the gem packages" task :package do sh "gem build json.gemspec" - sh "gem build json_pure.gemspec" mkdir_p 'pkg' mv "json-#{PKG_VERSION}.gem", 'pkg' - mv "json_pure-#{PKG_VERSION}.gem", 'pkg' end - desc "Build all gems and archives for a new release of json and json_pure." + desc "Build all gems and archives for a new release of json" task :build => [ :clean, :package ] task :release => :build diff --git a/json_pure.gemspec b/json_pure.gemspec deleted file mode 100644 index 21d39d024..000000000 --- a/json_pure.gemspec +++ /dev/null @@ -1,41 +0,0 @@ -# frozen_string_literal: true - -version = File.foreach(File.join(__dir__, "lib/json/version.rb")) do |line| - /^\s*VERSION\s*=\s*'(.*)'/ =~ line and break $1 -end rescue nil - -Gem::Specification.new do |s| - s.name = "json_pure" - s.version = version - - s.summary = "JSON Implementation for Ruby" - s.description = "This is a JSON implementation in pure Ruby." 
- s.licenses = ["Ruby"] - s.authors = ["Florian Frank"] - s.email = "flori@ping.de" - - s.extra_rdoc_files = ["README.md"] - s.rdoc_options = ["--title", "JSON implementation for ruby", "--main", "README.md"] - s.files = [ - "CHANGES.md", - "COPYING", - "BSDL", - "LEGAL", - "README.md", - "json_pure.gemspec", - "lib/json/pure.rb", - ] - s.homepage = "https://ruby.github.io/json" - s.metadata = { - 'bug_tracker_uri' => 'https://github.com/ruby/json/issues', - 'changelog_uri' => 'https://github.com/ruby/json/blob/master/CHANGES.md', - 'documentation_uri' => 'https://ruby.github.io/json/doc/index.html', - 'homepage_uri' => s.homepage, - 'source_code_uri' => 'https://github.com/ruby/json', - 'wiki_uri' => 'https://github.com/ruby/json/wiki' - } - - s.add_dependency "json" - - s.required_ruby_version = Gem::Requirement.new(">= 2.7") -end diff --git a/lib/json/pure.rb b/lib/json/pure.rb deleted file mode 100644 index 78a6d9dce..000000000 --- a/lib/json/pure.rb +++ /dev/null @@ -1,4 +0,0 @@ -# frozen_string_literal: true - -warn "`json_pure` is deprecated and has no effect, just use `json`" -require "json" From e9894571a1537de0fd1b21b38e5fb3481953b4e6 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 5 Nov 2024 16:39:09 +0100 Subject: [PATCH 66/75] Fix a typo in the README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d76e98996..88fad3ebf 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ checks generate performs, e. g. nesting deepness checks). > [!CAUTION] > You should never use `JSON.unsafe_load` nor `JSON.parse(str, create_additions: true)` to parse untrusted user input, -> as it can lead to remove code execution vulnerabilities. +> as it can lead to remote code execution vulnerabilities. 
To create a JSON document from a ruby data structure, you can call `JSON.generate` like that: From e85107197b444e41105c456f7d53832b37339497 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 5 Nov 2024 17:29:32 +0100 Subject: [PATCH 67/75] Strip whitespaces --- test/json/json_parser_test.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 8759ccd26..c01e28910 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -214,12 +214,12 @@ def test_parse_object_with_allow_trailing_comma assert_equal({}, parse('{}', allow_trailing_comma: false)) assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: true) } assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: false) } - + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: true)) assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: false)) assert_equal({'foo'=>'bar'}, parse('{"foo":"bar",}', allow_trailing_comma: true)) assert_raise(JSON::ParserError) { parse('{"foo":"bar",}', allow_trailing_comma: false) } - + assert_equal( {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: true) @@ -235,7 +235,7 @@ def test_parse_object_with_allow_trailing_comma assert_raise(JSON::ParserError) { parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: false) } - + assert_equal( {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: true) @@ -251,7 +251,7 @@ def test_parse_object_with_allow_trailing_comma assert_raise(JSON::ParserError) { parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: false) } - + assert_equal( [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: true) From 
bc2c970ce4319d89518efc031e2a0c2606ca1438 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 5 Nov 2024 17:30:42 +0100 Subject: [PATCH 68/75] Appease ruby/ruby CI --- .github/workflows/ci.yml | 4 ++-- test/json/test_helper.rb | 28 +++++++++++++++------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c9422e0d7..27171b90d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,7 +50,7 @@ jobs: - run: rake compile - - run: rake test + - run: rake test JSON_COMPACT=1 - run: rake build @@ -78,4 +78,4 @@ jobs: - run: rake compile - - run: rake valgrind + - run: rake valgrind JSON_COMPACT=1 diff --git a/test/json/test_helper.rb b/test/json/test_helper.rb index 11bb8ba8c..d849e28b9 100644 --- a/test/json/test_helper.rb +++ b/test/json/test_helper.rb @@ -3,21 +3,23 @@ require 'json' require 'test/unit' -if GC.respond_to?(:verify_compaction_references) - # This method was added in Ruby 3.0.0. Calling it this way asks the GC to - # move objects around, helping to find object movement bugs. - begin - GC.verify_compaction_references(expand_heap: true, toward: :empty) - rescue NotImplementedError, ArgumentError - # Some platforms don't support compaction +if ENV["JSON_COMPACT"] + if GC.respond_to?(:verify_compaction_references) + # This method was added in Ruby 3.0.0. Calling it this way asks the GC to + # move objects around, helping to find object movement bugs. 
+ begin + GC.verify_compaction_references(expand_heap: true, toward: :empty) + rescue NotImplementedError, ArgumentError + # Some platforms don't support compaction + end end -end -if GC.respond_to?(:auto_compact=) - begin - GC.auto_compact = true - rescue NotImplementedError - # Some platforms don't support compaction + if GC.respond_to?(:auto_compact=) + begin + GC.auto_compact = true + rescue NotImplementedError + # Some platforms don't support compaction + end end end From 5cd37032c8ad6230defe2b4902c2c47d969b3b6b Mon Sep 17 00:00:00 2001 From: Yuji Yaginuma Date: Wed, 6 Nov 2024 16:02:49 +0900 Subject: [PATCH 69/75] Add changes for v2.7.6 I picked from https://github.com/ruby/json/blob/v2.7.x/CHANGES.md --- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 071251825..7dc91ca3f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,10 @@ * Bump required_ruby_version to 2.7. * More performance improvments to `JSON.dump` and `JSON.generate`. +### 2024-11-04 (2.7.6) + +* Fix a regression in JSON.generate when dealing with Hash keys that are string subclasses, call `to_json` on them. + ### 2024-10-25 (2.7.5) * Fix a memory leak when `#to_json` methods raise an exception. 
From 1acce7aceb656085ddc09d7f5d9129f5b60eff88 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 6 Nov 2024 12:51:06 +0900 Subject: [PATCH 70/75] Categorize deprecated warning --- ext/json/ext/parser/extconf.rb | 1 + ext/json/ext/parser/parser.c | 247 +++++++++++++++++--------------- ext/json/ext/parser/parser.rl | 13 +- test/json/json_addition_test.rb | 2 +- 4 files changed, 143 insertions(+), 120 deletions(-) diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb index f9104de12..4c1ac52a7 100644 --- a/ext/json/ext/parser/extconf.rb +++ b/ext/json/ext/parser/extconf.rb @@ -5,6 +5,7 @@ have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby +have_func("rb_category_warn", "ruby.h") # Missing on TruffleRuby append_cflags("-std=c99") diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 382e21e12..85bdd7922 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -17,6 +17,17 @@ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbo static int binary_encindex; static int utf8_encindex; +#ifdef HAVE_RB_CATEGORY_WARN +# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define json_deprecated(message) rb_warn(message) +#endif + +static const char deprecated_create_additions_warning[] = + "JSON.load implicit support for `create_additions: true` is deprecated " + "and will be removed in 3.0, use JSON.unsafe_load or explicitly " + "pass `create_additions: true`"; + #ifndef HAVE_RB_GC_MARK_LOCATIONS // For TruffleRuby void rb_gc_mark_locations(const VALUE *start, const VALUE *end) @@ -438,11 +449,11 @@ static void raise_parse_error(const char *format, const char *start) -#line 464 "parser.rl" +#line 475 "parser.rl" -#line 446 "parser.c" +#line 457 "parser.c" enum {JSON_object_start = 
1}; enum {JSON_object_first_final = 32}; enum {JSON_object_error = 0}; @@ -450,7 +461,7 @@ enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 504 "parser.rl" +#line 515 "parser.rl" #define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) @@ -466,14 +477,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu long stack_head = json->stack->head; -#line 470 "parser.c" +#line 481 "parser.c" { cs = JSON_object_start; } -#line 519 "parser.rl" +#line 530 "parser.rl" -#line 477 "parser.c" +#line 488 "parser.c" { short _widec; if ( p == pe ) @@ -502,7 +513,7 @@ case 2: goto st2; goto st0; tr2: -#line 483 "parser.rl" +#line 494 "parser.rl" { char *np; json->parsing_name = true; @@ -518,7 +529,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 522 "parser.c" +#line 533 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -585,7 +596,7 @@ case 8: goto st8; goto st0; tr11: -#line 472 "parser.rl" +#line 483 "parser.rl" { char *np = JSON_parse_value(json, p, pe, result, current_nesting); if (np == NULL) { @@ -599,20 +610,20 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 603 "parser.c" +#line 614 "parser.c" _widec = (*p); if ( (*p) < 13 ) { if ( (*p) > 9 ) { if ( 10 <= (*p) && (*p) <= 10 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -620,26 +631,26 @@ case 9: if ( 32 <= (*p) && (*p) <= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 44 ) { if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" 
json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -660,14 +671,14 @@ case 9: goto st10; goto st0; tr4: -#line 494 "parser.rl" +#line 505 "parser.rl" { p--; {p++; cs = 32; goto _out;} } goto st32; st32: if ( ++p == pe ) goto _test_eof32; case 32: -#line 671 "parser.c" +#line 682 "parser.c" goto st0; st10: if ( ++p == pe ) @@ -769,13 +780,13 @@ case 20: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -794,20 +805,20 @@ case 21: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -830,13 +841,13 @@ case 22: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 46 ) { @@ -844,19 +855,19 @@ case 22: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) 
- -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -880,20 +891,20 @@ case 23: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1007,7 +1018,7 @@ case 31: _out: {} } -#line 520 "parser.rl" +#line 531 "parser.rl" if (cs >= JSON_object_first_final) { long count = json->stack->head - stack_head; @@ -1045,7 +1056,7 @@ case 31: VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { if (json->deprecated_create_additions) { - rb_warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + json_deprecated(deprecated_create_additions_warning); } *result = rb_funcall(klass, i_json_create, 1, *result); } @@ -1058,7 +1069,7 @@ case 31: } -#line 1062 "parser.c" +#line 1073 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -1066,7 +1077,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 655 "parser.rl" +#line 666 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE 
*result, int current_nesting) @@ -1074,14 +1085,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1078 "parser.c" +#line 1089 "parser.c" { cs = JSON_value_start; } -#line 662 "parser.rl" +#line 673 "parser.rl" -#line 1085 "parser.c" +#line 1096 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1115,7 +1126,7 @@ case 1: cs = 0; goto _out; tr2: -#line 598 "parser.rl" +#line 609 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { @@ -1127,7 +1138,7 @@ cs = 0; } goto st29; tr3: -#line 608 "parser.rl" +#line 619 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -1151,7 +1162,7 @@ cs = 0; } goto st29; tr7: -#line 630 "parser.rl" +#line 641 "parser.rl" { char *np; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -1159,7 +1170,7 @@ cs = 0; } goto st29; tr11: -#line 636 "parser.rl" +#line 647 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -1167,7 +1178,7 @@ cs = 0; } goto st29; tr25: -#line 591 "parser.rl" +#line 602 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -1177,7 +1188,7 @@ cs = 0; } goto st29; tr27: -#line 584 "parser.rl" +#line 595 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -1187,19 +1198,19 @@ cs = 0; } goto st29; tr31: -#line 578 "parser.rl" +#line 589 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 575 "parser.rl" +#line 586 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 581 "parser.rl" +#line 592 "parser.rl" { *result = Qtrue; } @@ -1208,9 +1219,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 642 "parser.rl" +#line 653 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 1214 "parser.c" +#line 1225 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -1451,7 +1462,7 @@ case 28: _out: {} } -#line 663 "parser.rl" +#line 674 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -1466,7 +1477,7 @@ case 
28: } -#line 1470 "parser.c" +#line 1481 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -1474,7 +1485,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 684 "parser.rl" +#line 695 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1482,15 +1493,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 1486 "parser.c" +#line 1497 "parser.c" { cs = JSON_integer_start; } -#line 691 "parser.rl" +#line 702 "parser.rl" json->memo = p; -#line 1494 "parser.c" +#line 1505 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1524,14 +1535,14 @@ case 3: goto st0; goto tr4; tr4: -#line 681 "parser.rl" +#line 692 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 1535 "parser.c" +#line 1546 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1550,7 +1561,7 @@ case 5: _out: {} } -#line 693 "parser.rl" +#line 704 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -1565,7 +1576,7 @@ case 5: } -#line 1569 "parser.c" +#line 1580 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -1573,7 +1584,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 718 "parser.rl" +#line 729 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1581,15 +1592,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1585 "parser.c" +#line 1596 "parser.c" { cs = JSON_float_start; } -#line 725 "parser.rl" +#line 736 "parser.rl" json->memo = p; -#line 1593 "parser.c" +#line 1604 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1647,14 +1658,14 @@ case 8: goto st0; goto tr9; tr9: -#line 712 "parser.rl" +#line 723 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: 
if ( ++p == pe ) goto _test_eof9; case 9: -#line 1658 "parser.c" +#line 1669 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1715,7 +1726,7 @@ case 7: _out: {} } -#line 727 "parser.rl" +#line 738 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; @@ -1768,7 +1779,7 @@ case 7: -#line 1772 "parser.c" +#line 1783 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 22}; enum {JSON_array_error = 0}; @@ -1776,7 +1787,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 804 "parser.rl" +#line 815 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1789,14 +1800,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul long stack_head = json->stack->head; -#line 1793 "parser.c" +#line 1804 "parser.c" { cs = JSON_array_start; } -#line 816 "parser.rl" +#line 827 "parser.rl" -#line 1800 "parser.c" +#line 1811 "parser.c" { short _widec; if ( p == pe ) @@ -1836,7 +1847,7 @@ case 2: goto st2; goto st0; tr2: -#line 784 "parser.rl" +#line 795 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1851,12 +1862,12 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1855 "parser.c" +#line 1866 "parser.c" _widec = (*p); if ( 44 <= (*p) && (*p) <= 44 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1903,14 +1914,14 @@ case 7: goto st3; goto st7; tr4: -#line 796 "parser.rl" +#line 807 "parser.rl" { p--; {p++; cs = 22; goto _out;} } goto st22; st22: if ( ++p == pe ) goto _test_eof22; case 22: -#line 1914 "parser.c" +#line 1925 "parser.c" goto st0; st8: if ( ++p == pe ) @@ -1978,13 +1989,13 @@ case 13: if ( 10 <= (*p) && (*p) <= 10 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( 
(*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -1992,19 +2003,19 @@ case 13: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2043,13 +2054,13 @@ case 14: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2068,20 +2079,20 @@ case 15: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2104,13 +2115,13 @@ case 16: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 46 ) { 
@@ -2118,19 +2129,19 @@ case 16: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2154,20 +2165,20 @@ case 17: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 805 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2239,7 +2250,7 @@ case 21: _out: {} } -#line 817 "parser.rl" +#line 828 "parser.rl" if(cs >= JSON_array_first_final) { long count = json->stack->head - stack_head; @@ -2413,7 +2424,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 2417 "parser.c" +#line 2428 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -2421,7 +2432,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 1008 "parser.rl" +#line 1019 "parser.rl" static int @@ -2442,15 +2453,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 2446 "parser.c" +#line 2457 "parser.c" { cs = JSON_string_start; } -#line 1028 "parser.rl" +#line 1039 "parser.rl" json->memo = p; -#line 2454 "parser.c" +#line 2465 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2475,7 +2486,7 @@ case 2: goto st0; goto st2; 
tr2: -#line 995 "parser.rl" +#line 1006 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { @@ -2485,14 +2496,14 @@ case 2: {p = (( p + 1))-1;} } } -#line 1005 "parser.rl" +#line 1016 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 2496 "parser.c" +#line 2507 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -2568,7 +2579,7 @@ case 7: _out: {} } -#line 1030 "parser.rl" +#line 1041 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2721,7 +2732,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2725 "parser.c" +#line 2736 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2729,7 +2740,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 1196 "parser.rl" +#line 1207 "parser.rl" /* @@ -2758,16 +2769,16 @@ static VALUE cParser_parse(VALUE self) json->stack = &stack; -#line 2762 "parser.c" +#line 2773 "parser.c" { cs = JSON_start; } -#line 1224 "parser.rl" +#line 1235 "parser.rl" p = json->source; pe = p + json->len; -#line 2771 "parser.c" +#line 2782 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2801,7 +2812,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1188 "parser.rl" +#line 1199 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2811,7 +2822,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2815 "parser.c" +#line 2826 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2900,7 +2911,7 @@ case 9: _out: {} } -#line 1227 "parser.rl" +#line 1238 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); @@ -2936,16 +2947,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE 
opts) json->stack = &stack; -#line 2940 "parser.c" +#line 2951 "parser.c" { cs = JSON_start; } -#line 1262 "parser.rl" +#line 1273 "parser.rl" p = json->source; pe = p + json->len; -#line 2949 "parser.c" +#line 2960 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2979,7 +2990,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1188 "parser.rl" +#line 1199 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2989,7 +3000,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2993 "parser.c" +#line 3004 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -3078,7 +3089,7 @@ case 9: _out: {} } -#line 1265 "parser.rl" +#line 1276 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 6d4cc7a5b..38e740359 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -15,6 +15,17 @@ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbo static int binary_encindex; static int utf8_encindex; +#ifdef HAVE_RB_CATEGORY_WARN +# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define json_deprecated(message) rb_warn(message) +#endif + +static const char deprecated_create_additions_warning[] = + "JSON.load implicit support for `create_additions: true` is deprecated " + "and will be removed in 3.0, use JSON.unsafe_load or explicitly " + "pass `create_additions: true`"; + #ifndef HAVE_RB_GC_MARK_LOCATIONS // For TruffleRuby void rb_gc_mark_locations(const VALUE *start, const VALUE *end) @@ -554,7 +565,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { if (json->deprecated_create_additions) { - rb_warn("JSON.load 
implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + json_deprecated(deprecated_create_additions_warning); } *result = rb_funcall(klass, i_json_create, 1, *result); } diff --git a/test/json/json_addition_test.rb b/test/json/json_addition_test.rb index d78ae55c3..1eb269c2f 100644 --- a/test/json/json_addition_test.rb +++ b/test/json/json_addition_test.rb @@ -163,7 +163,7 @@ def test_core end def test_deprecated_load_create_additions - assert_warning(/use JSON\.unsafe_load/) do + assert_deprecated_warning(/use JSON\.unsafe_load/) do JSON.load(JSON.dump(Time.now)) end end From 96bd97c61eb379e8a088693334ffc09ee4e45a91 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 6 Nov 2024 11:53:26 +0100 Subject: [PATCH 71/75] parser.rl: parse_string implement a fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we assume most string don't contain any escape sequence we can avoid a lot of costly operations when it holds true. Before: ``` == Parsing activitypub.json (58160 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 884.000 i/100ms oj 789.000 i/100ms Oj::Parser 943.000 i/100ms rapidjson 584.000 i/100ms Calculating ------------------------------------- json 8.897k (± 1.3%) i/s (112.40 μs/i) - 45.084k in 5.068520s oj 7.967k (± 1.5%) i/s (125.52 μs/i) - 40.239k in 5.051985s Oj::Parser 9.564k (± 1.4%) i/s (104.56 μs/i) - 48.093k in 5.029626s rapidjson 5.947k (± 1.4%) i/s (168.16 μs/i) - 29.784k in 5.009437s Comparison: json: 8896.5 i/s Oj::Parser: 9563.8 i/s - 1.08x faster oj: 7966.8 i/s - 1.12x slower rapidjson: 5946.7 i/s - 1.50x slower == Parsing twitter.json (567916 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 
7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 83.000 i/100ms oj 64.000 i/100ms Oj::Parser 77.000 i/100ms rapidjson 54.000 i/100ms Calculating ------------------------------------- json 823.083 (± 1.8%) i/s (1.21 ms/i) - 4.150k in 5.043805s oj 632.538 (± 1.4%) i/s (1.58 ms/i) - 3.200k in 5.060073s Oj::Parser 769.122 (± 1.8%) i/s (1.30 ms/i) - 3.850k in 5.007501s rapidjson 548.494 (± 1.5%) i/s (1.82 ms/i) - 2.754k in 5.022153s Comparison: json: 823.1 i/s Oj::Parser: 769.1 i/s - 1.07x slower oj: 632.5 i/s - 1.30x slower rapidjson: 548.5 i/s - 1.50x slower == Parsing citm_catalog.json (1727030 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 41.000 i/100ms oj 34.000 i/100ms Oj::Parser 45.000 i/100ms rapidjson 39.000 i/100ms Calculating ------------------------------------- json 427.162 (± 1.2%) i/s (2.34 ms/i) - 2.173k in 5.087666s oj 351.463 (± 2.8%) i/s (2.85 ms/i) - 1.768k in 5.035149s Oj::Parser 461.849 (± 3.7%) i/s (2.17 ms/i) - 2.340k in 5.074461s rapidjson 395.155 (± 1.8%) i/s (2.53 ms/i) - 1.989k in 5.034927s Comparison: json: 427.2 i/s Oj::Parser: 461.8 i/s - 1.08x faster rapidjson: 395.2 i/s - 1.08x slower oj: 351.5 i/s - 1.22x slower ``` After: ``` == Parsing activitypub.json (58160 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 
7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 953.000 i/100ms oj 813.000 i/100ms Oj::Parser 956.000 i/100ms rapidjson 563.000 i/100ms Calculating ------------------------------------- json 9.525k (± 1.2%) i/s (104.98 μs/i) - 47.650k in 5.003252s oj 8.117k (± 0.5%) i/s (123.20 μs/i) - 40.650k in 5.008283s Oj::Parser 9.590k (± 3.2%) i/s (104.27 μs/i) - 48.756k in 5.089794s rapidjson 6.020k (± 0.9%) i/s (166.10 μs/i) - 30.402k in 5.050155s Comparison: json: 9525.3 i/s Oj::Parser: 9590.1 i/s - same-ish: difference falls within error oj: 8116.7 i/s - 1.17x slower rapidjson: 6020.5 i/s - 1.58x slower == Parsing twitter.json (567916 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 87.000 i/100ms oj 64.000 i/100ms Oj::Parser 75.000 i/100ms rapidjson 55.000 i/100ms Calculating ------------------------------------- json 866.563 (± 0.8%) i/s (1.15 ms/i) - 4.350k in 5.020138s oj 643.567 (± 0.8%) i/s (1.55 ms/i) - 3.264k in 5.072101s Oj::Parser 777.346 (± 3.5%) i/s (1.29 ms/i) - 3.900k in 5.023933s rapidjson 557.158 (± 0.7%) i/s (1.79 ms/i) - 2.805k in 5.034731s Comparison: json: 866.6 i/s Oj::Parser: 777.3 i/s - 1.11x slower oj: 643.6 i/s - 1.35x slower rapidjson: 557.2 i/s - 1.56x slower == Parsing citm_catalog.json (1727030 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 
7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 41.000 i/100ms oj 35.000 i/100ms Oj::Parser 40.000 i/100ms rapidjson 39.000 i/100ms Calculating ------------------------------------- json 429.216 (± 1.2%) i/s (2.33 ms/i) - 2.173k in 5.063351s oj 354.755 (± 1.1%) i/s (2.82 ms/i) - 1.785k in 5.032374s Oj::Parser 465.114 (± 3.7%) i/s (2.15 ms/i) - 2.360k in 5.081634s rapidjson 387.135 (± 1.3%) i/s (2.58 ms/i) - 1.950k in 5.037787s Comparison: json: 429.2 i/s Oj::Parser: 465.1 i/s - 1.08x faster rapidjson: 387.1 i/s - 1.11x slower oj: 354.8 i/s - 1.21x slower ``` --- ext/json/ext/parser/parser.c | 145 ++++++++++++++++++++++------------ ext/json/ext/parser/parser.rl | 60 ++++++++++---- 2 files changed, 140 insertions(+), 65 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 85bdd7922..db9b13a68 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -2302,6 +2302,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } +static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +{ + size_t bufferSize = stringEnd - string; + + if (is_name) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + return build_string(string, stringEnd, intern, symbolize); +} + static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { size_t bufferSize = stringEnd - string; @@ -2323,7 +2343,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } pe = memchr(p, '\\', bufferSize); - if (RB_LIKELY(pe == NULL)) { + if (RB_UNLIKELY(pe == NULL)) { 
return build_string(string, stringEnd, intern, symbolize); } @@ -2424,15 +2444,15 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 2428 "parser.c" +#line 2448 "parser.c" enum {JSON_string_start = 1}; -enum {JSON_string_first_final = 8}; +enum {JSON_string_first_final = 9}; enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 1019 "parser.rl" +#line 1051 "parser.rl" static int @@ -2453,15 +2473,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 2457 "parser.c" +#line 2477 "parser.c" { cs = JSON_string_start; } -#line 1039 "parser.rl" +#line 1071 "parser.rl" json->memo = p; -#line 2465 "parser.c" +#line 2485 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2486,47 +2506,56 @@ case 2: goto st0; goto st2; tr2: -#line 1006 "parser.rl" +#line 1033 "parser.rl" + { + *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} + } +#line 1026 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - if (NIL_P(*result)) { - p--; - {p++; cs = 8; goto _out;} - } else { - {p = (( p + 1))-1;} - } + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} } -#line 1016 "parser.rl" - { p--; {p++; cs = 8; goto _out;} } - goto st8; -st8: + goto st9; +tr6: +#line 1026 "parser.rl" + { + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} + } + goto st9; +st9: if ( ++p == pe ) - goto _test_eof8; -case 8: -#line 2507 "parser.c" + goto _test_eof9; +case 9: +#line 2538 "parser.c" goto st0; st3: if ( ++p == pe ) goto _test_eof3; case 3: if ( 
(*p) == 117 ) - goto st4; + goto st5; if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) goto st0; - goto st2; + goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st5; - } else - goto st5; - goto st0; + switch( (*p) ) { + case 34: goto tr6; + case 92: goto st3; + } + if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) + goto st0; + goto st4; st5: if ( ++p == pe ) goto _test_eof5; @@ -2559,27 +2588,41 @@ case 6: case 7: if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) - goto st2; + goto st8; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st8; + } else + goto st8; + goto st0; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st4; } else if ( (*p) > 70 ) { if ( 97 <= (*p) && (*p) <= 102 ) - goto st2; + goto st4; } else - goto st2; + goto st4; goto st0; } _test_eof2: cs = 2; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; _test_eof: {} _out: {} } -#line 1041 "parser.rl" +#line 1073 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2732,7 +2775,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2736 "parser.c" +#line 2779 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2740,7 +2783,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 1207 "parser.rl" +#line 1239 "parser.rl" /* @@ -2769,16 +2812,16 @@ static VALUE cParser_parse(VALUE self) json->stack = &stack; -#line 2773 "parser.c" +#line 2816 "parser.c" { cs = JSON_start; } -#line 1235 "parser.rl" +#line 
1267 "parser.rl" p = json->source; pe = p + json->len; -#line 2782 "parser.c" +#line 2825 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2812,7 +2855,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1199 "parser.rl" +#line 1231 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2822,7 +2865,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2826 "parser.c" +#line 2869 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2911,7 +2954,7 @@ case 9: _out: {} } -#line 1238 "parser.rl" +#line 1270 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); @@ -2947,16 +2990,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) json->stack = &stack; -#line 2951 "parser.c" +#line 2994 "parser.c" { cs = JSON_start; } -#line 1273 "parser.rl" +#line 1305 "parser.rl" p = json->source; pe = p + json->len; -#line 2960 "parser.c" +#line 3003 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2990,7 +3033,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1199 "parser.rl" +#line 1231 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -3000,7 +3043,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 3004 "parser.c" +#line 3047 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -3089,7 +3132,7 @@ case 9: _out: {} } -#line 1276 "parser.rl" +#line 1308 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 38e740359..06f23fa8b 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -658,10 +658,10 @@ main := ignore* ( Vtrue @parse_true | VNaN @parse_nan | VInfinity @parse_infinity | - begin_number >parse_number | - begin_string >parse_string | - begin_array 
>parse_array | - begin_object >parse_object + begin_number @parse_number | + begin_string @parse_string | + begin_array @parse_array | + begin_object @parse_object ) ignore* %*exit; }%% @@ -876,6 +876,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } +static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +{ + size_t bufferSize = stringEnd - string; + + if (is_name) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + return build_string(string, stringEnd, intern, symbolize); +} + static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { size_t bufferSize = stringEnd - string; @@ -897,7 +917,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } pe = memchr(p, '\\', bufferSize); - if (RB_LIKELY(pe == NULL)) { + if (RB_UNLIKELY(pe == NULL)) { return build_string(string, stringEnd, intern, symbolize); } @@ -1003,19 +1023,31 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE write data; - action parse_string { + action parse_complex_string { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - if (NIL_P(*result)) { - fhold; - fbreak; - } else { - fexec p + 1; - } + fexec p + 1; + fhold; + fbreak; } - action exit { fhold; fbreak; } + action parse_simple_string { + *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + fexec p + 1; + fhold; + fbreak; + } - main := '"' 
((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit; + double_quote = '"'; + escape = '\\'; + control = 0..0x1f; + simple = any - escape - double_quote - control; + + main := double_quote ( + (simple*)( + (double_quote) @parse_simple_string | + ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string + ) + ); }%% static int From fb82373612d59454a52330e8cfb533fc250546a4 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 6 Nov 2024 19:56:34 +0900 Subject: [PATCH 72/75] Fix right shift warnings Ignoring `CHAR_BITS` > 8 platform, as far as `ch` indexes `escape_table` that is hard-coded as 256 elements. ``` ../../../../src/ext/json/generator/generator.c(121): warning C4333: '>>': right shift by too large amount, data loss ../../../../src/ext/json/generator/generator.c(122): warning C4333: '>>': right shift by too large amount, data loss ../../../../src/ext/json/generator/generator.c(243): warning C4333: '>>': right shift by too large amount, data loss ../../../../src/ext/json/generator/generator.c(244): warning C4333: '>>': right shift by too large amount, data loss ../../../../src/ext/json/generator/generator.c(291): warning C4333: '>>': right shift by too large amount, data loss ../../../../src/ext/json/generator/generator.c(292): warning C4333: '>>': right shift by too large amount, data loss ``` --- ext/json/ext/generator/generator.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 80539af6c..c4f356ac6 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -118,8 +118,8 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca case '\r': fbuffer_append(out_buffer, "\\r", 2); break; case '\t': fbuffer_append(out_buffer, "\\t", 2); break; default: 
{ - scratch[2] = hexdig[ch >> 12]; - scratch[3] = hexdig[(ch >> 8) & 0xf]; + scratch[2] = '0'; + scratch[3] = '0'; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; fbuffer_append(out_buffer, scratch, 6); @@ -240,8 +240,8 @@ static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char esc case '\r': fbuffer_append(out_buffer, "\\r", 2); break; case '\t': fbuffer_append(out_buffer, "\\t", 2); break; default: - scratch[2] = hexdig[ch >> 12]; - scratch[3] = hexdig[(ch >> 8) & 0xf]; + scratch[2] = '0'; + scratch[3] = '0'; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; fbuffer_append(out_buffer, scratch, 6); @@ -288,8 +288,8 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, cons case '\r': fbuffer_append(out_buffer, "\\r", 2); break; case '\t': fbuffer_append(out_buffer, "\\t", 2); break; default: { - scratch[2] = hexdig[ch >> 12]; - scratch[3] = hexdig[(ch >> 8) & 0xf]; + scratch[2] = '0'; + scratch[3] = '0'; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; fbuffer_append(out_buffer, scratch, 6); From 3a4dc9e1b48b34bf07d79bc2653b8f7dc084a363 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 6 Nov 2024 12:58:23 +0100 Subject: [PATCH 73/75] Implement a fast path for integer parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `rb_cstr2inum` isn't very fast because it handles tons of different scenarios, and also require a NULL terminated string which forces us to copy the number into a secondary buffer. But since the parser already computed the length, we can much more cheaply do this with a very simple function as long as the number is small enough to fit into a native type (`long long`). If the number is too long, we can fallback to the `rb_cstr2inum` slowpath. Before: ``` == Parsing citm_catalog.json (1727030 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 
7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 40.000 i/100ms oj 35.000 i/100ms Oj::Parser 45.000 i/100ms rapidjson 38.000 i/100ms Calculating ------------------------------------- json 425.941 (± 1.9%) i/s (2.35 ms/i) - 2.160k in 5.072833s oj 349.617 (± 1.7%) i/s (2.86 ms/i) - 1.750k in 5.006953s Oj::Parser 464.767 (± 1.7%) i/s (2.15 ms/i) - 2.340k in 5.036381s rapidjson 382.413 (± 2.4%) i/s (2.61 ms/i) - 1.938k in 5.070757s Comparison: json: 425.9 i/s Oj::Parser: 464.8 i/s - 1.09x faster rapidjson: 382.4 i/s - 1.11x slower oj: 349.6 i/s - 1.22x slower ``` After: ``` == Parsing citm_catalog.json (1727030 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 46.000 i/100ms oj 33.000 i/100ms Oj::Parser 45.000 i/100ms rapidjson 39.000 i/100ms Calculating ------------------------------------- json 462.332 (± 3.2%) i/s (2.16 ms/i) - 2.346k in 5.080504s oj 351.140 (± 1.1%) i/s (2.85 ms/i) - 1.782k in 5.075616s Oj::Parser 473.500 (± 1.3%) i/s (2.11 ms/i) - 2.385k in 5.037695s rapidjson 395.052 (± 3.5%) i/s (2.53 ms/i) - 1.989k in 5.042275s Comparison: json: 462.3 i/s Oj::Parser: 473.5 i/s - same-ish: difference falls within error rapidjson: 395.1 i/s - 1.17x slower oj: 351.1 i/s - 1.32x slower ``` --- ext/json/ext/parser/parser.c | 166 ++++++++++++++++++++-------------- ext/json/ext/parser/parser.rl | 34 ++++++- 2 files changed, 126 insertions(+), 74 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index db9b13a68..a5c918fa1 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1488,20 +1488,42 @@ enum {JSON_integer_en_main = 1}; #line 695 "parser.rl" +#define MAX_FAST_INTEGER_SIZE 18 +static inline VALUE fast_parse_integer(char *p, char *pe) +{ + bool negative = false; + if (*p == '-') { + negative = true; + p++; + } + + long long memo = 0; + while (p < 
pe) { + memo *= 10; + memo += *p - '0'; + p++; + } + + if (negative) { + memo = -memo; + } + return LL2NUM(memo); +} + static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; -#line 1497 "parser.c" +#line 1519 "parser.c" { cs = JSON_integer_start; } -#line 702 "parser.rl" +#line 724 "parser.rl" json->memo = p; -#line 1505 "parser.c" +#line 1527 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1542,7 +1564,7 @@ case 3: if ( ++p == pe ) goto _test_eof4; case 4: -#line 1546 "parser.c" +#line 1568 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1561,14 +1583,18 @@ case 5: _out: {} } -#line 704 "parser.rl" +#line 726 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { + *result = fast_parse_integer(json->memo, p); + } else { + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + } return p + 1; } else { return NULL; @@ -1576,7 +1602,7 @@ case 5: } -#line 1580 "parser.c" +#line 1606 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -1584,7 +1610,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 729 "parser.rl" +#line 755 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1592,15 +1618,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1596 "parser.c" +#line 1622 "parser.c" { cs = JSON_float_start; } -#line 736 "parser.rl" +#line 762 "parser.rl" json->memo = p; -#line 1604 "parser.c" +#line 1630 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1658,14 +1684,14 
@@ case 8: goto st0; goto tr9; tr9: -#line 723 "parser.rl" +#line 749 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1669 "parser.c" +#line 1695 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1726,7 +1752,7 @@ case 7: _out: {} } -#line 738 "parser.rl" +#line 764 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; @@ -1779,7 +1805,7 @@ case 7: -#line 1783 "parser.c" +#line 1809 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 22}; enum {JSON_array_error = 0}; @@ -1787,7 +1813,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 815 "parser.rl" +#line 841 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1800,14 +1826,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul long stack_head = json->stack->head; -#line 1804 "parser.c" +#line 1830 "parser.c" { cs = JSON_array_start; } -#line 827 "parser.rl" +#line 853 "parser.rl" -#line 1811 "parser.c" +#line 1837 "parser.c" { short _widec; if ( p == pe ) @@ -1847,7 +1873,7 @@ case 2: goto st2; goto st0; tr2: -#line 795 "parser.rl" +#line 821 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1862,12 +1888,12 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1866 "parser.c" +#line 1892 "parser.c" _widec = (*p); if ( 44 <= (*p) && (*p) <= 44 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1914,14 +1940,14 @@ case 7: goto st3; goto st7; tr4: -#line 807 "parser.rl" +#line 833 "parser.rl" { p--; {p++; cs = 22; goto _out;} } goto st22; st22: if ( ++p == pe ) goto _test_eof22; case 22: -#line 1925 "parser.c" +#line 1951 "parser.c" goto st0; st8: if ( ++p == pe ) @@ -1989,13 +2015,13 @@ case 13: if ( 10 <= (*p) && (*p) <= 10 ) { _widec = 
(short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -2003,19 +2029,19 @@ case 13: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2054,13 +2080,13 @@ case 14: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2079,20 +2105,20 @@ case 15: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2115,13 +2141,13 @@ case 16: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + 
((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 46 ) { @@ -2129,19 +2155,19 @@ case 16: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2165,20 +2191,20 @@ case 17: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 805 "parser.rl" +#line 831 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2250,7 +2276,7 @@ case 21: _out: {} } -#line 828 "parser.rl" +#line 854 "parser.rl" if(cs >= JSON_array_first_final) { long count = json->stack->head - stack_head; @@ -2444,7 +2470,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 2448 "parser.c" +#line 2474 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 9}; enum {JSON_string_error = 0}; @@ -2452,7 +2478,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 1051 "parser.rl" +#line 1077 "parser.rl" static int @@ -2473,15 +2499,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 2477 "parser.c" +#line 2503 "parser.c" { cs = JSON_string_start; } -#line 1071 "parser.rl" +#line 1097 "parser.rl" 
json->memo = p; -#line 2485 "parser.c" +#line 2511 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2506,14 +2532,14 @@ case 2: goto st0; goto st2; tr2: -#line 1033 "parser.rl" +#line 1059 "parser.rl" { *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} p--; {p++; cs = 9; goto _out;} } -#line 1026 "parser.rl" +#line 1052 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} @@ -2522,7 +2548,7 @@ case 2: } goto st9; tr6: -#line 1026 "parser.rl" +#line 1052 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} @@ -2534,7 +2560,7 @@ case 2: if ( ++p == pe ) goto _test_eof9; case 9: -#line 2538 "parser.c" +#line 2564 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -2622,7 +2648,7 @@ case 8: _out: {} } -#line 1073 "parser.rl" +#line 1099 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2775,7 +2801,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2779 "parser.c" +#line 2805 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2783,7 +2809,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 1239 "parser.rl" +#line 1265 "parser.rl" /* @@ -2812,16 +2838,16 @@ static VALUE cParser_parse(VALUE self) json->stack = &stack; -#line 2816 "parser.c" +#line 2842 "parser.c" { cs = JSON_start; } -#line 1267 "parser.rl" +#line 1293 "parser.rl" p = json->source; pe = p + json->len; -#line 2825 "parser.c" +#line 2851 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2855,7 +2881,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1231 "parser.rl" +#line 1257 "parser.rl" { 
char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2865,7 +2891,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2869 "parser.c" +#line 2895 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2954,7 +2980,7 @@ case 9: _out: {} } -#line 1270 "parser.rl" +#line 1296 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); @@ -2990,16 +3016,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) json->stack = &stack; -#line 2994 "parser.c" +#line 3020 "parser.c" { cs = JSON_start; } -#line 1305 "parser.rl" +#line 1331 "parser.rl" p = json->source; pe = p + json->len; -#line 3003 "parser.c" +#line 3029 "parser.c" { if ( p == pe ) goto _test_eof; @@ -3033,7 +3059,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1231 "parser.rl" +#line 1257 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -3043,7 +3069,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 3047 "parser.c" +#line 3073 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -3132,7 +3158,7 @@ case 9: _out: {} } -#line 1308 "parser.rl" +#line 1334 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 06f23fa8b..9620b1964 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -694,6 +694,28 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? 
@exit); }%% +#define MAX_FAST_INTEGER_SIZE 18 +static inline VALUE fast_parse_integer(char *p, char *pe) +{ + bool negative = false; + if (*p == '-') { + negative = true; + p++; + } + + long long memo = 0; + while (p < pe) { + memo *= 10; + memo += *p - '0'; + p++; + } + + if (negative) { + memo = -memo; + } + return LL2NUM(memo); +} + static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; @@ -704,10 +726,14 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res if (cs >= JSON_integer_first_final) { long len = p - json->memo; - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { + *result = fast_parse_integer(json->memo, p); + } else { + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + } return p + 1; } else { return NULL; From f5812d896e8e9b3ad4662cd522af697f8495ea54 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 6 Nov 2024 14:10:16 +0100 Subject: [PATCH 74/75] Update benchmark annotations And stop benchmarking the Generator::State re-use, as it no longer make a sizeable difference. 
--- benchmark/encoder.rb | 26 +++++++++++--------------- benchmark/parser.rb | 21 ++++++++++----------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index 39d3e9061..acc5fa07b 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -17,7 +17,6 @@ def implementations(ruby_obj) state = JSON::State.new(JSON.dump_default_options) { - json_state: ["json (reuse)", proc { state.generate(ruby_obj) }], json: ["json", proc { JSON.generate(ruby_obj) }], oj: ["oj", proc { Oj.dump(ruby_obj) }], } @@ -58,27 +57,24 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ # NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] # On the first two micro benchmarks, the limitting factor is the fixed cost of initializing the -# generator state. Since `JSON.generate` now lazily allocate the `State` object we're now ~10% faster +# generator state. Since `JSON.generate` now lazily allocate the `State` object we're now ~10-20% faster # than `Oj.dump`. benchmark_encoding "small mixed", [1, "string", { a: 1, b: 2 }, [3, 4, 5]] benchmark_encoding "small nested array", [[1,2,3,4,5]]*10 - -# On small hash specifically, we're just on par with `Oj.dump`. Would be worth investigating why -# Hash serialization doesn't perform as well as other types. benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" } -# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~10% slower when dealing -# with mostly multi-byte characters. This is a tradeoff. -benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state) -benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state) +# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~50% slower when dealing +# with mostly multi-byte characters. 
There's likely some gains left to be had in multi-byte handling. +benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500) +benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500) # On these benchmarks we perform well, we're on par or better. benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state) -benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json"), except: %i(json_state) -benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state) +benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json") +benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json") -# On twitter.json we're still about 10% slower, this is worth investigating. -benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state) +# On twitter.json we're still about 6% slower, this is worth investigating. +benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json") # This benchmark spent the overwhelming majority of its time in `ruby_dtoa`. We rely on Ruby's implementation # which uses a relatively old version of dtoa.c from David M. Gay. @@ -89,8 +85,8 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ # but all these are implemented in C++11 or newer, making it hard if not impossible to include them. # Short of a pure C99 implementation of these newer algorithms, there isn't much that can be done to match # Oj speed without losing precision. -benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false, except: %i(json_state) +benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false # We're about 10% faster when `to_json` calls are involved, but this wasn't particularly optimized, there might be # opportunities here. 
-benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20, except: %i(json_state) +benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20 diff --git a/benchmark/parser.rb b/benchmark/parser.rb index 4425728a4..6952f3c33 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -28,27 +28,26 @@ def benchmark_parsing(name, json_output) # NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] -# Oj::Parser is very significanly faster (1.80x) on the nested array benchmark. -benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10) - -# Oj::Parser is significanly faster (~1.5x) on the next 4 benchmarks in large part because its +# Oj::Parser is significantly faster (~1.3x) on the next 3 micro-benchmarks in large part because its # cache is persisted across calls. That's not something we can do with the current API, we'd # need to expose a stateful API as well, but that's no really desirable. -# Other than that we're faster than regular `Oj.load` by a good margin. +# Other than that we're faster than regular `Oj.load` by a good margin (between 1.3x and 2.4x). +benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10) benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }) - benchmark_parsing "test from oj", < Date: Wed, 6 Nov 2024 16:10:15 +0100 Subject: [PATCH 75/75] Release 2.8.0 --- CHANGES.md | 9 ++++++--- lib/json/version.rb | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 7dc91ca3f..0aee749b9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,12 +1,15 @@ # Changes -### UNRELEASED +### 2024-11-06 (2.8.0) * Emit a deprecation warning when `JSON.load` create custom types without the `create_additions` option being explictly enabled.
* Prefer to use `JSON.unsafe_load(string)` or `JSON.load(string, create_additions: true)`. * Emit a deprecation warning when serializing valid UTF-8 strings encoded in `ASCII_8BIT` aka `BINARY`. -* Bump required_ruby_version to 2.7. -* More performance improvments to `JSON.dump` and `JSON.generate`. +* Bump required Ruby version to 2.7. +* Add support for optionally parsing trailing commas, via `allow_trailing_comma: true`, which in conjunction with the + pre-existing support for comments, makes it suitable to parse `jsonc` documents. +* Many performance improvements to `JSON.parse` and `JSON.load`, up to `1.7x` faster on real world documents. +* Some minor performance improvements to `JSON.dump` and `JSON.generate`. ### 2024-11-04 (2.7.6) diff --git a/lib/json/version.rb b/lib/json/version.rb index ee1312c7d..363bdbead 100644 --- a/lib/json/version.rb +++ b/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.8.0.alpha1' + VERSION = '2.8.0' end