diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index afc4ae294..27171b90d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: uses: ruby/actions/.github/workflows/ruby_versions.yml@master with: engine: cruby-jruby - min_version: 2.3 + min_version: 2.7 host: needs: ruby-versions @@ -20,23 +20,17 @@ jobs: fail-fast: false matrix: os: - - ubuntu-20.04 - ubuntu-22.04 - - macos-12 - macos-13 - macos-14 - windows-latest ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }} include: - { os: windows-latest , ruby: mswin } # ruby/ruby windows CI - - { os: ubuntu-latest , ruby: jruby-9.3 } # Ruby 2.7 - { os: ubuntu-latest , ruby: jruby-9.4 } # Ruby 3.1 - { os: macos-latest , ruby: truffleruby-head } - { os: ubuntu-latest , ruby: truffleruby-head } exclude: - - { os: macos-14, ruby: 2.3 } - - { os: macos-14, ruby: 2.4 } - - { os: macos-14, ruby: 2.5 } - { os: windows-latest, ruby: jruby } - { os: windows-latest, ruby: jruby-head } @@ -49,8 +43,6 @@ jobs: ruby-version: ${{ matrix.ruby }} apt-get: ragel brew: ragel - # only needed for Ruby 2.3 - mingw: ragel - run: | bundle config --without benchmark @@ -58,9 +50,32 @@ jobs: - run: rake compile - - run: rake test + - run: rake test JSON_COMPACT=1 - run: rake build - run: gem install pkg/*.gem if: ${{ matrix.ruby != '3.2' }} + + valgrind: + name: Ruby memcheck + runs-on: ubuntu-latest + strategy: + fail-fast: false + + steps: + - uses: actions/checkout@v3 + + - name: Set up Ruby + uses: ruby/setup-ruby-pkgs@v1 + with: + ruby-version: "3.3" + apt-get: ragel valgrind + + - run: | + bundle config --without benchmark + bundle install + + - run: rake compile + + - run: rake valgrind JSON_COMPACT=1 diff --git a/CHANGES.md b/CHANGES.md index 665635bb3..0aee749b9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,33 @@ # Changes +### 2024-11-06 (2.8.0) + +* Emit a deprecation warning when `JSON.load` creates custom types without the `create_additions` option being explicitly enabled. + * Prefer to use `JSON.unsafe_load(string)` or `JSON.load(string, create_additions: true)`. +* Emit a deprecation warning when serializing valid UTF-8 strings encoded in `ASCII_8BIT` aka `BINARY`. +* Bump required Ruby version to 2.7. +* Add support for optionally parsing trailing commas, via `allow_trailing_comma: true`, which, in conjunction with the + pre-existing support for comments, makes it suitable to parse `jsonc` documents. +* Many performance improvements to `JSON.parse` and `JSON.load`, up to `1.7x` faster on real-world documents. +* Some minor performance improvements to `JSON.dump` and `JSON.generate`. + +### 2024-11-04 (2.7.6) + +* Fix a regression in `JSON.generate` when dealing with Hash keys that are string subclasses: call `to_json` on them. + +### 2024-10-25 (2.7.5) + +* Fix a memory leak when `#to_json` methods raise an exception. +* Gracefully handle formatting configs being set to `nil` instead of `""`. +* Workaround another issue caused by conflicting versions of both `json_pure` and `json` being loaded. + +### 2024-10-25 (2.7.4) + +* Workaround a bug in RubyGems 3.4.8 and older (https://github.com/rubygems/rubygems/pull/6490). + This bug would cause some gems with native extensions to fail during compilation. +* Workaround different versions of `json` and `json_pure` being loaded (not officially supported). +* Make `json_pure` Ractor compatible. + ### 2024-10-24 (2.7.3) * Numerous performance optimizations in `JSON.generate` and `JSON.dump` (up to 2 times faster).
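To make the 2.8.0 deprecation entry above concrete, here is a minimal sketch (illustrative only, not part of the patch) assuming the optional `json/add/core` additions are loaded so that a `Range` round-trips; it shows the now-preferred explicit forms:

```ruby
require "json"
require "json/add/core" # opt-in additions: Range, Date, Exception, etc. gain #to_json/.json_create

payload = JSON.generate(1..10)
# => "{\"json_class\":\"Range\",\"data\":[1,10,false]}"

# Relying on JSON.load to revive the Range implicitly is what now emits a deprecation warning.
# Explicit alternatives:
JSON.unsafe_load(payload)                   # => 1..10, intended for trusted input only
JSON.parse(payload, create_additions: true) # => 1..10, additions explicitly opted into
```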
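Similarly, a small illustrative sketch (not part of the patch) of the new `allow_trailing_comma` parser option introduced in 2.8.0:

```ruby
require "json"

jsonc = '{"name": "json", "trailing": true,}' # note the trailing comma

JSON.parse(jsonc, allow_trailing_comma: true) # => {"name"=>"json", "trailing"=>true}
# Without the option, the same document raises JSON::ParserError.
```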
diff --git a/Gemfile b/Gemfile index 50249b0c7..98956c7e2 100644 --- a/Gemfile +++ b/Gemfile @@ -1,12 +1,10 @@ source 'https://rubygems.org' -if ENV['JSON'] == 'pure' - gemspec name: 'json_pure' -else - gemspec name: 'json' -end +gemspec group :development do + gem "ruby_memcheck" if RUBY_PLATFORM =~ /linux/i + gem "ostruct" gem "rake" gem "rake-compiler" gem "test-unit" diff --git a/README-json-jruby.md b/README-json-jruby.md index 7ea4f0f81..a66ebd621 100644 --- a/README-json-jruby.md +++ b/README-json-jruby.md @@ -3,7 +3,6 @@ JSON-JRuby JSON-JRuby is a port of Florian Frank's native [`json` library](http://json.rubyforge.org/) to JRuby. -It aims to be a perfect drop-in replacement for `json_pure`. Development version diff --git a/README.md b/README.md index 94a35d7bd..88fad3ebf 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,10 @@ ## Description This is an implementation of the JSON specification according to RFC 7159 -http://www.ietf.org/rfc/rfc7159.txt . There is two variants available: +http://www.ietf.org/rfc/rfc7159.txt . -* A pure ruby variant, that relies on the `strscan` extensions, which is - part of the ruby standard library. -* The quite a bit faster native extension variant, which is in parts - implemented in C or Java and comes with a parser generated by the [Ragel] - state machine compiler. - -Both variants of the JSON generator generate UTF-8 character sequences by -default. If an :ascii\_only option with a true value is given, they escape all +The JSON generator generates UTF-8 character sequences by default. +If an :ascii\_only option with a true value is given, it escapes all non-ASCII and control characters with \uXXXX escape sequences, and supports UTF-16 surrogate pairs in order to be able to generate the whole range of unicode code points. @@ -27,10 +21,6 @@ endpoint. ## Installation -It's recommended to use the extension variant of JSON, because it's faster than -the pure ruby variant. If you cannot build it on your system, you can settle -for the latter. - Install the gem and add to the application's Gemfile by executing: $ bundle add json @@ -39,12 +29,6 @@ If bundler is not being used to manage dependencies, install the gem by executin $ gem install json - -There is also a pure ruby json only variant of the gem, that can be installed -with: - - $ gem install json_pure - ## Usage To use JSON you can ```ruby require 'json' ``` -to load the installed variant (either the extension `'json'` or the pure -variant `'json_pure'`). If you have installed the extension variant, you can -pick either the extension variant or the pure variant by typing - -```ruby -require 'json/ext' -``` - -or - -```ruby -require 'json/pure' -``` - Now you can parse a JSON document into a ruby data structure by calling ```ruby @@ -82,50 +52,11 @@ You can also use the `pretty_generate` method (which formats the output more verbosely and nicely) or `fast_generate` (which doesn't do any of the security checks generate performs, e. g. nesting deepness checks). 
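As a brief illustration of the generator methods the README text above refers to (example code not part of the patch):

```ruby
require "json"

data = { "name" => "json", "tags" => [1, 2, 3] }

JSON.generate(data)        # => "{\"name\":\"json\",\"tags\":[1,2,3]}"
JSON.pretty_generate(data) # same document, indented across multiple lines
JSON.fast_generate(data)   # like generate, but skips safety checks such as the nesting depth limit
```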
-There are also the JSON and JSON[] methods which use parse on a String or -generate a JSON document from an array or hash: - -```ruby -document = JSON 'test' => 23 # => "{\"test\":23}" -document = JSON['test' => 23] # => "{\"test\":23}" -``` - -and - -```ruby -data = JSON '{"test":23}' # => {"test"=>23} -data = JSON['{"test":23}'] # => {"test"=>23} -``` - -You can choose to load a set of common additions to ruby core's objects if -you - -```ruby -require 'json/add/core' -``` - -After requiring this you can, e. g., serialise/deserialise Ruby ranges: - -```ruby -JSON JSON(1..10) # => 1..10 -``` - -To find out how to add JSON support to other or your own classes, read the -section "More Examples" below. - -## Serializing exceptions - -The JSON module doesn't extend `Exception` by default. If you convert an `Exception` -object to JSON, it will by default only include the exception message. - -To include the full details, you must either load the `json/add/core` mentioned -above, or specifically load the exception addition: - -```ruby -require 'json/add/exception' -``` +## Handling arbitrary types -## More Examples +> [!CAUTION] +> You should never use `JSON.unsafe_load` nor `JSON.parse(str, create_additions: true)` to parse untrusted user input, +> as it can lead to remote code execution vulnerabilities. To create a JSON document from a ruby data structure, you can call `JSON.generate` like that: @@ -191,7 +122,7 @@ JSON.parse json # => [1, 2, {"a"=>3.141}, false, true, nil, 4..10] json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10] # => "[1,2,{\"a\":3.141},false,true,null,{\"json_class\":\"Range\",\"data\":[4,10,false]}]" -JSON.parse json, :create_additions => true +JSON.unsafe_load json # => [1, 2, {"a"=>3.141}, false, true, nil, 4..10] ``` diff --git a/Rakefile b/Rakefile index e22a3ddd1..c5b518a1c 100644 --- a/Rakefile +++ b/Rakefile @@ -56,12 +56,8 @@ else RAGEL_DOTGEN = %w[rlgen-dot rlgen-cd ragel].find(&which) end -desc "Installing library (pure)" -task :install_pure do - ruby 'install.rb' -end - -task :install_ext_really do +desc "Installing library (extension)" +task :install => [ :compile ] do sitearchdir = CONFIG["sitearchdir"] cd 'ext' do for file in Dir["json/ext/*.#{CONFIG['DLEXT']}"] @@ -73,30 +69,6 @@ task :install_ext_really do end end -desc "Installing library (extension)" -task :install_ext => [ :compile, :install_pure, :install_ext_really ] - -desc "Installing library (extension)" -task :install => :install_ext - -task :check_env do - ENV.key?('JSON') or fail "JSON env var is required" -end - -desc "Testing library (pure ruby)" -task :test_pure => [ :set_env_pure, :check_env, :do_test_pure ] -task(:set_env_pure) { ENV['JSON'] = 'pure' } - -UndocumentedTestTask.new do |t| - t.name = 'do_test_pure' - t.test_files = FileList['test/json/*_test.rb'] - t.verbose = true - t.options = '-v' -end - -desc "Testing library (pure ruby and extension)" -task :test => [ :test_pure, :test_ext ] - namespace :gems do desc 'Install all development gems' task :install do @@ -177,16 +149,14 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' sh "gem build -o pkg/json-#{PKG_VERSION}-java.gem json.gemspec" end - desc "Testing library (jruby)" - task :test_ext => [ :set_env_ext, :create_jar, :check_env, :do_test_ext ] - task(:set_env_ext) { ENV['JSON'] = 'ext' } - UndocumentedTestTask.new do |t| - t.name = 'do_test_ext' + t.name = :test t.test_files = FileList['test/json/*_test.rb'] t.verbose = true t.options = '-v' end + desc "Testing library (jruby)" + task :test => 
[:create_jar ] file JRUBY_PARSER_JAR => :compile do cd 'java/src' do @@ -239,17 +209,26 @@ else task :compile => [ :ragel, EXT_PARSER_DL, EXT_GENERATOR_DL ] end - desc "Testing library (extension)" - task :test_ext => [ :set_env_ext, :check_env, :compile, :do_test_ext ] - task(:set_env_ext) { ENV['JSON'] = 'ext' } - UndocumentedTestTask.new do |t| - t.name = 'do_test_ext' + t.name = :test t.test_files = FileList['test/json/*_test.rb'] t.verbose = true t.options = '-v' end + desc "Testing library (extension)" + task :test => [ :compile ] + + begin + require "ruby_memcheck" + RubyMemcheck::TestTask.new(valgrind: [ :compile, :test ]) do |t| + t.test_files = FileList['test/json/*_test.rb'] + t.verbose = true + t.options = '-v' + end + rescue LoadError + end + desc "Update the tags file" task :tags do system 'ctags', *Dir['**/*.{rb,c,h,java}'] @@ -305,13 +284,11 @@ else desc "Create the gem packages" task :package do sh "gem build json.gemspec" - sh "gem build json_pure.gemspec" mkdir_p 'pkg' mv "json-#{PKG_VERSION}.gem", 'pkg' - mv "json_pure-#{PKG_VERSION}.gem", 'pkg' end - desc "Build all gems and archives for a new release of json and json_pure." + desc "Build all gems and archives for a new release of json" task :build => [ :clean, :package ] task :release => :build diff --git a/benchmark/data/activitypub.json b/benchmark/data/activitypub.json new file mode 100644 index 000000000..cd1d7bb3a --- /dev/null +++ b/benchmark/data/activitypub.json @@ -0,0 +1 @@ +{"@context":["https://www.w3.org/ns/activitystreams",{"ostatus":"http://ostatus.org#","atomUri":"ostatus:atomUri","inReplyToAtomUri":"ostatus:inReplyToAtomUri","conversation":"ostatus:conversation","sensitive":"as:sensitive","toot":"http://joinmastodon.org/ns#","votersCount":"toot:votersCount","blurhash":"toot:blurhash","focalPoint":{"@container":"@list","@id":"toot:focalPoint"},"Hashtag":"as:Hashtag"}],"id":"https://ruby.social/users/byroot/outbox?page=true","type":"OrderedCollectionPage","next":"https://ruby.social/users/byroot/outbox?max_id=112610149145350336\u0026page=true","prev":"https://ruby.social/users/byroot/outbox?min_id=113395951825326098\u0026page=true","partOf":"https://ruby.social/users/byroot/outbox","orderedItems":[{"id":"https://ruby.social/users/byroot/statuses/113395951825326098/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-30T10:41:49Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"object":{"id":"https://ruby.social/users/byroot/statuses/113395951825326098","type":"Note","summary":null,"inReplyTo":null,"published":"2024-10-30T10:41:49Z","url":"https://ruby.social/@byroot/113395951825326098","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113395951825326098","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-10-30:objectId=57644998:objectType=Conversation","content":"\u003cp\u003eHere\u0026#39;s the post I teased last week.\u003c/p\u003e\u003cp\u003eTL;DR; Average latency: -5%, p99 latency -10%\u003c/p\u003e\u003cp\u003e\u003ca href=\"https://railsatscale.com/2024-10-23-next-generation-oob-gc/\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003erailsatscale.com/2024-10-23-ne\u003c/span\u003e\u003cspan 
class=\"invisible\"\u003ext-generation-oob-gc/\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e","contentMap":{"en":"\u003cp\u003eHere\u0026#39;s the post I teased last week.\u003c/p\u003e\u003cp\u003eTL;DR; Average latency: -5%, p99 latency -10%\u003c/p\u003e\u003cp\u003e\u003ca href=\"https://railsatscale.com/2024-10-23-next-generation-oob-gc/\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003erailsatscale.com/2024-10-23-ne\u003c/span\u003e\u003cspan class=\"invisible\"\u003ext-generation-oob-gc/\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e"},"attachment":[],"tag":[],"replies":{"id":"https://ruby.social/users/byroot/statuses/113395951825326098/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113395951825326098/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113395951825326098/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113395951825326098/likes","type":"Collection","totalItems":7},"shares":{"id":"https://ruby.social/users/byroot/statuses/113395951825326098/shares","type":"Collection","totalItems":5}}},{"id":"https://ruby.social/users/byroot/statuses/113364382912445498/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-24T20:53:26Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://oisaur.com/users/renchap"],"object":{"id":"https://ruby.social/users/byroot/statuses/113364382912445498","type":"Note","summary":null,"inReplyTo":"https://oisaur.com/users/renchap/statuses/113364257424669603","published":"2024-10-24T20:53:26Z","url":"https://ruby.social/@byroot/113364382912445498","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://oisaur.com/users/renchap"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113364382912445498","inReplyToAtomUri":"https://oisaur.com/users/renchap/statuses/113364257424669603","conversation":"tag:ruby.social,2024-10-24:objectId=57167969:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://oisaur.com/@renchap\" class=\"u-url mention\"\u003e@\u003cspan\u003erenchap\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e for JSON generation yes.\u003c/p\u003e\u003cp\u003eIf you can turn that into an easy to run benchmark of some sort and somehow Oj is faster, I\u0026#39;m happy to take a look.\u003c/p\u003e\u003cp\u003eAnd even if it\u0026#39;s not faster, I\u0026#39;d also be happy to see if I can squeeze some more perf out of `json`.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://oisaur.com/@renchap\" class=\"u-url mention\"\u003e@\u003cspan\u003erenchap\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e for JSON generation yes.\u003c/p\u003e\u003cp\u003eIf you can turn that into an easy to run benchmark of some sort and somehow Oj is faster, I\u0026#39;m happy to take a look.\u003c/p\u003e\u003cp\u003eAnd even if it\u0026#39;s not faster, I\u0026#39;d also be happy to see if I can squeeze some more perf out of 
`json`.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://oisaur.com/users/renchap","name":"@renchap@oisaur.com"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113364382912445498/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113364382912445498/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113364382912445498/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113364382912445498/likes","type":"Collection","totalItems":2},"shares":{"id":"https://ruby.social/users/byroot/statuses/113364382912445498/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113361790648929484/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-24T09:54:11Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"object":{"id":"https://ruby.social/users/byroot/statuses/113361790648929484","type":"Note","summary":null,"inReplyTo":null,"published":"2024-10-24T09:54:11Z","url":"https://ruby.social/@byroot/113361790648929484","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113361790648929484","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-10-24:objectId=57167969:objectType=Conversation","content":"\u003cp\u003eI\u0026#39;ve just released json 2.7.3 with some bug fixes and lots of performance improvements: \u003ca href=\"https://github.com/ruby/json/releases/tag/v2.7.3\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/ruby/json/releases/\u003c/span\u003e\u003cspan class=\"invisible\"\u003etag/v2.7.3\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eThis is my first release after being made maintainer two weeks ago.\u003c/p\u003e\u003cp\u003eIf you got some realistic benchmarks in which JSON.dump is significantly slower than an alternative gem, please let me know.\u003c/p\u003e\u003cp\u003eAs long as it\u0026#39;s not the result of the alternative doing something incorrect, I\u0026#39;ll consider it as a bug.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003eI\u0026#39;ve just released json 2.7.3 with some bug fixes and lots of performance improvements: \u003ca href=\"https://github.com/ruby/json/releases/tag/v2.7.3\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/ruby/json/releases/\u003c/span\u003e\u003cspan class=\"invisible\"\u003etag/v2.7.3\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eThis is my first release after being made maintainer two weeks ago.\u003c/p\u003e\u003cp\u003eIf you got some realistic benchmarks in which JSON.dump is significantly slower than an alternative gem, please let me know.\u003c/p\u003e\u003cp\u003eAs long as it\u0026#39;s not the result of the alternative doing something incorrect, I\u0026#39;ll consider it as a 
bug.\u003c/p\u003e"},"attachment":[{"type":"Document","mediaType":"image/png","url":"https://cdn.masto.host/rubysocial/media_attachments/files/113/361/788/681/127/110/original/5f0931fc4b8fe796.png","name":null,"blurhash":"UASF-ENLo#xY_4RkM_xu9Yoea#V@_MITWB%g","width":1870,"height":960}],"tag":[],"replies":{"id":"https://ruby.social/users/byroot/statuses/113361790648929484/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113361790648929484/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113361790648929484/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113361790648929484/likes","type":"Collection","totalItems":36},"shares":{"id":"https://ruby.social/users/byroot/statuses/113361790648929484/shares","type":"Collection","totalItems":16}}},{"id":"https://ruby.social/users/byroot/statuses/113356365626792579/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-23T10:54:31Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"object":{"id":"https://ruby.social/users/byroot/statuses/113356365626792579","type":"Note","summary":null,"inReplyTo":null,"published":"2024-10-23T10:54:31Z","url":"https://ruby.social/@byroot/113356365626792579","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113356365626792579","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-10-23:objectId=57090061:objectType=Conversation","content":"\u003cp\u003eI\u0026#39;m working on a blog post about our new out of band GC. Sneak peak:\u003c/p\u003e","contentMap":{"en":"\u003cp\u003eI\u0026#39;m working on a blog post about our new out of band GC. 
Sneak peak:\u003c/p\u003e"},"attachment":[{"type":"Document","mediaType":"image/png","url":"https://cdn.masto.host/rubysocial/media_attachments/files/113/356/364/818/271/017/original/9df7ff5c6469cfbd.png","name":null,"blurhash":"UJRfqJ-=xct5yXjEayWV-rIoRjoL~Wt7oLkC","width":4096,"height":988}],"tag":[],"replies":{"id":"https://ruby.social/users/byroot/statuses/113356365626792579/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113356365626792579/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113356365626792579/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113356365626792579/likes","type":"Collection","totalItems":8},"shares":{"id":"https://ruby.social/users/byroot/statuses/113356365626792579/shares","type":"Collection","totalItems":3}}},{"id":"https://ruby.social/users/byroot/statuses/113350171031468562/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-22T08:39:09Z","to":["https://ruby.social/users/byroot/followers"],"cc":["https://www.w3.org/ns/activitystreams#Public","https://hachyderm.io/users/baweaver"],"object":{"id":"https://ruby.social/users/byroot/statuses/113350171031468562","type":"Note","summary":null,"inReplyTo":"https://hachyderm.io/users/baweaver/statuses/113350162298585353","published":"2024-10-22T08:39:09Z","url":"https://ruby.social/@byroot/113350171031468562","attributedTo":"https://ruby.social/users/byroot","to":["https://ruby.social/users/byroot/followers"],"cc":["https://www.w3.org/ns/activitystreams#Public","https://hachyderm.io/users/baweaver"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113350171031468562","inReplyToAtomUri":"https://hachyderm.io/users/baweaver/statuses/113350162298585353","conversation":"tag:hachyderm.io,2024-10-22:objectId=202069168:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e how would that have been different without CD? \u003c/p\u003e\u003cp\u003eWhatever bug that caused the outage would have been deployed just the same, just as part of a bigger batch.\u003c/p\u003e\u003cp\u003eYou take make 0 sense to me.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e how would that have been different without CD? 
\u003c/p\u003e\u003cp\u003eWhatever bug that caused the outage would have been deployed just the same, just as part of a bigger batch.\u003c/p\u003e\u003cp\u003eYou take make 0 sense to me.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://hachyderm.io/users/baweaver","name":"@baweaver@hachyderm.io"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113350171031468562/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113350171031468562/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113350171031468562/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113350171031468562/likes","type":"Collection","totalItems":0},"shares":{"id":"https://ruby.social/users/byroot/statuses/113350171031468562/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113350159628598017/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-22T08:36:15Z","to":["https://ruby.social/users/byroot/followers"],"cc":["https://www.w3.org/ns/activitystreams#Public","https://hachyderm.io/users/baweaver"],"object":{"id":"https://ruby.social/users/byroot/statuses/113350159628598017","type":"Note","summary":null,"inReplyTo":"https://hachyderm.io/users/baweaver/statuses/113350151359487275","published":"2024-10-22T08:36:15Z","url":"https://ruby.social/@byroot/113350159628598017","attributedTo":"https://ruby.social/users/byroot","to":["https://ruby.social/users/byroot/followers"],"cc":["https://www.w3.org/ns/activitystreams#Public","https://hachyderm.io/users/baweaver"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113350159628598017","inReplyToAtomUri":"https://hachyderm.io/users/baweaver/statuses/113350151359487275","conversation":"tag:hachyderm.io,2024-10-22:objectId=202069168:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e that doesn\u0026#39;t answer my point.\u003c/p\u003e\u003cp\u003eWhy do you think (all things being equal) releasing less often is better?\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e that doesn\u0026#39;t answer my point.\u003c/p\u003e\u003cp\u003eWhy do you think (all things being equal) releasing less often is 
better?\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://hachyderm.io/users/baweaver","name":"@baweaver@hachyderm.io"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113350159628598017/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113350159628598017/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113350159628598017/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113350159628598017/likes","type":"Collection","totalItems":0},"shares":{"id":"https://ruby.social/users/byroot/statuses/113350159628598017/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113350126777528742/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-22T08:27:54Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://hachyderm.io/users/baweaver"],"object":{"id":"https://ruby.social/users/byroot/statuses/113350126777528742","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/byroot/statuses/113350126084035477","published":"2024-10-22T08:27:54Z","url":"https://ruby.social/@byroot/113350126777528742","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://hachyderm.io/users/baweaver"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113350126777528742","inReplyToAtomUri":"https://ruby.social/users/byroot/statuses/113350126084035477","conversation":"tag:hachyderm.io,2024-10-22:objectId=202069168:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003c/p\u003e\u003cp\u003eI get the monitoring etc argument, but CD doesn\u0026#39;t necessarily means merge and go get a coffee, while things deploy automatically.\u003c/p\u003e\u003cp\u003eIf you don\u0026#39;t trust your monitoring and don\u0026#39;t have automatic rollbacks when an anomaly is detected, you can perfectly enforce that whoever has a change deploying must be around and check everything is going well.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003c/p\u003e\u003cp\u003eI get the monitoring etc argument, but CD doesn\u0026#39;t necessarily means merge and go get a coffee, while things deploy automatically.\u003c/p\u003e\u003cp\u003eIf you don\u0026#39;t trust your monitoring and don\u0026#39;t have automatic rollbacks when an anomaly is detected, you can perfectly enforce that whoever has a change deploying must be around and check everything is going 
well.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://hachyderm.io/users/baweaver","name":"@baweaver@hachyderm.io"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113350126777528742/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113350126777528742/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113350126777528742/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113350126777528742/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/113350126777528742/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113350126084035477/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-22T08:27:44Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://hachyderm.io/users/baweaver"],"object":{"id":"https://ruby.social/users/byroot/statuses/113350126084035477","type":"Note","summary":null,"inReplyTo":"https://hachyderm.io/users/baweaver/statuses/113348847094225477","published":"2024-10-22T08:27:44Z","url":"https://ruby.social/@byroot/113350126084035477","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://hachyderm.io/users/baweaver"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113350126084035477","inReplyToAtomUri":"https://hachyderm.io/users/baweaver/statuses/113348847094225477","conversation":"tag:hachyderm.io,2024-10-22:objectId=202069168:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003c/p\u003e\u003cp\u003eCan\u0026#39;t disagree more. All things equal, CD reduce the turn around time to deal with a bad release.\u003c/p\u003e\u003cp\u003eAssuming your devs ship 100 PR per week, and 1% of these have a bug, if you ship say, once a day when you hit that bug you need to figure out which of the 20 PRs in the batch is responsible.\u003c/p\u003e\u003cp\u003eIf you ship 20 times a day, so each PR independently, it becomes extremely obvious which PR need to be rolled back and reverted.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://hachyderm.io/@baweaver\" class=\"u-url mention\"\u003e@\u003cspan\u003ebaweaver\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003c/p\u003e\u003cp\u003eCan\u0026#39;t disagree more. 
All things equal, CD reduce the turn around time to deal with a bad release.\u003c/p\u003e\u003cp\u003eAssuming your devs ship 100 PR per week, and 1% of these have a bug, if you ship say, once a day when you hit that bug you need to figure out which of the 20 PRs in the batch is responsible.\u003c/p\u003e\u003cp\u003eIf you ship 20 times a day, so each PR independently, it becomes extremely obvious which PR need to be rolled back and reverted.\u003c/p\u003e"},"updated":"2024-10-22T08:28:50Z","attachment":[],"tag":[{"type":"Mention","href":"https://hachyderm.io/users/baweaver","name":"@baweaver@hachyderm.io"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113350126084035477/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113350126084035477/replies?min_id=113350126777528742\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113350126084035477/replies","items":["https://ruby.social/users/byroot/statuses/113350126777528742"]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113350126084035477/likes","type":"Collection","totalItems":2},"shares":{"id":"https://ruby.social/users/byroot/statuses/113350126084035477/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113287895679614930/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-10-11T08:41:43Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/postmodern"],"object":{"id":"https://ruby.social/users/byroot/statuses/113287895679614930","type":"Note","summary":null,"inReplyTo":null,"published":"2024-10-11T08:41:43Z","url":"https://ruby.social/@byroot/113287895679614930","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/postmodern"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113287895679614930","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-10-11:objectId=56178826:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@postmodern\" class=\"u-url mention\"\u003e@\u003cspan\u003epostmodern\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e it\u0026#39;s a big company, so the experience can vary a ton depending on where in the the org you are.\u003c/p\u003e\u003cp\u003eFeel free to DM details and questions if you want a more targeted answer.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@postmodern\" class=\"u-url mention\"\u003e@\u003cspan\u003epostmodern\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e it\u0026#39;s a big company, so the experience can vary a ton depending on where in the the org you are.\u003c/p\u003e\u003cp\u003eFeel free to DM details and questions if you want a more targeted 
answer.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/postmodern","name":"@postmodern"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113287895679614930/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113287895679614930/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113287895679614930/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113287895679614930/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/113287895679614930/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113057980213460794/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-08-31T18:11:11Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/flavorjones"],"object":{"id":"https://ruby.social/users/byroot/statuses/113057980213460794","type":"Note","summary":null,"inReplyTo":null,"published":"2024-08-31T18:11:11Z","url":"https://ruby.social/@byroot/113057980213460794","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/flavorjones"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/113057980213460794","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-08-31:objectId=53204977:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@flavorjones\" class=\"u-url mention\"\u003e@\u003cspan\u003eflavorjones\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Exceptionally, I was listening.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@flavorjones\" class=\"u-url mention\"\u003e@\u003cspan\u003eflavorjones\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Exceptionally, I was 
listening.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/flavorjones","name":"@flavorjones"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/113057980213460794/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/113057980213460794/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/113057980213460794/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/113057980213460794/likes","type":"Collection","totalItems":2},"shares":{"id":"https://ruby.social/users/byroot/statuses/113057980213460794/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/113032547130202524/activity","type":"Announce","actor":"https://ruby.social/users/byroot","published":"2024-08-27T06:23:13Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/pushcx","https://ruby.social/users/byroot/followers"],"object":"https://ruby.social/users/pushcx/statuses/113030799240706907"},{"id":"https://ruby.social/users/byroot/statuses/112929088810252037/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-08-08T23:52:25Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://sfba.social/users/soaproot","https://ruby.social/users/flavorjones"],"object":{"id":"https://ruby.social/users/byroot/statuses/112929088810252037","type":"Note","summary":null,"inReplyTo":"https://sfba.social/users/soaproot/statuses/112928555487199451","published":"2024-08-08T23:52:25Z","url":"https://ruby.social/@byroot/112929088810252037","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://sfba.social/users/soaproot","https://ruby.social/users/flavorjones"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112929088810252037","inReplyToAtomUri":"https://sfba.social/users/soaproot/statuses/112928555487199451","conversation":"tag:ruby.social,2024-08-08:objectId=51576086:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://sfba.social/@soaproot\" class=\"u-url mention\"\u003e@\u003cspan\u003esoaproot\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@flavorjones\" class=\"u-url mention\"\u003e@\u003cspan\u003eflavorjones\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e this is open source. 
Your dependencies are your code too.\u003c/p\u003e\u003cp\u003eWhich means open a PR to fix it there if there isn\u0026#39;t one yet, or at the very least ensure there is an open issue.\u003c/p\u003e\u003cp\u003eThen there are mechanisms to silence specific warnings, such as \u003ca href=\"https://rubygems.org/gems/warning\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"\"\u003erubygems.org/gems/warning\u003c/span\u003e\u003cspan class=\"invisible\"\u003e\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://sfba.social/@soaproot\" class=\"u-url mention\"\u003e@\u003cspan\u003esoaproot\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@flavorjones\" class=\"u-url mention\"\u003e@\u003cspan\u003eflavorjones\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e this is open source. Your dependencies are your code too.\u003c/p\u003e\u003cp\u003eWhich means open a PR to fix it there if there isn\u0026#39;t one yet, or at the very least ensure there is an open issue.\u003c/p\u003e\u003cp\u003eThen there are mechanisms to silence specific warnings, such as \u003ca href=\"https://rubygems.org/gems/warning\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"\"\u003erubygems.org/gems/warning\u003c/span\u003e\u003cspan class=\"invisible\"\u003e\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://sfba.social/users/soaproot","name":"@soaproot@sfba.social"},{"type":"Mention","href":"https://ruby.social/users/flavorjones","name":"@flavorjones"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112929088810252037/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112929088810252037/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112929088810252037/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112929088810252037/likes","type":"Collection","totalItems":4},"shares":{"id":"https://ruby.social/users/byroot/statuses/112929088810252037/shares","type":"Collection","totalItems":1}}},{"id":"https://ruby.social/users/byroot/statuses/112733460786406967/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-07-05T10:41:38Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/floehopper"],"object":{"id":"https://ruby.social/users/byroot/statuses/112733460786406967","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/floehopper/statuses/112732960288445318","published":"2024-07-05T10:41:38Z","url":"https://ruby.social/@byroot/112733460786406967","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/floehopper"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112733460786406967","inReplyToAtomUri":"https://ruby.social/users/floehopper/statuses/112732960288445318","conversation":"tag:ruby.social,2024-07-05:objectId=49065856:objectType=Conversation","content":
"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@floehopper\" class=\"u-url mention\"\u003e@\u003cspan\u003efloehopper\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e while the French system isn\u0026#39;t strictly FPTP, in practice it is similar enough that historically it almost always gave an absolute majority to minority formations.\u003c/p\u003e\u003cp\u003eIt only failed to do so the last two years.\u003c/p\u003e\u003cp\u003eIt\u0026#39;s was designed this way on purpose for the sake of \u0026quot;stability\u0026quot;...\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@floehopper\" class=\"u-url mention\"\u003e@\u003cspan\u003efloehopper\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e while the French system isn\u0026#39;t strictly FPTP, in practice it is similar enough that historically it almost always gave an absolute majority to minority formations.\u003c/p\u003e\u003cp\u003eIt only failed to do so the last two years.\u003c/p\u003e\u003cp\u003eIt\u0026#39;s was designed this way on purpose for the sake of \u0026quot;stability\u0026quot;...\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/floehopper","name":"@floehopper"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112733460786406967/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112733460786406967/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112733460786406967/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112733460786406967/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/112733460786406967/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112682265852412256/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-26T09:42:05Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://status.pointless.one/users/pointlessone","https://ruby.social/users/codefolio"],"object":{"id":"https://ruby.social/users/byroot/statuses/112682265852412256","type":"Note","summary":null,"inReplyTo":"https://status.pointless.one/users/pointlessone/statuses/112682262345182185","published":"2024-06-26T09:42:05Z","url":"https://ruby.social/@byroot/112682265852412256","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://status.pointless.one/users/pointlessone","https://ruby.social/users/codefolio"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112682265852412256","inReplyToAtomUri":"https://status.pointless.one/users/pointlessone/statuses/112682262345182185","conversation":"tag:ruby.social,2024-06-26:objectId=48431832:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://status.pointless.one/@pointlessone\" class=\"u-url mention\"\u003e@\u003cspan\u003epointlessone\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e the trailing comma 
is the number one reason I can\u0026#39;t agree with standardrb.\u003c/p\u003e\u003cp\u003eTotally agree on minimizing diff noise.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://status.pointless.one/@pointlessone\" class=\"u-url mention\"\u003e@\u003cspan\u003epointlessone\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e the trailing comma is the number one reason I can\u0026#39;t agree with standardrb.\u003c/p\u003e\u003cp\u003eTotally agree on minimizing diff noise.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://status.pointless.one/users/pointlessone","name":"@pointlessone@status.pointless.one"},{"type":"Mention","href":"https://ruby.social/users/codefolio","name":"@codefolio"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112682265852412256/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112682265852412256/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112682265852412256/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112682265852412256/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/112682265852412256/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112682089780639728/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-26T08:57:19Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/codefolio"],"object":{"id":"https://ruby.social/users/byroot/statuses/112682089780639728","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/codefolio/statuses/112682077769593093","published":"2024-06-26T08:57:19Z","url":"https://ruby.social/@byroot/112682089780639728","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/codefolio"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112682089780639728","inReplyToAtomUri":"https://ruby.social/users/codefolio/statuses/112682077769593093","conversation":"tag:ruby.social,2024-06-26:objectId=48431832:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e I still do, because I think there is a baseline of cops that save a lot of time during review when people submit PRs.\u003c/p\u003e\u003cp\u003eBut the default config is really puzzling, always have to disable tons of cops: \u003ca href=\"https://github.com/redis-rb/redis-client/blob/master/.rubocop.yml\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/redis-rb/redis-clie\u003c/span\u003e\u003cspan class=\"invisible\"\u003ent/blob/master/.rubocop.yml\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eWish I could use `standard`, but while 
it\u0026#39;s less annoying, I just plain can\u0026#39;t agree with some of the choices they made, and they don\u0026#39;t allow to deviate so...\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e I still do, because I think there is a baseline of cops that save a lot of time during review when people submit PRs.\u003c/p\u003e\u003cp\u003eBut the default config is really puzzling, always have to disable tons of cops: \u003ca href=\"https://github.com/redis-rb/redis-client/blob/master/.rubocop.yml\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/redis-rb/redis-clie\u003c/span\u003e\u003cspan class=\"invisible\"\u003ent/blob/master/.rubocop.yml\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eWish I could use `standard`, but while it\u0026#39;s less annoying, I just plain can\u0026#39;t agree with some of the choices they made, and they don\u0026#39;t allow to deviate so...\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/codefolio","name":"@codefolio"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112682089780639728/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112682089780639728/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112682089780639728/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112682089780639728/likes","type":"Collection","totalItems":4},"shares":{"id":"https://ruby.social/users/byroot/statuses/112682089780639728/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112682075988473643/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-26T08:53:48Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/codefolio"],"object":{"id":"https://ruby.social/users/byroot/statuses/112682075988473643","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/codefolio/statuses/112682069742608695","published":"2024-06-26T08:53:48Z","url":"https://ruby.social/@byroot/112682075988473643","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/codefolio"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112682075988473643","inReplyToAtomUri":"https://ruby.social/users/codefolio/statuses/112682069742608695","conversation":"tag:ruby.social,2024-06-26:objectId=48431832:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Yeah. 
I started a project a few hours ago, already had to disable 6 cops that I consider plain wrong.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@codefolio\" class=\"u-url mention\"\u003e@\u003cspan\u003ecodefolio\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Yeah. I started a project a few hours ago, already had to disable 6 cops that I consider plain wrong.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/codefolio","name":"@codefolio"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112682075988473643/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112682075988473643/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112682075988473643/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112682075988473643/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/112682075988473643/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112682051957106547/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-26T08:47:41Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"object":{"id":"https://ruby.social/users/byroot/statuses/112682051957106547","type":"Note","summary":null,"inReplyTo":null,"published":"2024-06-26T08:47:41Z","url":"https://ruby.social/@byroot/112682051957106547","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112682051957106547","inReplyToAtomUri":null,"conversation":"tag:ruby.social,2024-06-26:objectId=48431832:objectType=Conversation","content":"\u003cp\u003eI just discovered this rubocop rule while working on a new project \u003ca href=\"https://www.rubydoc.info/gems/rubocop/RuboCop/Cop/Naming/RescuedExceptionsVariableName\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://www.\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003erubydoc.info/gems/rubocop/Rubo\u003c/span\u003e\u003cspan class=\"invisible\"\u003eCop/Cop/Naming/RescuedExceptionsVariableName\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eI mean seriously? Why should a single name be enforced for all rescued exceptions? If in context there\u0026#39;s something more meaningful than `error`, then I\u0026#39;m gonna use it.\u003c/p\u003e\u003cp\u003eAlso why on earth would you enforce a single letter as variable name? 
\u003ca href=\"https://ruby.social/tags/notmyruby\" class=\"mention hashtag\" rel=\"tag\"\u003e#\u003cspan\u003enotmyruby\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e","contentMap":{"en":"\u003cp\u003eI just discovered this rubocop rule while working on a new project \u003ca href=\"https://www.rubydoc.info/gems/rubocop/RuboCop/Cop/Naming/RescuedExceptionsVariableName\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://www.\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003erubydoc.info/gems/rubocop/Rubo\u003c/span\u003e\u003cspan class=\"invisible\"\u003eCop/Cop/Naming/RescuedExceptionsVariableName\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e\u003cp\u003eI mean seriously? Why should a single name be enforced for all rescued exceptions? If in context there\u0026#39;s something more meaningful than `error`, then I\u0026#39;m gonna use it.\u003c/p\u003e\u003cp\u003eAlso why on earth would you enforce a single letter as variable name? \u003ca href=\"https://ruby.social/tags/notmyruby\" class=\"mention hashtag\" rel=\"tag\"\u003e#\u003cspan\u003enotmyruby\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e"},"updated":"2024-06-26T08:54:27Z","attachment":[],"tag":[{"type":"Hashtag","href":"https://ruby.social/tags/notmyruby","name":"#notmyruby"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112682051957106547/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112682051957106547/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112682051957106547/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112682051957106547/likes","type":"Collection","totalItems":8},"shares":{"id":"https://ruby.social/users/byroot/statuses/112682051957106547/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112671530887629815/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-24T12:12:03Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/nony","https://ruby.social/users/eileencodes"],"object":{"id":"https://ruby.social/users/byroot/statuses/112671530887629815","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/nony/statuses/112671249287497537","published":"2024-06-24T12:12:03Z","url":"https://ruby.social/@byroot/112671530887629815","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/nony","https://ruby.social/users/eileencodes"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112671530887629815","inReplyToAtomUri":"https://ruby.social/users/nony/statuses/112671249287497537","conversation":"tag:ruby.social,2024-06-24:objectId=48293543:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@nony\" class=\"u-url mention\"\u003e@\u003cspan\u003enony\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@eileencodes\" class=\"u-url mention\"\u003e@\u003cspan\u003eeileencodes\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e no particular feeling no.\u003c/p\u003e\u003cp\u003eAlso I don\u0026#39;t think Eileen is very active 
around here, so best to discuss this on GitHub or something.\u003c/p\u003e\u003cp\u003eIf your planned refactoring isn\u0026#39;t a ton of work, the simpler is to discuss over a PR. Otherwise you can start a GitHub discussion and tag Eileen: \u003ca href=\"https://github.com/rails/rails/discussions\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/rails/rails/discuss\u003c/span\u003e\u003cspan class=\"invisible\"\u003eions\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@nony\" class=\"u-url mention\"\u003e@\u003cspan\u003enony\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@eileencodes\" class=\"u-url mention\"\u003e@\u003cspan\u003eeileencodes\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e no particular feeling no.\u003c/p\u003e\u003cp\u003eAlso I don\u0026#39;t think Eileen is very active around here, so best to discuss this on GitHub or something.\u003c/p\u003e\u003cp\u003eIf your planned refactoring isn\u0026#39;t a ton of work, the simpler is to discuss over a PR. Otherwise you can start a GitHub discussion and tag Eileen: \u003ca href=\"https://github.com/rails/rails/discussions\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"\u003e\u003cspan class=\"invisible\"\u003ehttps://\u003c/span\u003e\u003cspan class=\"ellipsis\"\u003egithub.com/rails/rails/discuss\u003c/span\u003e\u003cspan class=\"invisible\"\u003eions\u003c/span\u003e\u003c/a\u003e\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/nony","name":"@nony"},{"type":"Mention","href":"https://ruby.social/users/eileencodes","name":"@eileencodes"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112671530887629815/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112671530887629815/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112671530887629815/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112671530887629815/likes","type":"Collection","totalItems":0},"shares":{"id":"https://ruby.social/users/byroot/statuses/112671530887629815/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112671245079603460/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-24T10:59:22Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/nony","https://ruby.social/users/eileencodes"],"object":{"id":"https://ruby.social/users/byroot/statuses/112671245079603460","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/nony/statuses/112671240096770145","published":"2024-06-24T10:59:22Z","url":"https://ruby.social/@byroot/112671245079603460","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/nony","https://ruby.social/users/eileencodes"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112671245079603460","inReplyToAtomUri":"https://ruby.social/users/nony/statuses/112671240096770145","co
nversation":"tag:ruby.social,2024-06-24:objectId=48293543:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@nony\" class=\"u-url mention\"\u003e@\u003cspan\u003enony\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e That\u0026#39;s more \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@eileencodes\" class=\"u-url mention\"\u003e@\u003cspan\u003eeileencodes\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u0026#39;s area of expertise.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@nony\" class=\"u-url mention\"\u003e@\u003cspan\u003enony\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e That\u0026#39;s more \u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@eileencodes\" class=\"u-url mention\"\u003e@\u003cspan\u003eeileencodes\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e \u0026#39;s area of expertise.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/nony","name":"@nony"},{"type":"Mention","href":"https://ruby.social/users/eileencodes","name":"@eileencodes"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112671245079603460/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112671245079603460/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112671245079603460/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112671245079603460/likes","type":"Collection","totalItems":0},"shares":{"id":"https://ruby.social/users/byroot/statuses/112671245079603460/shares","type":"Collection","totalItems":0}}},{"id":"https://ruby.social/users/byroot/statuses/112610149145350336/activity","type":"Create","actor":"https://ruby.social/users/byroot","published":"2024-06-13T16:01:52Z","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/samsaffron"],"object":{"id":"https://ruby.social/users/byroot/statuses/112610149145350336","type":"Note","summary":null,"inReplyTo":"https://ruby.social/users/byroot/statuses/112608230956169882","published":"2024-06-13T16:01:52Z","url":"https://ruby.social/@byroot/112610149145350336","attributedTo":"https://ruby.social/users/byroot","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://ruby.social/users/byroot/followers","https://ruby.social/users/samsaffron"],"sensitive":false,"atomUri":"https://ruby.social/users/byroot/statuses/112610149145350336","inReplyToAtomUri":"https://ruby.social/users/byroot/statuses/112608230956169882","conversation":"tag:ruby.social,2024-06-13:objectId=47554849:objectType=Conversation","content":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@samsaffron\" class=\"u-url mention\"\u003e@\u003cspan\u003esamsaffron\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Thanks for the repro. It\u0026#39;s fixed an backported to 7.1.\u003c/p\u003e","contentMap":{"en":"\u003cp\u003e\u003cspan class=\"h-card\" translate=\"no\"\u003e\u003ca href=\"https://ruby.social/@samsaffron\" class=\"u-url mention\"\u003e@\u003cspan\u003esamsaffron\u003c/span\u003e\u003c/a\u003e\u003c/span\u003e Thanks for the repro. 
It\u0026#39;s fixed an backported to 7.1.\u003c/p\u003e"},"attachment":[],"tag":[{"type":"Mention","href":"https://ruby.social/users/samsaffron","name":"@samsaffron"}],"replies":{"id":"https://ruby.social/users/byroot/statuses/112610149145350336/replies","type":"Collection","first":{"type":"CollectionPage","next":"https://ruby.social/users/byroot/statuses/112610149145350336/replies?only_other_accounts=true\u0026page=true","partOf":"https://ruby.social/users/byroot/statuses/112610149145350336/replies","items":[]}},"likes":{"id":"https://ruby.social/users/byroot/statuses/112610149145350336/likes","type":"Collection","totalItems":1},"shares":{"id":"https://ruby.social/users/byroot/statuses/112610149145350336/shares","type":"Collection","totalItems":0}}}]} \ No newline at end of file diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index 662b1c3e7..acc5fa07b 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -17,8 +17,7 @@ def implementations(ruby_obj) state = JSON::State.new(JSON.dump_default_options) { - json_state: ["json (reuse)", proc { state.generate(ruby_obj) }], - json: ["json", proc { JSON.dump(ruby_obj) }], + json: ["json", proc { JSON.generate(ruby_obj) }], oj: ["oj", proc { Oj.dump(ruby_obj) }], } end @@ -55,19 +54,27 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ puts end -# On the first two micro benchmarks, the limitting factor is that we have to create a Generator::State object for every -# call to `JSON.dump`, so we cause 2 allocations per call where alternatives only do one allocation. -# The performance difference is mostly more time spent in GC because of this extra pressure. -# If we re-use the same `JSON::State` instance, we're faster than Oj on the array benchmark, and much closer -# on the Hash one. +# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] + +# On the first two micro benchmarks, the limitting factor is the fixed cost of initializing the +# generator state. Since `JSON.generate` now lazily allocate the `State` object we're now ~10-20% faster +# than `Oj.dump`. +benchmark_encoding "small mixed", [1, "string", { a: 1, b: 2 }, [3, 4, 5]] benchmark_encoding "small nested array", [[1,2,3,4,5]]*10 benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" } -# On these benchmarks we perform well. Either on par or very closely faster/slower -benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state) -benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state) -benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state) -benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state) +# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~50% slower when dealing +# with mostly multi-byte characters. There's likely some gains left to be had in multi-byte handling. +benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500) +benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500) + +# On these benchmarks we perform well, we're on par or better. 
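An aside on the generator-state notes above: the removed `json_state` report measured exactly this kind of reuse. Below is a minimal sketch of the two call styles, using only APIs that already appear in this benchmark file (`JSON.generate`, `JSON::State#generate`, `JSON.dump_default_options`); the payload is an arbitrary example.

```ruby
require "json"

payload = [[1, 2, 3, 4, 5]] * 10

# One-shot call: the generator State is now allocated lazily inside the call.
JSON.generate(payload)

# Reused State, as the removed `json_state` report did: the same State object
# serves every call, so no per-call state setup is paid.
state = JSON::State.new(JSON.dump_default_options)
state.generate(payload)
```

With the lazy allocation described in the comment above, the gap between the two styles shrinks to the extra `State` allocation, which is presumably why the dedicated "json (reuse)" report was dropped.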
+benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state) +benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json") +benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json") + +# On twitter.json we're still about 6% slower, this is worth investigating. +benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json") # This benchmark spent the overwhelming majority of its time in `ruby_dtoa`. We rely on Ruby's implementation # which uses a relatively old version of dtoa.c from David M. Gay. @@ -78,6 +85,8 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ # but all these are implemented in C++11 or newer, making it hard if not impossible to include them. # Short of a pure C99 implementation of these newer algorithms, there isn't much that can be done to match # Oj speed without losing precision. -benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false, except: %i(json_state) +benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false -benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20, except: %i(json_state) +# We're about 10% faster when `to_json` calls are involved, but this wasn't particularly optimized, there might be +# opportunities here. +benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20 diff --git a/benchmark/parser.rb b/benchmark/parser.rb index 1c26ed8d4..6952f3c33 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -19,7 +19,6 @@ def benchmark_parsing(name, json_output) Benchmark.ips do |x| x.report("json") { JSON.parse(json_output) } if RUN[:json] x.report("oj") { Oj.load(json_output) } if RUN[:oj] - x.report("oj strict") { Oj.strict_load(json_output) } if RUN[:oj] x.report("Oj::Parser") { Oj::Parser.usual.parse(json_output) } if RUN[:oj] x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson] x.compare!(order: :baseline) @@ -27,13 +26,28 @@ def benchmark_parsing(name, json_output) puts end +# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] + +# Oj::Parser is significanly faster (~1.3x) on the next 3 micro-benchmarks in large part because its +# cache is persisted across calls. That's not something we can do with the current API, we'd +# need to expose a stateful API as well, but that's no really desirable. +# Other than that we're faster than regular `Oj.load` by a good margin (between 1.3x and 2.4x). 
benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10) benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }) - benchmark_parsing "test from oj", <= 201103L) +# include +# endif +#elif defined(HAVE_STDBOOL_H) +# include +#elif !defined(HAVE__BOOL) +typedef unsigned char _Bool; +# define bool _Bool +# define true ((_Bool)+1) +# define false ((_Bool)+0) +# define __bool_true_false_are_defined +#endif +#endif + +#ifndef RB_UNLIKELY +#define RB_UNLIKELY(expr) expr +#endif + +#ifndef RB_LIKELY +#define RB_LIKELY(expr) expr +#endif + +#ifndef MAYBE_UNUSED +# define MAYBE_UNUSED(x) x +#endif + +enum fbuffer_type { + FBUFFER_HEAP_ALLOCATED = 0, + FBUFFER_STACK_ALLOCATED = 1, +}; + typedef struct FBufferStruct { + enum fbuffer_type type; unsigned long initial_length; - char *ptr; unsigned long len; unsigned long capa; + char *ptr; } FBuffer; +#define FBUFFER_STACK_SIZE 512 #define FBUFFER_INITIAL_LENGTH_DEFAULT 1024 -#define FBUFFER_PTR(fb) (fb->ptr) -#define FBUFFER_LEN(fb) (fb->len) -#define FBUFFER_CAPA(fb) (fb->capa) +#define FBUFFER_PTR(fb) ((fb)->ptr) +#define FBUFFER_LEN(fb) ((fb)->len) +#define FBUFFER_CAPA(fb) ((fb)->capa) #define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb) -static FBuffer *fbuffer_alloc(unsigned long initial_length); static void fbuffer_free(FBuffer *fb); #ifndef JSON_GENERATOR static void fbuffer_clear(FBuffer *fb); @@ -27,29 +64,26 @@ static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len); #ifdef JSON_GENERATOR static void fbuffer_append_long(FBuffer *fb, long number); #endif -static void fbuffer_append_char(FBuffer *fb, char newchr); +static inline void fbuffer_append_char(FBuffer *fb, char newchr); #ifdef JSON_GENERATOR static VALUE fbuffer_to_s(FBuffer *fb); #endif -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(expr) expr -#endif - -static FBuffer *fbuffer_alloc(unsigned long initial_length) +static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size) { - FBuffer *fb; - if (initial_length <= 0) initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - fb = ALLOC(FBuffer); - memset((void *) fb, 0, sizeof(FBuffer)); - fb->initial_length = initial_length; - return fb; + fb->initial_length = (initial_length > 0) ? 
initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; + if (stack_buffer) { + fb->type = FBUFFER_STACK_ALLOCATED; + fb->ptr = stack_buffer; + fb->capa = stack_buffer_size; + } } static void fbuffer_free(FBuffer *fb) { - if (fb->ptr) ruby_xfree(fb->ptr); - ruby_xfree(fb); + if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) { + ruby_xfree(fb->ptr); + } } #ifndef JSON_GENERATOR @@ -59,22 +93,34 @@ static void fbuffer_clear(FBuffer *fb) } #endif -static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) +static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) { - if (RB_UNLIKELY(requested > fb->capa - fb->len)) { - unsigned long required; + unsigned long required; - if (RB_UNLIKELY(!fb->ptr)) { - fb->ptr = ALLOC_N(char, fb->initial_length); - fb->capa = fb->initial_length; - } + if (RB_UNLIKELY(!fb->ptr)) { + fb->ptr = ALLOC_N(char, fb->initial_length); + fb->capa = fb->initial_length; + } - for (required = fb->capa; requested > required - fb->len; required <<= 1); + for (required = fb->capa; requested > required - fb->len; required <<= 1); - if (required > fb->capa) { + if (required > fb->capa) { + if (fb->type == FBUFFER_STACK_ALLOCATED) { + const char *old_buffer = fb->ptr; + fb->ptr = ALLOC_N(char, required); + fb->type = FBUFFER_HEAP_ALLOCATED; + MEMCPY(fb->ptr, old_buffer, char, fb->len); + } else { REALLOC_N(fb->ptr, char, required); - fb->capa = required; } + fb->capa = required; + } +} + +static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) +{ + if (RB_UNLIKELY(requested > fb->capa - fb->len)) { + fbuffer_do_inc_capa(fb, requested); } } @@ -99,7 +145,7 @@ static void fbuffer_append_str(FBuffer *fb, VALUE str) } #endif -static void fbuffer_append_char(FBuffer *fb, char newchr) +static inline void fbuffer_append_char(FBuffer *fb, char newchr) { fbuffer_inc_capa(fb, 1); *(fb->ptr + fb->len) = newchr; @@ -107,33 +153,25 @@ static void fbuffer_append_char(FBuffer *fb, char newchr) } #ifdef JSON_GENERATOR -static void freverse(char *start, char *end) -{ - char c; - - while (end > start) { - c = *end, *end-- = *start, *start++ = c; - } -} - static long fltoa(long number, char *buf) { - static char digits[] = "0123456789"; + static const char digits[] = "0123456789"; long sign = number; char* tmp = buf; if (sign < 0) number = -number; - do *tmp++ = digits[number % 10]; while (number /= 10); - if (sign < 0) *tmp++ = '-'; - freverse(buf, tmp - 1); - return tmp - buf; + do *tmp-- = digits[number % 10]; while (number /= 10); + if (sign < 0) *tmp-- = '-'; + return buf - tmp; } +#define LONG_BUFFER_SIZE 20 static void fbuffer_append_long(FBuffer *fb, long number) { - char buf[20]; - unsigned long len = fltoa(number, buf); - fbuffer_append(fb, buf, len); + char buf[LONG_BUFFER_SIZE]; + char *buffer_end = buf + LONG_BUFFER_SIZE; + long len = fltoa(number, buffer_end - 1); + fbuffer_append(fb, buffer_end - len, len); } static VALUE fbuffer_to_s(FBuffer *fb) diff --git a/ext/json/ext/generator/depend b/ext/json/ext/generator/depend index 1a042a250..967aa7659 100644 --- a/ext/json/ext/generator/depend +++ b/ext/json/ext/generator/depend @@ -1 +1 @@ -generator.o: generator.c generator.h $(srcdir)/../fbuffer/fbuffer.h +generator.o: generator.c $(srcdir)/../fbuffer/fbuffer.h diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index c35e86d9b..c4f356ac6 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -1,5 +1,27 @@ +#include "ruby.h" #include "../fbuffer/fbuffer.h" -#include 
"generator.h" + +#include +#include + +/* ruby api and some helpers */ + +typedef struct JSON_Generator_StateStruct { + VALUE indent; + VALUE space; + VALUE space_before; + VALUE object_nl; + VALUE array_nl; + + long max_nesting; + long depth; + long buffer_initial_length; + + bool allow_nan; + bool ascii_only; + bool script_safe; + bool strict; +} JSON_Generator_State; #ifndef RB_UNLIKELY #define RB_UNLIKELY(cond) (cond) @@ -8,6 +30,46 @@ static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8; static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode; +static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, + sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict; + + +#define GET_STATE_TO(self, state) \ + TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state) + +#define GET_STATE(self) \ + JSON_Generator_State *state; \ + GET_STATE_TO(self, state) + +struct generate_json_data; + +typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); + +struct generate_json_data { + FBuffer *buffer; + VALUE vstate; + JSON_Generator_State *state; + VALUE obj; + generator_func func; +}; + +static VALUE cState_from_state_s(VALUE self, VALUE opts); +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func); +static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +#ifdef RUBY_INTEGER_UNIFICATION +static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +#endif +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); + +static int usascii_encindex, utf8_encindex, binary_encindex; /* Converts in_string to a JSON string (without the wrapping '"' * characters) in FBuffer out_buffer. 
@@ -44,9 +106,6 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca if (RB_UNLIKELY(ch_len)) { switch (ch_len) { - case 0: - pos++; - break; case 1: { FLUSH_POS(1); switch (ch) { @@ -59,8 +118,8 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca case '\r': fbuffer_append(out_buffer, "\\r", 2); break; case '\t': fbuffer_append(out_buffer, "\\t", 2); break; default: { - scratch[2] = hexdig[ch >> 12]; - scratch[3] = hexdig[(ch >> 8) & 0xf]; + scratch[2] = '0'; + scratch[3] = '0'; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; fbuffer_append(out_buffer, scratch, 6); @@ -181,8 +240,8 @@ static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char esc case '\r': fbuffer_append(out_buffer, "\\r", 2); break; case '\t': fbuffer_append(out_buffer, "\\t", 2); break; default: - scratch[2] = hexdig[ch >> 12]; - scratch[3] = hexdig[(ch >> 8) & 0xf]; + scratch[2] = '0'; + scratch[3] = '0'; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; fbuffer_append(out_buffer, scratch, 6); @@ -217,9 +276,6 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, cons if (RB_UNLIKELY(ch_len)) { switch (ch_len) { - case 0: - pos++; - break; case 1: { FLUSH_POS(1); switch (ch) { @@ -232,8 +288,8 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, cons case '\r': fbuffer_append(out_buffer, "\\r", 2); break; case '\t': fbuffer_append(out_buffer, "\\t", 2); break; default: { - scratch[2] = hexdig[ch >> 12]; - scratch[3] = hexdig[(ch >> 8) & 0xf]; + scratch[2] = '0'; + scratch[3] = '0'; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; fbuffer_append(out_buffer, scratch, 6); @@ -303,14 +359,6 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, cons RB_GC_GUARD(str); } -static char *fstrndup(const char *ptr, unsigned long len) { - char *result; - if (len <= 0) return NULL; - result = ALLOC_N(char, len); - memcpy(result, ptr, len); - return result; -} - /* * Document-module: JSON::Ext::Generator * @@ -403,7 +451,9 @@ static char *fstrndup(const char *ptr, unsigned long len) { */ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(object); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_object); } /* @@ -415,7 +465,9 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) * produced JSON string output further. */ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(array); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_array); } #ifdef RUBY_INTEGER_UNIFICATION @@ -426,7 +478,9 @@ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { */ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(integer); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_integer); } #else @@ -437,7 +491,9 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(fixnum); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? 
argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_fixnum); } /* @@ -447,7 +503,9 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(bignum); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_bignum); } #endif @@ -458,7 +516,9 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(float); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_float); } /* @@ -481,7 +541,9 @@ static VALUE mString_included_s(VALUE self, VALUE modul) { */ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(string); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_string); } /* @@ -498,7 +560,7 @@ static VALUE mString_to_json_raw_object(VALUE self) VALUE result = rb_hash_new(); rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); - rb_hash_aset(result, rb_str_new2("raw"), ary); + rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary); return result; } @@ -536,7 +598,8 @@ static VALUE mString_Extend_json_create(VALUE self, VALUE o) */ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(true); + rb_check_arity(argc, 0, 1); + return rb_utf8_str_new("true", 4); } /* @@ -546,7 +609,8 @@ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(false); + rb_check_arity(argc, 0, 1); + return rb_utf8_str_new("false", 5); } /* @@ -556,7 +620,8 @@ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(null); + rb_check_arity(argc, 0, 1); + return rb_utf8_str_new("null", 4); } /* @@ -573,30 +638,38 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) rb_scan_args(argc, argv, "01", &state); Check_Type(string, T_STRING); state = cState_from_state_s(cState, state); - return cState_partial_generate(state, string); + return cState_partial_generate(state, string, generate_json_string); +} + +static void State_mark(void *ptr) +{ + JSON_Generator_State *state = ptr; + rb_gc_mark_movable(state->indent); + rb_gc_mark_movable(state->space); + rb_gc_mark_movable(state->space_before); + rb_gc_mark_movable(state->object_nl); + rb_gc_mark_movable(state->array_nl); +} + +static void State_compact(void *ptr) +{ + JSON_Generator_State *state = ptr; + state->indent = rb_gc_location(state->indent); + state->space = rb_gc_location(state->space); + state->space_before = rb_gc_location(state->space_before); + state->object_nl = rb_gc_location(state->object_nl); + state->array_nl = rb_gc_location(state->array_nl); } static void State_free(void *ptr) { JSON_Generator_State *state = ptr; - if (state->indent) ruby_xfree(state->indent); - if (state->space) ruby_xfree(state->space); - if (state->space_before) ruby_xfree(state->space_before); - if (state->object_nl) ruby_xfree(state->object_nl); - if (state->array_nl) 
ruby_xfree(state->array_nl); ruby_xfree(state); } static size_t State_memsize(const void *ptr) { - const JSON_Generator_State *state = ptr; - size_t size = sizeof(*state); - if (state->indent) size += state->indent_len + 1; - if (state->space) size += state->space_len + 1; - if (state->space_before) size += state->space_before_len + 1; - if (state->object_nl) size += state->object_nl_len + 1; - if (state->array_nl) size += state->array_nl_len + 1; - return size; + return sizeof(JSON_Generator_State); } #ifndef HAVE_RB_EXT_RACTOR_SAFE @@ -606,24 +679,54 @@ static size_t State_memsize(const void *ptr) static const rb_data_type_t JSON_Generator_State_type = { "JSON/Generator/State", - {NULL, State_free, State_memsize,}, + { + .dmark = State_mark, + .dfree = State_free, + .dsize = State_memsize, + .dcompact = State_compact, + }, 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE, }; +static void state_init(JSON_Generator_State *state) +{ + state->max_nesting = 100; + state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; +} + static VALUE cState_s_allocate(VALUE klass) { JSON_Generator_State *state; VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state); - state->max_nesting = 100; - state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + state_init(state); return obj; } +static void vstate_spill(struct generate_json_data *data) +{ + VALUE vstate = cState_s_allocate(cState); + GET_STATE(vstate); + MEMCPY(state, data->state, JSON_Generator_State, 1); + data->state = state; + data->vstate = vstate; + RB_OBJ_WRITTEN(vstate, Qundef, state->indent); + RB_OBJ_WRITTEN(vstate, Qundef, state->space); + RB_OBJ_WRITTEN(vstate, Qundef, state->space_before); + RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl); + RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl); +} + +static inline VALUE vstate_get(struct generate_json_data *data) +{ + if (RB_UNLIKELY(!data->vstate)) { + vstate_spill(data); + } + return data->vstate; +} + struct hash_foreach_arg { - FBuffer *buffer; - JSON_Generator_State *state; - VALUE Vstate; + struct generate_json_data *data; int iter; }; @@ -631,27 +734,32 @@ static int json_object_i(VALUE key, VALUE val, VALUE _arg) { struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg; - FBuffer *buffer = arg->buffer; - JSON_Generator_State *state = arg->state; - VALUE Vstate = arg->Vstate; + struct generate_json_data *data = arg->data; + + FBuffer *buffer = data->buffer; + JSON_Generator_State *state = data->state; long depth = state->depth; int j; if (arg->iter > 0) fbuffer_append_char(buffer, ','); if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append(buffer, state->object_nl, state->object_nl_len); + fbuffer_append_str(buffer, state->object_nl); } if (RB_UNLIKELY(state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append(buffer, state->indent, state->indent_len); + fbuffer_append_str(buffer, state->indent); } } VALUE key_to_s; switch(rb_type(key)) { case T_STRING: - key_to_s = key; + if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) { + key_to_s = key; + } else { + key_to_s = rb_funcall(key, i_to_s, 0); + } break; case T_SYMBOL: key_to_s = rb_sym2str(key); @@ -661,47 +769,57 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) break; } - generate_json_string(buffer, Vstate, state, key_to_s); - if (RB_UNLIKELY(state->space_before)) fbuffer_append(buffer, state->space_before, state->space_before_len); + if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) { + generate_json_string(buffer, 
data, state, key_to_s); + } else { + generate_json(buffer, data, state, key_to_s); + } + if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before); fbuffer_append_char(buffer, ':'); - if (RB_UNLIKELY(state->space)) fbuffer_append(buffer, state->space, state->space_len); - generate_json(buffer, Vstate, state, val); + if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space); + generate_json(buffer, data, state, val); arg->iter++; return ST_CONTINUE; } -static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { long max_nesting = state->max_nesting; long depth = ++state->depth; int j; - struct hash_foreach_arg arg; if (max_nesting != 0 && depth > max_nesting) { rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); } + + if (RHASH_SIZE(obj) == 0) { + fbuffer_append(buffer, "{}", 2); + --state->depth; + return; + } + fbuffer_append_char(buffer, '{'); - arg.buffer = buffer; - arg.state = state; - arg.Vstate = Vstate; - arg.iter = 0; + struct hash_foreach_arg arg = { + .data = data, + .iter = 0, + }; rb_hash_foreach(obj, json_object_i, (VALUE)&arg); depth = --state->depth; if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append(buffer, state->object_nl, state->object_nl_len); + fbuffer_append_str(buffer, state->object_nl); if (RB_UNLIKELY(state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append(buffer, state->indent, state->indent_len); + fbuffer_append_str(buffer, state->indent); } } } fbuffer_append_char(buffer, '}'); } -static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { long max_nesting = state->max_nesting; long depth = ++state->depth; @@ -709,34 +827,39 @@ static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_St if (max_nesting != 0 && depth > max_nesting) { rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); } + + if (RARRAY_LEN(obj) == 0) { + fbuffer_append(buffer, "[]", 2); + --state->depth; + return; + } + fbuffer_append_char(buffer, '['); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len); + if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl); for(i = 0; i < RARRAY_LEN(obj); i++) { if (i > 0) { fbuffer_append_char(buffer, ','); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len); + if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl); } if (RB_UNLIKELY(state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append(buffer, state->indent, state->indent_len); + fbuffer_append_str(buffer, state->indent); } } - generate_json(buffer, Vstate, state, RARRAY_AREF(obj, i)); + generate_json(buffer, data, state, RARRAY_AREF(obj, i)); } state->depth = --depth; if (RB_UNLIKELY(state->array_nl)) { - fbuffer_append(buffer, state->array_nl, state->array_nl_len); + fbuffer_append_str(buffer, state->array_nl); if (RB_UNLIKELY(state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append(buffer, state->indent, state->indent_len); + fbuffer_append_str(buffer, state->indent); } } } fbuffer_append_char(buffer, ']'); } -static int usascii_encindex, utf8_encindex, binary_encindex; - static inline int 
enc_utf8_compatible_p(int enc_idx) { if (enc_idx == usascii_encindex) return 1; @@ -750,13 +873,14 @@ static inline VALUE ensure_valid_encoding(VALUE str) VALUE utf8_string; if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) { if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Deprecate in 2.8.0 - // TODO: Remove in 3.0.0 utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); switch (rb_enc_str_coderange(utf8_string)) { case ENC_CODERANGE_7BIT: + return utf8_string; case ENC_CODERANGE_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. + // TODO: Raise in 3.0.0 + rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); return utf8_string; break; } @@ -767,7 +891,7 @@ static inline VALUE ensure_valid_encoding(VALUE str) return str; } -static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { obj = ensure_valid_encoding(obj); @@ -791,42 +915,43 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S fbuffer_append_char(buffer, '"'); } -static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append(buffer, "null", 4); } -static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append(buffer, "false", 5); } -static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append(buffer, "true", 4); } -static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append_long(buffer, FIX2LONG(obj)); } -static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { VALUE tmp = rb_funcall(obj, i_to_s, 0); fbuffer_append_str(buffer, tmp); } #ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { if (FIXNUM_P(obj)) - generate_json_fixnum(buffer, Vstate, state, obj); + generate_json_fixnum(buffer, data, state, obj); else - generate_json_bignum(buffer, Vstate, state, obj); + generate_json_bignum(buffer, data, state, obj); } #endif -static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) + +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { double value = RFLOAT_VALUE(obj); char allow_nan = state->allow_nan; @@ -841,20 +966,20 @@ 
static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St fbuffer_append_str(buffer, tmp); } -static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { VALUE tmp; if (obj == Qnil) { - generate_json_null(buffer, Vstate, state, obj); + generate_json_null(buffer, data, state, obj); } else if (obj == Qfalse) { - generate_json_false(buffer, Vstate, state, obj); + generate_json_false(buffer, data, state, obj); } else if (obj == Qtrue) { - generate_json_true(buffer, Vstate, state, obj); + generate_json_true(buffer, data, state, obj); } else if (RB_SPECIAL_CONST_P(obj)) { if (RB_FIXNUM_P(obj)) { - generate_json_fixnum(buffer, Vstate, state, obj); + generate_json_fixnum(buffer, data, state, obj); } else if (RB_FLONUM_P(obj)) { - generate_json_float(buffer, Vstate, state, obj); + generate_json_float(buffer, data, state, obj); } else { goto general; } @@ -862,62 +987,46 @@ static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *s VALUE klass = RBASIC_CLASS(obj); switch (RB_BUILTIN_TYPE(obj)) { case T_BIGNUM: - generate_json_bignum(buffer, Vstate, state, obj); + generate_json_bignum(buffer, data, state, obj); break; case T_HASH: if (klass != rb_cHash) goto general; - generate_json_object(buffer, Vstate, state, obj); + generate_json_object(buffer, data, state, obj); break; case T_ARRAY: if (klass != rb_cArray) goto general; - generate_json_array(buffer, Vstate, state, obj); + generate_json_array(buffer, data, state, obj); break; case T_STRING: if (klass != rb_cString) goto general; - generate_json_string(buffer, Vstate, state, obj); + generate_json_string(buffer, data, state, obj); break; case T_FLOAT: if (klass != rb_cFloat) goto general; - generate_json_float(buffer, Vstate, state, obj); + generate_json_float(buffer, data, state, obj); break; default: general: if (state->strict) { rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); } else if (rb_respond_to(obj, i_to_json)) { - tmp = rb_funcall(obj, i_to_json, 1, Vstate); + tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data)); Check_Type(tmp, T_STRING); fbuffer_append_str(buffer, tmp); } else { tmp = rb_funcall(obj, i_to_s, 0); Check_Type(tmp, T_STRING); - generate_json_string(buffer, Vstate, state, tmp); + generate_json_string(buffer, data, state, tmp); } } } } -static FBuffer *cState_prepare_buffer(VALUE self) -{ - FBuffer *buffer; - GET_STATE(self); - buffer = fbuffer_alloc(state->buffer_initial_length); - - return buffer; -} - -struct generate_json_data { - FBuffer *buffer; - VALUE vstate; - JSON_Generator_State *state; - VALUE obj; -}; - static VALUE generate_json_try(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; - generate_json(data->buffer, data->vstate, data->state, data->obj); + data->func(data->buffer, data, data->state, data->obj); return Qnil; } @@ -927,25 +1036,33 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc) struct generate_json_data *data = (struct generate_json_data *)d; fbuffer_free(data->buffer); + if (RBASIC_CLASS(exc) == rb_path2class("Encoding::UndefinedConversionError")) { + exc = rb_exc_new_str(eGeneratorError, rb_funcall(exc, rb_intern("message"), 0)); + } + rb_exc_raise(exc); return Qundef; } -static VALUE cState_partial_generate(VALUE self, VALUE obj) +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func) { - FBuffer *buffer = 
cState_prepare_buffer(self); GET_STATE(self); + char stack_buffer[FBUFFER_STACK_SIZE]; + FBuffer buffer = {0}; + fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); + struct generate_json_data data = { - .buffer = buffer, + .buffer = &buffer, .vstate = self, .state = state, - .obj = obj + .obj = obj, + .func = func }; rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); - return fbuffer_to_s(buffer); + return fbuffer_to_s(&buffer); } /* @@ -957,12 +1074,18 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj) */ static VALUE cState_generate(VALUE self, VALUE obj) { - VALUE result = cState_partial_generate(self, obj); + VALUE result = cState_partial_generate(self, obj, generate_json); GET_STATE(self); (void)state; return result; } +static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) +{ + rb_warn("The json gem extension was loaded with the stdlib ruby code. You should upgrade rubygems with `gem update --system`"); + return self; +} + /* * call-seq: initialize_copy(orig) * @@ -979,11 +1102,11 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig) if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State"); MEMCPY(objState, origState, JSON_Generator_State, 1); - objState->indent = fstrndup(origState->indent, origState->indent_len); - objState->space = fstrndup(origState->space, origState->space_len); - objState->space_before = fstrndup(origState->space_before, origState->space_before_len); - objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len); - objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len); + objState->indent = origState->indent; + objState->space = origState->space; + objState->space_before = origState->space_before; + objState->object_nl = origState->object_nl; + objState->array_nl = origState->array_nl; return obj; } @@ -1013,7 +1136,18 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts) static VALUE cState_indent(VALUE self) { GET_STATE(self); - return state->indent ? rb_str_new(state->indent, state->indent_len) : rb_str_new2(""); + return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0)); +} + +static VALUE string_config(VALUE config) +{ + if (RTEST(config)) { + Check_Type(config, T_STRING); + if (RSTRING_LEN(config)) { + return rb_str_new_frozen(config); + } + } + return Qfalse; } /* @@ -1023,21 +1157,8 @@ static VALUE cState_indent(VALUE self) */ static VALUE cState_indent_set(VALUE self, VALUE indent) { - unsigned long len; GET_STATE(self); - Check_Type(indent, T_STRING); - len = RSTRING_LEN(indent); - if (len == 0) { - if (state->indent) { - ruby_xfree(state->indent); - state->indent = NULL; - state->indent_len = 0; - } - } else { - if (state->indent) ruby_xfree(state->indent); - state->indent = fstrndup(RSTRING_PTR(indent), len); - state->indent_len = len; - } + RB_OBJ_WRITE(self, &state->indent, string_config(indent)); return Qnil; } @@ -1050,7 +1171,7 @@ static VALUE cState_indent_set(VALUE self, VALUE indent) static VALUE cState_space(VALUE self) { GET_STATE(self); - return state->space ? rb_str_new(state->space, state->space_len) : rb_str_new2(""); + return state->space ? 
state->space : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1061,21 +1182,8 @@ static VALUE cState_space(VALUE self) */ static VALUE cState_space_set(VALUE self, VALUE space) { - unsigned long len; GET_STATE(self); - Check_Type(space, T_STRING); - len = RSTRING_LEN(space); - if (len == 0) { - if (state->space) { - ruby_xfree(state->space); - state->space = NULL; - state->space_len = 0; - } - } else { - if (state->space) ruby_xfree(state->space); - state->space = fstrndup(RSTRING_PTR(space), len); - state->space_len = len; - } + RB_OBJ_WRITE(self, &state->space, string_config(space)); return Qnil; } @@ -1087,7 +1195,7 @@ static VALUE cState_space_set(VALUE self, VALUE space) static VALUE cState_space_before(VALUE self) { GET_STATE(self); - return state->space_before ? rb_str_new(state->space_before, state->space_before_len) : rb_str_new2(""); + return state->space_before ? state->space_before : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1097,21 +1205,8 @@ static VALUE cState_space_before(VALUE self) */ static VALUE cState_space_before_set(VALUE self, VALUE space_before) { - unsigned long len; GET_STATE(self); - Check_Type(space_before, T_STRING); - len = RSTRING_LEN(space_before); - if (len == 0) { - if (state->space_before) { - ruby_xfree(state->space_before); - state->space_before = NULL; - state->space_before_len = 0; - } - } else { - if (state->space_before) ruby_xfree(state->space_before); - state->space_before = fstrndup(RSTRING_PTR(space_before), len); - state->space_before_len = len; - } + RB_OBJ_WRITE(self, &state->space_before, string_config(space_before)); return Qnil; } @@ -1124,7 +1219,7 @@ static VALUE cState_space_before_set(VALUE self, VALUE space_before) static VALUE cState_object_nl(VALUE self) { GET_STATE(self); - return state->object_nl ? rb_str_new(state->object_nl, state->object_nl_len) : rb_str_new2(""); + return state->object_nl ? state->object_nl : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1135,20 +1230,8 @@ static VALUE cState_object_nl(VALUE self) */ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) { - unsigned long len; GET_STATE(self); - Check_Type(object_nl, T_STRING); - len = RSTRING_LEN(object_nl); - if (len == 0) { - if (state->object_nl) { - ruby_xfree(state->object_nl); - state->object_nl = NULL; - } - } else { - if (state->object_nl) ruby_xfree(state->object_nl); - state->object_nl = fstrndup(RSTRING_PTR(object_nl), len); - state->object_nl_len = len; - } + RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl)); return Qnil; } @@ -1160,7 +1243,7 @@ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) static VALUE cState_array_nl(VALUE self) { GET_STATE(self); - return state->array_nl ? rb_str_new(state->array_nl, state->array_nl_len) : rb_str_new2(""); + return state->array_nl ? 
state->array_nl : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1170,20 +1253,8 @@ static VALUE cState_array_nl(VALUE self) */ static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) { - unsigned long len; GET_STATE(self); - Check_Type(array_nl, T_STRING); - len = RSTRING_LEN(array_nl); - if (len == 0) { - if (state->array_nl) { - ruby_xfree(state->array_nl); - state->array_nl = NULL; - } - } else { - if (state->array_nl) ruby_xfree(state->array_nl); - state->array_nl = fstrndup(RSTRING_PTR(array_nl), len); - state->array_nl_len = len; - } + RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl)); return Qnil; } @@ -1212,6 +1283,11 @@ static VALUE cState_max_nesting(VALUE self) return LONG2FIX(state->max_nesting); } +static long long_config(VALUE num) +{ + return RTEST(num) ? FIX2LONG(num) : 0; +} + /* * call-seq: max_nesting=(depth) * @@ -1221,8 +1297,7 @@ static VALUE cState_max_nesting(VALUE self) static VALUE cState_max_nesting_set(VALUE self, VALUE depth) { GET_STATE(self); - Check_Type(depth, T_FIXNUM); - state->max_nesting = FIX2LONG(depth); + state->max_nesting = long_config(depth); return Qnil; } @@ -1350,8 +1425,7 @@ static VALUE cState_depth(VALUE self) static VALUE cState_depth_set(VALUE self, VALUE depth) { GET_STATE(self); - Check_Type(depth, T_FIXNUM); - state->depth = FIX2LONG(depth); + state->depth = long_config(depth); return Qnil; } @@ -1366,6 +1440,15 @@ static VALUE cState_buffer_initial_length(VALUE self) return LONG2FIX(state->buffer_initial_length); } +static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_initial_length) +{ + Check_Type(buffer_initial_length, T_FIXNUM); + long initial_length = FIX2LONG(buffer_initial_length); + if (initial_length > 0) { + state->buffer_initial_length = initial_length; + } +} + /* * call-seq: buffer_initial_length=(length) * @@ -1374,16 +1457,73 @@ static VALUE cState_buffer_initial_length(VALUE self) */ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length) { - long initial_length; GET_STATE(self); - Check_Type(buffer_initial_length, T_FIXNUM); - initial_length = FIX2LONG(buffer_initial_length); - if (initial_length > 0) { - state->buffer_initial_length = initial_length; - } + buffer_initial_length_set(state, buffer_initial_length); return Qnil; } +static int configure_state_i(VALUE key, VALUE val, VALUE _arg) +{ + JSON_Generator_State *state = (JSON_Generator_State *)_arg; + + if (key == sym_indent) { state->indent = string_config(val); } + else if (key == sym_space) { state->space = string_config(val); } + else if (key == sym_space_before) { state->space_before = string_config(val); } + else if (key == sym_object_nl) { state->object_nl = string_config(val); } + else if (key == sym_array_nl) { state->array_nl = string_config(val); } + else if (key == sym_max_nesting) { state->max_nesting = long_config(val); } + else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); } + else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); } + else if (key == sym_depth) { state->depth = long_config(val); } + else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); } + else if (key == sym_script_safe) { state->script_safe = RTEST(val); } + else if (key == sym_escape_slash) { state->script_safe = RTEST(val); } + else if (key == sym_strict) { state->strict = RTEST(val); } + return ST_CONTINUE; +} + +static void configure_state(JSON_Generator_State *state, VALUE config) +{ + if (!RTEST(config)) return; + + Check_Type(config, T_HASH); + 
+ if (!RHASH_SIZE(config)) return; + + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(config, configure_state_i, (VALUE)state); +} + +static VALUE cState_configure(VALUE self, VALUE opts) +{ + GET_STATE(self); + configure_state(state, opts); + return self; +} + +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts) +{ + JSON_Generator_State state = {0}; + state_init(&state); + configure_state(&state, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + FBuffer buffer = {0}; + fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); + + struct generate_json_data data = { + .buffer = &buffer, + .vstate = Qfalse, + .state = &state, + .obj = obj, + .func = generate_json, + }; + rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); + + return fbuffer_to_s(&buffer); +} + /* * */ @@ -1408,6 +1548,10 @@ void Init_generator(void) cState = rb_define_class_under(mGenerator, "State", rb_cObject); rb_define_alloc_func(cState, cState_s_allocate); rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1); + rb_define_method(cState, "initialize", cState_initialize, -1); + rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings + rb_define_private_method(cState, "_configure", cState_configure, 1); + rb_define_method(cState, "initialize_copy", cState_init_copy, 1); rb_define_method(cState, "indent", cState_indent, 0); rb_define_method(cState, "indent=", cState_indent_set, 1); @@ -1441,6 +1585,8 @@ void Init_generator(void) rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); rb_define_method(cState, "generate", cState_generate, 1); + rb_define_singleton_method(cState, "generate", cState_m_generate, 2); + VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object"); @@ -1495,7 +1641,23 @@ void Init_generator(void) i_extend = rb_intern("extend"); i_encode = rb_intern("encode"); + sym_indent = ID2SYM(rb_intern("indent")); + sym_space = ID2SYM(rb_intern("space")); + sym_space_before = ID2SYM(rb_intern("space_before")); + sym_object_nl = ID2SYM(rb_intern("object_nl")); + sym_array_nl = ID2SYM(rb_intern("array_nl")); + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_ascii_only = ID2SYM(rb_intern("ascii_only")); + sym_depth = ID2SYM(rb_intern("depth")); + sym_buffer_initial_length = ID2SYM(rb_intern("buffer_initial_length")); + sym_script_safe = ID2SYM(rb_intern("script_safe")); + sym_escape_slash = ID2SYM(rb_intern("escape_slash")); + sym_strict = ID2SYM(rb_intern("strict")); + usascii_encindex = rb_usascii_encindex(); utf8_encindex = rb_utf8_encindex(); binary_encindex = rb_ascii8bit_encindex(); + + rb_require("json/ext/generator/state"); } diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h deleted file mode 100644 index 3710ce7c2..000000000 --- a/ext/json/ext/generator/generator.h +++ /dev/null @@ -1,129 +0,0 @@ -#ifndef _GENERATOR_H_ -#define _GENERATOR_H_ - -#include -#include - -#include "ruby.h" - -/* This is the fallback definition from Ruby 3.4 */ -#ifndef RBIMPL_STDBOOL_H -#if defined(__cplusplus) -# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) -# include -# endif -#elif defined(HAVE_STDBOOL_H) -# include -#elif !defined(HAVE__BOOL) -typedef unsigned char _Bool; 
-# define bool _Bool -# define true ((_Bool)+1) -# define false ((_Bool)+0) -# define __bool_true_false_are_defined -#endif -#endif - -static char *fstrndup(const char *ptr, unsigned long len); - -/* ruby api and some helpers */ - -typedef struct JSON_Generator_StateStruct { - char *indent; - long indent_len; - char *space; - long space_len; - char *space_before; - long space_before_len; - char *object_nl; - long object_nl_len; - char *array_nl; - long array_nl_len; - long max_nesting; - char allow_nan; - char ascii_only; - char script_safe; - char strict; - long depth; - long buffer_initial_length; -} JSON_Generator_State; - -#define GET_STATE_TO(self, state) \ - TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state) - -#define GET_STATE(self) \ - JSON_Generator_State *state; \ - GET_STATE_TO(self, state) - -#define GENERATE_JSON(type) \ - FBuffer *buffer; \ - VALUE Vstate; \ - JSON_Generator_State *state; \ - \ - rb_scan_args(argc, argv, "01", &Vstate); \ - Vstate = cState_from_state_s(cState, Vstate); \ - TypedData_Get_Struct(Vstate, JSON_Generator_State, &JSON_Generator_State_type, state); \ - buffer = cState_prepare_buffer(Vstate); \ - generate_json_##type(buffer, Vstate, state, self); \ - return fbuffer_to_s(buffer) - -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); -#ifdef RUBY_INTEGER_UNIFICATION -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self); -#else -static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self); -#endif -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_included_s(VALUE self, VALUE modul); -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_to_json_raw_object(VALUE self); -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self); -static VALUE mString_Extend_json_create(VALUE self, VALUE o); -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self); -static void State_free(void *state); -static VALUE cState_s_allocate(VALUE klass); -static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -#endif -static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_float(FBuffer *buffer, VALUE Vstate, 
JSON_Generator_State *state, VALUE obj); -static VALUE cState_partial_generate(VALUE self, VALUE obj); -static VALUE cState_generate(VALUE self, VALUE obj); -static VALUE cState_from_state_s(VALUE self, VALUE opts); -static VALUE cState_indent(VALUE self); -static VALUE cState_indent_set(VALUE self, VALUE indent); -static VALUE cState_space(VALUE self); -static VALUE cState_space_set(VALUE self, VALUE space); -static VALUE cState_space_before(VALUE self); -static VALUE cState_space_before_set(VALUE self, VALUE space_before); -static VALUE cState_object_nl(VALUE self); -static VALUE cState_object_nl_set(VALUE self, VALUE object_nl); -static VALUE cState_array_nl(VALUE self); -static VALUE cState_array_nl_set(VALUE self, VALUE array_nl); -static VALUE cState_max_nesting(VALUE self); -static VALUE cState_max_nesting_set(VALUE self, VALUE depth); -static VALUE cState_allow_nan_p(VALUE self); -static VALUE cState_ascii_only_p(VALUE self); -static VALUE cState_depth(VALUE self); -static VALUE cState_depth_set(VALUE self, VALUE depth); -static VALUE cState_script_safe(VALUE self); -static VALUE cState_script_safe_set(VALUE self, VALUE depth); -static VALUE cState_strict(VALUE self); -static VALUE cState_strict_set(VALUE self, VALUE strict); -static FBuffer *cState_prepare_buffer(VALUE self); - -static const rb_data_type_t JSON_Generator_State_type; - -#endif diff --git a/ext/json/ext/parser/depend b/ext/json/ext/parser/depend index 498ffa964..c051a244f 100644 --- a/ext/json/ext/parser/depend +++ b/ext/json/ext/parser/depend @@ -1 +1 @@ -parser.o: parser.c parser.h $(srcdir)/../fbuffer/fbuffer.h +parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb index bd06f2782..4c1ac52a7 100644 --- a/ext/json/ext/parser/extconf.rb +++ b/ext/json/ext/parser/extconf.rb @@ -1,33 +1,11 @@ # frozen_string_literal: true require 'mkmf' -have_func("rb_enc_raise", "ruby.h") -have_func("rb_enc_interned_str", "ruby.h") - -# checking if String#-@ (str_uminus) dedupes... ' -begin - a = -(%w(t e s t).join) - b = -(%w(t e s t).join) - if a.equal?(b) - $CFLAGS << ' -DSTR_UMINUS_DEDUPE=1 ' - else - $CFLAGS << ' -DSTR_UMINUS_DEDUPE=0 ' - end -rescue NoMethodError - $CFLAGS << ' -DSTR_UMINUS_DEDUPE=0 ' -end - -# checking if String#-@ (str_uminus) directly interns frozen strings... 
' -begin - s = rand.to_s.freeze - if (-s).equal?(s) && (-s.dup).equal?(s) - $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=1 ' - else - $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=0 ' - end -rescue NoMethodError - $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=0 ' -end +have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 +have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 +have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby +have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby +have_func("rb_category_warn", "ruby.h") # Missing on TruffleRuby append_cflags("-std=c99") diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index cf0b3cefa..a5c918fa1 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1,7 +1,321 @@ /* This file is automatically generated from parser.rl by using ragel */ #line 1 "parser.rl" +#include "ruby.h" #include "../fbuffer/fbuffer.h" -#include "parser.h" + +static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_json_creatable_p, i_json_create, i_create_id, + i_chr, i_deep_const_get, i_match, i_aset, i_aref, + i_leftshift, i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, + sym_create_additions, sym_create_id, sym_object_class, sym_array_class, + sym_decimal_class, sym_match_string; + +static int binary_encindex; +static int utf8_encindex; + +#ifdef HAVE_RB_CATEGORY_WARN +# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define json_deprecated(message) rb_warn(message) +#endif + +static const char deprecated_create_additions_warning[] = + "JSON.load implicit support for `create_additions: true` is deprecated " + "and will be removed in 3.0, use JSON.unsafe_load or explicitly " + "pass `create_additions: true`"; + +#ifndef HAVE_RB_GC_MARK_LOCATIONS +// For TruffleRuby +void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +{ + VALUE *value = start; + + while (value < end) { + rb_gc_mark(*value); + value++; + } +} +#endif + +#ifndef HAVE_RB_HASH_BULK_INSERT +// For TruffleRuby +void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +{ + long index = 0; + while (index < count) { + VALUE name = pairs[index++]; + VALUE value = pairs[index++]; + rb_hash_aset(hash, name, value); + } + RB_GC_GUARD(hash); +} +#endif + +/* name cache */ + +#include +#include + +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. 
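+// For reference: 63 slots of sizeof(VALUE) (8 bytes on 64-bit builds) plus the
+// length counter put one cache instance at roughly half a kilobyte.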
+#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + +static rb_encoding *enc_utf8; + +#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, length, enc_utf8); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); +# endif +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + long rstring_length = RSTRING_LEN(rstring); + if (length == rstring_length) { + return memcmp(str, RSTRING_PTR(rstring), length); + } else { + return (int)(length - rstring_length); + } +} + +static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, entry); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. 
+ // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rsymbol); + } + return rsymbol; +} + +/* rvalue stack */ + +#define RVALUE_STACK_INITIAL_CAPA 128 + +enum rvalue_stack_type { + RVALUE_STACK_HEAP_ALLOCATED = 0, + RVALUE_STACK_STACK_ALLOCATED = 1, +}; + +typedef struct rvalue_stack_struct { + enum rvalue_stack_type type; + long capa; + long head; + VALUE *ptr; +} rvalue_stack; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); + +static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = rvalue_stack_spill(stack, handle, stack_ref); + } else { + REALLOC_N(stack->ptr, VALUE, required); + stack->capa = required; + } + return stack; +} + +static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +{ + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = rvalue_stack_grow(stack, handle, stack_ref); + } + stack->ptr[stack->head] = value; + stack->head++; +} + +static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) +{ + return stack->ptr + (stack->head - count); +} + +static inline void rvalue_stack_pop(rvalue_stack *stack, long count) +{ + stack->head -= count; +} + +static void rvalue_stack_mark(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + rb_gc_mark_locations(stack->ptr, stack->ptr + stack->head); +} + +static void rvalue_stack_free(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + if (stack) { + ruby_xfree(stack->ptr); + ruby_xfree(stack); + } +} + +static size_t rvalue_stack_memsize(const void *ptr) +{ + const rvalue_stack *stack = (const rvalue_stack *)ptr; + return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_rvalue_stack_type = { + "JSON::Ext::Parser/rvalue_stack", + { + .dmark = rvalue_stack_mark, + .dfree = rvalue_stack_free, + .dsize = rvalue_stack_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) +{ + rvalue_stack *stack; + *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, rvalue_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(VALUE, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); + return stack; +} + +static void rvalue_stack_eagerly_release(VALUE handle) +{ + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + RTYPEDDATA_DATA(handle) = NULL; + rvalue_stack_free(stack); +} /* unicode */ @@ -69,6 +383,50 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + VALUE object_class; + VALUE array_class; + VALUE decimal_class; + VALUE match_string; + FBuffer fbuffer; + int max_nesting; + bool allow_nan; + bool allow_trailing_comma; + bool parsing_name; + bool symbolize_names; + bool freeze; + bool create_additions; + bool deprecated_create_additions; + 
rvalue_cache name_cache; + rvalue_stack *stack; + VALUE stack_handle; +} JSON_Parser; + +#define GET_PARSER \ + GET_PARSER_INIT; \ + if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") + +#define GET_PARSER_INIT \ + JSON_Parser *json; \ + TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) + +#define MinusInfinity "-Infinity" +#define EVIL 0x666 + +static const rb_data_type_t JSON_Parser_type; +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); + + #define PARSE_ERROR_FRAGMENT_LEN 32 #ifdef RBIMPL_ATTR_NORETURN RBIMPL_ATTR_NORETURN() @@ -86,60 +444,49 @@ static void raise_parse_error(const char *format, const char *start) ptr = buffer; } - rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr); + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); } -static VALUE mJSON, mExt, cParser, eNestingError; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, - i_object_class, i_array_class, i_decimal_class, - i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_freeze, i_uminus; -static int binary_encindex; -static int utf8_encindex; +#line 475 "parser.rl" -#line 129 "parser.rl" - - -#line 111 "parser.c" +#line 457 "parser.c" enum {JSON_object_start = 1}; -enum {JSON_object_first_final = 27}; +enum {JSON_object_first_final = 32}; enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 171 "parser.rl" +#line 515 "parser.rl" +#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) + static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(object_class) ? 
rb_hash_new() : rb_class_new_instance(0, 0, object_class); + long stack_head = json->stack->head; -#line 135 "parser.c" +#line 481 "parser.c" { cs = JSON_object_start; } -#line 186 "parser.rl" +#line 530 "parser.rl" -#line 142 "parser.c" +#line 488 "parser.c" { + short _widec; if ( p == pe ) goto _test_eof; switch ( cs ) @@ -159,27 +506,30 @@ case 2: case 13: goto st2; case 32: goto st2; case 34: goto tr2; - case 47: goto st23; + case 47: goto st28; case 125: goto tr4; } if ( 9 <= (*p) && (*p) <= 10 ) goto st2; goto st0; tr2: -#line 153 "parser.rl" +#line 494 "parser.rl" { char *np; - json->parsing_name = 1; - np = JSON_parse_string(json, p, pe, &last_name); - json->parsing_name = 0; - if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} + json->parsing_name = true; + np = JSON_parse_string(json, p, pe, result); + json->parsing_name = false; + if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { + PUSH(*result); + {p = (( np))-1;} + } } goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 183 "parser.c" +#line 533 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -230,7 +580,7 @@ case 8: case 32: goto st8; case 34: goto tr11; case 45: goto tr11; - case 47: goto st19; + case 47: goto st24; case 73: goto tr11; case 78: goto tr11; case 91: goto tr11; @@ -246,19 +596,12 @@ case 8: goto st8; goto st0; tr11: -#line 137 "parser.rl" +#line 483 "parser.rl" { - VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v, current_nesting); + char *np = JSON_parse_value(json, p, pe, result, current_nesting); if (np == NULL) { p--; {p++; cs = 9; goto _out;} } else { - if (NIL_P(json->object_class)) { - OBJ_FREEZE(last_name); - rb_hash_aset(*result, last_name, v); - } else { - rb_funcall(*result, i_aset, 2, last_name, v); - } {p = (( np))-1;} } } @@ -267,16 +610,75 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 271 "parser.c" - switch( (*p) ) { - case 13: goto st9; - case 32: goto st9; - case 44: goto st10; - case 47: goto st15; +#line 614 "parser.c" + _widec = (*p); + if ( (*p) < 13 ) { + if ( (*p) > 9 ) { + if ( 10 <= (*p) && (*p) <= 10 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 13 ) { + if ( (*p) < 44 ) { + if ( 32 <= (*p) && (*p) <= 32 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 44 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st9; + case 269: goto st10; + case 288: goto st10; + case 300: goto st11; + case 303: goto st16; + case 525: goto st9; + case 544: goto st9; + case 556: goto st2; + case 559: goto st20; + } + if ( _widec > 266 ) { + if ( 521 <= _widec && _widec <= 522 ) + goto st9; + } else if ( _widec >= 265 ) + goto st10; + goto st0; +tr4: +#line 505 "parser.rl" + { p--; {p++; cs = 32; goto _out;} } + goto st32; +st32: + 
if ( ++p == pe ) + goto _test_eof32; +case 32: +#line 682 "parser.c" goto st0; st10: if ( ++p == pe ) @@ -285,8 +687,9 @@ case 10: switch( (*p) ) { case 13: goto st10; case 32: goto st10; - case 34: goto tr2; - case 47: goto st11; + case 44: goto st11; + case 47: goto st16; + case 125: goto tr4; } if ( 9 <= (*p) && (*p) <= 10 ) goto st10; @@ -296,139 +699,288 @@ case 10: goto _test_eof11; case 11: switch( (*p) ) { - case 42: goto st12; - case 47: goto st14; + case 13: goto st11; + case 32: goto st11; + case 34: goto tr2; + case 47: goto st12; } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st11; goto st0; st12: if ( ++p == pe ) goto _test_eof12; case 12: - if ( (*p) == 42 ) - goto st13; - goto st12; + switch( (*p) ) { + case 42: goto st13; + case 47: goto st15; + } + goto st0; st13: if ( ++p == pe ) goto _test_eof13; case 13: - switch( (*p) ) { - case 42: goto st13; - case 47: goto st10; - } - goto st12; + if ( (*p) == 42 ) + goto st14; + goto st13; st14: if ( ++p == pe ) goto _test_eof14; case 14: - if ( (*p) == 10 ) - goto st10; - goto st14; + switch( (*p) ) { + case 42: goto st14; + case 47: goto st11; + } + goto st13; st15: if ( ++p == pe ) goto _test_eof15; case 15: - switch( (*p) ) { - case 42: goto st16; - case 47: goto st18; - } - goto st0; + if ( (*p) == 10 ) + goto st11; + goto st15; st16: if ( ++p == pe ) goto _test_eof16; case 16: - if ( (*p) == 42 ) - goto st17; - goto st16; + switch( (*p) ) { + case 42: goto st17; + case 47: goto st19; + } + goto st0; st17: if ( ++p == pe ) goto _test_eof17; case 17: - switch( (*p) ) { - case 42: goto st17; - case 47: goto st9; - } - goto st16; + if ( (*p) == 42 ) + goto st18; + goto st17; st18: if ( ++p == pe ) goto _test_eof18; case 18: - if ( (*p) == 10 ) - goto st9; - goto st18; -tr4: -#line 161 "parser.rl" - { p--; {p++; cs = 27; goto _out;} } - goto st27; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: -#line 367 "parser.c" - goto st0; + switch( (*p) ) { + case 42: goto st18; + case 47: goto st10; + } + goto st17; st19: if ( ++p == pe ) goto _test_eof19; case 19: - switch( (*p) ) { - case 42: goto st20; - case 47: goto st22; - } - goto st0; + if ( (*p) == 10 ) + goto st10; + goto st19; st20: if ( ++p == pe ) goto _test_eof20; case 20: - if ( (*p) == 42 ) - goto st21; - goto st20; + _widec = (*p); + if ( (*p) > 42 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st17; + case 303: goto st19; + case 554: goto st21; + case 559: goto st23; + } + goto st0; st21: if ( ++p == pe ) goto _test_eof21; case 21: - switch( (*p) ) { - case 42: goto st21; - case 47: goto st8; - } - goto st20; + _widec = (*p); + if ( (*p) < 42 ) { + if ( (*p) <= 41 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 42 ) { + if ( 43 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st18; + case 554: goto st22; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st21; + } else if ( _widec >= 128 ) + goto st17; + 
goto st0; st22: if ( ++p == pe ) goto _test_eof22; case 22: - if ( (*p) == 10 ) - goto st8; - goto st22; + _widec = (*p); + if ( (*p) < 43 ) { + if ( (*p) > 41 ) { + if ( 42 <= (*p) && (*p) <= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 46 ) { + if ( (*p) > 47 ) { + if ( 48 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st18; + case 303: goto st10; + case 554: goto st22; + case 559: goto st9; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st21; + } else if ( _widec >= 128 ) + goto st17; + goto st0; st23: if ( ++p == pe ) goto _test_eof23; case 23: - switch( (*p) ) { - case 42: goto st24; - case 47: goto st26; - } + _widec = (*p); + if ( (*p) < 10 ) { + if ( (*p) <= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 10 ) { + if ( 11 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 492 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 266: goto st10; + case 522: goto st9; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st23; + } else if ( _widec >= 128 ) + goto st19; goto st0; st24: if ( ++p == pe ) goto _test_eof24; case 24: - if ( (*p) == 42 ) - goto st25; - goto st24; + switch( (*p) ) { + case 42: goto st25; + case 47: goto st27; + } + goto st0; st25: if ( ++p == pe ) goto _test_eof25; case 25: - switch( (*p) ) { - case 42: goto st25; - case 47: goto st2; - } - goto st24; + if ( (*p) == 42 ) + goto st26; + goto st25; st26: if ( ++p == pe ) goto _test_eof26; case 26: + switch( (*p) ) { + case 42: goto st26; + case 47: goto st8; + } + goto st25; +st27: + if ( ++p == pe ) + goto _test_eof27; +case 27: + if ( (*p) == 10 ) + goto st8; + goto st27; +st28: + if ( ++p == pe ) + goto _test_eof28; +case 28: + switch( (*p) ) { + case 42: goto st29; + case 47: goto st31; + } + goto st0; +st29: + if ( ++p == pe ) + goto _test_eof29; +case 29: + if ( (*p) == 42 ) + goto st30; + goto st29; +st30: + if ( ++p == pe ) + goto _test_eof30; +case 30: + switch( (*p) ) { + case 42: goto st30; + case 47: goto st2; + } + goto st29; +st31: + if ( ++p == pe ) + goto _test_eof31; +case 31: if ( (*p) == 10 ) goto st2; - goto st26; + goto st31; } _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; @@ -438,6 +990,7 @@ case 26: _test_eof7: cs = 7; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; + _test_eof32: cs = 32; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; @@ -447,7 +1000,6 @@ case 26: _test_eof16: cs = 16; goto _test_eof; _test_eof17: cs = 17; goto _test_eof; _test_eof18: cs = 18; goto _test_eof; - 
_test_eof27: cs = 27; goto _test_eof; _test_eof19: cs = 19; goto _test_eof; _test_eof20: cs = 20; goto _test_eof; _test_eof21: cs = 21; goto _test_eof; @@ -456,24 +1008,56 @@ case 26: _test_eof24: cs = 24; goto _test_eof; _test_eof25: cs = 25; goto _test_eof; _test_eof26: cs = 26; goto _test_eof; + _test_eof27: cs = 27; goto _test_eof; + _test_eof28: cs = 28; goto _test_eof; + _test_eof29: cs = 29; goto _test_eof; + _test_eof30: cs = 30; goto _test_eof; + _test_eof31: cs = 31; goto _test_eof; _test_eof: {} _out: {} } -#line 187 "parser.rl" +#line 531 "parser.rl" if (cs >= JSON_object_first_final) { - if (json->create_additions) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->object_class)) { + VALUE object = rb_class_new_instance(0, 0, json->object_class); + long index = 0; + VALUE *items = rvalue_stack_peek(json->stack, count); + while (index < count) { + VALUE name = items[index++]; + VALUE value = items[index++]; + rb_funcall(object, i_aset, 2, name, value); + } + *result = object; + } else { + VALUE hash; +#ifdef HAVE_RB_HASH_NEW_CAPA + hash = rb_hash_new_capa(count >> 1); +#else + hash = rb_hash_new(); +#endif + rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); + *result = hash; + } + rvalue_stack_pop(json->stack, count); + + if (RB_UNLIKELY(json->create_additions)) { VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); + if (json->object_class) { + klassname = rb_funcall(*result, i_aref, 1, json->create_id); } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); + klassname = rb_hash_aref(*result, json->create_id); } if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { + if (json->deprecated_create_additions) { + json_deprecated(deprecated_create_additions_warning); + } *result = rb_funcall(klass, i_json_create, 1, *result); } } @@ -485,8 +1069,7 @@ case 26: } - -#line 490 "parser.c" +#line 1073 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -494,7 +1077,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 287 "parser.rl" +#line 666 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -502,14 +1085,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 506 "parser.c" +#line 1089 "parser.c" { cs = JSON_value_start; } -#line 294 "parser.rl" +#line 673 "parser.rl" -#line 513 "parser.c" +#line 1096 "parser.c" { if ( p == pe ) goto _test_eof; @@ -543,14 +1126,19 @@ case 1: cs = 0; goto _out; tr2: -#line 239 "parser.rl" +#line 609 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} + if (np == NULL) { + p--; + {p++; cs = 29; goto _out;} + } else { + {p = (( np))-1;} + } } goto st29; tr3: -#line 244 "parser.rl" +#line 619 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -563,14 +1151,18 @@ cs = 0; } } np = JSON_parse_float(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} + if (np != NULL) { + {p = (( np))-1;} + } np = JSON_parse_integer(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} + if (np != NULL) { + {p = (( np))-1;} + } p--; {p++; cs = 29; goto _out;} } goto st29; tr7: -#line 262 "parser.rl" +#line 641 "parser.rl" { char *np; np = 
JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -578,7 +1170,7 @@ cs = 0; } goto st29; tr11: -#line 268 "parser.rl" +#line 647 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -586,7 +1178,7 @@ cs = 0; } goto st29; tr25: -#line 232 "parser.rl" +#line 602 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -596,7 +1188,7 @@ cs = 0; } goto st29; tr27: -#line 225 "parser.rl" +#line 595 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -606,19 +1198,19 @@ cs = 0; } goto st29; tr31: -#line 219 "parser.rl" +#line 589 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 216 "parser.rl" +#line 586 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 222 "parser.rl" +#line 592 "parser.rl" { *result = Qtrue; } @@ -627,9 +1219,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 274 "parser.rl" +#line 653 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 633 "parser.c" +#line 1225 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -870,13 +1462,14 @@ case 28: _out: {} } -#line 295 "parser.rl" +#line 674 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); } if (cs >= JSON_value_first_final) { + PUSH(*result); return p; } else { return NULL; @@ -884,7 +1477,7 @@ case 28: } -#line 888 "parser.c" +#line 1481 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -892,23 +1485,45 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 315 "parser.rl" +#line 695 "parser.rl" +#define MAX_FAST_INTEGER_SIZE 18 +static inline VALUE fast_parse_integer(char *p, char *pe) +{ + bool negative = false; + if (*p == '-') { + negative = true; + p++; + } + + long long memo = 0; + while (p < pe) { + memo *= 10; + memo += *p - '0'; + p++; + } + + if (negative) { + memo = -memo; + } + return LL2NUM(memo); +} + static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; -#line 904 "parser.c" +#line 1519 "parser.c" { cs = JSON_integer_start; } -#line 322 "parser.rl" +#line 724 "parser.rl" json->memo = p; -#line 912 "parser.c" +#line 1527 "parser.c" { if ( p == pe ) goto _test_eof; @@ -942,14 +1557,14 @@ case 3: goto st0; goto tr4; tr4: -#line 312 "parser.rl" +#line 692 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 953 "parser.c" +#line 1568 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -968,14 +1583,18 @@ case 5: _out: {} } -#line 324 "parser.rl" +#line 726 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); + if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { + *result = fast_parse_integer(json->memo, p); + } else { + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + } return p + 1; } else { return NULL; @@ -983,7 +1602,7 @@ case 5: } -#line 987 "parser.c" +#line 1606 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -991,7 +1610,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 349 "parser.rl" +#line 755 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -999,15 
+1618,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1003 "parser.c" +#line 1622 "parser.c" { cs = JSON_float_start; } -#line 356 "parser.rl" +#line 762 "parser.rl" json->memo = p; -#line 1011 "parser.c" +#line 1630 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1065,14 +1684,14 @@ case 8: goto st0; goto tr9; tr9: -#line 343 "parser.rl" +#line 749 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1076 "parser.c" +#line 1695 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1133,12 +1752,12 @@ case 7: _out: {} } -#line 358 "parser.rl" +#line 764 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; ID method_id = 0; - if (!NIL_P(json->decimal_class)) { + if (json->decimal_class) { if (rb_respond_to(json->decimal_class, i_try_convert)) { mod = json->decimal_class; method_id = i_try_convert; @@ -1167,15 +1786,15 @@ case 7: } long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer)); + VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); *result = rb_funcallv(mod, method_id, 1, &text); } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); + *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); } return p + 1; @@ -1186,37 +1805,37 @@ case 7: -#line 1190 "parser.c" +#line 1809 "parser.c" enum {JSON_array_start = 1}; -enum {JSON_array_first_final = 17}; +enum {JSON_array_first_final = 22}; enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 438 "parser.rl" +#line 841 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE array_class = json->array_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(array_class) ? 
rb_ary_new() : rb_class_new_instance(0, 0, array_class); + long stack_head = json->stack->head; -#line 1212 "parser.c" +#line 1830 "parser.c" { cs = JSON_array_start; } -#line 451 "parser.rl" +#line 853 "parser.rl" -#line 1219 "parser.c" +#line 1837 "parser.c" { + short _widec; if ( p == pe ) goto _test_eof; switch ( cs ) @@ -1237,7 +1856,7 @@ case 2: case 32: goto st2; case 34: goto tr2; case 45: goto tr2; - case 47: goto st13; + case 47: goto st18; case 73: goto tr2; case 78: goto tr2; case 91: goto tr2; @@ -1254,18 +1873,13 @@ case 2: goto st2; goto st0; tr2: -#line 415 "parser.rl" +#line 821 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { - if (NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { - rb_funcall(*result, i_leftshift, 1, v); - } {p = (( np))-1;} } } @@ -1274,15 +1888,23 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1278 "parser.c" - switch( (*p) ) { +#line 1892 "parser.c" + _widec = (*p); + if ( 44 <= (*p) && (*p) <= 44 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { case 13: goto st3; case 32: goto st3; - case 44: goto st4; - case 47: goto st9; + case 47: goto st4; case 93: goto tr4; + case 300: goto st8; + case 556: goto st13; } - if ( 9 <= (*p) && (*p) <= 10 ) + if ( 9 <= _widec && _widec <= 10 ) goto st3; goto st0; st4: @@ -1290,57 +1912,67 @@ case 3: goto _test_eof4; case 4: switch( (*p) ) { - case 13: goto st4; - case 32: goto st4; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st5; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; + case 42: goto st5; + case 47: goto st7; } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st4; goto st0; st5: if ( ++p == pe ) goto _test_eof5; case 5: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st8; - } - goto st0; + if ( (*p) == 42 ) + goto st6; + goto st5; st6: if ( ++p == pe ) goto _test_eof6; case 6: - if ( (*p) == 42 ) - goto st7; - goto st6; + switch( (*p) ) { + case 42: goto st6; + case 47: goto st3; + } + goto st5; st7: if ( ++p == pe ) goto _test_eof7; case 7: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st4; - } - goto st6; + if ( (*p) == 10 ) + goto st3; + goto st7; +tr4: +#line 833 "parser.rl" + { p--; {p++; cs = 22; goto _out;} } + goto st22; +st22: + if ( ++p == pe ) + goto _test_eof22; +case 22: +#line 1951 "parser.c" + goto st0; st8: if ( ++p == pe ) goto _test_eof8; case 8: - if ( (*p) == 10 ) - goto st4; - goto st8; + switch( (*p) ) { + case 13: goto st8; + case 32: goto st8; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st9; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st8; + goto st0; st9: if ( ++p == pe ) goto _test_eof9; @@ -1363,7 +1995,7 @@ case 10: case 11: switch( (*p) ) { case 42: goto st11; - case 47: goto st3; + case 47: goto st8; } goto st10; st12: @@ -1371,50 +2003,252 @@ case 11: goto _test_eof12; case 12: if ( (*p) == 10 ) - goto st3; + goto st8; goto st12; -tr4: -#line 430 "parser.rl" - { p--; {p++; cs = 17; goto _out;} } - goto st17; -st17: - if ( ++p == pe ) - goto 
_test_eof17; -case 17: -#line 1385 "parser.c" - goto st0; st13: if ( ++p == pe ) goto _test_eof13; case 13: - switch( (*p) ) { - case 42: goto st14; - case 47: goto st16; - } + _widec = (*p); + if ( (*p) < 13 ) { + if ( (*p) > 9 ) { + if ( 10 <= (*p) && (*p) <= 10 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 13 ) { + if ( (*p) > 32 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 32 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 34: goto tr2; + case 45: goto tr2; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 93: goto tr4; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + case 269: goto st8; + case 288: goto st8; + case 303: goto st9; + case 525: goto st13; + case 544: goto st13; + case 559: goto st14; + } + if ( _widec < 265 ) { + if ( 48 <= _widec && _widec <= 57 ) + goto tr2; + } else if ( _widec > 266 ) { + if ( 521 <= _widec && _widec <= 522 ) + goto st13; + } else + goto st8; goto st0; st14: if ( ++p == pe ) goto _test_eof14; case 14: - if ( (*p) == 42 ) - goto st15; - goto st14; + _widec = (*p); + if ( (*p) > 42 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st10; + case 303: goto st12; + case 554: goto st15; + case 559: goto st17; + } + goto st0; st15: if ( ++p == pe ) goto _test_eof15; case 15: - switch( (*p) ) { - case 42: goto st15; - case 47: goto st2; - } - goto st14; + _widec = (*p); + if ( (*p) < 42 ) { + if ( (*p) <= 41 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 42 ) { + if ( 43 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st11; + case 554: goto st16; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st15; + } else if ( _widec >= 128 ) + goto st10; + goto st0; st16: if ( ++p == pe ) goto _test_eof16; case 16: + _widec = (*p); + if ( (*p) < 43 ) { + if ( (*p) > 41 ) { + if ( 42 <= (*p) && (*p) <= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 46 ) { + if ( (*p) > 47 ) { + if ( 48 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec 
+= 256; + } + } else if ( (*p) >= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st11; + case 303: goto st8; + case 554: goto st16; + case 559: goto st13; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st15; + } else if ( _widec >= 128 ) + goto st10; + goto st0; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + _widec = (*p); + if ( (*p) < 10 ) { + if ( (*p) <= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 10 ) { + if ( 11 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 831 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 266: goto st8; + case 522: goto st13; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st17; + } else if ( _widec >= 128 ) + goto st12; + goto st0; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + switch( (*p) ) { + case 42: goto st19; + case 47: goto st21; + } + goto st0; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + if ( (*p) == 42 ) + goto st20; + goto st19; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + switch( (*p) ) { + case 42: goto st20; + case 47: goto st2; + } + goto st19; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: if ( (*p) == 10 ) goto st2; - goto st16; + goto st21; } _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; @@ -1422,24 +2256,45 @@ case 16: _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; + _test_eof22: cs = 22; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; _test_eof13: cs = 13; goto _test_eof; _test_eof14: cs = 14; goto _test_eof; _test_eof15: cs = 15; goto _test_eof; _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + _test_eof21: cs = 21; goto _test_eof; _test_eof: {} _out: {} } -#line 452 "parser.rl" +#line 854 "parser.rl" if(cs >= JSON_array_first_final) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->array_class)) { + VALUE array = rb_class_new_instance(0, 0, json->array_class); + VALUE *items = rvalue_stack_peek(json->stack, count); + long index; + for (index = 0; index < count; index++) { + rb_funcall(array, i_leftshift, 1, items[index]); + } + *result = array; + } else { + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); + *result = array; + } + rvalue_stack_pop(json->stack, count); + return p + 1; } else { raise_parse_error("unexpected token at '%s'", p); @@ -1447,29 +2302,81 @@ case 16: } } -static const size_t MAX_STACK_BUFFER_SIZE = 128; -static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize) +static inline VALUE build_string(const char *start, const char *end, bool intern, bool 
symbolize) +{ + if (symbolize) { + intern = true; + } + VALUE result; +# ifdef HAVE_RB_ENC_INTERNED_STR + if (intern) { + result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); + } else { + result = rb_utf8_str_new(start, (long)(end - start)); + } +# else + result = rb_utf8_str_new(start, (long)(end - start)); + if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + } +# endif + + if (symbolize) { + result = rb_str_intern(result); + } + + return result; +} + +static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +{ + size_t bufferSize = stringEnd - string; + + if (is_name) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + return build_string(string, stringEnd, intern, symbolize); +} + +static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { - VALUE result = Qnil; size_t bufferSize = stringEnd - string; char *p = string, *pe = string, *unescape, *bufferStart, *buffer; int unescape_len; char buf[4]; - if (bufferSize > MAX_STACK_BUFFER_SIZE) { -# ifdef HAVE_RB_ENC_INTERNED_STR - bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1); -# else - bufferStart = buffer = ALLOC_N(char, bufferSize); -# endif - } else { -# ifdef HAVE_RB_ENC_INTERNED_STR - bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1); -# else - bufferStart = buffer = ALLOCA_N(char, bufferSize); -# endif + if (is_name) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + pe = memchr(p, '\\', bufferSize); + if (RB_UNLIKELY(pe == NULL)) { + return build_string(string, stringEnd, intern, symbolize); } + VALUE result = rb_str_buf_new(bufferSize); + rb_enc_associate_index(result, utf8_encindex); + buffer = bufferStart = RSTRING_PTR(result); + while (pe < stringEnd) { if (*pe == '\\') { unescape = (char *) "?"; @@ -1502,9 +2409,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int break; case 'u': if (pe > stringEnd - 4) { - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } raise_parse_error("incomplete unicode character escape sequence at '%s'", p); } else { uint32_t ch = unescape_unicode((unsigned char *) ++pe); @@ -1522,9 +2426,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int if ((ch & 0xFC00) == 0xD800) { pe++; if (pe > stringEnd - 6) { - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } raise_parse_error("incomplete surrogate pair at '%s'", p); } if (pe[0] == '\\' && pe[1] == 'u') { @@ -1557,56 +2458,27 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int MEMCPY(buffer, p, char, pe - p); buffer += pe - p; } - -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); - } else { - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - } - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } -# else - result = 
rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } - - if (intern) { - # if STR_UMINUS_DEDUPE_FROZEN - // Starting from MRI 2.8 it is preferable to freeze the string - // before deduplication so that it can be interned directly - // otherwise it would be duplicated first which is wasteful. - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - # elif STR_UMINUS_DEDUPE - // MRI 2.5 and older do not deduplicate strings that are already - // frozen. - result = rb_funcall(result, i_uminus, 0); - # else - result = rb_str_freeze(result); - # endif - } -# endif + rb_str_set_len(result, buffer - bufferStart); if (symbolize) { - result = rb_str_intern(result); + result = rb_str_intern(result); + } else if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); } return result; } -#line 1602 "parser.c" +#line 2474 "parser.c" enum {JSON_string_start = 1}; -enum {JSON_string_first_final = 8}; +enum {JSON_string_first_final = 9}; enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 630 "parser.rl" +#line 1077 "parser.rl" static int @@ -1627,15 +2499,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1631 "parser.c" +#line 2503 "parser.c" { cs = JSON_string_start; } -#line 650 "parser.rl" +#line 1097 "parser.rl" json->memo = p; -#line 1639 "parser.c" +#line 2511 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1660,47 +2532,56 @@ case 2: goto st0; goto st2; tr2: -#line 617 "parser.rl" +#line 1059 "parser.rl" { - *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - if (NIL_P(*result)) { - p--; - {p++; cs = 8; goto _out;} - } else { - {p = (( p + 1))-1;} - } + *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} } -#line 627 "parser.rl" - { p--; {p++; cs = 8; goto _out;} } - goto st8; -st8: +#line 1052 "parser.rl" + { + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} + } + goto st9; +tr6: +#line 1052 "parser.rl" + { + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} + } + goto st9; +st9: if ( ++p == pe ) - goto _test_eof8; -case 8: -#line 1681 "parser.c" + goto _test_eof9; +case 9: +#line 2564 "parser.c" goto st0; st3: if ( ++p == pe ) goto _test_eof3; case 3: if ( (*p) == 117 ) - goto st4; + goto st5; if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) goto st0; - goto st2; + goto st4; st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st5; - } else - goto st5; - goto st0; + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 34: goto tr6; + case 92: goto st3; + } + if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) + goto st0; + goto st4; st5: if ( ++p == pe ) goto _test_eof5; @@ -1733,27 +2614,41 @@ case 6: case 7: if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) - goto st2; + goto st8; } else if ( (*p) 
> 70 ) { if ( 97 <= (*p) && (*p) <= 102 ) - goto st2; + goto st8; } else - goto st2; + goto st8; + goto st0; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st4; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st4; + } else + goto st4; goto st0; } _test_eof2: cs = 2; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; _test_eof: {} _out: {} } -#line 652 "parser.rl" +#line 1099 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1789,18 +2684,78 @@ static VALUE convert_encoding(VALUE source) { int encindex = RB_ENCODING_GET(source); - if (encindex == utf8_encindex) { + if (RB_LIKELY(encindex == utf8_encindex)) { return source; } if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Deprecate in 2.8.0 - // TODO: Remove in 3.0.0 + // For historical reason, we silently reinterpret binary strings as UTF-8 return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); } - return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding()); + return rb_funcall(source, i_encode, 1, Encoding_UTF_8); +} + +static int configure_parser_i(VALUE key, VALUE val, VALUE data) +{ + JSON_Parser *json = (JSON_Parser *)data; + + if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } + else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { json->freeze = RTEST(val); } + else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } + else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } + else if (key == sym_create_additions) { + if (NIL_P(val)) { + json->create_additions = true; + json->deprecated_create_additions = true; + } else { + json->create_additions = RTEST(val); + json->deprecated_create_additions = false; + } + } + + return ST_CONTINUE; +} + +static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) +{ + if (json->Vsource) { + rb_raise(rb_eTypeError, "already initialized instance"); + } + + json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + json->max_nesting = 100; + + if (!NIL_P(opts)) { + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) > 0) { + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. 
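+            // For example, a call like JSON.parse(doc, symbolize_names: true)
+            // dispatches on a single key here instead of probing every
+            // supported option.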
+ rb_hash_foreach(opts, configure_parser_i, (VALUE)json); + + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } + + if (json->create_additions && !json->create_id) { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + } + + } + source = convert_encoding(StringValue(source)); + StringValue(source); + json->len = RSTRING_LEN(source); + json->source = RSTRING_PTR(source); + json->Vsource = source; } /* @@ -1837,116 +2792,16 @@ static VALUE convert_encoding(VALUE source) */ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) { - VALUE source, opts; GET_PARSER_INIT; - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - rb_check_arity(argc, 1, 2); - source = argv[0]; - opts = Qnil; - if (argc == 2) { - opts = argv[1]; - Check_Type(argv[1], T_HASH); - if (RHASH_SIZE(argv[1]) > 0) { - opts = argv[1]; - } - } - if (!NIL_P(opts)) { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_freeze); - if (option_given_p(opts, tmp)) { - json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->freeze = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - json->create_additions = RTEST(rb_hash_aref(opts, tmp)); - } else { - json->create_additions = 0; - } - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - } else { - json->decimal_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? match_string : Qnil; - } else { - json->match_string = Qnil; - } - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 0; - json->create_id = Qnil; - json->object_class = Qnil; - json->array_class = Qnil; - json->decimal_class = Qnil; - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source);; - json->Vsource = source; + parser_init(json, argv[0], argc == 2 ? 
argv[1] : Qnil); return self; } -#line 1950 "parser.c" +#line 2805 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1954,7 +2809,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 858 "parser.rl" +#line 1265 "parser.rl" /* @@ -1971,17 +2826,206 @@ static VALUE cParser_parse(VALUE self) VALUE result = Qnil; GET_PARSER; + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + + +#line 2842 "parser.c" + { + cs = JSON_start; + } + +#line 1293 "parser.rl" + p = json->source; + pe = p + json->len; + +#line 2851 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +st1: + if ( ++p == pe ) + goto _test_eof1; +case 1: + switch( (*p) ) { + case 13: goto st1; + case 32: goto st1; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st6; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st1; + goto st0; +st0: +cs = 0; + goto _out; +tr2: +#line 1257 "parser.rl" + { + char *np = JSON_parse_value(json, p, pe, &result, 0); + if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} + } + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 2895 "parser.c" + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 47: goto st2; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 42: goto st3; + case 47: goto st5; + } + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 42 ) + goto st4; + goto st3; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st4; + case 47: goto st10; + } + goto st3; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 10 ) + goto st10; + goto st5; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st7; + case 47: goto st9; + } + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 42 ) + goto st8; + goto st7; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 42: goto st8; + case 47: goto st1; + } + goto st7; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + if ( (*p) == 10 ) + goto st1; + goto st9; + } + _test_eof1: cs = 1; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 1296 "parser.rl" + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + raise_parse_error("unexpected token at '%s'", p); + return Qnil; + } +} + +static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + + 
JSON_Parser _parser = {0}; + JSON_Parser *json = &_parser; + parser_init(json, source, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; -#line 1976 "parser.c" + +#line 3020 "parser.c" { cs = JSON_start; } -#line 875 "parser.rl" +#line 1331 "parser.rl" p = json->source; pe = p + json->len; -#line 1985 "parser.c" +#line 3029 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2015,7 +3059,7 @@ case 1: cs = 0; goto _out; tr2: -#line 850 "parser.rl" +#line 1257 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2025,7 +3069,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2029 "parser.c" +#line 3073 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2114,7 +3158,11 @@ case 9: _out: {} } -#line 878 "parser.rl" +#line 1334 "parser.rl" + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } if (cs >= JSON_first_final && p == pe) { return result; @@ -2133,19 +3181,23 @@ static void JSON_mark(void *ptr) rb_gc_mark(json->array_class); rb_gc_mark(json->decimal_class); rb_gc_mark(json->match_string); + rb_gc_mark(json->stack_handle); + + const VALUE *name_cache_entries = &json->name_cache.entries[0]; + rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length); } static void JSON_free(void *ptr) { JSON_Parser *json = ptr; - fbuffer_free(json->fbuffer); + fbuffer_free(&json->fbuffer); ruby_xfree(json); } static size_t JSON_memsize(const void *ptr) { const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(json->fbuffer); + return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); } static const rb_data_type_t JSON_Parser_type = { @@ -2159,7 +3211,7 @@ static VALUE cJSON_parser_s_allocate(VALUE klass) { JSON_Parser *json; VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - json->fbuffer = fbuffer_alloc(0); + fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); return obj; } @@ -2193,6 +3245,8 @@ void Init_parser(void) rb_define_method(cParser, "parse", cParser_parse, 0); rb_define_method(cParser, "source", cParser_source, 0); + rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); + CNaN = rb_const_get(mJSON, rb_intern("NaN")); rb_gc_register_mark_object(CNaN); @@ -2202,30 +3256,38 @@ void Init_parser(void) CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); rb_gc_register_mark_object(CMinusInfinity); + rb_global_variable(&Encoding_UTF_8); + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); + sym_freeze = ID2SYM(rb_intern("freeze")); + sym_create_additions = ID2SYM(rb_intern("create_additions")); + sym_create_id = ID2SYM(rb_intern("create_id")); + sym_object_class = ID2SYM(rb_intern("object_class")); + sym_array_class = ID2SYM(rb_intern("array_class")); + sym_decimal_class = ID2SYM(rb_intern("decimal_class")); + sym_match_string = ID2SYM(rb_intern("match_string")); + + i_create_id = 
rb_intern("create_id"); i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_decimal_class = rb_intern("decimal_class"); i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); i_deep_const_get = rb_intern("deep_const_get"); i_aset = rb_intern("[]="); i_aref = rb_intern("[]"); i_leftshift = rb_intern("<<"); i_new = rb_intern("new"); i_try_convert = rb_intern("try_convert"); - i_freeze = rb_intern("freeze"); i_uminus = rb_intern("-@"); + i_encode = rb_intern("encode"); binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); + enc_utf8 = rb_utf8_encoding(); } /* diff --git a/ext/json/ext/parser/parser.h b/ext/json/ext/parser/parser.h deleted file mode 100644 index 9c7f0e7d2..000000000 --- a/ext/json/ext/parser/parser.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#include "ruby.h" - -#ifndef MAYBE_UNUSED -# define MAYBE_UNUSED(x) x -#endif - -#define option_given_p(opts, key) (rb_hash_lookup2(opts, key, Qundef) != Qundef) - -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - int max_nesting; - int allow_nan; - int parsing_name; - int symbolize_names; - int freeze; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - int create_additions; - VALUE match_string; - FBuffer *fbuffer; -} JSON_Parser; - -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static uint32_t unescape_unicode(const unsigned char *p); -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize); -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static VALUE convert_encoding(VALUE source); -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self); -static VALUE cParser_parse(VALUE self); -static void JSON_mark(void *json); -static void JSON_free(void *json); -static VALUE cJSON_parser_s_allocate(VALUE klass); -static VALUE cParser_source(VALUE self); - -static const rb_data_type_t JSON_Parser_type; - -#endif diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 73f81341a..9620b1964 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -1,5 +1,319 @@ +#include "ruby.h" #include "../fbuffer/fbuffer.h" -#include "parser.h" + +static VALUE mJSON, mExt, cParser, eNestingError, 
Encoding_UTF_8; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_json_creatable_p, i_json_create, i_create_id, + i_chr, i_deep_const_get, i_match, i_aset, i_aref, + i_leftshift, i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, + sym_create_additions, sym_create_id, sym_object_class, sym_array_class, + sym_decimal_class, sym_match_string; + +static int binary_encindex; +static int utf8_encindex; + +#ifdef HAVE_RB_CATEGORY_WARN +# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define json_deprecated(message) rb_warn(message) +#endif + +static const char deprecated_create_additions_warning[] = + "JSON.load implicit support for `create_additions: true` is deprecated " + "and will be removed in 3.0, use JSON.unsafe_load or explicitly " + "pass `create_additions: true`"; + +#ifndef HAVE_RB_GC_MARK_LOCATIONS +// For TruffleRuby +void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +{ + VALUE *value = start; + + while (value < end) { + rb_gc_mark(*value); + value++; + } +} +#endif + +#ifndef HAVE_RB_HASH_BULK_INSERT +// For TruffleRuby +void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +{ + long index = 0; + while (index < count) { + VALUE name = pairs[index++]; + VALUE value = pairs[index++]; + rb_hash_aset(hash, name, value); + } + RB_GC_GUARD(hash); +} +#endif + +/* name cache */ + +#include +#include + +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. +#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + +static rb_encoding *enc_utf8; + +#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, length, enc_utf8); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); +# endif +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + long rstring_length = RSTRING_LEN(rstring); + if (length == rstring_length) { + return memcmp(str, RSTRING_PTR(rstring), length); + } else { + return (int)(length - rstring_length); + } +} + +static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. 
+ return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, entry); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. 
+ return Qfalse; + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rsymbol); + } + return rsymbol; +} + +/* rvalue stack */ + +#define RVALUE_STACK_INITIAL_CAPA 128 + +enum rvalue_stack_type { + RVALUE_STACK_HEAP_ALLOCATED = 0, + RVALUE_STACK_STACK_ALLOCATED = 1, +}; + +typedef struct rvalue_stack_struct { + enum rvalue_stack_type type; + long capa; + long head; + VALUE *ptr; +} rvalue_stack; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); + +static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = rvalue_stack_spill(stack, handle, stack_ref); + } else { + REALLOC_N(stack->ptr, VALUE, required); + stack->capa = required; + } + return stack; +} + +static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +{ + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = rvalue_stack_grow(stack, handle, stack_ref); + } + stack->ptr[stack->head] = value; + stack->head++; +} + +static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) +{ + return stack->ptr + (stack->head - count); +} + +static inline void rvalue_stack_pop(rvalue_stack *stack, long count) +{ + stack->head -= count; +} + +static void rvalue_stack_mark(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + rb_gc_mark_locations(stack->ptr, stack->ptr + stack->head); +} + +static void rvalue_stack_free(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + if (stack) { + ruby_xfree(stack->ptr); + ruby_xfree(stack); + } +} + +static size_t rvalue_stack_memsize(const void *ptr) +{ + const rvalue_stack *stack = (const rvalue_stack *)ptr; + return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_rvalue_stack_type = { + "JSON::Ext::Parser/rvalue_stack", + { + .dmark = rvalue_stack_mark, + .dfree = rvalue_stack_free, + .dsize = rvalue_stack_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) +{ + rvalue_stack *stack; + *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, rvalue_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(VALUE, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); + return stack; +} + +static void rvalue_stack_eagerly_release(VALUE handle) +{ + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + RTYPEDDATA_DATA(handle) = NULL; + rvalue_stack_free(stack); +} /* unicode */ @@ -67,6 +381,50 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + VALUE object_class; + VALUE array_class; + VALUE decimal_class; + VALUE match_string; + FBuffer fbuffer; + int max_nesting; + bool allow_nan; + bool allow_trailing_comma; + bool parsing_name; + bool symbolize_names; + bool freeze; + bool create_additions; + bool deprecated_create_additions; + rvalue_cache name_cache; + rvalue_stack *stack; + VALUE 
stack_handle; +} JSON_Parser; + +#define GET_PARSER \ + GET_PARSER_INIT; \ + if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") + +#define GET_PARSER_INIT \ + JSON_Parser *json; \ + TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) + +#define MinusInfinity "-Infinity" +#define EVIL 0x666 + +static const rb_data_type_t JSON_Parser_type; +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); + + #define PARSE_ERROR_FRAGMENT_LEN 32 #ifdef RBIMPL_ATTR_NORETURN RBIMPL_ATTR_NORETURN() @@ -84,21 +442,9 @@ static void raise_parse_error(const char *format, const char *start) ptr = buffer; } - rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr); + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); } -static VALUE mJSON, mExt, cParser, eNestingError; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, - i_object_class, i_array_class, i_decimal_class, - i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_freeze, i_uminus; - -static int binary_encindex; -static int utf8_encindex; - %%{ machine JSON_common; @@ -135,27 +481,25 @@ static int utf8_encindex; write data; action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting); + char *np = JSON_parse_value(json, fpc, pe, result, current_nesting); if (np == NULL) { fhold; fbreak; } else { - if (NIL_P(json->object_class)) { - OBJ_FREEZE(last_name); - rb_hash_aset(*result, last_name, v); - } else { - rb_funcall(*result, i_aset, 2, last_name, v); - } fexec np; } } + action allow_trailing_comma { json->allow_trailing_comma } + action parse_name { char *np; - json->parsing_name = 1; - np = JSON_parse_string(json, fpc, pe, &last_name); - json->parsing_name = 0; - if (np == NULL) { fhold; fbreak; } else fexec np; + json->parsing_name = true; + np = JSON_parse_string(json, fpc, pe, result); + json->parsing_name = false; + if (np == NULL) { fhold; fbreak; } else { + PUSH(*result); + fexec np; + } } action exit { fhold; fbreak; } @@ -165,37 +509,64 @@ static int utf8_encindex; main := ( begin_object - (pair (next_pair)*)? ignore* + (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore* end_object ) @exit; }%% +#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) + static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(object_class) ? 
rb_hash_new() : rb_class_new_instance(0, 0, object_class); + long stack_head = json->stack->head; %% write init; %% write exec; if (cs >= JSON_object_first_final) { - if (json->create_additions) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->object_class)) { + VALUE object = rb_class_new_instance(0, 0, json->object_class); + long index = 0; + VALUE *items = rvalue_stack_peek(json->stack, count); + while (index < count) { + VALUE name = items[index++]; + VALUE value = items[index++]; + rb_funcall(object, i_aset, 2, name, value); + } + *result = object; + } else { + VALUE hash; +#ifdef HAVE_RB_HASH_NEW_CAPA + hash = rb_hash_new_capa(count >> 1); +#else + hash = rb_hash_new(); +#endif + rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); + *result = hash; + } + rvalue_stack_pop(json->stack, count); + + if (RB_UNLIKELY(json->create_additions)) { VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); + if (json->object_class) { + klassname = rb_funcall(*result, i_aref, 1, json->create_id); } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); + klassname = rb_hash_aref(*result, json->create_id); } if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { + if (json->deprecated_create_additions) { + json_deprecated(deprecated_create_additions_warning); + } *result = rb_funcall(klass, i_json_create, 1, *result); } } @@ -206,7 +577,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } } - %%{ machine JSON_value; include JSON_common; @@ -238,7 +608,12 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } action parse_string { char *np = JSON_parse_string(json, fpc, pe, result); - if (np == NULL) { fhold; fbreak; } else fexec np; + if (np == NULL) { + fhold; + fbreak; + } else { + fexec np; + } } action parse_number { @@ -253,9 +628,13 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } } np = JSON_parse_float(json, fpc, pe, result); - if (np != NULL) fexec np; + if (np != NULL) { + fexec np; + } np = JSON_parse_integer(json, fpc, pe, result); - if (np != NULL) fexec np; + if (np != NULL) { + fexec np; + } fhold; fbreak; } @@ -279,10 +658,10 @@ main := ignore* ( Vtrue @parse_true | VNaN @parse_nan | VInfinity @parse_infinity | - begin_number >parse_number | - begin_string >parse_string | - begin_array >parse_array | - begin_object >parse_object + begin_number @parse_number | + begin_string @parse_string | + begin_array @parse_array | + begin_object @parse_object ) ignore* %*exit; }%% @@ -298,6 +677,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul } if (cs >= JSON_value_first_final) { + PUSH(*result); return p; } else { return NULL; @@ -314,6 +694,28 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? 
@exit); }%% +#define MAX_FAST_INTEGER_SIZE 18 +static inline VALUE fast_parse_integer(char *p, char *pe) +{ + bool negative = false; + if (*p == '-') { + negative = true; + p++; + } + + long long memo = 0; + while (p < pe) { + memo *= 10; + memo += *p - '0'; + p++; + } + + if (negative) { + memo = -memo; + } + return LL2NUM(memo); +} + static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; @@ -324,10 +726,14 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res if (cs >= JSON_integer_first_final) { long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); + if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { + *result = fast_parse_integer(json->memo, p); + } else { + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + } return p + 1; } else { return NULL; @@ -359,7 +765,7 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul if (cs >= JSON_float_first_final) { VALUE mod = Qnil; ID method_id = 0; - if (!NIL_P(json->decimal_class)) { + if (json->decimal_class) { if (rb_respond_to(json->decimal_class, i_try_convert)) { mod = json->decimal_class; method_id = i_try_convert; @@ -388,15 +794,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul } long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer)); + VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); *result = rb_funcallv(mod, method_id, 1, &text); } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); + *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); } return p + 1; @@ -418,39 +824,51 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul if (np == NULL) { fhold; fbreak; } else { - if (NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { - rb_funcall(*result, i_leftshift, 1, v); - } fexec np; } } + action allow_trailing_comma { json->allow_trailing_comma } + action exit { fhold; fbreak; } next_element = value_separator ignore* begin_value >parse_value; main := begin_array ignore* ((begin_value >parse_value ignore*) - (ignore* next_element ignore*)*)? + (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)? end_array @exit; }%% static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE array_class = json->array_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(array_class) ? 
rb_ary_new() : rb_class_new_instance(0, 0, array_class); + long stack_head = json->stack->head; %% write init; %% write exec; if(cs >= JSON_array_first_final) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->array_class)) { + VALUE array = rb_class_new_instance(0, 0, json->array_class); + VALUE *items = rvalue_stack_peek(json->stack, count); + long index; + for (index = 0; index < count; index++) { + rb_funcall(array, i_leftshift, 1, items[index]); + } + *result = array; + } else { + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); + *result = array; + } + rvalue_stack_pop(json->stack, count); + return p + 1; } else { raise_parse_error("unexpected token at '%s'", p); @@ -458,29 +876,81 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul } } -static const size_t MAX_STACK_BUFFER_SIZE = 128; -static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize) +static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) +{ + if (symbolize) { + intern = true; + } + VALUE result; +# ifdef HAVE_RB_ENC_INTERNED_STR + if (intern) { + result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); + } else { + result = rb_utf8_str_new(start, (long)(end - start)); + } +# else + result = rb_utf8_str_new(start, (long)(end - start)); + if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + } +# endif + + if (symbolize) { + result = rb_str_intern(result); + } + + return result; +} + +static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +{ + size_t bufferSize = stringEnd - string; + + if (is_name) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + return build_string(string, stringEnd, intern, symbolize); +} + +static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { - VALUE result = Qnil; size_t bufferSize = stringEnd - string; char *p = string, *pe = string, *unescape, *bufferStart, *buffer; int unescape_len; char buf[4]; - if (bufferSize > MAX_STACK_BUFFER_SIZE) { -# ifdef HAVE_RB_ENC_INTERNED_STR - bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1); -# else - bufferStart = buffer = ALLOC_N(char, bufferSize); -# endif - } else { -# ifdef HAVE_RB_ENC_INTERNED_STR - bufferStart = buffer = ALLOCA_N(char, bufferSize ? 
bufferSize : 1); -# else - bufferStart = buffer = ALLOCA_N(char, bufferSize); -# endif + if (is_name) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + pe = memchr(p, '\\', bufferSize); + if (RB_UNLIKELY(pe == NULL)) { + return build_string(string, stringEnd, intern, symbolize); } + VALUE result = rb_str_buf_new(bufferSize); + rb_enc_associate_index(result, utf8_encindex); + buffer = bufferStart = RSTRING_PTR(result); + while (pe < stringEnd) { if (*pe == '\\') { unescape = (char *) "?"; @@ -513,9 +983,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int break; case 'u': if (pe > stringEnd - 4) { - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } raise_parse_error("incomplete unicode character escape sequence at '%s'", p); } else { uint32_t ch = unescape_unicode((unsigned char *) ++pe); @@ -533,9 +1000,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int if ((ch & 0xFC00) == 0xD800) { pe++; if (pe > stringEnd - 6) { - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } raise_parse_error("incomplete surrogate pair at '%s'", p); } if (pe[0] == '\\' && pe[1] == 'u') { @@ -568,41 +1032,12 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int MEMCPY(buffer, p, char, pe - p); buffer += pe - p; } - -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); - } else { - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - } - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } -# else - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } - - if (intern) { - # if STR_UMINUS_DEDUPE_FROZEN - // Starting from MRI 2.8 it is preferable to freeze the string - // before deduplication so that it can be interned directly - // otherwise it would be duplicated first which is wasteful. - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - # elif STR_UMINUS_DEDUPE - // MRI 2.5 and older do not deduplicate strings that are already - // frozen. 
- result = rb_funcall(result, i_uminus, 0); - # else - result = rb_str_freeze(result); - # endif - } -# endif + rb_str_set_len(result, buffer - bufferStart); if (symbolize) { - result = rb_str_intern(result); + result = rb_str_intern(result); + } else if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); } return result; @@ -614,19 +1049,31 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int write data; - action parse_string { - *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - if (NIL_P(*result)) { - fhold; - fbreak; - } else { - fexec p + 1; - } + action parse_complex_string { + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + fexec p + 1; + fhold; + fbreak; } - action exit { fhold; fbreak; } + action parse_simple_string { + *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + fexec p + 1; + fhold; + fbreak; + } - main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit; + double_quote = '"'; + escape = '\\'; + control = 0..0x1f; + simple = any - escape - double_quote - control; + + main := double_quote ( + (simple*)( + (double_quote) @parse_simple_string | + ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string + ) + ); }%% static int @@ -684,18 +1131,78 @@ static VALUE convert_encoding(VALUE source) { int encindex = RB_ENCODING_GET(source); - if (encindex == utf8_encindex) { + if (RB_LIKELY(encindex == utf8_encindex)) { return source; } if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Deprecate in 2.8.0 - // TODO: Remove in 3.0.0 + // For historical reason, we silently reinterpret binary strings as UTF-8 return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); } - return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding()); + return rb_funcall(source, i_encode, 1, Encoding_UTF_8); +} + +static int configure_parser_i(VALUE key, VALUE val, VALUE data) +{ + JSON_Parser *json = (JSON_Parser *)data; + + if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } + else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { json->freeze = RTEST(val); } + else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } + else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_match_string) { json->match_string = RTEST(val) ? 
val : Qfalse; } + else if (key == sym_create_additions) { + if (NIL_P(val)) { + json->create_additions = true; + json->deprecated_create_additions = true; + } else { + json->create_additions = RTEST(val); + json->deprecated_create_additions = false; + } + } + + return ST_CONTINUE; +} + +static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) +{ + if (json->Vsource) { + rb_raise(rb_eTypeError, "already initialized instance"); + } + + json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + json->max_nesting = 100; + + if (!NIL_P(opts)) { + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) > 0) { + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(opts, configure_parser_i, (VALUE)json); + + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } + + if (json->create_additions && !json->create_id) { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + } + + } + source = convert_encoding(StringValue(source)); + StringValue(source); + json->len = RSTRING_LEN(source); + json->source = RSTRING_PTR(source); + json->Vsource = source; } /* @@ -732,111 +1239,11 @@ static VALUE convert_encoding(VALUE source) */ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) { - VALUE source, opts; GET_PARSER_INIT; - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - rb_check_arity(argc, 1, 2); - source = argv[0]; - opts = Qnil; - if (argc == 2) { - opts = argv[1]; - Check_Type(argv[1], T_HASH); - if (RHASH_SIZE(argv[1]) > 0) { - opts = argv[1]; - } - } - if (!NIL_P(opts)) { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_freeze); - if (option_given_p(opts, tmp)) { - json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; - } else { - json->freeze = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - json->create_additions = RTEST(rb_hash_aref(opts, tmp)); - } else { - json->create_additions = 0; - } - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - } else { - json->decimal_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? match_string : Qnil; - } else { - json->match_string = Qnil; - } - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 0; - json->create_id = Qnil; - json->object_class = Qnil; - json->array_class = Qnil; - json->decimal_class = Qnil; - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source);; - json->Vsource = source; + parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil); return self; } @@ -871,11 +1278,64 @@ static VALUE cParser_parse(VALUE self) VALUE result = Qnil; GET_PARSER; + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + %% write init; p = json->source; pe = p + json->len; %% write exec; + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + raise_parse_error("unexpected token at '%s'", p); + return Qnil; + } +} + +static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + + JSON_Parser _parser = {0}; + JSON_Parser *json = &_parser; + parser_init(json, source, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + + %% write init; + p = json->source; + pe = p + json->len; + %% write exec; + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } + if (cs >= JSON_first_final && p == pe) { return result; } else { @@ -893,19 +1353,23 @@ static void JSON_mark(void *ptr) rb_gc_mark(json->array_class); rb_gc_mark(json->decimal_class); rb_gc_mark(json->match_string); + rb_gc_mark(json->stack_handle); + + const VALUE *name_cache_entries = 
&json->name_cache.entries[0]; + rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length); } static void JSON_free(void *ptr) { JSON_Parser *json = ptr; - fbuffer_free(json->fbuffer); + fbuffer_free(&json->fbuffer); ruby_xfree(json); } static size_t JSON_memsize(const void *ptr) { const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(json->fbuffer); + return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); } static const rb_data_type_t JSON_Parser_type = { @@ -919,7 +1383,7 @@ static VALUE cJSON_parser_s_allocate(VALUE klass) { JSON_Parser *json; VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - json->fbuffer = fbuffer_alloc(0); + fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); return obj; } @@ -953,6 +1417,8 @@ void Init_parser(void) rb_define_method(cParser, "parse", cParser_parse, 0); rb_define_method(cParser, "source", cParser_source, 0); + rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); + CNaN = rb_const_get(mJSON, rb_intern("NaN")); rb_gc_register_mark_object(CNaN); @@ -962,30 +1428,38 @@ void Init_parser(void) CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); rb_gc_register_mark_object(CMinusInfinity); + rb_global_variable(&Encoding_UTF_8); + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); + sym_freeze = ID2SYM(rb_intern("freeze")); + sym_create_additions = ID2SYM(rb_intern("create_additions")); + sym_create_id = ID2SYM(rb_intern("create_id")); + sym_object_class = ID2SYM(rb_intern("object_class")); + sym_array_class = ID2SYM(rb_intern("array_class")); + sym_decimal_class = ID2SYM(rb_intern("decimal_class")); + sym_match_string = ID2SYM(rb_intern("match_string")); + + i_create_id = rb_intern("create_id"); i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_decimal_class = rb_intern("decimal_class"); i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); i_deep_const_get = rb_intern("deep_const_get"); i_aset = rb_intern("[]="); i_aref = rb_intern("[]"); i_leftshift = rb_intern("<<"); i_new = rb_intern("new"); i_try_convert = rb_intern("try_convert"); - i_freeze = rb_intern("freeze"); i_uminus = rb_intern("-@"); + i_encode = rb_intern("encode"); binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); + enc_utf8 = rb_utf8_encoding(); } /* diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 5a296c8f9..f76dcb383 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -17,6 +17,7 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; +import org.jruby.exceptions.RaiseException; public final class Generator { private Generator() { @@ -269,6 +270,12 @@ void generate(Session session, RubyArray object, ByteList buffer) { GeneratorState state = session.getState(); 
int depth = state.increaseDepth(); + if (object.isEmpty()) { + buffer.append("[]".getBytes()); + state.decreaseDepth(); + return; + } + ByteList indentUnit = state.getIndent(); byte[] shift = Utils.repeat(indentUnit, depth); @@ -327,6 +334,12 @@ void generate(final Session session, RubyHash object, final GeneratorState state = session.getState(); final int depth = state.increaseDepth(); + if (object.isEmpty()) { + buffer.append("{}".getBytes()); + state.decreaseDepth(); + return; + } + final ByteList objectNl = state.getObjectNl(); final byte[] indent = Utils.repeat(state.getIndent(), depth); final ByteList spaceBefore = state.getSpaceBefore(); @@ -347,7 +360,14 @@ public void visit(IRubyObject key, IRubyObject value) { } if (objectNl.length() != 0) buffer.append(indent); - STRING_HANDLER.generate(session, key.asString(), buffer); + IRubyObject keyStr = key.callMethod(context, "to_s"); + if (keyStr.getMetaClass() == runtime.getString()) { + STRING_HANDLER.generate(session, (RubyString)keyStr, buffer); + } else { + Utils.ensureString(keyStr); + Handler keyHandler = (Handler) getHandlerFor(runtime, keyStr); + keyHandler.generate(session, keyStr, buffer); + } session.infectBy(key); buffer.append(spaceBefore); @@ -383,14 +403,19 @@ void generate(Session session, RubyString object, ByteList buffer) { RuntimeInfo info = session.getInfo(); RubyString src; - if (object.encoding(session.getContext()) != info.utf8.get()) { - src = (RubyString)object.encode(session.getContext(), - info.utf8.get()); - } else { - src = object; - } + try { + if (object.encoding(session.getContext()) != info.utf8.get()) { + src = (RubyString)object.encode(session.getContext(), + info.utf8.get()); + } else { + src = object; + } - session.getStringEncoder().encode(src.getByteList(), buffer); + session.getStringEncoder().encode(src.getByteList(), buffer); + } catch (RaiseException re) { + throw Utils.newException(session.getContext(), Utils.M_GENERATOR_ERROR, + re.getMessage()); + } } }; diff --git a/java/src/json/ext/GeneratorState.java b/java/src/json/ext/GeneratorState.java index 909f1a56e..1600b04aa 100644 --- a/java/src/json/ext/GeneratorState.java +++ b/java/src/json/ext/GeneratorState.java @@ -5,6 +5,8 @@ */ package json.ext; +import org.jcodings.specific.UTF8Encoding; + import org.jruby.Ruby; import org.jruby.RubyBoolean; import org.jruby.RubyClass; @@ -136,6 +138,11 @@ public static IRubyObject from_state(ThreadContext context, return fromState(context, opts); } + @JRubyMethod(meta=true) + public static IRubyObject generate(ThreadContext context, IRubyObject klass, IRubyObject obj, IRubyObject opts) { + return fromState(context, opts).generate(context, obj); + } + static GeneratorState fromState(ThreadContext context, IRubyObject opts) { return fromState(context, RuntimeInfo.forRuntime(context.getRuntime()), opts); } @@ -225,7 +232,12 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) { public IRubyObject generate(ThreadContext context, IRubyObject obj) { RubyString result = Generator.generateJson(context, obj, this); RuntimeInfo info = RuntimeInfo.forRuntime(context.getRuntime()); - result.force_encoding(context, info.utf8.get()); + if (result.getEncoding() != UTF8Encoding.INSTANCE) { + if (result.isFrozen()) { + result = result.strDup(context.getRuntime()); + } + result.force_encoding(context, info.utf8.get()); + } return result; } diff --git a/java/src/json/ext/Parser.java b/java/src/json/ext/Parser.java index 4234fd21f..74037d375 100644 --- a/java/src/json/ext/Parser.java +++ 
b/java/src/json/ext/Parser.java @@ -51,8 +51,10 @@ public class Parser extends RubyObject { private RubyString vSource; private RubyString createId; private boolean createAdditions; + private boolean deprecatedCreateAdditions; private int maxNesting; private boolean allowNaN; + private boolean allowTrailingComma; private boolean symbolizeNames; private boolean freeze; private RubyClass objectClass; @@ -123,6 +125,11 @@ public Parser(Ruby runtime, RubyClass metaClass) { * Infinity and -Infinity in defiance of RFC 4627 * to be parsed by the Parser. This option defaults to false. * + *
:allow_trailing_comma + *
If set to true, allow arrays and objects with a trailing + * comma in defiance of RFC 4627 to be parsed by the Parser. + * This option defaults to false. + * *
:symbolize_names *
If set to true, returns symbols for the names (keys) in * a JSON object. Otherwise strings are returned, which is also the default. @@ -158,6 +165,14 @@ public static IRubyObject newInstance(IRubyObject clazz, IRubyObject[] args, Blo return parser; } + @JRubyMethod(meta=true) + public static IRubyObject parse(ThreadContext context, IRubyObject clazz, IRubyObject source, IRubyObject opts) { + IRubyObject[] args = new IRubyObject[] {source, opts}; + Parser parser = (Parser)((RubyClass)clazz).allocate(); + parser.callInit(args, null); + return parser.parse(context); + } + @JRubyMethod(required = 1, optional = 1, visibility = Visibility.PRIVATE) public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { Ruby runtime = context.getRuntime(); @@ -168,10 +183,24 @@ public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { OptionsReader opts = new OptionsReader(context, args.length > 1 ? args[1] : null); this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); this.allowNaN = opts.getBool("allow_nan", false); + this.allowTrailingComma = opts.getBool("allow_trailing_comma", false); this.symbolizeNames = opts.getBool("symbolize_names", false); this.freeze = opts.getBool("freeze", false); this.createId = opts.getString("create_id", getCreateId(context)); - this.createAdditions = opts.getBool("create_additions", false); + + IRubyObject additions = opts.get("create_additions"); + this.createAdditions = false; + this.deprecatedCreateAdditions = false; + + if (additions != null) { + if (additions.isNil()) { + this.createAdditions = true; + this.deprecatedCreateAdditions = true; + } else { + this.createAdditions = opts.getBool("create_additions", false); + } + } + this.objectClass = opts.getClass("object_class", runtime.getHash()); this.arrayClass = opts.getClass("array_class", runtime.getArray()); this.decimalClass = opts.getClass("decimal_class", null); @@ -342,11 +371,11 @@ private Ruby getRuntime() { } -// line 368 "Parser.rl" +// line 397 "Parser.rl" -// line 350 "Parser.java" +// line 379 "Parser.java" private static byte[] init__JSON_value_actions_0() { return new byte [] { @@ -460,7 +489,7 @@ private static byte[] init__JSON_value_from_state_actions_0() static final int JSON_value_en_main = 1; -// line 474 "Parser.rl" +// line 503 "Parser.rl" void parseValue(ParserResult res, int p, int pe) { @@ -468,14 +497,14 @@ void parseValue(ParserResult res, int p, int pe) { IRubyObject result = null; -// line 472 "Parser.java" +// line 501 "Parser.java" { cs = JSON_value_start; } -// line 481 "Parser.rl" +// line 510 "Parser.rl" -// line 479 "Parser.java" +// line 508 "Parser.java" { int _klen; int _trans = 0; @@ -501,13 +530,13 @@ void parseValue(ParserResult res, int p, int pe) { while ( _nacts-- > 0 ) { switch ( _JSON_value_actions[_acts++] ) { case 9: -// line 459 "Parser.rl" +// line 488 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 511 "Parser.java" +// line 540 "Parser.java" } } @@ -570,25 +599,25 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) switch ( _JSON_value_actions[_acts++] ) { case 0: -// line 376 "Parser.rl" +// line 405 "Parser.rl" { result = getRuntime().getNil(); } break; case 1: -// line 379 "Parser.rl" +// line 408 "Parser.rl" { result = getRuntime().getFalse(); } break; case 2: -// line 382 "Parser.rl" +// line 411 "Parser.rl" { result = getRuntime().getTrue(); } break; case 3: -// line 385 "Parser.rl" +// line 414 "Parser.rl" { if (parser.allowNaN) { result = getConstant(CONST_NAN); @@ 
-598,7 +627,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 4: -// line 392 "Parser.rl" +// line 421 "Parser.rl" { if (parser.allowNaN) { result = getConstant(CONST_INFINITY); @@ -608,7 +637,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 5: -// line 399 "Parser.rl" +// line 428 "Parser.rl" { if (pe > p + 8 && absSubSequence(p, p + 9).equals(JSON_MINUS_INFINITY)) { @@ -637,7 +666,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 6: -// line 425 "Parser.rl" +// line 454 "Parser.rl" { parseString(res, p, pe); if (res.result == null) { @@ -650,7 +679,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 7: -// line 435 "Parser.rl" +// line 464 "Parser.rl" { currentNesting++; parseArray(res, p, pe); @@ -665,7 +694,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 8: -// line 447 "Parser.rl" +// line 476 "Parser.rl" { currentNesting++; parseObject(res, p, pe); @@ -679,7 +708,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } } break; -// line 683 "Parser.java" +// line 712 "Parser.java" } } } @@ -699,7 +728,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) break; } } -// line 482 "Parser.rl" +// line 511 "Parser.rl" if (cs >= JSON_value_first_final && result != null) { if (parser.freeze) { @@ -712,7 +741,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } -// line 716 "Parser.java" +// line 745 "Parser.java" private static byte[] init__JSON_integer_actions_0() { return new byte [] { @@ -811,7 +840,7 @@ private static byte[] init__JSON_integer_trans_actions_0() static final int JSON_integer_en_main = 1; -// line 504 "Parser.rl" +// line 533 "Parser.rl" void parseInteger(ParserResult res, int p, int pe) { @@ -829,15 +858,15 @@ int parseIntegerInternal(int p, int pe) { int cs = EVIL; -// line 833 "Parser.java" +// line 862 "Parser.java" { cs = JSON_integer_start; } -// line 521 "Parser.rl" +// line 550 "Parser.rl" int memo = p; -// line 841 "Parser.java" +// line 870 "Parser.java" { int _klen; int _trans = 0; @@ -918,13 +947,13 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) switch ( _JSON_integer_actions[_acts++] ) { case 0: -// line 498 "Parser.rl" +// line 527 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 928 "Parser.java" +// line 957 "Parser.java" } } } @@ -944,7 +973,7 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) break; } } -// line 523 "Parser.rl" +// line 552 "Parser.rl" if (cs < JSON_integer_first_final) { return -1; @@ -964,7 +993,7 @@ RubyInteger bytesToInum(Ruby runtime, ByteList num) { } -// line 968 "Parser.java" +// line 997 "Parser.java" private static byte[] init__JSON_float_actions_0() { return new byte [] { @@ -1066,7 +1095,7 @@ private static byte[] init__JSON_float_trans_actions_0() static final int JSON_float_en_main = 1; -// line 556 "Parser.rl" +// line 585 "Parser.rl" void parseFloat(ParserResult res, int p, int pe) { @@ -1085,15 +1114,15 @@ int parseFloatInternal(int p, int pe) { int cs = EVIL; -// line 1089 "Parser.java" +// line 1118 "Parser.java" { cs = JSON_float_start; } -// line 574 "Parser.rl" +// line 603 "Parser.rl" int memo = p; -// line 1097 "Parser.java" +// line 1126 "Parser.java" { int _klen; int _trans = 0; @@ -1174,13 +1203,13 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) switch ( _JSON_float_actions[_acts++] ) { case 0: -// line 547 "Parser.rl" +// line 576 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 
1184 "Parser.java" +// line 1213 "Parser.java" } } } @@ -1200,7 +1229,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) break; } } -// line 576 "Parser.rl" +// line 605 "Parser.rl" if (cs < JSON_float_first_final) { return -1; @@ -1210,7 +1239,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) } -// line 1214 "Parser.java" +// line 1243 "Parser.java" private static byte[] init__JSON_string_actions_0() { return new byte [] { @@ -1312,7 +1341,7 @@ private static byte[] init__JSON_string_trans_actions_0() static final int JSON_string_en_main = 1; -// line 615 "Parser.rl" +// line 644 "Parser.rl" void parseString(ParserResult res, int p, int pe) { @@ -1320,15 +1349,15 @@ void parseString(ParserResult res, int p, int pe) { IRubyObject result = null; -// line 1324 "Parser.java" +// line 1353 "Parser.java" { cs = JSON_string_start; } -// line 622 "Parser.rl" +// line 651 "Parser.rl" int memo = p; -// line 1332 "Parser.java" +// line 1361 "Parser.java" { int _klen; int _trans = 0; @@ -1409,7 +1438,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) switch ( _JSON_string_actions[_acts++] ) { case 0: -// line 590 "Parser.rl" +// line 619 "Parser.rl" { int offset = byteList.begin(); ByteList decoded = decoder.decode(byteList, memo + 1 - offset, @@ -1424,13 +1453,13 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) } break; case 1: -// line 603 "Parser.rl" +// line 632 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1434 "Parser.java" +// line 1463 "Parser.java" } } } @@ -1450,7 +1479,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) break; } } -// line 624 "Parser.rl" +// line 653 "Parser.rl" if (parser.createAdditions) { RubyHash matchString = parser.match_string; @@ -1471,6 +1500,9 @@ public void visit(IRubyObject pattern, IRubyObject klass) { RubyClass klass = (RubyClass) memoArray[1]; if (klass.respondsTo("json_creatable?") && klass.callMethod(context, "json_creatable?").isTrue()) { + if (parser.deprecatedCreateAdditions) { + klass.getRuntime().getWarnings().warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } result = klass.callMethod(context, "json_create", result); } } @@ -1495,7 +1527,7 @@ public void visit(IRubyObject pattern, IRubyObject klass) { } -// line 1499 "Parser.java" +// line 1531 "Parser.java" private static byte[] init__JSON_array_actions_0() { return new byte [] { @@ -1506,36 +1538,86 @@ private static byte[] init__JSON_array_actions_0() private static final byte _JSON_array_actions[] = init__JSON_array_actions_0(); +private static byte[] init__JSON_array_cond_offsets_0() +{ + return new byte [] { + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 6, 6, 6, 6, 6, 8, 11, 16, 19 + }; +} + +private static final byte _JSON_array_cond_offsets[] = init__JSON_array_cond_offsets_0(); + + +private static byte[] init__JSON_array_cond_lengths_0() +{ + return new byte [] { + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 0, 0, 0, 0, 2, 3, 5, 3, 0 + }; +} + +private static final byte _JSON_array_cond_lengths[] = init__JSON_array_cond_lengths_0(); + + +private static int[] init__JSON_array_cond_keys_0() +{ + return new int [] { + 44, 44, 9, 9, 10, 10, 13, 13, 32, 32, 47, 47, + 42, 42, 47, 47, 0, 41, 42, 42, 43,65535, 0, 41, + 42, 42, 43, 46, 47, 47, 48,65535, 0, 9, 10, 10, + 11,65535, 0 + }; +} + +private static final int _JSON_array_cond_keys[] = init__JSON_array_cond_keys_0(); + + +private static 
byte[] init__JSON_array_cond_spaces_0() +{ + return new byte [] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }; +} + +private static final byte _JSON_array_cond_spaces[] = init__JSON_array_cond_spaces_0(); + + private static byte[] init__JSON_array_key_offsets_0() { return new byte [] { - 0, 0, 1, 18, 25, 41, 43, 44, 46, 47, 49, 50, - 52, 53, 55, 56, 58, 59 + 0, 0, 1, 18, 26, 28, 29, 31, 32, 48, 50, 51, + 53, 54, 76, 78, 79, 81, 82, 86, 92, 100, 106 }; } private static final byte _JSON_array_key_offsets[] = init__JSON_array_key_offsets_0(); -private static char[] init__JSON_array_trans_keys_0() +private static int[] init__JSON_array_trans_keys_0() { - return new char [] { + return new int [] { 91, 13, 32, 34, 45, 47, 73, 78, 91, 93, 102, 110, - 116, 123, 9, 10, 48, 57, 13, 32, 44, 47, 93, 9, - 10, 13, 32, 34, 45, 47, 73, 78, 91, 102, 110, 116, - 123, 9, 10, 48, 57, 42, 47, 42, 42, 47, 10, 42, - 47, 42, 42, 47, 10, 42, 47, 42, 42, 47, 10, 0 + 116, 123, 9, 10, 48, 57, 13, 32, 47, 93,65580,131116, + 9, 10, 42, 47, 42, 42, 47, 10, 13, 32, 34, 45, + 47, 73, 78, 91, 102, 110, 116, 123, 9, 10, 48, 57, + 42, 47, 42, 42, 47, 10, 34, 45, 73, 78, 91, 93, + 102, 110, 116, 123,65549,65568,65583,131085,131104,131119, 48, 57, + 65545,65546,131081,131082, 42, 47, 42, 42, 47, 10,65578,65583, + 131114,131119,65578,131114,65536,131071,131072,196607,65578,65583,131114,131119, + 65536,131071,131072,196607,65546,131082,65536,131071,131072,196607, 0 }; } -private static final char _JSON_array_trans_keys[] = init__JSON_array_trans_keys_0(); +private static final int _JSON_array_trans_keys[] = init__JSON_array_trans_keys_0(); private static byte[] init__JSON_array_single_lengths_0() { return new byte [] { - 0, 1, 13, 5, 12, 2, 1, 2, 1, 2, 1, 2, - 1, 2, 1, 2, 1, 0 + 0, 1, 13, 6, 2, 1, 2, 1, 12, 2, 1, 2, + 1, 16, 2, 1, 2, 1, 4, 2, 4, 2, 0 }; } @@ -1545,34 +1627,38 @@ private static byte[] init__JSON_array_single_lengths_0() private static byte[] init__JSON_array_range_lengths_0() { return new byte [] { - 0, 0, 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0 + 0, 0, 2, 1, 0, 0, 0, 0, 2, 0, 0, 0, + 0, 3, 0, 0, 0, 0, 0, 2, 2, 2, 0 }; } private static final byte _JSON_array_range_lengths[] = init__JSON_array_range_lengths_0(); -private static byte[] init__JSON_array_index_offsets_0() +private static short[] init__JSON_array_index_offsets_0() { - return new byte [] { - 0, 0, 2, 18, 25, 40, 43, 45, 48, 50, 53, 55, - 58, 60, 63, 65, 68, 70 + return new short [] { + 0, 0, 2, 18, 26, 29, 31, 34, 36, 51, 54, 56, + 59, 61, 81, 84, 86, 89, 91, 96, 101, 108, 113 }; } -private static final byte _JSON_array_index_offsets[] = init__JSON_array_index_offsets_0(); +private static final short _JSON_array_index_offsets[] = init__JSON_array_index_offsets_0(); private static byte[] init__JSON_array_indicies_0() { return new byte [] { 0, 1, 0, 0, 2, 2, 3, 2, 2, 2, 4, 2, - 2, 2, 2, 0, 2, 1, 5, 5, 6, 7, 4, 5, - 1, 6, 6, 2, 2, 8, 2, 2, 2, 2, 2, 2, - 2, 6, 2, 1, 9, 10, 1, 11, 9, 11, 6, 9, - 6, 10, 12, 13, 1, 14, 12, 14, 5, 12, 5, 13, - 15, 16, 1, 17, 15, 17, 0, 15, 0, 16, 1, 0 + 2, 2, 2, 0, 2, 1, 5, 5, 6, 4, 7, 8, + 5, 1, 9, 10, 1, 11, 9, 11, 5, 9, 5, 10, + 7, 7, 2, 2, 12, 2, 2, 2, 2, 2, 2, 2, + 7, 2, 1, 13, 14, 1, 15, 13, 15, 7, 13, 7, + 14, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 0, + 0, 3, 8, 8, 16, 2, 0, 8, 1, 17, 18, 1, + 19, 17, 19, 0, 17, 0, 18, 17, 18, 20, 21, 1, + 19, 22, 17, 20, 1, 19, 0, 22, 8, 17, 20, 1, + 0, 8, 18, 21, 1, 1, 0 }; } @@ -1582,8 +1668,8 @@ private static byte[] init__JSON_array_indicies_0() private 
static byte[] init__JSON_array_trans_targs_0() { return new byte [] { - 2, 0, 3, 13, 17, 3, 4, 9, 5, 6, 8, 7, - 10, 12, 11, 14, 16, 15 + 2, 0, 3, 14, 22, 3, 4, 8, 13, 5, 7, 6, + 9, 10, 12, 11, 18, 15, 17, 16, 19, 21, 20 }; } @@ -1594,7 +1680,7 @@ private static byte[] init__JSON_array_trans_actions_0() { return new byte [] { 0, 0, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; } @@ -1602,13 +1688,13 @@ private static byte[] init__JSON_array_trans_actions_0() static final int JSON_array_start = 1; -static final int JSON_array_first_final = 17; +static final int JSON_array_first_final = 22; static final int JSON_array_error = 0; static final int JSON_array_en_main = 1; -// line 704 "Parser.rl" +// line 738 "Parser.rl" void parseArray(ParserResult res, int p, int pe) { @@ -1628,17 +1714,18 @@ void parseArray(ParserResult res, int p, int pe) { } -// line 1632 "Parser.java" +// line 1718 "Parser.java" { cs = JSON_array_start; } -// line 723 "Parser.rl" +// line 757 "Parser.rl" -// line 1639 "Parser.java" +// line 1725 "Parser.java" { int _klen; int _trans = 0; + int _widec; int _acts; int _nacts; int _keys; @@ -1656,6 +1743,37 @@ void parseArray(ParserResult res, int p, int pe) { continue _goto; } case 1: + _widec = data[p]; + _keys = _JSON_array_cond_offsets[cs]*2 +; _klen = _JSON_array_cond_lengths[cs]; + if ( _klen > 0 ) { + int _lower = _keys +; int _mid; + int _upper = _keys + (_klen<<1) - 2; + while (true) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( _widec < _JSON_array_cond_keys[_mid] ) + _upper = _mid - 2; + else if ( _widec > _JSON_array_cond_keys[_mid+1] ) + _lower = _mid + 2; + else { + switch ( _JSON_array_cond_spaces[_JSON_array_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { + case 0: { + _widec = 65536 + (data[p] - 0); + if ( +// line 705 "Parser.rl" + parser.allowTrailingComma ) _widec += 65536; + break; + } + } + break; + } + } + } + _match: do { _keys = _JSON_array_key_offsets[cs]; _trans = _JSON_array_index_offsets[cs]; @@ -1669,9 +1787,9 @@ void parseArray(ParserResult res, int p, int pe) { break; _mid = _lower + ((_upper-_lower) >> 1); - if ( data[p] < _JSON_array_trans_keys[_mid] ) + if ( _widec < _JSON_array_trans_keys[_mid] ) _upper = _mid - 1; - else if ( data[p] > _JSON_array_trans_keys[_mid] ) + else if ( _widec > _JSON_array_trans_keys[_mid] ) _lower = _mid + 1; else { _trans += (_mid - _keys); @@ -1692,9 +1810,9 @@ else if ( data[p] > _JSON_array_trans_keys[_mid] ) break; _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( data[p] < _JSON_array_trans_keys[_mid] ) + if ( _widec < _JSON_array_trans_keys[_mid] ) _upper = _mid - 2; - else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) + else if ( _widec > _JSON_array_trans_keys[_mid+1] ) _lower = _mid + 2; else { _trans += ((_mid - _keys)>>1); @@ -1716,7 +1834,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) switch ( _JSON_array_actions[_acts++] ) { case 0: -// line 673 "Parser.rl" +// line 707 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -1733,13 +1851,13 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) } break; case 1: -// line 688 "Parser.rl" +// line 722 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1743 "Parser.java" +// line 1861 "Parser.java" } } } @@ -1759,7 +1877,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) break; } } -// line 724 "Parser.rl" +// line 758 "Parser.rl" if (cs >= JSON_array_first_final) { res.update(result, p + 1); @@ 
-1769,7 +1887,7 @@ else if ( data[p] > _JSON_array_trans_keys[_mid+1] ) } -// line 1773 "Parser.java" +// line 1891 "Parser.java" private static byte[] init__JSON_object_actions_0() { return new byte [] { @@ -1780,40 +1898,91 @@ private static byte[] init__JSON_object_actions_0() private static final byte _JSON_object_actions[] = init__JSON_object_actions_0(); +private static byte[] init__JSON_object_cond_offsets_0() +{ + return new byte [] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 11, 16, + 19, 19, 19, 19, 19, 19, 19, 19, 19 + }; +} + +private static final byte _JSON_object_cond_offsets[] = init__JSON_object_cond_offsets_0(); + + +private static byte[] init__JSON_object_cond_lengths_0() +{ + return new byte [] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; +} + +private static final byte _JSON_object_cond_lengths[] = init__JSON_object_cond_lengths_0(); + + +private static int[] init__JSON_object_cond_keys_0() +{ + return new int [] { + 9, 9, 10, 10, 13, 13, 32, 32, 44, 44, 47, 47, + 42, 42, 47, 47, 0, 41, 42, 42, 43,65535, 0, 41, + 42, 42, 43, 46, 47, 47, 48,65535, 0, 9, 10, 10, + 11,65535, 0 + }; +} + +private static final int _JSON_object_cond_keys[] = init__JSON_object_cond_keys_0(); + + +private static byte[] init__JSON_object_cond_spaces_0() +{ + return new byte [] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }; +} + +private static final byte _JSON_object_cond_spaces[] = init__JSON_object_cond_spaces_0(); + + private static byte[] init__JSON_object_key_offsets_0() { return new byte [] { - 0, 0, 1, 8, 14, 16, 17, 19, 20, 36, 43, 49, - 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, - 69, 70, 72, 73 + 0, 0, 1, 8, 14, 16, 17, 19, 20, 36, 49, 56, + 62, 64, 65, 67, 68, 70, 71, 73, 74, 78, 84, 92, + 98, 100, 101, 103, 104, 106, 107, 109, 110 }; } private static final byte _JSON_object_key_offsets[] = init__JSON_object_key_offsets_0(); -private static char[] init__JSON_object_trans_keys_0() +private static int[] init__JSON_object_trans_keys_0() { - return new char [] { + return new int [] { 123, 13, 32, 34, 47, 125, 9, 10, 13, 32, 47, 58, 9, 10, 42, 47, 42, 42, 47, 10, 13, 32, 34, 45, 47, 73, 78, 91, 102, 110, 116, 123, 9, 10, 48, 57, - 13, 32, 44, 47, 125, 9, 10, 13, 32, 34, 47, 9, - 10, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, 47, - 10, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, 47, - 10, 0 + 125,65549,65568,65580,65583,131085,131104,131116,131119,65545,65546,131081, + 131082, 13, 32, 44, 47, 125, 9, 10, 13, 32, 34, 47, + 9, 10, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, + 47, 10,65578,65583,131114,131119,65578,131114,65536,131071,131072,196607, + 65578,65583,131114,131119,65536,131071,131072,196607,65546,131082,65536,131071, + 131072,196607, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, + 47, 10, 0 }; } -private static final char _JSON_object_trans_keys[] = init__JSON_object_trans_keys_0(); +private static final int _JSON_object_trans_keys[] = init__JSON_object_trans_keys_0(); private static byte[] init__JSON_object_single_lengths_0() { return new byte [] { - 0, 1, 5, 4, 2, 1, 2, 1, 12, 5, 4, 2, - 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, - 1, 2, 1, 0 + 0, 1, 5, 4, 2, 1, 2, 1, 12, 9, 5, 4, + 2, 1, 2, 1, 2, 1, 2, 1, 4, 2, 4, 2, + 2, 1, 2, 1, 2, 1, 2, 1, 0 }; } @@ -1823,25 +1992,25 @@ private static byte[] init__JSON_object_single_lengths_0() private static byte[] init__JSON_object_range_lengths_0() { return new byte [] { - 0, 0, 1, 1, 0, 0, 0, 0, 2, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 0, 0, 0 + 0, 0, 1, 1, 0, 0, 0, 0, 2, 2, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0 }; } private static final byte _JSON_object_range_lengths[] = init__JSON_object_range_lengths_0(); -private static byte[] init__JSON_object_index_offsets_0() +private static short[] init__JSON_object_index_offsets_0() { - return new byte [] { - 0, 0, 2, 9, 15, 18, 20, 23, 25, 40, 47, 53, - 56, 58, 61, 63, 66, 68, 71, 73, 76, 78, 81, 83, - 86, 88, 91, 93 + return new short [] { + 0, 0, 2, 9, 15, 18, 20, 23, 25, 40, 52, 59, + 65, 68, 70, 73, 75, 78, 80, 83, 85, 90, 95, 102, + 107, 110, 112, 115, 117, 120, 122, 125, 127 }; } -private static final byte _JSON_object_index_offsets[] = init__JSON_object_index_offsets_0(); +private static final short _JSON_object_index_offsets[] = init__JSON_object_index_offsets_0(); private static byte[] init__JSON_object_indicies_0() @@ -1850,11 +2019,14 @@ private static byte[] init__JSON_object_indicies_0() 0, 1, 0, 0, 2, 3, 4, 0, 1, 5, 5, 6, 7, 5, 1, 8, 9, 1, 10, 8, 10, 5, 8, 5, 9, 7, 7, 11, 11, 12, 11, 11, 11, 11, 11, 11, - 11, 7, 11, 1, 13, 13, 14, 15, 4, 13, 1, 14, - 14, 2, 16, 14, 1, 17, 18, 1, 19, 17, 19, 14, - 17, 14, 18, 20, 21, 1, 22, 20, 22, 13, 20, 13, - 21, 23, 24, 1, 25, 23, 25, 7, 23, 7, 24, 26, - 27, 1, 28, 26, 28, 0, 26, 0, 27, 1, 0 + 11, 7, 11, 1, 4, 13, 13, 14, 15, 16, 16, 0, + 17, 13, 16, 1, 13, 13, 14, 15, 4, 13, 1, 14, + 14, 2, 18, 14, 1, 19, 20, 1, 21, 19, 21, 14, + 19, 14, 20, 22, 23, 1, 24, 22, 24, 13, 22, 13, + 23, 22, 23, 25, 26, 1, 24, 27, 22, 25, 1, 24, + 13, 27, 16, 22, 25, 1, 13, 16, 23, 26, 1, 28, + 29, 1, 30, 28, 30, 7, 28, 7, 29, 31, 32, 1, + 33, 31, 33, 0, 31, 0, 32, 1, 0 }; } @@ -1864,9 +2036,9 @@ private static byte[] init__JSON_object_indicies_0() private static byte[] init__JSON_object_trans_targs_0() { return new byte [] { - 2, 0, 3, 23, 27, 3, 4, 8, 5, 7, 6, 9, - 19, 9, 10, 15, 11, 12, 14, 13, 16, 18, 17, 20, - 22, 21, 24, 26, 25 + 2, 0, 3, 28, 32, 3, 4, 8, 5, 7, 6, 9, + 24, 10, 11, 16, 9, 20, 12, 13, 15, 14, 17, 19, + 18, 21, 23, 22, 25, 27, 26, 29, 31, 30 }; } @@ -1878,7 +2050,7 @@ private static byte[] init__JSON_object_trans_actions_0() return new byte [] { 0, 0, 3, 0, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; } @@ -1886,13 +2058,13 @@ private static byte[] init__JSON_object_trans_actions_0() static final int JSON_object_start = 1; -static final int JSON_object_first_final = 27; +static final int JSON_object_first_final = 32; static final int JSON_object_error = 0; static final int JSON_object_en_main = 1; -// line 781 "Parser.rl" +// line 819 "Parser.rl" void parseObject(ParserResult res, int p, int pe) { @@ -1917,17 +2089,18 @@ void parseObject(ParserResult res, int p, int pe) { } -// line 1921 "Parser.java" +// line 2093 "Parser.java" { cs = JSON_object_start; } -// line 805 "Parser.rl" +// line 843 "Parser.rl" -// line 1928 "Parser.java" +// line 2100 "Parser.java" { int _klen; int _trans = 0; + int _widec; int _acts; int _nacts; int _keys; @@ -1945,6 +2118,37 @@ void parseObject(ParserResult res, int p, int pe) { continue _goto; } case 1: + _widec = data[p]; + _keys = _JSON_object_cond_offsets[cs]*2 +; _klen = _JSON_object_cond_lengths[cs]; + if ( _klen > 0 ) { + int _lower = _keys +; int _mid; + int _upper = _keys + (_klen<<1) - 2; + while (true) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( _widec < _JSON_object_cond_keys[_mid] ) + _upper = _mid - 2; + else if ( _widec > 
_JSON_object_cond_keys[_mid+1] ) + _lower = _mid + 2; + else { + switch ( _JSON_object_cond_spaces[_JSON_object_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { + case 0: { + _widec = 65536 + (data[p] - 0); + if ( +// line 772 "Parser.rl" + parser.allowTrailingComma ) _widec += 65536; + break; + } + } + break; + } + } + } + _match: do { _keys = _JSON_object_key_offsets[cs]; _trans = _JSON_object_index_offsets[cs]; @@ -1958,9 +2162,9 @@ void parseObject(ParserResult res, int p, int pe) { break; _mid = _lower + ((_upper-_lower) >> 1); - if ( data[p] < _JSON_object_trans_keys[_mid] ) + if ( _widec < _JSON_object_trans_keys[_mid] ) _upper = _mid - 1; - else if ( data[p] > _JSON_object_trans_keys[_mid] ) + else if ( _widec > _JSON_object_trans_keys[_mid] ) _lower = _mid + 1; else { _trans += (_mid - _keys); @@ -1981,9 +2185,9 @@ else if ( data[p] > _JSON_object_trans_keys[_mid] ) break; _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( data[p] < _JSON_object_trans_keys[_mid] ) + if ( _widec < _JSON_object_trans_keys[_mid] ) _upper = _mid - 2; - else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) + else if ( _widec > _JSON_object_trans_keys[_mid+1] ) _lower = _mid + 2; else { _trans += ((_mid - _keys)>>1); @@ -2005,7 +2209,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) switch ( _JSON_object_actions[_acts++] ) { case 0: -// line 738 "Parser.rl" +// line 774 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -2022,7 +2226,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } break; case 1: -// line 753 "Parser.rl" +// line 789 "Parser.rl" { parseString(res, p, pe); if (res.result == null) { @@ -2040,13 +2244,13 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } break; case 2: -// line 769 "Parser.rl" +// line 805 "Parser.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 2050 "Parser.java" +// line 2254 "Parser.java" } } } @@ -2066,7 +2270,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) break; } } -// line 806 "Parser.rl" +// line 844 "Parser.rl" if (cs < JSON_object_first_final) { res.update(null, p + 1); @@ -2090,6 +2294,9 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) callMethod(context, "deep_const_get", vKlassName); if (klass.respondsTo("json_creatable?") && klass.callMethod(context, "json_creatable?").isTrue()) { + if (parser.deprecatedCreateAdditions) { + klass.getRuntime().getWarnings().warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } returnedResult = klass.callMethod(context, "json_create", result); } @@ -2099,7 +2306,7 @@ else if ( data[p] > _JSON_object_trans_keys[_mid+1] ) } -// line 2103 "Parser.java" +// line 2310 "Parser.java" private static byte[] init__JSON_actions_0() { return new byte [] { @@ -2202,7 +2409,7 @@ private static byte[] init__JSON_trans_actions_0() static final int JSON_en_main = 1; -// line 857 "Parser.rl" +// line 898 "Parser.rl" public IRubyObject parseImplemetation() { @@ -2212,16 +2419,16 @@ public IRubyObject parseImplemetation() { ParserResult res = new ParserResult(); -// line 2216 "Parser.java" +// line 2423 "Parser.java" { cs = JSON_start; } -// line 866 "Parser.rl" +// line 907 "Parser.rl" p = byteList.begin(); pe = p + byteList.length(); -// line 2225 "Parser.java" +// line 2432 "Parser.java" { int _klen; int _trans = 0; @@ -2302,7 +2509,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) switch ( _JSON_actions[_acts++] ) { case 0: -// 
line 843 "Parser.rl" +// line 884 "Parser.rl" { parseValue(res, p, pe); if (res.result == null) { @@ -2314,7 +2521,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) } } break; -// line 2318 "Parser.java" +// line 2525 "Parser.java" } } } @@ -2334,7 +2541,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) break; } } -// line 869 "Parser.rl" +// line 910 "Parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/java/src/json/ext/Parser.rl b/java/src/json/ext/Parser.rl index 0fd6d7968..9d2b96d61 100644 --- a/java/src/json/ext/Parser.rl +++ b/java/src/json/ext/Parser.rl @@ -49,8 +49,10 @@ public class Parser extends RubyObject { private RubyString vSource; private RubyString createId; private boolean createAdditions; + private boolean deprecatedCreateAdditions; private int maxNesting; private boolean allowNaN; + private boolean allowTrailingComma; private boolean symbolizeNames; private boolean freeze; private RubyClass objectClass; @@ -121,6 +123,11 @@ public class Parser extends RubyObject { * Infinity and -Infinity in defiance of RFC 4627 * to be parsed by the Parser. This option defaults to false. * + *
:allow_trailing_comma + *
If set to true, allow arrays and objects with a trailing + * comma in defiance of RFC 4627 to be parsed by the Parser. + * This option defaults to false. + * *
:symbolize_names *
If set to true, returns symbols for the names (keys) in * a JSON object. Otherwise strings are returned, which is also the default. @@ -156,6 +163,14 @@ public class Parser extends RubyObject { return parser; } + @JRubyMethod(meta=true) + public static IRubyObject parse(ThreadContext context, IRubyObject clazz, IRubyObject source, IRubyObject opts) { + IRubyObject[] args = new IRubyObject[] {source, opts}; + Parser parser = (Parser)((RubyClass)clazz).allocate(); + parser.callInit(args, null); + return parser.parse(context); + } + @JRubyMethod(required = 1, optional = 1, visibility = Visibility.PRIVATE) public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { Ruby runtime = context.getRuntime(); @@ -166,10 +181,24 @@ public class Parser extends RubyObject { OptionsReader opts = new OptionsReader(context, args.length > 1 ? args[1] : null); this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); this.allowNaN = opts.getBool("allow_nan", false); + this.allowTrailingComma = opts.getBool("allow_trailing_comma", false); this.symbolizeNames = opts.getBool("symbolize_names", false); this.freeze = opts.getBool("freeze", false); this.createId = opts.getString("create_id", getCreateId(context)); - this.createAdditions = opts.getBool("create_additions", false); + + IRubyObject additions = opts.get("create_additions"); + this.createAdditions = false; + this.deprecatedCreateAdditions = false; + + if (additions != null) { + if (additions.isNil()) { + this.createAdditions = true; + this.deprecatedCreateAdditions = true; + } else { + this.createAdditions = opts.getBool("create_additions", false); + } + } + this.objectClass = opts.getClass("object_class", runtime.getHash()); this.arrayClass = opts.getClass("array_class", runtime.getArray()); this.decimalClass = opts.getClass("decimal_class", null); @@ -641,6 +670,9 @@ public class Parser extends RubyObject { RubyClass klass = (RubyClass) memoArray[1]; if (klass.respondsTo("json_creatable?") && klass.callMethod(context, "json_creatable?").isTrue()) { + if (parser.deprecatedCreateAdditions) { + klass.getRuntime().getWarnings().warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } result = klass.callMethod(context, "json_create", result); } } @@ -670,6 +702,8 @@ public class Parser extends RubyObject { write data; + action allow_trailing_comma { parser.allowTrailingComma } + action parse_value { parseValue(res, fpc, pe); if (res.result == null) { @@ -698,7 +732,7 @@ public class Parser extends RubyObject { ignore* ) ( ignore* next_element - ignore* )* )? + ignore* )* ( (value_separator ignore*) when allow_trailing_comma )? )? ignore* end_array @exit; }%% @@ -735,6 +769,8 @@ public class Parser extends RubyObject { write data; + action allow_trailing_comma { parser.allowTrailingComma } + action parse_value { parseValue(res, fpc, pe); if (res.result == null) { @@ -776,7 +812,9 @@ public class Parser extends RubyObject { next_pair = ignore* value_separator pair; main := ( - begin_object (pair (next_pair)*)? ignore* end_object + begin_object + (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? 
ignore* + end_object ) @exit; }%% @@ -826,6 +864,9 @@ public class Parser extends RubyObject { callMethod(context, "deep_const_get", vKlassName); if (klass.respondsTo("json_creatable?") && klass.callMethod(context, "json_creatable?").isTrue()) { + if (parser.deprecatedCreateAdditions) { + klass.getRuntime().getWarnings().warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + } returnedResult = klass.callMethod(context, "json_create", result); } diff --git a/json.gemspec b/json.gemspec index 96721f909..c6aa82d36 100644 --- a/json.gemspec +++ b/json.gemspec @@ -1,3 +1,5 @@ +# frozen_string_literal: true + version = File.foreach(File.join(__dir__, "lib/json/version.rb")) do |line| /^\s*VERSION\s*=\s*'(.*)'/ =~ line and break $1 end rescue nil @@ -19,7 +21,7 @@ spec = Gem::Specification.new do |s| 'wiki_uri' => 'https://github.com/ruby/json/wiki' } - s.required_ruby_version = Gem::Requirement.new(">= 2.3") + s.required_ruby_version = Gem::Requirement.new(">= 2.7") if java_ext s.description = "A JSON implementation as a JRuby extension." diff --git a/json_pure.gemspec b/json_pure.gemspec deleted file mode 100644 index 231f672c6..000000000 --- a/json_pure.gemspec +++ /dev/null @@ -1,58 +0,0 @@ -version = File.foreach(File.join(__dir__, "lib/json/version.rb")) do |line| - /^\s*VERSION\s*=\s*'(.*)'/ =~ line and break $1 -end rescue nil - -Gem::Specification.new do |s| - s.name = "json_pure".freeze - s.version = version - - s.summary = "JSON Implementation for Ruby".freeze - s.description = "This is a JSON implementation in pure Ruby.".freeze - s.licenses = ["Ruby".freeze] - s.authors = ["Florian Frank".freeze] - s.email = "flori@ping.de".freeze - - s.extra_rdoc_files = ["README.md".freeze] - s.rdoc_options = ["--title".freeze, "JSON implementation for ruby".freeze, "--main".freeze, "README.md".freeze] - s.files = [ - "CHANGES.md".freeze, - "COPYING".freeze, - "BSDL".freeze, - "LEGAL".freeze, - "README.md".freeze, - "json_pure.gemspec".freeze, - "lib/json.rb".freeze, - "lib/json/add/bigdecimal.rb".freeze, - "lib/json/add/complex.rb".freeze, - "lib/json/add/core.rb".freeze, - "lib/json/add/date.rb".freeze, - "lib/json/add/date_time.rb".freeze, - "lib/json/add/exception.rb".freeze, - "lib/json/add/ostruct.rb".freeze, - "lib/json/add/range.rb".freeze, - "lib/json/add/rational.rb".freeze, - "lib/json/add/regexp.rb".freeze, - "lib/json/add/set.rb".freeze, - "lib/json/add/struct.rb".freeze, - "lib/json/add/symbol.rb".freeze, - "lib/json/add/time.rb".freeze, - "lib/json/common.rb".freeze, - "lib/json/ext.rb".freeze, - "lib/json/generic_object.rb".freeze, - "lib/json/pure.rb".freeze, - "lib/json/pure/generator.rb".freeze, - "lib/json/pure/parser.rb".freeze, - "lib/json/version.rb".freeze, - ] - s.homepage = "https://ruby.github.io/json".freeze - s.metadata = { - 'bug_tracker_uri' => 'https://github.com/ruby/json/issues', - 'changelog_uri' => 'https://github.com/ruby/json/blob/master/CHANGES.md', - 'documentation_uri' => 'https://ruby.github.io/json/doc/index.html', - 'homepage_uri' => s.homepage, - 'source_code_uri' => 'https://github.com/ruby/json', - 'wiki_uri' => 'https://github.com/ruby/json/wiki' - } - - s.required_ruby_version = Gem::Requirement.new(">= 2.3".freeze) -end diff --git a/lib/json.rb b/lib/json.rb index 8b1673d53..dfd9b7dfc 100644 --- a/lib/json.rb +++ b/lib/json.rb @@ -378,13 +378,13 @@ # json1 = JSON.generate(ruby) # ruby1 = JSON.parse(json1, create_additions: true) # # 
Make a nice display. -# display = < self.class.name, - 'b' => _dump, + 'b' => _dump.force_encoding(Encoding::UTF_8), } end diff --git a/lib/json/common.rb b/lib/json/common.rb index bb37820a7..2269896ba 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -32,9 +32,7 @@ def [](object, opts = {}) JSON.generate(object, opts) end - # Returns the JSON parser class that is used by JSON. This is either - # JSON::Ext::Parser or JSON::Pure::Parser: - # JSON.parser # => JSON::Ext::Parser + # Returns the JSON parser class that is used by JSON. attr_reader :parser # Set the JSON parser class _parser_ to be used by JSON. @@ -49,18 +47,9 @@ def parser=(parser) # :nodoc: # level (absolute namespace path?). If there doesn't exist a constant at # the given path, an ArgumentError is raised. def deep_const_get(path) # :nodoc: - path.to_s.split(/::/).inject(Object) do |p, c| - case - when c.empty? then p - when p.const_defined?(c, true) then p.const_get(c) - else - begin - p.const_missing(c) - rescue NameError => e - raise ArgumentError, "can't get const #{path}: #{e}" - end - end - end + Object.const_get(path) + rescue NameError => e + raise ArgumentError, "can't get const #{path}: #{e}" end # Set the module _generator_ to be used by JSON. @@ -69,7 +58,7 @@ def generator=(generator) # :nodoc: @generator = generator generator_methods = generator::GeneratorMethods for const in generator_methods.constants - klass = deep_const_get(const) + klass = const_get(const) modul = generator_methods.const_get(const) klass.class_eval do instance_methods(false).each do |m| @@ -106,14 +95,10 @@ def create_pretty_state ) end - # Returns the JSON generator module that is used by JSON. This is - # either JSON::Ext::Generator or JSON::Pure::Generator: - # JSON.generator # => JSON::Ext::Generator + # Returns the JSON generator module that is used by JSON. attr_reader :generator - # Sets or Returns the JSON generator state class that is used by JSON. This is - # either JSON::Ext::Generator::State or JSON::Pure::Generator::State: - # JSON.state # => JSON::Ext::Generator::State + # Sets or Returns the JSON generator state class that is used by JSON. attr_accessor :state end @@ -195,17 +180,17 @@ class MissingUnicodeSupport < JSONError; end # {Parsing \JSON}[#module-JSON-label-Parsing+JSON]. # # Parses nested JSON objects: - # source = <<-EOT - # { - # "name": "Dave", - # "age" :40, - # "hats": [ - # "Cattleman's", - # "Panama", - # "Tophat" - # ] - # } - # EOT + # source = <<~JSON + # { + # "name": "Dave", + # "age" :40, + # "hats": [ + # "Cattleman's", + # "Panama", + # "Tophat" + # ] + # } + # JSON # ruby = JSON.parse(source) # ruby # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} # @@ -216,11 +201,7 @@ class MissingUnicodeSupport < JSONError; end # JSON.parse('') # def parse(source, opts = nil) - if opts.nil? - Parser.new(source).parse - else - Parser.new(source, opts).parse - end + Parser.parse(source, opts) end # :call-seq: @@ -302,11 +283,10 @@ def load_file!(filespec, opts = {}) # def generate(obj, opts = nil) if State === opts - state = opts + opts.generate(obj) else - state = State.new(opts) + State.generate(obj, opts) end - state.generate(obj) end # :stopdoc: @@ -399,6 +379,20 @@ def pretty_generate(obj, opts = nil) module_function :pretty_unparse # :startdoc: + class << self + # Sets or returns default options for the JSON.unsafe_load method. 
+ # Initially: + # opts = JSON.load_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} + attr_accessor :unsafe_load_default_options + end + self.unsafe_load_default_options = { + :max_nesting => false, + :allow_nan => true, + :allow_blank => true, + :create_additions => true, + } + class << self # Sets or returns default options for the JSON.load method. # Initially: @@ -407,11 +401,162 @@ class << self attr_accessor :load_default_options end self.load_default_options = { - :max_nesting => false, :allow_nan => true, :allow_blank => true, - :create_additions => true, + :create_additions => nil, } + # :call-seq: + # JSON.unsafe_load(source, proc = nil, options = {}) -> object + # + # Returns the Ruby objects created by parsing the given +source+. + # + # - Argument +source+ must be, or be convertible to, a \String: + # - If +source+ responds to instance method +to_str+, + # source.to_str becomes the source. + # - If +source+ responds to instance method +to_io+, + # source.to_io.read becomes the source. + # - If +source+ responds to instance method +read+, + # source.read becomes the source. + # - If both of the following are true, source becomes the \String 'null': + # - Option +allow_blank+ specifies a truthy value. + # - The source, as defined above, is +nil+ or the empty \String ''. + # - Otherwise, +source+ remains the source. + # - Argument +proc+, if given, must be a \Proc that accepts one argument. + # It will be called recursively with each result (depth-first order). + # See details below. + # BEWARE: This method is meant to serialise data from trusted user input, + # like from your own database server or clients under your control, it could + # be dangerous to allow untrusted users to pass JSON sources into it. + # - Argument +opts+, if given, contains a \Hash of options for the parsing. + # See {Parsing Options}[#module-JSON-label-Parsing+Options]. + # The default options can be changed via method JSON.unsafe_load_default_options=. + # + # --- + # + # When no +proc+ is given, modifies +source+ as above and returns the result of + # parse(source, opts); see #parse. + # + # Source for following examples: + # source = <<~JSON + # { + # "name": "Dave", + # "age" :40, + # "hats": [ + # "Cattleman's", + # "Panama", + # "Tophat" + # ] + # } + # JSON + # + # Load a \String: + # ruby = JSON.unsafe_load(source) + # ruby # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # Load an \IO object: + # require 'stringio' + # object = JSON.unsafe_load(StringIO.new(source)) + # object # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # Load a \File object: + # path = 't.json' + # File.write(path, source) + # File.open(path) do |file| + # JSON.unsafe_load(file) + # end # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # --- + # + # When +proc+ is given: + # - Modifies +source+ as above. + # - Gets the +result+ from calling parse(source, opts). + # - Recursively calls proc(result). + # - Returns the final result. + # + # Example: + # require 'json' + # + # # Some classes for the example. + # class Base + # def initialize(attributes) + # @attributes = attributes + # end + # end + # class User < Base; end + # class Account < Base; end + # class Admin < Base; end + # # The JSON source. 
+ # json = <<-EOF + # { + # "users": [ + # {"type": "User", "username": "jane", "email": "jane@example.com"}, + # {"type": "User", "username": "john", "email": "john@example.com"} + # ], + # "accounts": [ + # {"account": {"type": "Account", "paid": true, "account_id": "1234"}}, + # {"account": {"type": "Account", "paid": false, "account_id": "1235"}} + # ], + # "admins": {"type": "Admin", "password": "0wn3d"} + # } + # EOF + # # Deserializer method. + # def deserialize_obj(obj, safe_types = %w(User Account Admin)) + # type = obj.is_a?(Hash) && obj["type"] + # safe_types.include?(type) ? Object.const_get(type).new(obj) : obj + # end + # # Call to JSON.unsafe_load + # ruby = JSON.unsafe_load(json, proc {|obj| + # case obj + # when Hash + # obj.each {|k, v| obj[k] = deserialize_obj v } + # when Array + # obj.map! {|v| deserialize_obj v } + # end + # }) + # pp ruby + # Output: + # {"users"=> + # [#"User", "username"=>"jane", "email"=>"jane@example.com"}>, + # #"User", "username"=>"john", "email"=>"john@example.com"}>], + # "accounts"=> + # [{"account"=> + # #"Account", "paid"=>true, "account_id"=>"1234"}>}, + # {"account"=> + # #"Account", "paid"=>false, "account_id"=>"1235"}>}], + # "admins"=> + # #"Admin", "password"=>"0wn3d"}>} + # + def unsafe_load(source, proc = nil, options = nil) + opts = if options.nil? + unsafe_load_default_options + else + unsafe_load_default_options.merge(options) + end + + unless source.is_a?(String) + if source.respond_to? :to_str + source = source.to_str + elsif source.respond_to? :to_io + source = source.to_io.read + elsif source.respond_to?(:read) + source = source.read + end + end + + if opts[:allow_blank] && (source.nil? || source.empty?) + source = 'null' + end + result = parse(source, opts) + recurse_proc(result, &proc) if proc + result + end # :call-seq: # JSON.load(source, proc = nil, options = {}) -> object @@ -435,6 +580,7 @@ class << self # BEWARE: This method is meant to serialise data from trusted user input, # like from your own database server or clients under your control, it could # be dangerous to allow untrusted users to pass JSON sources into it. + # If you must use it, use JSON.unsafe_load instead to make it clear. # - Argument +opts+, if given, contains a \Hash of options for the parsing. # See {Parsing Options}[#module-JSON-label-Parsing+Options]. # The default options can be changed via method JSON.load_default_options=. @@ -445,17 +591,17 @@ class << self # parse(source, opts); see #parse. # # Source for following examples: - # source = <<-EOT - # { - # "name": "Dave", - # "age" :40, - # "hats": [ - # "Cattleman's", - # "Panama", - # "Tophat" - # ] - # } - # EOT + # source = <<~JSON + # { + # "name": "Dave", + # "age" :40, + # "hats": [ + # "Cattleman's", + # "Panama", + # "Tophat" + # ] + # } + # JSON # # Load a \String: # ruby = JSON.load(source) diff --git a/lib/json/ext.rb b/lib/json/ext.rb index 775e28a96..2082cae68 100644 --- a/lib/json/ext.rb +++ b/lib/json/ext.rb @@ -8,17 +8,12 @@ module JSON module Ext if RUBY_ENGINE == 'truffleruby' require 'json/ext/parser' - require 'json/pure' - $DEBUG and warn "Using Ext extension for JSON parser and Pure library for JSON generator." + require 'json/truffle_ruby/generator' JSON.parser = Parser - JSON.generator = JSON::Pure::Generator + JSON.generator = ::JSON::TruffleRuby::Generator else require 'json/ext/parser' require 'json/ext/generator' - unless RUBY_ENGINE == 'jruby' - require 'json/ext/generator/state' - end - $DEBUG and warn "Using Ext extension for JSON." 
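Taken together, the grammar changes and the `lib/json/common.rb` hunks above amount to two user-visible behaviours: trailing commas become an opt-in parser option, and `JSON.load` now warns before reviving custom types unless `create_additions` is passed explicitly, with `JSON.unsafe_load` keeping the old permissive defaults. A minimal sketch of both (illustrative only, not part of the patch; it assumes `json/add/time` merely to have a revivable type at hand):

```ruby
require 'json'

# Trailing commas stay rejected by default and are accepted only on request,
# via the allow_trailing_comma option threaded through the grammars above.
JSON.parse('[1, 2, 3,]', allow_trailing_comma: true)  # => [1, 2, 3]
JSON.parse('{"a": 1,}',  allow_trailing_comma: true)  # => {"a"=>1}
JSON.parse('[1, 2, 3,]')                              # raises JSON::ParserError

# Implicit create_additions now warns; the old defaults live on in JSON.unsafe_load.
require 'json/add/time'                   # gives Time a json_create hook
payload = JSON.generate(Time.now)         # => '{"json_class":"Time",...}'
JSON.unsafe_load(payload)                 # => Time instance, no warning
JSON.load(payload, nil, create_additions: true)   # => Time instance, no warning
JSON.load(payload)                        # => Time instance, plus the deprecation warning
JSON.load(payload, nil, create_additions: false)  # => plain Hash
```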
JSON.parser = Parser JSON.generator = Generator end diff --git a/lib/json/ext/generator/state.rb b/lib/json/ext/generator/state.rb index 4f9675d7b..6cd9496e6 100644 --- a/lib/json/ext/generator/state.rb +++ b/lib/json/ext/generator/state.rb @@ -42,37 +42,7 @@ def configure(opts) raise TypeError, "can't convert #{opts.class} into Hash" end end - - opts.each do |key, value| - case key - when :indent - self.indent = value - when :space - self.space = value - when :space_before - self.space_before = value - when :array_nl - self.array_nl = value - when :object_nl - self.object_nl = value - when :max_nesting - self.max_nesting = value || 0 - when :depth - self.depth = value - when :buffer_initial_length - self.buffer_initial_length = value - when :allow_nan - self.allow_nan = value - when :ascii_only - self.ascii_only = value - when :script_safe, :escape_slash - self.script_safe = value - when :strict - self.strict = value - end - end - - self + _configure(opts) end alias_method :merge, :configure diff --git a/lib/json/pure.rb b/lib/json/pure.rb deleted file mode 100644 index 69d2256d1..000000000 --- a/lib/json/pure.rb +++ /dev/null @@ -1,16 +0,0 @@ -# frozen_string_literal: true -require 'json/common' - -module JSON - # This module holds all the modules/classes that implement JSON's - # functionality in pure ruby. - module Pure - require 'json/pure/parser' - require 'json/pure/generator' - $DEBUG and warn "Using Pure library for JSON." - JSON.parser = Parser - JSON.generator = Generator - end - - JSON_LOADED = true unless defined?(::JSON::JSON_LOADED) -end diff --git a/lib/json/pure/parser.rb b/lib/json/pure/parser.rb deleted file mode 100644 index 3dafe8309..000000000 --- a/lib/json/pure/parser.rb +++ /dev/null @@ -1,331 +0,0 @@ -#frozen_string_literal: true -require 'strscan' - -module JSON - module Pure - # This class implements the JSON parser that is used to parse a JSON string - # into a Ruby data structure. - class Parser < StringScanner - STRING = /" ((?:[^\x0-\x1f"\\] | - # escaped special characters: - \\["\\\/bfnrt] | - \\u[0-9a-fA-F]{4} | - # match all but escaped special characters: - \\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*) - "/nx - INTEGER = /(-?0|-?[1-9]\d*)/ - FLOAT = /(-? - (?:0|[1-9]\d*) - (?: - \.\d+(?i:e[+-]?\d+) | - \.\d+ | - (?i:e[+-]?\d+) - ) - )/x - NAN = /NaN/ - INFINITY = /Infinity/ - MINUS_INFINITY = /-Infinity/ - OBJECT_OPEN = /\{/ - OBJECT_CLOSE = /\}/ - ARRAY_OPEN = /\[/ - ARRAY_CLOSE = /\]/ - PAIR_DELIMITER = /:/ - COLLECTION_DELIMITER = /,/ - TRUE = /true/ - FALSE = /false/ - NULL = /null/ - IGNORE = %r( - (?: - //[^\n\r]*[\n\r]| # line comments - /\* # c-style comments - (?: - [\s\S]*? # any char, repeated lazily - ) - \*/ # the End of this comment - |[ \t\r\n]+ # whitespaces: space, horizontal tab, lf, cr - )+ - )mx - - UNPARSED = Object.new.freeze - - # Creates a new JSON::Pure::Parser instance for the string _source_. - # - # It will be configured by the _opts_ hash. _opts_ can have the following - # keys: - # * *max_nesting*: The maximum depth of nesting allowed in the parsed data - # structures. Disable depth checking with :max_nesting => false|nil|0, - # it defaults to 100. - # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - # defiance of RFC 7159 to be parsed by the Parser. This option defaults - # to false. - # * *freeze*: If set to true, all parsed objects will be frozen. Parsed - # string will be deduplicated if possible. 
- # * *symbolize_names*: If set to true, returns symbols for the names - # (keys) in a JSON object. Otherwise strings are returned, which is - # also the default. It's not possible to use this option in - # conjunction with the *create_additions* option. - # * *create_additions*: If set to true, the Parser creates - # additions when a matching class and create_id are found. This - # option defaults to false. - # * *object_class*: Defaults to Hash. If another type is provided, it will be used - # instead of Hash to represent JSON objects. The type must respond to - # +new+ without arguments, and return an object that respond to +[]=+. - # * *array_class*: Defaults to Array If another type is provided, it will be used - # instead of Hash to represent JSON arrays. The type must respond to - # +new+ without arguments, and return an object that respond to +<<+. - # * *decimal_class*: Specifies which class to use instead of the default - # (Float) when parsing decimal numbers. This class must accept a single - # string argument in its constructor. - def initialize(source, opts = nil) - opts ||= {} - source = convert_encoding source - super source - if !opts.key?(:max_nesting) # defaults to 100 - @max_nesting = 100 - elsif opts[:max_nesting] - @max_nesting = opts[:max_nesting] - else - @max_nesting = 0 - end - @allow_nan = !!opts[:allow_nan] - @symbolize_names = !!opts[:symbolize_names] - @freeze = !!opts[:freeze] - if opts.key?(:create_additions) - @create_additions = !!opts[:create_additions] - else - @create_additions = false - end - @symbolize_names && @create_additions and raise ArgumentError, - 'options :symbolize_names and :create_additions cannot be used '\ - 'in conjunction' - @create_id = @create_additions ? JSON.create_id : nil - @object_class = opts[:object_class] || Hash - @array_class = opts[:array_class] || Array - @decimal_class = opts[:decimal_class] - @match_string = opts[:match_string] - end - - alias source string - - def reset - super - @current_nesting = 0 - end - - # Parses the current JSON string _source_ and returns the - # complete data structure as a result. - def parse - reset - obj = nil - while !eos? && skip(IGNORE) do end - if eos? - raise ParserError, "source is not valid JSON!" - else - obj = parse_value - UNPARSED.equal?(obj) and raise ParserError, - "source is not valid JSON!" - obj.freeze if @freeze - end - while !eos? && skip(IGNORE) do end - eos? or raise ParserError, "source is not valid JSON!" - obj - end - - private - - def convert_encoding(source) - if source.respond_to?(:to_str) - source = source.to_str - else - raise TypeError, - "#{source.inspect} is not like a string" - end - if source.encoding != ::Encoding::ASCII_8BIT - source = source.encode(::Encoding::UTF_8) - source.force_encoding(::Encoding::ASCII_8BIT) - end - source - end - - # Unescape characters in strings. - UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr } - UNESCAPE_MAP.update({ - ?" => '"', - ?\\ => '\\', - ?/ => '/', - ?b => "\b", - ?f => "\f", - ?n => "\n", - ?r => "\r", - ?t => "\t", - ?u => nil, - }) - - STR_UMINUS = ''.respond_to?(:-@) - def parse_string - if scan(STRING) - return '' if self[1].empty? 
- string = self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c| - if u = UNESCAPE_MAP[$&[1]] - u - else # \uXXXX - bytes = ''.b - i = 0 - while c[6 * i] == ?\\ && c[6 * i + 1] == ?u - bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16) - i += 1 - end - bytes.encode(Encoding::UTF_8, Encoding::UTF_16BE).force_encoding(::Encoding::BINARY) - end - end - string.force_encoding(::Encoding::UTF_8) - - if @freeze - if STR_UMINUS - string = -string - else - string.freeze - end - end - - if @create_additions and @match_string - for (regexp, klass) in @match_string - klass.json_creatable? or next - string =~ regexp and return klass.json_create(string) - end - end - string - else - UNPARSED - end - rescue => e - raise ParserError, "Caught #{e.class} at '#{peek(20)}': #{e}" - end - - def parse_value - case - when scan(FLOAT) - if @decimal_class then - if @decimal_class == BigDecimal then - BigDecimal(self[1]) - else - @decimal_class.new(self[1]) || Float(self[1]) - end - else - Float(self[1]) - end - when scan(INTEGER) - Integer(self[1]) - when scan(TRUE) - true - when scan(FALSE) - false - when scan(NULL) - nil - when !UNPARSED.equal?(string = parse_string) - string - when scan(ARRAY_OPEN) - @current_nesting += 1 - ary = parse_array - @current_nesting -= 1 - ary - when scan(OBJECT_OPEN) - @current_nesting += 1 - obj = parse_object - @current_nesting -= 1 - obj - when @allow_nan && scan(NAN) - NaN - when @allow_nan && scan(INFINITY) - Infinity - when @allow_nan && scan(MINUS_INFINITY) - MinusInfinity - else - UNPARSED - end - end - - def parse_array - raise NestingError, "nesting of #@current_nesting is too deep" if - @max_nesting.nonzero? && @current_nesting > @max_nesting - result = @array_class.new - delim = false - loop do - case - when eos? - raise ParserError, "unexpected end of string while parsing array" - when !UNPARSED.equal?(value = parse_value) - delim = false - result << value - skip(IGNORE) - if scan(COLLECTION_DELIMITER) - delim = true - elsif match?(ARRAY_CLOSE) - ; - else - raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!" - end - when scan(ARRAY_CLOSE) - if delim - raise ParserError, "expected next element in array at '#{peek(20)}'!" - end - break - when skip(IGNORE) - ; - else - raise ParserError, "unexpected token in array at '#{peek(20)}'!" - end - end - result - end - - def parse_object - raise NestingError, "nesting of #@current_nesting is too deep" if - @max_nesting.nonzero? && @current_nesting > @max_nesting - result = @object_class.new - delim = false - loop do - case - when eos? - raise ParserError, "unexpected end of string while parsing object" - when !UNPARSED.equal?(string = parse_string) - skip(IGNORE) - unless scan(PAIR_DELIMITER) - raise ParserError, "expected ':' in object at '#{peek(20)}'!" - end - skip(IGNORE) - unless UNPARSED.equal?(value = parse_value) - result[@symbolize_names ? string.to_sym : string] = value - delim = false - skip(IGNORE) - if scan(COLLECTION_DELIMITER) - delim = true - elsif match?(OBJECT_CLOSE) - ; - else - raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!" - end - else - raise ParserError, "expected value in object at '#{peek(20)}'!" - end - when scan(OBJECT_CLOSE) - if delim - raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!" - end - if @create_additions and klassname = result[@create_id] - klass = JSON.deep_const_get klassname - break unless klass and klass.json_creatable? 
- result = klass.json_create(result) - end - break - when skip(IGNORE) - ; - else - raise ParserError, "unexpected token in object at '#{peek(20)}'!" - end - end - result - end - end - end -end diff --git a/lib/json/pure/generator.rb b/lib/json/truffle_ruby/generator.rb similarity index 73% rename from lib/json/pure/generator.rb rename to lib/json/truffle_ruby/generator.rb index c2268cc36..b0f3e4209 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/truffle_ruby/generator.rb @@ -1,101 +1,105 @@ # frozen_string_literal: true module JSON - MAP = { - "\x0" => '\u0000', - "\x1" => '\u0001', - "\x2" => '\u0002', - "\x3" => '\u0003', - "\x4" => '\u0004', - "\x5" => '\u0005', - "\x6" => '\u0006', - "\x7" => '\u0007', - "\b" => '\b', - "\t" => '\t', - "\n" => '\n', - "\xb" => '\u000b', - "\f" => '\f', - "\r" => '\r', - "\xe" => '\u000e', - "\xf" => '\u000f', - "\x10" => '\u0010', - "\x11" => '\u0011', - "\x12" => '\u0012', - "\x13" => '\u0013', - "\x14" => '\u0014', - "\x15" => '\u0015', - "\x16" => '\u0016', - "\x17" => '\u0017', - "\x18" => '\u0018', - "\x19" => '\u0019', - "\x1a" => '\u001a', - "\x1b" => '\u001b', - "\x1c" => '\u001c', - "\x1d" => '\u001d', - "\x1e" => '\u001e', - "\x1f" => '\u001f', - '"' => '\"', - '\\' => '\\\\', - }.freeze # :nodoc: - - ESCAPE_PATTERN = /[\/"\\\x0-\x1f]/n # :nodoc: - - SCRIPT_SAFE_MAP = MAP.merge( - '/' => '\\/', - "\u2028".b => '\u2028', - "\u2029".b => '\u2029', - ).freeze - - SCRIPT_SAFE_ESCAPE_PATTERN = Regexp.union(ESCAPE_PATTERN, "\u2028".b, "\u2029".b) - - # Convert a UTF8 encoded Ruby string _string_ to a JSON string, encoded with - # UTF16 big endian characters as \u????, and return it. - def utf8_to_json(string, script_safe = false) # :nodoc: - string = string.b - if script_safe - string.gsub!(SCRIPT_SAFE_ESCAPE_PATTERN) { SCRIPT_SAFE_MAP[$&] || $& } - else - string.gsub!(ESCAPE_PATTERN) { MAP[$&] || $& } - end - string.force_encoding(::Encoding::UTF_8) - string - end + module TruffleRuby + module Generator + MAP = { + "\x0" => '\u0000', + "\x1" => '\u0001', + "\x2" => '\u0002', + "\x3" => '\u0003', + "\x4" => '\u0004', + "\x5" => '\u0005', + "\x6" => '\u0006', + "\x7" => '\u0007', + "\b" => '\b', + "\t" => '\t', + "\n" => '\n', + "\xb" => '\u000b', + "\f" => '\f', + "\r" => '\r', + "\xe" => '\u000e', + "\xf" => '\u000f', + "\x10" => '\u0010', + "\x11" => '\u0011', + "\x12" => '\u0012', + "\x13" => '\u0013', + "\x14" => '\u0014', + "\x15" => '\u0015', + "\x16" => '\u0016', + "\x17" => '\u0017', + "\x18" => '\u0018', + "\x19" => '\u0019', + "\x1a" => '\u001a', + "\x1b" => '\u001b', + "\x1c" => '\u001c', + "\x1d" => '\u001d', + "\x1e" => '\u001e', + "\x1f" => '\u001f', + '"' => '\"', + '\\' => '\\\\', + }.freeze # :nodoc: + + ESCAPE_PATTERN = /[\/"\\\x0-\x1f]/n # :nodoc: + + SCRIPT_SAFE_MAP = MAP.merge( + '/' => '\\/', + "\u2028".b => '\u2028', + "\u2029".b => '\u2029', + ).freeze + + SCRIPT_SAFE_ESCAPE_PATTERN = Regexp.union(ESCAPE_PATTERN, "\u2028".b, "\u2029".b) + + # Convert a UTF8 encoded Ruby string _string_ to a JSON string, encoded with + # UTF16 big endian characters as \u????, and return it. + def utf8_to_json(string, script_safe = false) # :nodoc: + string = string.b + if script_safe + string.gsub!(SCRIPT_SAFE_ESCAPE_PATTERN) { SCRIPT_SAFE_MAP[$&] || $& } + else + string.gsub!(ESCAPE_PATTERN) { MAP[$&] || $& } + end + string.force_encoding(::Encoding::UTF_8) + string + end - def utf8_to_json_ascii(string, script_safe = false) # :nodoc: - string = string.b - map = script_safe ? 
SCRIPT_SAFE_MAP : MAP - string.gsub!(/[\/"\\\x0-\x1f]/n) { map[$&] || $& } - string.gsub!(/( - (?: - [\xc2-\xdf][\x80-\xbf] | - [\xe0-\xef][\x80-\xbf]{2} | - [\xf0-\xf4][\x80-\xbf]{3} - )+ | - [\x80-\xc1\xf5-\xff] # invalid - )/nx) { |c| - c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'" - s = c.encode(::Encoding::UTF_16BE, ::Encoding::UTF_8).unpack('H*')[0] - s.force_encoding(::Encoding::ASCII_8BIT) - s.gsub!(/.{4}/n, '\\\\u\&') - s.force_encoding(::Encoding::UTF_8) - } - string.force_encoding(::Encoding::UTF_8) - string - rescue => e - raise GeneratorError.wrap(e) - end + def utf8_to_json_ascii(string, script_safe = false) # :nodoc: + string = string.b + map = script_safe ? SCRIPT_SAFE_MAP : MAP + string.gsub!(/[\/"\\\x0-\x1f]/n) { map[$&] || $& } + string.gsub!(/( + (?: + [\xc2-\xdf][\x80-\xbf] | + [\xe0-\xef][\x80-\xbf]{2} | + [\xf0-\xf4][\x80-\xbf]{3} + )+ | + [\x80-\xc1\xf5-\xff] # invalid + )/nx) { |c| + c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'" + s = c.encode(::Encoding::UTF_16BE, ::Encoding::UTF_8).unpack('H*')[0] + s.force_encoding(::Encoding::BINARY) + s.gsub!(/.{4}/n, '\\\\u\&') + s.force_encoding(::Encoding::UTF_8) + } + string.force_encoding(::Encoding::UTF_8) + string + rescue => e + raise GeneratorError.wrap(e) + end - def valid_utf8?(string) - encoding = string.encoding - (encoding == Encoding::UTF_8 || encoding == Encoding::ASCII) && - string.valid_encoding? - end - module_function :utf8_to_json, :utf8_to_json_ascii, :valid_utf8? + def valid_utf8?(string) + encoding = string.encoding + (encoding == Encoding::UTF_8 || encoding == Encoding::ASCII) && + string.valid_encoding? + end + module_function :utf8_to_json, :utf8_to_json_ascii, :valid_utf8? - module Pure - module Generator # This class is used to create State instances, that are use to hold data # while generating a JSON text from a Ruby data structure. class State + def self.generate(obj, opts = nil) + new(opts).generate(obj) + end + # Creates a State object from _opts_, which ought to be Hash to create # a new State instance configured by _opts_, something else to create # an unconfigured instance. If _opts_ is a State object, it is just @@ -130,7 +134,7 @@ def self.from_state(opts) # * *allow_nan*: true if NaN, Infinity, and -Infinity should be # generated, otherwise an exception is thrown, if these values are # encountered. This options defaults to false. - def initialize(opts = {}) + def initialize(opts = nil) @indent = '' @space = '' @space_before = '' @@ -138,10 +142,12 @@ def initialize(opts = {}) @array_nl = '' @allow_nan = false @ascii_only = false - @script_safe = false - @strict = false + @depth = 0 @buffer_initial_length = 1024 - configure opts + @script_safe = false + @strict = false + @max_nesting = 100 + configure(opts) if opts end # This string is used to indent levels in the JSON text. 
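For callers, the `opts = nil` default and the new `State.generate` shortcut above, combined with the `|| ''` coercions in the `configure` hunk that follows, mean a `State` can be built with no options at all and that `nil` formatting settings behave like `""`. A minimal sketch of the resulting behavior, assuming the gem is built and loaded:

```ruby
require 'json'

# Formatting options set to nil are treated like "" (compact output),
# matching the test_falsy_state test added further down in this patch.
JSON.generate({ "a" => 1 }, indent: nil, object_nl: nil, space: nil)
# => '{"a":1}'

# A State can be created without any configuration hash and reused;
# JSON.state resolves to the State class of whichever generator is loaded.
state = JSON.state.new
state.generate([1, 2, 3])  # => "[1,2,3]"
```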
@@ -239,13 +245,13 @@ def configure(opts) end # NOTE: If adding new instance variables here, check whether #generate should check them for #generate_json - @indent = opts[:indent] if opts.key?(:indent) - @space = opts[:space] if opts.key?(:space) - @space_before = opts[:space_before] if opts.key?(:space_before) - @object_nl = opts[:object_nl] if opts.key?(:object_nl) - @array_nl = opts[:array_nl] if opts.key?(:array_nl) - @allow_nan = !!opts[:allow_nan] if opts.key?(:allow_nan) - @ascii_only = opts[:ascii_only] if opts.key?(:ascii_only) + @indent = opts[:indent] || '' if opts.key?(:indent) + @space = opts[:space] || '' if opts.key?(:space) + @space_before = opts[:space_before] || '' if opts.key?(:space_before) + @object_nl = opts[:object_nl] || '' if opts.key?(:object_nl) + @array_nl = opts[:array_nl] || '' if opts.key?(:array_nl) + @allow_nan = !!opts[:allow_nan] if opts.key?(:allow_nan) + @ascii_only = opts[:ascii_only] if opts.key?(:ascii_only) @depth = opts[:depth] || 0 @buffer_initial_length ||= opts[:buffer_initial_length] @@ -294,7 +300,7 @@ def generate(obj) else result = obj.to_json(self) end - JSON.valid_utf8?(result) or raise GeneratorError, + JSON::TruffleRuby::Generator.valid_utf8?(result) or raise GeneratorError, "source sequence #{result.inspect} is illegal/malformed utf-8" result end @@ -307,7 +313,16 @@ def generate(obj) first = true obj.each_pair do |k,v| buf << ',' unless first - fast_serialize_string(k.to_s, buf) + + key_str = k.to_s + if key_str.class == String + fast_serialize_string(key_str, buf) + elsif key_str.is_a?(String) + generate_json(key_str, buf) + else + raise TypeError, "#{k.class}#to_s returns an instance of #{key_str.class}, expected a String" + end + buf << ':' generate_json(v, buf) first = false @@ -323,7 +338,11 @@ def generate(obj) end buf << ']' when String - fast_serialize_string(obj, buf) + if obj.class == String + fast_serialize_string(obj, buf) + else + buf << obj.to_json(self) + end when Integer buf << obj.to_s else @@ -333,24 +352,23 @@ def generate(obj) end # Assumes !@ascii_only, !@script_safe - if Regexp.method_defined?(:match?) - private def fast_serialize_string(string, buf) # :nodoc: - buf << '"' - string = string.encode(::Encoding::UTF_8) unless string.encoding == ::Encoding::UTF_8 - raise GeneratorError, "source sequence is illegal/malformed utf-8" unless string.valid_encoding? - - if /["\\\x0-\x1f]/n.match?(string) - buf << string.gsub(/["\\\x0-\x1f]/n, MAP) - else - buf << string + private def fast_serialize_string(string, buf) # :nodoc: + buf << '"' + unless string.encoding == ::Encoding::UTF_8 + begin + string = string.encode(::Encoding::UTF_8) + rescue Encoding::UndefinedConversionError => error + raise GeneratorError, error.message end - buf << '"' end - else - # Ruby 2.3 compatibility - private def fast_serialize_string(string, buf) # :nodoc: - buf << string.to_json(self) + raise GeneratorError, "source sequence is illegal/malformed utf-8" unless string.valid_encoding? + + if /["\\\x0-\x1f]/n.match?(string) + buf << string.gsub(/["\\\x0-\x1f]/n, MAP) + else + buf << string end + buf << '"' end # Return the value returned by method +name+. @@ -406,15 +424,29 @@ def json_shift(state) end def json_transform(state) + depth = state.depth += 1 + + if empty? + state.depth -= 1 + return '{}' + end + delim = ",#{state.object_nl}" result = +"{#{state.object_nl}" - depth = state.depth += 1 first = true indent = !state.object_nl.empty? 
each { |key, value| result << delim unless first result << state.indent * depth if indent - result = +"#{result}#{key.to_s.to_json(state)}#{state.space_before}:#{state.space}" + + key_str = key.to_s + if key_str.is_a?(String) + key_json = key_str.to_json(state) + else + raise TypeError, "#{key.class}#to_s returns an instance of #{key_str.class}, expected a String" + end + + result = +"#{result}#{key_json}#{state.space_before}:#{state.space}" if state.strict? && !(false == value || true == value || nil == value || String === value || Array === value || Hash === value || Integer === value || Float === value) raise GeneratorError, "#{value.class} not allowed in JSON" elsif value.respond_to?(:to_json) @@ -448,6 +480,13 @@ def to_json(state = nil, *) private def json_transform(state) + depth = state.depth += 1 + + if empty? + state.depth -= 1 + return '[]' + end + result = '['.dup if state.array_nl.empty? delim = "," @@ -455,7 +494,7 @@ def json_transform(state) result << state.array_nl delim = ",#{state.array_nl}" end - depth = state.depth += 1 + first = true indent = !state.array_nl.empty? each { |value| @@ -520,10 +559,12 @@ def to_json(state = nil, *args) string = encode(::Encoding::UTF_8) end if state.ascii_only? - %("#{JSON.utf8_to_json_ascii(string, state.script_safe)}") + %("#{JSON::TruffleRuby::Generator.utf8_to_json_ascii(string, state.script_safe)}") else - %("#{JSON.utf8_to_json(string, state.script_safe)}") + %("#{JSON::TruffleRuby::Generator.utf8_to_json(string, state.script_safe)}") end + rescue Encoding::UndefinedConversionError => error + raise ::JSON::GeneratorError, error.message end # Module that holds the extending methods if, the String module is diff --git a/lib/json/version.rb b/lib/json/version.rb index d3d621b75..363bdbead 100644 --- a/lib/json/version.rb +++ b/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.7.3' + VERSION = '2.8.0' end diff --git a/test/json/fixtures/fail4.json b/test/json/fixtures/fail4.json deleted file mode 100644 index 9de168bf3..000000000 --- a/test/json/fixtures/fail4.json +++ /dev/null @@ -1 +0,0 @@ -["extra comma",] \ No newline at end of file diff --git a/test/json/fixtures/fail9.json b/test/json/fixtures/fail9.json deleted file mode 100644 index 5815574f3..000000000 --- a/test/json/fixtures/fail9.json +++ /dev/null @@ -1 +0,0 @@ -{"Extra comma": true,} \ No newline at end of file diff --git a/test/json/json_addition_test.rb b/test/json/json_addition_test.rb index 8c3fbda56..1eb269c2f 100644 --- a/test/json/json_addition_test.rb +++ b/test/json/json_addition_test.rb @@ -162,6 +162,12 @@ def test_core assert_equal(/foo/i, JSON(JSON(/foo/i), :create_additions => true)) end + def test_deprecated_load_create_additions + assert_deprecated_warning(/use JSON\.unsafe_load/) do + JSON.load(JSON.dump(Time.now)) + end + end + def test_utc_datetime now = Time.now d = DateTime.parse(now.to_s) # usual case diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index e552412bf..6165cc041 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -52,11 +52,11 @@ def test_parser end def test_generator - assert_match(/::Generator\z/, JSON.generator.name) + assert_match(/::(TruffleRuby)?Generator\z/, JSON.generator.name) end def test_state - assert_match(/::Generator::State\z/, JSON.state.name) + assert_match(/::(TruffleRuby)?Generator::State\z/, JSON.state.name) end def test_create_id diff --git a/test/json/json_ext_parser_test.rb 
b/test/json/json_ext_parser_test.rb index ff6598f49..da6150498 100644 --- a/test/json/json_ext_parser_test.rb +++ b/test/json/json_ext_parser_test.rb @@ -2,33 +2,51 @@ require_relative 'test_helper' class JSONExtParserTest < Test::Unit::TestCase - if defined?(JSON::Ext::Parser) - include JSON - - def test_allocate - parser = JSON::Ext::Parser.new("{}") - assert_raise(TypeError, '[ruby-core:35079]') do - parser.__send__(:initialize, "{}") - end - parser = JSON::Ext::Parser.allocate - assert_raise(TypeError, '[ruby-core:35079]') { parser.source } + include JSON + + def test_allocate + parser = JSON::Ext::Parser.new("{}") + assert_raise(TypeError, '[ruby-core:35079]') do + parser.__send__(:initialize, "{}") end + parser = JSON::Ext::Parser.allocate + assert_raise(TypeError, '[ruby-core:35079]') { parser.source } + end - def test_error_messages - ex = assert_raise(ParserError) { parse('Infinity') } - assert_equal "unexpected token at 'Infinity'", ex.message + def test_error_messages + ex = assert_raise(ParserError) { parse('Infinity') } + assert_equal "unexpected token at 'Infinity'", ex.message - unless RUBY_PLATFORM =~ /java/ - ex = assert_raise(ParserError) { parse('-Infinity') } - assert_equal "unexpected token at '-Infinity'", ex.message - end + unless RUBY_PLATFORM =~ /java/ + ex = assert_raise(ParserError) { parse('-Infinity') } + assert_equal "unexpected token at '-Infinity'", ex.message + end + + ex = assert_raise(ParserError) { parse('NaN') } + assert_equal "unexpected token at 'NaN'", ex.message + end - ex = assert_raise(ParserError) { parse('NaN') } - assert_equal "unexpected token at 'NaN'", ex.message + if GC.respond_to?(:stress=) + def test_gc_stress_parser_new + payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) + + previous_stress = GC.stress + JSON::Parser.new(payload).parse + ensure + GC.stress = previous_stress end - def parse(json) - JSON::Ext::Parser.new(json).parse + def test_gc_stress + payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) + + previous_stress = GC.stress + JSON.parse(payload) + ensure + GC.stress = previous_stress end end + + def parse(json) + JSON::Ext::Parser.new(json).parse + end end diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 7dc45e3a5..700220a15 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -19,24 +19,24 @@ def setup } @json2 = '{"a":2,"b":3.141,"c":"c","d":[1,"b",3.14],"e":{"foo":"bar"},' + '"g":"\\"\\u0000\\u001f","h":1000.0,"i":0.001}' - @json3 = <<'EOT'.chomp -{ - "a": 2, - "b": 3.141, - "c": "c", - "d": [ - 1, - "b", - 3.14 - ], - "e": { - "foo": "bar" - }, - "g": "\"\u0000\u001f", - "h": 1000.0, - "i": 0.001 -} -EOT + @json3 = <<~'JSON'.chomp + { + "a": 2, + "b": 3.141, + "c": "c", + "d": [ + 1, + "b", + 3.14 + ], + "e": { + "foo": "bar" + }, + "g": "\"\u0000\u001f", + "h": 1000.0, + "i": 0.001 + } + JSON end def silence @@ -90,10 +90,17 @@ def test_dump_strict def test_generate_pretty json = pretty_generate({}) - assert_equal(<<'EOT'.chomp, json) -{ -} -EOT + assert_equal('{}', json) + + json = pretty_generate({1=>{}, 2=>[], 3=>4}) + assert_equal(<<~'JSON'.chomp, json) + { + "1": {}, + "2": [], + "3": 4 + } + JSON + json = pretty_generate(@hash) # hashes aren't (insertion) ordered on every ruby implementation # assert_equal(@json3, json) @@ -101,11 +108,11 @@ def test_generate_pretty parsed_json = parse(json) assert_equal(@hash, parsed_json) json = pretty_generate({1=>2}) - assert_equal(<<'EOT'.chomp, json) -{ - "1": 
2 -} -EOT + assert_equal(<<~'JSON'.chomp, json) + { + "1": 2 + } + JSON parsed_json = parse(json) assert_equal({"1"=>2}, parsed_json) assert_equal '666', pretty_generate(666) @@ -114,14 +121,14 @@ def test_generate_pretty def test_generate_custom state = State.new(:space_before => " ", :space => " ", :indent => "", :object_nl => "\n", :array_nl => "") json = generate({1=>{2=>3,4=>[5,6]}}, state) - assert_equal(<<'EOT'.chomp, json) -{ -"1" : { -"2" : 3, -"4" : [5,6] -} -} -EOT + assert_equal(<<~'JSON'.chomp, json) + { + "1" : { + "2" : 3, + "4" : [5,6] + } + } + JSON end def test_fast_generate @@ -167,6 +174,27 @@ def test_states assert s[:check_circular?] end + def test_falsy_state + object = { foo: [1, 2], bar: { egg: :spam }} + expected_json = JSON.generate( + object, + array_nl: "", + indent: "", + object_nl: "", + space: "", + space_before: "", + ) + + assert_equal expected_json, JSON.generate( + object, + array_nl: nil, + indent: nil, + object_nl: nil, + space: nil, + space_before: nil, + ) + end + def test_pretty_state state = JSON.create_pretty_state assert_equal({ @@ -261,19 +289,19 @@ def test_buffer_initial_length end def test_gc - if respond_to?(:assert_in_out_err) && !(RUBY_PLATFORM =~ /java/) - assert_in_out_err(%w[-rjson -Ilib -Iext], <<-EOS, [], []) - bignum_too_long_to_embed_as_string = 1234567890123456789012345 - expect = bignum_too_long_to_embed_as_string.to_s - GC.stress = true - - 10.times do |i| - tmp = bignum_too_long_to_embed_as_string.to_json - raise "'\#{expect}' is expected, but '\#{tmp}'" unless tmp == expect - end - EOS + pid = fork do + bignum_too_long_to_embed_as_string = 1234567890123456789012345 + expect = bignum_too_long_to_embed_as_string.to_s + GC.stress = true + + 10.times do |i| + tmp = bignum_too_long_to_embed_as_string.to_json + raise "#{expect}' is expected, but '#{tmp}'" unless tmp == expect + end end - end if GC.respond_to?(:stress=) + _, status = Process.waitpid2(pid) + assert_predicate status, :success? + end if GC.respond_to?(:stress=) && Process.respond_to?(:fork) def test_configure_using_configure_and_merge numbered_state = { @@ -315,27 +343,25 @@ def foo.to_h assert_equal '2', state.indent end - if defined?(JSON::Ext::Generator) - def test_broken_bignum # [ruby-core:38867] - pid = fork do - x = 1 << 64 - x.class.class_eval do - def to_s - end - end - begin - JSON::Ext::Generator::State.new.generate(x) - exit 1 - rescue TypeError - exit 0 + def test_broken_bignum # [ruby-core:38867] + pid = fork do + x = 1 << 64 + x.class.class_eval do + def to_s end end - _, status = Process.waitpid2(pid) - assert status.success? - rescue NotImplementedError - # forking to avoid modifying core class of a parent process and - # introducing race conditions of tests are run in parallel + begin + JSON::Ext::Generator::State.new.generate(x) + exit 1 + rescue TypeError + exit 0 + end end + _, status = Process.waitpid2(pid) + assert status.success? 
+ rescue NotImplementedError + # forking to avoid modifying core class of a parent process and + # introducing race conditions of tests are run in parallel end def test_hash_likeness_set_symbol @@ -449,23 +475,152 @@ def test_invalid_encoding_string end assert_includes error.message, "source sequence is illegal/malformed utf-8" - assert_raise(Encoding::UndefinedConversionError) do + assert_raise(JSON::GeneratorError) do + JSON.dump("\x82\xAC\xEF".b) + end + + assert_raise(JSON::GeneratorError) do "\x82\xAC\xEF".b.to_json end - assert_raise(Encoding::UndefinedConversionError) do - JSON.dump("\x82\xAC\xEF".b) + assert_raise(JSON::GeneratorError) do + ["\x82\xAC\xEF".b].to_json + end + + assert_raise(JSON::GeneratorError) do + { foo: "\x82\xAC\xEF".b }.to_json + end + end + + class MyCustomString < String + def to_json(_state = nil) + '"my_custom_key"' + end + + def to_s + self + end + end + + def test_string_subclass_as_keys + # Ref: https://github.com/ruby/json/issues/667 + # if key.to_s doesn't return a bare string, we call `to_json` on it. + key = MyCustomString.new("won't be used") + assert_equal '{"my_custom_key":1}', JSON.generate(key => 1) + end + + class FakeString + def to_json(_state = nil) + raise "Shouldn't be called" + end + + def to_s + self + end + end + + def test_custom_object_as_keys + key = FakeString.new + error = assert_raise(TypeError) do + JSON.generate(key => 1) + end + assert_match "FakeString", error.message + end + + def test_to_json_called_with_state_object + object = Object.new + called = false + argument = nil + object.singleton_class.define_method(:to_json) do |state| + called = true + argument = state + "" + end + + assert_equal "", JSON.dump(object) + assert called, "#to_json wasn't called" + assert_instance_of JSON::State, argument + end + + module CustomToJSON + def to_json(*) + %{"#{self.class.name}#to_json"} end end + module CustomToS + def to_s + "#{self.class.name}#to_s" + end + end + + class ArrayWithToJSON < Array + include CustomToJSON + end + + def test_array_subclass_with_to_json + assert_equal '["JSONGeneratorTest::ArrayWithToJSON#to_json"]', JSON.generate([ArrayWithToJSON.new]) + assert_equal '{"[]":1}', JSON.generate(ArrayWithToJSON.new => 1) + end + + class ArrayWithToS < Array + include CustomToS + end + + def test_array_subclass_with_to_s + assert_equal '[[]]', JSON.generate([ArrayWithToS.new]) + assert_equal '{"JSONGeneratorTest::ArrayWithToS#to_s":1}', JSON.generate(ArrayWithToS.new => 1) + end + + class HashWithToJSON < Hash + include CustomToJSON + end + + def test_hash_subclass_with_to_json + assert_equal '["JSONGeneratorTest::HashWithToJSON#to_json"]', JSON.generate([HashWithToJSON.new]) + assert_equal '{"{}":1}', JSON.generate(HashWithToJSON.new => 1) + end + + class HashWithToS < Hash + include CustomToS + end + + def test_hash_subclass_with_to_s + assert_equal '[{}]', JSON.generate([HashWithToS.new]) + assert_equal '{"JSONGeneratorTest::HashWithToS#to_s":1}', JSON.generate(HashWithToS.new => 1) + end + + class StringWithToJSON < String + include CustomToJSON + end + + def test_string_subclass_with_to_json + assert_equal '["JSONGeneratorTest::StringWithToJSON#to_json"]', JSON.generate([StringWithToJSON.new]) + assert_equal '{"":1}', JSON.generate(StringWithToJSON.new => 1) + end + + class StringWithToS < String + include CustomToS + end + + def test_string_subclass_with_to_s + assert_equal '[""]', JSON.generate([StringWithToS.new]) + assert_equal '{"JSONGeneratorTest::StringWithToS#to_s":1}', JSON.generate(StringWithToS.new => 1) + 
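Taken together with the generator hunks above, these fixtures pin down how Hash keys are stringified: a key whose `to_s` returns a plain `String` is fast-serialized, a `String` subclass is serialized through its own `to_json`, and anything else raises `TypeError`. A small sketch of that contract, using a hypothetical `UpcaseKey` subclass rather than the fixtures above:

```ruby
require 'json'

# Hypothetical String subclass with a custom JSON representation.
class UpcaseKey < String
  def to_json(_state = nil)
    %("#{upcase}")
  end
end

JSON.generate(UpcaseKey.new("id") => 1)  # => '{"ID":1}'

# A key whose #to_s does not return a String is rejected.
bad_key = Object.new
def bad_key.to_s
  self
end
JSON.generate(bad_key => 1)  # raises TypeError (to_s must return a String)
```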
end + if defined?(JSON::Ext::Generator) and RUBY_PLATFORM != "java" def test_valid_utf8_in_different_encoding utf8_string = "€™" wrong_encoding_string = utf8_string.b # This behavior is historical. Not necessary desirable. We should deprecated it. # The pure and java version of the gem already don't behave this way. - assert_equal utf8_string.to_json, wrong_encoding_string.to_json - assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string) + assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do + assert_equal utf8_string.to_json, wrong_encoding_string.to_json + end + + assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do + assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string) + end end def test_string_ext_included_calls_super diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 8d3c0c17c..c01e28910 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -19,21 +19,19 @@ def test_construction assert_equal 'test', parser.source end - def test_argument_encoding - source = "{}".encode("UTF-16") + def test_argument_encoding_unmodified + source = "{}".encode(Encoding::UTF_16) JSON::Parser.new(source) assert_equal Encoding::UTF_16, source.encoding end - def test_argument_encoding_for_binary - source = "{}".encode("ASCII-8BIT") + def test_argument_encoding_for_binary_unmodified + source = "{}".b JSON::Parser.new(source) assert_equal Encoding::ASCII_8BIT, source.encoding end def test_error_message_encoding - pend if RUBY_ENGINE == 'truffleruby' - bug10705 = '[ruby-core:67386] [Bug #10705]' json = ".\"\xE2\x88\x9A\"" assert_equal(Encoding::UTF_8, json.encoding) @@ -42,7 +40,7 @@ def test_error_message_encoding } assert_equal(Encoding::UTF_8, e.message.encoding, bug10705) assert_include(e.message, json, bug10705) - end if defined?(JSON::Ext::Parser) + end def test_parsing parser = JSON::Parser.new('"test"') @@ -182,7 +180,93 @@ def test_parse_json_primitive_values assert parse('NaN', :allow_nan => true).nan? assert parse('Infinity', :allow_nan => true).infinite? assert parse('-Infinity', :allow_nan => true).infinite? 
- assert_raise(JSON::ParserError) { parse('[ 1, ]') } + end + + def test_parse_arrays_with_allow_trailing_comma + assert_equal([], parse('[]', allow_trailing_comma: true)) + assert_equal([], parse('[]', allow_trailing_comma: false)) + assert_raise(JSON::ParserError) { parse('[,]', allow_trailing_comma: true) } + assert_raise(JSON::ParserError) { parse('[,]', allow_trailing_comma: false) } + + assert_equal([1], parse('[1]', allow_trailing_comma: true)) + assert_equal([1], parse('[1]', allow_trailing_comma: false)) + assert_equal([1], parse('[1,]', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('[1,]', allow_trailing_comma: false) } + + assert_equal([1, 2, 3], parse('[1,2,3]', allow_trailing_comma: true)) + assert_equal([1, 2, 3], parse('[1,2,3]', allow_trailing_comma: false)) + assert_equal([1, 2, 3], parse('[1,2,3,]', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('[1,2,3,]', allow_trailing_comma: false) } + + assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 ]', allow_trailing_comma: true)) + assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 ]', allow_trailing_comma: false)) + assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 , ]', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('[ 1 , 2 , 3 , ]', allow_trailing_comma: false) } + + assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3] }', allow_trailing_comma: true)) + assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3] }', allow_trailing_comma: false)) + assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3,] }', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('{ "foo": [1,2,3,] }', allow_trailing_comma: false) } + end + + def test_parse_object_with_allow_trailing_comma + assert_equal({}, parse('{}', allow_trailing_comma: true)) + assert_equal({}, parse('{}', allow_trailing_comma: false)) + assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: true) } + assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: false) } + + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: true)) + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: false)) + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar",}', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('{"foo":"bar",}', allow_trailing_comma: false) } + + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: true) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: false) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: false) + } + + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: true) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: false) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: false) + } + + 
assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: true) + ) + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: false) + ) + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: false) + } end def test_parse_some_strings @@ -196,6 +280,23 @@ def test_parse_some_strings ) end + if RUBY_ENGINE != "jruby" # https://github.com/ruby/json/issues/138 + def test_parse_broken_string + s = parse(%{["\x80"]})[0] + assert_equal("\x80", s) + assert_equal Encoding::UTF_8, s.encoding + assert_equal false, s.valid_encoding? + + s = parse(%{["\x80"]}.b)[0] + assert_equal("\x80", s) + assert_equal Encoding::UTF_8, s.encoding + assert_equal false, s.valid_encoding? + + input = %{["\x80"]}.dup.force_encoding(Encoding::US_ASCII) + assert_raise(Encoding::InvalidByteSequenceError) { parse(input) } + end + end + def test_parse_big_integers json1 = JSON(orig = (1 << 31) - 1) assert_equal orig, parse(json1) @@ -247,50 +348,50 @@ def test_freeze end def test_parse_comments - json = < "value1", "key2" => "value2", "key3" => "value3" }, parse(json)) - json = < "value1" }, parse(json)) - json = < "value1" }, parse(json)) end @@ -518,7 +619,7 @@ def test_parse_error_incomplete_hash error = assert_raise(JSON::ParserError) do JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"bob@example.com"}') end - if RUBY_ENGINE == "ruby" && defined?(JSON::Ext) + if RUBY_ENGINE == "ruby" assert_equal %(unexpected token at '{"input":{"firstName":"Bob","las'), error.message end end diff --git a/test/json/ractor_test.rb b/test/json/ractor_test.rb index e0116400f..f857c9a8b 100644 --- a/test/json/ractor_test.rb +++ b/test/json/ractor_test.rb @@ -9,10 +9,7 @@ class JSONInRactorTest < Test::Unit::TestCase def test_generate - assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}", ignore_stderr: true) - begin; - $VERBOSE = nil - require "json" + pid = fork do r = Ractor.new do json = JSON.generate({ 'a' => 2, @@ -26,9 +23,22 @@ def test_generate }) JSON.parse(json) end - expected_json = '{"a":2,"b":3.141,"c":"c","d":[1,"b",3.14],"e":{"foo":"bar"},' + - '"g":"\\"\\u0000\\u001f","h":1000.0,"i":0.001}' - assert_equal(JSON.parse(expected_json), r.take) - end; + expected_json = JSON.parse('{"a":2,"b":3.141,"c":"c","d":[1,"b",3.14],"e":{"foo":"bar"},' + + '"g":"\\"\\u0000\\u001f","h":1000.0,"i":0.001}') + actual_json = r.take + + if expected_json == actual_json + exit 0 + else + puts "Expected:" + puts expected_json + puts "Acutual:" + puts actual_json + puts + exit 1 + end + end + _, status = Process.waitpid2(pid) + assert_predicate status, :success? 
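The `allow_trailing_comma` coverage above, together with the comment handling exercised by `test_parse_comments`, is what lets the parser consume `jsonc`-style documents. An illustrative sketch, not part of the patch:

```ruby
require 'json'

jsonc = <<~JSONC
  {
    // editor settings, note the trailing comma
    "tab_size": 2,
    "insert_final_newline": true,
  }
JSONC

JSON.parse(jsonc, allow_trailing_comma: true)
# => {"tab_size"=>2, "insert_final_newline"=>true}

JSON.parse(jsonc)  # raises JSON::ParserError because of the trailing comma
```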
end -end if defined?(Ractor) +end if defined?(Ractor) && Process.respond_to?(:fork) diff --git a/test/json/test_helper.rb b/test/json/test_helper.rb index 4955a02c9..d849e28b9 100644 --- a/test/json/test_helper.rb +++ b/test/json/test_helper.rb @@ -1,19 +1,26 @@ -case ENV['JSON'] -when 'pure' - $:.unshift File.join(__dir__, '../../lib') - require 'json/pure' -when 'ext' - $:.unshift File.join(__dir__, '../../ext'), File.join(__dir__, '../../lib') - require 'json/ext' -else - $:.unshift File.join(__dir__, '../../ext'), File.join(__dir__, '../../lib') - require 'json' -end +$LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) +require 'json' require 'test/unit' -begin - require 'byebug' -rescue LoadError + +if ENV["JSON_COMPACT"] + if GC.respond_to?(:verify_compaction_references) + # This method was added in Ruby 3.0.0. Calling it this way asks the GC to + # move objects around, helping to find object movement bugs. + begin + GC.verify_compaction_references(expand_heap: true, toward: :empty) + rescue NotImplementedError, ArgumentError + # Some platforms don't support compaction + end + end + + if GC.respond_to?(:auto_compact=) + begin + GC.auto_compact = true + rescue NotImplementedError + # Some platforms don't support compaction + end + end end unless defined?(Test::Unit::CoreAssertions)
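Two caller-visible behaviors asserted earlier in this patch (`test_deprecated_load_create_additions` and the BINARY-encoded UTF-8 warnings in the generator tests) can be summarized as follows. This is a hedged sketch: `require 'json/add/time'` is assumed so that `Time` round-trips with a `json_class` tag, and the encoding warning is shown as observed with the C-extension generator:

```ruby
require 'json'
require 'json/add/time'  # opt-in so Time serializes with a "json_class" tag

payload = JSON.dump(Time.now)
JSON.load(payload)         # emits a deprecation warning pointing at JSON.unsafe_load
JSON.unsafe_load(payload)  # explicit opt-in to reviving custom types, no warning

# Valid UTF-8 handed over with a BINARY (ASCII-8BIT) encoding still serializes,
# but now warns that json 3.0 will raise an encoding error for it.
utf8   = "€™"
binary = utf8.b
binary.to_json  # => same JSON as utf8.to_json, plus the deprecation warning
```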