diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..caefad87 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +ext/json/ext/parser/parser.c linguist-generated=true +java/src/json/ext/Parser.java linguist-generated=true diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..6778b049 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: 'github-actions' + directory: '/' + schedule: + interval: 'daily' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 27171b90..d4c98195 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: - { os: windows-latest, ruby: jruby-head } steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Ruby uses: ruby/setup-ruby-pkgs@v1 @@ -64,7 +64,7 @@ jobs: fail-fast: false steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Ruby uses: ruby/setup-ruby-pkgs@v1 diff --git a/CHANGES.md b/CHANGES.md index f40572e8..f9efe041 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,9 @@ # Changes +### 2024-11-14 (2.8.2) + +* `JSON.load_file` explicitly reads the file as UTF-8. + ### 2024-11-06 (2.8.1) * Fix the java packages to include the extension. 
diff --git a/benchmark/parser.rb b/benchmark/parser.rb index 6952f3c3..bacb8e9e 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -19,7 +19,7 @@ def benchmark_parsing(name, json_output) Benchmark.ips do |x| x.report("json") { JSON.parse(json_output) } if RUN[:json] x.report("oj") { Oj.load(json_output) } if RUN[:oj] - x.report("Oj::Parser") { Oj::Parser.usual.parse(json_output) } if RUN[:oj] + x.report("Oj::Parser") { Oj::Parser.new(:usual).parse(json_output) } if RUN[:oj] x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson] x.compare!(order: :baseline) end @@ -28,10 +28,6 @@ def benchmark_parsing(name, json_output) # NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] -# Oj::Parser is significanly faster (~1.3x) on the next 3 micro-benchmarks in large part because its -# cache is persisted across calls. That's not something we can do with the current API, we'd -# need to expose a stateful API as well, but that's no really desirable. -# Other than that we're faster than regular `Oj.load` by a good margin (between 1.3x and 2.4x). 
benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10) benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }) benchmark_parsing "test from oj", <in_array++; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); + json->in_array--; if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } goto st29; tr11: -#line 647 "parser.rl" +#line 645 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -1219,9 +1217,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 653 "parser.rl" +#line 651 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 1225 "parser.c" +#line 1223 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -1462,7 +1460,7 @@ case 28: _out: {} } -#line 674 "parser.rl" +#line 672 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -1477,7 +1475,7 @@ case 28: } -#line 1481 "parser.c" +#line 1479 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -1485,7 +1483,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 695 "parser.rl" +#line 693 "parser.rl" #define MAX_FAST_INTEGER_SIZE 18 @@ -1510,82 +1508,8 @@ static inline VALUE fast_parse_integer(char *p, char *pe) return LL2NUM(memo); } -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) { - int cs = EVIL; - - -#line 1519 "parser.c" - { - cs = JSON_integer_start; - } - -#line 724 "parser.rl" - json->memo = p; - -#line 1527 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st3; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st5; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st3; - if ( 49 
<= (*p) && (*p) <= 57 ) - goto st5; - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st0; - goto tr4; -tr4: -#line 692 "parser.rl" - { p--; {p++; cs = 4; goto _out;} } - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 1568 "parser.c" - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - goto tr4; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 726 "parser.rl" - - if (cs >= JSON_integer_first_final) { long len = p - json->memo; if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { *result = fast_parse_integer(json->memo, p); @@ -1596,37 +1520,35 @@ case 5: *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); } return p + 1; - } else { - return NULL; - } } -#line 1606 "parser.c" +#line 1527 "parser.c" enum {JSON_float_start = 1}; -enum {JSON_float_first_final = 8}; +enum {JSON_float_first_final = 6}; enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 755 "parser.rl" +#line 745 "parser.rl" -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; + bool is_float = false; -#line 1622 "parser.c" +#line 1544 "parser.c" { cs = JSON_float_start; } -#line 762 "parser.rl" +#line 753 "parser.rl" json->memo = p; -#line 1630 "parser.c" +#line 1552 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1635,10 +1557,10 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul case 1: switch( (*p) ) { case 45: goto st2; - case 48: goto st3; + case 48: goto st6; } if ( 49 <= (*p) && (*p) <= 57 ) - goto st7; + goto st10; goto st0; st0: cs = 0; @@ -1648,24 +1570,42 @@ cs = 0; goto _test_eof2; case 2: if ( (*p) == 48 ) - goto st3; + goto st6; if 
( 49 <= (*p) && (*p) <= 57 ) - goto st7; + goto st10; goto st0; -st3: +st6: if ( ++p == pe ) - goto _test_eof3; -case 3: + goto _test_eof6; +case 6: switch( (*p) ) { - case 46: goto st4; - case 69: goto st5; - case 101: goto st5; + case 45: goto st0; + case 46: goto tr8; + case 69: goto tr9; + case 101: goto tr9; } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st0; + goto tr7; +tr7: +#line 737 "parser.rl" + { p--; {p++; cs = 7; goto _out;} } + goto st7; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: +#line 1599 "parser.c" goto st0; -st4: +tr8: +#line 738 "parser.rl" + { is_float = true; } + goto st3; +st3: if ( ++p == pe ) - goto _test_eof4; -case 4: + goto _test_eof3; +case 3: +#line 1609 "parser.c" if ( 48 <= (*p) && (*p) <= 57 ) goto st8; goto st0; @@ -1674,87 +1614,86 @@ case 4: goto _test_eof8; case 8: switch( (*p) ) { - case 69: goto st5; - case 101: goto st5; + case 69: goto st4; + case 101: goto st4; } if ( (*p) > 46 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st8; } else if ( (*p) >= 45 ) goto st0; - goto tr9; + goto tr7; tr9: -#line 749 "parser.rl" - { p--; {p++; cs = 9; goto _out;} } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 1695 "parser.c" - goto st0; -st5: +#line 738 "parser.rl" + { is_float = true; } + goto st4; +st4: if ( ++p == pe ) - goto _test_eof5; -case 5: + goto _test_eof4; +case 4: +#line 1635 "parser.c" switch( (*p) ) { - case 43: goto st6; - case 45: goto st6; + case 43: goto st5; + case 45: goto st5; } if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; + goto st9; goto st0; -st6: +st5: if ( ++p == pe ) - goto _test_eof6; -case 6: + goto _test_eof5; +case 5: if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; + goto st9; goto st0; -st10: +st9: if ( ++p == pe ) - goto _test_eof10; -case 10: + goto _test_eof9; +case 9: switch( (*p) ) { case 69: goto st0; case 101: goto st0; } if ( (*p) > 46 ) { if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; + goto st9; } else if ( (*p) >= 45 ) goto st0; - goto tr9; -st7: + goto tr7; +st10: if ( 
++p == pe ) - goto _test_eof7; -case 7: + goto _test_eof10; +case 10: switch( (*p) ) { - case 46: goto st4; - case 69: goto st5; - case 101: goto st5; + case 45: goto st0; + case 46: goto tr8; + case 69: goto tr9; + case 101: goto tr9; } if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - goto st0; + goto st10; + goto tr7; } _test_eof2: cs = 2; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; _test_eof: {} _out: {} } -#line 764 "parser.rl" +#line 755 "parser.rl" if (cs >= JSON_float_first_final) { + if (!is_float) { + return JSON_decode_integer(json, p, result); + } VALUE mod = Qnil; ID method_id = 0; if (json->decimal_class) { @@ -1805,7 +1744,7 @@ case 7: -#line 1809 "parser.c" +#line 1748 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 22}; enum {JSON_array_error = 0}; @@ -1813,7 +1752,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 841 "parser.rl" +#line 835 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1826,14 +1765,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul long stack_head = json->stack->head; -#line 1830 "parser.c" +#line 1769 "parser.c" { cs = JSON_array_start; } -#line 853 "parser.rl" +#line 847 "parser.rl" -#line 1837 "parser.c" +#line 1776 "parser.c" { short _widec; if ( p == pe ) @@ -1873,7 +1812,7 @@ case 2: goto st2; goto st0; tr2: -#line 821 "parser.rl" +#line 815 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1888,12 +1827,12 @@ case 
2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1892 "parser.c" +#line 1831 "parser.c" _widec = (*p); if ( 44 <= (*p) && (*p) <= 44 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1940,14 +1879,14 @@ case 7: goto st3; goto st7; tr4: -#line 833 "parser.rl" +#line 827 "parser.rl" { p--; {p++; cs = 22; goto _out;} } goto st22; st22: if ( ++p == pe ) goto _test_eof22; case 22: -#line 1951 "parser.c" +#line 1890 "parser.c" goto st0; st8: if ( ++p == pe ) @@ -2015,13 +1954,13 @@ case 13: if ( 10 <= (*p) && (*p) <= 10 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -2029,19 +1968,19 @@ case 13: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2080,13 +2019,13 @@ case 14: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2105,20 +2044,20 @@ case 15: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" 
+#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2141,13 +2080,13 @@ case 16: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 46 ) { @@ -2155,19 +2094,19 @@ case 16: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2191,20 +2130,20 @@ case 17: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2276,7 +2215,7 @@ case 21: _out: {} } -#line 854 "parser.rl" +#line 848 "parser.rl" if(cs >= JSON_array_first_final) { long count = json->stack->head - 
stack_head; @@ -2332,7 +2271,7 @@ static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringE { size_t bufferSize = stringEnd - string; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); @@ -2355,7 +2294,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE int unescape_len; char buf[4]; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); @@ -2470,7 +2409,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 2474 "parser.c" +#line 2413 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 9}; enum {JSON_string_error = 0}; @@ -2478,7 +2417,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 1077 "parser.rl" +#line 1071 "parser.rl" static int @@ -2499,15 +2438,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 2503 "parser.c" +#line 2442 "parser.c" { cs = JSON_string_start; } -#line 1097 "parser.rl" +#line 1091 "parser.rl" json->memo = p; -#line 2511 "parser.c" +#line 2450 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2532,14 +2471,14 @@ case 2: goto st0; goto st2; tr2: -#line 1059 "parser.rl" +#line 1053 "parser.rl" { *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} p--; {p++; cs = 9; goto _out;} } -#line 1052 "parser.rl" +#line 1046 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} @@ -2548,7 +2487,7 @@ case 2: } goto st9; tr6: -#line 1052 "parser.rl" +#line 1046 
"parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} @@ -2560,7 +2499,7 @@ case 2: if ( ++p == pe ) goto _test_eof9; case 9: -#line 2564 "parser.c" +#line 2503 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -2648,7 +2587,7 @@ case 8: _out: {} } -#line 1099 "parser.rl" +#line 1093 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2801,7 +2740,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2805 "parser.c" +#line 2744 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2809,7 +2748,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 1265 "parser.rl" +#line 1259 "parser.rl" /* @@ -2838,16 +2777,16 @@ static VALUE cParser_parse(VALUE self) json->stack = &stack; -#line 2842 "parser.c" +#line 2781 "parser.c" { cs = JSON_start; } -#line 1293 "parser.rl" +#line 1287 "parser.rl" p = json->source; pe = p + json->len; -#line 2851 "parser.c" +#line 2790 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2881,7 +2820,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1257 "parser.rl" +#line 1251 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2891,7 +2830,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2895 "parser.c" +#line 2834 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2980,7 +2919,7 @@ case 9: _out: {} } -#line 1296 "parser.rl" +#line 1290 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); @@ -3016,16 +2955,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) json->stack = &stack; -#line 3020 "parser.c" +#line 2959 "parser.c" { cs = JSON_start; } -#line 1331 "parser.rl" +#line 1325 "parser.rl" p = json->source; pe = p + 
json->len; -#line 3029 "parser.c" +#line 2968 "parser.c" { if ( p == pe ) goto _test_eof; @@ -3059,7 +2998,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1257 "parser.rl" +#line 1251 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -3069,7 +3008,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 3073 "parser.c" +#line 3012 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -3158,7 +3097,7 @@ case 9: _out: {} } -#line 1334 "parser.rl" +#line 1328 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 9620b196..eab60b91 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -392,6 +392,7 @@ typedef struct JSON_ParserStruct { VALUE decimal_class; VALUE match_string; FBuffer fbuffer; + int in_array; int max_nesting; bool allow_nan; bool allow_trailing_comma; @@ -420,8 +421,7 @@ static const rb_data_type_t JSON_Parser_type; static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); @@ -627,11 +627,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu raise_parse_error("unexpected token at '%s'", p); } } - np = JSON_parse_float(json, fpc, pe, result); - if (np != NULL) { - fexec np; - } - 
np = JSON_parse_integer(json, fpc, pe, result); + np = JSON_parse_number(json, fpc, pe, result); if (np != NULL) { fexec np; } @@ -640,7 +636,9 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu action parse_array { char *np; + json->in_array++; np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1); + json->in_array--; if (np == NULL) { fhold; fbreak; } else fexec np; } @@ -716,15 +714,8 @@ static inline VALUE fast_parse_integer(char *p, char *pe) return LL2NUM(memo); } -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) { - int cs = EVIL; - - %% write init; - json->memo = p; - %% write exec; - - if (cs >= JSON_integer_first_final) { long len = p - json->memo; if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { *result = fast_parse_integer(json->memo, p); @@ -735,9 +726,6 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); } return p + 1; - } else { - return NULL; - } } %%{ @@ -747,22 +735,28 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res write data; action exit { fhold; fbreak; } + action isFloat { is_float = true; } main := '-'? ( - (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) - | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+)) - ) (^[0-9Ee.\-]? @exit ); + (('0' | [1-9][0-9]*) + ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) | + ([Ee] [+\-]?[0-9]+)) > isFloat)? + ) (^[0-9Ee.\-]? 
@exit )); }%% -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; + bool is_float = false; %% write init; json->memo = p; %% write exec; if (cs >= JSON_float_first_final) { + if (!is_float) { + return JSON_decode_integer(json, p, result); + } VALUE mod = Qnil; ID method_id = 0; if (json->decimal_class) { @@ -906,7 +900,7 @@ static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringE { size_t bufferSize = stringEnd - string; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); @@ -929,7 +923,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE int unescape_len; char buf[4]; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); diff --git a/lib/json/common.rb b/lib/json/common.rb index 2269896b..4c6b2e1a 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -1,4 +1,5 @@ -#frozen_string_literal: true +# frozen_string_literal: true + require 'json/version' module JSON @@ -25,7 +26,7 @@ def [](object, opts = {}) elsif object.respond_to?(:to_str) str = object.to_str if str.is_a?(String) - return JSON.parse(object.to_str, opts) + return JSON.parse(str, opts) end end @@ -230,8 +231,8 @@ def parse!(source, opts = {}) # parse(File.read(path), opts) # # See method #parse. - def load_file(filespec, opts = {}) - parse(File.read(filespec), opts) + def load_file(filespec, opts = nil) + parse(File.read(filespec, encoding: Encoding::UTF_8), opts) end # :call-seq: @@ -242,7 +243,7 @@ def load_file(filespec, opts = {}) # # See method #parse! 
def load_file!(filespec, opts = {}) - parse!(File.read(filespec), opts) + parse!(File.read(filespec, encoding: Encoding::UTF_8), opts) end # :call-seq: diff --git a/lib/json/version.rb b/lib/json/version.rb index 52da68dd..d5cfb549 100644 --- a/lib/json/version.rb +++ b/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.8.1' + VERSION = '2.8.2' end diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index 6165cc04..a5d62337 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true + require_relative 'test_helper' require 'stringio' require 'tempfile' @@ -189,8 +190,29 @@ def test_load_file_with_option! test_load_file_with_option_shared(:load_file!) end + def test_load_file_with_bad_default_external_encoding + data = { "key" => "€" } + temp_file_containing(JSON.dump(data)) do |path| + loaded_data = with_external_encoding(Encoding::US_ASCII) do + JSON.load_file(path) + end + assert_equal data, loaded_data + end + end + private + def with_external_encoding(encoding) + verbose = $VERBOSE + $VERBOSE = nil + previous_encoding = Encoding.default_external + Encoding.default_external = encoding + yield + ensure + Encoding.default_external = previous_encoding + $VERBOSE = verbose + end + def test_load_shared(method_name) temp_file_containing(@json) do |filespec| assert_equal JSON.public_send(method_name, filespec), @hash