From 447a718cd6ca2b28855edda2bb2cd19fec502541 Mon Sep 17 00:00:00 2001 From: MSP-Greg Date: Wed, 6 Nov 2024 11:26:23 -0600 Subject: [PATCH 01/11] CI: ci.yml - update to actions/checkout@v4 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 27171b90..d4c98195 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: - { os: windows-latest, ruby: jruby-head } steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Ruby uses: ruby/setup-ruby-pkgs@v1 @@ -64,7 +64,7 @@ jobs: fail-fast: false steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Ruby uses: ruby/setup-ruby-pkgs@v1 From 0d5ac5baa013eca3f743db540404cbee41120778 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 7 Nov 2024 13:13:11 +0900 Subject: [PATCH 02/11] Added automated update workflow for GitHub Actions --- .github/dependabot.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..6778b049 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: 'github-actions' + directory: '/' + schedule: + interval: 'daily' From 42bc089112a4a1cd4fee64cc11eab4b81beab806 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 7 Nov 2024 08:19:57 +0100 Subject: [PATCH 03/11] Mark parser.c and Parser.java as generated files This way they're hidden in diffs. It would be good to enforce on CI that the generated files match the source change; however, ragel's output isn't consistent across versions and systems, so we'll have to rely on changes being noticed by further contributions. 
--- .gitattributes | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..caefad87 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +ext/json/ext/parser/parser.c linguist-generated=true +java/src/json/ext/Parser.java linguist-generated=true From 0c0e0930cd64813d0147a4cccace00c935d83e12 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Wed, 6 Nov 2024 17:12:07 -0800 Subject: [PATCH 04/11] Reduce comparisons when parsing numbers Before this commit, we would try to scan for a float, then if that failed, scan for an integer. But floats and integers have many bytes in common, so we would end up scanning the same bytes multiple times. This patch combines integer and float scanning machines so that we only have to scan bytes once. If the machine finds "float parts", then it executes the "isFloat" action in the machine, which sets a boolean letting us know that the parser found a float. If we didn't find a float, but we did match, then we know it's an int. 
--- ext/json/ext/parser/parser.c | 428 +++++++++++++++------------------- ext/json/ext/parser/parser.rl | 29 +-- 2 files changed, 192 insertions(+), 265 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index a5c918fa..b6cf93fc 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -422,7 +422,6 @@ static const rb_data_type_t JSON_Parser_type; static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); @@ -449,11 +448,11 @@ static void raise_parse_error(const char *format, const char *start) -#line 475 "parser.rl" +#line 474 "parser.rl" -#line 457 "parser.c" +#line 456 "parser.c" enum {JSON_object_start = 1}; enum {JSON_object_first_final = 32}; enum {JSON_object_error = 0}; @@ -461,7 +460,7 @@ enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 515 "parser.rl" +#line 514 "parser.rl" #define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) @@ -477,14 +476,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu long stack_head = json->stack->head; -#line 481 "parser.c" +#line 480 "parser.c" { cs = JSON_object_start; } -#line 530 "parser.rl" +#line 529 "parser.rl" -#line 488 "parser.c" +#line 487 "parser.c" { short _widec; if ( p == pe ) @@ -513,7 +512,7 @@ case 2: goto st2; goto st0; tr2: -#line 494 "parser.rl" +#line 493 "parser.rl" { char *np; json->parsing_name = true; @@ -529,7 +528,7 @@ case 2: if ( ++p == pe ) 
goto _test_eof3; case 3: -#line 533 "parser.c" +#line 532 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -596,7 +595,7 @@ case 8: goto st8; goto st0; tr11: -#line 483 "parser.rl" +#line 482 "parser.rl" { char *np = JSON_parse_value(json, p, pe, result, current_nesting); if (np == NULL) { @@ -610,20 +609,20 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 614 "parser.c" +#line 613 "parser.c" _widec = (*p); if ( (*p) < 13 ) { if ( (*p) > 9 ) { if ( 10 <= (*p) && (*p) <= 10 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -631,26 +630,26 @@ case 9: if ( 32 <= (*p) && (*p) <= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 44 ) { if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -671,14 +670,14 @@ case 9: goto st10; goto st0; tr4: -#line 505 "parser.rl" +#line 504 "parser.rl" { p--; {p++; cs = 32; goto _out;} } goto st32; st32: if ( ++p == pe ) goto _test_eof32; case 32: -#line 682 "parser.c" +#line 681 "parser.c" goto st0; st10: if ( ++p == pe ) @@ -780,13 +779,13 @@ case 20: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma 
) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -805,20 +804,20 @@ case 21: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -841,13 +840,13 @@ case 22: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 46 ) { @@ -855,19 +854,19 @@ case 22: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -891,20 +890,20 @@ case 23: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { 
_widec = (short)(128 + ((*p) - -128)); if ( -#line 492 "parser.rl" +#line 491 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1018,7 +1017,7 @@ case 31: _out: {} } -#line 531 "parser.rl" +#line 530 "parser.rl" if (cs >= JSON_object_first_final) { long count = json->stack->head - stack_head; @@ -1069,7 +1068,7 @@ case 31: } -#line 1073 "parser.c" +#line 1072 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -1077,7 +1076,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 666 "parser.rl" +#line 661 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1085,14 +1084,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1089 "parser.c" +#line 1088 "parser.c" { cs = JSON_value_start; } -#line 673 "parser.rl" +#line 668 "parser.rl" -#line 1096 "parser.c" +#line 1095 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1126,7 +1125,7 @@ case 1: cs = 0; goto _out; tr2: -#line 609 "parser.rl" +#line 608 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { @@ -1138,7 +1137,7 @@ cs = 0; } goto st29; tr3: -#line 619 "parser.rl" +#line 618 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -1154,15 +1153,11 @@ cs = 0; if (np != NULL) { {p = (( np))-1;} } - np = JSON_parse_integer(json, p, pe, result); - if (np != NULL) { - {p = (( np))-1;} - } p--; {p++; cs = 29; goto _out;} } goto st29; tr7: -#line 641 "parser.rl" +#line 636 "parser.rl" { char *np; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -1170,7 +1165,7 @@ cs = 0; } goto st29; tr11: -#line 647 "parser.rl" +#line 642 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -1178,7 +1173,7 @@ cs = 0; } goto st29; tr25: -#line 602 "parser.rl" +#line 601 "parser.rl" { if (json->allow_nan) 
{ *result = CInfinity; @@ -1188,7 +1183,7 @@ cs = 0; } goto st29; tr27: -#line 595 "parser.rl" +#line 594 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -1198,19 +1193,19 @@ cs = 0; } goto st29; tr31: -#line 589 "parser.rl" +#line 588 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 586 "parser.rl" +#line 585 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 592 "parser.rl" +#line 591 "parser.rl" { *result = Qtrue; } @@ -1219,9 +1214,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 653 "parser.rl" +#line 648 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 1225 "parser.c" +#line 1220 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -1462,7 +1457,7 @@ case 28: _out: {} } -#line 674 "parser.rl" +#line 669 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -1477,7 +1472,7 @@ case 28: } -#line 1481 "parser.c" +#line 1476 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -1485,7 +1480,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 695 "parser.rl" +#line 690 "parser.rl" #define MAX_FAST_INTEGER_SIZE 18 @@ -1510,82 +1505,8 @@ static inline VALUE fast_parse_integer(char *p, char *pe) return LL2NUM(memo); } -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) { - int cs = EVIL; - - -#line 1519 "parser.c" - { - cs = JSON_integer_start; - } - -#line 724 "parser.rl" - json->memo = p; - -#line 1527 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st3; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st5; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st3; - if ( 49 <= (*p) && (*p) <= 57 ) - goto st5; - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; 
-case 3: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st0; - goto tr4; -tr4: -#line 692 "parser.rl" - { p--; {p++; cs = 4; goto _out;} } - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 1568 "parser.c" - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - goto tr4; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 726 "parser.rl" - - if (cs >= JSON_integer_first_final) { long len = p - json->memo; if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { *result = fast_parse_integer(json->memo, p); @@ -1596,37 +1517,35 @@ case 5: *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); } return p + 1; - } else { - return NULL; - } } -#line 1606 "parser.c" +#line 1524 "parser.c" enum {JSON_float_start = 1}; -enum {JSON_float_first_final = 8}; +enum {JSON_float_first_final = 6}; enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 755 "parser.rl" +#line 742 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; + bool is_float = false; -#line 1622 "parser.c" +#line 1541 "parser.c" { cs = JSON_float_start; } -#line 762 "parser.rl" +#line 750 "parser.rl" json->memo = p; -#line 1630 "parser.c" +#line 1549 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1635,10 +1554,10 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul case 1: switch( (*p) ) { case 45: goto st2; - case 48: goto st3; + case 48: goto st6; } if ( 49 <= (*p) && (*p) <= 57 ) - goto st7; + goto st10; goto st0; st0: cs = 0; @@ -1648,24 +1567,42 @@ cs = 0; goto _test_eof2; case 2: if ( (*p) == 48 ) - goto st3; + goto st6; if ( 49 <= (*p) && (*p) <= 57 ) - goto st7; + goto st10; goto st0; -st3: +st6: if ( ++p == pe ) - goto _test_eof3; -case 3: + goto _test_eof6; +case 6: switch( (*p) ) { - case 46: 
goto st4; - case 69: goto st5; - case 101: goto st5; + case 45: goto st0; + case 46: goto tr8; + case 69: goto tr9; + case 101: goto tr9; } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st0; + goto tr7; +tr7: +#line 734 "parser.rl" + { p--; {p++; cs = 7; goto _out;} } + goto st7; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: +#line 1596 "parser.c" goto st0; -st4: +tr8: +#line 735 "parser.rl" + { is_float = true; } + goto st3; +st3: if ( ++p == pe ) - goto _test_eof4; -case 4: + goto _test_eof3; +case 3: +#line 1606 "parser.c" if ( 48 <= (*p) && (*p) <= 57 ) goto st8; goto st0; @@ -1674,87 +1611,86 @@ case 4: goto _test_eof8; case 8: switch( (*p) ) { - case 69: goto st5; - case 101: goto st5; + case 69: goto st4; + case 101: goto st4; } if ( (*p) > 46 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st8; } else if ( (*p) >= 45 ) goto st0; - goto tr9; + goto tr7; tr9: -#line 749 "parser.rl" - { p--; {p++; cs = 9; goto _out;} } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 1695 "parser.c" - goto st0; -st5: +#line 735 "parser.rl" + { is_float = true; } + goto st4; +st4: if ( ++p == pe ) - goto _test_eof5; -case 5: + goto _test_eof4; +case 4: +#line 1632 "parser.c" switch( (*p) ) { - case 43: goto st6; - case 45: goto st6; + case 43: goto st5; + case 45: goto st5; } if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; + goto st9; goto st0; -st6: +st5: if ( ++p == pe ) - goto _test_eof6; -case 6: + goto _test_eof5; +case 5: if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; + goto st9; goto st0; -st10: +st9: if ( ++p == pe ) - goto _test_eof10; -case 10: + goto _test_eof9; +case 9: switch( (*p) ) { case 69: goto st0; case 101: goto st0; } if ( (*p) > 46 ) { if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; + goto st9; } else if ( (*p) >= 45 ) goto st0; - goto tr9; -st7: + goto tr7; +st10: if ( ++p == pe ) - goto _test_eof7; -case 7: + goto _test_eof10; +case 10: switch( (*p) ) { - case 46: goto st4; - case 69: goto st5; - case 101: goto st5; + case 45: goto st0; + case 
46: goto tr8; + case 69: goto tr9; + case 101: goto tr9; } if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - goto st0; + goto st10; + goto tr7; } _test_eof2: cs = 2; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; _test_eof: {} _out: {} } -#line 764 "parser.rl" +#line 752 "parser.rl" if (cs >= JSON_float_first_final) { + if (!is_float) { + return JSON_decode_integer(json, p, result); + } VALUE mod = Qnil; ID method_id = 0; if (json->decimal_class) { @@ -1805,7 +1741,7 @@ case 7: -#line 1809 "parser.c" +#line 1745 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 22}; enum {JSON_array_error = 0}; @@ -1813,7 +1749,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 841 "parser.rl" +#line 832 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1826,14 +1762,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul long stack_head = json->stack->head; -#line 1830 "parser.c" +#line 1766 "parser.c" { cs = JSON_array_start; } -#line 853 "parser.rl" +#line 844 "parser.rl" -#line 1837 "parser.c" +#line 1773 "parser.c" { short _widec; if ( p == pe ) @@ -1873,7 +1809,7 @@ case 2: goto st2; goto st0; tr2: -#line 821 "parser.rl" +#line 812 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1888,12 +1824,12 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1892 "parser.c" +#line 1828 "parser.c" _widec = (*p); if ( 44 <= (*p) && (*p) <= 44 ) { _widec = (short)(128 + ((*p) - -128)); 
if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1940,14 +1876,14 @@ case 7: goto st3; goto st7; tr4: -#line 833 "parser.rl" +#line 824 "parser.rl" { p--; {p++; cs = 22; goto _out;} } goto st22; st22: if ( ++p == pe ) goto _test_eof22; case 22: -#line 1951 "parser.c" +#line 1887 "parser.c" goto st0; st8: if ( ++p == pe ) @@ -2015,13 +1951,13 @@ case 13: if ( 10 <= (*p) && (*p) <= 10 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -2029,19 +1965,19 @@ case 13: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2080,13 +2016,13 @@ case 14: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2105,20 +2041,20 @@ case 15: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 
"parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2141,13 +2077,13 @@ case 16: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 46 ) { @@ -2155,19 +2091,19 @@ case 16: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2191,20 +2127,20 @@ case 17: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 831 "parser.rl" +#line 822 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2276,7 +2212,7 @@ case 21: _out: {} } -#line 854 "parser.rl" +#line 845 "parser.rl" if(cs >= JSON_array_first_final) { long count = json->stack->head - stack_head; @@ -2470,7 +2406,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 2474 "parser.c" +#line 2410 "parser.c" enum 
{JSON_string_start = 1}; enum {JSON_string_first_final = 9}; enum {JSON_string_error = 0}; @@ -2478,7 +2414,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 1077 "parser.rl" +#line 1068 "parser.rl" static int @@ -2499,15 +2435,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 2503 "parser.c" +#line 2439 "parser.c" { cs = JSON_string_start; } -#line 1097 "parser.rl" +#line 1088 "parser.rl" json->memo = p; -#line 2511 "parser.c" +#line 2447 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2532,14 +2468,14 @@ case 2: goto st0; goto st2; tr2: -#line 1059 "parser.rl" +#line 1050 "parser.rl" { *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} p--; {p++; cs = 9; goto _out;} } -#line 1052 "parser.rl" +#line 1043 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} @@ -2548,7 +2484,7 @@ case 2: } goto st9; tr6: -#line 1052 "parser.rl" +#line 1043 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} @@ -2560,7 +2496,7 @@ case 2: if ( ++p == pe ) goto _test_eof9; case 9: -#line 2564 "parser.c" +#line 2500 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -2648,7 +2584,7 @@ case 8: _out: {} } -#line 1099 "parser.rl" +#line 1090 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2801,7 +2737,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2805 "parser.c" +#line 2741 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2809,7 +2745,7 @@ enum {JSON_error = 0}; enum 
{JSON_en_main = 1}; -#line 1265 "parser.rl" +#line 1256 "parser.rl" /* @@ -2838,16 +2774,16 @@ static VALUE cParser_parse(VALUE self) json->stack = &stack; -#line 2842 "parser.c" +#line 2778 "parser.c" { cs = JSON_start; } -#line 1293 "parser.rl" +#line 1284 "parser.rl" p = json->source; pe = p + json->len; -#line 2851 "parser.c" +#line 2787 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2881,7 +2817,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1257 "parser.rl" +#line 1248 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2891,7 +2827,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2895 "parser.c" +#line 2831 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2980,7 +2916,7 @@ case 9: _out: {} } -#line 1296 "parser.rl" +#line 1287 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); @@ -3016,16 +2952,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) json->stack = &stack; -#line 3020 "parser.c" +#line 2956 "parser.c" { cs = JSON_start; } -#line 1331 "parser.rl" +#line 1322 "parser.rl" p = json->source; pe = p + json->len; -#line 3029 "parser.c" +#line 2965 "parser.c" { if ( p == pe ) goto _test_eof; @@ -3059,7 +2995,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1257 "parser.rl" +#line 1248 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -3069,7 +3005,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 3073 "parser.c" +#line 3009 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -3158,7 +3094,7 @@ case 9: _out: {} } -#line 1334 "parser.rl" +#line 1325 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 9620b196..f3011490 100644 --- 
a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -420,7 +420,6 @@ static const rb_data_type_t JSON_Parser_type; static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); @@ -631,10 +630,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu if (np != NULL) { fexec np; } - np = JSON_parse_integer(json, fpc, pe, result); - if (np != NULL) { - fexec np; - } fhold; fbreak; } @@ -716,15 +711,8 @@ static inline VALUE fast_parse_integer(char *p, char *pe) return LL2NUM(memo); } -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) { - int cs = EVIL; - - %% write init; - json->memo = p; - %% write exec; - - if (cs >= JSON_integer_first_final) { long len = p - json->memo; if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { *result = fast_parse_integer(json->memo, p); @@ -735,9 +723,6 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); } return p + 1; - } else { - return NULL; - } } %%{ @@ -747,22 +732,28 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res write data; action exit { fhold; fbreak; } + action isFloat { is_float = true; } main := '-'? ( - (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) - | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+)) - ) (^[0-9Ee.\-]? 
@exit ); + (('0' | [1-9][0-9]*) + ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) | + ([Ee] [+\-]?[0-9]+)) > isFloat)? + ) (^[0-9Ee.\-]? @exit )); }%% static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; + bool is_float = false; %% write init; json->memo = p; %% write exec; if (cs >= JSON_float_first_final) { + if (!is_float) { + return JSON_decode_integer(json, p, result); + } VALUE mod = Qnil; ID method_id = 0; if (json->decimal_class) { From e51e7966979b2413665acfbf4412a959cd7d7179 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 7 Nov 2024 08:49:53 +0100 Subject: [PATCH 05/11] Rename parse_float into parse_number --- ext/json/ext/parser/parser.c | 6 +++--- ext/json/ext/parser/parser.rl | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index b6cf93fc..09ca4f75 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -422,7 +422,7 @@ static const rb_data_type_t JSON_Parser_type; static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); @@ -1149,7 +1149,7 @@ cs = 0; raise_parse_error("unexpected token at '%s'", p); } } - np = JSON_parse_float(json, p, pe, result); + np = JSON_parse_number(json, p, pe, result); if (np != NULL) { {p = (( np))-1;} } @@ -1531,7 +1531,7 @@ enum {JSON_float_en_main = 1}; #line 742 "parser.rl" -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char 
*JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; bool is_float = false; diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index f3011490..9a1f7392 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -420,7 +420,7 @@ static const rb_data_type_t JSON_Parser_type; static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); @@ -626,7 +626,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu raise_parse_error("unexpected token at '%s'", p); } } - np = JSON_parse_float(json, fpc, pe, result); + np = JSON_parse_number(json, fpc, pe, result); if (np != NULL) { fexec np; } @@ -741,7 +741,7 @@ static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) ) (^[0-9Ee.\-]? @exit )); }%% -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; bool is_float = false; From 3a8505a8fa39711a957b2fc942862a874a8e8bef Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 7 Nov 2024 17:22:10 +0100 Subject: [PATCH 06/11] JSON.load_file: explicitly load the file as UTF-8 Fix: https://github.com/ruby/json/issues/697 This way even if `Encoding.default_external` is set to a weird value the document will be parsed just fine. 
--- CHANGES.md | 2 ++ lib/json/common.rb | 9 +++++---- test/json/json_common_interface_test.rb | 23 +++++++++++++++++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index f40572e8..f64384ca 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,7 @@ # Changes +* `JSON.load_file` explictly read the file as UTF-8. + ### 2024-11-06 (2.8.1) * Fix the java packages to include the extension. diff --git a/lib/json/common.rb b/lib/json/common.rb index 2269896b..03aaaa6e 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -1,4 +1,5 @@ -#frozen_string_literal: true +# frozen_string_literal: true + require 'json/version' module JSON @@ -230,8 +231,8 @@ def parse!(source, opts = {}) # parse(File.read(path), opts) # # See method #parse. - def load_file(filespec, opts = {}) - parse(File.read(filespec), opts) + def load_file(filespec, opts = nil) + parse(File.read(filespec, encoding: Encoding::UTF_8), opts) end # :call-seq: @@ -242,7 +243,7 @@ def load_file(filespec, opts = {}) # # See method #parse! def load_file!(filespec, opts = {}) - parse!(File.read(filespec), opts) + parse!(File.read(filespec, encoding: Encoding::UTF_8), opts) end # :call-seq: diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index 6165cc04..643d3e92 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true + require_relative 'test_helper' require 'stringio' require 'tempfile' @@ -189,8 +190,30 @@ def test_load_file_with_option! test_load_file_with_option_shared(:load_file!) 
end + def test_load_file_with_bad_default_external_encoding + data = { "key" => "€" } + temp_file_containing(JSON.dump(data)) do |path| + loaded_data = with_external_encoding(Encoding::US_ASCII) do + JSON.load_file(path) + end + assert_equal data, loaded_data + end + end + private + def with_external_encoding(encoding) + verbose = $VERBOSE + $VERBOSE = nil + previous_encoding = Encoding.default_external + Encoding.default_external = encoding + yield + ensure + verbose = $VERBOSE + Encoding.default_external = previous_encoding + $VERBOSE = verbose + end + def test_load_shared(method_name) temp_file_containing(@json) do |filespec| assert_equal JSON.public_send(method_name, filespec), @hash From 855563f4764d6f2710c4653f57728ed9e28d55a1 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 8 Nov 2024 16:53:03 +0100 Subject: [PATCH 07/11] Benchmark `Oj::Parser` in a thread safe way The documentation states `Oj::Parser.usual` isn't thread safe: https://github.com/ohler55/oj/blob/c70bf4125b546bc7146840b15de36460d42b4dff/ext/oj/parser.c#L1507-L1513 As such we shouldn't benchmark it this way, but instantiate a new parser every time. Technically in real world scenarios you could create a pool of parsers and re-use them, but if it's not provided by the gem, I'm not sure we should go out of our way to do it. 
--- benchmark/parser.rb | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/benchmark/parser.rb b/benchmark/parser.rb index 6952f3c3..bacb8e9e 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -19,7 +19,7 @@ def benchmark_parsing(name, json_output) Benchmark.ips do |x| x.report("json") { JSON.parse(json_output) } if RUN[:json] x.report("oj") { Oj.load(json_output) } if RUN[:oj] - x.report("Oj::Parser") { Oj::Parser.usual.parse(json_output) } if RUN[:oj] + x.report("Oj::Parser") { Oj::Parser.new(:usual).parse(json_output) } if RUN[:oj] x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson] x.compare!(order: :baseline) end @@ -28,10 +28,6 @@ def benchmark_parsing(name, json_output) # NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] -# Oj::Parser is significanly faster (~1.3x) on the next 3 micro-benchmarks in large part because its -# cache is persisted across calls. That's not something we can do with the current API, we'd -# need to expose a stateful API as well, but that's no really desirable. -# Other than that we're faster than regular `Oj.load` by a good margin (between 1.3x and 2.4x). 
benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10) benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }) benchmark_parsing "test from oj", < Date: Mon, 11 Nov 2024 20:07:45 +0900 Subject: [PATCH 08/11] Fix $VERBOSE restore --- test/json/json_common_interface_test.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index 643d3e92..a5d62337 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -209,7 +209,6 @@ def with_external_encoding(encoding) Encoding.default_external = encoding yield ensure - verbose = $VERBOSE Encoding.default_external = previous_encoding $VERBOSE = verbose end From 00c45ddc9fa49a7e8de7394bb37542ca24f2fd70 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 13 Nov 2024 13:34:27 +0100 Subject: [PATCH 09/11] Only use the key cache if the Hash is in an Array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the likeliness of seeing that key again is really low, and looking up the cache is just a waste. Before: ``` == Parsing small hash (65 bytes) ruby 3.4.0dev (2024-11-13T12:32:57Z fstr-update-callba.. 
9b44b455b3) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 343.049k i/100ms oj 213.943k i/100ms Oj::Parser 31.583k i/100ms rapidjson 303.433k i/100ms Calculating ------------------------------------- json 3.704M (± 1.5%) i/s (270.01 ns/i) - 18.525M in 5.003078s oj 2.200M (± 1.1%) i/s (454.46 ns/i) - 11.125M in 5.056526s Oj::Parser 285.369k (± 4.8%) i/s (3.50 μs/i) - 1.453M in 5.103866s rapidjson 3.216M (± 1.6%) i/s (310.95 ns/i) - 16.082M in 5.001973s Comparison: json: 3703517.4 i/s rapidjson: 3215983.0 i/s - 1.15x slower oj: 2200417.1 i/s - 1.68x slower Oj::Parser: 285369.1 i/s - 12.98x slower == Parsing test from oj (258 bytes) ruby 3.4.0dev (2024-11-13T12:32:57Z fstr-update-callba.. 9b44b455b3) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 54.539k i/100ms oj 41.473k i/100ms Oj::Parser 24.064k i/100ms rapidjson 51.466k i/100ms Calculating ------------------------------------- json 549.386k (± 1.6%) i/s (1.82 μs/i) - 2.781M in 5.064316s oj 417.003k (± 1.3%) i/s (2.40 μs/i) - 2.115M in 5.073047s Oj::Parser 226.500k (± 4.7%) i/s (4.42 μs/i) - 1.131M in 5.005466s rapidjson 526.124k (± 1.0%) i/s (1.90 μs/i) - 2.676M in 5.087176s Comparison: json: 549385.6 i/s rapidjson: 526124.3 i/s - 1.04x slower oj: 417003.4 i/s - 1.32x slower Oj::Parser: 226500.4 i/s - 2.43x slower ``` After: ``` == Parsing small hash (65 bytes) ruby 3.4.0dev (2024-11-13T12:32:57Z fstr-update-callba.. 
9b44b455b3) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 361.394k i/100ms oj 217.203k i/100ms Oj::Parser 28.855k i/100ms rapidjson 303.404k i/100ms Calculating ------------------------------------- json 3.859M (± 2.9%) i/s (259.13 ns/i) - 19.515M in 5.061302s oj 2.191M (± 1.6%) i/s (456.49 ns/i) - 11.077M in 5.058043s Oj::Parser 315.132k (± 7.1%) i/s (3.17 μs/i) - 1.587M in 5.065707s rapidjson 3.156M (± 4.0%) i/s (316.88 ns/i) - 15.777M in 5.008949s Comparison: json: 3859046.5 i/s rapidjson: 3155778.5 i/s - 1.22x slower oj: 2190616.0 i/s - 1.76x slower Oj::Parser: 315132.4 i/s - 12.25x slower == Parsing test from oj (258 bytes) ruby 3.4.0dev (2024-11-13T12:32:57Z fstr-update-callba.. 9b44b455b3) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 55.682k i/100ms oj 40.343k i/100ms Oj::Parser 25.119k i/100ms rapidjson 51.500k i/100ms Calculating ------------------------------------- json 555.808k (± 1.4%) i/s (1.80 μs/i) - 2.784M in 5.010092s oj 412.283k (± 1.7%) i/s (2.43 μs/i) - 2.098M in 5.089900s Oj::Parser 279.306k (±13.3%) i/s (3.58 μs/i) - 1.356M in 5.022079s rapidjson 517.177k (± 2.7%) i/s (1.93 μs/i) - 2.626M in 5.082352s Comparison: json: 555808.3 i/s rapidjson: 517177.1 i/s - 1.07x slower oj: 412283.2 i/s - 1.35x slower Oj::Parser: 279306.5 i/s - 1.99x slower ``` --- ext/json/ext/parser/parser.c | 239 +++++++++++++++++----------------- ext/json/ext/parser/parser.rl | 7 +- 2 files changed, 126 insertions(+), 120 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 09ca4f75..b6252556 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -394,6 +394,7 @@ typedef struct JSON_ParserStruct { VALUE decimal_class; VALUE match_string; FBuffer fbuffer; + int in_array; int max_nesting; bool allow_nan; bool allow_trailing_comma; @@ -448,11 +449,11 @@ static void raise_parse_error(const char *format, const char *start) -#line 474 
"parser.rl" +#line 475 "parser.rl" -#line 456 "parser.c" +#line 457 "parser.c" enum {JSON_object_start = 1}; enum {JSON_object_first_final = 32}; enum {JSON_object_error = 0}; @@ -460,7 +461,7 @@ enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 514 "parser.rl" +#line 515 "parser.rl" #define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) @@ -476,14 +477,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu long stack_head = json->stack->head; -#line 480 "parser.c" +#line 481 "parser.c" { cs = JSON_object_start; } -#line 529 "parser.rl" +#line 530 "parser.rl" -#line 487 "parser.c" +#line 488 "parser.c" { short _widec; if ( p == pe ) @@ -512,7 +513,7 @@ case 2: goto st2; goto st0; tr2: -#line 493 "parser.rl" +#line 494 "parser.rl" { char *np; json->parsing_name = true; @@ -528,7 +529,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 532 "parser.c" +#line 533 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -595,7 +596,7 @@ case 8: goto st8; goto st0; tr11: -#line 482 "parser.rl" +#line 483 "parser.rl" { char *np = JSON_parse_value(json, p, pe, result, current_nesting); if (np == NULL) { @@ -609,20 +610,20 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 613 "parser.c" +#line 614 "parser.c" _widec = (*p); if ( (*p) < 13 ) { if ( (*p) > 9 ) { if ( 10 <= (*p) && (*p) <= 10 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -630,26 +631,26 @@ case 9: if ( 32 <= (*p) && (*p) <= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 44 ) { if ( 47 <= (*p) && 
(*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -670,14 +671,14 @@ case 9: goto st10; goto st0; tr4: -#line 504 "parser.rl" +#line 505 "parser.rl" { p--; {p++; cs = 32; goto _out;} } goto st32; st32: if ( ++p == pe ) goto _test_eof32; case 32: -#line 681 "parser.c" +#line 682 "parser.c" goto st0; st10: if ( ++p == pe ) @@ -779,13 +780,13 @@ case 20: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -804,20 +805,20 @@ case 21: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -840,13 +841,13 @@ case 22: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } 
else if ( (*p) > 46 ) { @@ -854,19 +855,19 @@ case 22: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -890,20 +891,20 @@ case 23: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 491 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1017,7 +1018,7 @@ case 31: _out: {} } -#line 530 "parser.rl" +#line 531 "parser.rl" if (cs >= JSON_object_first_final) { long count = json->stack->head - stack_head; @@ -1068,7 +1069,7 @@ case 31: } -#line 1072 "parser.c" +#line 1073 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -1076,7 +1077,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 661 "parser.rl" +#line 664 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1084,14 +1085,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1088 "parser.c" +#line 1089 "parser.c" { cs = JSON_value_start; } -#line 668 "parser.rl" +#line 671 "parser.rl" -#line 1095 "parser.c" +#line 1096 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1125,7 +1126,7 @@ case 1: cs = 
0; goto _out; tr2: -#line 608 "parser.rl" +#line 609 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { @@ -1137,7 +1138,7 @@ cs = 0; } goto st29; tr3: -#line 618 "parser.rl" +#line 619 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -1157,15 +1158,17 @@ cs = 0; } goto st29; tr7: -#line 636 "parser.rl" +#line 637 "parser.rl" { char *np; + json->in_array++; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); + json->in_array--; if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } goto st29; tr11: -#line 642 "parser.rl" +#line 645 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -1173,7 +1176,7 @@ cs = 0; } goto st29; tr25: -#line 601 "parser.rl" +#line 602 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -1183,7 +1186,7 @@ cs = 0; } goto st29; tr27: -#line 594 "parser.rl" +#line 595 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -1193,19 +1196,19 @@ cs = 0; } goto st29; tr31: -#line 588 "parser.rl" +#line 589 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 585 "parser.rl" +#line 586 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 591 "parser.rl" +#line 592 "parser.rl" { *result = Qtrue; } @@ -1214,9 +1217,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 648 "parser.rl" +#line 651 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 1220 "parser.c" +#line 1223 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -1457,7 +1460,7 @@ case 28: _out: {} } -#line 669 "parser.rl" +#line 672 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -1472,7 +1475,7 @@ case 28: } -#line 1476 "parser.c" +#line 1479 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -1480,7 +1483,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 690 "parser.rl" +#line 693 "parser.rl" #define 
MAX_FAST_INTEGER_SIZE 18 @@ -1520,7 +1523,7 @@ static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) } -#line 1524 "parser.c" +#line 1527 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 6}; enum {JSON_float_error = 0}; @@ -1528,7 +1531,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 742 "parser.rl" +#line 745 "parser.rl" static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1537,15 +1540,15 @@ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *resu bool is_float = false; -#line 1541 "parser.c" +#line 1544 "parser.c" { cs = JSON_float_start; } -#line 750 "parser.rl" +#line 753 "parser.rl" json->memo = p; -#line 1549 "parser.c" +#line 1552 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1585,24 +1588,24 @@ case 6: goto st0; goto tr7; tr7: -#line 734 "parser.rl" +#line 737 "parser.rl" { p--; {p++; cs = 7; goto _out;} } goto st7; st7: if ( ++p == pe ) goto _test_eof7; case 7: -#line 1596 "parser.c" +#line 1599 "parser.c" goto st0; tr8: -#line 735 "parser.rl" +#line 738 "parser.rl" { is_float = true; } goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1606 "parser.c" +#line 1609 "parser.c" if ( 48 <= (*p) && (*p) <= 57 ) goto st8; goto st0; @@ -1621,14 +1624,14 @@ case 8: goto st0; goto tr7; tr9: -#line 735 "parser.rl" +#line 738 "parser.rl" { is_float = true; } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 1632 "parser.c" +#line 1635 "parser.c" switch( (*p) ) { case 43: goto st5; case 45: goto st5; @@ -1685,7 +1688,7 @@ case 10: _out: {} } -#line 752 "parser.rl" +#line 755 "parser.rl" if (cs >= JSON_float_first_final) { if (!is_float) { @@ -1741,7 +1744,7 @@ case 10: -#line 1745 "parser.c" +#line 1748 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 22}; enum {JSON_array_error = 0}; @@ -1749,7 +1752,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 832 "parser.rl" 
+#line 835 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1762,14 +1765,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul long stack_head = json->stack->head; -#line 1766 "parser.c" +#line 1769 "parser.c" { cs = JSON_array_start; } -#line 844 "parser.rl" +#line 847 "parser.rl" -#line 1773 "parser.c" +#line 1776 "parser.c" { short _widec; if ( p == pe ) @@ -1809,7 +1812,7 @@ case 2: goto st2; goto st0; tr2: -#line 812 "parser.rl" +#line 815 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1824,12 +1827,12 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1828 "parser.c" +#line 1831 "parser.c" _widec = (*p); if ( 44 <= (*p) && (*p) <= 44 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1876,14 +1879,14 @@ case 7: goto st3; goto st7; tr4: -#line 824 "parser.rl" +#line 827 "parser.rl" { p--; {p++; cs = 22; goto _out;} } goto st22; st22: if ( ++p == pe ) goto _test_eof22; case 22: -#line 1887 "parser.c" +#line 1890 "parser.c" goto st0; st8: if ( ++p == pe ) @@ -1951,13 +1954,13 @@ case 13: if ( 10 <= (*p) && (*p) <= 10 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -1965,19 +1968,19 @@ case 13: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 
256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2016,13 +2019,13 @@ case 14: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2041,20 +2044,20 @@ case 15: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2077,13 +2080,13 @@ case 16: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 46 ) { @@ -2091,19 +2094,19 @@ case 16: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2127,20 
+2130,20 @@ case 17: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 822 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2212,7 +2215,7 @@ case 21: _out: {} } -#line 845 "parser.rl" +#line 848 "parser.rl" if(cs >= JSON_array_first_final) { long count = json->stack->head - stack_head; @@ -2268,7 +2271,7 @@ static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringE { size_t bufferSize = stringEnd - string; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); @@ -2291,7 +2294,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE int unescape_len; char buf[4]; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); @@ -2406,7 +2409,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 2410 "parser.c" +#line 2413 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 9}; enum {JSON_string_error = 0}; @@ -2414,7 +2417,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 1068 "parser.rl" +#line 1071 "parser.rl" static int @@ -2435,15 +2438,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 2439 "parser.c" +#line 2442 "parser.c" { cs = JSON_string_start; } -#line 1088 "parser.rl" +#line 1091 "parser.rl" json->memo = p; -#line 2447 "parser.c" +#line 
2450 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2468,14 +2471,14 @@ case 2: goto st0; goto st2; tr2: -#line 1050 "parser.rl" +#line 1053 "parser.rl" { *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} p--; {p++; cs = 9; goto _out;} } -#line 1043 "parser.rl" +#line 1046 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} @@ -2484,7 +2487,7 @@ case 2: } goto st9; tr6: -#line 1043 "parser.rl" +#line 1046 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); {p = (( p + 1))-1;} @@ -2496,7 +2499,7 @@ case 2: if ( ++p == pe ) goto _test_eof9; case 9: -#line 2500 "parser.c" +#line 2503 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -2584,7 +2587,7 @@ case 8: _out: {} } -#line 1090 "parser.rl" +#line 1093 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2737,7 +2740,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2741 "parser.c" +#line 2744 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2745,7 +2748,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 1256 "parser.rl" +#line 1259 "parser.rl" /* @@ -2774,16 +2777,16 @@ static VALUE cParser_parse(VALUE self) json->stack = &stack; -#line 2778 "parser.c" +#line 2781 "parser.c" { cs = JSON_start; } -#line 1284 "parser.rl" +#line 1287 "parser.rl" p = json->source; pe = p + json->len; -#line 2787 "parser.c" +#line 2790 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2817,7 +2820,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1248 "parser.rl" +#line 1251 "parser.rl" { char *np = JSON_parse_value(json, p, pe, 
&result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2827,7 +2830,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2831 "parser.c" +#line 2834 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2916,7 +2919,7 @@ case 9: _out: {} } -#line 1287 "parser.rl" +#line 1290 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); @@ -2952,16 +2955,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) json->stack = &stack; -#line 2956 "parser.c" +#line 2959 "parser.c" { cs = JSON_start; } -#line 1322 "parser.rl" +#line 1325 "parser.rl" p = json->source; pe = p + json->len; -#line 2965 "parser.c" +#line 2968 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2995,7 +2998,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1248 "parser.rl" +#line 1251 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -3005,7 +3008,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 3009 "parser.c" +#line 3012 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -3094,7 +3097,7 @@ case 9: _out: {} } -#line 1325 "parser.rl" +#line 1328 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 9a1f7392..eab60b91 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -392,6 +392,7 @@ typedef struct JSON_ParserStruct { VALUE decimal_class; VALUE match_string; FBuffer fbuffer; + int in_array; int max_nesting; bool allow_nan; bool allow_trailing_comma; @@ -635,7 +636,9 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu action parse_array { char *np; + json->in_array++; np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1); + json->in_array--; if (np == NULL) { fhold; fbreak; } else 
fexec np; } @@ -897,7 +900,7 @@ static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringE { size_t bufferSize = stringEnd - string; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); @@ -920,7 +923,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE int unescape_len; char buf[4]; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); From 61f022dfbdcaf28736cea99d4eb09ae87f9558ce Mon Sep 17 00:00:00 2001 From: razokulover Date: Thu, 14 Nov 2024 20:24:26 +0900 Subject: [PATCH 10/11] Fix redundant to_str call --- lib/json/common.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/json/common.rb b/lib/json/common.rb index 03aaaa6e..4c6b2e1a 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -26,7 +26,7 @@ def [](object, opts = {}) elsif object.respond_to?(:to_str) str = object.to_str if str.is_a?(String) - return JSON.parse(object.to_str, opts) + return JSON.parse(str, opts) end end From d5e4a6e3fd10d4707a5bd0d5c1e8dcc771724ccf Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 14 Nov 2024 14:54:13 +0100 Subject: [PATCH 11/11] Release 2.8.2 --- CHANGES.md | 2 ++ lib/json/version.rb | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index f64384ca..f9efe041 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,7 @@ # Changes +### 2024-11-14 (2.8.2) + * `JSON.load_file` explictly read the file as UTF-8. ### 2024-11-06 (2.8.1) diff --git a/lib/json/version.rb b/lib/json/version.rb index 52da68dd..d5cfb549 100644 --- a/lib/json/version.rb +++ b/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.8.1' + VERSION = '2.8.2' end