diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9574d30..b6f82ae9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,26 +14,29 @@ jobs: host: needs: ruby-versions - name: ${{ matrix.os }} ${{ matrix.ruby }} + name: ${{ matrix.os }} ${{ matrix.ruby }} ${{ matrix.env }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: - - ubuntu-latest - - macos-14 - - windows-latest + - ubuntu-latest + - macos-14 + - windows-latest ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }} + env: + - "" include: - - { os: ubuntu-24.04-arm, ruby: 3.4 } - - { os: macos-13, ruby: 3.4 } - - { os: windows-latest , ruby: mswin } # ruby/ruby windows CI - - { os: ubuntu-latest , ruby: jruby-9.4 } # Ruby 3.1 - - { os: macos-latest , ruby: truffleruby-head } - - { os: ubuntu-latest , ruby: truffleruby-head } + - { os: ubuntu-24.04-arm, ruby: 3.4 } + - { os: ubuntu-latest , ruby: 3.4, env: "JSON_DISABLE_SIMD=1" } + - { os: macos-13, ruby: 3.4 } + - { os: windows-latest , ruby: mswin } # ruby/ruby windows CI + - { os: ubuntu-latest , ruby: jruby-9.4 } # Ruby 3.1 + - { os: macos-latest , ruby: truffleruby-head } + - { os: ubuntu-latest , ruby: truffleruby-head } exclude: - - { os: windows-latest, ruby: jruby } - - { os: windows-latest, ruby: jruby-head } + - { os: windows-latest, ruby: jruby } + - { os: windows-latest, ruby: jruby-head } steps: - uses: actions/checkout@v4 @@ -49,9 +52,9 @@ jobs: bundle config --without benchmark bundle install - - run: rake compile + - run: rake compile ${{ matrix.env }} - - run: rake test JSON_COMPACT=1 + - run: rake test JSON_COMPACT=1 ${{ matrix.env }} - run: rake build diff --git a/CHANGES.md b/CHANGES.md index 26fed9e4..c2ee0daf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,19 @@ # Changes +### Unreleased + +* Handle non-string hash keys with broken `to_s` implementations. +* `JSON.generate` now uses SSE2 (x86) or NEON (arm64) instructions when available to escape strings. + +### 2025-04-25 (2.11.3) + +* Fix a regression in `JSON.pretty_generate` that could cause indentation to be off once some `#to_json` has been called. + +### 2025-04-24 (2.11.2) + +* Add back `JSON::PRETTY_STATE_PROTOTYPE`. This constant was private API but is used by popular gems like `multi_json`. + It now emits a deprecation warning. + ### 2025-04-24 (2.11.1) * Add back `JSON.restore`, `JSON.unparse`, `JSON.fast_unparse` and `JSON.pretty_unparse`. diff --git a/ext/json/ext/generator/depend b/ext/json/ext/generator/depend index 60697f2c..21bdc420 100644 --- a/ext/json/ext/generator/depend +++ b/ext/json/ext/generator/depend @@ -1,3 +1,4 @@ generator.o: generator.c $(srcdir)/../fbuffer/fbuffer.h generator.o: generator.c $(srcdir)/../vendor/fpconv.c generator.o: generator.c $(srcdir)/../vendor/jeaiii-ltoa.h +generator.o: generator.c $(srcdir)/simd.h diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb index 078068cf..e44890e2 100644 --- a/ext/json/ext/generator/extconf.rb +++ b/ext/json/ext/generator/extconf.rb @@ -6,5 +6,34 @@ else append_cflags("-std=c99") $defs << "-DJSON_GENERATOR" + + if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/ + # Try to compile a small program using NEON instructions + if have_header('arm_neon.h') + have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC') + #include + int main() { + uint8x16_t test = vdupq_n_u8(32); + return 0; + } + SRC + $defs.push("-DENABLE_SIMD") + end + end + + if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC') + #include + int main() { + __m128i test = _mm_set1_epi8(32); + return 0; + } + SRC + $defs.push("-DENABLE_SIMD") + end + + have_header('cpuid.h') + end + create_makefile 'json/ext/generator' end diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 428f5e21..633fa87c 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -5,6 +5,8 @@ #include #include +#include "simd.h" + /* ruby api and some helpers */ typedef struct JSON_Generator_StateStruct { @@ -45,7 +47,7 @@ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_n struct generate_json_data; -typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj); struct generate_json_data { FBuffer *buffer; @@ -57,20 +59,20 @@ struct generate_json_data { static VALUE cState_from_state_s(VALUE self, VALUE opts); static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io); -static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj); #ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj); #endif -static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj); static int usascii_encindex, utf8_encindex, binary_encindex; @@ -109,12 +111,40 @@ typedef struct _search_state { const char *end; const char *cursor; FBuffer *buffer; + +#ifdef ENABLE_SIMD + const char *chunk_base; + const char *chunk_end; + bool has_matches; + +#if defined(HAVE_SIMD_NEON) + uint64_t matches_mask; +#elif defined(HAVE_SIMD_SSE2) + int matches_mask; +#else +#error "Unknown SIMD Implementation." +#endif /* HAVE_SIMD_NEON */ +#endif /* ENABLE_SIMD */ } search_state; -static inline void search_flush(search_state *search) -{ - fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); - search->cursor = search->ptr; +#if (defined(__GNUC__ ) || defined(__clang__)) +#define FORCE_INLINE __attribute__((always_inline)) +#else +#define FORCE_INLINE +#endif + +static inline FORCE_INLINE void search_flush(search_state *search) +{ + // Do not remove this conditional without profiling, specifically escape-heavy text. + // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush). + // For back-to-back characters that need to be escaped, specifcally for the SIMD code paths, this method + // will be called just before calling escape_UTF8_char_basic. There will be no characers to append for the + // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if + // nothing needs to be flushed, we can save a few memory references with this conditional. + if (search->ptr > search->cursor) { + fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); + search->cursor = search->ptr; + } } static const unsigned char escape_table_basic[256] = { @@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +static unsigned char (*search_escape_basic_impl)(search_state *); + static inline unsigned char search_escape_basic(search_state *search) { while (search->ptr < search->end) { @@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search) return 0; } -static inline void escape_UTF8_char_basic(search_state *search) { +static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search) +{ const unsigned char ch = (unsigned char)*search->ptr; switch (ch) { case '"': fbuffer_append(search->buffer, "\\\"", 2); break; @@ -186,12 +219,13 @@ static inline void escape_UTF8_char_basic(search_state *search) { */ static inline void convert_UTF8_to_JSON(search_state *search) { - while (search_escape_basic(search)) { + while (search_escape_basic_impl(search)) { escape_UTF8_char_basic(search); } } -static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) { +static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) +{ const unsigned char ch = (unsigned char)*search->ptr; switch (ch_len) { case 1: { @@ -227,6 +261,280 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) search->cursor = (search->ptr += ch_len); } +#ifdef ENABLE_SIMD + +static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len) +{ + // Flush the buffer so everything up until the last 'len' characters are unflushed. + search_flush(search); + + FBuffer *buf = search->buffer; + fbuffer_inc_capa(buf, vec_len); + + char *s = (buf->ptr + buf->len); + + // Pad the buffer with dummy characters that won't need escaping. + // This seem wateful at first sight, but memset of vector length is very fast. + memset(s, 'X', vec_len); + + // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters + // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage. + MEMCPY(s, search->ptr, char, len); + + return s; +} + +#ifdef HAVE_SIMD_NEON + +static inline FORCE_INLINE unsigned char neon_next_match(search_state *search) +{ + uint64_t mask = search->matches_mask; + uint32_t index = trailing_zeros64(mask) >> 2; + + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. + // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} + +// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon +static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches) +{ + const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4); + const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0); + return mask & 0x8888888888888888ull; +} + +static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr) +{ + uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr); + + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33)); + + uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\')); + uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash); + + return neon_match_mask(needs_escape); +} + +static inline unsigned char search_escape_basic_neon(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + return neon_next_match(search); + } else { + // neon_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = false; + search->ptr = search->chunk_end; + } + } + + /* + * The code below implements an SIMD-based algorithm to determine if N bytes at a time + * need to be escaped. + * + * Assume the ptr = "Te\sting!" (the double quotes are included in the string) + * + * The explanation will be limited to the first 8 bytes of the string for simplicity. However + * the vector insructions may work on larger vectors. + * + * First, we load three constants 'lower_bound', 'backslash' and 'dblquote" in vector registers. + * + * lower_bound: [20 20 20 20 20 20 20 20] + * backslash: [5C 5C 5C 5C 5C 5C 5C 5C] + * dblquote: [22 22 22 22 22 22 22 22] + * + * Next we load the first chunk of the ptr: + * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n) + * + * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector + * as no bytes are less than 32 (0x20): + * [0 0 0 0 0 0 0 0] + * + * Next, we check if any byte in chunk is equal to a backslash: + * [0 0 0 FF 0 0 0 0] + * + * Finally we check if any byte in chunk is equal to a double quote: + * [FF 0 0 0 0 0 0 0] + * + * Now we have three vectors where each byte indicates if the corresponding byte in chunk + * needs to be escaped. We combine these vectors with a series of logical OR instructions. + * This is the needs_escape vector and it is equal to: + * [FF 0 0 FF 0 0 0 0] + * + * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of + * the values in the vector. This computes how many bytes need to be escaped within this chunk. + * + * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then, + * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we + * have at least one byte that needs to be escaped. + */ + while (search->ptr + sizeof(uint8x16_t) <= search->end) { + uint64_t mask = neon_rules_update(search->ptr); + + if (!mask) { + search->ptr += sizeof(uint8x16_t); + continue; + } + search->matches_mask = mask; + search->has_matches = true; + search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(uint8x16_t); + return neon_next_match(search); + } + + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining); + + uint64_t mask = neon_rules_update(s); + + if (!mask) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + search->buffer->len += remaining; + search->ptr = search->end; + search->cursor = search->end; + return 0; + } + + search->matches_mask = mask; + search->has_matches = true; + search->chunk_end = search->end; + search->chunk_base = search->ptr; + return neon_next_match(search); + } + + if (search->ptr < search->end) { + return search_escape_basic(search); + } + + search_flush(search); + return 0; +} +#endif /* HAVE_SIMD_NEON */ + +#ifdef HAVE_SIMD_SSE2 + +#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a) +#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a) +#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1)) +#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a) + +static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search) +{ + int mask = search->matches_mask; + int index = trailing_zeros(mask); + + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. + // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} + +#if defined(__clang__) || defined(__GNUC__) +#define TARGET_SSE2 __attribute__((target("sse2"))) +#else +#define TARGET_SSE2 +#endif + +static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr) +{ + __m128i chunk = _mm_loadu_si128((__m128i const*)ptr); + + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33)); + __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\')); + __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash); + return _mm_movemask_epi8(needs_escape); +} + +static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + return sse2_next_match(search); + } else { + // sse2_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = false; + if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) { + search->ptr = search->end; + } else { + search->ptr = search->chunk_base + sizeof(__m128i); + } + } + } + + while (search->ptr + sizeof(__m128i) <= search->end) { + int needs_escape_mask = sse2_update(search->ptr); + + if (needs_escape_mask == 0) { + search->ptr += sizeof(__m128i); + continue; + } + + search->has_matches = true; + search->matches_mask = needs_escape_mask; + search->chunk_base = search->ptr; + return sse2_next_match(search); + } + + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining); + + int needs_escape_mask = sse2_update(s); + + if (needs_escape_mask == 0) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + search->buffer->len += remaining; + search->ptr = search->end; + search->cursor = search->end; + return 0; + } + + search->has_matches = true; + search->matches_mask = needs_escape_mask; + search->chunk_base = search->ptr; + return sse2_next_match(search); + } + + if (search->ptr < search->end) { + return search_escape_basic(search); + } + + search_flush(search); + return 0; +} + +#endif /* HAVE_SIMD_SSE2 */ + +#endif /* ENABLE_SIMD */ + static const unsigned char script_safe_escape_table[256] = { // ASCII Control Characters 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, @@ -789,6 +1097,21 @@ struct hash_foreach_arg { int iter; }; +static VALUE +convert_string_subclass(VALUE key) +{ + VALUE key_to_s = rb_funcall(key, i_to_s, 0); + + if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) { + VALUE cname = rb_obj_class(key); + rb_raise(rb_eTypeError, + "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")", + cname, "String", cname, "to_s", rb_obj_class(key_to_s)); + } + + return key_to_s; +} + static int json_object_i(VALUE key, VALUE val, VALUE _arg) { @@ -802,12 +1125,12 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) int j; if (arg->iter > 0) fbuffer_append_char(buffer, ','); - if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append_str(buffer, state->object_nl); + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); } - if (RB_UNLIKELY(state->indent)) { + if (RB_UNLIKELY(data->state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); + fbuffer_append_str(buffer, data->state->indent); } } @@ -817,7 +1140,7 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) { key_to_s = key; } else { - key_to_s = rb_funcall(key, i_to_s, 0); + key_to_s = convert_string_subclass(key); } break; case T_SYMBOL: @@ -829,21 +1152,22 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) } if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) { - generate_json_string(buffer, data, state, key_to_s); + generate_json_string(buffer, data, key_to_s); } else { - generate_json(buffer, data, state, key_to_s); + generate_json(buffer, data, key_to_s); } - if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before); + if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before); fbuffer_append_char(buffer, ':'); - if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space); - generate_json(buffer, data, state, val); + if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space); + generate_json(buffer, data, val); arg->iter++; return ST_CONTINUE; } -static inline long increase_depth(JSON_Generator_State *state) +static inline long increase_depth(struct generate_json_data *data) { + JSON_Generator_State *state = data->state; long depth = ++state->depth; if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) { rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); @@ -851,14 +1175,14 @@ static inline long increase_depth(JSON_Generator_State *state) return depth; } -static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { int j; - long depth = increase_depth(state); + long depth = increase_depth(data); if (RHASH_SIZE(obj) == 0) { fbuffer_append(buffer, "{}", 2); - --state->depth; + --data->state->depth; return; } @@ -870,49 +1194,49 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat }; rb_hash_foreach(obj, json_object_i, (VALUE)&arg); - depth = --state->depth; - if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append_str(buffer, state->object_nl); - if (RB_UNLIKELY(state->indent)) { + depth = --data->state->depth; + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); + if (RB_UNLIKELY(data->state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); + fbuffer_append_str(buffer, data->state->indent); } } } fbuffer_append_char(buffer, '}'); } -static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { int i, j; - long depth = increase_depth(state); + long depth = increase_depth(data); if (RARRAY_LEN(obj) == 0) { fbuffer_append(buffer, "[]", 2); - --state->depth; + --data->state->depth; return; } fbuffer_append_char(buffer, '['); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl); + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); for(i = 0; i < RARRAY_LEN(obj); i++) { if (i > 0) { fbuffer_append_char(buffer, ','); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl); + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); } - if (RB_UNLIKELY(state->indent)) { + if (RB_UNLIKELY(data->state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); + fbuffer_append_str(buffer, data->state->indent); } } - generate_json(buffer, data, state, RARRAY_AREF(obj, i)); + generate_json(buffer, data, RARRAY_AREF(obj, i)); } - state->depth = --depth; - if (RB_UNLIKELY(state->array_nl)) { - fbuffer_append_str(buffer, state->array_nl); - if (RB_UNLIKELY(state->indent)) { + data->state->depth = --depth; + if (RB_UNLIKELY(data->state->array_nl)) { + fbuffer_append_str(buffer, data->state->array_nl); + if (RB_UNLIKELY(data->state->indent)) { for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); + fbuffer_append_str(buffer, data->state->indent); } } } @@ -961,7 +1285,7 @@ static inline VALUE ensure_valid_encoding(VALUE str) return str; } -static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { obj = ensure_valid_encoding(obj); @@ -974,12 +1298,18 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat search.cursor = search.ptr; search.end = search.ptr + len; +#ifdef ENABLE_SIMD + search.matches_mask = 0; + search.has_matches = false; + search.chunk_base = NULL; +#endif /* ENABLE_SIMD */ + switch(rb_enc_str_coderange(obj)) { case ENC_CODERANGE_7BIT: case ENC_CODERANGE_VALID: - if (RB_UNLIKELY(state->ascii_only)) { - convert_UTF8_to_ASCII_only_JSON(&search, state->script_safe ? script_safe_escape_table : ascii_only_escape_table); - } else if (RB_UNLIKELY(state->script_safe)) { + if (RB_UNLIKELY(data->state->ascii_only)) { + convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table); + } else if (RB_UNLIKELY(data->state->script_safe)) { convert_UTF8_to_script_safe_JSON(&search); } else { convert_UTF8_to_JSON(&search); @@ -992,7 +1322,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat fbuffer_append_char(buffer, '"'); } -static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { VALUE tmp; if (rb_respond_to(obj, i_to_json)) { @@ -1002,68 +1332,68 @@ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *d } else { tmp = rb_funcall(obj, i_to_s, 0); Check_Type(tmp, T_STRING); - generate_json_string(buffer, data, state, tmp); + generate_json_string(buffer, data, tmp); } } -static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - if (state->strict) { - generate_json_string(buffer, data, state, rb_sym2str(obj)); + if (data->state->strict) { + generate_json_string(buffer, data, rb_sym2str(obj)); } else { - generate_json_fallback(buffer, data, state, obj); + generate_json_fallback(buffer, data, obj); } } -static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "null", 4); } -static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "false", 5); } -static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "true", 4); } -static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append_long(buffer, FIX2LONG(obj)); } -static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { VALUE tmp = rb_funcall(obj, i_to_s, 0); fbuffer_append_str(buffer, tmp); } #ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { if (FIXNUM_P(obj)) - generate_json_fixnum(buffer, data, state, obj); + generate_json_fixnum(buffer, data, obj); else - generate_json_bignum(buffer, data, state, obj); + generate_json_bignum(buffer, data, obj); } #endif -static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { double value = RFLOAT_VALUE(obj); - char allow_nan = state->allow_nan; + char allow_nan = data->state->allow_nan; if (isinf(value) || isnan(value)) { /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */ if (!allow_nan) { - if (state->strict && state->as_json) { - VALUE casted_obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil); + if (data->state->strict && data->state->as_json) { + VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil); if (casted_obj != obj) { - increase_depth(state); - generate_json(buffer, data, state, casted_obj); - state->depth--; + increase_depth(data); + generate_json(buffer, data, casted_obj); + data->state->depth--; return; } } @@ -1089,30 +1419,30 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data buffer->len += len; } -static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { VALUE fragment = RSTRUCT_GET(obj, 0); Check_Type(fragment, T_STRING); fbuffer_append_str(buffer, fragment); } -static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { bool as_json_called = false; start: if (obj == Qnil) { - generate_json_null(buffer, data, state, obj); + generate_json_null(buffer, data, obj); } else if (obj == Qfalse) { - generate_json_false(buffer, data, state, obj); + generate_json_false(buffer, data, obj); } else if (obj == Qtrue) { - generate_json_true(buffer, data, state, obj); + generate_json_true(buffer, data, obj); } else if (RB_SPECIAL_CONST_P(obj)) { if (RB_FIXNUM_P(obj)) { - generate_json_fixnum(buffer, data, state, obj); + generate_json_fixnum(buffer, data, obj); } else if (RB_FLONUM_P(obj)) { - generate_json_float(buffer, data, state, obj); + generate_json_float(buffer, data, obj); } else if (RB_STATIC_SYM_P(obj)) { - generate_json_symbol(buffer, data, state, obj); + generate_json_symbol(buffer, data, obj); } else { goto general; } @@ -1120,43 +1450,43 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON VALUE klass = RBASIC_CLASS(obj); switch (RB_BUILTIN_TYPE(obj)) { case T_BIGNUM: - generate_json_bignum(buffer, data, state, obj); + generate_json_bignum(buffer, data, obj); break; case T_HASH: if (klass != rb_cHash) goto general; - generate_json_object(buffer, data, state, obj); + generate_json_object(buffer, data, obj); break; case T_ARRAY: if (klass != rb_cArray) goto general; - generate_json_array(buffer, data, state, obj); + generate_json_array(buffer, data, obj); break; case T_STRING: if (klass != rb_cString) goto general; - generate_json_string(buffer, data, state, obj); + generate_json_string(buffer, data, obj); break; case T_SYMBOL: - generate_json_symbol(buffer, data, state, obj); + generate_json_symbol(buffer, data, obj); break; case T_FLOAT: if (klass != rb_cFloat) goto general; - generate_json_float(buffer, data, state, obj); + generate_json_float(buffer, data, obj); break; case T_STRUCT: if (klass != cFragment) goto general; - generate_json_fragment(buffer, data, state, obj); + generate_json_fragment(buffer, data, obj); break; default: general: - if (state->strict) { - if (RTEST(state->as_json) && !as_json_called) { - obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil); + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil); as_json_called = true; goto start; } else { raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); } } else { - generate_json_fallback(buffer, data, state, obj); + generate_json_fallback(buffer, data, obj); } } } @@ -1166,7 +1496,7 @@ static VALUE generate_json_try(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; - data->func(data->buffer, data, data->state, data->obj); + data->func(data->buffer, data, data->obj); return Qnil; } @@ -1837,4 +2167,23 @@ void Init_generator(void) binary_encindex = rb_ascii8bit_encindex(); rb_require("json/ext/generator/state"); + + + switch(find_simd_implementation()) { +#ifdef ENABLE_SIMD +#ifdef HAVE_SIMD_NEON + case SIMD_NEON: + search_escape_basic_impl = search_escape_basic_neon; + break; +#endif /* HAVE_SIMD_NEON */ +#ifdef HAVE_SIMD_SSE2 + case SIMD_SSE2: + search_escape_basic_impl = search_escape_basic_sse2; + break; +#endif /* HAVE_SIMD_SSE2 */ +#endif /* ENABLE_SIMD */ + default: + search_escape_basic_impl = search_escape_basic; + break; + } } diff --git a/ext/json/ext/generator/simd.h b/ext/json/ext/generator/simd.h new file mode 100644 index 00000000..2fbc9316 --- /dev/null +++ b/ext/json/ext/generator/simd.h @@ -0,0 +1,110 @@ +typedef enum { + SIMD_NONE, + SIMD_NEON, + SIMD_SSE2 +} SIMD_Implementation; + +#ifdef ENABLE_SIMD + +#ifdef __clang__ + #if __has_builtin(__builtin_ctzll) + #define HAVE_BUILTIN_CTZLL 1 + #else + #define HAVE_BUILTIN_CTZLL 0 + #endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define HAVE_BUILTIN_CTZLL 1 +#else + #define HAVE_BUILTIN_CTZLL 0 +#endif + +static inline uint32_t trailing_zeros64(uint64_t input) { +#if HAVE_BUILTIN_CTZLL + return __builtin_ctzll(input); +#else + uint32_t trailing_zeros = 0; + uint64_t temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; +#endif +} + +static inline int trailing_zeros(int input) { + #if HAVE_BUILTIN_CTZLL + return __builtin_ctz(input); + #else + int trailing_zeros = 0; + int temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; + #endif +} + +#define SIMD_MINIMUM_THRESHOLD 6 + +#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64) +#include + +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 +static SIMD_Implementation find_simd_implementation(void) { + return SIMD_NEON; +} + +#define HAVE_SIMD_NEON 1 + +uint8x16x4_t load_uint8x16_4(const unsigned char *table) { + uint8x16x4_t tab; + tab.val[0] = vld1q_u8(table); + tab.val[1] = vld1q_u8(table+16); + tab.val[2] = vld1q_u8(table+32); + tab.val[3] = vld1q_u8(table+48); + return tab; +} + +#endif /* ARM Neon Support.*/ + +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) + +#ifdef HAVE_X86INTRIN_H +#include + +#define HAVE_SIMD_SSE2 1 + +#ifdef HAVE_CPUID_H +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 + +#include +#endif /* HAVE_CPUID_H */ + +static SIMD_Implementation find_simd_implementation(void) { + +#if defined(__GNUC__ ) || defined(__clang__) +#ifdef __GNUC__ + __builtin_cpu_init(); +#endif /* __GNUC__ */ + + // TODO Revisit. I think the SSE version now only uses SSE2 instructions. + if (__builtin_cpu_supports("sse2")) { + return SIMD_SSE2; + } +#endif /* __GNUC__ || __clang__*/ + + return SIMD_NONE; +} + +#endif /* HAVE_X86INTRIN_H */ +#endif /* X86_64 Support */ + +#endif /* ENABLE_SIMD */ + +#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED +static SIMD_Implementation find_simd_implementation(void) { + return SIMD_NONE; +} +#endif diff --git a/lib/json/common.rb b/lib/json/common.rb index 77d024de..98bc0ac8 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -961,6 +961,24 @@ def restore(...) load(...) end module_function :restore + + class << self + private + + def const_missing(const_name) + case const_name + when :PRETTY_STATE_PROTOTYPE + if RUBY_VERSION >= "3.0" + warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1, category: :deprecated + else + warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1 + end + state.new(PRETTY_GENERATE_OPTIONS) + else + super + end + end + end # :startdoc: # JSON::Coder holds a parser and generator configuration. diff --git a/lib/json/version.rb b/lib/json/version.rb index 1fa83370..c8744c97 100644 --- a/lib/json/version.rb +++ b/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.11.1' + VERSION = '2.11.3' end diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index 9c125513..745400fa 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -91,6 +91,30 @@ def test_fast_generate def test_pretty_generate assert_equal "[\n 1,\n 2,\n 3\n]", JSON.pretty_generate([ 1, 2, 3 ]) + assert_equal <<~JSON.strip, JSON.pretty_generate({ a: { b: "f"}, c: "d"}) + { + "a": { + "b": "f" + }, + "c": "d" + } + JSON + + # Cause the state to be spilled on the heap. + o = Object.new + def o.to_s + "Object" + end + actual = JSON.pretty_generate({ a: { b: o}, c: "d", e: "f"}) + assert_equal <<~JSON.strip, actual + { + "a": { + "b": "Object" + }, + "c": "d", + "e": "f" + } + JSON end def test_load diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index f87006ac..3192b555 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -410,18 +410,34 @@ def test_backslash json = '["\\\\.(?i:gif|jpe?g|png)$"]' assert_equal json, generate(data) # - data = [ '\\"' ] - json = '["\\\\\""]' + data = [ '\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$' ] + json = '["\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$"]' + assert_equal json, generate(data) + # + data = [ '\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"' ] + json = '["\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\""]' assert_equal json, generate(data) # data = [ '/' ] json = '["/"]' assert_equal json, generate(data) # + data = [ '////////////////////////////////////////////////////////////////////////////////////' ] + json = '["////////////////////////////////////////////////////////////////////////////////////"]' + assert_equal json, generate(data) + # data = [ '/' ] json = '["\/"]' assert_equal json, generate(data, :script_safe => true) # + data = [ '///////////' ] + json = '["\/\/\/\/\/\/\/\/\/\/\/"]' + assert_equal json, generate(data, :script_safe => true) + # + data = [ '///////////////////////////////////////////////////////' ] + json = '["\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/"]' + assert_equal json, generate(data, :script_safe => true) + # data = [ "\u2028\u2029" ] json = '["\u2028\u2029"]' assert_equal json, generate(data, :script_safe => true) @@ -438,6 +454,10 @@ def test_backslash json = '["\""]' assert_equal json, generate(data) # + data = ['"""""""""""""""""""""""""'] + json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]' + assert_equal json, generate(data) + # data = ["'"] json = '["\\\'"]' assert_equal '["\'"]', generate(data) @@ -445,6 +465,72 @@ def test_backslash data = ["倩", "瀨"] json = '["倩","瀨"]' assert_equal json, generate(data, script_safe: true) + # + data = '["This is a "test" of the emergency broadcast system."]' + json = "\"[\\\"This is a \\\"test\\\" of the emergency broadcast system.\\\"]\"" + assert_equal json, generate(data) + # + data = '\tThis is a test of the emergency broadcast system.' + json = "\"\\\\tThis is a test of the emergency broadcast system.\"" + assert_equal json, generate(data) + # + data = 'This\tis a test of the emergency broadcast system.' + json = "\"This\\\\tis a test of the emergency broadcast system.\"" + assert_equal json, generate(data) + # + data = 'This is\ta test of the emergency broadcast system.' + json = "\"This is\\\\ta test of the emergency broadcast system.\"" + assert_equal json, generate(data) + # + data = 'This is a test of the emergency broadcast\tsystem.' + json = "\"This is a test of the emergency broadcast\\\\tsystem.\"" + assert_equal json, generate(data) + # + data = 'This is a test of the emergency broadcast\tsystem.\n' + json = "\"This is a test of the emergency broadcast\\\\tsystem.\\\\n\"" + assert_equal json, generate(data) + data = '"' * 15 + json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\"" + assert_equal json, generate(data) + data = "\"\"\"\"\"\"\"\"\"\"\"\"\"\"a" + json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"a\"" + assert_equal json, generate(data) + data = "\u0001\u0001\u0001\u0001" + json = "\"\\u0001\\u0001\\u0001\\u0001\"" + assert_equal json, generate(data) + data = "\u0001a\u0001a\u0001a\u0001a" + json = "\"\\u0001a\\u0001a\\u0001a\\u0001a\"" + assert_equal json, generate(data) + data = "\u0001aa\u0001aa" + json = "\"\\u0001aa\\u0001aa\"" + assert_equal json, generate(data) + data = "\u0001aa\u0001aa\u0001aa" + json = "\"\\u0001aa\\u0001aa\\u0001aa\"" + assert_equal json, generate(data) + data = "\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa" + json = "\"\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\"" + assert_equal json, generate(data) + data = "\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002" + json = "\"\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\"" + assert_equal json, generate(data) + data = "ab\u0002c" + json = "\"ab\\u0002c\"" + assert_equal json, generate(data) + data = "ab\u0002cab\u0002cab\u0002cab\u0002c" + json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002c\"" + assert_equal json, generate(data) + data = "ab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002c" + json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002c\"" + assert_equal json, generate(data) + data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f" + json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\"" + assert_equal json, generate(data) + data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b" + json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\"" + assert_equal json, generate(data) + data = "a\n\t\f\b\n\t\f\b\n\t\f\b\n\t" + json = "\"a\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\"" + assert_equal json, generate(data) end def test_string_subclass @@ -605,6 +691,22 @@ def test_string_subclass_with_to_s assert_equal '{"JSONGeneratorTest::StringWithToS#to_s":1}', JSON.generate(StringWithToS.new => 1) end + def test_string_subclass_with_broken_to_s + klass = Class.new(String) do + def to_s + false + end + end + s = klass.new("test") + assert_equal '["test"]', JSON.generate([s]) + + omit("Can't figure out how to match behavior in java code") if RUBY_PLATFORM == "java" + + assert_raise TypeError do + JSON.generate(s => 1) + end + end + if defined?(JSON::Ext::Generator) and RUBY_PLATFORM != "java" def test_valid_utf8_in_different_encoding utf8_string = "€™"