
Commit aef3f4b

Merge pull request simdjson#296 from lemire/wide_mask
Genericize bitmask building to make algorithms clearer
2 parents 2060cf8 + bf80838 commit aef3f4b

13 files changed, 380 additions and 416 deletions

Makefile

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-REFERENCE_VERSION = v0.2.1
+REFERENCE_VERSION = master
 
 .SUFFIXES:
 #

scripts/checkperf.sh

Lines changed: 1 addition & 0 deletions
@@ -27,4 +27,5 @@ make parse
 make perfdiff
 
 echo "Running perfdiff:"
+echo ./perfdiff \"$current/parse -t $perftests\" \"$reference/parse -t $perftests\"
 ./perfdiff "$current/parse -t $perftests" "$reference/parse -t $perftests"

singleheader/amalgamation_demo.cpp

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-/* auto-generated on Sun Aug 18 15:06:50 DST 2019. Do not edit! */
+/* auto-generated on Fri Aug 23 11:02:39 DST 2019. Do not edit! */
 
 #include <iostream>
 #include "simdjson.h"

singleheader/simdjson.cpp

Lines changed: 166 additions & 218 deletions
Large diffs are not rendered by default.

singleheader/simdjson.h

Lines changed: 6 additions & 2 deletions
@@ -1,4 +1,4 @@
-/* auto-generated on Sun Aug 18 15:06:50 DST 2019. Do not edit! */
+/* auto-generated on Fri Aug 23 11:02:39 DST 2019. Do not edit! */
 /* begin file include/simdjson/simdjson_version.h */
 // /include/simdjson/simdjson_version.h automatically generated by release.py,
 // do not change by hand
@@ -36438,13 +36438,17 @@ class ParsedJson::BasicIterator {
   // (in case of repeated keys, this only finds the first one).
   // We seek the key using C's strcmp so if your JSON strings contain
   // NULL chars, this would trigger a false positive: if you expect that
-  // to be the case, take extra precautions.
+  // to be the case, take extra precautions.
+  // Furthermore, we do the comparison character-by-character
+  // without taking into account Unicode equivalence.
   inline bool move_to_key(const char *key);
   // when at {, go one level deep, looking for a given key
   // if successful, we are left pointing at the value,
   // if not, we are still pointing at the object ({)
   // (in case of repeated keys, this only finds the first one).
   // The string we search for can contain NULL values.
+  // Furthermore, we do the comparison character-by-character
+  // without taking into account Unicode equivalence.
   inline bool move_to_key(const char *key, uint32_t length);
 
   // when at a key location within an object, this moves to the accompanying
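The new comments document byte-wise key matching. A minimal usage sketch of move_to_key follows, assuming the simdjson::ParsedJson::Iterator and build_parsed_json entry points of this vintage (those names are not shown in this diff, so treat them as assumptions):

#include <cstring>
#include <cstdio>
#include "simdjson.h"

int main() {
  const char *json = "{\"name\":\"simdjson\"}";
  simdjson::ParsedJson pj = simdjson::build_parsed_json(json, std::strlen(json));
  if (!pj.is_valid()) { return 1; }
  simdjson::ParsedJson::Iterator it(pj);
  // Matching is byte-by-byte: no Unicode normalization is applied, and the
  // strcmp-based overload stops at the first NUL byte; prefer the
  // (key, length) overload when keys may contain embedded NULs.
  if (it.move_to_key("name") && it.is_string()) {
    std::printf("%s\n", it.get_string());
  }
  return 0;
}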

src/arm64/simd_input.h

Lines changed: 37 additions & 10 deletions
@@ -46,22 +46,49 @@ struct simd_input<Architecture::ARM64> {
     this->i3 = vld1q_u8(ptr + 48);
   }
 
+  really_inline simd_input(uint8x16_t a0, uint8x16_t a1, uint8x16_t a2, uint8x16_t a3) {
+    this->i0 = a0;
+    this->i1 = a1;
+    this->i2 = a2;
+    this->i3 = a3;
+  }
+
+  template <typename F>
+  really_inline simd_input<Architecture::ARM64> map(F const& map_chunk) {
+    return simd_input<Architecture::ARM64>(
+      map_chunk(this->i0),
+      map_chunk(this->i1),
+      map_chunk(this->i2),
+      map_chunk(this->i3)
+    );
+  }
+
+  template <typename F>
+  really_inline simd_input<Architecture::ARM64> map(simd_input<Architecture::ARM64> b, F const& map_chunk) {
+    return simd_input<Architecture::ARM64>(
+      map_chunk(this->i0, b.i0),
+      map_chunk(this->i1, b.i1),
+      map_chunk(this->i2, b.i2),
+      map_chunk(this->i3, b.i3)
+    );
+  }
+
+  really_inline uint64_t to_bitmask() {
+    return neon_movemask_bulk(this->i0, this->i1, this->i2, this->i3);
+  }
+
   really_inline uint64_t eq(uint8_t m) {
     const uint8x16_t mask = vmovq_n_u8(m);
-    uint8x16_t cmp_res_0 = vceqq_u8(this->i0, mask);
-    uint8x16_t cmp_res_1 = vceqq_u8(this->i1, mask);
-    uint8x16_t cmp_res_2 = vceqq_u8(this->i2, mask);
-    uint8x16_t cmp_res_3 = vceqq_u8(this->i3, mask);
-    return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
+    return this->map( [&](auto a) {
+      return vceqq_u8(a, mask);
+    }).to_bitmask();
   }
 
   really_inline uint64_t lteq(uint8_t m) {
     const uint8x16_t mask = vmovq_n_u8(m);
-    uint8x16_t cmp_res_0 = vcleq_u8(this->i0, mask);
-    uint8x16_t cmp_res_1 = vcleq_u8(this->i1, mask);
-    uint8x16_t cmp_res_2 = vcleq_u8(this->i2, mask);
-    uint8x16_t cmp_res_3 = vcleq_u8(this->i3, mask);
-    return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
+    return this->map( [&](auto a) {
+      return vcleq_u8(a, mask);
+    }).to_bitmask();
   }
 
 }; // struct simd_input
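The new map()/to_bitmask() pair replaces the repeated four-chunk boilerplate with a single expression. As an illustration of the pattern (a hypothetical free function, not part of this commit, equivalent to calling in.eq('\\')):

// Hypothetical sketch built on the members added above.
really_inline uint64_t backslash_mask(simd_input<Architecture::ARM64> in) {
  const uint8x16_t mask = vmovq_n_u8('\\');
  return in.map([&](auto chunk) {
    return vceqq_u8(chunk, mask);  // 0xFF where the byte equals '\', else 0x00
  }).to_bitmask();                 // collapse the four 16-byte results into 64 bits
}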

src/arm64/stage1_find_marks.h

Lines changed: 15 additions & 39 deletions
@@ -12,7 +12,7 @@
 
 namespace simdjson::arm64 {
 
-static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
+really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
 
 #ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
   return vmull_p64(-1ULL, quote_bits);
@@ -21,52 +21,28 @@ static really_inline uint64_t compute_quote_mask(uint64_t quote_bits) {
 #endif
 }
 
-static really_inline void find_whitespace_and_structurals(
+really_inline void find_whitespace_and_structurals(
     simd_input<ARCHITECTURE> in, uint64_t &whitespace,
     uint64_t &structurals) {
   const uint8x16_t low_nibble_mask =
       (uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
   const uint8x16_t high_nibble_mask =
       (uint8x16_t){8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};
-  const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
-  const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
   const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf);
 
-  uint8x16_t nib_0_lo = vandq_u8(in.i0, low_nib_and_mask);
-  uint8x16_t nib_0_hi = vshrq_n_u8(in.i0, 4);
-  uint8x16_t shuf_0_lo = vqtbl1q_u8(low_nibble_mask, nib_0_lo);
-  uint8x16_t shuf_0_hi = vqtbl1q_u8(high_nibble_mask, nib_0_hi);
-  uint8x16_t v_0 = vandq_u8(shuf_0_lo, shuf_0_hi);
-
-  uint8x16_t nib_1_lo = vandq_u8(in.i1, low_nib_and_mask);
-  uint8x16_t nib_1_hi = vshrq_n_u8(in.i1, 4);
-  uint8x16_t shuf_1_lo = vqtbl1q_u8(low_nibble_mask, nib_1_lo);
-  uint8x16_t shuf_1_hi = vqtbl1q_u8(high_nibble_mask, nib_1_hi);
-  uint8x16_t v_1 = vandq_u8(shuf_1_lo, shuf_1_hi);
-
-  uint8x16_t nib_2_lo = vandq_u8(in.i2, low_nib_and_mask);
-  uint8x16_t nib_2_hi = vshrq_n_u8(in.i2, 4);
-  uint8x16_t shuf_2_lo = vqtbl1q_u8(low_nibble_mask, nib_2_lo);
-  uint8x16_t shuf_2_hi = vqtbl1q_u8(high_nibble_mask, nib_2_hi);
-  uint8x16_t v_2 = vandq_u8(shuf_2_lo, shuf_2_hi);
-
-  uint8x16_t nib_3_lo = vandq_u8(in.i3, low_nib_and_mask);
-  uint8x16_t nib_3_hi = vshrq_n_u8(in.i3, 4);
-  uint8x16_t shuf_3_lo = vqtbl1q_u8(low_nibble_mask, nib_3_lo);
-  uint8x16_t shuf_3_hi = vqtbl1q_u8(high_nibble_mask, nib_3_hi);
-  uint8x16_t v_3 = vandq_u8(shuf_3_lo, shuf_3_hi);
-
-  uint8x16_t tmp_0 = vtstq_u8(v_0, structural_shufti_mask);
-  uint8x16_t tmp_1 = vtstq_u8(v_1, structural_shufti_mask);
-  uint8x16_t tmp_2 = vtstq_u8(v_2, structural_shufti_mask);
-  uint8x16_t tmp_3 = vtstq_u8(v_3, structural_shufti_mask);
-  structurals = neon_movemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3);
-
-  uint8x16_t tmp_ws_0 = vtstq_u8(v_0, whitespace_shufti_mask);
-  uint8x16_t tmp_ws_1 = vtstq_u8(v_1, whitespace_shufti_mask);
-  uint8x16_t tmp_ws_2 = vtstq_u8(v_2, whitespace_shufti_mask);
-  uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
-  whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
+  auto v = in.map([&](auto chunk) {
+    uint8x16_t nib_lo = vandq_u8(chunk, low_nib_and_mask);
+    uint8x16_t nib_hi = vshrq_n_u8(chunk, 4);
+    uint8x16_t shuf_lo = vqtbl1q_u8(low_nibble_mask, nib_lo);
+    uint8x16_t shuf_hi = vqtbl1q_u8(high_nibble_mask, nib_hi);
+    return vandq_u8(shuf_lo, shuf_hi);
+  });
+
+  const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
+  structurals = MAP_BITMASK( v, vtstq_u8(_v, structural_shufti_mask) );
+
+  const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
+  whitespace = MAP_BITMASK( v, vtstq_u8(_v, whitespace_shufti_mask) );
 }
 
 #include "generic/stage1_find_marks_flatten.h"

src/generic/stage1_find_marks_flatten.h

Lines changed: 2 additions & 2 deletions
@@ -8,7 +8,7 @@
 // This is just a naive implementation. It should be normally
 // disable, but can be used for research purposes to compare
 // again our optimized version.
-static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
+really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
   uint32_t *out_ptr = base_ptr + base;
   idx -= 64;
   while (bits != 0) {
@@ -26,7 +26,7 @@ static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint3
 // base_ptr[base] incrementing base as we go
 // will potentially store extra values beyond end of valid bits, so base_ptr
 // needs to be large enough to handle this
-static really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
+really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
   // In some instances, the next branch is expensive because it is mispredicted.
   // Unfortunately, in other cases,
   // it helps tremendously.
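For context, flatten_bits expands a 64-bit structural mask into byte indices appended at base_ptr[base]. The first hunk above shows only the opening lines of the naive variant; a plausible completion, assuming simdjson's trailing_zeroes() helper (illustration only, not the code elided from the hunk):

// Naive sketch: write the absolute position of every set bit of `bits`.
// `idx` arrives pointing one past the 64-byte block, hence the idx -= 64.
really_inline void flatten_bits_naive(uint32_t *base_ptr, uint32_t &base,
                                      uint32_t idx, uint64_t bits) {
  uint32_t *out_ptr = base_ptr + base;
  idx -= 64;
  while (bits != 0) {
    *out_ptr = idx + trailing_zeroes(bits); // index of the lowest set bit
    bits = bits & (bits - 1);               // clear that bit
    out_ptr++;
  }
  base = out_ptr - base_ptr;
}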

src/haswell/simd_input.h

Lines changed: 33 additions & 10 deletions
@@ -18,22 +18,45 @@ struct simd_input<Architecture::HASWELL> {
     this->hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
   }
 
+  really_inline simd_input(__m256i a_lo, __m256i a_hi) {
+    this->lo = a_lo;
+    this->hi = a_hi;
+  }
+
+  template <typename F>
+  really_inline simd_input<Architecture::HASWELL> map(F const& map_chunk) {
+    return simd_input<Architecture::HASWELL>(
+      map_chunk(this->lo),
+      map_chunk(this->hi)
+    );
+  }
+
+  template <typename F>
+  really_inline simd_input<Architecture::HASWELL> map(simd_input<Architecture::HASWELL> b, F const& map_chunk) {
+    return simd_input<Architecture::HASWELL>(
+      map_chunk(this->lo, b.lo),
+      map_chunk(this->hi, b.hi)
+    );
+  }
+
+  really_inline uint64_t to_bitmask() {
+    uint64_t r_lo = static_cast<uint32_t>(_mm256_movemask_epi8(this->lo));
+    uint64_t r_hi = _mm256_movemask_epi8(this->hi);
+    return r_lo | (r_hi << 32);
+  }
+
   really_inline uint64_t eq(uint8_t m) {
     const __m256i mask = _mm256_set1_epi8(m);
-    __m256i cmp_res_0 = _mm256_cmpeq_epi8(this->lo, mask);
-    uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
-    __m256i cmp_res_1 = _mm256_cmpeq_epi8(this->hi, mask);
-    uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
-    return res_0 | (res_1 << 32);
+    return this->map( [&](auto a) {
+      return _mm256_cmpeq_epi8(a, mask);
+    }).to_bitmask();
   }
 
   really_inline uint64_t lteq(uint8_t m) {
     const __m256i maxval = _mm256_set1_epi8(m);
-    __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, this->lo), maxval);
-    uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
-    __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, this->hi), maxval);
-    uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
-    return res_0 | (res_1 << 32);
+    return this->map( [&](auto a) {
+      return _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, a), maxval);
+    }).to_bitmask();
   }
 
 }; // struct simd_input
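Two details of the Haswell version are easy to miss. _mm256_movemask_epi8 returns a signed int, so the low half is cast through uint32_t before widening; otherwise sign extension would set the upper 32 bits (the high half needs no cast because the subsequent left shift by 32 discards any extended bits). And because AVX2 has no unsigned byte comparison, lteq computes a <= m as max(m, a) == m. A scalar model of the combining step, with a hypothetical helper name (illustration only):

#include <cstdint>

// Scalar model of to_bitmask()'s combining step. mask_lo/mask_hi stand in for
// the raw signed results of _mm256_movemask_epi8 on the two 32-byte halves.
static inline uint64_t combine_masks(int mask_lo, int mask_hi) {
  uint64_t lo = static_cast<uint32_t>(mask_lo); // zero-extend: bytes 0..31
  uint64_t hi = static_cast<uint32_t>(mask_hi); // bytes 32..63
  return lo | (hi << 32);
}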
