Skip to content

Commit f4fa5b7

Browse files
committed
Add MAP_CHUNKS2, make parameter name related to input
1 parent 169568c commit f4fa5b7

File tree

7 files changed

+71
-33
lines changed

7 files changed

+71
-33
lines changed

src/arm64/simd_input.h

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,18 +63,32 @@ struct simd_input<Architecture::ARM64> {
6363
);
6464
}
6565

66+
template <typename F>
67+
really_inline simd_input<Architecture::ARM64> map(simd_input<Architecture::ARM64> b, F const& map_chunk) {
68+
return simd_input<Architecture::ARM64>(
69+
map_chunk(this->i0, b.i0),
70+
map_chunk(this->i1, b.i1),
71+
map_chunk(this->i2, b.i2),
72+
map_chunk(this->i3, b.i3)
73+
);
74+
}
75+
6676
really_inline uint64_t to_bitmask() {
6777
return neon_movemask_bulk(this->i0, this->i1, this->i2, this->i3);
6878
}
6979

7080
really_inline uint64_t eq(uint8_t m) {
7181
const uint8x16_t mask = vmovq_n_u8(m);
72-
return this->MAP_BITMASK( vceqq_u8(chunk, mask) );
82+
return this->map( [&](auto a) {
83+
return vceqq_u8(a, mask);
84+
}).to_bitmask();
7385
}
7486

7587
really_inline uint64_t lteq(uint8_t m) {
7688
const uint8x16_t mask = vmovq_n_u8(m);
77-
return this->MAP_BITMASK( vcleq_u8(chunk, mask) );
89+
return this->map( [&](auto a) {
90+
return vcleq_u8(a, mask);
91+
}).to_bitmask();
7892
}
7993

8094
}; // struct simd_input

src/arm64/stage1_find_marks.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ really_inline void find_whitespace_and_structurals(
3030
(uint8x16_t){8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};
3131
const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf);
3232

33-
simd_input<ARCHITECTURE> v = in.map([&](auto chunk) {
33+
auto v = in.map([&](auto chunk) {
3434
uint8x16_t nib_lo = vandq_u8(chunk, low_nib_and_mask);
3535
uint8x16_t nib_hi = vshrq_n_u8(chunk, 4);
3636
uint8x16_t shuf_lo = vqtbl1q_u8(low_nibble_mask, nib_lo);
@@ -39,10 +39,10 @@ really_inline void find_whitespace_and_structurals(
3939
});
4040

4141
const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
42-
structurals = v.MAP_BITMASK( vtstq_u8(chunk, structural_shufti_mask) );
42+
structurals = MAP_BITMASK( v, vtstq_u8(_v, structural_shufti_mask) );
4343

4444
const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
45-
whitespace = v.MAP_BITMASK( vtstq_u8(chunk, whitespace_shufti_mask) );
45+
whitespace = MAP_BITMASK( v, vtstq_u8(_v, whitespace_shufti_mask) );
4646
}
4747

4848
#include "generic/stage1_find_marks_flatten.h"

src/haswell/simd_input.h

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,14 @@ struct simd_input<Architecture::HASWELL> {
3131
);
3232
}
3333

34+
template <typename F>
35+
really_inline simd_input<Architecture::HASWELL> map(simd_input<Architecture::HASWELL> b, F const& map_chunk) {
36+
return simd_input<Architecture::HASWELL>(
37+
map_chunk(this->lo, b.lo),
38+
map_chunk(this->hi, b.hi)
39+
);
40+
}
41+
3442
really_inline uint64_t to_bitmask() {
3543
uint64_t r_lo = static_cast<uint32_t>(_mm256_movemask_epi8(this->lo));
3644
uint64_t r_hi = _mm256_movemask_epi8(this->hi);
@@ -39,12 +47,16 @@ struct simd_input<Architecture::HASWELL> {
3947

4048
really_inline uint64_t eq(uint8_t m) {
4149
const __m256i mask = _mm256_set1_epi8(m);
42-
return this->MAP_BITMASK( _mm256_cmpeq_epi8(chunk, mask) );
50+
return this->map( [&](auto a) {
51+
return _mm256_cmpeq_epi8(a, mask);
52+
}).to_bitmask();
4353
}
4454

4555
really_inline uint64_t lteq(uint8_t m) {
4656
const __m256i maxval = _mm256_set1_epi8(m);
47-
return this->MAP_BITMASK( _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, chunk), maxval) );
57+
return this->map( [&](auto a) {
58+
return _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, a), maxval);
59+
}).to_bitmask();
4860
}
4961

5062
}; // struct simd_input

src/haswell/stage1_find_marks.h

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -62,22 +62,20 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
6262
// clang-format off
6363
const __m256i structural_table =
6464
_mm256_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123,
65-
44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
65+
44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
6666
const __m256i white_table = _mm256_setr_epi8(
6767
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100,
6868
32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100);
6969
// clang-format on
7070
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
7171
const __m256i struct_mask = _mm256_set1_epi8(32);
7272

73-
whitespace = in.MAP_BITMASK( _mm256_cmpeq_epi8(chunk, _mm256_shuffle_epi8(white_table, chunk)) );
74-
auto struct_r1 = in.MAP_CHUNKS( _mm256_add_epi8(struct_offset, chunk) );
75-
auto struct_r2 = in.MAP_CHUNKS( _mm256_or_si256(chunk, struct_mask) );
76-
auto struct_r3 = struct_r1.MAP_CHUNKS( _mm256_shuffle_epi8(structural_table, chunk) );
77-
structurals = simd_input<ARCHITECTURE>(
78-
_mm256_cmpeq_epi8(struct_r2.lo, struct_r3.lo),
79-
_mm256_cmpeq_epi8(struct_r2.hi, struct_r3.hi)
80-
).to_bitmask();
73+
whitespace = MAP_BITMASK( in, _mm256_cmpeq_epi8(_in, _mm256_shuffle_epi8(white_table, _in)) );
74+
75+
auto r1 = MAP_CHUNKS( in, _mm256_add_epi8(struct_offset, _in) );
76+
auto r2 = MAP_CHUNKS( in, _mm256_or_si256(_in, struct_mask) );
77+
auto r3 = MAP_CHUNKS( r1, _mm256_shuffle_epi8(structural_table, _r1) );
78+
structurals = MAP_BITMASK2( r2, r3, _mm256_cmpeq_epi8(_r2, _r3) );
8179

8280
#endif // else SIMDJSON_NAIVE_STRUCTURAL
8381
}

src/simd_input.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,9 @@ struct simd_input {
1414
// Map through each simd register in this input, producing another simd_input.
1515
template <typename F>
1616
really_inline simd_input<T> map(F const& map_chunk);
17+
// Map through each simd register across two inputs, producing a single simd_input.
18+
template <typename F>
19+
really_inline simd_input<T> map(simd_input<T> b, F const& map_chunk);
1720
// turn this bytemask (usually the result of a simd comparison operation) into a bitmask.
1821
uint64_t to_bitmask();
1922
// a straightforward comparison of a mask against input.
@@ -22,8 +25,10 @@ struct simd_input {
2225
uint64_t lteq(uint8_t m);
2326
}; // struct simd_input
2427

25-
#define MAP_CHUNKS(EXPR) map([&](auto chunk) { return EXPR; })
26-
#define MAP_BITMASK(EXPR) map([&](auto chunk) { return EXPR; }).to_bitmask()
28+
#define MAP_CHUNKS(A, EXPR) A.map([&](auto _##A) { return (EXPR); })
29+
#define MAP_BITMASK(A, EXPR) MAP_CHUNKS(A, EXPR).to_bitmask()
30+
#define MAP_CHUNKS2(A, B, EXPR) A.map((B), [&](auto _##A, auto _##B) { return (EXPR); })
31+
#define MAP_BITMASK2(A, B, EXPR) MAP_CHUNKS2(A, B, EXPR).to_bitmask()
2732

2833
} // namespace simdjson
2934

src/westmere/simd_input.h

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,16 @@ struct simd_input<Architecture::WESTMERE> {
4040
);
4141
}
4242

43+
template <typename F>
44+
really_inline simd_input<Architecture::WESTMERE> map(simd_input<Architecture::WESTMERE> b, F const& map_chunk) {
45+
return simd_input<Architecture::WESTMERE>(
46+
map_chunk(this->v0, b.v0),
47+
map_chunk(this->v1, b.v1),
48+
map_chunk(this->v2, b.v2),
49+
map_chunk(this->v3, b.v3)
50+
);
51+
}
52+
4353
really_inline uint64_t to_bitmask() {
4454
uint64_t r0 = static_cast<uint32_t>(_mm_movemask_epi8(this->v0));
4555
uint64_t r1 = _mm_movemask_epi8(this->v1);
@@ -50,12 +60,16 @@ struct simd_input<Architecture::WESTMERE> {
5060

5161
really_inline uint64_t eq(uint8_t m) {
5262
const __m128i mask = _mm_set1_epi8(m);
53-
return this->MAP_BITMASK( _mm_cmpeq_epi8(chunk, mask) );
63+
return this->map( [&](auto a) {
64+
return _mm_cmpeq_epi8(a, mask);
65+
}).to_bitmask();
5466
}
5567

5668
really_inline uint64_t lteq(uint8_t m) {
5769
const __m128i maxval = _mm_set1_epi8(m);
58-
return this->MAP_BITMASK( _mm_cmpeq_epi8(_mm_max_epu8(maxval, chunk), maxval) );
70+
return this->map( [&](auto a) {
71+
return _mm_cmpeq_epi8(_mm_max_epu8(maxval, a), maxval);
72+
}).to_bitmask();
5973
}
6074

6175
}; // struct simd_input

src/westmere/stage1_find_marks.h

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -23,22 +23,17 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
2323

2424
const __m128i structural_table =
2525
_mm_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
26-
const __m128i white_table = _mm_setr_epi8(32, 100, 100, 100, 17, 100, 113, 2,
27-
100, 9, 10, 112, 100, 13, 100, 100);
26+
const __m128i white_table = _mm_setr_epi8(32, 100, 100, 100, 17, 100, 113, 2,
27+
100, 9, 10, 112, 100, 13, 100, 100);
2828
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
2929
const __m128i struct_mask = _mm_set1_epi8(32);
3030

31-
whitespace = in.MAP_BITMASK( _mm_cmpeq_epi8(chunk, _mm_shuffle_epi8(white_table, chunk)) );
32-
33-
auto r1 = in.MAP_CHUNKS( _mm_add_epi8(struct_offset, chunk) );
34-
auto r2 = in.MAP_CHUNKS( _mm_or_si128(chunk, struct_mask) );
35-
auto r3 = r1.MAP_CHUNKS( _mm_shuffle_epi8(structural_table, chunk) );
36-
structurals = simd_input<ARCHITECTURE>(
37-
_mm_cmpeq_epi8(r2.v0, r3.v0),
38-
_mm_cmpeq_epi8(r2.v1, r3.v1),
39-
_mm_cmpeq_epi8(r2.v2, r3.v2),
40-
_mm_cmpeq_epi8(r2.v3, r3.v3)
41-
).to_bitmask();
31+
whitespace = MAP_BITMASK( in, _mm_cmpeq_epi8(_in, _mm_shuffle_epi8(white_table, _in)) );
32+
33+
auto r1 = MAP_CHUNKS( in, _mm_add_epi8(struct_offset, _in) );
34+
auto r2 = MAP_CHUNKS( in, _mm_or_si128(_in, struct_mask) );
35+
auto r3 = MAP_CHUNKS( r1, _mm_shuffle_epi8(structural_table, _r1) );
36+
structurals = MAP_BITMASK2( r2, r3, _mm_cmpeq_epi8(_r2, _r3) );
4237
}
4338

4439
#include "generic/stage1_find_marks_flatten.h"

0 commit comments

Comments
 (0)