@@ -34,46 +34,26 @@ really_inline void flatten_bits(uint32_t *&base_ptr, uint32_t idx, uint64_t bits
34
34
return ;
35
35
uint32_t cnt = hamming (bits );
36
36
idx -= 64 ;
37
- {
38
- base_ptr [0 ] = idx + trailing_zeroes (bits );
39
- bits = bits & (bits - 1 );
40
- base_ptr [1 ] = idx + trailing_zeroes (bits );
41
- bits = bits & (bits - 1 );
42
- base_ptr [2 ] = idx + trailing_zeroes (bits );
43
- bits = bits & (bits - 1 );
44
- base_ptr [3 ] = idx + trailing_zeroes (bits );
45
- bits = bits & (bits - 1 );
46
- base_ptr [4 ] = idx + trailing_zeroes (bits );
47
- bits = bits & (bits - 1 );
48
- base_ptr [5 ] = idx + trailing_zeroes (bits );
49
- bits = bits & (bits - 1 );
50
- base_ptr [6 ] = idx + trailing_zeroes (bits );
51
- bits = bits & (bits - 1 );
52
- base_ptr [7 ] = idx + trailing_zeroes (bits );
37
+
38
+ // Do the first 8 all together
39
+ for (int i = 0 ; i < 8 ; i ++ ) {
40
+ base_ptr [i ] = idx + trailing_zeroes (bits );
53
41
bits = bits & (bits - 1 );
54
42
}
55
- // We hope that the next branch is easily predicted.
43
+
44
+ // Do the next 8 all together (we hope in most cases it won't happen at all
45
+ // and the branch is easily predicted).
56
46
if (cnt > 8 ) {
57
- base_ptr [8 ] = idx + trailing_zeroes (bits );
58
- bits = bits & (bits - 1 );
59
- base_ptr [9 ] = idx + trailing_zeroes (bits );
60
- bits = bits & (bits - 1 );
61
- base_ptr [10 ] = idx + trailing_zeroes (bits );
62
- bits = bits & (bits - 1 );
63
- base_ptr [11 ] = idx + trailing_zeroes (bits );
64
- bits = bits & (bits - 1 );
65
- base_ptr [12 ] = idx + trailing_zeroes (bits );
66
- bits = bits & (bits - 1 );
67
- base_ptr [13 ] = idx + trailing_zeroes (bits );
68
- bits = bits & (bits - 1 );
69
- base_ptr [14 ] = idx + trailing_zeroes (bits );
70
- bits = bits & (bits - 1 );
71
- base_ptr [15 ] = idx + trailing_zeroes (bits );
72
- bits = bits & (bits - 1 );
47
+ for (int i = 8 ; i < 16 ; i ++ ) {
48
+ base_ptr [i ] = idx + trailing_zeroes (bits );
49
+ bits = bits & (bits - 1 );
50
+ }
73
51
}
74
- if (cnt > 16 ) { // unlucky: we rarely get here
75
- // since it means having one structural or pseudo-structural element
76
- // every 4 characters (possible with inputs like "","","",...).
52
+
53
+ // Most files don't have 16+ structurals per block, so we take several basically guaranteed
54
+ // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :)
55
+ // or the start of a value ("abc" true 123) every 4 characters.
56
+ if (cnt > 16 ) {
77
57
uint32_t i = 16 ;
78
58
do {
79
59
base_ptr [i ] = idx + trailing_zeroes (bits );
0 commit comments