Skip to content

Commit 585f84a

Browse files
jkeiser authored and lemire committed
Move architecture-specific headers to src/ (simdjson#287)
* Use namespaces instead of templates for stage1 impls
* Move stage1 implementation into the src/ directory
* Move architecture-specific code to src/
1 parent a1bff85 commit 585f84a

40 files changed

+3298
-3347
lines changed

.gitignore

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,14 @@
77
# Build outputs (TODO build to a subdir so we can exclude that instead)
88
/allparserscheckfile
99
/basictests
10+
/benchmark/parse
11+
/benchmark/perfdiff
12+
/benchmark/statisticalmodel
1013
/json2json
1114
/jsoncheck
1215
/jsonpointer
1316
/jsonstats
17+
/libsimdjson.so*
1418
/minify
1519
/numberparsingcheck
1620
/parse
@@ -25,8 +29,33 @@
2529
/simdjson.h
2630
/singleheader/amalgamation_demo
2731
/singleheader/demo
32+
/tests/basictests
33+
/tests/jsoncheck
34+
/tests/pointercheck
35+
/tools/json2json
36+
/tools/jsonstats
37+
/tools/minify
38+
# CMake ignore from https://github.com/github/gitignore/blob/master/CMake.gitignore
2839

29-
# Generic from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
40+
CMakeLists.txt.user
41+
CMakeCache.txt
42+
CMakeFiles
43+
CMakeScripts
44+
Testing
45+
Makefile
46+
cmake_install.cmake
47+
install_manifest.txt
48+
compile_commands.json
49+
CTestTestfile.cmake
50+
_deps
51+
52+
# CMake files that may be specific to our installation
53+
/CPackConfig.cmake
54+
/CPackSourceConfig.cmake
55+
# We check in a custom version of root Makefile that is not generated by CMake
56+
!/Makefile
57+
58+
# C++ ignore from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
3059

3160
# Prerequisites
3261
*.d

Makefile

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ else
2222
ARCHFLAGS ?= -msse4.2 -mpclmul # lowest supported feature set?
2323
endif
2424

25-
CXXFLAGS = $(ARCHFLAGS) -std=c++17 -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux $(EXTRAFLAGS)
25+
CXXFLAGS = $(ARCHFLAGS) -std=c++17 -Wall -Wextra -Wshadow -Iinclude -Isrc -Ibenchmark/linux $(EXTRAFLAGS)
2626
CFLAGS = $(ARCHFLAGS) -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src $(EXTRAFLAGS)
2727

2828

@@ -63,7 +63,11 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck pointercheck
6363
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
6464
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
6565

66-
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten_common.h include/simdjson/stage1_find_marks_flatten_haswell.h
66+
# Load headers and sources
67+
LIBHEADERS=src/simd_input.h src/simdutf8check.h src/stringparsing.h src/arm64/architecture.h src/arm64/simd_input.h src/arm64/simdutf8check.h src/arm64/stage1_find_marks.h src/arm64/stage2_build_tape.h src/arm64/stringparsing.h src/generic/stage1_find_marks_flatten.h src/generic/stage1_find_marks.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/haswell/architecture.h src/haswell/simd_input.h src/haswell/simdutf8check.h src/haswell/stage1_find_marks.h src/haswell/stage2_build_tape.h src/haswell/stringparsing.h src/westmere/architecture.h src/westmere/simd_input.h src/westmere/simdutf8check.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
68+
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/numberparsing.h include/simdjson/padded_string.h include/simdjson/parsedjson.h include/simdjson/parsedjsoniterator.h include/simdjson/portability.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/simdprune_tables.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
69+
HEADERS=$(PUBHEADERS) $(LIBHEADERS)
70+
6771
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
6872
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
6973
MINIFIERLIBFILES=src/jsonminifier.cpp

amalgamation.sh

Lines changed: 41 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,28 @@ $SCRIPTPATH/src/simdjson.cpp
1717
$SCRIPTPATH/src/jsonioutil.cpp
1818
$SCRIPTPATH/src/jsonminifier.cpp
1919
$SCRIPTPATH/src/jsonparser.cpp
20-
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
20+
$SCRIPTPATH/src/simd_input.h
21+
$SCRIPTPATH/src/arm64/architecture.h
22+
$SCRIPTPATH/src/haswell/architecture.h
23+
$SCRIPTPATH/src/westmere/architecture.h
24+
$SCRIPTPATH/src/arm64/simd_input.h
25+
$SCRIPTPATH/src/haswell/simd_input.h
26+
$SCRIPTPATH/src/westmere/simd_input.h
27+
$SCRIPTPATH/src/simdutf8check.h
28+
$SCRIPTPATH/src/arm64/simdutf8check.h
29+
$SCRIPTPATH/src/haswell/simdutf8check.h
30+
$SCRIPTPATH/src/westmere/simdutf8check.h
31+
$SCRIPTPATH/src/arm64/stage1_find_marks.h
32+
$SCRIPTPATH/src/haswell/stage1_find_marks.h
33+
$SCRIPTPATH/src/westmere/stage1_find_marks.h
2134
$SCRIPTPATH/src/stage1_find_marks.cpp
35+
$SCRIPTPATH/src/stringparsing.h
36+
$SCRIPTPATH/src/arm64/stringparsing.h
37+
$SCRIPTPATH/src/haswell/stringparsing.h
38+
$SCRIPTPATH/src/westmere/stringparsing.h
39+
$SCRIPTPATH/src/arm64/stage2_build_tape.h
40+
$SCRIPTPATH/src/haswell/stage2_build_tape.h
41+
$SCRIPTPATH/src/westmere/stage2_build_tape.h
2242
$SCRIPTPATH/src/stage2_build_tape.cpp
2343
$SCRIPTPATH/src/parsedjson.cpp
2444
$SCRIPTPATH/src/parsedjsoniterator.cpp
@@ -36,25 +56,10 @@ $SCRIPTPATH/include/simdjson/jsoncharutils.h
3656
$SCRIPTPATH/include/simdjson/jsonformatutils.h
3757
$SCRIPTPATH/include/simdjson/jsonioutil.h
3858
$SCRIPTPATH/include/simdjson/simdprune_tables.h
39-
$SCRIPTPATH/include/simdjson/simd_input.h
40-
$SCRIPTPATH/include/simdjson/simd_input_haswell.h
41-
$SCRIPTPATH/include/simdjson/simd_input_westmere.h
42-
$SCRIPTPATH/include/simdjson/simd_input_arm64.h
43-
$SCRIPTPATH/include/simdjson/simdutf8check.h
44-
$SCRIPTPATH/include/simdjson/simdutf8check_haswell.h
45-
$SCRIPTPATH/include/simdjson/simdutf8check_westmere.h
46-
$SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
4759
$SCRIPTPATH/include/simdjson/jsonminifier.h
4860
$SCRIPTPATH/include/simdjson/parsedjson.h
4961
$SCRIPTPATH/include/simdjson/parsedjsoniterator.h
5062
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
51-
$SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
52-
$SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
53-
$SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h
54-
$SCRIPTPATH/include/simdjson/stringparsing.h
55-
$SCRIPTPATH/include/simdjson/stringparsing_westmere.h
56-
$SCRIPTPATH/include/simdjson/stringparsing_haswell.h
57-
$SCRIPTPATH/include/simdjson/stringparsing_arm64.h
5863
$SCRIPTPATH/include/simdjson/numberparsing.h
5964
$SCRIPTPATH/include/simdjson/stage2_build_tape.h
6065
$SCRIPTPATH/include/simdjson/jsonparser.h
@@ -74,17 +79,27 @@ function dofile()
7479
# echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
7580
while IFS= read -r line
7681
do
77-
if [[ "${line}" == '#include "simdjson'* ]]; then
78-
# we paste the contents of simdjson header files with names ending in _common.h
79-
# we ignore all other simdjson headers
80-
if [[ "${line}" == '#include "simdjson/'*'_common.h"'* ]]; then
81-
file=$(echo $line| cut -d'"' -f 2)
82-
echo "$(<include/$file)" # we assume those files are always in include/
83-
fi
84-
else
85-
# Otherwise we simply copy the line
86-
echo "$line"
82+
if [[ "${line}" == '#include "'*'"'* ]]; then
83+
file=$(echo $line| cut -d'"' -f 2)
84+
85+
if [[ "${file}" == '../'* ]]; then
86+
file=$(echo $file| cut -d'/' -f 2-)
87+
fi;
88+
89+
# we ignore simdjson headers (except src/generic/*.h); they are handled in the above list
90+
if [ -f include/$file ]; then
91+
continue;
92+
elif [ -f src/$file ]; then
93+
# we paste the contents of src/generic/*.h
94+
if [[ "${file}" == *'generic/'*'.h' ]]; then
95+
echo "$(<src/$file)"
96+
fi;
97+
continue;
98+
fi;
8799
fi;
100+
101+
# Otherwise we simply copy the line
102+
echo "$line"
88103
done < "$1"
89104
echo "/* end file $RELFILE */"
90105
}

benchmark/parse.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,7 @@ int unified_machine_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
8888
}
8989

9090
// Responsible to select the best json_parse implementation
91-
int find_structural_bits_dispatch(const uint8_t *buf, size_t len,
92-
ParsedJson &pj) {
91+
int find_structural_bits_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
9392
Architecture best_implementation = _find_best_supported_implementation();
9493
// Selecting the best implementation
9594
switch (best_implementation) {

include/CMakeLists.txt

Lines changed: 18 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,35 +1,20 @@
1-
set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include/simdjson)
1+
set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
22
set(SIMDJSON_INCLUDE
3-
${SIMDJSON_INCLUDE_DIR}/common_defs.h
4-
${SIMDJSON_INCLUDE_DIR}/isadetection.h
5-
${SIMDJSON_INCLUDE_DIR}/jsoncharutils.h
6-
${SIMDJSON_INCLUDE_DIR}/jsonformatutils.h
7-
${SIMDJSON_INCLUDE_DIR}/jsonioutil.h
8-
${SIMDJSON_INCLUDE_DIR}/jsonminifier.h
9-
${SIMDJSON_INCLUDE_DIR}/jsonparser.h
10-
${SIMDJSON_INCLUDE_DIR}/numberparsing.h
11-
${SIMDJSON_INCLUDE_DIR}/padded_string.h
12-
${SIMDJSON_INCLUDE_DIR}/parsedjson.h
13-
${SIMDJSON_INCLUDE_DIR}/parsedjsoniterator.h
14-
${SIMDJSON_INCLUDE_DIR}/portability.h
15-
${SIMDJSON_INCLUDE_DIR}/simdjson.h
16-
${SIMDJSON_INCLUDE_DIR}/simdjson_version.h
17-
${SIMDJSON_INCLUDE_DIR}/simdprune_tables.h
18-
${SIMDJSON_INCLUDE_DIR}/simdutf8check_arm64.h
19-
${SIMDJSON_INCLUDE_DIR}/simdutf8check_haswell.h
20-
${SIMDJSON_INCLUDE_DIR}/simdutf8check_westmere.h
21-
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks.h
22-
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_arm64.h
23-
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_common.h
24-
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_common.h
25-
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_haswell.h
26-
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_haswell.h
27-
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_westmere.h
28-
${SIMDJSON_INCLUDE_DIR}/stage2_build_tape.h
29-
${SIMDJSON_INCLUDE_DIR}/stage2_build_tape_common.h
30-
${SIMDJSON_INCLUDE_DIR}/stringparsing.h
31-
${SIMDJSON_INCLUDE_DIR}/stringparsing_arm64.h
32-
${SIMDJSON_INCLUDE_DIR}/stringparsing_common.h
33-
${SIMDJSON_INCLUDE_DIR}/stringparsing_haswell.h
34-
${SIMDJSON_INCLUDE_DIR}/stringparsing_westmere.h
3+
${SIMDJSON_INCLUDE_DIR}/simdjson/common_defs.h
4+
${SIMDJSON_INCLUDE_DIR}/simdjson/isadetection.h
5+
${SIMDJSON_INCLUDE_DIR}/simdjson/jsoncharutils.h
6+
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonformatutils.h
7+
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
8+
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonminifier.h
9+
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonparser.h
10+
${SIMDJSON_INCLUDE_DIR}/simdjson/numberparsing.h
11+
${SIMDJSON_INCLUDE_DIR}/simdjson/padded_string.h
12+
${SIMDJSON_INCLUDE_DIR}/simdjson/parsedjson.h
13+
${SIMDJSON_INCLUDE_DIR}/simdjson/parsedjsoniterator.h
14+
${SIMDJSON_INCLUDE_DIR}/simdjson/portability.h
15+
${SIMDJSON_INCLUDE_DIR}/simdjson/simdjson.h
16+
${SIMDJSON_INCLUDE_DIR}/simdjson/simdjson_version.h
17+
${SIMDJSON_INCLUDE_DIR}/simdjson/simdprune_tables.h
18+
${SIMDJSON_INCLUDE_DIR}/simdjson/stage1_find_marks.h
19+
${SIMDJSON_INCLUDE_DIR}/simdjson/stage2_build_tape.h
3520
)

include/simdjson/stage1_find_marks.h

Lines changed: 3 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -1,108 +1,19 @@
11
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
22
#define SIMDJSON_STAGE1_FIND_MARKS_H
33

4-
#include "simdjson/common_defs.h"
54
#include "simdjson/parsedjson.h"
6-
#include "simdjson/portability.h"
75
#include "simdjson/simdjson.h"
8-
#include "simdjson/simd_input.h"
9-
#include <cassert>
106

117
namespace simdjson {
128

13-
template <Architecture> uint64_t compute_quote_mask(uint64_t quote_bits);
14-
15-
namespace {
16-
// for when clmul is unavailable
17-
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
18-
uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
19-
quote_mask = quote_mask ^ (quote_mask << 2);
20-
quote_mask = quote_mask ^ (quote_mask << 4);
21-
quote_mask = quote_mask ^ (quote_mask << 8);
22-
quote_mask = quote_mask ^ (quote_mask << 16);
23-
quote_mask = quote_mask ^ (quote_mask << 32);
24-
return quote_mask;
25-
}
26-
} // namespace
27-
28-
template <Architecture T>
29-
really_inline uint64_t find_odd_backslash_sequences(
30-
simd_input<T> in, uint64_t &prev_iter_ends_odd_backslash);
31-
32-
template <Architecture T>
33-
really_inline uint64_t find_quote_mask_and_bits(
34-
simd_input<T> in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote,
35-
uint64_t &quote_bits, uint64_t &error_mask);
36-
37-
// do a 'shufti' to detect structural JSON characters
38-
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
39-
// these go into the first 3 buckets of the comparison (1/2/4)
40-
41-
// we are also interested in the four whitespace characters
42-
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
43-
// these go into the next 2 buckets of the comparison (8/16)
44-
template <Architecture T>
45-
void find_whitespace_and_structurals(simd_input<T> in, uint64_t &whitespace,
46-
uint64_t &structurals);
47-
48-
// return an updated structural bit vector with quoted contents cleared out and
49-
// pseudo-structural characters added to the mask
50-
// updates prev_iter_ends_pseudo_pred which tells us whether the previous
51-
// iteration ended on a whitespace or a structural character (which means that
52-
// the next iteration
53-
// will have a pseudo-structural character at its start)
54-
really_inline uint64_t finalize_structurals(
55-
uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
56-
uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
57-
// mask off anything inside quotes
58-
structurals &= ~quote_mask;
59-
// add the real quote bits back into our bit_mask as well, so we can
60-
// quickly traverse the strings we've spent all this trouble gathering
61-
structurals |= quote_bits;
62-
// Now, establish "pseudo-structural characters". These are non-whitespace
63-
// characters that are (a) outside quotes and (b) have a predecessor that's
64-
// either whitespace or a structural character. This means that subsequent
65-
// passes will get a chance to encounter the first character of every string
66-
// of non-whitespace and, if we're parsing an atom like true/false/null or a
67-
// number we can stop at the first whitespace or structural character
68-
// following it.
69-
70-
// a qualified predecessor is something that can happen 1 position before a
71-
// pseudo-structural character
72-
uint64_t pseudo_pred = structurals | whitespace;
73-
74-
uint64_t shifted_pseudo_pred =
75-
(pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
76-
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
77-
uint64_t pseudo_structurals =
78-
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
79-
structurals |= pseudo_structurals;
80-
81-
// now, we've used our close quotes all we need to. So let's switch them off
82-
// they will be off in the quote mask and on in quote bits.
83-
structurals &= ~(quote_bits & ~quote_mask);
84-
return structurals;
85-
}
86-
879
template <Architecture T = Architecture::NATIVE>
88-
int find_structural_bits(const uint8_t *buf, size_t len,
89-
simdjson::ParsedJson &pj);
10+
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj);
9011

9112
template <Architecture T = Architecture::NATIVE>
92-
int find_structural_bits(const char *buf, size_t len,
93-
simdjson::ParsedJson &pj) {
13+
int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj) {
9414
return find_structural_bits((const uint8_t *)buf, len, pj);
9515
}
9616

97-
// flatten out values in 'bits' assuming that they are to have values of idx
98-
// plus their position in the bitvector, and store these indexes at
99-
// base_ptr[base] incrementing base as we go
100-
// will potentially store extra values beyond end of valid bits, so base_ptr
101-
// needs to be large enough to handle this
102-
template <Architecture T = Architecture::NATIVE>
103-
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
104-
uint32_t idx, uint64_t bits);
105-
106-
} // namespace simdjson
17+
}; // namespace simdjson
10718

10819
#endif

0 commit comments

Comments
 (0)