Skip to content

Commit 910f272

Browse files
authored
Add parser implementation interface and selection API (simdjson#501)
* Make architecture implementations virtual functions
  - Easier to add new architectures (add implementation to implementation.cpp)
  - Easier to add new algorithms / functions to architecture selection (add to implementation.h, implement)
  - Automatically select best implementation in static initialization
  - Allow user to explicitly select implementation with a string (i.e. parameter)
  - Allow user to inspect current implementation name/description
  - Allow user to list available implementations
  - Eliminate architecture enum and architecture-based templating
  - Add noexcept in non-inline functions
* Move implementation static methods to their own classes
* Detect best supported implementation on first use
* available_implementationsI() -> available_implementations
1 parent b6423a3 commit 910f272

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

43 files changed

+1565
-1730
lines changed

Makefile

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -70,10 +70,10 @@ LIBHEADERS_HASWELL= src/haswell/bitmanipulation.h src/haswell/bitmask.h src/h
7070
LIBHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/westmere/intrinsics.h src/westmere/numberparsing.h src/westmere/simd.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
7171
LIBHEADERS=src/jsoncharutils.h src/simdprune_tables.h $(LIBHEADERS_GENERIC) $(LIBHEADERS_ARM64) $(LIBHEADERS_HASWELL) $(LIBHEADERS_WESTMERE)
7272

73-
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/document_iterator.h include/simdjson/document_parser.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/architecture.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
73+
PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h
7474
HEADERS=$(PUBHEADERS) $(LIBHEADERS)
7575

76-
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document.cpp src/document_parser.cpp
76+
LIBFILES=src/document.cpp src/error.cpp src/jsonioutil.cpp src/implementation.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp
7777
MINIFIERHEADERS=include/simdjson/jsonminifier.h
7878
MINIFIERLIBFILES=src/jsonminifier.cpp
7979

@@ -160,16 +160,16 @@ submodules:
160160

161161
$(JSON_INCLUDE) $(SAJSON_INCLUDE) $(RAPIDJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJSON_INCLUDE) $(GASON_INCLUDE) $(UJSON4C_INCLUDE) $(CJSON_INCLUDE) $(JSMN_INCLUDE) : submodules
162162

163-
parse: benchmark/parse.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
163+
parse: benchmark/parse.cpp benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
164164
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
165165

166166
get_corpus_benchmark: benchmark/get_corpus_benchmark.cpp $(HEADERS) $(LIBFILES)
167167
$(CXX) $(CXXFLAGS) -o get_corpus_benchmark $(LIBFILES) benchmark/get_corpus_benchmark.cpp $(LIBFLAGS)
168168

169-
parse_stream: benchmark/parse_stream.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
169+
parse_stream: benchmark/parse_stream.cpp benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
170170
$(CXX) $(CXXFLAGS) -o parse_stream $(LIBFILES) benchmark/parse_stream.cpp $(LIBFLAGS)
171171

172-
benchfeatures: benchmark/benchfeatures.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
172+
benchfeatures: benchmark/benchfeatures.cpp benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
173173
$(CXX) $(CXXFLAGS) -o benchfeatures $(LIBFILES) benchmark/benchfeatures.cpp $(LIBFLAGS)
174174

175175
perfdiff: benchmark/perfdiff.cpp
@@ -210,15 +210,15 @@ readme_examples: tests/readme_examples.cpp $(HEADERS) $(LIBFILES)
210210

211211

212212
numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
213-
$(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document_parser.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
213+
$(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/implementation.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
214214

215215
integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)
216216
$(CXX) $(CXXFLAGS) -o integer_tests $(LIBFILES) tests/integer_tests.cpp -I. $(LIBFLAGS)
217217

218218

219219

220220
stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
221-
$(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document_parser.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
221+
$(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/implementation.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
222222

223223
pointercheck:tests/pointercheck.cpp $(HEADERS) $(LIBFILES)
224224
$(CXX) $(CXXFLAGS) -o pointercheck $(LIBFILES) tests/pointercheck.cpp -I. $(LIBFLAGS)

amalgamation.sh

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,14 +16,13 @@ INCLUDEPATH="$SCRIPTPATH/include"
1616

1717
# this list excludes the "src/generic headers"
1818
ALLCFILES="
19+
document.cpp
1920
error.cpp
21+
implementation.cpp
2022
jsonioutil.cpp
2123
jsonminifier.cpp
22-
jsonparser.cpp
2324
stage1_find_marks.cpp
2425
stage2_build_tape.cpp
25-
document.cpp
26-
document_parser.cpp
2726
"
2827

2928
# order matters
@@ -38,11 +37,7 @@ simdjson/padded_string.h
3837
simdjson/jsonioutil.h
3938
simdjson/jsonminifier.h
4039
simdjson/document.h
41-
simdjson/document_iterator.h
42-
simdjson/document_parser.h
4340
simdjson/parsedjson.h
44-
simdjson/stage1_find_marks.h
45-
simdjson/stage2_build_tape.h
4641
simdjson/jsonparser.h
4742
simdjson/jsonstream.h
4843
"

benchmark/benchfeatures.cpp

Lines changed: 7 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
#include "json_parser.h"
21
#include "event_counter.h"
32

43
#include <cassert>
@@ -34,10 +33,7 @@
3433
#include "simdjson/common_defs.h"
3534
#include "simdjson/isadetection.h"
3635
#include "simdjson/jsonioutil.h"
37-
#include "simdjson/jsonparser.h"
38-
#include "simdjson/parsedjson.h"
39-
#include "simdjson/stage1_find_marks.h"
40-
#include "simdjson/stage2_build_tape.h"
36+
#include "simdjson/document.h"
4137

4238
#include <functional>
4339

@@ -132,6 +128,7 @@ struct option_struct {
132128
if (arch == architecture::UNSUPPORTED) {
133129
arch = find_best_supported_architecture();
134130
}
131+
document::parser::use_implementation(arch);
135132
}
136133

137134
template<typename F>
@@ -160,7 +157,7 @@ struct feature_benchmarker {
160157
benchmarker struct23;
161158
benchmarker struct23_miss;
162159

163-
feature_benchmarker(json_parser& parser, event_collector& collector) :
160+
feature_benchmarker(const simdjson::implementation &parser, event_collector& collector) :
164161
utf8 ("jsonexamples/generated/utf-8.json", parser, collector),
165162
utf8_miss ("jsonexamples/generated/utf-8-miss.json", parser, collector),
166163
escape ("jsonexamples/generated/escape.json", parser, collector),
@@ -410,12 +407,10 @@ int main(int argc, char *argv[]) {
410407
event_collector collector;
411408

412409
// Set up benchmarkers by reading all files
413-
json_parser parser(options.arch);
414-
415-
feature_benchmarker features(parser, collector);
416-
benchmarker gsoc_2018("jsonexamples/gsoc-2018.json", parser, collector);
417-
benchmarker twitter("jsonexamples/twitter.json", parser, collector);
418-
benchmarker random("jsonexamples/random.json", parser, collector);
410+
feature_benchmarker features(collector);
411+
benchmarker gsoc_2018("jsonexamples/gsoc-2018.json", collector);
412+
benchmarker twitter("jsonexamples/twitter.json", collector);
413+
benchmarker random("jsonexamples/random.json", collector);
419414

420415
// Run the benchmarks
421416
progress_bar progress(options.iterations, 100);

benchmark/benchmarker.h

Lines changed: 17 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
#ifndef __BENCHMARKER_H
22
#define __BENCHMARKER_H
33

4-
#include "json_parser.h"
54
#include "event_counter.h"
65

76
#include <cassert>
@@ -38,8 +37,6 @@
3837
#include "simdjson/jsonioutil.h"
3938
#include "simdjson/jsonparser.h"
4039
#include "simdjson/document.h"
41-
#include "simdjson/stage1_find_marks.h"
42-
#include "simdjson/stage2_build_tape.h"
4340

4441
#include <functional>
4542

@@ -264,8 +261,6 @@ struct benchmarker {
264261
const padded_string json;
265262
// JSON filename
266263
const char *filename;
267-
// Parser that will parse the JSON file
268-
const json_parser& parser;
269264
// Event collector that can be turned on to measure cycles, missed branches, etc.
270265
event_collector& collector;
271266

@@ -281,8 +276,8 @@ struct benchmarker {
281276
// Speed and event summary for allocation
282277
event_aggregate allocate_stage;
283278

284-
benchmarker(const char *_filename, const json_parser& _parser, event_collector& _collector)
285-
: json(load_json(_filename)), filename(_filename), parser(_parser), collector(_collector), stats(NULL) {}
279+
benchmarker(const char *_filename, event_collector& _collector)
280+
: json(load_json(_filename)), filename(_filename), collector(_collector), stats(NULL) {}
286281

287282
~benchmarker() {
288283
if (stats) {
@@ -307,14 +302,15 @@ struct benchmarker {
307302
really_inline void run_iteration(bool stage1_only, bool hotbuffers=false) {
308303
// Allocate document::parser
309304
collector.start();
310-
document::parser pj;
311-
bool allocok = pj.allocate_capacity(json.size());
305+
document::parser parser;
306+
bool allocok = parser.allocate_capacity(json.size());
312307
event_count allocate_count = collector.end();
313308
allocate_stage << allocate_count;
309+
// Run it once to get hot buffers
314310
if(hotbuffers) {
315-
int result = parser.parse((const uint8_t *)json.data(), json.size(), pj);
316-
if (result != simdjson::SUCCESS) {
317-
exit_error(string("Failed to parse ") + filename + string(":") + pj.get_error_message());
311+
auto result = parser.parse((const uint8_t *)json.data(), json.size());
312+
if (result.error) {
313+
exit_error(string("Failed to parse ") + filename + string(":") + result.get_error_message());
318314
}
319315
}
320316

@@ -325,12 +321,11 @@ struct benchmarker {
325321

326322
// Stage 1 (find structurals)
327323
collector.start();
328-
int result = parser.stage1((const uint8_t *)json.data(), json.size(), pj);
324+
error_code error = active_implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false);
329325
event_count stage1_count = collector.end();
330326
stage1 << stage1_count;
331-
332-
if (result != simdjson::SUCCESS) {
333-
exit_error(string("Failed to parse ") + filename + " during stage 1: " + pj.get_error_message());
327+
if (error) {
328+
exit_error(string("Failed to parse ") + filename + " during stage 1: " + error_message(error));
334329
}
335330

336331
// Stage 2 (unified machine) and the rest
@@ -340,9 +335,9 @@ struct benchmarker {
340335
} else {
341336
event_count stage2_count;
342337
collector.start();
343-
result = parser.stage2((const uint8_t *)json.data(), json.size(), pj);
344-
if (result != simdjson::SUCCESS) {
345-
exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + pj.get_error_message());
338+
error = active_implementation->stage2((const uint8_t *)json.data(), json.size(), parser);
339+
if (error) {
340+
exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + error_message(error));
346341
}
347342
stage2_count = collector.end();
348343
stage2 << stage2_count;
@@ -351,12 +346,12 @@ struct benchmarker {
351346
// Calculate stats the first time we parse
352347
if (stats == NULL) {
353348
if (stage1_only) { // we need stage 2 once
354-
result = parser.stage2((const uint8_t *)json.data(), json.size(), pj);
355-
if (result != simdjson::SUCCESS) {
349+
error = active_implementation->stage2((const uint8_t *)json.data(), json.size(), parser);
350+
if (error) {
356351
printf("Warning: failed to parse during stage 2. Unable to acquire statistics.\n");
357352
}
358353
}
359-
stats = new json_stats(json, pj);
354+
stats = new json_stats(json, parser);
360355
}
361356
}
362357

benchmark/json_parser.h

Lines changed: 0 additions & 136 deletions
This file was deleted.

0 commit comments

Comments
 (0)