Skip to content

Commit aea7991

Browse files
authored
Adding a "get_corpus" benchmark. (simdjson#456)
* Adding a "get_corpus" benchmark. * Improving portability.
1 parent 80b4dd2 commit aea7991

File tree

3 files changed

+59
-0
lines changed

3 files changed

+59
-0
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,10 @@ $(JSON_INCLUDE) $(SAJSON_INCLUDE) $(RAPIDJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJ
160160
parse: benchmark/parse.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
161161
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
162162

163+
get_corpus_benchmark: benchmark/get_corpus_benchmark.cpp $(HEADERS) $(LIBFILES)
164+
$(CXX) $(CXXFLAGS) -o get_corpus_benchmark $(LIBFILES) benchmark/get_corpus_benchmark.cpp $(LIBFLAGS)
165+
166+
163167
parse_stream: benchmark/parse_stream.cpp benchmark/json_parser.h benchmark/event_counter.h benchmark/benchmarker.h $(HEADERS) $(LIBFILES)
164168
$(CXX) $(CXXFLAGS) -o parse_stream $(LIBFILES) benchmark/parse_stream.cpp $(LIBFLAGS)
165169

benchmark/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@ target_include_directories(${SIMDJSON_LIB_NAME}
77
add_cpp_benchmark(parse)
88
add_cpp_benchmark(statisticalmodel)
99
add_cpp_benchmark(parse_stream)
10+
add_cpp_benchmark(get_corpus_benchmark)
1011

1112
add_executable(perfdiff perfdiff.cpp)

benchmark/get_corpus_benchmark.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
2+
#include "simdjson/common_defs.h"
3+
#include "simdjson/jsonioutil.h"
4+
#include "simdjson/jsonparser.h"
5+
#include <chrono>
6+
#include <cstring>
7+
#include <iostream>
8+
9+
/**
 * Times a single simdjson::get_corpus() call on `filename` and reports the
 * observed throughput.
 *
 * @param filename path handed to simdjson::get_corpus().
 * @param p        receives the loaded data via swap(); its size after the call
 *                 is what the throughput is computed from.
 * @return p.size() expressed in GiB (2^30 bytes) divided by the elapsed
 *         wall-clock seconds measured with std::chrono::steady_clock.
 *
 * Whatever simdjson::get_corpus() throws propagates to the caller.
 */
never_inline
double bench(std::string filename, simdjson::padded_string& p) {
  using bench_clock = std::chrono::steady_clock;

  const auto t_begin = bench_clock::now();
  // Kept as one statement on purpose: the temporary returned by get_corpus()
  // is destroyed at the end of this full expression, i.e. before the clock is
  // read again, so its teardown stays inside the timed region.
  simdjson::get_corpus(filename).swap(p);
  const auto t_end = bench_clock::now();

  const std::chrono::duration<double> seconds = t_end - t_begin;
  const double gigabytes = p.size() / (1024. * 1024 * 1024.);
  return gigabytes / seconds.count();
}
19+
20+
/**
 * Benchmark driver: repeatedly loads the file named on the command line with
 * bench() and prints the average, minimum and maximum throughput in GB/s.
 *
 * Usage: <program> <jsonfile>
 * Only the first argument is used; a warning is printed if more are given.
 *
 * @return EXIT_SUCCESS after printing the statistics; EXIT_FAILURE when no
 *         file name was supplied or when loading throws a std::exception.
 */
int main(int argc, char *argv[]) {
  int optind = 1;
  if (optind >= argc) {
    std::cerr << "Reads document as far as possible. " << std::endl;
    std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
    return EXIT_FAILURE;
  }
  const char *filename = argv[optind];
  if (optind + 1 < argc) {
    std::cerr << "warning: ignoring everything after " << argv[optind + 1]
              << std::endl;
  }

  simdjson::padded_string p;
  double meanval = 0;
  double maxval = 0;
  double minval = 0;
  size_t times = 0;
  try {
    // Untimed first load. It tells us the file size (which decides how many
    // timed runs to do) and it lives inside the try block so that a missing or
    // unreadable file is reported below instead of escaping main() uncaught.
    bench(filename, p);
    std::cout << "file size: " << (p.size() / (1024. * 1024 * 1024.)) << " GB"
              << std::endl;
    // Files above 1 GiB get fewer repetitions.
    times = p.size() > 1024 * 1024 * 1024 ? 5 : 50;
    for (size_t i = 0; i < times; i++) {
      const double tval = bench(filename, p);
      // Seed min/max from the first measurement rather than a magic sentinel.
      if (i == 0 || maxval < tval) maxval = tval;
      if (i == 0 || minval > tval) minval = tval;
      meanval += tval;
    }
  } catch (const std::exception &) { // caught by reference to base
    std::cerr << "Could not load the file " << filename << std::endl;
    return EXIT_FAILURE;
  }

  std::cout << "average speed: " << meanval / times << " GB/s" << std::endl;
  std::cout << "min speed    : " << minval << " GB/s" << std::endl;
  std::cout << "max speed    : " << maxval << " GB/s" << std::endl;
  return EXIT_SUCCESS;
}

0 commit comments

Comments
 (0)