Skip to content

Commit 158a3c5

Browse files
authored
Merge pull request simdjson#1351 from simdjson/jkeiser/unordered-lookup
Make `object["field"]` order-insensitive in On Demand
2 parents 93807bf + 98666e8 commit 158a3c5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+3354
-2514
lines changed

.appveyor.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,24 @@ environment:
1313

1414
matrix:
1515
- job_name: VS2019
16-
CMAKE_ARGS: -A %Platform%
16+
CMAKE_ARGS: -A %Platform%
1717
- job_name: VS2019ARM
1818
CMAKE_ARGS: -A ARM64 -DCMAKE_CROSSCOMPILING=1 -D SIMDJSON_GOOGLE_BENCHMARKS=OFF # Does Google Benchmark builds under VS ARM?
1919
- job_name: VS2017 (Static, No Threads)
2020
image: Visual Studio 2017
21-
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_ENABLE_THREADS=OFF
21+
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_ENABLE_THREADS=OFF
2222
CTEST_ARGS: -LE explicitonly
2323
- job_name: VS2019 (Win32)
2424
platform: Win32
25-
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=OFF -DSIMDJSON_ENABLE_THREADS=ON # This should be the default. Testing anyway.
26-
CTEST_ARGS: -E "checkperf|ondemand_basictests"
25+
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=OFF -DSIMDJSON_ENABLE_THREADS=ON # This should be the default. Testing anyway.
26+
CTEST_ARGS: -LE explicitonly
2727
- job_name: VS2019 (Win32, No Exceptions)
2828
platform: Win32
29-
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=OFF -DSIMDJSON_ENABLE_THREADS=ON -DSIMDJSON_EXCEPTIONS=OFF
30-
CTEST_ARGS: -E "checkperf|ondemand_basictests"
29+
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=OFF -DSIMDJSON_ENABLE_THREADS=ON -DSIMDJSON_EXCEPTIONS=OFF
30+
CTEST_ARGS: -LE explicitonly
3131
- job_name: VS2015
3232
image: Visual Studio 2015
33-
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_ENABLE_THREADS=OFF
33+
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_ENABLE_THREADS=OFF
3434
CTEST_ARGS: -LE explicitonly
3535

3636
build_script:

.github/workflows/mingw-ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,5 @@ jobs:
6161
mkdir build32
6262
cd build32
6363
cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
64-
cmake --build . --target parse_many_test jsoncheck basictests ondemand_basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
65-
ctest -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
64+
cmake --build . --target acceptance_tests --verbose
65+
ctest -L acceptance --output-on-failure

.github/workflows/mingw64-ci.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ jobs:
6161
mkdir build64
6262
cd build64
6363
cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
64-
cmake --build . --target parse_many_test jsoncheck basictests ondemand_basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
65-
ctest -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
64+
cmake --build . --target acceptance_tests --verbose
65+
ctest -L acceptance --output-on-failure
6666
cd ..
6767
mkdir build64debug
6868
cd build64debug
6969
cmake -DCMAKE_BUILD_TYPE=Debug -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
70-
cmake --build . --target parse_many_test jsoncheck basictests ondemand_basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
71-
ctest -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
70+
cmake --build . --target acceptance_tests --verbose
71+
ctest -L acceptance --output-on-failure

benchmark/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
include_directories( . linux )
22
link_libraries(simdjson-windows-headers test-data)
33

4-
4+
# bench_sax links against the source
55
if (TARGET benchmark::benchmark)
66
add_executable(bench_sax bench_sax.cpp)
77
target_link_libraries(bench_sax PRIVATE simdjson-internal-flags simdjson-include-source benchmark::benchmark)
88
endif (TARGET benchmark::benchmark)
99

10+
# Everything else links against simdjson proper
1011
link_libraries(simdjson simdjson-flags)
12+
1113
add_executable(benchfeatures benchfeatures.cpp)
1214
add_executable(get_corpus_benchmark get_corpus_benchmark.cpp)
1315
add_executable(perfdiff perfdiff.cpp)
@@ -42,6 +44,7 @@ endif()
4244

4345
if (TARGET benchmark::benchmark)
4446
link_libraries(benchmark::benchmark)
47+
add_subdirectory(largerandom)
4548
add_executable(bench_parse_call bench_parse_call.cpp)
4649
add_executable(bench_dom_api bench_dom_api.cpp)
4750
add_executable(bench_ondemand bench_ondemand.cpp)

benchmark/bench_ondemand.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ SIMDJSON_POP_DISABLE_WARNINGS
1212
#include "partial_tweets/dom.h"
1313

1414
#include "largerandom/ondemand.h"
15+
#include "largerandom/ondemand_unordered.h"
1516
// #include "largerandom/iter.h"
1617
#include "largerandom/dom.h"
1718

benchmark/distinctuserid/ondemand.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
3333
ids.clear();
3434
// Walk the document, parsing as we go
3535
auto doc = parser.iterate(json);
36-
for (ondemand::object tweet : doc["statuses"]) {
36+
for (ondemand::object tweet : doc.find_field("statuses")) {
3737
// We believe that all statuses have a matching
3838
// user, and we are willing to throw when they do not.
39-
ids.push_back(tweet["user"]["id"]);
39+
ids.push_back(tweet.find_field("user").find_field("id"));
4040
// Not all tweets have a "retweeted_status", but when they do
4141
// we want to go and find the user within.
42-
auto retweet = tweet["retweeted_status"];
42+
auto retweet = tweet.find_field("retweeted_status");
4343
if(!retweet.error()) {
44-
ids.push_back(retweet["user"]["id"]);
44+
ids.push_back(retweet.find_field("user").find_field("id"));
4545
}
4646
}
4747
remove_duplicates(ids);

benchmark/find_tweet/ondemand.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
3333
text = "";
3434
// Walk the document, parsing as we go
3535
auto doc = parser.iterate(json);
36-
for (ondemand::object tweet : doc["statuses"]) {
37-
if (uint64_t(tweet["id"]) == TWEET_ID) {
38-
text = tweet["text"];
36+
for (ondemand::object tweet : doc.find_field("statuses")) {
37+
if (uint64_t(tweet.find_field("id")) == TWEET_ID) {
38+
text = tweet.find_field("text");
3939
return true;
4040
}
4141
}

benchmark/kostya/ondemand.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
2727
using std::endl;
2828

2929
auto doc = parser.iterate(json);
30-
for (ondemand::object coord : doc["coordinates"]) {
31-
container.emplace_back(my_point{coord["x"], coord["y"], coord["z"]});
30+
for (ondemand::object coord : doc.find_field("coordinates")) {
31+
container.emplace_back(my_point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
3232
}
3333

3434
return true;
@@ -56,10 +56,10 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
5656
count = 0;
5757

5858
auto doc = parser.iterate(json);
59-
for (ondemand::object coord : doc["coordinates"]) {
60-
sum.x += double(coord["x"]);
61-
sum.y += double(coord["y"]);
62-
sum.z += double(coord["z"]);
59+
for (ondemand::object coord : doc.find_field("coordinates")) {
60+
sum.x += double(coord.find_field("x"));
61+
sum.y += double(coord.find_field("y"));
62+
sum.z += double(coord.find_field("z"));
6363
count++;
6464
}
6565

benchmark/largerandom/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
if (TARGET benchmark::benchmark)
2+
link_libraries(benchmark::benchmark)
3+
add_executable(bench_ondemand_largerandom bench_ondemand_largerandom.cpp)
4+
add_executable(bench_ondemand_unordered_largerandom bench_ondemand_unordered_largerandom.cpp)
5+
endif()
benchmark/largerandom/bench_ondemand_largerandom.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include "simdjson.h"
2+
#include <iostream>
3+
#include <sstream>
4+
#include <random>
5+
#include <vector>
6+
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
7+
#include <benchmark/benchmark.h>
8+
SIMDJSON_POP_DISABLE_WARNINGS
9+
10+
#define BENCHMARK_NO_DOM
11+
12+
#include "largerandom/ondemand.h"
13+
14+
BENCHMARK_MAIN();
benchmark/largerandom/bench_ondemand_unordered_largerandom.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include "simdjson.h"
2+
#include <iostream>
3+
#include <sstream>
4+
#include <random>
5+
#include <vector>
6+
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
7+
#include <benchmark/benchmark.h>
8+
SIMDJSON_POP_DISABLE_WARNINGS
9+
10+
#define BENCHMARK_NO_DOM
11+
12+
#include "largerandom/ondemand_unordered.h"
13+
14+
BENCHMARK_MAIN();

benchmark/largerandom/dom.h

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -32,38 +32,6 @@ simdjson_really_inline bool Dom::Run(const padded_string &json) {
3232

3333
BENCHMARK_TEMPLATE(LargeRandom, Dom);
3434

35-
namespace sum {
36-
37-
class Dom {
38-
public:
39-
simdjson_really_inline bool Run(const padded_string &json);
40-
41-
simdjson_really_inline my_point &Result() { return sum; }
42-
simdjson_really_inline size_t ItemCount() { return count; }
43-
44-
private:
45-
dom::parser parser{};
46-
my_point sum{};
47-
size_t count{};
48-
};
49-
50-
simdjson_really_inline bool Dom::Run(const padded_string &json) {
51-
sum = { 0, 0, 0 };
52-
count = 0;
53-
54-
for (auto coord : parser.parse(json)) {
55-
sum.x += double(coord["x"]);
56-
sum.y += double(coord["y"]);
57-
sum.z += double(coord["z"]);
58-
count++;
59-
}
60-
61-
return true;
62-
}
63-
64-
BENCHMARK_TEMPLATE(LargeRandomSum, Dom);
65-
66-
} // namespace sum
6735
} // namespace largerandom
6836

6937
#endif // SIMDJSON_EXCEPTIONS

benchmark/largerandom/iter.h

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -48,45 +48,6 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {
4848

4949
BENCHMARK_TEMPLATE(LargeRandom, Iter);
5050

51-
52-
namespace sum {
53-
54-
class Iter {
55-
public:
56-
simdjson_really_inline bool Run(const padded_string &json);
57-
58-
simdjson_really_inline my_point &Result() { return sum; }
59-
simdjson_really_inline size_t ItemCount() { return count; }
60-
61-
private:
62-
ondemand::parser parser{};
63-
my_point sum{};
64-
size_t count{};
65-
};
66-
67-
simdjson_really_inline bool Iter::Run(const padded_string &json) {
68-
sum = {0,0,0};
69-
count = 0;
70-
71-
auto iter = parser.iterate_raw(json).value();
72-
if (!iter.start_array()) { return false; }
73-
do {
74-
if (!iter.start_object() || iter.field_key().value() != "x" || iter.field_value()) { return false; }
75-
sum.x += iter.consume_double();
76-
if (!iter.has_next_field() || iter.field_key().value() != "y" || iter.field_value()) { return false; }
77-
sum.y += iter.consume_double();
78-
if (!iter.has_next_field() || iter.field_key().value() != "z" || iter.field_value()) { return false; }
79-
sum.z += iter.consume_double();
80-
if (*iter.advance() != '}') { return false; }
81-
count++;
82-
} while (iter.has_next_element());
83-
84-
return true;
85-
}
86-
87-
BENCHMARK_TEMPLATE(LargeRandomSum, Iter);
88-
89-
} // namespace sum
9051
} // namespace largerandom
9152

9253
#endif // SIMDJSON_EXCEPTIONS

benchmark/largerandom/largerandom.h

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88

99
namespace largerandom {
1010
template<typename T> static void LargeRandom(benchmark::State &state);
11-
namespace sum {
12-
template<typename T> static void LargeRandomSum(benchmark::State &state);
13-
}
1411

1512
using namespace simdjson;
1613

@@ -59,22 +56,21 @@ simdjson_unused static std::ostream &operator<<(std::ostream &o, const my_point
5956
//
6057
#include <vector>
6158
#include "event_counter.h"
59+
#ifndef BENCHMARK_NO_DOM
6260
#include "dom.h"
61+
#endif
6362
#include "json_benchmark.h"
6463

6564
namespace largerandom {
6665

6766
template<typename T> static void LargeRandom(benchmark::State &state) {
67+
#ifdef BENCHMARK_NO_DOM
68+
JsonBenchmark<T, T>(state, get_built_json_array());
69+
#else
6870
JsonBenchmark<T, Dom>(state, get_built_json_array());
71+
#endif
6972
}
7073

71-
namespace sum {
72-
73-
template<typename T> static void LargeRandomSum(benchmark::State &state) {
74-
JsonBenchmark<T, Dom>(state, get_built_json_array());
75-
}
76-
77-
}
7874
} // namespace largerandom
7975

8076
#endif // SIMDJSON_EXCEPTIONS

benchmark/largerandom/ondemand.h

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,47 +25,14 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
2525

2626
auto doc = parser.iterate(json);
2727
for (ondemand::object coord : doc) {
28-
container.emplace_back(my_point{coord["x"], coord["y"], coord["z"]});
28+
container.emplace_back(my_point{coord.find_field("x"), coord.find_field("y"), coord.find_field("z")});
2929
}
3030

3131
return true;
3232
}
3333

3434
BENCHMARK_TEMPLATE(LargeRandom, OnDemand);
3535

36-
37-
namespace sum {
38-
39-
class OnDemand {
40-
public:
41-
simdjson_really_inline bool Run(const padded_string &json);
42-
simdjson_really_inline my_point &Result() { return sum; }
43-
simdjson_really_inline size_t ItemCount() { return count; }
44-
45-
private:
46-
ondemand::parser parser{};
47-
my_point sum{};
48-
size_t count{};
49-
};
50-
51-
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
52-
sum = {0,0,0};
53-
count = 0;
54-
55-
auto doc = parser.iterate(json);
56-
for (ondemand::object coord : doc.get_array()) {
57-
sum.x += double(coord["x"]);
58-
sum.y += double(coord["y"]);
59-
sum.z += double(coord["z"]);
60-
count++;
61-
}
62-
63-
return true;
64-
}
65-
66-
BENCHMARK_TEMPLATE(LargeRandomSum, OnDemand);
67-
68-
} // namespace sum
6936
} // namespace largerandom
7037

7138
#endif // SIMDJSON_EXCEPTIONS
benchmark/largerandom/ondemand_unordered.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#pragma once
2+
3+
#if SIMDJSON_EXCEPTIONS
4+
5+
#include "largerandom.h"
6+
7+
namespace largerandom {
8+
9+
using namespace simdjson;
10+
using namespace simdjson::builtin;
11+
12+
class OnDemandUnordered {
13+
public:
14+
simdjson_really_inline bool Run(const padded_string &json);
15+
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
16+
simdjson_really_inline size_t ItemCount() { return container.size(); }
17+
18+
private:
19+
ondemand::parser parser{};
20+
std::vector<my_point> container{};
21+
};
22+
23+
simdjson_really_inline bool OnDemandUnordered::Run(const padded_string &json) {
24+
container.clear();
25+
26+
auto doc = parser.iterate(json);
27+
for (ondemand::object coord : doc) {
28+
container.emplace_back(my_point{coord["x"], coord["y"], coord["z"]});
29+
}
30+
31+
return true;
32+
}
33+
34+
BENCHMARK_TEMPLATE(LargeRandom, OnDemandUnordered);
35+
36+
} // namespace largerandom
37+
38+
#endif // SIMDJSON_EXCEPTIONS

0 commit comments

Comments
 (0)