Skip to content

Commit 6b02b06

Browse files
authored
Merge pull request simdjson#1330 from simdjson/jkeiser/depth-tracking
Permit partial iteration in On Demand
2 parents 5f7b2ba + 806cb39 commit 6b02b06

36 files changed

+2343
-1332
lines changed

benchmark/bench_ondemand.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,22 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
88
SIMDJSON_POP_DISABLE_WARNINGS
99

1010
#include "partial_tweets/ondemand.h"
11-
#include "partial_tweets/iter.h"
11+
// #include "partial_tweets/iter.h"
1212
#include "partial_tweets/dom.h"
1313

1414
#include "largerandom/ondemand.h"
15-
#include "largerandom/iter.h"
15+
// #include "largerandom/iter.h"
1616
#include "largerandom/dom.h"
1717

1818
#include "kostya/ondemand.h"
19-
#include "kostya/iter.h"
19+
// #include "kostya/iter.h"
2020
#include "kostya/dom.h"
2121

2222
#include "distinctuserid/ondemand.h"
2323
#include "distinctuserid/dom.h"
2424

25+
#include "find_tweet/ondemand.h"
26+
#include "find_tweet/dom.h"
27+
2528

2629
BENCHMARK_MAIN();

benchmark/distinctuserid/distinctuserid.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,20 @@
66
#include "json_benchmark.h"
77

88

9+
//
10+
// Interface
11+
//
12+
13+
namespace distinct_user_id {
14+
template<typename T> static void DistinctUserID(benchmark::State &state);
15+
916
bool equals(const char *s1, const char *s2) { return strcmp(s1, s2) == 0; }
1017

1118
void remove_duplicates(std::vector<int64_t> &v) {
1219
std::sort(v.begin(), v.end());
1320
auto last = std::unique(v.begin(), v.end());
1421
v.erase(last, v.end());
1522
}
16-
17-
//
18-
// Interface
19-
//
20-
21-
namespace distinct_user_id {
22-
template<typename T> static void DistinctUserID(benchmark::State &state);
2323
} // namespace
2424

2525
//

benchmark/distinctuserid/dom.h

Lines changed: 13 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -8,54 +8,6 @@ namespace distinct_user_id {
88

99
using namespace simdjson;
1010

11-
12-
simdjson_really_inline void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::element element);
13-
void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::array array) {
14-
for (auto child : array) {
15-
simdjson_recurse(v, child);
16-
}
17-
}
18-
void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::object object) {
19-
for (auto [key, value] : object) {
20-
if((key.size() == 4) && (memcmp(key.data(), "user", 4) == 0)) {
21-
// we are in an object under the key "user"
22-
simdjson::error_code error;
23-
simdjson::dom::object child_object;
24-
simdjson::dom::object child_array;
25-
if (not (error = value.get(child_object))) {
26-
for (auto [child_key, child_value] : child_object) {
27-
if((child_key.size() == 2) && (memcmp(child_key.data(), "id", 2) == 0)) {
28-
int64_t x;
29-
if (not (error = child_value.get(x))) {
30-
v.push_back(x);
31-
}
32-
}
33-
simdjson_recurse(v, child_value);
34-
}
35-
} else if (not (error = value.get(child_array))) {
36-
simdjson_recurse(v, child_array);
37-
}
38-
// end of: we are in an object under the key "user"
39-
} else {
40-
simdjson_recurse(v, value);
41-
}
42-
}
43-
}
44-
simdjson_really_inline void simdjson_recurse(std::vector<int64_t> & v, simdjson::dom::element element) {
45-
simdjson_unused simdjson::error_code error;
46-
simdjson::dom::array array;
47-
simdjson::dom::object object;
48-
if (not (error = element.get(array))) {
49-
simdjson_recurse(v, array);
50-
} else if (not (error = element.get(object))) {
51-
simdjson_recurse(v, object);
52-
}
53-
}
54-
55-
56-
57-
58-
5911
class Dom {
6012
public:
6113
simdjson_really_inline bool Run(const padded_string &json);
@@ -65,19 +17,23 @@ class Dom {
6517
private:
6618
dom::parser parser{};
6719
std::vector<int64_t> ids{};
68-
6920
};
70-
void print_vec(const std::vector<int64_t> &v) {
71-
for (auto i : v) {
72-
std::cout << i << " ";
73-
}
74-
std::cout << std::endl;
75-
}
7621

7722
simdjson_really_inline bool Dom::Run(const padded_string &json) {
7823
ids.clear();
79-
dom::element doc = parser.parse(json);
80-
simdjson_recurse(ids, doc);
24+
// Walk the document, parsing as we go
25+
auto doc = parser.parse(json);
26+
for (dom::object tweet : doc["statuses"]) {
27+
// We believe that all statuses have a matching
28+
// user, and we are willing to throw when they do not.
29+
ids.push_back(tweet["user"]["id"]);
30+
// Not all tweets have a "retweeted_status", but when they do
31+
// we want to go and find the user within.
32+
auto retweet = tweet["retweeted_status"];
33+
if(retweet.error() != NO_SUCH_FIELD) {
34+
ids.push_back(retweet["user"]["id"]);
35+
}
36+
}
8137
remove_duplicates(ids);
8238
return true;
8339
}

benchmark/distinctuserid/ondemand.h

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,25 +35,13 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
3535
auto doc = parser.iterate(json);
3636
for (ondemand::object tweet : doc["statuses"]) {
3737
// We believe that all statuses have a matching
38-
// user, and we are willing to throw when they do not:
39-
//
40-
// You might think that you do not need the braces, but
41-
// you do, otherwise you will get the wrong answer. That is
42-
// because you can only have one active object or array
43-
// at a time.
44-
{
45-
ondemand::object user = tweet["user"];
46-
int64_t id = user["id"];
47-
ids.push_back(id);
48-
}
38+
// user, and we are willing to throw when they do not.
39+
ids.push_back(tweet["user"]["id"]);
4940
// Not all tweets have a "retweeted_status", but when they do
5041
// we want to go and find the user within.
5142
auto retweet = tweet["retweeted_status"];
5243
if(!retweet.error()) {
53-
ondemand::object retweet_content = retweet;
54-
ondemand::object reuser = retweet_content["user"];
55-
int64_t rid = reuser["id"];
56-
ids.push_back(rid);
44+
ids.push_back(retweet["user"]["id"]);
5745
}
5846
}
5947
remove_duplicates(ids);

benchmark/find_tweet/dom.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#pragma once
2+
3+
#if SIMDJSON_EXCEPTIONS
4+
5+
#include "find_tweet.h"
6+
7+
namespace find_tweet {
8+
9+
using namespace simdjson;
10+
11+
// Benchmark candidate for the find_tweet benchmark that uses a dom::parser.
// Run() searches the document for the tweet whose "id" equals TWEET_ID and
// keeps that tweet's "text" so it can be read back through Result().
class Dom {
12+
public:
13+
// Parses `json` and performs the search; returns true when the tweet was found.
simdjson_really_inline bool Run(const padded_string &json);
14+
// Returns the text stored by the most recent Run() (empty if nothing was found).
simdjson_really_inline std::string_view Result() { return text; }
15+
// Always reports 1.
// NOTE(review): presumably the item count consumed by the benchmark harness — confirm.
simdjson_really_inline size_t ItemCount() { return 1; }
16+
17+
private:
18+
// Parser instance kept as a member, so it is reused by every Run() call.
dom::parser parser{};
19+
// Non-owning view of the matching tweet's text.
// NOTE(review): presumably refers to storage owned by `parser` — confirm lifetime.
std::string_view text{};
20+
};
21+
22+
// Parses `json` with the DOM parser and scans the "statuses" array for the
// tweet whose "id" equals TWEET_ID.
// Returns true and stores the tweet's "text" in `text` on a match; returns
// false (leaving `text` empty) when no tweet matches.
// NOTE(review): this file is compiled only under SIMDJSON_EXCEPTIONS, so lookup
// and conversion failures are presumably reported by throwing — confirm.
simdjson_really_inline bool Dom::Run(const padded_string &json) {
23+
// Clear the result left over from a previous run.
text = "";
24+
auto doc = parser.parse(json);
25+
for (dom::object tweet : doc["statuses"]) {
26+
// Exact integer comparison against the benchmark's target ID.
if (uint64_t(tweet["id"]) == TWEET_ID) {
27+
text = tweet["text"];
28+
// Stop at the first match; later statuses are not visited.
return true;
29+
}
30+
}
31+
return false;
32+
}
33+
34+
BENCHMARK_TEMPLATE(FindTweet, Dom);
35+
36+
} // namespace find_tweet
37+
38+
#endif // SIMDJSON_EXCEPTIONS

benchmark/find_tweet/find_tweet.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
2+
#pragma once
3+
#include <vector>
4+
#include <cstdint>
5+
#include "event_counter.h"
6+
#include "json_benchmark.h"
7+
8+
9+
//
10+
// Interface
11+
//
12+
13+
// Public interface of the find_tweet benchmark: the benchmark entry point
// (defined further down in this header) and the tweet ID being searched for.
namespace find_tweet {
14+
// Benchmark body, templated on the implementation type T under test.
template<typename T> static void FindTweet(benchmark::State &state);
15+
// ID that the Run() implementations compare against each tweet's "id" field
// after converting it to uint64_t.
// NOTE(review): the comparison is an exact integer equality, so this must match
// the "id" literal in twitter.json digit for digit — confirm the value.
const uint64_t TWEET_ID = 505874901689851900;
16+
} // namespace
17+
18+
//
19+
// Implementation
20+
//
21+
22+
#include "dom.h"
23+
24+
25+
namespace find_tweet {
26+
27+
using namespace simdjson;
28+
29+
// Benchmark entry point: loads twitter.json and hands it to JsonBenchmark
// together with the implementation type T.
// If the file cannot be loaded, the error is printed to std::cerr, the
// benchmark is marked as skipped with "error loading", and nothing is run.
template<typename T> static void FindTweet(benchmark::State &state) {
30+
//
31+
// Load the JSON file
32+
//
33+
// SIMDJSON_BENCHMARK_DATA_DIR is expected to expand to a string literal, which
// is joined with "twitter.json" by adjacent-literal concatenation.
constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
34+
error_code error;
35+
padded_string json;
36+
// A truthy error code means the load failed.
if ((error = padded_string::load(TWITTER_JSON).get(json))) {
37+
std::cerr << error << std::endl;
38+
state.SkipWithError("error loading");
39+
return;
40+
}
41+
42+
// NOTE(review): Dom is passed as the second template argument, presumably as
// the reference implementation to compare T against — confirm in json_benchmark.h.
JsonBenchmark<T, Dom>(state, json);
43+
}
44+
45+
} // namespace find_tweet

benchmark/find_tweet/ondemand.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#pragma once
2+
3+
#if SIMDJSON_EXCEPTIONS
4+
5+
#include "find_tweet.h"
6+
7+
namespace find_tweet {
8+
9+
using namespace simdjson;
10+
using namespace simdjson::builtin;
11+
12+
13+
// Benchmark candidate for the find_tweet benchmark that uses an ondemand::parser.
// Run() searches the document for the tweet whose "id" equals TWEET_ID and
// keeps that tweet's "text" so it can be read back through Result().
class OnDemand {
14+
public:
15+
// Prints the name of the builtin implementation to std::cout the first time
// any OnDemand instance is constructed; later constructions print nothing.
OnDemand() {
16+
if(!displayed_implementation) {
17+
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
18+
displayed_implementation = true;
19+
}
20+
}
21+
// Iterates over `json` and performs the search; returns true when the tweet was found.
simdjson_really_inline bool Run(const padded_string &json);
22+
// Returns the text stored by the most recent Run() (empty if nothing was found).
simdjson_really_inline std::string_view Result() { return text; }
23+
// Always reports 1.
// NOTE(review): presumably the item count consumed by the benchmark harness — confirm.
simdjson_really_inline size_t ItemCount() { return 1; }
24+
25+
private:
26+
// Parser instance kept as a member, so it is reused by every Run() call.
ondemand::parser parser{};
27+
// Non-owning view of the matching tweet's text.
// NOTE(review): presumably refers to storage owned by `parser` — confirm lifetime.
std::string_view text{};
28+
29+
// Shared by all instances: records whether the implementation banner was printed.
static inline bool displayed_implementation = false;
30+
};
31+
32+
// Iterates over `json` with the On Demand parser and scans the "statuses"
// array for the tweet whose "id" equals TWEET_ID.
// Returns true and stores the tweet's "text" in `text` on a match; returns
// false (leaving `text` empty) when no tweet matches.
// NOTE(review): this file is compiled only under SIMDJSON_EXCEPTIONS, so lookup
// and conversion failures are presumably reported by throwing — confirm.
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
33+
// Clear the result left over from a previous run.
text = "";
34+
// Walk the document, parsing as we go
35+
auto doc = parser.iterate(json);
36+
for (ondemand::object tweet : doc["statuses"]) {
37+
// "id" is read before "text"; keep this order when editing.
// NOTE(review): On Demand lookups are presumably order-sensitive — confirm.
if (uint64_t(tweet["id"]) == TWEET_ID) {
38+
text = tweet["text"];
39+
// Stop at the first match; the remaining statuses are never visited.
return true;
40+
}
41+
}
42+
return false;
43+
}
44+
45+
BENCHMARK_TEMPLATE(FindTweet, OnDemand);
46+
47+
} // namespace find_tweet
48+
49+
#endif // SIMDJSON_EXCEPTIONS

benchmark/partial_tweets/ondemand.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,15 @@ class OnDemand {
2626
ondemand::parser parser{};
2727
std::vector<tweet> tweets{};
2828

29-
simdjson_really_inline uint64_t nullable_int(ondemand::value && value) {
29+
simdjson_really_inline uint64_t nullable_int(ondemand::value value) {
3030
if (value.is_null()) { return 0; }
31-
return std::move(value);
31+
return value;
3232
}
3333

34-
simdjson_really_inline twitter_user read_user(ondemand::object && user) {
35-
// Move user into a local object so it gets destroyed (and moves the iterator)
36-
ondemand::object u = std::move(user);
37-
return { u["id"], u["screen_name"] };
34+
simdjson_really_inline twitter_user read_user(ondemand::object user) {
35+
return { user["id"], user["screen_name"] };
3836
}
37+
3938
static inline bool displayed_implementation = false;
4039
};
4140

0 commit comments

Comments
 (0)