Skip to content

Commit d530ccf

Browse files
committed
Remove depth tracking from ondemand api
1 parent e54bd38 commit d530ccf

15 files changed

+529
-515
lines changed

benchmark/bench_sax.cpp

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,18 @@ simdjson_really_inline uint64_t nullable_int(ondemand::value && value) {
4040
return std::move(value);
4141
}
4242

43-
simdjson_really_inline twitter::twitter_user read_user(ondemand::object && u) {
43+
simdjson_really_inline twitter::twitter_user read_user(ondemand::object && user) {
44+
// TODO figure out why we can't use user directly ... the destructor doesn't seem to get invoked when
45+
// read_user() finishes, for some reason.
46+
ondemand::object u = std::move(user);
4447
return { u["id"], u["screen_name"] };
4548
}
4649
simdjson_really_inline void read_tweets(ondemand::parser &parser, padded_string &json, std::vector<twitter::tweet> &tweets) {
4750
// Walk the document, parsing the tweets as we go
4851
auto doc = parser.parse(json);
49-
for (ondemand::object tweet : doc["statuses"]) {
52+
auto root = doc.get_object();
53+
ondemand::array statuses = root["statuses"];
54+
for (ondemand::object tweet : statuses) {
5055
tweets.emplace_back(twitter::tweet{
5156
tweet["created_at"],
5257
tweet["id"],
@@ -108,40 +113,49 @@ simdjson_really_inline void read_tweets(ondemand::parser &parser, padded_string
108113
// { "statuses":
109114
auto doc = parser.parse(json);
110115
ondemand::json_iterator &iter = doc.iterate();
111-
iter.start_object().value();
112-
if (!iter.find_first_field_raw("statuses")) { throw "No statuses field"; }
116+
if (!iter.start_object() || !iter.find_field_raw("statuses")) { throw; }
113117
// { "statuses": [
114-
auto tweets_array = iter.start_array().value();
115-
if (iter.is_empty_array()) { return; }
118+
if (!iter.start_array()) { throw; }
116119

117120
do {
118-
auto tweet_object = iter.start_object().value();
119121
twitter::tweet tweet;
120-
if (!iter.find_first_field_raw("created_at")) { throw "Could not find created_at"; }
122+
123+
if (!iter.start_object() || !iter.find_field_raw("created_at")) { throw; }
121124
tweet.created_at = iter.get_raw_json_string().value().unescape(parser);
122-
if (!iter.find_next_field_raw("id", tweet_object)) { throw "Could not find id"; }
125+
126+
if (!iter.has_next_field() || !iter.find_field_raw("id")) { throw; }
123127
tweet.id = iter.get_uint64();
124-
if (!iter.find_next_field_raw("text", tweet_object)) { throw "Could not find text"; }
128+
129+
if (!iter.has_next_field() || !iter.find_field_raw("text")) { throw; }
125130
tweet.text = iter.get_raw_json_string().value().unescape(parser);
126-
if (!iter.find_next_field_raw("in_reply_to_status_id", tweet_object)) { throw "Could not find in_reply_to_status_id"; }
131+
132+
if (!iter.has_next_field() || !iter.find_field_raw("in_reply_to_status_id")) { throw; }
127133
if (!iter.is_null()) {
128134
tweet.in_reply_to_status_id = iter.get_uint64();
129135
}
130-
if (!iter.find_next_field_raw("user", tweet_object)) { throw "Could not find user"; }
136+
137+
if (!iter.has_next_field() || !iter.find_field_raw("user")) { throw; }
131138
{
132-
auto user_object = iter.start_object().value();
133-
if (!iter.find_first_field_raw("id")) { throw "Could not find user.id"; }
139+
if (!iter.start_object() || !iter.find_field_raw("id")) { throw; }
134140
tweet.user.id = iter.get_uint64();
135-
if (!iter.find_next_field_raw("screen_name", user_object)) { throw "Could not find user.screen_name"; }
141+
142+
if (!iter.has_next_field() || !iter.find_field_raw("screen_name")) { throw; }
136143
tweet.user.screen_name = iter.get_raw_json_string().value().unescape(parser);
144+
145+
iter.skip_container(); // Skip the rest of the user object
137146
}
138-
if (!iter.find_next_field_raw("retweet_count", tweet_object)) { throw "Could not find retweet_count"; }
147+
148+
if (!iter.has_next_field() || !iter.find_field_raw("retweet_count")) { throw; }
139149
tweet.retweet_count = iter.get_uint64();
140-
if (!iter.find_next_field_raw("favorite_count", tweet_object)) { throw "Could not find favorite_count"; }
150+
151+
if (!iter.has_next_field() || !iter.find_field_raw("favorite_count")) { throw; }
141152
tweet.favorite_count = iter.get_uint64();
142153

143154
tweets.push_back(tweet);
144-
} while (iter.next_element(tweets_array));
155+
156+
iter.skip_container(); // Skip the rest of the tweet object
157+
158+
} while (iter.has_next_element());
145159
}
146160

147161
static void iter_tweets(State &state) {
@@ -397,7 +411,10 @@ static void ondemand_largerandom(State &state) {
397411
size_t points = 0;
398412
for (SIMDJSON_UNUSED auto _ : state) {
399413
std::vector<my_point> container;
400-
for (ondemand::object point : parser.parse(json)) {
414+
auto doc = parser.parse(json);
415+
ondemand::array array = doc.get_array();
416+
for (ondemand::object point_object : array) {
417+
auto point = point_object.begin();
401418
container.emplace_back(my_point{(*point).value(), (*++point).value(), (*++point).value()});
402419
}
403420
bytes += json.size();
@@ -450,8 +467,7 @@ static void iter_largerandom(State &state) {
450467
std::vector<my_point> container;
451468
auto doc = parser.parse(json);
452469
ondemand::json_iterator &iter = doc.iterate();
453-
iter.start_array().value();
454-
if (!iter.is_empty_array()) {
470+
if (iter.start_array()) {
455471
do {
456472
container.emplace_back(my_point{first_double(iter), next_double(iter), next_double(iter)});
457473
if (iter.has_next_field()) { throw "Too many fields"; }

src/generic/ondemand.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
#include "generic/ondemand/raw_json_string.h"
33
#include "generic/ondemand/token_iterator.h"
44
#include "generic/ondemand/json_iterator.h"
5+
#include "generic/ondemand/array.h"
56
#include "generic/ondemand/document.h"
67
#include "generic/ondemand/value.h"
7-
#include "generic/ondemand/array.h"
88
#include "generic/ondemand/field.h"
99
#include "generic/ondemand/object.h"
1010
#include "generic/ondemand/parser.h"
@@ -13,9 +13,9 @@
1313
#include "generic/ondemand/raw_json_string-inl.h"
1414
#include "generic/ondemand/token_iterator-inl.h"
1515
#include "generic/ondemand/json_iterator-inl.h"
16+
#include "generic/ondemand/array-inl.h"
1617
#include "generic/ondemand/document-inl.h"
1718
#include "generic/ondemand/value-inl.h"
18-
#include "generic/ondemand/array-inl.h"
1919
#include "generic/ondemand/field-inl.h"
2020
#include "generic/ondemand/object-inl.h"
2121
#include "generic/ondemand/parser-inl.h"

src/generic/ondemand/array-inl.h

Lines changed: 113 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -41,65 +41,72 @@ namespace ondemand {
4141
//
4242

4343
simdjson_really_inline array::array() noexcept = default;
44-
simdjson_really_inline array::array(document *_doc, json_iterator::container _container) noexcept
45-
: doc{_doc}, container{_container}, at_start{true}, error{SUCCESS}
44+
simdjson_really_inline array::array(document *_doc, bool has_value) noexcept
45+
: doc{_doc}, has_next{has_value}, error{SUCCESS}
4646
{
4747
}
48-
simdjson_really_inline array::array(document *_doc, error_code _error) noexcept
49-
: doc{_doc}, container{_doc->iter.current_container()}, at_start{false}, error{_error}
48+
simdjson_really_inline array::array(array &&other) noexcept
49+
: doc{other.doc}, has_next{other.has_next}, error{other.error}
5050
{
51-
SIMDJSON_ASSUME(_error);
51+
// Terminate the other iterator
52+
other.has_next = false;
53+
}
54+
simdjson_really_inline array &array::operator=(array &&other) noexcept {
55+
doc = other.doc;
56+
has_next = other.has_next;
57+
error = other.error;
58+
// Terminate the other iterator
59+
other.has_next = false;
60+
return *this;
5261
}
5362

54-
simdjson_really_inline bool array::finished() const noexcept {
55-
return !doc->iter.in_container(container);
63+
simdjson_really_inline array::~array() noexcept {
64+
if (!error && has_next) {
65+
logger::log_event(doc->iter, "unfinished", "array");
66+
doc->iter.skip_container();
67+
}
5668
}
5769

58-
simdjson_really_inline array array::start(document *doc) noexcept {
59-
error_code error;
60-
json_iterator::container c;
61-
if ((error = doc->iter.start_array().get(c))) { return error_chain(doc, error); }
62-
return array(doc, c);
70+
simdjson_really_inline simdjson_result<array> array::start(document *doc) noexcept {
71+
bool has_value;
72+
SIMDJSON_TRY( doc->iter.start_array().get(has_value) );
73+
return array(doc, has_value);
6374
}
6475
simdjson_really_inline array array::started(document *doc) noexcept {
6576
return array(doc, doc->iter.started_array());
6677
}
67-
simdjson_really_inline array array::error_chain(document *doc, error_code error) noexcept {
68-
return array(doc, error);
69-
}
70-
simdjson_really_inline array array::begin() noexcept {
78+
simdjson_really_inline array::iterator array::begin() noexcept {
7179
return *this;
7280
}
73-
simdjson_really_inline array array::end() noexcept {
74-
return {};
81+
simdjson_really_inline array::iterator array::end() noexcept {
82+
return *this;
7583
}
7684

7785
simdjson_really_inline error_code array::report_error() noexcept {
78-
container = doc->iter.current_container().child(); // Make it so we'll stop
79-
auto result = error;
80-
error = SUCCESS;
81-
return result;
86+
SIMDJSON_ASSUME(error);
87+
has_next = false;
88+
return error;
8289
}
8390

84-
simdjson_really_inline simdjson_result<value> array::operator*() noexcept {
85-
if (error) { return { doc, report_error() }; }
86-
return value::start(doc);
91+
simdjson_really_inline array::iterator::iterator(array &_a) noexcept : a{&_a} {}
92+
93+
simdjson_really_inline array::iterator::iterator() noexcept = default;
94+
simdjson_really_inline array::iterator::iterator(const array::iterator &_a) noexcept = default;
95+
simdjson_really_inline array::iterator &array::iterator::operator=(const array::iterator &_a) noexcept = default;
96+
97+
simdjson_really_inline simdjson_result<value> array::iterator::operator*() noexcept {
98+
if (a->error) { return a->report_error(); }
99+
return value::start(a->doc);
87100
}
88-
simdjson_really_inline bool array::operator==(const array &other) noexcept {
89-
return !(*this != other);
101+
simdjson_really_inline bool array::iterator::operator==(const array::iterator &) noexcept {
102+
return !a->has_next;
90103
}
91-
simdjson_really_inline bool array::operator!=(const array &) noexcept {
92-
// If we're at the start, check for empty array.
93-
if (at_start) {
94-
at_start = false;
95-
return !doc->iter.is_empty_array();
96-
}
97-
return !finished();
104+
simdjson_really_inline bool array::iterator::operator!=(const array::iterator &) noexcept {
105+
return a->has_next;
98106
}
99-
simdjson_really_inline array &array::operator++() noexcept {
100-
SIMDJSON_ASSUME(!at_start);
101-
102-
error = doc->iter.next_element(container).error();
107+
simdjson_really_inline array::iterator &array::iterator::operator++() noexcept {
108+
if (a->error) { return *this; }
109+
a->error = a->doc->iter.has_next_element().get(a->has_next); // If there's an error, has_next stays true.
103110
return *this;
104111
}
105112

@@ -111,25 +118,82 @@ namespace simdjson {
111118

112119
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::simdjson_result(
113120
SIMDJSON_IMPLEMENTATION::ondemand::array &&value
114-
) noexcept :
115-
internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array>(
116-
std::forward<SIMDJSON_IMPLEMENTATION::ondemand::array>(value)
117-
)
121+
) noexcept
122+
: internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array>(
123+
std::forward<SIMDJSON_IMPLEMENTATION::ondemand::array>(value)
124+
)
118125
{
119126
}
120127
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::simdjson_result(
121-
SIMDJSON_IMPLEMENTATION::ondemand::document *doc,
122128
error_code error
123-
) noexcept :
124-
internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array>({ doc, error }, error)
129+
) noexcept
130+
: internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array>(error)
131+
{
132+
}
133+
134+
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::begin() noexcept {
135+
if (error()) { return error(); }
136+
return first.begin();
137+
}
138+
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::end() noexcept {
139+
if (error()) { return error(); }
140+
return first.end();
141+
}
142+
143+
//
144+
// array::iterator
145+
//
146+
147+
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::simdjson_result() noexcept
148+
: internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>({}, SUCCESS)
149+
{
150+
}
151+
152+
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::simdjson_result(
153+
SIMDJSON_IMPLEMENTATION::ondemand::array::iterator &&value
154+
) noexcept
155+
: internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>(value))
156+
{
157+
}
158+
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::simdjson_result(error_code error) noexcept
159+
: internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>({}, error)
125160
{
126161
}
127162

128-
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::begin() noexcept {
129-
return first;
163+
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::simdjson_result(
164+
const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &a
165+
) noexcept
166+
: internal::simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>(a)
167+
{
130168
}
131-
simdjson_really_inline SIMDJSON_IMPLEMENTATION::ondemand::array simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::end() noexcept {
132-
return {};
169+
170+
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::operator=(
171+
const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &a
172+
) noexcept {
173+
first = a.first;
174+
second = a.second;
175+
return *this;
176+
}
177+
178+
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::operator*() noexcept {
179+
if (error()) { return error(); }
180+
return *first;
181+
}
182+
// Assumes it's being compared with the end. true if this result holds an error or the array has no more elements.
183+
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::operator==(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &other) noexcept {
184+
if (error()) { return true; }
185+
return first == other.first;
186+
}
187+
// Assumes it's being compared with the end. true if there is no error and the array has more elements.
188+
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::operator!=(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &other) noexcept {
189+
if (error()) { return false; }
190+
return first != other.first;
191+
}
192+
// Checks for ']' and ','
193+
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::operator++() noexcept {
194+
if (error()) { return *this; }
195+
++first;
196+
return *this;
133197
}
134198

135199
} // namespace simdjson

0 commit comments

Comments
 (0)