Skip to content

Commit 5d4f063

Browse files
committed
Allow reuse of value to try multiple types
1 parent c3c3899 commit 5d4f063

File tree

10 files changed

+259
-211
lines changed

10 files changed

+259
-211
lines changed

benchmark/kostya/iter.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ class Iter {
2222

2323
simdjson_really_inline simdjson_result<double> first_double(ondemand::json_iterator &iter, const char *key) {
2424
if (!iter.start_object() || ondemand::raw_json_string(iter.field_key()) != key || iter.field_value()) { throw "Invalid field"; }
25-
return iter.get_double();
25+
return iter.consume_double();
2626
}
2727

2828
simdjson_really_inline simdjson_result<double> next_double(ondemand::json_iterator &iter, const char *key) {
2929
if (!iter.has_next_field() || ondemand::raw_json_string(iter.field_key()) != key || iter.field_value()) { throw "Invalid field"; }
30-
return iter.get_double();
30+
return iter.consume_double();
3131
}
3232

3333
};
@@ -76,11 +76,11 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {
7676
if (!iter.start_array()) { return false; }
7777
do {
7878
if (!iter.start_object() || !iter.find_field_raw("x")) { return false; }
79-
sum.x += iter.get_double();
79+
sum.x += iter.consume_double();
8080
if (!iter.has_next_field() || !iter.find_field_raw("y")) { return false; }
81-
sum.y += iter.get_double();
81+
sum.y += iter.consume_double();
8282
if (!iter.has_next_field() || !iter.find_field_raw("z")) { return false; }
83-
sum.z += iter.get_double();
83+
sum.z += iter.consume_double();
8484
if (iter.skip_container()) { return false; } // Skip the rest of the coordinates object
8585
count++;
8686
} while (iter.has_next_element());

benchmark/largerandom/iter.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ class Iter {
2222

2323
simdjson_really_inline double first_double(ondemand::json_iterator &iter) {
2424
if (iter.start_object().error() || iter.field_key().error() || iter.field_value()) { throw "Invalid field"; }
25-
return iter.get_double();
25+
return iter.consume_double();
2626
}
2727

2828
simdjson_really_inline double next_double(ondemand::json_iterator &iter) {
2929
if (!iter.has_next_field() || iter.field_key().error() || iter.field_value()) { throw "Invalid field"; }
30-
return iter.get_double();
30+
return iter.consume_double();
3131
}
3232

3333
};
@@ -72,11 +72,11 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {
7272
if (!iter.start_array()) { return false; }
7373
do {
7474
if (!iter.start_object() || iter.field_key().value() != "x" || iter.field_value()) { return false; }
75-
sum.x += iter.get_double();
75+
sum.x += iter.consume_double();
7676
if (!iter.has_next_field() || iter.field_key().value() != "y" || iter.field_value()) { return false; }
77-
sum.y += iter.get_double();
77+
sum.y += iter.consume_double();
7878
if (!iter.has_next_field() || iter.field_key().value() != "z" || iter.field_value()) { return false; }
79-
sum.z += iter.get_double();
79+
sum.z += iter.consume_double();
8080
if (*iter.advance() != '}') { return false; }
8181
count++;
8282
} while (iter.has_next_element());

benchmark/partial_tweets/iter.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,35 +47,35 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {
4747
tweet tweet;
4848

4949
if (!iter.start_object() || !iter.find_field_raw("created_at")) { return false; }
50-
tweet.created_at = iter.get_raw_json_string().value().unescape(iter);
50+
tweet.created_at = iter.consume_raw_json_string().value().unescape(iter);
5151

5252
if (!iter.has_next_field() || !iter.find_field_raw("id")) { return false; }
53-
tweet.id = iter.get_uint64();
53+
tweet.id = iter.consume_uint64();
5454

5555
if (!iter.has_next_field() || !iter.find_field_raw("text")) { return false; }
56-
tweet.text = iter.get_raw_json_string().value().unescape(iter);
56+
tweet.text = iter.consume_raw_json_string().value().unescape(iter);
5757

5858
if (!iter.has_next_field() || !iter.find_field_raw("in_reply_to_status_id")) { return false; }
5959
if (!iter.is_null()) {
60-
tweet.in_reply_to_status_id = iter.get_uint64();
60+
tweet.in_reply_to_status_id = iter.consume_uint64();
6161
}
6262

6363
if (!iter.has_next_field() || !iter.find_field_raw("user")) { return false; }
6464
{
6565
if (!iter.start_object() || !iter.find_field_raw("id")) { return false; }
66-
tweet.user.id = iter.get_uint64();
66+
tweet.user.id = iter.consume_uint64();
6767

6868
if (!iter.has_next_field() || !iter.find_field_raw("screen_name")) { return false; }
69-
tweet.user.screen_name = iter.get_raw_json_string().value().unescape(iter);
69+
tweet.user.screen_name = iter.consume_raw_json_string().value().unescape(iter);
7070

7171
if (iter.skip_container()) { return false; } // Skip the rest of the user object
7272
}
7373

7474
if (!iter.has_next_field() || !iter.find_field_raw("retweet_count")) { return false; }
75-
tweet.retweet_count = iter.get_uint64();
75+
tweet.retweet_count = iter.consume_uint64();
7676

7777
if (!iter.has_next_field() || !iter.find_field_raw("favorite_count")) { return false; }
78-
tweet.favorite_count = iter.get_uint64();
78+
tweet.favorite_count = iter.consume_uint64();
7979

8080
tweets.push_back(tweet);
8181

include/simdjson/generic/ondemand/document-inl.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ simdjson_really_inline simdjson_result<object> document::get_object() & noexcept
4141
}
4242
simdjson_really_inline simdjson_result<uint64_t> document::get_uint64() noexcept {
4343
assert_at_start();
44-
return consume_if_success( iter.get_root_uint64() );
44+
return consume_if_success( iter.parse_uint64(json) );
4545
}
4646
simdjson_really_inline simdjson_result<int64_t> document::get_int64() noexcept {
4747
assert_at_start();
48-
return consume_if_success( iter.get_root_int64() );
48+
return consume_if_success( iter.parse_root_int64(json) );
4949
}
5050
simdjson_really_inline simdjson_result<double> document::get_double() noexcept {
5151
assert_at_start();
52-
return consume_if_success( iter.get_root_double() );
52+
return consume_if_success( iter.parse_root_double(json) );
5353
}
5454
simdjson_really_inline simdjson_result<std::string_view> document::get_string() & noexcept {
5555
return consume_if_success( as_value().get_string() );
@@ -59,11 +59,11 @@ simdjson_really_inline simdjson_result<raw_json_string> document::get_raw_json_s
5959
}
6060
simdjson_really_inline simdjson_result<bool> document::get_bool() noexcept {
6161
assert_at_start();
62-
return consume_if_success( iter.get_root_bool() );
62+
return consume_if_success( iter.parse_root_bool(json) );
6363
}
6464
simdjson_really_inline bool document::is_null() noexcept {
6565
assert_at_start();
66-
if (iter.root_is_null()) { json = nullptr; return true; }
66+
if (iter.root_is_null(json)) { json = nullptr; return true; }
6767
return false;
6868
}
6969

include/simdjson/generic/ondemand/json_iterator-inl.h

Lines changed: 77 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,13 @@ simdjson_really_inline json_iterator::~json_iterator() noexcept {
4040
}
4141
#endif
4242

43-
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_object() noexcept {
44-
if (*advance() != '{') { return report_error(INCORRECT_TYPE, "Not an object"); }
43+
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_object(const uint8_t *json) noexcept {
44+
if (*json != '{') { logger::log_error(*this, "Not an object"); return INCORRECT_TYPE; }
4545
return started_object();
4646
}
47+
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_object() noexcept {
48+
return start_object(advance());
49+
}
4750

4851
SIMDJSON_WARN_UNUSED simdjson_really_inline bool json_iterator::started_object() noexcept {
4952
if (*peek() == '}') {
@@ -71,7 +74,7 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator:
7174
bool has_next;
7275
do {
7376
raw_json_string actual_key;
74-
SIMDJSON_TRY( get_raw_json_string().get(actual_key) );
77+
SIMDJSON_TRY( consume_raw_json_string().get(actual_key) );
7578
if (*advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); }
7679
if (actual_key == key) {
7780
logger::log_event(*this, "match", key);
@@ -97,11 +100,15 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::field_valu
97100
return SUCCESS;
98101
}
99102

100-
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_array() noexcept {
101-
if (*advance() != '[') { return report_error(INCORRECT_TYPE, "Not an array"); }
103+
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_array(const uint8_t *json) noexcept {
104+
if (*json != '[') { logger::log_error(*this, "Not an array"); return INCORRECT_TYPE; }
102105
return started_array();
103106
}
104107

108+
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_array() noexcept {
109+
return start_array(advance());
110+
}
111+
105112
SIMDJSON_WARN_UNUSED simdjson_really_inline bool json_iterator::started_array() noexcept {
106113
if (*peek() == ']') {
107114
logger::log_value(*this, "empty array");
@@ -124,46 +131,71 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator:
124131
}
125132
}
126133

127-
SIMDJSON_WARN_UNUSED simdjson_result<raw_json_string> json_iterator::get_raw_json_string() noexcept {
134+
SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> json_iterator::parse_string(const uint8_t *json) noexcept {
135+
return parse_raw_json_string(json).unescape(current_string_buf_loc);
136+
}
137+
SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> json_iterator::consume_string() noexcept {
138+
return parse_string(advance());
139+
}
140+
SIMDJSON_WARN_UNUSED simdjson_result<raw_json_string> json_iterator::parse_raw_json_string(const uint8_t *json) noexcept {
128141
logger::log_value(*this, "string", "", 0);
129-
return raw_json_string(advance()+1);
142+
if (*json != '"') { logger::log_error(*this, "Not a string"); return INCORRECT_TYPE; }
143+
return raw_json_string(json+1);
130144
}
131-
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::get_uint64() noexcept {
145+
SIMDJSON_WARN_UNUSED simdjson_result<raw_json_string> json_iterator::consume_raw_json_string() noexcept {
146+
return parse_raw_json_string(advance());
147+
}
148+
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::parse_uint64(const uint8_t *json) noexcept {
132149
logger::log_value(*this, "uint64", "", 0);
133-
return numberparsing::parse_unsigned(advance());
150+
return numberparsing::parse_unsigned(json);
151+
}
152+
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::consume_uint64() noexcept {
153+
return parse_uint64(advance());
134154
}
135-
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::get_int64() noexcept {
155+
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::parse_int64(const uint8_t *json) noexcept {
136156
logger::log_value(*this, "int64", "", 0);
137-
return numberparsing::parse_integer(advance());
157+
return numberparsing::parse_integer(json);
158+
}
159+
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::consume_int64() noexcept {
160+
return parse_int64(advance());
138161
}
139-
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::get_double() noexcept {
162+
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::parse_double(const uint8_t *json) noexcept {
140163
logger::log_value(*this, "double", "", 0);
141-
return numberparsing::parse_double(advance());
164+
return numberparsing::parse_double(json);
142165
}
143-
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::get_bool() noexcept {
166+
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::consume_double() noexcept {
167+
return parse_double(advance());
168+
}
169+
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::parse_bool(const uint8_t *json) noexcept {
144170
logger::log_value(*this, "bool", "", 0);
145-
auto json = advance();
146171
auto not_true = atomparsing::str4ncmp(json, "true");
147172
auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e');
148173
bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]);
149-
if (error) { return report_error(INCORRECT_TYPE, "not a boolean"); }
174+
if (error) { logger::log_error(*this, "Not a boolean"); return INCORRECT_TYPE; }
150175
return simdjson_result<bool>(!not_true);
151176
}
152-
simdjson_really_inline bool json_iterator::is_null() noexcept {
153-
auto json = peek();
177+
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::consume_bool() noexcept {
178+
return parse_bool(advance());
179+
}
180+
simdjson_really_inline bool json_iterator::is_null(const uint8_t *json) noexcept {
154181
if (!atomparsing::str4ncmp(json, "null")) {
155182
logger::log_value(*this, "null", "", 0);
183+
return true;
184+
}
185+
return false;
186+
}
187+
simdjson_really_inline bool json_iterator::is_null() noexcept {
188+
if (is_null(peek())) {
156189
advance();
157190
return true;
158191
}
159192
return false;
160193
}
161194

162195
template<int N>
163-
SIMDJSON_WARN_UNUSED simdjson_really_inline bool json_iterator::advance_to_buffer(uint8_t (&tmpbuf)[N]) noexcept {
196+
SIMDJSON_WARN_UNUSED simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint8_t (&tmpbuf)[N]) noexcept {
164197
// Truncate whitespace to fit the buffer.
165-
auto len = peek_length();
166-
auto json = advance();
198+
auto len = peek_length(-1);
167199
if (len > N-1) {
168200
if (jsoncharutils::is_not_structural_or_whitespace(json[N])) { return false; }
169201
len = N-1;
@@ -177,39 +209,51 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline bool json_iterator::advance_to_buffe
177209

178210
constexpr const uint32_t MAX_INT_LENGTH = 1024;
179211

180-
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::get_root_uint64() noexcept {
212+
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::parse_root_uint64(const uint8_t *json) noexcept {
181213
uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer
182-
if (!advance_to_buffer(tmpbuf)) { return report_error(NUMBER_ERROR, "Root number more than 20 digits"); }
214+
if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; }
183215
logger::log_value(*this, "uint64", "", 0);
184216
auto result = numberparsing::parse_unsigned(buf);
185-
if (result.error()) { report_error(result.error(), "Error parsing unsigned integer"); }
217+
if (result.error()) { logger::log_error(*this, "Error parsing unsigned integer"); return result.error(); }
186218
return result;
187219
}
188-
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::get_root_int64() noexcept {
220+
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::consume_root_uint64() noexcept {
221+
return parse_root_uint64(advance());
222+
}
223+
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::parse_root_int64(const uint8_t *json) noexcept {
189224
uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer
190-
if (!advance_to_buffer(tmpbuf)) { return report_error(NUMBER_ERROR, "Root number more than 20 characters"); }
225+
if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; }
191226
logger::log_value(*this, "int64", "", 0);
192227
auto result = numberparsing::parse_integer(buf);
193228
if (result.error()) { report_error(result.error(), "Error parsing integer"); }
194229
return result;
195230
}
196-
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::get_root_double() noexcept {
231+
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::consume_root_int64() noexcept {
232+
return parse_root_int64(advance());
233+
}
234+
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::parse_root_double(const uint8_t *json) noexcept {
197235
// Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest number: -0.<fraction>e-308.
198236
uint8_t tmpbuf[1074+8+1];
199-
if (!advance_to_buffer(tmpbuf)) { return report_error(NUMBER_ERROR, "Root float more than 1082 digits"); }
237+
if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 1082 characters"); return NUMBER_ERROR; }
200238
logger::log_value(*this, "double", "", 0);
201239
auto result = numberparsing::parse_double(buf);
202240
if (result.error()) { report_error(result.error(), "Error parsing double"); }
203241
return result;
204242
}
205-
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::get_root_bool() noexcept {
243+
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::consume_root_double() noexcept {
244+
return parse_root_double(advance());
245+
}
246+
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::parse_root_bool(const uint8_t *json) noexcept {
206247
uint8_t tmpbuf[5+1];
207-
if (!advance_to_buffer(tmpbuf)) { return INCORRECT_TYPE; } // Too big! Can't be true or false
208-
return get_bool();
248+
if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Not a boolean"); return INCORRECT_TYPE; }
249+
return consume_bool();
250+
}
251+
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::consume_root_bool() noexcept {
252+
return parse_root_bool(advance());
209253
}
210-
simdjson_really_inline bool json_iterator::root_is_null() noexcept {
254+
simdjson_really_inline bool json_iterator::root_is_null(const uint8_t *json) noexcept {
211255
uint8_t tmpbuf[4+1];
212-
if (!advance_to_buffer(tmpbuf)) { return false; } // Too big! Can't be null
256+
if (!copy_to_buffer(json, tmpbuf)) { return false; }
213257
return is_null();
214258
}
215259

0 commit comments

Comments
 (0)