Skip to content

Commit 5b534d6

Browse files
committed
Add bool to stream parsing and fix [] more
1 parent cbb36ad commit 5b534d6

File tree

9 files changed

+143
-48
lines changed

9 files changed

+143
-48
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
namespace simdjson {
2+
namespace internal {
3+
namespace atomparsing {
4+
5+
/**
 * Loads the first four bytes pointed to by str into a uint32_t, in host byte order.
 *
 * The bytes are copied with std::memcpy instead of dereferencing a casted pointer:
 * str carries no alignment guarantee and does not point at a uint32_t object, so
 * `*reinterpret_cast<const uint32_t *>(str)` would be undefined behaviour. Compilers
 * turn a fixed-size 4-byte memcpy into a single load, so there is no runtime cost.
 *
 * @param str Pointer to at least four readable bytes (callers in this file pass
 *            four-character string literals such as "true").
 * @return The four bytes reinterpreted as a uint32_t.
 */
really_inline uint32_t string_to_uint32(const char* str) {
  uint32_t val;
  std::memcpy(&val, str, sizeof(uint32_t));
  return val;
}
6+
7+
/**
 * Compares the four bytes at src against the four bytes at atom.
 *
 * @param src  Pointer into the JSON buffer; four bytes are read from it. The
 *             static_assert below ties that read to SIMDJSON_PADDING.
 * @param atom Pointer to at least four bytes to compare against (e.g. "true").
 * @return 0 when all four bytes match, a non-zero XOR of the two words otherwise.
 */
WARN_UNUSED
really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) {
  uint32_t srcval; // filled via memcpy: we want to avoid unaligned 32-bit loads (undefined in C/C++)
  static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be at least 4 bytes");
  std::memcpy(&srcval, src, sizeof(uint32_t));
  return srcval ^ string_to_uint32(atom);
}
14+
15+
/**
 * Checks that src starts with the literal "true" and that the byte right after it
 * (src[4]) is accepted by internal::is_not_structural_or_whitespace() returning 0.
 *
 * Reads five bytes from src, so the caller must guarantee they are readable.
 */
WARN_UNUSED
really_inline bool is_valid_true_atom(const uint8_t *src) {
  const auto literal_diff = str4ncmp(src, "true");
  const auto trailing_flag = internal::is_not_structural_or_whitespace(src[4]);
  // Both values must be zero: the literal matched and nothing is glued onto it.
  return (literal_diff | trailing_flag) == 0;
}
19+
20+
/**
 * Length-aware variant of is_valid_true_atom().
 *
 * @param src Pointer to the candidate atom.
 * @param len Number of bytes available at src.
 * @return false when fewer than four bytes are available; for exactly four bytes,
 *         whether they spell "true"; otherwise the result of the unbounded check,
 *         which also inspects the byte following the literal.
 */
WARN_UNUSED
really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) {
  if (len < 4) { return false; }
  if (len == 4) { return str4ncmp(src, "true") == 0; }
  return is_valid_true_atom(src);
}
26+
27+
/**
 * Checks that src[1..4] spell "alse" and that src[5] is accepted by
 * internal::is_not_structural_or_whitespace() returning 0.
 *
 * src[0] is not inspected here.
 * NOTE(review): presumably the caller has already dispatched on a leading 'f' — confirm.
 * Reads six bytes from src, so the caller must guarantee they are readable.
 */
WARN_UNUSED
really_inline bool is_valid_false_atom(const uint8_t *src) {
  const auto tail_diff = str4ncmp(src + 1, "alse");
  const auto trailing_flag = internal::is_not_structural_or_whitespace(src[5]);
  // Both values must be zero: the tail matched and nothing is glued onto it.
  return (tail_diff | trailing_flag) == 0;
}
31+
32+
/**
 * Length-aware variant of is_valid_false_atom().
 *
 * @param src Pointer to the candidate atom.
 * @param len Number of bytes available at src.
 * @return false when fewer than five bytes are available; for exactly five bytes,
 *         whether src[1..4] spell "alse"; otherwise the result of the unbounded
 *         check, which also inspects the byte following the literal.
 */
WARN_UNUSED
really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) {
  if (len < 5) { return false; }
  if (len == 5) { return str4ncmp(src + 1, "alse") == 0; }
  return is_valid_false_atom(src);
}
38+
39+
/**
 * Checks that src starts with the literal "null" and that the byte right after it
 * (src[4]) is accepted by internal::is_not_structural_or_whitespace() returning 0.
 *
 * Reads five bytes from src, so the caller must guarantee they are readable.
 */
WARN_UNUSED
really_inline bool is_valid_null_atom(const uint8_t *src) {
  const auto literal_diff = str4ncmp(src, "null");
  const auto trailing_flag = internal::is_not_structural_or_whitespace(src[4]);
  // Both values must be zero: the literal matched and nothing is glued onto it.
  return (literal_diff | trailing_flag) == 0;
}
43+
44+
/**
 * Length-aware variant of is_valid_null_atom().
 *
 * @param src Pointer to the candidate atom.
 * @param len Number of bytes available at src.
 * @return false when fewer than four bytes are available; for exactly four bytes,
 *         whether they spell "null"; otherwise the result of the unbounded check,
 *         which also inspects the byte following the literal.
 */
WARN_UNUSED
really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
  if (len < 4) { return false; }
  if (len == 4) { return str4ncmp(src, "null") == 0; }
  return is_valid_null_atom(src);
}
50+
51+
} // namespace atomparsing
52+
} // namespace internal
53+
} // namespace simdjson

include/simdjson/internal/logger.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ namespace simdjson {
55
namespace internal {
66
namespace logger {
77

8-
static constexpr const bool LOG_ENABLED = true;
8+
static constexpr const bool LOG_ENABLED = false;
99
static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------";
1010
static constexpr const int LOG_EVENT_LEN = 30;
1111
static constexpr const int LOG_BUFFER_LEN = 20;

include/simdjson/stream/document-inl.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ really_inline simdjson_result<uint64_t> document::get_uint64() noexcept {
4040
really_inline simdjson_result<int64_t> document::get_int64() noexcept {
4141
return root.get_int64();
4242
}
43-
// really_inline simdjson_result<bool> document::get_bool() noexcept {
44-
// return root.get_bool();
45-
// }
43+
// Reads the document's root value as a boolean by forwarding to root.get_bool().
// Returns whatever simdjson_result<bool> the root element produces.
really_inline simdjson_result<bool> document::get_bool() noexcept {
44+
return root.get_bool();
45+
}
4646

4747
#if SIMDJSON_EXCEPTIONS
4848
really_inline document::operator array() noexcept(false) {
@@ -66,9 +66,9 @@ really_inline document::operator uint64_t() noexcept(false) {
6666
really_inline document::operator int64_t() noexcept(false) {
6767
return root;
6868
}
69-
// really_inline element::operator bool() noexcept(false) {
70-
// return root;
71-
// }
69+
// Conversion of the document to bool: returns `root`, relying on root's own conversion to bool.
// Declared noexcept(false).
// NOTE(review): presumably throws simdjson_error when the root is not a valid boolean — confirm.
really_inline document::operator bool() noexcept(false) {
70+
return root;
71+
}
7272

7373
really_inline array_iterator document::begin() noexcept(false) {
7474
return root.begin();
@@ -109,9 +109,9 @@ really_inline simdjson_result<uint64_t> simdjson_result<stream::document>::get_u
109109
really_inline simdjson_result<int64_t> simdjson_result<stream::document>::get_int64() noexcept {
110110
return root().get_int64();
111111
}
112-
// really_inline simdjson_result<bool> simdjson_result<stream::document>::get_bool() noexcept {
113-
// return root().get_bool();
114-
// }
112+
// Reads the wrapped document's root as a boolean by forwarding to root().get_bool().
really_inline simdjson_result<bool> simdjson_result<stream::document>::get_bool() noexcept {
113+
return root().get_bool();
114+
}
115115

116116
#if SIMDJSON_EXCEPTIONS
117117
really_inline simdjson_result<stream::document>::operator stream::array() noexcept(false) {
@@ -135,9 +135,9 @@ really_inline simdjson_result<stream::document>::operator uint64_t() noexcept(fa
135135
really_inline simdjson_result<stream::document>::operator int64_t() noexcept(false) {
136136
return root();
137137
}
138-
// really_inline simdjson_result<stream::document>::operator bool() noexcept(false) {
139-
// return root();
140-
// }
138+
// Conversion of the wrapped document to bool: returns root(), relying on its conversion to bool.
// Declared noexcept(false).
// NOTE(review): presumably throws simdjson_error on a stored error or non-boolean root — confirm.
really_inline simdjson_result<stream::document>::operator bool() noexcept(false) {
139+
return root();
140+
}
141141

142142
really_inline stream::array_iterator simdjson_result<stream::document>::begin() & noexcept(false) {
143143
return root().begin();

include/simdjson/stream/document.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class document {
2525
really_inline simdjson_result<double> get_double() noexcept;
2626
really_inline simdjson_result<uint64_t> get_uint64() noexcept;
2727
really_inline simdjson_result<int64_t> get_int64() noexcept;
28-
// really_inline simdjson_result<bool> get_bool() noexcept;
28+
really_inline simdjson_result<bool> get_bool() noexcept;
2929

3030
#if SIMDJSON_EXCEPTIONS
3131
really_inline operator array() noexcept(false);
@@ -35,7 +35,7 @@ class document {
3535
really_inline operator double() noexcept(false);
3636
really_inline operator uint64_t() noexcept(false);
3737
really_inline operator int64_t() noexcept(false);
38-
// really_inline operator bool() noexcept(false);
38+
really_inline operator bool() noexcept(false);
3939

4040
really_inline array_iterator begin() noexcept(false);
4141
really_inline array_iterator end() noexcept(false);
@@ -67,7 +67,7 @@ struct simdjson_result<stream::document> : public internal::simdjson_result_base
6767
really_inline simdjson_result<double> get_double() noexcept;
6868
really_inline simdjson_result<uint64_t> get_uint64() noexcept;
6969
really_inline simdjson_result<int64_t> get_int64() noexcept;
70-
// really_inline simdjson_result<bool> get_bool() noexcept;
70+
really_inline simdjson_result<bool> get_bool() noexcept;
7171

7272
#if SIMDJSON_EXCEPTIONS
7373
really_inline operator stream::array() noexcept(false);
@@ -77,7 +77,7 @@ struct simdjson_result<stream::document> : public internal::simdjson_result_base
7777
really_inline operator double() noexcept(false);
7878
really_inline operator uint64_t() noexcept(false);
7979
really_inline operator int64_t() noexcept(false);
80-
// really_inline operator bool() noexcept(false);
80+
really_inline operator bool() noexcept(false);
8181

8282
really_inline stream::array_iterator begin() & noexcept(false);
8383
really_inline stream::array_iterator end() & noexcept(false);

include/simdjson/stream/element-inl.h

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "simdjson/stream/object.h"
77
#include "simdjson/internal/logger.h"
88
#include "simdjson/internal/numberparsing.h"
9+
#include "simdjson/internal/atomparsing.h"
910

1011
namespace simdjson {
1112
namespace stream {
@@ -71,6 +72,18 @@ really_inline simdjson_result<int64_t> element::get_int64() noexcept {
7172
internal::logger::log_event("integer", json);
7273
return internal::numberparsing::parse_integer(json.advance());
7374
}
75+
// Consumes this element as a JSON boolean.
//
// Marks the element consumed, logs a "bool" event and advances the JSON cursor once.
// The token must be exactly "true" or "false" and the byte following the literal must
// make internal::is_not_structural_or_whitespace() return 0; otherwise INCORRECT_TYPE
// is returned. Note that the element stays marked as consumed on the error path too.
really_inline simdjson_result<bool> element::get_bool() noexcept {
  assert(!consumed);
  consumed = true;
  internal::logger::log_event("bool", json);

  const auto atom = json.advance();
  const bool matched_true = internal::atomparsing::str4ncmp(atom, "true") == 0;
  // "false" is five bytes: compare "fals" as one word, then the trailing 'e' separately.
  const bool matched_false = (internal::atomparsing::str4ncmp(atom, "fals") | (atom[4] ^ 'e')) == 0;

  // Neither literal matched at all.
  if (unlikely(!(matched_true || matched_false))) {
    return INCORRECT_TYPE;
  }
  // The literal matched; make sure nothing is glued onto its end (e.g. "truex").
  const int literal_len = matched_true ? 4 : 5;
  if (unlikely(internal::is_not_structural_or_whitespace(atom[literal_len]))) {
    return INCORRECT_TYPE;
  }
  return matched_true;
}
7487

7588
WARN_UNUSED really_inline bool element::finish(int parent_depth) noexcept {
7689
// If the user didn't even do anything with the element, we have to skip it wholesale.
@@ -91,11 +104,15 @@ WARN_UNUSED really_inline bool element::finish(int parent_depth) noexcept {
91104
case ',':
92105
case ':':
93106
// This is not OK. Go backwards.
107+
internal::logger::log_event("NOT OK", json, true);
94108
json.prev();
95109
return false;
96110
default:
111+
internal::logger::log_event("skip", json, true);
97112
return true;
98113
}
114+
} else {
115+
internal::logger::log_event("already consumed", json, true);
99116
}
100117
// Now we're past any scalar values.
101118
// If we're inside an array or hash, count brackets until we are back to current depth.
@@ -144,9 +161,9 @@ really_inline element::operator uint64_t() noexcept(false) {
144161
really_inline element::operator int64_t() noexcept(false) {
145162
return get_int64();
146163
}
147-
// really_inline element::operator bool() noexcept(false) {
148-
// return get_bool();
149-
// }
164+
// Conversion of the element to bool: calls get_bool() and converts its simdjson_result<bool> to bool.
// Declared noexcept(false).
// NOTE(review): presumably the result's conversion throws simdjson_error on failure — confirm.
really_inline element::operator bool() noexcept(false) {
165+
return get_bool();
166+
}
150167

151168
inline array_iterator element::begin() noexcept(false) {
152169
return get_array().begin();
@@ -197,10 +214,10 @@ really_inline simdjson_result<int64_t> simdjson_result<stream::element&>::get_in
197214
if (error()) { return error(); }
198215
return first.get_int64();
199216
}
200-
// really_inline simdjson_result<bool> simdjson_result<stream::element&>::get_bool() noexcept {
201-
// if (error()) { return error(); }
202-
// return first.get_bool();
203-
// }
217+
// Reads the wrapped element as a boolean.
// If this result already holds an error, that error is returned unchanged and the element
// is not touched; otherwise the call is forwarded to first.get_bool().
really_inline simdjson_result<bool> simdjson_result<stream::element&>::get_bool() noexcept {
218+
if (error()) { return error(); }
219+
return first.get_bool();
220+
}
204221

205222
#if SIMDJSON_EXCEPTIONS
206223

@@ -232,10 +249,10 @@ really_inline simdjson_result<stream::element&>::operator int64_t() noexcept(fal
232249
if (error()) { throw simdjson_error(error()); }
233250
return first;
234251
}
235-
// really_inline simdjson_result<stream::element&>::operator bool() noexcept(false) {
236-
// if (error()) { throw simdjson_error(error()); } }
237-
// return first;
238-
// }
252+
// Conversion of the wrapped element to bool.
// Throws simdjson_error carrying the stored error code if this result holds an error;
// otherwise returns `first`, relying on the element's own conversion to bool.
really_inline simdjson_result<stream::element&>::operator bool() noexcept(false) {
253+
if (error()) { throw simdjson_error(error()); }
254+
return first;
255+
}
239256

240257
really_inline stream::array_iterator simdjson_result<stream::element&>::begin() noexcept(false) {
241258
if (error()) { throw simdjson_error(error()); }

include/simdjson/stream/element.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class element {
2626
really_inline simdjson_result<double> get_double() noexcept;
2727
really_inline simdjson_result<uint64_t> get_uint64() noexcept;
2828
really_inline simdjson_result<int64_t> get_int64() noexcept;
29-
// really_inline simdjson_result<bool> get_bool() noexcept;
29+
really_inline simdjson_result<bool> get_bool() noexcept;
3030

3131
#if SIMDJSON_EXCEPTIONS
3232
really_inline operator array() noexcept(false);
@@ -36,7 +36,7 @@ class element {
3636
really_inline operator double() noexcept(false);
3737
really_inline operator uint64_t() noexcept(false);
3838
really_inline operator int64_t() noexcept(false);
39-
// really_inline operator bool() noexcept(false);
39+
really_inline operator bool() noexcept(false);
4040

4141
really_inline array_iterator begin() noexcept(false);
4242
really_inline array_iterator end() noexcept(false);
@@ -78,7 +78,7 @@ struct simdjson_result<stream::element&> : public internal::simdjson_result_base
7878
really_inline simdjson_result<double> get_double() noexcept;
7979
really_inline simdjson_result<uint64_t> get_uint64() noexcept;
8080
really_inline simdjson_result<int64_t> get_int64() noexcept;
81-
// really_inline simdjson_result<bool> get_bool() noexcept;
81+
really_inline simdjson_result<bool> get_bool() noexcept;
8282

8383
#if SIMDJSON_EXCEPTIONS
8484
really_inline operator stream::array() noexcept(false);
@@ -88,7 +88,7 @@ struct simdjson_result<stream::element&> : public internal::simdjson_result_base
8888
really_inline operator double() noexcept(false);
8989
really_inline operator uint64_t() noexcept(false);
9090
really_inline operator int64_t() noexcept(false);
91-
// really_inline operator bool() noexcept(false);
91+
really_inline operator bool() noexcept(false);
9292

9393
really_inline stream::array_iterator begin() noexcept(false);
9494
really_inline stream::array_iterator end() noexcept(false);

include/simdjson/stream/object-inl.h

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,28 +36,28 @@ really_inline simdjson_result<element&> object::operator[](std::string_view key)
3636
// object::iterator
3737
//
3838
really_inline object::iterator::iterator(object &_parent) noexcept
39-
: parent(_parent) {
39+
: parent{_parent}, value(parent.value.json) {
4040
}
4141
really_inline simdjson_result<field> object::iterator::operator*() noexcept {
4242
// Check the comma
4343
if (parent.at_start) {
4444
// If we're at the start, there's nothing to check. != would have bailed on empty {}
45-
internal::logger::log_event("first field", parent.value.json, true);
45+
internal::logger::log_event("first field", value.json, true);
4646
parent.at_start = false;
4747
} else {
48-
internal::logger::log_event("next field", parent.value.json);
49-
if (*parent.value.json.advance() != ',') {
50-
internal::logger::log_error("missing ,", parent.value.json);
51-
return { field(parent.value.json.get(), parent.value), TAPE_ERROR };
48+
internal::logger::log_event("next field", value.json);
49+
if (*value.json.advance() != ',') {
50+
internal::logger::log_error("missing ,", value.json);
51+
return { field(value.json.get(), value), TAPE_ERROR };
5252
}
5353
}
5454

5555
// Get the key and skip the :
56-
const uint8_t *key = parent.value.json.advance();
57-
if (*key != '"') { assert(error); internal::logger::log_error("non-string key", parent.value.json); }
58-
auto error = (*key == '"' && *parent.value.json.advance() == ':') ? SUCCESS : TAPE_ERROR;
59-
if (*parent.value.json.peek_prev() != ':') { assert(error); internal::logger::log_error("missing :", parent.value.json); }
60-
return { field(key, parent.value), error };
56+
const uint8_t *key = value.json.advance();
57+
if (*key != '"') { assert(error); internal::logger::log_error("non-string key", value.json); }
58+
auto error = (*key == '"' && *value.json.advance() == ':') ? SUCCESS : TAPE_ERROR;
59+
if (*value.json.peek_prev() != ':') { assert(error); internal::logger::log_error("missing :", value.json); }
60+
return { field(key, value), error };
6161
}
6262
really_inline object::iterator &object::iterator::operator++() noexcept {
6363
return *this;
@@ -66,15 +66,15 @@ really_inline bool object::iterator::operator!=(const object::iterator &) noexce
6666
// Finish the previous value if it wasn't finished already
6767
if (!parent.at_start) {
6868
// If finish() fails, it's because it found a stray } or ]
69-
if (!parent.value.finish(parent.depth)) {
69+
if (!value.finish(parent.depth)) {
7070
return true;
7171
}
7272
}
7373
// Stop if we hit }
74-
if (*parent.value.json.get() == '}') {
75-
parent.value.json.depth--;
76-
internal::logger::log_end_event("object", parent.value.json);
77-
parent.value.json.advance();
74+
if (*value.json.get() == '}') {
75+
value.json.depth--;
76+
internal::logger::log_end_event("object", value.json);
77+
value.json.advance();
7878
return false;
7979
}
8080
return true;

include/simdjson/stream/object.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class object {
2121
private:
2222
really_inline iterator(object &parent) noexcept;
2323
object &parent;
24+
element value;
2425
friend class object;
2526
}; // class iterator
2627

tests/streamtests.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,36 @@ namespace stream_tests {
124124
return true;
125125
}
126126

127+
// Test: streams an array of car objects and prints the make and model of every car whose
// "busted" field converts to true. Contains no explicit assertions and always returns true.
// NOTE(review): failures presumably surface as exceptions thrown by the conversions — confirm.
static bool busted_cars() {
128+
std::cout << "Running " << __func__ << std::endl;
129+
auto cars_json = R"( [
130+
{ "make": "Kia", "model": "Soul", "year": 2012, "tire_pressure": [ 30.1, 31.0, 28.6, 28.7 ], "busted": false },
131+
{ "make": "Toyota", "model": "Camry", "year": 2018, "tire_pressure": [ 40.1, 39.9, 37.7, 40.4 ], "busted": true },
132+
{ "make": "Toyota", "model": "Tercel", "year": 1999, "tire_pressure": [ 29.8, 30.0, 30.2, 30.5 ], "busted": false }
133+
] )"_padded;
134+
dom::parser parser;
135+
136+
// Parse and iterate through each car
137+
for (stream::object car : parser.stream(cars_json)) {
138+
// NOTE this parses the strings, but we have to do it before we read "busted" because we have
139+
// a single (hidden) iterator. Let's see if we can avoid the parse unless we choose the car;
140+
// a raw_json_string tied to the string writer will probably do fine.
141+
std::string_view make = car["make"];
142+
std::string_view model = car["model"];
143+
if (car["busted"]) {
144+
std::cout << make << " " << model << " is busted!" << std::endl;
145+
}
146+
}
147+
return true;
148+
}
149+
127150
static bool run() {
128151
return true
129152
&& cars_count()
130153
&& average_tire_pressure_int()
131154
&& average_tire_pressure()
132155
&& newest_model()
156+
&& busted_cars()
133157
;
134158
}
135159
}

0 commit comments

Comments
 (0)