Skip to content

Commit cbb36ad

Browse files
committed
Add string parsing, multiple object lookups
1 parent 5c6531e commit cbb36ad

File tree

11 files changed

+1859
-83
lines changed

11 files changed

+1859
-83
lines changed

include/simdjson/internal/numberparsing.h

Lines changed: 1229 additions & 0 deletions
Large diffs are not rendered by default.

include/simdjson/internal/stringparsing.h

Lines changed: 367 additions & 0 deletions
Large diffs are not rendered by default.

include/simdjson/stream/document-inl.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ really_inline simdjson_result<object> document::get_object() noexcept {
2828
// Forwards to root.get_raw_json_string() and returns its result unchanged.
really_inline simdjson_result<raw_json_string> document::get_raw_json_string() noexcept {
2929
return root.get_raw_json_string();
3030
}
31-
// really_inline simdjson_result<std::string_view> document::get_string() noexcept {
32-
// return root.get_string();
33-
// }
31+
// Forwards to root.get_string() and returns its result unchanged.
// (Enabled by this commit; previously commented out.)
really_inline simdjson_result<std::string_view> document::get_string() noexcept {
32+
return root.get_string();
33+
}
3434
// Forwards to root.get_double() and returns its result unchanged.
really_inline simdjson_result<double> document::get_double() noexcept {
3535
return root.get_double();
3636
}
@@ -54,9 +54,9 @@ really_inline document::operator object() noexcept(false) {
5454
// Conversion to raw_json_string: returns root and relies on root's own conversion to
// raw_json_string. Declared noexcept(false), so the conversion is allowed to throw.
// NOTE(review): root is presumably a stream::element; its declaration is not visible here.
really_inline document::operator raw_json_string() noexcept(false) {
5555
return root;
5656
}
57-
// really_inline document::operator std::string_view() noexcept(false) {
58-
// return root;
59-
// }
57+
// Conversion to std::string_view: returns root and relies on root's own conversion to
// std::string_view. Declared noexcept(false), so the conversion is allowed to throw.
// (Enabled by this commit; previously commented out.)
really_inline document::operator std::string_view() noexcept(false) {
58+
return root;
59+
}
6060
// Conversion to double: returns root and relies on root's own conversion to double.
// Declared noexcept(false), so the conversion is allowed to throw.
really_inline document::operator double() noexcept(false) {
6161
return root;
6262
}
@@ -97,9 +97,9 @@ really_inline simdjson_result<stream::object> simdjson_result<stream::document>:
9797
// Result-wrapper overload: forwards to root().get_raw_json_string().
// No error check is done here; whatever root() returns is responsible for propagating one.
really_inline simdjson_result<stream::raw_json_string> simdjson_result<stream::document>::get_raw_json_string() noexcept {
9898
return root().get_raw_json_string();
9999
}
100-
// really_inline simdjson_result<std::string_view> simdjson_result<stream::document>::get_string() noexcept {
101-
// return root().get_string();
102-
// }
100+
// Result-wrapper overload: forwards to root().get_string().
// (Enabled by this commit; previously commented out.)
really_inline simdjson_result<std::string_view> simdjson_result<stream::document>::get_string() noexcept {
101+
return root().get_string();
102+
}
103103
// Result-wrapper overload: forwards to root().get_double().
really_inline simdjson_result<double> simdjson_result<stream::document>::get_double() noexcept {
104104
return root().get_double();
105105
}
@@ -123,9 +123,9 @@ really_inline simdjson_result<stream::document>::operator stream::object() noexc
123123
// Conversion to stream::raw_json_string: returns root() and relies on the conversion operator
// of whatever root() yields. Declared noexcept(false), so the conversion is allowed to throw.
really_inline simdjson_result<stream::document>::operator stream::raw_json_string() noexcept(false) {
124124
return root();
125125
}
126-
// really_inline simdjson_result<stream::document>::operator std::string_view() noexcept(false) {
127-
// return root();
128-
// }
126+
// Conversion to std::string_view: returns root() and relies on the conversion operator of
// whatever root() yields. Declared noexcept(false), so the conversion is allowed to throw.
// (Enabled by this commit; previously commented out.)
really_inline simdjson_result<stream::document>::operator std::string_view() noexcept(false) {
127+
return root();
128+
}
129129
// Conversion to double: returns root() and relies on the conversion operator of whatever
// root() yields. Declared noexcept(false), so the conversion is allowed to throw.
really_inline simdjson_result<stream::document>::operator double() noexcept(false) {
130130
return root();
131131
}

include/simdjson/stream/document.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class document {
2121
really_inline simdjson_result<array> get_array() noexcept;
2222
really_inline simdjson_result<object> get_object() noexcept;
2323
really_inline simdjson_result<raw_json_string> get_raw_json_string() noexcept;
24-
// really_inline simdjson_result<std::string_view> get_string() noexcept;
24+
really_inline simdjson_result<std::string_view> get_string() noexcept;
2525
really_inline simdjson_result<double> get_double() noexcept;
2626
really_inline simdjson_result<uint64_t> get_uint64() noexcept;
2727
really_inline simdjson_result<int64_t> get_int64() noexcept;
@@ -31,7 +31,7 @@ class document {
3131
really_inline operator array() noexcept(false);
3232
really_inline operator object() noexcept(false);
3333
really_inline operator raw_json_string() noexcept(false);
34-
// really_inline operator std::string_view() noexcept(false);
34+
really_inline operator std::string_view() noexcept(false);
3535
really_inline operator double() noexcept(false);
3636
really_inline operator uint64_t() noexcept(false);
3737
really_inline operator int64_t() noexcept(false);
@@ -63,7 +63,7 @@ struct simdjson_result<stream::document> : public internal::simdjson_result_base
6363
really_inline simdjson_result<stream::array> get_array() & noexcept;
6464
really_inline simdjson_result<stream::object> get_object() & noexcept;
6565
really_inline simdjson_result<stream::raw_json_string> get_raw_json_string() noexcept;
66-
// really_inline simdjson_result<std::string_view> get_string() noexcept;
66+
really_inline simdjson_result<std::string_view> get_string() noexcept;
6767
really_inline simdjson_result<double> get_double() noexcept;
6868
really_inline simdjson_result<uint64_t> get_uint64() noexcept;
6969
really_inline simdjson_result<int64_t> get_int64() noexcept;
@@ -73,7 +73,7 @@ struct simdjson_result<stream::document> : public internal::simdjson_result_base
7373
really_inline operator stream::array() noexcept(false);
7474
really_inline operator stream::object() noexcept(false);
7575
really_inline operator stream::raw_json_string() noexcept(false);
76-
// really_inline operator std::string_view() noexcept(false);
76+
really_inline operator std::string_view() noexcept(false);
7777
really_inline operator double() noexcept(false);
7878
really_inline operator uint64_t() noexcept(false);
7979
really_inline operator int64_t() noexcept(false);

include/simdjson/stream/element-inl.h

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -45,31 +45,31 @@ really_inline simdjson_result<raw_json_string> element::get_raw_json_string() no
4545
if (error) { internal::logger::log_error("not a string", json); }
4646
return { raw_json_string(str+1), error };
4747
}
48-
// really_inline simdjson_result<std::string_view> element::get_string() noexcept {
49-
// assert(!consumed);
50-
// consumed = true;
51-
// internal::logger::log_event("string", json);
52-
// auto [str, error] = get_raw_json_string();
53-
// if (error) { return error; }
54-
// return str.unescape(json.string_buf);
55-
// }
48+
// Consumes this element as a JSON string and returns its unescaped contents.
//
// Asserts the element has not been consumed yet, marks it consumed, logs a "string" event,
// fetches the raw string through get_raw_json_string() and unescapes it into json.string_buf.
// An error from get_raw_json_string() is returned as-is; otherwise the result of
// raw_json_string::unescape() is returned.
//
// NOTE(review): the numeric getters in this file all open with assert(!consumed); consumed = true;.
// If get_raw_json_string() opens the same way (its first lines are not visible here), calling
// it after consumed has already been set would trip that assert in debug builds - confirm.
really_inline simdjson_result<std::string_view> element::get_string() noexcept {
49+
assert(!consumed);
50+
consumed = true;
51+
internal::logger::log_event("string", json);
52+
auto [str, error] = get_raw_json_string();
53+
if (error) { return error; }
54+
// unescape() takes the buffer pointer by reference, so json.string_buf is updated by the call.
return str.unescape(json.string_buf);
55+
}
5656
// Consumes this element as a double: asserts it has not been consumed, marks it consumed,
// logs a "double" event and parses the token returned by json.advance().
// The -/+ pair below is the commit's change: the parser is now looked up in
// internal::numberparsing instead of internal.
really_inline simdjson_result<double> element::get_double() noexcept {
5757
assert(!consumed);
5858
consumed = true;
5959
internal::logger::log_event("double", json);
60-
return internal::parse_double(json.advance());
60+
return internal::numberparsing::parse_double(json.advance());
6161
}
6262
// Consumes this element as an unsigned 64-bit integer: asserts it has not been consumed,
// marks it consumed, logs an "unsigned" event and parses the token returned by json.advance().
// The -/+ pair below is the commit's change: the parser is now looked up in
// internal::numberparsing instead of internal.
really_inline simdjson_result<uint64_t> element::get_uint64() noexcept {
6363
assert(!consumed);
6464
consumed = true;
6565
internal::logger::log_event("unsigned", json);
66-
return internal::parse_unsigned(json.advance());
66+
return internal::numberparsing::parse_unsigned(json.advance());
6767
}
6868
// Consumes this element as a signed 64-bit integer: asserts it has not been consumed,
// marks it consumed, logs an "integer" event and parses the token returned by json.advance().
// The -/+ pair below is the commit's change: the parser is now looked up in
// internal::numberparsing instead of internal.
really_inline simdjson_result<int64_t> element::get_int64() noexcept {
6969
assert(!consumed);
7070
consumed = true;
7171
internal::logger::log_event("integer", json);
72-
return internal::parse_integer(json.advance());
72+
return internal::numberparsing::parse_integer(json.advance());
7373
}
7474

7575
WARN_UNUSED really_inline bool element::finish(int parent_depth) noexcept {
@@ -132,9 +132,9 @@ really_inline element::operator object() noexcept(false) {
132132
// Conversion to raw_json_string: calls get_raw_json_string() and converts the returned
// simdjson_result to its value. Declared noexcept(false); presumably that conversion throws
// when the result carries an error - confirm against simdjson_result.
really_inline element::operator raw_json_string() noexcept(false) {
133133
return get_raw_json_string();
134134
}
135-
// really_inline element::operator std::string_view() noexcept(false) {
136-
// return get_string();
137-
// }
135+
// Conversion to std::string_view: calls get_string() and converts the returned simdjson_result
// to its value. Declared noexcept(false); presumably that conversion throws when the result
// carries an error - confirm against simdjson_result.
// (Enabled by this commit; previously commented out.)
really_inline element::operator std::string_view() noexcept(false) {
136+
return get_string();
137+
}
138138
// Conversion to double: calls get_double() and converts the returned simdjson_result to its
// value. Declared noexcept(false); presumably that conversion throws when the result carries
// an error - confirm against simdjson_result.
really_inline element::operator double() noexcept(false) {
139139
return get_double();
140140
}
@@ -181,10 +181,10 @@ really_inline simdjson_result<stream::raw_json_string> simdjson_result<stream::e
181181
if (error()) { return error(); }
182182
return first.get_raw_json_string();
183183
}
184-
// really_inline simdjson_result<std::string_view> simdjson_result<stream::element&>::get_string() noexcept {
185-
// if (error()) { return error(); }
186-
// return first.get_string();
187-
// }
184+
// Result-wrapper overload of get_string(): if this result already holds an error, that error
// is returned without touching the element; otherwise the call is forwarded to
// first.get_string().
// (Enabled by this commit; previously commented out.)
really_inline simdjson_result<std::string_view> simdjson_result<stream::element&>::get_string() noexcept {
185+
if (error()) { return error(); }
186+
return first.get_string();
187+
}
188188
really_inline simdjson_result<double> simdjson_result<stream::element&>::get_double() noexcept {
189189
if (error()) { return error(); }
190190
return first.get_double();
@@ -216,10 +216,10 @@ really_inline simdjson_result<stream::element&>::operator stream::raw_json_strin
216216
if (error()) { throw simdjson_error(error()); }
217217
return first;
218218
}
219-
// really_inline simdjson_result<stream::element&>::operator std::string_view() noexcept(false) {
220-
// if (error()) { throw simdjson_error(error()); }
221-
// return first;
222-
// }
219+
// Throwing conversion to std::string_view: throws simdjson_error built from the stored error
// if this result holds one; otherwise returns first, relying on the element's own conversion
// to std::string_view.
// (Enabled by this commit; previously commented out.)
really_inline simdjson_result<stream::element&>::operator std::string_view() noexcept(false) {
220+
if (error()) { throw simdjson_error(error()); }
221+
return first;
222+
}
223223
really_inline simdjson_result<stream::element&>::operator double() noexcept(false) {
224224
if (error()) { throw simdjson_error(error()); }
225225
return first;

include/simdjson/stream/element.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class element {
2222
really_inline simdjson_result<array> get_array() noexcept;
2323
really_inline simdjson_result<object> get_object() noexcept;
2424
really_inline simdjson_result<raw_json_string> get_raw_json_string() noexcept;
25-
// really_inline simdjson_result<std::string_view> get_string() noexcept;
25+
really_inline simdjson_result<std::string_view> get_string() noexcept;
2626
really_inline simdjson_result<double> get_double() noexcept;
2727
really_inline simdjson_result<uint64_t> get_uint64() noexcept;
2828
really_inline simdjson_result<int64_t> get_int64() noexcept;
@@ -32,7 +32,7 @@ class element {
3232
really_inline operator array() noexcept(false);
3333
really_inline operator object() noexcept(false);
3434
really_inline operator raw_json_string() noexcept(false);
35-
// really_inline operator std::string_view() noexcept(false);
35+
really_inline operator std::string_view() noexcept(false);
3636
really_inline operator double() noexcept(false);
3737
really_inline operator uint64_t() noexcept(false);
3838
really_inline operator int64_t() noexcept(false);
@@ -74,7 +74,7 @@ struct simdjson_result<stream::element&> : public internal::simdjson_result_base
7474
really_inline simdjson_result<stream::array> get_array() noexcept;
7575
really_inline simdjson_result<stream::object> get_object() noexcept;
7676
really_inline simdjson_result<stream::raw_json_string> get_raw_json_string() noexcept;
77-
// really_inline simdjson_result<std::string_view> get_string() noexcept;
77+
really_inline simdjson_result<std::string_view> get_string() noexcept;
7878
really_inline simdjson_result<double> get_double() noexcept;
7979
really_inline simdjson_result<uint64_t> get_uint64() noexcept;
8080
really_inline simdjson_result<int64_t> get_int64() noexcept;
@@ -84,7 +84,7 @@ struct simdjson_result<stream::element&> : public internal::simdjson_result_base
8484
really_inline operator stream::array() noexcept(false);
8585
really_inline operator stream::object() noexcept(false);
8686
really_inline operator stream::raw_json_string() noexcept(false);
87-
// really_inline operator std::string_view() noexcept(false);
87+
really_inline operator std::string_view() noexcept(false);
8888
really_inline operator double() noexcept(false);
8989
really_inline operator uint64_t() noexcept(false);
9090
really_inline operator int64_t() noexcept(false);

include/simdjson/stream/object-inl.h

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ namespace stream {
1111
// object
1212
//
1313
// Constructs an object over the given json_iterator.
// The + side of this diff additionally records json.depth as seen at construction and starts
// with at_start = true; before this commit that state was held by object::iterator.
really_inline object::object(internal::json_iterator &json) noexcept
14-
: value(json) {
14+
: value(json), depth{json.depth}, at_start{true} {
1515
}
1616

1717
// Returns an iterator bound to this object.
// After this commit the iterator carries no position of its own (no at_start flag is passed);
// it only references the parent object, so begin() and end() build identical iterators.
really_inline object::iterator object::begin() noexcept {
18-
return iterator(value, true);
18+
return iterator(*this);
1919
}
2020
// Returns an iterator bound to this object; identical to what begin() returns after this
// commit. Loop termination is decided inside iterator::operator!=, which ignores its argument.
really_inline object::iterator object::end() noexcept {
21-
return iterator(value, false);
21+
return iterator(*this);
2222
}
2323
really_inline simdjson_result<element&> object::operator[](std::string_view key) noexcept {
2424
internal::logger::log_event("lookup key", value.json);
@@ -35,46 +35,46 @@ really_inline simdjson_result<element&> object::operator[](std::string_view key)
3535
//
3636
// object::iterator
3737
//
38-
// Iterator constructor. The - side took the element plus an at_start flag and captured the
// depth itself; the + side only stores a reference to the parent object, whose value, depth
// and at_start members the iterator operators now use.
really_inline object::iterator::iterator(element &_value, bool _at_start) noexcept
39-
: value(_value), depth{_value.json.depth}, at_start{_at_start} {
38+
really_inline object::iterator::iterator(object &_parent) noexcept
39+
: parent(_parent) {
4040
}
4141
/**
 * Reads the field the iteration is currently positioned at (post-commit version, using the
 * state stored on the parent object).
 *
 * On the first dereference for the parent (parent.at_start is true) no separator is expected
 * and at_start is cleared. On later dereferences the next token must be ','. The key token is
 * then consumed, followed by the ':' token when the key starts with a quote.
 *
 * @return The field (key pointer plus parent.value) paired with SUCCESS, or paired with
 *         TAPE_ERROR when the ',' separator is missing, the key does not start with '"', or
 *         the key is not followed by ':'.
 */
really_inline simdjson_result<field> object::iterator::operator*() noexcept {
  // Check the comma
  if (parent.at_start) {
    // If we're at the start, there's nothing to check. != would have bailed on empty {}
    internal::logger::log_event("first field", parent.value.json, true);
    parent.at_start = false;
  } else {
    internal::logger::log_event("next field", parent.value.json);
    if (*parent.value.json.advance() != ',') {
      internal::logger::log_error("missing ,", parent.value.json);
      return { field(parent.value.json.get(), parent.value), TAPE_ERROR };
    }
  }

  // Get the key and skip the :
  const uint8_t *key = parent.value.json.advance();
  // Compute the status before the diagnostics so that both asserts below refer to this local
  // (previously the first assert ran before `error` was declared). The && short-circuits, so
  // the ':' token is only consumed when the key starts with a quote; the iterator position
  // seen by the logging calls is therefore the same as before the reordering.
  auto error = (*key == '"' && *parent.value.json.advance() == ':') ? SUCCESS : TAPE_ERROR;
  if (*key != '"') { assert(error); internal::logger::log_error("non-string key", parent.value.json); }
  if (*parent.value.json.peek_prev() != ':') { assert(error); internal::logger::log_error("missing :", parent.value.json); }
  return { field(key, parent.value), error };
}
6262
// Intentionally does nothing and returns *this. The work of moving forward is done elsewhere:
// operator!= finishes the previous value and checks for '}', and operator* consumes the ','
// separator and the key.
really_inline object::iterator &object::iterator::operator++() noexcept {
6363
return *this;
6464
}
6565
really_inline bool object::iterator::operator!=(const object::iterator &) noexcept {
6666
// Finish the previous value if it wasn't finished already
67-
if (!at_start) {
67+
if (!parent.at_start) {
6868
// If finish() fails, it's because it found a stray } or ]
69-
if (!value.finish(depth)) {
69+
if (!parent.value.finish(parent.depth)) {
7070
return true;
7171
}
7272
}
7373
// Stop if we hit }
74-
if (*value.json.get() == '}') {
75-
value.json.depth--;
76-
internal::logger::log_end_event("object", value.json);
77-
value.json.advance();
74+
if (*parent.value.json.get() == '}') {
75+
parent.value.json.depth--;
76+
internal::logger::log_end_event("object", parent.value.json);
77+
parent.value.json.advance();
7878
return false;
7979
}
8080
return true;

include/simdjson/stream/object.h

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,8 @@ class object {
1919
really_inline iterator &operator++() noexcept;
2020
really_inline bool operator!=(const iterator &other) noexcept;
2121
private:
22-
really_inline iterator(element &value, bool at_start) noexcept;
23-
24-
element &value;
25-
26-
int depth;
27-
28-
/**
29-
* true if we're at the beginning.
30-
*
31-
* This sorta sucks, but the C++ iterator interface doesn't offer any clever ways to differentiate
32-
* the first iteration of a loop from subsequent iterations. We are left with hoping that the
33-
* compiler will notice at_start gets set to false.
34-
*/
35-
bool at_start;
36-
22+
really_inline iterator(object &parent) noexcept;
23+
object &parent;
3724
friend class object;
3825
}; // class iterator
3926

@@ -52,6 +39,17 @@ class object {
5239

5340
element value;
5441

42+
int depth;
43+
44+
/**
45+
* true if we're at the beginning.
46+
*
47+
* This sorta sucks, but the C++ iterator interface doesn't offer any clever ways to differentiate
48+
* the first iteration of a loop from subsequent iterations. We are left with hoping that the
49+
* compiler will notice at_start gets set to false.
50+
*/
51+
bool at_start;
52+
5553
friend class element;
5654
friend class simdjson_result<element&>;
5755
friend class simdjson_result<document>;

include/simdjson/stream/raw_json_string.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include "simdjson/common_defs.h"
55
#include "simdjson/error.h"
6+
#include "simdjson/internal/stringparsing.h"
67

78
namespace simdjson {
89
namespace stream {
@@ -19,9 +20,13 @@ class field;
1920
class raw_json_string {
2021
public:
2122
// Returns the stored buffer pointer viewed as const char*; no unescaping or copying is done.
really_inline const char * raw() const noexcept { return (const char *)buf; }
22-
// really_inline WARN_UNUSED error_code unescape(uint8_t *&dst) const noexcept {
23-
// return simdjson::active_implementation->parse_string(buf, dst);
24-
// }
23+
// Unescapes this raw JSON string into the caller-supplied buffer.
//
// dst is taken by reference: on success it is advanced to the end pointer returned by
// parse_string, so a following call writes after this string. On failure (parse_string returns
// null) STRING_ERROR is returned and dst is left unchanged.
//
// The returned string_view spans [dst-before-call, end) and points into the caller's buffer,
// so it is only usable while that buffer's contents are kept.
// NOTE(review): presumably parse_string returns one past the last byte it wrote - confirm in
// internal/stringparsing.h (not rendered in this view).
really_inline WARN_UNUSED simdjson_result<std::string_view> unescape(uint8_t *&dst) const noexcept {
24+
uint8_t *end = internal::stringparsing::parse_string(buf, dst);
25+
if (!end) { return STRING_ERROR; }
26+
std::string_view result((const char *)dst, end-dst);
27+
dst = end;
28+
return result;
29+
}
2530
private:
2631
const uint8_t * const buf;
2732

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ target_compile_definitions(stringparsingcheck PRIVATE NOMINMAX)
5353
link_libraries(simdjson)
5454
add_cpp_test(basictests LABELS acceptance per_implementation)
5555
add_cpp_test(errortests LABELS acceptance per_implementation)
56+
add_cpp_test(streamtests LABELS acceptance per_implementation)
5657
add_cpp_test(integer_tests LABELS acceptance per_implementation)
5758
add_cpp_test(jsoncheck LABELS acceptance per_implementation)
5859
add_cpp_test(parse_many_test LABELS acceptance per_implementation)

0 commit comments

Comments
 (0)