Skip to content

Commit 9f26571

Browse files
authored
Merge pull request simdjson#629 from simdjson/jkeiser/parse-element
Return document::element from parser.parse()
2 parents 32afcd2 + 5ad4050 commit 9f26571

18 files changed

+138
-557
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ The simdjson library is easily consumable with a single .h and .cpp file.
4949
#include "simdjson.h"
5050
int main(void) {
5151
simdjson::document::parser parser;
52-
simdjson::document& tweets = parser.load("twitter.json");
52+
simdjson::document::element tweets = parser.load("twitter.json");
5353
std::cout << tweets["search_metadata"]["count"] << " results." << std::endl;
5454
}
5555
```

benchmark/bench_dom_api.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ const padded_string EMPTY_ARRAY("[]", 2);
1717
static void twitter_count(State& state) {
1818
// Prints the number of results in twitter.json
1919
document::parser parser;
20-
document &doc = parser.load(JSON_TEST_PATH);
20+
document::element doc = parser.load(JSON_TEST_PATH);
2121
for (auto _ : state) {
2222
uint64_t result_count = doc["search_metadata"]["count"];
2323
if (result_count != 100) { return; }
@@ -45,7 +45,7 @@ BENCHMARK(iterator_twitter_count);
4545
static void twitter_default_profile(State& state) {
4646
// Count unique users with a default profile.
4747
document::parser parser;
48-
document &doc = parser.load(JSON_TEST_PATH);
48+
document::element doc = parser.load(JSON_TEST_PATH);
4949
for (auto _ : state) {
5050
set<string_view> default_users;
5151
for (document::object tweet : doc["statuses"].as_array()) {
@@ -62,7 +62,7 @@ BENCHMARK(twitter_default_profile);
6262
static void twitter_image_sizes(State& state) {
6363
// Count unique image sizes
6464
document::parser parser;
65-
document &doc = parser.load(JSON_TEST_PATH);
65+
document::element doc = parser.load(JSON_TEST_PATH);
6666
for (auto _ : state) {
6767
set<tuple<uint64_t, uint64_t>> image_sizes;
6868
for (document::object tweet : doc["statuses"].as_array()) {
@@ -85,7 +85,7 @@ BENCHMARK(twitter_image_sizes);
8585
static void error_code_twitter_count(State& state) noexcept {
8686
// Prints the number of results in twitter.json
8787
document::parser parser;
88-
document &doc = parser.load(JSON_TEST_PATH);
88+
document::element doc = parser.load(JSON_TEST_PATH);
8989
for (auto _ : state) {
9090
auto [value, error] = doc["search_metadata"]["count"].as_uint64_t();
9191
if (error) { return; }
@@ -97,7 +97,7 @@ BENCHMARK(error_code_twitter_count);
9797
static void error_code_twitter_default_profile(State& state) noexcept {
9898
// Count unique users with a default profile.
9999
document::parser parser;
100-
document &doc = parser.load(JSON_TEST_PATH);
100+
document::element doc = parser.load(JSON_TEST_PATH);
101101
for (auto _ : state) {
102102
set<string_view> default_users;
103103

@@ -161,7 +161,7 @@ BENCHMARK(iterator_twitter_default_profile);
161161
static void error_code_twitter_image_sizes(State& state) noexcept {
162162
// Count unique image sizes
163163
document::parser parser;
164-
document &doc = parser.load(JSON_TEST_PATH);
164+
document::element doc = parser.load(JSON_TEST_PATH);
165165
for (auto _ : state) {
166166
set<tuple<uint64_t, uint64_t>> image_sizes;
167167
auto [statuses, error] = doc["statuses"].as_array();

benchmark/bench_parse_call.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static void parser_parse_exception(State& state) {
2929
if (parser.set_capacity(EMPTY_ARRAY.length())) { return; }
3030
for (auto _ : state) {
3131
try {
32-
UNUSED document &doc = parser.parse(EMPTY_ARRAY);
32+
UNUSED document::element doc = parser.parse(EMPTY_ARRAY);
3333
} catch(simdjson_error &j) {
3434
return;
3535
}
@@ -56,7 +56,7 @@ static void document_parse_exception(State& state) {
5656
for (auto _ : state) {
5757
try {
5858
document::parser parser;
59-
UNUSED document &doc = parser.parse(EMPTY_ARRAY);
59+
UNUSED document::element doc = parser.parse(EMPTY_ARRAY);
6060
} catch(simdjson_error &j) {
6161
return;
6262
}

benchmark/distinctuseridcompetition.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,9 @@ void simdjson_recurse(std::vector<int64_t> & v, simdjson::document::element elem
9595
}
9696

9797
__attribute__((noinline)) std::vector<int64_t>
98-
simdjson_just_dom(simdjson::document &doc) {
98+
simdjson_just_dom(simdjson::document::element doc) {
9999
std::vector<int64_t> answer;
100-
simdjson_recurse(answer, doc.root());
100+
simdjson_recurse(answer, doc);
101101
remove_duplicates(answer);
102102
return answer;
103103
}
@@ -106,8 +106,8 @@ __attribute__((noinline)) std::vector<int64_t>
106106
simdjson_compute_stats(const simdjson::padded_string &p) {
107107
std::vector<int64_t> answer;
108108
simdjson::document::parser parser;
109-
simdjson::document &doc = parser.parse(p);
110-
simdjson_recurse(answer, doc.root());
109+
simdjson::document::element doc = parser.parse(p);
110+
simdjson_recurse(answer, doc);
111111
remove_duplicates(answer);
112112
return answer;
113113
}
@@ -368,7 +368,7 @@ int main(int argc, char *argv[]) {
368368
BEST_TIME("sasjon (just parse) ", sasjon_just_parse(p), false, , repeat,
369369
volume, !just_data);
370370
simdjson::document::parser parser;
371-
simdjson::document &doc = parser.parse(p);
371+
simdjson::document::element doc = parser.parse(p);
372372
BEST_TIME("simdjson (just dom) ", simdjson_just_dom(doc).size(), size,
373373
, repeat, volume, !just_data);
374374
char *buffer = (char *)malloc(p.size() + 1);

benchmark/parseandstatcompetition.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ simdjson_compute_stats(const simdjson::padded_string &p) {
9595
return s;
9696
}
9797
s.valid = true;
98-
simdjson_recurse(s, doc.root());
98+
simdjson_recurse(s, doc);
9999
return s;
100100
}
101101

benchmark/statisticalmodel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
104104
reinterpret_cast<const uint8_t *>(p.data()), p.size());
105105
answer.byte_count = p.size();
106106
answer.structural_indexes_count = parser.n_structural_indexes;
107-
simdjson_recurse(answer, doc.root());
107+
simdjson_recurse(answer, doc);
108108
return answer;
109109
}
110110

doc/basics.md

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,22 @@ The simdjson library offers a simple DOM tree API, which you can access by creat
3838

3939
```c++
4040
document::parser parser;
41-
document &doc = parser.load(filename); // load and parse a file
41+
document::element doc = parser.load(filename); // load and parse a file
4242
```
4343

4444
Or by creating a padded string (for efficiency reasons, simdjson requires a string with
4545
SIMDJSON_PADDING bytes at the end) and calling `parse()`:
4646

4747
```c++
4848
document::parser parser;
49-
document &doc = parser.parse("[1,2,3]"_padded); // parse a string
49+
document::element doc = parser.parse("[1,2,3]"_padded); // parse a string
5050
```
5151

5252
Using the Parsed JSON
5353
---------------------
5454

55-
Once you have a document, you can navigate it with idiomatic C++ iterators, operators and casts.
55+
Once you have an element, you can navigate it with idiomatic C++ iterators, operators and casts.
5656

57-
* **Document Root:** To get the top level JSON element, get `doc.root()`. Many of the
58-
methods below will work on the document object itself, as well.
5957
* **Extracting Values:** You can cast a JSON element to a native type: `double(element)` or
6058
`double x = json_element`. This works for double, uint64_t, int64_t, bool,
6159
document::object and document::array. You can also use `is_*typename*()` to test if it is a
@@ -112,7 +110,7 @@ auto cars_json = R"( [
112110
{ "make": "Toyota", "model": "Tercel", "year": 1999, "tire_pressure": [ 29.8, 30.0, 30.2, 30.5 ] }
113111
] )"_padded;
114112
document::parser parser;
115-
document &cars = parser.parse(cars_json);
113+
document::element cars = parser.parse(cars_json);
116114
cout << cars["/0/tire_pressure/1"] << endl; // Prints 39.9
117115
```
118116

@@ -123,7 +121,7 @@ All simdjson APIs that can fail return `simdjson_result<T>`, which is a &lt;valu
123121
pair. The error codes and values can be accessed directly, reading the error like so:
124122

125123
```c++
126-
auto [doc, error] = parser.parse(json); // doc is a document&
124+
auto [doc, error] = parser.parse(json); // doc is a document::element
127125
if (error) { cerr << error << endl; exit(1); }
128126
// Use document here now that we've checked for the error
129127
```
@@ -138,7 +136,7 @@ behavior.
138136
> circumvent this, you can use this instead:
139137
>
140138
> ```c++
141-
> document &doc;
139+
> document::element doc;
142140
> error_code error;
143141
> parser.parse(json).tie(doc, error); // <-- Assigns to doc and error just like "auto [doc, error]"
144142
> ```
@@ -199,7 +197,7 @@ for (document::element car_element : cars) {
199197
Users more comfortable with an exception flow may choose to directly cast the `simdjson_result<T>` to the desired type:
200198

201199
```c++
202-
document &doc = parser.parse(json); // Throws an exception if there was an error!
200+
document::element doc = parser.parse(json); // Throws an exception if there was an error!
203201
```
204202

205203
When used this way, a `simdjson_error` exception will be thrown if an error occurs, preventing the
@@ -219,7 +217,7 @@ auto ndjson = R"(
219217
{ "foo": 3 }
220218
)"_padded;
221219
document::parser parser;
222-
for (document &doc : parser.load_many(filename)) {
220+
for (document::element doc : parser.load_many(filename)) {
223221
cout << doc["foo"] << endl;
224222
}
225223
// Prints 1 2 3

doc/performance.md

Lines changed: 6 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -21,64 +21,31 @@ buffers hot in cache and keeping memory allocation and initialization to a minim
2121
document::parser parser;
2222

2323
// This initializes buffers and a document big enough to handle this JSON.
24-
document &doc = parser.parse("[ true, false ]"_padded);
24+
document::element doc = parser.parse("[ true, false ]"_padded);
2525
cout << doc << endl;
2626

2727
// This reuses the existing buffers, and reuses and *overwrites* the old document
2828
doc = parser.parse("[1, 2, 3]"_padded);
2929
cout << doc << endl;
3030

3131
// This also reuses the existing buffers, and reuses and *overwrites* the old document
32-
document &doc2 = parser.parse("true"_padded);
32+
document::element doc2 = parser.parse("true"_padded);
3333
// Even if you keep the old reference around, doc and doc2 refer to the same document.
3434
cout << doc << endl;
3535
cout << doc2 << endl;
3636
```
3737

38-
It's not just internal buffers though. The simdjson library reuses the document itself. Notice that reference?
39-
`document &doc`? That's key. You are only *borrowing* the document from simdjson, which purposely
40-
reuses and overwrites it each time you call parse. This prevent wasteful and unnecessary memory
41-
allocation in 99% of cases where JSON is just read, used, and converted to native values
42-
or thrown away.
38+
It's not just internal buffers though. The simdjson library reuses the document itself. `document::element`, `document::object` and `document::array` are *references* to the internal document.
39+
You are only *borrowing* the document from simdjson, which purposely reuses and overwrites it each
40+
time you call parse. This prevents wasteful and unnecessary memory allocation in 99% of cases where
41+
JSON is just read, used, and converted to native values or thrown away.
4342

4443
> **You are only borrowing the document from the simdjson parser. Don't keep it long term!**
4544
4645
This is key: don't keep the `document&`, `document::element`, `document::array`, `document::object`
4746
or `string_view` objects you get back from the API. Convert them to C++ native values, structs and
4847
arrays that you own.
4948

50-
### Keeping documents around for longer
51-
52-
If you really need to keep parsed JSON documents around for a long time, you can **take** the
53-
document by declaring an actual `document` value.
54-
55-
```c++
56-
document::parser parser;
57-
58-
// This initializes buffers and a document big enough to handle this JSON.
59-
// By casting to document instead of document&, it "steals" the document from the parser so that it
60-
// cannot be overwritten.
61-
document keep_doc = parser.parse("[ true, false ]"_padded);
62-
63-
// This reuses the existing buffers, but initializes a new document.
64-
document &doc = parser.parse("[1, 2, 3]"_padded);
65-
66-
// Now keep_doc and doc refer to different documents.
67-
cout << keep_doc << endl;
68-
cout << doc << endl;
69-
```
70-
71-
If you're using error codes, it can be done like this:
72-
73-
```c++
74-
auto [doc_ref, error] = parser.parse(json); // doc_ref is a document&
75-
if (error) { cerr << error << endl; exit(1); }
76-
document keep_doc = doc_ref; // "steal" the document from the parser
77-
```
78-
79-
This won't allocate anything or copy document memory: instead, it will *steal* the document memory
80-
from the parser. The parser will simply allocate new document memory the next time you call parse.
81-
8249
Server Loops: Long-Running Processes and Memory Capacity
8350
--------------------------------------------------------
8451

0 commit comments

Comments
 (0)