@@ -52,11 +52,13 @@ class document {
52
52
class object ;
53
53
class key_value_pair ;
54
54
class parser ;
55
+ class stream ;
55
56
56
57
template <typename T=element>
57
58
class element_result ;
58
59
class doc_result ;
59
60
class doc_ref_result ;
61
+ class stream_result ;
60
62
61
63
// Nested classes. See definitions later in file.
62
64
using iterator = document_iterator<DEFAULT_MAX_DEPTH>;
@@ -315,6 +317,7 @@ class document::doc_ref_result {
315
317
private:
316
318
doc_ref_result (document &_doc, error_code _error) noexcept ;
317
319
friend class document ::parser;
320
+ friend class document ::stream;
318
321
}; // class document::doc_ref_result
319
322
320
323
/* *
@@ -927,6 +930,255 @@ class document::parser {
927
930
// We do not want to allow implicit conversion from C string to std::string.
928
931
really_inline doc_ref_result parse (const char *buf) noexcept = delete;
929
932
933
+ /* *
934
+ * Parse a buffer containing many JSON documents.
935
+ *
936
+ * document::parser parser;
937
+ * for (const document &doc : parser.parse_many(buf, len)) {
938
+ * cout << std::string(doc["title"]) << endl;
939
+ * }
940
+ *
941
+ * ### Format
942
+ *
943
+ * The buffer must contain a series of one or more JSON documents, concatenated into a single
944
+ * buffer, separated by whitespace. It effectively parses until it has a fully valid document,
945
+ * then starts parsing the next document at that point. (It does this with more parallelism and
946
+ * lookahead than you might think, though.)
947
+ *
948
+ * Documents that consist of an object or array may omit the whitespace between them, concatenating
949
+ * with no separator. Documents that consist of a single primitive (i.e. documents that are not
950
+ * arrays or objects) MUST be separated with whitespace.
951
+ *
952
+ * ### Error Handling
953
+ *
954
+ * All errors are returned during iteration: if there is a global error such as memory allocation,
955
+ * it will be yielded as the first result. Iteration always stops after the first error.
956
+ *
957
+ * As with all other simdjson methods, non-exception error handling is readily available through
958
+ * the same interface, requiring you to check the error before using the document:
959
+ *
960
+ * document::parser parser;
961
+ * for (auto [doc, error] : parser.parse_many(buf, len)) {
962
+ * if (error) { cerr << error_message(error) << endl; exit(1); }
963
+ * cout << std::string(doc["title"]) << endl;
964
+ * }
965
+ *
966
+ * ### REQUIRED: Buffer Padding
967
+ *
968
+ * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
969
+ * those bytes are initialized to, as long as they are allocated.
970
+ *
971
+ * ### Threads
972
+ *
973
+ * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
974
+ * hood to do some lookahead.
975
+ *
976
+ * ### Parser Capacity
977
+ *
978
+ * If the parser is unallocated, it will be auto-allocated to batch_size. If it is already
979
+ * allocated, it must have a capacity at least as large as batch_size.
980
+ *
981
+ * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
982
+ * @param len The length of the concatenated JSON.
983
+ * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
984
+ * spot is cache-related: small enough to fit in cache, yet big enough to
985
+ * parse as many documents as possible in one tight loop.
986
+ * Defaults to 1MB (1000000 bytes), which has been a reasonable sweet spot in our tests.
987
+ * @return The stream. If there is an error, it will be returned during iteration. An empty input
988
+ * will yield 0 documents rather than an EMPTY error. Errors:
989
+ * - MEMALLOC if the parser is unallocated and memory allocation fails
990
+ * - CAPACITY if the parser already has a capacity, and it is less than batch_size
991
+ * - other json errors if parsing fails.
992
+ */
993
+ inline stream parse_many (const uint8_t *buf, size_t len, size_t batch_size = 1000000 ) noexcept ;
994
+
995
+ /* *
996
+ * Parse a buffer containing many JSON documents.
997
+ *
998
+ * document::parser parser;
999
+ * for (const document &doc : parser.parse_many(buf, len)) {
1000
+ * cout << std::string(doc["title"]) << endl;
1001
+ * }
1002
+ *
1003
+ * ### Format
1004
+ *
1005
+ * The buffer must contain a series of one or more JSON documents, concatenated into a single
1006
+ * buffer, separated by whitespace. It effectively parses until it has a fully valid document,
1007
+ * then starts parsing the next document at that point. (It does this with more parallelism and
1008
+ * lookahead than you might think, though.)
1009
+ *
1010
+ * Documents that consist of an object or array may omit the whitespace between them, concatenating
1011
+ * with no separator. Documents that consist of a single primitive (i.e. documents that are not
1012
+ * arrays or objects) MUST be separated with whitespace.
1013
+ *
1014
+ * ### Error Handling
1015
+ *
1016
+ * All errors are returned during iteration: if there is a global error such as memory allocation,
1017
+ * it will be yielded as the first result. Iteration always stops after the first error.
1018
+ *
1019
+ * As with all other simdjson methods, non-exception error handling is readily available through
1020
+ * the same interface, requiring you to check the error before using the document:
1021
+ *
1022
+ * document::parser parser;
1023
+ * for (auto [doc, error] : parser.parse_many(buf, len)) {
1024
+ * if (error) { cerr << error_message(error) << endl; exit(1); }
1025
+ * cout << std::string(doc["title"]) << endl;
1026
+ * }
1027
+ *
1028
+ * ### REQUIRED: Buffer Padding
1029
+ *
1030
+ * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
1031
+ * those bytes are initialized to, as long as they are allocated.
1032
+ *
1033
+ * ### Threads
1034
+ *
1035
+ * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
1036
+ * hood to do some lookahead.
1037
+ *
1038
+ * ### Parser Capacity
1039
+ *
1040
+ * If the parser is unallocated, it will be auto-allocated to batch_size. If it is already
1041
+ * allocated, it must have a capacity at least as large as batch_size.
1042
+ *
1043
+ * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
1044
+ * @param len The length of the concatenated JSON.
1045
+ * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
1046
+ * spot is cache-related: small enough to fit in cache, yet big enough to
1047
+ * parse as many documents as possible in one tight loop.
1048
+ * Defaults to 1MB (1000000 bytes), which has been a reasonable sweet spot in our tests.
1049
+ * @return The stream. If there is an error, it will be returned during iteration. An empty input
1050
+ * will yield 0 documents rather than an EMPTY error. Errors:
1051
+ * - MEMALLOC if the parser is unallocated and memory allocation fails
1052
+ * - CAPACITY if the parser already has a capacity, and it is less than batch_size
1053
+ * - other json errors if parsing fails
1054
+ */
1055
+ inline stream parse_many (const char *buf, size_t len, size_t batch_size = 1000000 ) noexcept ;
1056
+
1057
+ /* *
1058
+ * Parse a buffer containing many JSON documents.
1059
+ *
1060
+ * document::parser parser;
1061
+ * for (const document &doc : parser.parse_many(buf, len)) {
1062
+ * cout << std::string(doc["title"]) << endl;
1063
+ * }
1064
+ *
1065
+ * ### Format
1066
+ *
1067
+ * The buffer must contain a series of one or more JSON documents, concatenated into a single
1068
+ * buffer, separated by whitespace. It effectively parses until it has a fully valid document,
1069
+ * then starts parsing the next document at that point. (It does this with more parallelism and
1070
+ * lookahead than you might think, though.)
1071
+ *
1072
+ * Documents that consist of an object or array may omit the whitespace between them, concatenating
1073
+ * with no separator. Documents that consist of a single primitive (i.e. documents that are not
1074
+ * arrays or objects) MUST be separated with whitespace.
1075
+ *
1076
+ * ### Error Handling
1077
+ *
1078
+ * All errors are returned during iteration: if there is a global error such as memory allocation,
1079
+ * it will be yielded as the first result. Iteration always stops after the first error.
1080
+ *
1081
+ * As with all other simdjson methods, non-exception error handling is readily available through
1082
+ * the same interface, requiring you to check the error before using the document:
1083
+ *
1084
+ * document::parser parser;
1085
+ * for (auto [doc, error] : parser.parse_many(buf, len)) {
1086
+ * if (error) { cerr << error_message(error) << endl; exit(1); }
1087
+ * cout << std::string(doc["title"]) << endl;
1088
+ * }
1089
+ *
1090
+ * ### REQUIRED: Buffer Padding
1091
+ *
1092
+ * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
1093
+ * those bytes are initialized to, as long as they are allocated.
1094
+ *
1095
+ * ### Threads
1096
+ *
1097
+ * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
1098
+ * hood to do some lookahead.
1099
+ *
1100
+ * ### Parser Capacity
1101
+ *
1102
+ * If the parser is unallocated, it will be auto-allocated to batch_size. If it is already
1103
+ * allocated, it must have a capacity at least as large as batch_size.
1104
+ *
1105
+ * @param s The concatenated JSON to parse. Must have at least s.size() + SIMDJSON_PADDING allocated bytes.
1106
+ * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
1107
+ * spot is cache-related: small enough to fit in cache, yet big enough to
1108
+ * parse as many documents as possible in one tight loop.
1109
+ * Defaults to 1MB (1000000 bytes), which has been a reasonable sweet spot in our tests.
1110
+ * @return The stream. If there is an error, it will be returned during iteration. An empty input
1111
+ * will yield 0 documents rather than an EMPTY error. Errors:
1112
+ * - MEMALLOC if the parser is unallocated and memory allocation fails
1113
+ * - CAPACITY if the parser already has a capacity, and it is less than batch_size
1114
+ * - other json errors if parsing fails
1115
+ */
1116
+ inline stream parse_many (const std::string &s, size_t batch_size = 1000000 ) noexcept ;
1117
+
1118
+ /* *
1119
+ * Parse a buffer containing many JSON documents.
1120
+ *
1121
+ * document::parser parser;
1122
+ * for (const document &doc : parser.parse_many(buf, len)) {
1123
+ * cout << std::string(doc["title"]) << endl;
1124
+ * }
1125
+ *
1126
+ * ### Format
1127
+ *
1128
+ * The buffer must contain a series of one or more JSON documents, concatenated into a single
1129
+ * buffer, separated by whitespace. It effectively parses until it has a fully valid document,
1130
+ * then starts parsing the next document at that point. (It does this with more parallelism and
1131
+ * lookahead than you might think, though.)
1132
+ *
1133
+ * Documents that consist of an object or array may omit the whitespace between them, concatenating
1134
+ * with no separator. Documents that consist of a single primitive (i.e. documents that are not
1135
+ * arrays or objects) MUST be separated with whitespace.
1136
+ *
1137
+ * ### Error Handling
1138
+ *
1139
+ * All errors are returned during iteration: if there is a global error such as memory allocation,
1140
+ * it will be yielded as the first result. Iteration always stops after the first error.
1141
+ *
1142
+ * As with all other simdjson methods, non-exception error handling is readily available through
1143
+ * the same interface, requiring you to check the error before using the document:
1144
+ *
1145
+ * document::parser parser;
1146
+ * for (auto [doc, error] : parser.parse_many(buf, len)) {
1147
+ * if (error) { cerr << error_message(error) << endl; exit(1); }
1148
+ * cout << std::string(doc["title"]) << endl;
1149
+ * }
1150
+ *
1151
+ * ### REQUIRED: Buffer Padding
1152
+ *
1153
+ * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
1154
+ * those bytes are initialized to, as long as they are allocated.
1155
+ *
1156
+ * ### Threads
1157
+ *
1158
+ * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
1159
+ * hood to do some lookahead.
1160
+ *
1161
+ * ### Parser Capacity
1162
+ *
1163
+ * If the parser is unallocated, it will be auto-allocated to batch_size. If it is already
1164
+ * allocated, it must have a capacity at least as large as batch_size.
1165
+ *
1166
+ * @param s The concatenated JSON to parse.
1167
+ * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
1168
+ * spot is cache-related: small enough to fit in cache, yet big enough to
1169
+ * parse as many documents as possible in one tight loop.
1170
+ * Defaults to 1MB (1000000 bytes), which has been a reasonable sweet spot in our tests.
1171
+ * @return The stream. If there is an error, it will be returned during iteration. An empty input
1172
+ * will yield 0 documents rather than an EMPTY error. Errors:
1173
+ * - MEMALLOC if the parser is unallocated and memory allocation fails
1174
+ * - CAPACITY if the parser already has a capacity, and it is less than batch_size
1175
+ * - other json errors if parsing fails
1176
+ */
1177
+ inline stream parse_many (const padded_string &s, size_t batch_size = 1000000 ) noexcept ;
1178
+
1179
+ // We do not want to allow implicit conversion from C string to std::string. This deleted overload deliberately takes no defaulted batch_size: a second size_t parameter would make parse_many(buf, len) ambiguous with the (const char *, size_t, size_t) overload.
1180
+ really_inline stream parse_many (const char *buf) noexcept = delete;
1181
+
930
1182
/* *
931
1183
* Current capacity: the largest document this parser can support without reallocating.
932
1184
*/
0 commit comments