Skip to content

Commit 725ca01

Browse files
authored
add ndjson fuzzer (simdjson#1304)
* add ndjson fuzzer * reproduce simdjson#1310 in the newly added unit test. The input had to be replaced because: 1) the fuzzer uses the first part of the input to determine the batch_size to use, so that part has to be cut off; 2) master now protects against low values of batch_size. I also made the test not return early, so that the error is triggered.
1 parent 59c857e commit 725ca01

File tree

5 files changed

+41
-2
lines changed

5 files changed

+41
-2
lines changed

.github/workflows/fuzzers.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
# fuzzers that change behaviour with SIMDJSON_FORCE_IMPLEMENTATION
1818
defaultimplfuzzers: atpointer dump dump_raw_tape element minify parser print_json
1919
# fuzzers that loop over the implementations themselves, or don't need to switch.
20-
implfuzzers: implementations minifyimpl ondemand padded utf8
20+
implfuzzers: implementations minifyimpl ndjson ondemand padded utf8
2121
implementations: haswell westmere fallback
2222
UBSAN_OPTIONS: halt_on_error=1
2323
MAXLEN: -max_len=4000

fuzz/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ if(ENABLE_FUZZING)
5757
implement_fuzzer(fuzz_implementations) # parses and serializes again, compares across implementations
5858
implement_fuzzer(fuzz_minify) # minify *with* parsing
5959
implement_fuzzer(fuzz_minifyimpl) # minify *without* parsing, plus compare implementations
60+
implement_fuzzer(fuzz_ndjson) # the ndjson api
6061
implement_fuzzer(fuzz_ondemand)
6162
implement_fuzzer(fuzz_padded)
6263
implement_fuzzer(fuzz_parser)

fuzz/FuzzUtils.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,14 @@ struct FuzzData {
105105
return {};
106106
}
107107

108+
// consumes the rest of the data as a string view
109+
std::string_view remainder_as_stringview() {
110+
std::string_view ret{chardata(),Size};
111+
Data+=Size;
112+
Size=0;
113+
return ret;
114+
}
115+
108116
// split the remainder of the data into string views,
109117
std::vector<std::string_view> splitIntoStrings() {
110118
std::vector<std::string_view> ret;

fuzz/fuzz_ndjson.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "simdjson.h"
2+
#include <cstddef>
3+
#include <cstdint>
4+
#include <string>
5+
6+
#include "FuzzUtils.h"
7+
#include "NullBuffer.h"
8+
9+
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
10+
FuzzData fd(Data, Size);
11+
const auto batch_size=static_cast<size_t>(fd.getInt<0,1000>());
12+
const auto json=simdjson::padded_string{fd.remainder_as_stringview()};
13+
simdjson::dom::parser parser;
14+
#if SIMDJSON_EXCEPTIONS
15+
try {
16+
#endif
17+
simdjson::dom::document_stream docs;
18+
if(parser.parse_many(json,batch_size).get(docs)) {
19+
return 0;
20+
}
21+
22+
size_t bool_count=0;
23+
for (auto doc : docs) {
24+
bool_count+=doc.is_bool();
25+
}
26+
#if SIMDJSON_EXCEPTIONS
27+
} catch(...) {
28+
}
29+
#endif
30+
return 0;
31+
}

fuzz/ossfuzz.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ cmake .. \
3030
-DENABLE_FUZZING=On \
3131
-DSIMDJSON_COMPETITION=Off \
3232
-DSIMDJSON_FUZZ_LINKMAIN=Off \
33-
-DSIMDJSON_GIT=Off \
3433
-DSIMDJSON_GOOGLE_BENCHMARKS=Off \
3534
-DSIMDJSON_DISABLE_DEPRECATED_API=On \
3635
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE

0 commit comments

Comments
 (0)