Skip to content

Commit 92334a8

Browse files
committed
Better tests.
1 parent c1f27fb commit 92334a8

File tree

7 files changed

+78
-19
lines changed

7 files changed

+78
-19
lines changed

Makefile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ endif # ifeq ($(SANITIZE),1)
5959
endif # ifeq ($(MEMSANITIZE),1)
6060

6161
MAINEXECUTABLES=parse minify json2json jsonstats statisticalmodel jsonpointer
62-
TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck pointercheck
62+
TESTEXECUTABLES=jsoncheck integer_tests numberparsingcheck stringparsingcheck pointercheck
6363
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
6464
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
6565

@@ -96,19 +96,21 @@ benchmark:
9696
bash ./scripts/parser.sh
9797
bash ./scripts/parseandstat.sh
9898

99-
test: jsoncheck numberparsingcheck stringparsingcheck basictests allparserscheckfile minify json2json pointercheck
99+
test: jsoncheck integer_tests numberparsingcheck stringparsingcheck basictests allparserscheckfile minify json2json pointercheck
100100
./basictests
101101
./numberparsingcheck
102+
./integer_tests
102103
./stringparsingcheck
103104
./jsoncheck
104105
./pointercheck
105106
./scripts/testjson2json.sh
106107
./scripts/issue150.sh
107108
@echo "It looks like the code is good!"
108109

109-
quiettest: jsoncheck numberparsingcheck stringparsingcheck basictests allparserscheckfile minify json2json pointercheck
110+
quiettest: jsoncheck integer_tests numberparsingcheck stringparsingcheck basictests allparserscheckfile minify json2json pointercheck
110111
./basictests
111112
./numberparsingcheck
113+
./integer_tests
112114
./stringparsingcheck
113115
./jsoncheck
114116
./pointercheck
@@ -158,6 +160,10 @@ basictests:tests/basictests.cpp $(HEADERS) $(LIBFILES)
158160
numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
159161
$(CXX) $(CXXFLAGS) -o numberparsingcheck tests/numberparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/parsedjson.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
160162

163+
integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)
164+
$(CXX) $(CXXFLAGS) -o integer_tests tests/integer_tests.cpp src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp -I. $(LIBFLAGS)
165+
166+
161167

162168
stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
163169
$(CXX) $(CXXFLAGS) -o stringparsingcheck tests/stringparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/parsedjson.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ _We do not aim to provide a general-purpose JSON library._ A library like RapidJ
346346
## Features
347347
348348
- The input string is unmodified. (Parsers like sajson and RapidJSON use the input string as a buffer.)
349-
- We parse integers and floating-point numbers as separate types which allows us to support large 64-bit integers in [-9223372036854775808,9223372036854775808), like a Java `long` or a C/C++ `long long`. Among the parsers that differentiate between integers and floating-point numbers, not all support 64-bit integers. (For example, sajson rejects JSON files with integers larger than or equal to 2147483648. RapidJSON will parse a file containing an overly long integer like 18446744073709551616 as a floating-point number.) When we cannot represent exactly an integer as a signed 64-bit value, we reject the JSON document.
349+
- We parse integers and floating-point numbers as separate types, which allows us to support large signed 64-bit integers in [-9223372036854775808,9223372036854775808), like a Java `long` or a C/C++ `long long`, and large unsigned integers up to the value 18446744073709551615. Among the parsers that differentiate between integers and floating-point numbers, not all support 64-bit integers. (For example, sajson rejects JSON files with integers larger than or equal to 2147483648. RapidJSON will parse a file containing an overly long integer like 18446744073709551616 as a floating-point number.) When we cannot represent an integer exactly as a signed or unsigned 64-bit value, we reject the JSON document.
350350
- We support the full range of 64-bit floating-point numbers (binary64). The values range from `std::numeric_limits<double>::lowest()` to `std::numeric_limits<double>::max()`, so from -1.7976e308 all the way to 1.7976e308. Extreme values (less than or equal to -1e308, greater than or equal to 1e308) are rejected: we refuse to parse the input document.
351351
- We test for accurate float parsing with a bound on the [unit of least precision (ULP)](https://en.wikipedia.org/wiki/Unit_in_the_last_place) of one. Practically speaking, this implies 15 digits of accuracy or better.
352352
- We do full UTF-8 validation as part of the parsing. (Parsers like fastjson, gason and dropbox json11 do not do UTF-8 validation. The sajson parser does incomplete UTF-8 validation, accepting code point

include/simdjson/numberparsing.h

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifdef JSON_TEST_NUMBERS // for unit testing
1111
void found_invalid_number(const uint8_t *buf);
1212
void found_integer(int64_t result, const uint8_t *buf);
13+
void found_unsigned_integer(uint64_t result, const uint8_t *buf);
1314
void found_float(double result, const uint8_t *buf);
1415
#endif
1516

@@ -370,33 +371,45 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
370371
}
371372
if (negative) {
372373
if (i > 0x8000000000000000) {
373-
// overflows!
374+
// overflows!
374375
#ifdef JSON_TEST_NUMBERS // for unit testing
375376
found_invalid_number(buf + offset);
376377
#endif
377378
return false; // overflow
378379
} else if (i == 0x8000000000000000) {
380+
// In two's complement, we cannot represent 0x8000000000000000
381+
// as a positive signed integer, but the negative version is
382+
// possible.
379383
constexpr int64_t signed_answer = INT64_MIN;
380384
pj.write_tape_s64(signed_answer);
381385
#ifdef JSON_TEST_NUMBERS // for unit testing
382386
found_integer(signed_answer, buf + offset);
383387
#endif
384388
return is_structural_or_whitespace(*p);
385389
}
386-
} else {
390+
int64_t signed_answer = -static_cast<int64_t>(i);
391+
pj.write_tape_s64(signed_answer);
387392
#ifdef JSON_TEST_NUMBERS // for unit testing
388-
found_integer(i, buf + offset);
393+
found_integer(signed_answer, buf + offset);
389394
#endif
390-
pj.write_tape_u64(i);
391395
return is_structural_or_whitespace(*p);
392-
}
393-
int64_t signed_answer =
394-
negative ? -static_cast<int64_t>(i) : static_cast<int64_t>(i);
395-
pj.write_tape_s64(signed_answer);
396+
} else {
397+
// we have a positive integer, the contract is that
398+
// we try to represent it as a signed integer and only
399+
// fallback on unsigned integers if absolutely necessary.
400+
if(i < 0x8000000000000000) {
396401
#ifdef JSON_TEST_NUMBERS // for unit testing
397-
found_integer(signed_answer, buf + offset);
402+
found_integer(i, buf + offset);
398403
#endif
399-
return is_structural_or_whitespace(*p);
404+
pj.write_tape_s64(i);
405+
} else {
406+
#ifdef JSON_TEST_NUMBERS // for unit testing
407+
found_unsigned_integer(i, buf + offset);
408+
#endif
409+
pj.write_tape_u64(i);
410+
}
411+
return is_structural_or_whitespace(*p);
412+
}
400413
}
401414

402415
// parse the number at buf + offset

jsonchecker/pass22.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
18446744073709551615

tape.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ $ ./json2json -d jsonexamples/small/demo.json
6666
6767
## General format of the tape elements
6868
69-
Most tape elements are written as `('c' << 56) + x` where `'c'` is some ASCII character determining the type of the element (out of 't', 'f', 'n', 'l', 'd', '"', '{', '}', '[', ']' ,'r') and where `x` is a 56-bit value called the payload. The payload is normally interpreted as an unsigned 56-bit integer. Note that 56-bit integers can be quite large.
69+
Most tape elements are written as `('c' << 56) + x` where `'c'` is some ASCII character determining the type of the element (out of 't', 'f', 'n', 'l', 'u', 'd', '"', '{', '}', '[', ']' ,'r') and where `x` is a 56-bit value called the payload. The payload is normally interpreted as an unsigned 56-bit integer. Note that 56-bit integers can be quite large.
7070
7171
7272
Performance consideration: We believe that accessing the tape in regular units of 64 bits is more important for performance than saving memory.
@@ -84,6 +84,8 @@ Simple JSON nodes are represented with one tape element:
8484
8585
Integer values are represented as two 64-bit tape elements:
8686
- The 64-bit value `('l' << 56)` followed by the 64-bit integer value literally. Integer values are assumed to be signed 64-bit values, using two's complement notation.
87+
- The 64-bit value `('u' << 56)` followed by the 64-bit integer value literally. Integer values are assumed to be unsigned 64-bit values.
88+
8789
8890
Float values are represented as two 64-bit tape elements:
8991
- The 64-bit value `('d' << 56)` followed by the 64-bit double value literally in standard IEEE 754 notation.

tests/integer_tests.cpp

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,38 @@ static void parse_and_validate(const std::string src, T expected) {
3636
result = expected == actual;
3737
}
3838
std::cout << std::boolalpha << "test: " << result << std::endl;
39-
assert(result);
39+
if(!result) {
40+
std::cerr << "bug detected" << std::endl;
41+
throw std::runtime_error("bug");
42+
}
43+
}
44+
45+
static bool parse_and_check_signed(const std::string src) {
46+
std::cout << "src: " << src << ", expecting signed" << std::endl;
47+
const padded_string pstr{src};
48+
auto json = build_parsed_json(pstr);
49+
50+
assert(json.is_valid());
51+
ParsedJson::Iterator it{json};
52+
assert(it.down());
53+
assert(it.next());
54+
return it.is_integer() && it.is_number();
55+
}
56+
57+
static bool parse_and_check_unsigned(const std::string src) {
58+
std::cout << "src: " << src << ", expecting unsigned" << std::endl;
59+
const padded_string pstr{src};
60+
auto json = build_parsed_json(pstr);
61+
62+
assert(json.is_valid());
63+
ParsedJson::Iterator it{json};
64+
assert(it.down());
65+
assert(it.next());
66+
return it.is_unsigned_integer() && it.is_number();
4067
}
4168

69+
70+
4271
int main() {
4372
using std::numeric_limits;
4473
constexpr auto int64_max = numeric_limits<int64_t>::max();
@@ -49,8 +78,17 @@ int main() {
4978
parse_and_validate(make_json(int64_min), int64_min);
5079
parse_and_validate(make_json(uint64_max), uint64_max);
5180
parse_and_validate(make_json(uint64_min), uint64_min);
52-
5381
constexpr auto int64_max_plus1 = static_cast<uint64_t>(int64_max) + 1;
5482
parse_and_validate(make_json(int64_max_plus1), int64_max_plus1);
83+
if(!parse_and_check_signed(make_json(int64_max))) {
84+
std::cerr << "bug: large signed integers should be represented as signed integers" << std::endl;
85+
return EXIT_FAILURE;
86+
}
87+
if(!parse_and_check_unsigned(make_json(uint64_max))) {
88+
std::cerr << "bug: a large unsigned integers is not represented as an unsigned integer" << std::endl;
89+
return EXIT_FAILURE;
90+
}
91+
std::cout << "All ok." << std::endl;
92+
return EXIT_SUCCESS;
5593
}
5694

tests/numberparsingcheck.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,7 @@ void found_integer(int64_t result, const uint8_t *buf) {
9090
}
9191
}
9292

93-
// TODO fix duplicated overload
94-
void found_integer(uint64_t result, const uint8_t *buf) {
93+
void found_unsigned_integer(uint64_t result, const uint8_t *buf) {
9594
int_count++;
9695
char *endptr;
9796
unsigned long long expected = strtoull((const char *)buf, &endptr, 10);

0 commit comments

Comments
 (0)