Skip to content

Commit 86b5928

Browse files
committed
Use parse_digit for decimal and exp parsing as well
1 parent 6dbd15a commit 86b5928

File tree

1 file changed

+37
-50
lines changed

1 file changed

+37
-50
lines changed

src/generic/stage2/numberparsing.h

Lines changed: 37 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -256,19 +256,25 @@ bool slow_float_parsing(UNUSED const char * src, W writer) {
256256
return INVALID_NUMBER((const uint8_t *)src);
257257
}
258258

259+
template<typename I>
260+
NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later
261+
really_inline bool parse_digit(const char c, I &i) {
262+
const unsigned char digit = static_cast<unsigned char>(c - '0');
263+
if (digit > 9) {
264+
return false;
265+
}
266+
// PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication
267+
i = 10 * i + digit; // might overflow, we will handle the overflow later
268+
return true;
269+
}
270+
259271
really_inline bool parse_decimal(UNUSED const uint8_t *const src, const char *&p, uint64_t &i, int64_t &exponent) {
260272
// we continue with the fiction that we have an integer. If the
261273
// floating point number is representable as x * 10^z for some integer
262274
// z that fits in 53 bits, then we will be able to convert back the
263275
// integer into a float in a lossless manner.
264276
const char *const first_after_period = p;
265277

266-
unsigned char digit = static_cast<unsigned char>(*p - '0');
267-
if (digit > 9) { return INVALID_NUMBER(src); } // There must be at least one digit after the .
268-
++p;
269-
i = i * 10 + digit; // might overflow + multiplication by 10 is likely
270-
// cheaper than arbitrary mult.
271-
// we will handle the overflow later
272278
#ifdef SWAR_NUMBER_PARSING
273279
// this helps if we have lots of decimals!
274280
// this turns out to be frequent enough.
@@ -277,57 +283,38 @@ really_inline bool parse_decimal(UNUSED const uint8_t *const src, const char *&p
277283
p += 8;
278284
}
279285
#endif
280-
digit = static_cast<unsigned char>(*p - '0');
281-
while (digit <= 9) {
282-
++p;
283-
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
284-
// because we have parse_highprecision_float later.
285-
digit = static_cast<unsigned char>(*p - '0');
286-
}
286+
// Unrolling the first digit makes a small difference on some implementations (e.g. westmere)
287+
if (parse_digit(*p, i)) { ++p; }
288+
while (parse_digit(*p, i)) { p++; }
287289
exponent = first_after_period - p;
288-
return true;
289-
}
290-
291-
template<typename I>
292-
really_inline bool parse_digit(const char c, I &i) {
293-
const unsigned char digit = static_cast<unsigned char>(c - '0');
294-
if (digit <= 9) {
295-
// a multiplication by 10 is cheaper than an arbitrary integer
296-
// multiplication
297-
i = 10 * i + digit; // might overflow, we will handle the overflow later
298-
return true;
299-
} else {
300-
return false;
290+
// Decimal without digits (123.) is illegal
291+
if (exponent == 0) {
292+
return INVALID_NUMBER(src);
301293
}
302-
}
303-
template<typename I>
304-
really_inline bool parse_first_digit(const char c, I &i) {
305-
const unsigned char digit = static_cast<unsigned char>(c - '0');
306-
i = digit;
307-
return digit <= 9;
294+
return true;
308295
}
309296

310297
really_inline bool parse_exponent(UNUSED const uint8_t *const src, const char *&p, int64_t &exponent) {
311-
bool neg_exp = false;
312-
if ('-' == *p) {
313-
neg_exp = true;
314-
++p;
315-
} else if ('+' == *p) {
316-
++p;
317-
}
298+
// Exp Sign: -123.456e[-]78
299+
bool neg_exp = ('-' == *p);
300+
if (neg_exp || '+' == *p) { p++; } // Skip + as well
301+
302+
// Exponent: -123.456e-[78]
303+
auto start_exp = p;
304+
int64_t exp_number = 0;
305+
while (parse_digit(*p, exp_number)) { ++p; }
306+
exponent += (neg_exp ? -exp_number : exp_number);
318307

319-
// e[+-] must be followed by a number
320-
int64_t exp_number;
321-
if (!parse_first_digit(*p, exp_number)) { return INVALID_NUMBER(src); }
322-
++p;
323-
if (parse_digit(*p, exp_number)) { ++p; }
324-
if (parse_digit(*p, exp_number)) { ++p; }
325-
while (parse_digit(*p, exp_number)) {
326-
++p;
327-
// we need to check for overflows; we refuse to parse this
328-
if (exp_number > 0x100000000) { return INVALID_NUMBER(src); }
308+
// If there were no digits, it's an error.
309+
// If there were more than 18 digits, we may have overflowed the integer.
310+
if (unlikely(p == start_exp || p > start_exp+18)) {
311+
// Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow
312+
while (*start_exp == '0') { start_exp++; }
313+
// 19 digits could overflow int64_t and is kind of absurd anyway. We don't
314+
// support exponents smaller than -999,999,999,999,999,999 and bigger
315+
// than 999,999,999,999,999,999.
316+
if (p == start_exp || p > start_exp+18) { return INVALID_NUMBER(src); }
329317
}
330-
exponent += (neg_exp ? -exp_number : exp_number);
331318
return true;
332319
}
333320

0 commit comments

Comments
 (0)