@@ -256,19 +256,25 @@ bool slow_float_parsing(UNUSED const char * src, W writer) {
256
256
return INVALID_NUMBER ((const uint8_t *)src);
257
257
}
258
258
259
+ template <typename I>
260
+ NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later
261
+ really_inline bool parse_digit (const char c, I &i) {
262
+ const unsigned char digit = static_cast <unsigned char >(c - ' 0' );
263
+ if (digit > 9 ) {
264
+ return false ;
265
+ }
266
+ // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication
267
+ i = 10 * i + digit; // might overflow, we will handle the overflow later
268
+ return true ;
269
+ }
270
+
259
271
really_inline bool parse_decimal (UNUSED const uint8_t *const src, const char *&p, uint64_t &i, int64_t &exponent) {
260
272
// we continue with the fiction that we have an integer. If the
261
273
// floating point number is representable as x * 10^z for some integer
262
274
// z that fits in 53 bits, then we will be able to convert back the
263
275
// integer into a float in a lossless manner.
264
276
const char *const first_after_period = p;
265
277
266
- unsigned char digit = static_cast <unsigned char >(*p - ' 0' );
267
- if (digit > 9 ) { return INVALID_NUMBER (src); } // There must be at least one digit after the .
268
- ++p;
269
- i = i * 10 + digit; // might overflow + multiplication by 10 is likely
270
- // cheaper than arbitrary mult.
271
- // we will handle the overflow later
272
278
#ifdef SWAR_NUMBER_PARSING
273
279
// this helps if we have lots of decimals!
274
280
// this turns out to be frequent enough.
@@ -277,57 +283,38 @@ really_inline bool parse_decimal(UNUSED const uint8_t *const src, const char *&p
277
283
p += 8 ;
278
284
}
279
285
#endif
280
- digit = static_cast <unsigned char >(*p - ' 0' );
281
- while (digit <= 9 ) {
282
- ++p;
283
- i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
284
- // because we have parse_highprecision_float later.
285
- digit = static_cast <unsigned char >(*p - ' 0' );
286
- }
286
+ // Unrolling the first digit makes a small difference on some implementations (e.g. westmere)
287
+ if (parse_digit (*p, i)) { ++p; }
288
+ while (parse_digit (*p, i)) { p++; }
287
289
exponent = first_after_period - p;
288
- return true ;
289
- }
290
-
291
- template <typename I>
292
- really_inline bool parse_digit (const char c, I &i) {
293
- const unsigned char digit = static_cast <unsigned char >(c - ' 0' );
294
- if (digit <= 9 ) {
295
- // a multiplication by 10 is cheaper than an arbitrary integer
296
- // multiplication
297
- i = 10 * i + digit; // might overflow, we will handle the overflow later
298
- return true ;
299
- } else {
300
- return false ;
290
+ // Decimal without digits (123.) is illegal
291
+ if (exponent == 0 ) {
292
+ return INVALID_NUMBER (src);
301
293
}
302
- }
303
- template <typename I>
304
- really_inline bool parse_first_digit (const char c, I &i) {
305
- const unsigned char digit = static_cast <unsigned char >(c - ' 0' );
306
- i = digit;
307
- return digit <= 9 ;
294
+ return true ;
308
295
}
309
296
310
297
// Parses the exponent of a number, starting right after the 'e' / 'E'.
//
// src      - start of the number; only used to report an invalid number.
// p        - in/out cursor. On entry it points at the optional sign; it is
//            advanced past the sign and past every exponent digit.
// exponent - in/out. On success the parsed (signed) exponent is added to it.
//            On failure it is left unchanged.
//
// Returns true on success. Returns INVALID_NUMBER(src) when no digit follows
// e[+-], or when the exponent has more than 18 significant digits (leading
// zeroes are not counted).
really_inline bool parse_exponent(UNUSED const uint8_t *const src, const char *&p, int64_t &exponent) {
  // Exp Sign: -123.456e[-]78
  const bool neg_exp = ('-' == *p);
  if (neg_exp || '+' == *p) { p++; } // Skip + as well

  // Exponent: -123.456e-[78]
  const char *start_exp = p;
  int64_t exp_number = 0;
  while (parse_digit(*p, exp_number)) { ++p; }

  // e[+-] must be followed by at least one digit.
  if (unlikely(p == start_exp)) { return INVALID_NUMBER(src); }

  // With more than 18 digits, exp_number may have overflowed.
  if (unlikely(p > start_exp + 18)) {
    // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow.
    // The loop stops at p at the latest, because *p is not a digit. An exponent
    // made only of zeroes ends with start_exp == p, which is a valid exponent of 0.
    while (*start_exp == '0') { start_exp++; }
    // 19 significant digits could overflow int64_t and is kind of absurd anyway.
    // We don't support exponents smaller than -999,999,999,999,999,999 and
    // bigger than 999,999,999,999,999,999.
    if (p > start_exp + 18) { return INVALID_NUMBER(src); }
  }

  // Only now is exp_number known to fit in 18 digits, so negating it and
  // adding it cannot act on an overflowed value.
  exponent += (neg_exp ? -exp_number : exp_number);
  return true;
}
333
320
0 commit comments