Skip to content

Commit 72c83d9

Browse files
authored
This avoids locale-dependent number parsing at the standard library level (simdjson#1157)
* This avoids locale-dependent number parsing at the standard library level. * Adding missing cast. * Inserting the missing "endif" * Trial and error. * Another attempt. * Another tweak. * Another fix. * Restricting it even more. * Tweaking our symbol checks. * Somewhat smarter tests. * Nice comments. * Minor simplification. * Adding cerr.
1 parent bfbac12 commit 72c83d9

File tree

4 files changed

+92
-5
lines changed

4 files changed

+92
-5
lines changed

include/simdjson/common_defs.h

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024;
7373
#define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
7474
// Get rid of Intellisense-only warnings (Code Analysis)
7575
// Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
76-
#if defined(_MSC_VER) && (_MSC_VER>=1910)
76+
#ifdef __has_include
7777
#if __has_include(<CppCoreCheck\Warnings.h>)
7878
#include <CppCoreCheck\Warnings.h>
7979
#define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
@@ -196,4 +196,53 @@ namespace std {
196196
#endif // SIMDJSON_HAS_STRING_VIEW
197197
#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore.
198198

199+
200+
201+
202+
203+
/**
204+
* We may fall back on the system's number parsing, and we want
205+
* to be able to call a locale-insensitive number parser. It unfortunately
206+
* means that we need to load up locale headers.
207+
* The locale.h header is generally available:
208+
*/
209+
#include <locale.h>
210+
/**
211+
* Determining whether we should import xlocale.h or not is
212+
* a bit of a nightmare. Visual Studio and recent recent GLIBC (GCC) do not need it.
213+
* However, FreeBSD and Apple platforms will need it.
214+
* And we would want to cover as many platforms as possible.
215+
*/
216+
#ifdef __has_include
217+
// This is the easy case: we have __has_include and can check whether
218+
// xlocale is available. If so, we load it up.
219+
#if __has_include(<xlocale.h>)
220+
#include <xlocale.h>
221+
#endif // __has_include
222+
#else // We do not have __has_include
223+
// Here we do not have __has_include
224+
// We first check for __GLIBC__
225+
#ifdef __GLIBC__ // If we have __GLIBC__ then we should have features.h which should help.
226+
// Note that having __GLIBC__ does not imply that we are compiling against glibc. But
227+
// we hope that any platform that defines __GLIBC__ will mimick glibc.
228+
#include <features.h>
229+
// Check whether we have an old GLIBC.
230+
#if !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25)))
231+
#include <xlocale.h> // Old glibc needs xlocale, otherwise xlocale is unavailable.
232+
#endif // !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25)))
233+
#else // __GLIBC__
234+
// Ok. So we do not have __GLIBC__
235+
// We assume that everything that is not GLIBC and not on old freebsd or windows
236+
// needs xlocale.
237+
// It is likely that recent FreeBSD and Apple platforms load xlocale.h next:
238+
#if !(defined(_WIN32) || (__FreeBSD_version < 1000010))
239+
#include <xlocale.h> // Will always happen under apple.
240+
#endif //
241+
#endif // __GLIBC__
242+
#endif // __has_include
243+
/**
244+
* End of the crazy locale headers.
245+
*/
246+
247+
199248
#endif // SIMDJSON_COMMON_DEFS_H

src/CMakeLists.txt

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,24 @@ if(NOT SIMDJSON_SANITIZE)
5858
else()
5959
add_test(
6060
NAME "avoid_abort"
61-
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} abort || exit 0 && exit 1"
61+
# Under FreeBSD, the __cxa_guard_abort symbol may appear but it is fine.
62+
# So we want to look for <space><possibly _>abort as a test.
63+
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*abort' || exit 0 && exit 1"
64+
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
65+
)
66+
add_test(
67+
NAME "avoid_cout"
68+
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*cout' || exit 0 && exit 1"
69+
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
70+
)
71+
add_test(
72+
NAME "avoid_cerr"
73+
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*cerr' || exit 0 && exit 1"
74+
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
75+
)
76+
add_test(
77+
NAME "avoid_printf"
78+
COMMAND sh -c "${NM} $<TARGET_FILE_NAME:simdjson> | ${GREP} ' _*printf' || exit 0 && exit 1"
6279
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
6380
)
6481
add_test(

src/generic/stage2/numberparsing.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,16 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
226226

227227
static bool parse_float_strtod(const uint8_t *ptr, double *outDouble) {
228228
char *endptr;
229-
*outDouble = strtod((const char *)ptr, &endptr);
229+
// We want to call strtod with the C (default) locale to avoid
230+
// potential issues in case someone has a different locale.
231+
// Unfortunately, Visual Studio has a different syntax.
232+
#ifdef _WIN32
233+
static _locale_t c_locale = _create_locale(LC_ALL, "C");
234+
*outDouble = _strtod_l((const char *)ptr, &endptr, c_locale);
235+
#else
236+
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
237+
*outDouble = strtod_l((const char *)ptr, &endptr, c_locale);
238+
#endif
230239
// Some libraries will set errno = ERANGE when the value is subnormal,
231240
// yet we may want to be able to parse subnormal values.
232241
// However, we do not want to tolerate NAN or infinite values.

tests/numberparsingcheck.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,13 @@ bool is_in_bad_list(const char *buf) {
7070
void found_invalid_number(const uint8_t *buf) {
7171
invalid_count++;
7272
char *endptr;
73-
double expected = strtod((const char *)buf, &endptr);
73+
#ifdef _WIN32
74+
static _locale_t c_locale = _create_locale(LC_ALL, "C");
75+
double expected = _strtod_l((const char *)buf, &endptr, c_locale);
76+
#else
77+
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
78+
double expected = strtod_l((const char *)buf, &endptr, c_locale);
79+
#endif
7480
if (endptr != (const char *)buf) {
7581
if (!is_in_bad_list((const char *)buf)) {
7682
printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
@@ -115,7 +121,13 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf) {
115121
void found_float(double result, const uint8_t *buf) {
116122
char *endptr;
117123
float_count++;
118-
double expected = strtod((const char *)buf, &endptr);
124+
#ifdef _WIN32
125+
static _locale_t c_locale = _create_locale(LC_ALL, "C");
126+
double expected = _strtod_l((const char *)buf, &endptr, c_locale);
127+
#else
128+
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
129+
double expected = strtod_l((const char *)buf, &endptr, c_locale);
130+
#endif
119131
if (endptr == (const char *)buf) {
120132
fprintf(stderr,
121133
"parsed %f from %.32s whereas strtod refuses to parse a float, ",

0 commit comments

Comments
 (0)