Skip to content

Commit adbb27a

Browse files
committed
Reject non-ASCII locale names.
Commit bf03cfd started scanning all available BCP 47 locale names on Windows. This caused an abort/crash in the Windows runtime library if the default locale name contained non-ASCII characters, because of our use of the setlocale() save/restore pattern with "char" strings. After switching to another locale with a different encoding, the saved name could no longer be understood, and setlocale() would abort. "Turkish_Türkiye.1254" is the example from recent reports, but there are other examples of countries and languages with non-ASCII characters in their names, and they appear in Windows' (old style) locale names. To defend against this: 1. In initdb, reject non-ASCII locale names given explicitly on the command line, or returned by the operating system environment with setlocale(..., ""), or "canonicalized" by the operating system when we set it. 2. In initdb only, perform the save-and-restore with Windows' non-standard wchar_t variant of setlocale(), so that it is not subject to round trip failures stemming from char string encoding confusion. 3. In the backend, we don't have to worry about the save-and-restore problem because we have already vetted the defaults, so we just have to make sure that CREATE DATABASE also rejects non-ASCII names in any new databases. SET lc_XXX doesn't suffer from the problem, but the ban applies to it too because it uses check_locale(). CREATE COLLATION doesn't suffer from the problem either, but it doesn't use check_locale() so it is not included in the new ban for now, to minimize the change. Anyone who encounters the new error message should either create a new duplicated locale with an ASCII-only name using Windows Locale Builder, or consider using BCP 47 names like "tr-TR". Users already couldn't initialize a cluster with "Turkish_Türkiye.1254" on PostgreSQL 16+, but the new failure mode is an error message that explains why, instead of a crash. Back-patch to 16, where bf03cfd landed. 
Older versions are affected in theory too, but only 16 and later are causing crash reports. Reviewed-by: Andrew Dunstan <andrew@dunslane.net> (the idea, not the patch) Reported-by: Haifang Wang (Centific Technologies Inc) <v-haiwang@microsoft.com> Discussion: https://postgr.es/m/PH8PR21MB3902F334A3174C54058F792CE5182%40PH8PR21MB3902.namprd21.prod.outlook.com
1 parent f22e84d commit adbb27a

File tree

2 files changed

+93
-17
lines changed

2 files changed

+93
-17
lines changed

src/backend/utils/adt/pg_locale.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "catalog/pg_collation.h"
5959
#include "catalog/pg_database.h"
6060
#include "common/hashfn.h"
61+
#include "common/string.h"
6162
#include "mb/pg_wchar.h"
6263
#include "miscadmin.h"
6364
#include "utils/builtins.h"
@@ -341,6 +342,16 @@ check_locale(int category, const char *locale, char **canonname)
341342
char *save;
342343
char *res;
343344

345+
/* Don't let Windows' non-ASCII locale names in. */
346+
if (!pg_is_ascii(locale))
347+
{
348+
ereport(WARNING,
349+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
350+
errmsg("locale name \"%s\" contains non-ASCII characters",
351+
locale)));
352+
return false;
353+
}
354+
344355
if (canonname)
345356
*canonname = NULL; /* in case of failure */
346357

@@ -363,6 +374,18 @@ check_locale(int category, const char *locale, char **canonname)
363374
elog(WARNING, "failed to restore old locale \"%s\"", save);
364375
pfree(save);
365376

377+
/* Don't let Windows' non-ASCII locale names out. */
378+
if (canonname && *canonname && !pg_is_ascii(*canonname))
379+
{
380+
ereport(WARNING,
381+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
382+
errmsg("locale name \"%s\" contains non-ASCII characters",
383+
*canonname)));
384+
pfree(*canonname);
385+
*canonname = NULL;
386+
return false;
387+
}
388+
366389
return (res != NULL);
367390
}
368391

src/bin/initdb/initdb.c

Lines changed: 70 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,61 @@ do { \
340340
output_failed = true, output_errno = errno; \
341341
} while (0)
342342

343+
/*
 * Type of a saved locale name as handled by save_global_locale() and
 * restore_global_locale(): a wide-character string on Windows, where the
 * _wsetlocale() variant is used, and a plain char string elsewhere.
 */
#ifdef WIN32
344+
typedef wchar_t *save_locale_t;
345+
#else
346+
typedef char *save_locale_t;
347+
#endif
348+
349+
/*
350+
* Save a copy of the current global locale's name, for the given category.
351+
* The returned value must be passed to restore_global_locale().
352+
*
353+
* Since names from the environment haven't been vetted for non-ASCII
354+
* characters, we use the wchar_t variant of setlocale() on Windows. Otherwise
355+
* they might not survive a save-restore round trip: when restoring, the name
356+
* itself might be interpreted with a different encoding by plain setlocale(),
357+
* after we switch to another locale in between. (This is a problem only in
358+
* initdb, not in similar backend code where the global locale's name should
359+
* already have been verified as ASCII-only.)
360+
*/
361+
static save_locale_t
362+
save_global_locale(int category)
363+
{
364+
save_locale_t save;
365+
366+
#ifdef WIN32
367+
/* Passing NULL queries the current locale name without changing it. */
save = _wsetlocale(category, NULL);
368+
if (!save)
369+
pg_fatal("_wsetlocale() failed");
370+
/*
 * Keep our own copy of the name rather than the pointer handed back by
 * the runtime.  wcsdup() can return NULL, so the result is checked here.
 */
save = wcsdup(save);
371+
if (!save)
372+
pg_fatal("out of memory");
373+
#else
374+
/* Passing NULL queries the current locale name without changing it. */
save = setlocale(category, NULL);
375+
if (!save)
376+
pg_fatal("setlocale() failed");
377+
/*
 * save may be pointing at a modifiable scratch variable, so copy it.
 * NOTE(review): no NULL check follows; presumably pg_strdup() reports
 * out-of-memory itself -- confirm against its definition.
 */
save = pg_strdup(save);
378+
#endif
379+
/* The copy is owned by the caller until restore_global_locale() frees it. */
return save;
380+
}
381+
382+
/*
383+
* Restore the global locale returned by save_global_locale().
384+
*/
385+
static void
386+
restore_global_locale(int category, save_locale_t save)
387+
{
388+
#ifdef WIN32
389+
if (!_wsetlocale(category, save))
390+
/*
 * Unlike the non-Windows branch, the message does not include the name;
 * save is a wchar_t string here (presumably why it is not passed to the
 * "%s" format -- confirm).
 */
pg_fatal("failed to restore old locale");
391+
#else
392+
if (!setlocale(category, save))
393+
pg_fatal("failed to restore old locale \"%s\"", save);
394+
#endif
395+
/* The saved copy is released here; callers must not use it afterwards. */
free(save);
396+
}
397+
343398
/*
344399
* Escape single quotes and backslashes, suitably for insertions into
345400
* configuration files or SQL E'' strings.
@@ -2074,16 +2129,13 @@ locale_date_order(const char *locale)
20742129
char *posD;
20752130
char *posM;
20762131
char *posY;
2077-
char *save;
2132+
save_locale_t save;
20782133
size_t res;
20792134
int result;
20802135

20812136
result = DATEORDER_MDY; /* default */
20822137

2083-
save = setlocale(LC_TIME, NULL);
2084-
if (!save)
2085-
return result;
2086-
save = pg_strdup(save);
2138+
save = save_global_locale(LC_TIME);
20872139

20882140
setlocale(LC_TIME, locale);
20892141

@@ -2094,8 +2146,7 @@ locale_date_order(const char *locale)
20942146

20952147
res = my_strftime(buf, sizeof(buf), "%x", &testtime);
20962148

2097-
setlocale(LC_TIME, save);
2098-
free(save);
2149+
restore_global_locale(LC_TIME, save);
20992150

21002151
if (res == 0)
21012152
return result;
@@ -2132,18 +2183,17 @@ locale_date_order(const char *locale)
21322183
static void
21332184
check_locale_name(int category, const char *locale, char **canonname)
21342185
{
2135-
char *save;
2186+
save_locale_t save;
21362187
char *res;
21372188

2189+
/* Don't let Windows' non-ASCII locale names in. */
2190+
if (locale && !pg_is_ascii(locale))
2191+
pg_fatal("locale name \"%s\" contains non-ASCII characters", locale);
2192+
21382193
if (canonname)
21392194
*canonname = NULL; /* in case of failure */
21402195

2141-
save = setlocale(category, NULL);
2142-
if (!save)
2143-
pg_fatal("setlocale() failed");
2144-
2145-
/* save may be pointing at a modifiable scratch variable, so copy it. */
2146-
save = pg_strdup(save);
2196+
save = save_global_locale(category);
21472197

21482198
/* for setlocale() call */
21492199
if (!locale)
@@ -2157,9 +2207,7 @@ check_locale_name(int category, const char *locale, char **canonname)
21572207
*canonname = pg_strdup(res);
21582208

21592209
/* restore old value. */
2160-
if (!setlocale(category, save))
2161-
pg_fatal("failed to restore old locale \"%s\"", save);
2162-
free(save);
2210+
restore_global_locale(category, save);
21632211

21642212
/* complain if locale wasn't valid */
21652213
if (res == NULL)
@@ -2183,6 +2231,11 @@ check_locale_name(int category, const char *locale, char **canonname)
21832231
pg_fatal("invalid locale settings; check LANG and LC_* environment variables");
21842232
}
21852233
}
2234+
2235+
/* Don't let Windows' non-ASCII locale names out. */
2236+
if (canonname && !pg_is_ascii(*canonname))
2237+
pg_fatal("locale name \"%s\" contains non-ASCII characters",
2238+
*canonname);
21862239
}
21872240

21882241
/*

0 commit comments

Comments
 (0)