Skip to content

Commit 1671f99

Browse files
committed
Validate ICU locales.
For ICU collations, ensure that the locale's language exists in ICU, and that the locale can be opened.

Basic validation helps avoid minor mistakes and misspellings, which often fall back to the root locale instead of the intended locale. It's even more important to avoid such mistakes in ICU versions 54 and earlier, where the same (misspelled) locale string could fall back to different locales depending on the environment.

Discussion: https://postgr.es/m/11b1eeb7e7667fdd4178497aeb796c48d26e69b9.camel@j-davis.com
Discussion: https://postgr.es/m/df2efad0cae7c65180df8e5ebb709e5eb4f2a82b.camel@j-davis.com
Reviewed-by: Peter Eisentraut
1 parent b7cea58 commit 1671f99

File tree

11 files changed

+210
-13
lines changed

11 files changed

+210
-13
lines changed

doc/src/sgml/config.sgml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9804,6 +9804,32 @@ SET XML OPTION { DOCUMENT | CONTENT };
98049804
</listitem>
98059805
</varlistentry>
98069806

9807+
<varlistentry id="guc-icu-validation-level" xreflabel="icu_validation_level">
9808+
<term><varname>icu_validation_level</varname> (<type>enum</type>)
9809+
<indexterm>
9810+
<primary><varname>icu_validation_level</varname> configuration parameter</primary>
9811+
</indexterm>
9812+
</term>
9813+
<listitem>
9814+
<para>
9815+
When ICU locale validation problems are encountered, controls which
9816+
<link linkend="runtime-config-severity-levels">message level</link> is
9817+
used to report the problem. Valid values are
9818+
<literal>DISABLED</literal>, <literal>DEBUG5</literal>,
9819+
<literal>DEBUG4</literal>, <literal>DEBUG3</literal>,
9820+
<literal>DEBUG2</literal>, <literal>DEBUG1</literal>,
9821+
<literal>INFO</literal>, <literal>NOTICE</literal>,
9822+
<literal>WARNING</literal>, <literal>ERROR</literal>, and
9823+
<literal>LOG</literal>.
9824+
</para>
9825+
<para>
9826+
If set to <literal>DISABLED</literal>, does not report validation
9827+
problems at all. Otherwise reports problems at the given message
9828+
level. The default is <literal>ERROR</literal>.
9829+
</para>
9830+
</listitem>
9831+
</varlistentry>
9832+
98079833
<varlistentry id="guc-default-text-search-config" xreflabel="default_text_search_config">
98089834
<term><varname>default_text_search_config</varname> (<type>string</type>)
98099835
<indexterm>

src/backend/commands/collationcmds.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
258258
ereport(ERROR,
259259
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
260260
errmsg("parameter \"locale\" must be specified")));
261+
262+
icu_validate_locale(colliculocale);
261263
}
262264

263265
/*

src/backend/commands/dbcommands.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1058,7 +1058,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
10581058
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
10591059
errmsg("ICU locale must be specified")));
10601060

1061-
check_icu_locale(dbiculocale);
1061+
icu_validate_locale(dbiculocale);
10621062
}
10631063
else
10641064
{

src/backend/utils/adt/pg_locale.c

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "catalog/pg_collation.h"
5959
#include "catalog/pg_control.h"
6060
#include "mb/pg_wchar.h"
61+
#include "miscadmin.h"
6162
#include "utils/builtins.h"
6263
#include "utils/formatting.h"
6364
#include "utils/guc_hooks.h"
@@ -95,6 +96,8 @@ char *locale_monetary;
9596
char *locale_numeric;
9697
char *locale_time;
9798

99+
int icu_validation_level = ERROR;
100+
98101
/*
99102
* lc_time localization cache.
100103
*
@@ -2821,24 +2824,77 @@ icu_set_collation_attributes(UCollator *collator, const char *loc,
28212824
pfree(lower_str);
28222825
}
28232826

2824-
#endif /* USE_ICU */
2827+
#endif
28252828

28262829
/*
2827-
* Check if the given locale ID is valid, and ereport(ERROR) if it isn't.
2830+
* Perform best-effort check that the locale is a valid one.
28282831
*/
28292832
void
2830-
check_icu_locale(const char *icu_locale)
2833+
icu_validate_locale(const char *loc_str)
28312834
{
28322835
#ifdef USE_ICU
2833-
UCollator *collator;
2836+
UCollator *collator;
2837+
UErrorCode status;
2838+
char lang[ULOC_LANG_CAPACITY];
2839+
bool found = false;
2840+
int elevel = icu_validation_level;
2841+
2842+
/* no validation */
2843+
if (elevel < 0)
2844+
return;
2845+
2846+
/* downgrade to WARNING during pg_upgrade */
2847+
if (IsBinaryUpgrade && elevel > WARNING)
2848+
elevel = WARNING;
2849+
2850+
/* validate that we can extract the language */
2851+
status = U_ZERO_ERROR;
2852+
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2853+
if (U_FAILURE(status))
2854+
{
2855+
ereport(elevel,
2856+
(errmsg("could not get language from ICU locale \"%s\": %s",
2857+
loc_str, u_errorName(status)),
2858+
errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
2859+
return;
2860+
}
2861+
2862+
/* check for special language name */
2863+
if (strcmp(lang, "") == 0 ||
2864+
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
2865+
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2866+
found = true;
28342867

2835-
collator = pg_ucol_open(icu_locale);
2868+
/* search for matching language within ICU */
2869+
for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
2870+
{
2871+
const char *otherloc = uloc_getAvailable(i);
2872+
char otherlang[ULOC_LANG_CAPACITY];
2873+
2874+
status = U_ZERO_ERROR;
2875+
uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
2876+
if (U_FAILURE(status))
2877+
continue;
2878+
2879+
if (strcmp(lang, otherlang) == 0)
2880+
found = true;
2881+
}
2882+
2883+
if (!found)
2884+
ereport(elevel,
2885+
(errmsg("ICU locale \"%s\" has unknown language \"%s\"",
2886+
loc_str, lang),
2887+
errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
2888+
2889+
/* check that it can be opened */
2890+
collator = pg_ucol_open(loc_str);
28362891
ucol_close(collator);
2837-
#else
2892+
#else /* not USE_ICU */
2893+
/* could get here if a collation was created by a build with ICU */
28382894
ereport(ERROR,
28392895
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
28402896
errmsg("ICU is not supported in this build")));
2841-
#endif
2897+
#endif /* not USE_ICU */
28422898
}
28432899

28442900
/*

src/backend/utils/misc/guc_tables.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,22 @@ static const struct config_enum_entry intervalstyle_options[] = {
166166
{NULL, 0, false}
167167
};
168168

169+
static const struct config_enum_entry icu_validation_level_options[] = {
170+
{"disabled", -1, false},
171+
{"debug5", DEBUG5, false},
172+
{"debug4", DEBUG4, false},
173+
{"debug3", DEBUG3, false},
174+
{"debug2", DEBUG2, false},
175+
{"debug1", DEBUG1, false},
176+
{"debug", DEBUG2, true},
177+
{"log", LOG, false},
178+
{"info", INFO, true},
179+
{"notice", NOTICE, false},
180+
{"warning", WARNING, false},
181+
{"error", ERROR, false},
182+
{NULL, 0, false}
183+
};
184+
169185
StaticAssertDecl(lengthof(intervalstyle_options) == (INTSTYLE_ISO_8601 + 2),
170186
"array length mismatch");
171187

@@ -4643,6 +4659,16 @@ struct config_enum ConfigureNamesEnum[] =
46434659
NULL, NULL, NULL
46444660
},
46454661

4662+
{
4663+
{"icu_validation_level", PGC_USERSET, CLIENT_CONN_LOCALE,
4664+
gettext_noop("Log level for reporting invalid ICU locale strings."),
4665+
NULL
4666+
},
4667+
&icu_validation_level,
4668+
ERROR, icu_validation_level_options,
4669+
NULL, NULL, NULL
4670+
},
4671+
46464672
{
46474673
{"log_error_verbosity", PGC_SUSET, LOGGING_WHAT,
46484674
gettext_noop("Sets the verbosity of logged messages."),

src/backend/utils/misc/postgresql.conf.sample

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,9 @@
731731
#lc_numeric = 'C' # locale for number formatting
732732
#lc_time = 'C' # locale for time formatting
733733

734+
#icu_validation_level = ERROR # report ICU locale validation
735+
# errors at the given level
736+
734737
# default configuration for text search
735738
#default_text_search_config = 'pg_catalog.simple'
736739

src/bin/initdb/initdb.c

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2242,6 +2242,58 @@ check_icu_locale_encoding(int user_enc)
22422242
return true;
22432243
}
22442244

2245+
/*
 * Perform best-effort check that the locale is a valid one. Should be
 * consistent with pg_locale.c, except that it doesn't need to open the
 * collator (that will happen during post-bootstrap initialization).
 *
 * Any problem is reported with pg_fatal().  The locale is accepted when a
 * language can be extracted from loc_str and that language is either one of
 * the special names (empty, "root", "und", "c", "posix") or the language of
 * some locale ICU reports as available.
 *
 * In a build without ICU this always fails.
 */
static void
icu_validate_locale(const char *loc_str)
{
#ifdef USE_ICU
	UErrorCode	status;
	char		lang[ULOC_LANG_CAPACITY];
	bool		found = false;

	/*
	 * Validate that we can extract the language.
	 *
	 * U_STRING_NOT_TERMINATED_WARNING is only a warning, so U_FAILURE() does
	 * not catch it, but it means "lang" was filled without a terminating NUL.
	 * Treat it as a failure so the strcmp() calls below never read an
	 * unterminated buffer.
	 */
	status = U_ZERO_ERROR;
	uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
	if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
		pg_fatal("could not get language from locale \"%s\": %s",
				 loc_str, u_errorName(status));

	/* check for special language name */
	if (strcmp(lang, "") == 0 ||
		strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
		strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
		found = true;

	/* search for matching language within ICU */
	for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
	{
		const char *otherloc = uloc_getAvailable(i);
		char		otherlang[ULOC_LANG_CAPACITY];

		status = U_ZERO_ERROR;
		uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);

		/* skip entries whose language is unavailable or unterminated */
		if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
			continue;

		if (strcmp(lang, otherlang) == 0)
		{
			found = true;
			break;
		}
	}

	if (!found)
		pg_fatal("locale \"%s\" has unknown language \"%s\"",
				 loc_str, lang);
#else
	pg_fatal("ICU is not supported in this build");
#endif
}
2296+
22452297
/*
22462298
* Determine default ICU locale by opening the default collator and reading
22472299
* its locale.
@@ -2344,9 +2396,11 @@ setlocales(void)
23442396
printf(_("Using default ICU locale \"%s\".\n"), icu_locale);
23452397
}
23462398

2399+
icu_validate_locale(icu_locale);
2400+
23472401
/*
2348-
* In supported builds, the ICU locale ID will be checked by the
2349-
* backend during post-bootstrap initialization.
2402+
* In supported builds, the ICU locale ID will be opened during
2403+
* post-bootstrap initialization, which will perform extra checks.
23502404
*/
23512405
#ifndef USE_ICU
23522406
pg_fatal("ICU is not supported in this build");

src/bin/initdb/t/001_initdb.pl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,24 @@
128128
],
129129
qr/error: encoding mismatch/,
130130
'fails for encoding not supported by ICU');
131+
132+
command_fails_like(
133+
[
134+
'initdb', '--no-sync',
135+
'--locale-provider=icu',
136+
'--icu-locale=nonsense-nowhere', "$tempdir/dataX"
137+
],
138+
qr/error: locale "nonsense-nowhere" has unknown language "nonsense"/,
139+
'fails for nonsense language');
140+
141+
command_fails_like(
142+
[
143+
'initdb', '--no-sync',
144+
'--locale-provider=icu',
145+
'--icu-locale=@colNumeric=lower', "$tempdir/dataX"
146+
],
147+
qr/could not open collator for locale "\@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR/,
148+
'fails for invalid collation argument');
131149
}
132150
else
133151
{

src/include/utils/pg_locale.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ extern PGDLLIMPORT char *locale_messages;
4040
extern PGDLLIMPORT char *locale_monetary;
4141
extern PGDLLIMPORT char *locale_numeric;
4242
extern PGDLLIMPORT char *locale_time;
43+
extern PGDLLIMPORT int icu_validation_level;
4344

4445
/* lc_time localization cache */
4546
extern PGDLLIMPORT char *localized_abbrev_days[];
@@ -118,11 +119,12 @@ extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
118119
extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
119120
size_t srclen, pg_locale_t locale);
120121

122+
extern void icu_validate_locale(const char *loc_str);
123+
121124
#ifdef USE_ICU
122125
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
123126
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
124127
#endif
125-
extern void check_icu_locale(const char *icu_locale);
126128

127129
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
128130
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,

src/test/regress/expected/collate.icu.utf8.out

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,14 @@ END
10351035
$$;
10361036
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
10371037
ERROR: parameter "locale" must be specified
1038-
CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx;
1038+
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
1039+
ERROR: ICU locale "nonsense-nowhere" has unknown language "nonsense"
1040+
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
1041+
SET icu_validation_level = WARNING;
1042+
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
1043+
WARNING: ICU locale "nonsense-nowhere" has unknown language "nonsense"
1044+
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
1045+
RESET icu_validation_level;
10391046
CREATE COLLATION test4 FROM nonsense;
10401047
ERROR: collation "nonsense" for encoding "UTF8" does not exist
10411048
CREATE COLLATION test5 FROM test0;

src/test/regress/sql/collate.icu.utf8.sql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,10 @@ BEGIN
371371
END
372372
$$;
373373
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
374-
CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx;
374+
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
375+
SET icu_validation_level = WARNING;
376+
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
377+
RESET icu_validation_level;
375378

376379
CREATE COLLATION test4 FROM nonsense;
377380
CREATE COLLATION test5 FROM test0;

0 commit comments

Comments
 (0)