Skip to content

Commit 3b50275

Browse files
committed
Handle the "und" locale in ICU versions 54 and older.
The "und" locale is an alternative spelling of the root locale, but it was not recognized until ICU 55. To maintain consistent behavior across all supported ICU versions, check for "und" and replace it with "root" before opening the collator. Previously, the lack of support for "und" was dangerous, because ICU versions 54 and older fall back to the environment when a locale is not found. If the user specified "und" for the language (which is expected and documented), the locale could not only resolve to the wrong collator, but the collator it resolved to could also change unexpectedly (which could lead to corrupt indexes). This effectively reverts commit d72900b, which worked around the problem for the built-in "unicode" collation; that workaround is no longer necessary. Discussion: https://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com Discussion: https://postgr.es/m/0c6fa66f2753217d2a40480a96bd2ccf023536a1.camel@j-davis.com Reviewed-by: Peter Eisentraut
1 parent 949e2e7 commit 3b50275

File tree

4 files changed

+44
-1
lines changed

4 files changed

+44
-1
lines changed

src/backend/utils/adt/pg_locale.c

+34
Original file line numberDiff line numberDiff line change
@@ -2503,6 +2503,7 @@ pg_ucol_open(const char *loc_str)
25032503
{
25042504
UCollator *collator;
25052505
UErrorCode status;
2506+
char *fixed_str = NULL;
25062507

25072508
/*
25082509
* Must never open default collator, because it depends on the environment
@@ -2517,6 +2518,36 @@ pg_ucol_open(const char *loc_str)
25172518
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
25182519
errmsg("opening default collator is not supported")));
25192520

2521+
/*
2522+
* In ICU versions 54 and earlier, "und" is not a recognized spelling of
2523+
* the root locale. If the first component of the locale is "und", replace
2524+
* with "root" before opening.
2525+
*/
2526+
if (U_ICU_VERSION_MAJOR_NUM < 55)
2527+
{
2528+
char lang[ULOC_LANG_CAPACITY];
2529+
2530+
status = U_ZERO_ERROR;
2531+
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2532+
if (U_FAILURE(status))
2533+
{
2534+
ereport(ERROR,
2535+
(errmsg("could not get language from locale \"%s\": %s",
2536+
loc_str, u_errorName(status))));
2537+
}
2538+
2539+
if (strcmp(lang, "und") == 0)
2540+
{
2541+
const char *remainder = loc_str + strlen("und");
2542+
2543+
fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
2544+
strcpy(fixed_str, "root");
2545+
strcat(fixed_str, remainder);
2546+
2547+
loc_str = fixed_str;
2548+
}
2549+
}
2550+
25202551
status = U_ZERO_ERROR;
25212552
collator = ucol_open(loc_str, &status);
25222553
if (U_FAILURE(status))
@@ -2527,6 +2558,9 @@ pg_ucol_open(const char *loc_str)
25272558
if (U_ICU_VERSION_MAJOR_NUM < 54)
25282559
icu_set_collation_attributes(collator, loc_str);
25292560

2561+
if (fixed_str != NULL)
2562+
pfree(fixed_str);
2563+
25302564
return collator;
25312565
}
25322566

src/bin/initdb/initdb.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1701,7 +1701,7 @@ setup_collation(FILE *cmdfd)
17011701
* that they win if libc defines a locale with the same name.
17021702
*/
17031703
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)"
1704-
"VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, '');\n\n",
1704+
"VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n",
17051705
BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU);
17061706

17071707
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"

src/test/regress/expected/collate.icu.utf8.out

+7
Original file line numberDiff line numberDiff line change
@@ -1312,6 +1312,13 @@ SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
13121312
t
13131313
(1 row)
13141314

1315+
CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper');
1316+
SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst;
1317+
?column?
1318+
----------
1319+
t
1320+
(1 row)
1321+
13151322
CREATE TABLE test1cs (x text COLLATE case_sensitive);
13161323
CREATE TABLE test2cs (x text COLLATE case_sensitive);
13171324
CREATE TABLE test3cs (x text COLLATE case_sensitive);

src/test/regress/sql/collate.icu.utf8.sql

+2
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,8 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
521521
-- test language tags
522522
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
523523
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
524+
CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper');
525+
SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst;
524526

525527
CREATE TABLE test1cs (x text COLLATE case_sensitive);
526528
CREATE TABLE test2cs (x text COLLATE case_sensitive);

0 commit comments

Comments
 (0)