Skip to content

Commit 30a53b7

Browse files
committed
Allow tailoring of ICU locales with custom rules
This exposes the ICU facility to add custom collation rules to a standard collation. New options are added to CREATE COLLATION, CREATE DATABASE, createdb, and initdb to set the rules.

Reviewed-by: Laurenz Albe <laurenz.albe@cybertec.at>
Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Discussion: https://www.postgresql.org/message-id/flat/821c71a4-6ef0-d366-9acf-bb8e367f739f@enterprisedb.com
1 parent b1534ed commit 30a53b7

22 files changed

+380
-59
lines changed

doc/src/sgml/catalogs.sgml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2428,6 +2428,15 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
24282428
</para></entry>
24292429
</row>
24302430

2431+
<row>
2432+
<entry role="catalog_table_entry"><para role="column_definition">
2433+
<structfield>collicurules</structfield> <type>text</type>
2434+
</para>
2435+
<para>
2436+
ICU collation rules for this collation object
2437+
</para></entry>
2438+
</row>
2439+
24312440
<row>
24322441
<entry role="catalog_table_entry"><para role="column_definition">
24332442
<structfield>collversion</structfield> <type>text</type>
@@ -3106,6 +3115,15 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
31063115
</para></entry>
31073116
</row>
31083117

3118+
<row>
3119+
<entry role="catalog_table_entry"><para role="column_definition">
3120+
<structfield>daticurules</structfield> <type>text</type>
3121+
</para>
3122+
<para>
3123+
ICU collation rules for this database
3124+
</para></entry>
3125+
</row>
3126+
31093127
<row>
31103128
<entry role="catalog_table_entry"><para role="column_definition">
31113129
<structfield>datcollversion</structfield> <type>text</type>

doc/src/sgml/ref/create_collation.sgml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> (
2727
[ LC_CTYPE = <replaceable>lc_ctype</replaceable>, ]
2828
[ PROVIDER = <replaceable>provider</replaceable>, ]
2929
[ DETERMINISTIC = <replaceable>boolean</replaceable>, ]
30+
[ RULES = <replaceable>rules</replaceable>, ]
3031
[ VERSION = <replaceable>version</replaceable> ]
3132
)
3233
CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replaceable>existing_collation</replaceable>
@@ -149,6 +150,19 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
149150
</listitem>
150151
</varlistentry>
151152

153+
<varlistentry>
154+
<term><replaceable>rules</replaceable></term>
155+
156+
<listitem>
157+
<para>
158+
Specifies additional collation rules to customize the behavior of the
159+
collation. This is supported for ICU only. See <ulink
160+
url="https://unicode-org.github.io/icu/userguide/collation/customization/"/>
161+
for details on the syntax.
162+
</para>
163+
</listitem>
164+
</varlistentry>
165+
152166
<varlistentry>
153167
<term><replaceable>version</replaceable></term>
154168

@@ -228,6 +242,14 @@ CREATE COLLATION german_phonebook (provider = icu, locale = 'de-u-co-phonebk');
228242
</programlisting>
229243
</para>
230244

245+
<para>
246+
To create a collation using the ICU provider, based on the English ICU
247+
locale, with custom rules:
248+
<programlisting>
249+
<![CDATA[CREATE COLLATION en_custom (provider = icu, locale = 'en', rules = '&a < g');]]>
250+
</programlisting>
251+
</para>
252+
231253
<para>
232254
To create a collation from an existing collation:
233255
<programlisting>

doc/src/sgml/ref/create_database.sgml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
3030
[ LC_COLLATE [=] <replaceable class="parameter">lc_collate</replaceable> ]
3131
[ LC_CTYPE [=] <replaceable class="parameter">lc_ctype</replaceable> ]
3232
[ ICU_LOCALE [=] <replaceable class="parameter">icu_locale</replaceable> ]
33+
[ ICU_RULES [=] <replaceable class="parameter">icu_rules</replaceable> ]
3334
[ LOCALE_PROVIDER [=] <replaceable class="parameter">locale_provider</replaceable> ]
3435
[ COLLATION_VERSION = <replaceable>collation_version</replaceable> ]
3536
[ TABLESPACE [=] <replaceable class="parameter">tablespace_name</replaceable> ]
@@ -192,6 +193,19 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
192193
</listitem>
193194
</varlistentry>
194195

196+
<varlistentry id="create-database-icu-rules">
197+
<term><replaceable class="parameter">icu_rules</replaceable></term>
198+
<listitem>
199+
<para>
200+
Specifies additional collation rules to customize the behavior of the
201+
default collation of this database. This is supported for ICU only.
202+
See <ulink
203+
url="https://unicode-org.github.io/icu/userguide/collation/customization/"/>
204+
for details on the syntax.
205+
</para>
206+
</listitem>
207+
</varlistentry>
208+
195209
<varlistentry id="create-database-locale-provider">
196210
<term><replaceable>locale_provider</replaceable></term>
197211

doc/src/sgml/ref/createdb.sgml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,16 @@ PostgreSQL documentation
157157
</listitem>
158158
</varlistentry>
159159

160+
<varlistentry>
161+
<term><option>--icu-rules=<replaceable class="parameter">rules</replaceable></option></term>
162+
<listitem>
163+
<para>
164+
Specifies additional collation rules to customize the behavior of the
165+
default collation of this database. This is supported for ICU only.
166+
</para>
167+
</listitem>
168+
</varlistentry>
169+
160170
<varlistentry>
161171
<term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
162172
<listitem>

doc/src/sgml/ref/initdb.sgml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,16 @@ PostgreSQL documentation
239239
</listitem>
240240
</varlistentry>
241241

242+
<varlistentry id="app-initdb-icu-rules">
243+
<term><option>--icu-rules=<replaceable>rules</replaceable></option></term>
244+
<listitem>
245+
<para>
246+
Specifies additional collation rules to customize the behavior of the
247+
default collation. This is supported for ICU only.
248+
</para>
249+
</listitem>
250+
</varlistentry>
251+
242252
<varlistentry id="app-initdb-data-checksums" xreflabel="data checksums">
243253
<term><option>-k</option></term>
244254
<term><option>--data-checksums</option></term>

src/backend/catalog/pg_collation.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ CollationCreate(const char *collname, Oid collnamespace,
5050
int32 collencoding,
5151
const char *collcollate, const char *collctype,
5252
const char *colliculocale,
53+
const char *collicurules,
5354
const char *collversion,
5455
bool if_not_exists,
5556
bool quiet)
@@ -194,6 +195,10 @@ CollationCreate(const char *collname, Oid collnamespace,
194195
values[Anum_pg_collation_colliculocale - 1] = CStringGetTextDatum(colliculocale);
195196
else
196197
nulls[Anum_pg_collation_colliculocale - 1] = true;
198+
if (collicurules)
199+
values[Anum_pg_collation_collicurules - 1] = CStringGetTextDatum(collicurules);
200+
else
201+
nulls[Anum_pg_collation_collicurules - 1] = true;
197202
if (collversion)
198203
values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(collversion);
199204
else

src/backend/commands/collationcmds.c

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
6464
DefElem *lcctypeEl = NULL;
6565
DefElem *providerEl = NULL;
6666
DefElem *deterministicEl = NULL;
67+
DefElem *rulesEl = NULL;
6768
DefElem *versionEl = NULL;
6869
char *collcollate;
6970
char *collctype;
7071
char *colliculocale;
72+
char *collicurules;
7173
bool collisdeterministic;
7274
int collencoding;
7375
char collprovider;
@@ -99,6 +101,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
99101
defelp = &providerEl;
100102
else if (strcmp(defel->defname, "deterministic") == 0)
101103
defelp = &deterministicEl;
104+
else if (strcmp(defel->defname, "rules") == 0)
105+
defelp = &rulesEl;
102106
else if (strcmp(defel->defname, "version") == 0)
103107
defelp = &versionEl;
104108
else
@@ -161,6 +165,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
161165
else
162166
colliculocale = NULL;
163167

168+
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
169+
if (!isnull)
170+
collicurules = TextDatumGetCString(datum);
171+
else
172+
collicurules = NULL;
173+
164174
ReleaseSysCache(tp);
165175

166176
/*
@@ -182,6 +192,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
182192
collcollate = NULL;
183193
collctype = NULL;
184194
colliculocale = NULL;
195+
collicurules = NULL;
185196

186197
if (providerEl)
187198
collproviderstr = defGetString(providerEl);
@@ -191,6 +202,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
191202
else
192203
collisdeterministic = true;
193204

205+
if (rulesEl)
206+
collicurules = defGetString(rulesEl);
207+
194208
if (versionEl)
195209
collversion = defGetString(versionEl);
196210

@@ -297,6 +311,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
297311
collcollate,
298312
collctype,
299313
colliculocale,
314+
collicurules,
300315
collversion,
301316
if_not_exists,
302317
false); /* not quiet */
@@ -680,7 +695,7 @@ create_collation_from_locale(const char *locale, int nspid,
680695
*/
681696
collid = CollationCreate(locale, nspid, GetUserId(),
682697
COLLPROVIDER_LIBC, true, enc,
683-
locale, locale, NULL,
698+
locale, locale, NULL, NULL,
684699
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
685700
true, true);
686701
if (OidIsValid(collid))
@@ -755,7 +770,7 @@ win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
755770

756771
collid = CollationCreate(alias, param->nspid, GetUserId(),
757772
COLLPROVIDER_LIBC, true, enc,
758-
localebuf, localebuf, NULL,
773+
localebuf, localebuf, NULL, NULL,
759774
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
760775
true, true);
761776
if (OidIsValid(collid))
@@ -889,7 +904,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
889904

890905
collid = CollationCreate(alias, nspid, GetUserId(),
891906
COLLPROVIDER_LIBC, true, enc,
892-
locale, locale, NULL,
907+
locale, locale, NULL, NULL,
893908
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
894909
true, true);
895910
if (OidIsValid(collid))
@@ -951,7 +966,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
951966
collid = CollationCreate(psprintf("%s-x-icu", langtag),
952967
nspid, GetUserId(),
953968
COLLPROVIDER_ICU, true, -1,
954-
NULL, NULL, iculocstr,
969+
NULL, NULL, iculocstr, NULL,
955970
get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
956971
true, true);
957972
if (OidIsValid(collid))

0 commit comments

Comments
 (0)