@@ -386,11 +386,12 @@ initdb --locale-provider=icu --icu-locale=en
386
386
linkend="icu-language-tag">Language Tag</link>.
387
387
388
388
<programlisting>
389
- CREATE COLLATION mycollation1 (PROVIDER = icu, LOCALE = 'ja-JP');
390
- CREATE COLLATION mycollation2 (PROVIDER = icu, LOCALE = 'fr');
389
+ CREATE COLLATION mycollation1 (provider = icu, locale = 'ja-JP');
390
+ CREATE COLLATION mycollation2 (provider = icu, locale = 'fr');
391
391
</programlisting>
392
392
</para>
393
393
</sect3>
394
+
394
395
<sect3 id="icu-canonicalization">
395
396
<title>Locale Canonicalization and Validation</title>
396
397
<para>
@@ -399,14 +400,14 @@ CREATE COLLATION mycollation2 (PROVIDER = icu, LOCALE = 'fr');
399
400
language tag if not already in that form. For instance,
400
401
401
402
<screen>
402
- CREATE COLLATION mycollation3 (PROVIDER = icu, LOCALE = 'en-US-u-kn-true');
403
+ CREATE COLLATION mycollation3 (provider = icu, locale = 'en-US-u-kn-true');
403
404
NOTICE: using standard form "en-US-u-kn" for locale "en-US-u-kn-true"
404
- CREATE COLLATION mycollation4 (PROVIDER = icu, LOCALE = 'de_DE.utf8');
405
+ CREATE COLLATION mycollation4 (provider = icu, locale = 'de_DE.utf8');
405
406
NOTICE: using standard form "de-DE" for locale "de_DE.utf8"
406
407
</screen>
407
408
408
- If you see this notice, ensure that the <symbol>PROVIDER </symbol> and
409
- <symbol>LOCALE </symbol> are the expected result. For consistent results
409
+ If you see this notice, ensure that the <symbol>provider</symbol> and
410
+ <symbol>locale</symbol> are the expected result. For consistent results
410
411
when using the ICU provider, specify the canonical <link
411
412
linkend="icu-language-tag">language tag</link> instead of relying on the
412
413
transformation.
@@ -427,7 +428,7 @@ NOTICE: using standard form "de-DE" for locale "de_DE.utf8"
427
428
the following warning:
428
429
429
430
<screen>
430
- CREATE COLLATION nonsense (PROVIDER = icu, LOCALE = 'nonsense');
431
+ CREATE COLLATION nonsense (provider = icu, locale = 'nonsense');
431
432
WARNING: ICU locale "nonsense" has unknown language "nonsense"
432
433
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
433
434
CREATE COLLATION
@@ -438,6 +439,7 @@ CREATE COLLATION
438
439
still be created, but the behavior may not be what the user intended.
439
440
</para>
440
441
</sect3>
442
+
441
443
<sect3 id="icu-language-tag">
442
444
<title>Language Tag</title>
443
445
<para>
@@ -484,7 +486,7 @@ CREATE COLLATION
484
486
of digits as a single number:
485
487
486
488
<screen>
487
- CREATE COLLATION mycollation5 (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'en-US-u-kn-ks-level2');
489
+ CREATE COLLATION mycollation5 (provider = icu, deterministic = false, locale = 'en-US-u-kn-ks-level2');
488
490
SELECT 'aB' = 'Ab' COLLATE mycollation5 as result;
489
491
result
490
492
--------
@@ -1109,16 +1111,16 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr
1109
1111
1110
1112
<programlisting>
1111
1113
-- ignore differences in accents and case
1112
- CREATE COLLATION ignore_accent_case (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'und-u-ks-level1');
1114
+ CREATE COLLATION ignore_accent_case (provider = icu, deterministic = false, locale = 'und-u-ks-level1');
1113
1115
SELECT 'Å' = 'A' COLLATE ignore_accent_case; -- true
1114
1116
SELECT 'z' = 'Z' COLLATE ignore_accent_case; -- true
1115
1117
1116
1118
-- upper case letters sort before lower case.
1117
- CREATE COLLATION upper_first (PROVIDER= icu, LOCALE = 'und-u-kf-upper');
1119
+ CREATE COLLATION upper_first (provider = icu, locale = 'und-u-kf-upper');
1118
1120
SELECT 'B' < 'b' COLLATE upper_first; -- true
1119
1121
1120
1122
-- treat digits numerically and ignore punctuation
1121
- CREATE COLLATION num_ignore_punct (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'und-u-ka-shifted-kn');
1123
+ CREATE COLLATION num_ignore_punct (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-kn');
1122
1124
SELECT 'id-45' < 'id-123' COLLATE num_ignore_punct; -- true
1123
1125
SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
1124
1126
</programlisting>
@@ -1136,6 +1138,13 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
1136
1138
linkend="icu-collation-settings-table">collation settings</link>. Higher
1137
1139
levels correspond to finer textual features.
1138
1140
</para>
1141
+ <para>
1142
+ <xref linkend="icu-collation-levels"/> shows which textual feature
1143
+ differences are considered significant when determining equality at the
1144
+ given level. The Unicode character <literal>U+2063</literal> is an
1145
+ invisible separator, and as seen in the table, is ignored at all
1146
+ levels of comparison less than <literal>identic</literal>.
1147
+ </para>
1139
1148
<para>
1140
1149
<table id="icu-collation-levels">
1141
1150
<title>ICU Collation Levels</title>
@@ -1215,30 +1224,23 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
1215
1224
</tgroup>
1216
1225
</table>
1217
1226
1218
- The above table shows which textual feature differences are
1219
- considered significant when determining equality at the given level. The
1220
- unicode character <literal>U+2063</literal> is an invisible separator,
1221
- and as seen in the table, is ignored for at all levels of comparison less
1222
- than <literal>identic</literal>.
1223
- </para>
1224
- <para>
1225
1227
At every level, even with full normalization off, basic normalization is
1226
1228
performed. For example, <literal>'á'</literal> may be composed of the
1227
1229
code points <literal>U&'\0061\0301'</literal> or the single code
1228
1230
point <literal>U&'\00E1'</literal>, and those sequences will be
1229
1231
considered equal even at the <literal>identic</literal> level. To treat
1230
1232
any difference in code point representation as distinct, use a collation
1231
- created with <symbol>DETERMINISTIC </symbol> set to
1233
+ created with <symbol>deterministic</symbol> set to
1232
1234
<literal>true</literal>.
1233
1235
</para>
1234
1236
<sect4 id="icu-collation-level-examples">
1235
1237
<title>Collation Level Examples</title>
1236
1238
<para>
1237
1239
1238
1240
<programlisting>
1239
- CREATE COLLATION level3 (PROVIDER= icu, DETERMINISTIC= false, LOCALE= 'und-u-ka-shifted-ks-level3');
1240
- CREATE COLLATION level4 (PROVIDER= icu, DETERMINISTIC= false, LOCALE= 'und-u-ka-shifted-ks-level4');
1241
- CREATE COLLATION identic (PROVIDER= icu, DETERMINISTIC= false, LOCALE= 'und-u-ka-shifted-ks-identic');
1241
+ CREATE COLLATION level3 (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-level3');
1242
+ CREATE COLLATION level4 (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-level4');
1243
+ CREATE COLLATION identic (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-identic');
1242
1244
1243
1245
-- invisible separator ignored at all levels except identic
1244
1246
SELECT 'ab' = U&'a\2063b' COLLATE level4; -- true
@@ -1252,8 +1254,14 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
1252
1254
</para>
1253
1255
</sect4>
1254
1256
</sect3>
1257
+
1255
1258
<sect3 id="icu-collation-settings">
1256
1259
<title>Collation Settings for an ICU Locale</title>
1260
+ <para>
1261
+ <xref linkend="icu-collation-settings-table"/> shows the available
1262
+ collation settings, which can be used as part of a language tag to
1263
+ customize a collation.
1264
+ </para>
1257
1265
<para>
1258
1266
<table id="icu-collation-settings-table">
1259
1267
<title>ICU Collation Settings</title>
@@ -1272,14 +1280,11 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
1272
1280
</thead>
1273
1281
<tbody>
1274
1282
<row>
1275
- <entry><literal>ks </literal></entry>
1276
- <entry><literal>level1 </literal>, <literal>level2 </literal>, <literal>level3 </literal>, <literal>level4</literal>, <literal>identic</literal ></entry>
1277
- <entry><literal>level3 </literal></entry>
1283
+ <entry><literal>co</literal></entry>
1284
+ <entry><literal>emoji</literal>, <literal>phonebk</literal>, <literal>standard</literal>, <replaceable>...</replaceable></entry>
1285
+ <entry><literal>standard</literal></entry>
1278
1286
<entry>
1279
- Sensitivity (or "strength") when determining equality, with
1280
- <literal>level1</literal> the least sensitive to differences and
1281
- <literal>identic</literal> the most sensitive to differences. See
1282
- <xref linkend="icu-collation-levels"/> for details.
1287
+ Collation type. See <xref linkend="icu-external-references"/> for additional options and details.
1283
1288
</entry>
1284
1289
</row>
1285
1290
<row>
@@ -1304,29 +1309,6 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
1304
1309
before <literal>'aé'</literal>.
1305
1310
</entry>
1306
1311
</row>
1307
- <row>
1308
- <entry><literal>kk</literal></entry>
1309
- <entry><literal>true</literal>, <literal>false</literal></entry>
1310
- <entry><literal>false</literal></entry>
1311
- <entry>
1312
- <para>
1313
- Enable full normalization; may affect performance. Basic
1314
- normalization is performed even when set to
1315
- <literal>false</literal>. Locales for languages that require full
1316
- normalization typically enable it by default.
1317
- </para>
1318
- <para>
1319
- Full normalization is important in some cases, such as when
1320
- multiple accents are applied to a single character. For example,
1321
- the code point sequences <literal>U&'\0065\0323\0302'</literal>
1322
- and <literal>U&'\0065\0302\0323'</literal> represent
1323
- an <literal>e</literal> with circumflex and dot-below accents
1324
- applied in different orders. With full normalization
1325
- on, these code point sequences are treated as equal; otherwise they
1326
- are unequal.
1327
- </para>
1328
- </entry>
1329
- </row>
1330
1312
<row>
1331
1313
<entry><literal>kc</literal></entry>
1332
1314
<entry><literal>true</literal>, <literal>false</literal></entry>
@@ -1368,6 +1350,29 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
1368
1350
<literal>'id-123'</literal>.
1369
1351
</entry>
1370
1352
</row>
1353
+ <row>
1354
+ <entry><literal>kk</literal></entry>
1355
+ <entry><literal>true</literal>, <literal>false</literal></entry>
1356
+ <entry><literal>false</literal></entry>
1357
+ <entry>
1358
+ <para>
1359
+ Enable full normalization; may affect performance. Basic
1360
+ normalization is performed even when set to
1361
+ <literal>false</literal>. Locales for languages that require full
1362
+ normalization typically enable it by default.
1363
+ </para>
1364
+ <para>
1365
+ Full normalization is important in some cases, such as when
1366
+ multiple accents are applied to a single character. For example,
1367
+ the code point sequences <literal>U&'\0065\0323\0302'</literal>
1368
+ and <literal>U&'\0065\0302\0323'</literal> represent
1369
+ an <literal>e</literal> with circumflex and dot-below accents
1370
+ applied in different orders. With full normalization
1371
+ on, these code point sequences are treated as equal; otherwise they
1372
+ are unequal.
1373
+ </para>
1374
+ </entry>
1375
+ </row>
1371
1376
<row>
1372
1377
<entry><literal>kr</literal></entry>
1373
1378
<entry>
@@ -1393,6 +1398,17 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
1393
1398
</para>
1394
1399
</entry>
1395
1400
</row>
1401
+ <row>
1402
+ <entry><literal>ks</literal></entry>
1403
+ <entry><literal>level1</literal>, <literal>level2</literal>, <literal>level3</literal>, <literal>level4</literal>, <literal>identic</literal></entry>
1404
+ <entry><literal>level3</literal></entry>
1405
+ <entry>
1406
+ Sensitivity (or "strength") when determining equality, with
1407
+ <literal>level1</literal> the least sensitive to differences and
1408
+ <literal>identic</literal> the most sensitive to differences. See
1409
+ <xref linkend="icu-collation-levels"/> for details.
1410
+ </entry>
1411
+ </row>
1396
1412
<row>
1397
1413
<entry><literal>kv</literal></entry>
1398
1414
<entry>
@@ -1410,14 +1426,6 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
1410
1426
to <literal>level3</literal> or lower to take effect.
1411
1427
</entry>
1412
1428
</row>
1413
- <row>
1414
- <entry><literal>co</literal></entry>
1415
- <entry><literal>emoji</literal>, <literal>phonebk</literal>, <literal>standard</literal>, <replaceable>...</replaceable></entry>
1416
- <entry><literal>standard</literal></entry>
1417
- <entry>
1418
- Collation type. See <xref linkend="icu-external-references"/> for additional options and details.
1419
- </entry>
1420
- </row>
1421
1429
</tbody>
1422
1430
</tgroup>
1423
1431
</table>
@@ -1428,7 +1436,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
1428
1436
<note>
1429
1437
<para>
1430
1438
For many collation settings, you must create the collation with
1431
- <option>DETERMINISTIC </option> set to <literal>false</literal> for the
1439
+ <option>deterministic</option> set to <literal>false</literal> for the
1432
1440
setting to have the desired effect (see <xref
1433
1441
linkend="collation-nondeterministic"/>). Additionally, some settings
1434
1442
only take effect when the key <literal>ka</literal> is set to
@@ -1437,6 +1445,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
1437
1445
</para>
1438
1446
</note>
1439
1447
</sect3>
1448
+
1440
1449
<sect3 id="icu-locale-examples">
1441
1450
<title>Examples</title>
1442
1451
<para>
@@ -1487,6 +1496,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
1487
1496
</variablelist>
1488
1497
</para>
1489
1498
</sect3>
1499
+
1490
1500
<sect3 id="icu-external-references">
1491
1501
<title>External References for ICU</title>
1492
1502
<para>
0 commit comments