Skip to content

Commit cd7301f

Browse files
jeff-davis authored and pull[bot] committed
Use ICU by default at initdb time.
If the ICU locale is not specified, initialize the default collator and retrieve the locale name from that.
Discussion: https://postgr.es/m/510d284759f6e943ce15096167760b2edcb2e700.camel@j-davis.com
Reviewed-by: Peter Eisentraut
1 parent 4075b92 commit cd7301f

File tree

18 files changed

+147
-42
lines changed

18 files changed

+147
-42
lines changed

contrib/citext/expected/citext_utf8.out

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
/*
22
* This test must be run in a database with UTF-8 encoding
33
* and a Unicode-aware locale.
4+
*
5+
* Also disable this file for ICU, because the test for the
6+
* Turkish dotted I is not correct for many ICU locales. citext always
7+
* uses the default collation, so it's not easy to restrict the test
8+
* to the "tr-TR-x-icu" collation where it will succeed.
49
*/
510
SELECT getdatabaseencoding() <> 'UTF8' OR
6-
current_setting('lc_ctype') = 'C'
11+
current_setting('lc_ctype') = 'C' OR
12+
(SELECT datlocprovider='i' FROM pg_database
13+
WHERE datname=current_database())
714
AS skip_test \gset
815
\if :skip_test
916
\quit
Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
/*
22
* This test must be run in a database with UTF-8 encoding
33
* and a Unicode-aware locale.
4+
*
5+
* Also disable this file for ICU, because the test for the
6+
* Turkish dotted I is not correct for many ICU locales. citext always
7+
* uses the default collation, so it's not easy to restrict the test
8+
* to the "tr-TR-x-icu" collation where it will succeed.
49
*/
510
SELECT getdatabaseencoding() <> 'UTF8' OR
6-
current_setting('lc_ctype') = 'C'
11+
current_setting('lc_ctype') = 'C' OR
12+
(SELECT datlocprovider='i' FROM pg_database
13+
WHERE datname=current_database())
714
AS skip_test \gset
815
\if :skip_test
916
\quit

contrib/citext/sql/citext_utf8.sql

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
/*
22
* This test must be run in a database with UTF-8 encoding
33
* and a Unicode-aware locale.
4+
*
5+
* Also disable this file for ICU, because the test for the
6+
* Turkish dotted I is not correct for many ICU locales. citext always
7+
* uses the default collation, so it's not easy to restrict the test
8+
* to the "tr-TR-x-icu" collation where it will succeed.
49
*/
510

611
SELECT getdatabaseencoding() <> 'UTF8' OR
7-
current_setting('lc_ctype') = 'C'
12+
current_setting('lc_ctype') = 'C' OR
13+
(SELECT datlocprovider='i' FROM pg_database
14+
WHERE datname=current_database())
815
AS skip_test \gset
916
\if :skip_test
1017
\quit

contrib/unaccent/expected/unaccent.out

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
-- unaccent is broken if the default collation is provided by ICU and
2+
-- LC_CTYPE=C
3+
SELECT current_setting('lc_ctype') = 'C' AND
4+
(SELECT datlocprovider='i' FROM pg_database
5+
WHERE datname=current_database())
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit
9+
\endif
110
CREATE EXTENSION unaccent;
211
-- must have a UTF8 database
312
SELECT getdatabaseencoding();
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- unaccent is broken if the default collation is provided by ICU and
2+
-- LC_CTYPE=C
3+
SELECT current_setting('lc_ctype') = 'C' AND
4+
(SELECT datlocprovider='i' FROM pg_database
5+
WHERE datname=current_database())
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit

contrib/unaccent/sql/unaccent.sql

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
2+
-- unaccent is broken if the default collation is provided by ICU and
3+
-- LC_CTYPE=C
4+
SELECT current_setting('lc_ctype') = 'C' AND
5+
(SELECT datlocprovider='i' FROM pg_database
6+
WHERE datname=current_database())
7+
AS skip_test \gset
8+
\if :skip_test
9+
\quit
10+
\endif
11+
112
CREATE EXTENSION unaccent;
213

314
-- must have a UTF8 database

doc/src/sgml/ref/initdb.sgml

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,28 @@ PostgreSQL documentation
8989
and character set encoding. These can also be set separately for each
9090
database when it is created. <command>initdb</command> determines those
9191
settings for the template databases, which will serve as the default for
92-
all other databases. By default, <command>initdb</command> uses the
93-
locale provider <literal>libc</literal>, takes the locale settings from
94-
the environment, and determines the encoding from the locale settings.
95-
This is almost always sufficient, unless there are special requirements.
92+
all other databases.
93+
</para>
94+
95+
<para>
96+
By default, <command>initdb</command> uses the ICU library to provide
97+
locale services if the server was built with ICU support; otherwise it uses
98+
the <literal>libc</literal> locale provider (see <xref
99+
linkend="locale-providers"/>). To choose the specific ICU locale ID to
100+
apply, use the option <option>--icu-locale</option>. Note that for
101+
implementation reasons and to support legacy code,
102+
<command>initdb</command> will still select and initialize libc locale
103+
settings when the ICU locale provider is used.
104+
</para>
105+
106+
<para>
107+
Alternatively, <command>initdb</command> can use the locale provider
108+
<literal>libc</literal>. To select this option, specify
109+
<literal>--locale-provider=libc</literal>, or build the server without ICU
110+
support. The <literal>libc</literal> locale provider takes the locale
111+
settings from the environment, and determines the encoding from the locale
112+
settings. This is almost always sufficient, unless there are special
113+
requirements.
96114
</para>
97115

98116
<para>
@@ -103,17 +121,6 @@ PostgreSQL documentation
103121
categories can give nonsensical results, so this should be used with care.
104122
</para>
105123

106-
<para>
107-
Alternatively, the ICU library can be used to provide locale services.
108-
(Again, this only sets the default for subsequently created databases.) To
109-
select this option, specify <literal>--locale-provider=icu</literal>.
110-
To choose the specific ICU locale ID to apply, use the option
111-
<option>--icu-locale</option>. Note that
112-
for implementation reasons and to support legacy code,
113-
<command>initdb</command> will still select and initialize libc locale
114-
settings when the ICU locale provider is used.
115-
</para>
116-
117124
<para>
118125
When <command>initdb</command> runs, it will print out the locale settings
119126
it has chosen. If you have complex requirements or specified multiple
@@ -234,7 +241,13 @@ PostgreSQL documentation
234241
<term><option>--icu-locale=<replaceable>locale</replaceable></option></term>
235242
<listitem>
236243
<para>
237-
Specifies the ICU locale ID, if the ICU locale provider is used.
244+
Specifies the ICU locale when the ICU provider is used. Locale support
245+
is described in <xref linkend="locale"/>.
246+
</para>
247+
<para>
248+
If this option is not specified, the locale is inherited from the
249+
environment in which <command>initdb</command> runs. The environment's
250+
locale is matched to a similar ICU locale name, if possible.
238251
</para>
239252
</listitem>
240253
</varlistentry>
@@ -307,10 +320,12 @@ PostgreSQL documentation
307320
<term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
308321
<listitem>
309322
<para>
310-
This option sets the locale provider for databases created in the
311-
new cluster. It can be overridden in the <command>CREATE
323+
This option sets the locale provider for databases created in the new
324+
cluster. It can be overridden in the <command>CREATE
312325
DATABASE</command> command when new databases are subsequently
313-
created. The default is <literal>libc</literal>.
326+
created. The default is <literal>icu</literal> if the server was
327+
built with ICU support; otherwise the default is
328+
<literal>libc</literal> (see <xref linkend="locale-providers"/>).
314329
</para>
315330
</listitem>
316331
</varlistentry>

src/bin/initdb/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@ subdir = src/bin/initdb
1616
top_builddir = ../../..
1717
include $(top_builddir)/src/Makefile.global
1818

19-
override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS)
19+
override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(ICU_CFLAGS) $(CPPFLAGS)
2020

2121
# Note: it's important that we link to encnames.o from libpgcommon, not
2222
# from libpq, else we have risks of version skew if we run with a libpq
2323
# shared library from a different PG version. The libpq_pgport macro
2424
# should ensure that that happens.
2525
#
2626
# We need libpq only because fe_utils does.
27-
LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
27+
LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS)
2828

2929
# use system timezone data?
3030
ifneq (,$(with_system_tzdata))

src/bin/initdb/initdb.c

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@
5353
#include <netdb.h>
5454
#include <sys/socket.h>
5555
#include <sys/stat.h>
56+
#ifdef USE_ICU
57+
#include <unicode/ucol.h>
58+
#endif
5659
#include <unistd.h>
5760
#include <signal.h>
5861
#include <time.h>
@@ -133,7 +136,11 @@ static char *lc_monetary = NULL;
133136
static char *lc_numeric = NULL;
134137
static char *lc_time = NULL;
135138
static char *lc_messages = NULL;
139+
#ifdef USE_ICU
140+
static char locale_provider = COLLPROVIDER_ICU;
141+
#else
136142
static char locale_provider = COLLPROVIDER_LIBC;
143+
#endif
137144
static char *icu_locale = NULL;
138145
static char *icu_rules = NULL;
139146
static const char *default_text_search_config = NULL;
@@ -2028,6 +2035,50 @@ check_icu_locale_encoding(int user_enc)
20282035
return true;
20292036
}
20302037

2038+
/*
2039+
* Check that ICU accepts the locale name; or if not specified, retrieve the
2040+
* default ICU locale.
2041+
*/
2042+
static void
2043+
check_icu_locale(void)
2044+
{
2045+
#ifdef USE_ICU
2046+
UCollator *collator;
2047+
UErrorCode status;
2048+
2049+
status = U_ZERO_ERROR;
2050+
collator = ucol_open(icu_locale, &status);
2051+
if (U_FAILURE(status))
2052+
{
2053+
if (icu_locale)
2054+
pg_fatal("could not open collator for locale \"%s\": %s",
2055+
icu_locale, u_errorName(status));
2056+
else
2057+
pg_fatal("could not open collator for default locale: %s",
2058+
u_errorName(status));
2059+
}
2060+
2061+
/* if not specified, get locale from default collator */
2062+
if (icu_locale == NULL)
2063+
{
2064+
const char *default_locale;
2065+
2066+
status = U_ZERO_ERROR;
2067+
default_locale = ucol_getLocaleByType(collator, ULOC_VALID_LOCALE,
2068+
&status);
2069+
if (U_FAILURE(status))
2070+
{
2071+
ucol_close(collator);
2072+
pg_fatal("could not determine default ICU locale");
2073+
}
2074+
2075+
icu_locale = pg_strdup(default_locale);
2076+
}
2077+
2078+
ucol_close(collator);
2079+
#endif
2080+
}
2081+
20312082
/*
20322083
* set up the locale variables
20332084
*
@@ -2081,8 +2132,7 @@ setlocales(void)
20812132

20822133
if (locale_provider == COLLPROVIDER_ICU)
20832134
{
2084-
if (!icu_locale)
2085-
pg_fatal("ICU locale must be specified");
2135+
check_icu_locale();
20862136

20872137
/*
20882138
* In supported builds, the ICU locale ID will be checked by the

src/bin/initdb/t/001_initdb.pl

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,6 @@
9797

9898
if ($ENV{with_icu} eq 'yes')
9999
{
100-
command_fails_like(
101-
[ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ],
102-
qr/initdb: error: ICU locale must be specified/,
103-
'locale provider ICU requires --icu-locale');
104-
105100
command_ok(
106101
[
107102
'initdb', '--no-sync',
@@ -116,7 +111,7 @@
116111
'--locale-provider=icu', '--icu-locale=@colNumeric=lower',
117112
"$tempdir/dataX"
118113
],
119-
qr/FATAL: could not open collator for locale/,
114+
qr/error: could not open collator for locale/,
120115
'fails for invalid ICU locale');
121116

122117
command_fails_like(

src/bin/pg_dump/t/002_pg_dump.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1758,7 +1758,7 @@
17581758
create_sql =>
17591759
"CREATE DATABASE dump_test2 LOCALE = 'C' TEMPLATE = template0;",
17601760
regexp => qr/^
1761-
\QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C';\E
1761+
\QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C'\E
17621762
/xm,
17631763
like => { pg_dumpall_dbprivs => 1, },
17641764
},

src/bin/scripts/t/020_createdb.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
program_options_handling_ok('createdb');
1414

1515
my $node = PostgreSQL::Test::Cluster->new('main');
16-
$node->init;
16+
$node->init(extra => ['--locale-provider=libc']);
1717
$node->start;
1818

1919
$node->issues_sql_like(

src/interfaces/ecpg/test/Makefile

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@ override CPPFLAGS := \
1414
'-DSHELLPROG="$(SHELL)"' \
1515
$(CPPFLAGS)
1616

17-
# default encoding for regression tests
18-
ENCODING = SQL_ASCII
19-
2017
ifneq ($(build_os),mingw32)
2118
abs_builddir := $(shell pwd)
2219
else

src/interfaces/ecpg/test/connect/test5.pgc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ exec sql end declare section;
5555
exec sql connect to 'unix:postgresql://localhost/ecpg2_regression' as main user :user USING "connectpw";
5656
exec sql disconnect main;
5757

58-
exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=latin1 as main user regress_ecpg_user1/connectpw;
58+
exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=sql_ascii as main user regress_ecpg_user1/connectpw;
5959
exec sql disconnect main;
6060

6161
exec sql connect to "unix:postgresql://200.46.204.71/ecpg2_regression" as main user regress_ecpg_user1/connectpw;

src/interfaces/ecpg/test/expected/connect-test5.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ main(void)
117117
#line 56 "test5.pgc"
118118

119119

120-
{ ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=latin1" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
120+
{ ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=sql_ascii" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
121121
#line 58 "test5.pgc"
122122

123123
{ ECPGdisconnect(__LINE__, "main");}

src/interfaces/ecpg/test/expected/connect-test5.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
[NO_PID]: sqlca: code: 0, state: 00000
5151
[NO_PID]: ecpg_finish: connection main closed
5252
[NO_PID]: sqlca: code: 0, state: 00000
53-
[NO_PID]: ECPGconnect: opening database ecpg2_regression on <DEFAULT> port <DEFAULT> with options connect_timeout=180 & client_encoding=latin1 for user regress_ecpg_user1
53+
[NO_PID]: ECPGconnect: opening database ecpg2_regression on <DEFAULT> port <DEFAULT> with options connect_timeout=180 & client_encoding=sql_ascii for user regress_ecpg_user1
5454
[NO_PID]: sqlca: code: 0, state: 00000
5555
[NO_PID]: ecpg_finish: connection main closed
5656
[NO_PID]: sqlca: code: 0, state: 00000

src/interfaces/ecpg/test/meson.build

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ ecpg_test_files = files(
6969
ecpg_regress_args = [
7070
'--dbname=ecpg1_regression,ecpg2_regression',
7171
'--create-role=regress_ecpg_user1,regress_ecpg_user2',
72-
'--encoding=SQL_ASCII',
7372
]
7473

7574
tests += {

src/test/icu/t/010_database.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
}
1313

1414
my $node1 = PostgreSQL::Test::Cluster->new('node1');
15-
$node1->init;
15+
$node1->init(extra => ['--locale-provider=libc']);
1616
$node1->start;
1717

1818
$node1->safe_psql('postgres',

0 commit comments

Comments
 (0)