Skip to content

Commit c45dc7f

Browse files
committed
initdb: derive encoding from locale for ICU; similar to libc.
Previously, the default encoding was derived from the locale when using libc, while the default was always UTF-8 when using ICU. The fixed UTF-8 default would throw an error when the locale was not compatible with UTF-8. This commit causes initdb to derive the default encoding from the locale for both providers. If --no-locale is specified (or if the locale is C or POSIX), the default encoding will be UTF-8 for ICU (because ICU does not support SQL_ASCII) and SQL_ASCII for libc. Per buildfarm failure on system "hoverfly" related to commit 27b6237. Discussion: https://postgr.es/m/d191d5841347301a8f1238f609471ddd957fc47e.camel%40j-davis.com
1 parent 3e623eb commit c45dc7f

File tree

6 files changed

+29
-27
lines changed

6 files changed

+29
-27
lines changed

contrib/unaccent/meson.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,6 @@ tests += {
3737
'sql': [
3838
'unaccent',
3939
],
40-
'regress_args': ['--encoding=UTF8'],
40+
'regress_args': ['--encoding=UTF8', '--no-locale'],
4141
},
4242
}

doc/src/sgml/ref/initdb.sgml

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -213,13 +213,19 @@ PostgreSQL documentation
213213
<term><option>--encoding=<replaceable class="parameter">encoding</replaceable></option></term>
214214
<listitem>
215215
<para>
216-
Selects the encoding of the template databases. This will also
217-
be the default encoding of any database you create later,
218-
unless you override it then. The default is derived from the locale,
219-
if the libc locale provider is used, or <literal>UTF8</literal> if the
220-
ICU locale provider is used. The character sets supported by
221-
the <productname>PostgreSQL</productname> server are described
222-
in <xref linkend="multibyte-charset-supported"/>.
216+
Selects the encoding of the template databases. This will also be the
217+
default encoding of any database you create later, unless you override
218+
it then. The character sets supported by the
219+
<productname>PostgreSQL</productname> server are described in <xref
220+
linkend="multibyte-charset-supported"/>.
221+
</para>
222+
<para>
223+
By default, the template database encoding is derived from the
224+
locale. If <xref linkend="app-initdb-option-no-locale"/> is specified
225+
(or equivalently, if the locale is <literal>C</literal> or
226+
<literal>POSIX</literal>), then the default is <literal>UTF8</literal>
227+
for the ICU provider and <literal>SQL_ASCII</literal> for the
228+
<literal>libc</literal> provider.
223229
</para>
224230
</listitem>
225231
</varlistentry>

src/bin/initdb/initdb.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2350,18 +2350,19 @@ setup_locale_encoding(void)
23502350
lc_time);
23512351
}
23522352

2353-
if (!encoding && locale_provider == COLLPROVIDER_ICU)
2354-
{
2355-
encodingid = PG_UTF8;
2356-
printf(_("The default database encoding has been set to \"%s\".\n"),
2357-
pg_encoding_to_char(encodingid));
2358-
}
2359-
else if (!encoding)
2353+
if (!encoding)
23602354
{
23612355
int ctype_enc;
23622356

23632357
ctype_enc = pg_get_encoding_from_locale(lc_ctype, true);
23642358

2359+
/*
2360+
* If ctype_enc=SQL_ASCII, it's compatible with any encoding. ICU does
2361+
* not support SQL_ASCII, so select UTF-8 instead.
2362+
*/
2363+
if (locale_provider == COLLPROVIDER_ICU && ctype_enc == PG_SQL_ASCII)
2364+
ctype_enc = PG_UTF8;
2365+
23652366
if (ctype_enc == -1)
23662367
{
23672368
/* Couldn't recognize the locale's codeset */

src/bin/pg_upgrade/t/002_pg_upgrade.pl

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ sub filter_dump
108108

109109
my $original_encoding = "6"; # UTF-8
110110
my $original_provider = "c";
111-
my $original_collate = "C";
111+
my $original_locale = "C";
112112
my $original_iculocale = "";
113113
my $provider_field = "'c' AS datlocprovider";
114114
my $iculocale_field = "NULL AS daticulocale";
@@ -123,7 +123,7 @@ sub filter_dump
123123
my @initdb_params = @custom_opts;
124124

125125
push @initdb_params, ('--encoding', 'UTF-8');
126-
push @initdb_params, ('--lc-collate', $original_collate);
126+
push @initdb_params, ('--locale', $original_locale);
127127
if ($original_provider eq "i")
128128
{
129129
push @initdb_params, ('--locale-provider', 'icu');
@@ -136,16 +136,12 @@ sub filter_dump
136136

137137
my $result;
138138
$result = $oldnode->safe_psql(
139-
'postgres', "SELECT encoding, $provider_field, datcollate, $iculocale_field
139+
'postgres', "SELECT encoding, $provider_field, datcollate, datctype, $iculocale_field
140140
FROM pg_database WHERE datname='template0'");
141-
is($result, "$original_encoding|$original_provider|$original_collate|$original_iculocale",
141+
is($result, "$original_encoding|$original_provider|$original_locale|$original_locale|$original_iculocale",
142142
"check locales in original cluster"
143143
);
144144

145-
# check ctype, which was acquired from environment by initdb
146-
my $original_ctype = $oldnode->safe_psql(
147-
'postgres', q{SELECT datctype FROM pg_database WHERE datname='template0'});
148-
149145
# The default location of the source code is the root of this directory.
150146
my $srcdir = abs_path("../../..");
151147

@@ -224,7 +220,6 @@ sub filter_dump
224220
# cluster.
225221
push @initdb_params, ('--encoding', 'SQL_ASCII');
226222
push @initdb_params, ('--locale-provider', 'libc');
227-
push @initdb_params, ('--lc-ctype', 'C');
228223

229224
$node_params{extra} = \@initdb_params;
230225
$newnode->init(%node_params);
@@ -401,7 +396,7 @@ sub filter_dump
401396
$result = $newnode->safe_psql(
402397
'postgres', "SELECT encoding, $provider_field, datcollate, datctype, $iculocale_field
403398
FROM pg_database WHERE datname='template0'");
404-
is($result, "$original_encoding|$original_provider|$original_collate|$original_ctype|$original_iculocale",
399+
is($result, "$original_encoding|$original_provider|$original_locale|$original_locale|$original_iculocale",
405400
"check that locales in new cluster match original cluster"
406401
);
407402

src/bin/scripts/t/020_createdb.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
[
4242
'createdb', '-T',
4343
'template0', '-E', 'UTF8', '--locale-provider=icu',
44-
'--icu-locale=en', 'foobar5'
44+
'--locale=C', '--icu-locale=en', 'foobar5'
4545
],
4646
qr/statement: CREATE DATABASE foobar5 .* LOCALE_PROVIDER icu ICU_LOCALE 'en'/,
4747
'create database with ICU locale specified');

src/test/icu/t/010_database.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
# Test error cases in CREATE DATABASE involving locale-related options
5555

5656
my ($ret, $stdout, $stderr) = $node1->psql('postgres',
57-
q{CREATE DATABASE dbicu LOCALE_PROVIDER icu TEMPLATE template0 ENCODING UTF8});
57+
q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8});
5858
isnt($ret, 0,
5959
"ICU locale must be specified for ICU provider: exit code not 0");
6060
like(

0 commit comments

Comments
 (0)