Skip to content

Commit f89e4df

Browse files
committed
Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.
Tweak the test so that it does not depend on the platform using ".utf8" as the extension signifying that a locale uses UTF8 encoding. For the most part this just requires using the abbreviated collation names "en_US" etc, though I had to work a bit harder on the collation creation tests. This opens the door to using the test on platforms that spell locales differently, for example ".utf-8" or ".UTF-8". Also, the test is now somewhat useful with server encodings other than UTF8; though depending on which encoding is selected, different subsets of it will fail for lack of character set support.
1 parent a19002d commit f89e4df

File tree

2 files changed

+99
-67
lines changed

2 files changed

+99
-67
lines changed

src/test/regress/expected/collate.linux.utf8.out

Lines changed: 63 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
SET client_encoding TO UTF8;
66
CREATE TABLE collate_test1 (
77
a int,
8-
b text COLLATE "en_US.utf8" NOT NULL
8+
b text COLLATE "en_US" NOT NULL
99
);
1010
\d collate_test1
11-
Table "public.collate_test1"
12-
Column | Type | Modifiers
13-
--------+---------+-----------------------------
11+
Table "public.collate_test1"
12+
Column | Type | Modifiers
13+
--------+---------+------------------------
1414
a | integer |
15-
b | text | collate en_US.utf8 not null
15+
b | text | collate en_US not null
1616

1717
CREATE TABLE collate_test_fail (
1818
a int,
@@ -29,25 +29,25 @@ ERROR: collation "foo" for encoding "UTF8" does not exist
2929
LINE 3: b text COLLATE "foo"
3030
^
3131
CREATE TABLE collate_test_fail (
32-
a int COLLATE "en_US.utf8",
32+
a int COLLATE "en_US",
3333
b text
3434
);
3535
ERROR: collations are not supported by type integer
36-
LINE 2: a int COLLATE "en_US.utf8",
36+
LINE 2: a int COLLATE "en_US",
3737
^
3838
CREATE TABLE collate_test_like (
3939
LIKE collate_test1
4040
);
4141
\d collate_test_like
42-
Table "public.collate_test_like"
43-
Column | Type | Modifiers
44-
--------+---------+-----------------------------
42+
Table "public.collate_test_like"
43+
Column | Type | Modifiers
44+
--------+---------+------------------------
4545
a | integer |
46-
b | text | collate en_US.utf8 not null
46+
b | text | collate en_US not null
4747

4848
CREATE TABLE collate_test2 (
4949
a int,
50-
b text COLLATE "sv_SE.utf8"
50+
b text COLLATE "sv_SE"
5151
);
5252
CREATE TABLE collate_test3 (
5353
a int,
@@ -105,16 +105,12 @@ SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
105105
3 | bbc
106106
(2 rows)
107107

108-
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US.utf8";
109-
ERROR: collation mismatch between explicit collations "C" and "en_US.utf8"
110-
LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e...
111-
^
112108
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US";
113109
ERROR: collation mismatch between explicit collations "C" and "en_US"
114110
LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e...
115111
^
116-
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE.utf8";
117-
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE.utf8"; -- fails
112+
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE";
113+
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails
118114
ERROR: collations are not supported by type integer
119115
CREATE TABLE collate_test4 (
120116
a int,
@@ -132,7 +128,7 @@ SELECT a, b FROM collate_test4 ORDER BY b;
132128

133129
CREATE TABLE collate_test5 (
134130
a int,
135-
b testdomain_sv COLLATE "en_US.utf8"
131+
b testdomain_sv COLLATE "en_US"
136132
);
137133
INSERT INTO collate_test5 SELECT * FROM collate_test1;
138134
SELECT a, b FROM collate_test5 ORDER BY b;
@@ -209,13 +205,13 @@ SELECT * FROM collate_test3 ORDER BY b;
209205
(4 rows)
210206

211207
-- constant expression folding
212-
SELECT 'bbc' COLLATE "en_US.utf8" > 'äbc' COLLATE "en_US.utf8" AS "true";
208+
SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true";
213209
true
214210
------
215211
t
216212
(1 row)
217213

218-
SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false";
214+
SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false";
219215
false
220216
-------
221217
f
@@ -224,8 +220,8 @@ SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false";
224220
-- upper/lower
225221
CREATE TABLE collate_test10 (
226222
a int,
227-
x text COLLATE "en_US.utf8",
228-
y text COLLATE "tr_TR.utf8"
223+
x text COLLATE "en_US",
224+
y text COLLATE "tr_TR"
229225
);
230226
INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
231227
SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
@@ -293,33 +289,45 @@ SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
293289
4 | ABC
294290
(4 rows)
295291

296-
SELECT 'Türkiye' COLLATE "en_US.utf8" ILIKE '%KI%' AS "true";
292+
SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true";
297293
true
298294
------
299295
t
300296
(1 row)
301297

302-
SELECT 'Türkiye' COLLATE "tr_TR.utf8" ILIKE '%KI%' AS "false";
298+
SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false";
299+
false
300+
-------
301+
f
302+
(1 row)
303+
304+
SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false";
303305
false
304306
-------
305307
f
306308
(1 row)
307309

310+
SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true";
311+
true
312+
------
313+
t
314+
(1 row)
315+
308316
-- The following actually exercises the selectivity estimation for ILIKE.
309317
SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
310318
relname
311319
---------
312320
(0 rows)
313321

314322
-- to_char
315-
SET lc_time TO 'tr_TR.utf8';
323+
SET lc_time TO 'tr_TR';
316324
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
317325
to_char
318326
-------------
319327
01 NIS 2010
320328
(1 row)
321329

322-
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR.utf8");
330+
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");
323331
to_char
324332
-------------
325333
01 NİS 2010
@@ -602,7 +610,7 @@ SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok
602610
(8 rows)
603611

604612
SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
605-
ERROR: collation mismatch between implicit collations "en_US.utf8" and "C"
613+
ERROR: collation mismatch between implicit collations "en_US" and "C"
606614
LINE 1: SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collat...
607615
^
608616
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
@@ -616,12 +624,12 @@ SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3
616624
(4 rows)
617625

618626
SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
619-
ERROR: collation mismatch between implicit collations "en_US.utf8" and "C"
627+
ERROR: collation mismatch between implicit collations "en_US" and "C"
620628
LINE 1: ...ELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM col...
621629
^
622630
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
623631
SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
624-
ERROR: collation mismatch between implicit collations "en_US.utf8" and "C"
632+
ERROR: collation mismatch between implicit collations "en_US" and "C"
625633
LINE 1: SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM colla...
626634
^
627635
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
@@ -640,7 +648,7 @@ select x || y from collate_test10; -- ok, because || is not collation aware
640648
(2 rows)
641649

642650
select x, y from collate_test10 order by x || y; -- not so ok
643-
ERROR: collation mismatch between implicit collations "en_US.utf8" and "tr_TR.utf8"
651+
ERROR: collation mismatch between implicit collations "en_US" and "tr_TR"
644652
LINE 1: select x, y from collate_test10 order by x || y;
645653
^
646654
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
@@ -823,26 +831,36 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t
823831
-- schema manipulation commands
824832
CREATE ROLE regress_test_role;
825833
CREATE SCHEMA test_schema;
826-
CREATE COLLATION test0 (locale = 'en_US.utf8');
827-
CREATE COLLATION test0 (locale = 'en_US.utf8'); -- fail
834+
-- We need to do this this way to cope with varying names for encodings:
835+
do $$
836+
BEGIN
837+
EXECUTE 'CREATE COLLATION test0 (locale = ' ||
838+
quote_literal(current_setting('lc_collate')) || ');';
839+
END
840+
$$;
841+
CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
828842
ERROR: collation "test0" for encoding "UTF8" already exists
829-
CREATE COLLATION test1 (lc_collate = 'en_US.utf8', lc_ctype = 'de_DE.utf8');
830-
CREATE COLLATION test2 (locale = 'en_US'); -- fail
831-
ERROR: encoding UTF8 does not match locale en_US
832-
DETAIL: The chosen LC_CTYPE setting requires encoding LATIN1.
833-
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail
843+
do $$
844+
BEGIN
845+
EXECUTE 'CREATE COLLATION test1 (lc_collate = ' ||
846+
quote_literal(current_setting('lc_collate')) ||
847+
', lc_ctype = ' ||
848+
quote_literal(current_setting('lc_ctype')) || ');';
849+
END
850+
$$;
851+
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
834852
ERROR: parameter "lc_ctype" must be specified
835853
CREATE COLLATION testx (locale = 'nonsense'); -- fail
836854
ERROR: could not create locale "nonsense": No such file or directory
837855
CREATE COLLATION test4 FROM nonsense;
838856
ERROR: collation "nonsense" for encoding "UTF8" does not exist
839857
CREATE COLLATION test5 FROM test0;
840-
SELECT collname, collencoding, collcollate, collctype FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
841-
collname | collencoding | collcollate | collctype
842-
----------+--------------+-------------+------------
843-
test0 | 6 | en_US.utf8 | en_US.utf8
844-
test1 | 6 | en_US.utf8 | de_DE.utf8
845-
test5 | 6 | en_US.utf8 | en_US.utf8
858+
SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
859+
collname
860+
----------
861+
test0
862+
test1
863+
test5
846864
(3 rows)
847865

848866
ALTER COLLATION test1 RENAME TO test11;
@@ -879,7 +897,7 @@ SELECT collname FROM pg_collation WHERE collname LIKE 'test%';
879897
DROP SCHEMA test_schema;
880898
DROP ROLE regress_test_role;
881899
-- dependencies
882-
CREATE COLLATION test0 (locale = 'en_US.utf8');
900+
CREATE COLLATION test0 FROM "C";
883901
CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
884902
CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
885903
CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0);

src/test/regress/sql/collate.linux.utf8.sql

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ SET client_encoding TO UTF8;
88

99
CREATE TABLE collate_test1 (
1010
a int,
11-
b text COLLATE "en_US.utf8" NOT NULL
11+
b text COLLATE "en_US" NOT NULL
1212
);
1313

1414
\d collate_test1
@@ -24,7 +24,7 @@ CREATE TABLE collate_test_fail (
2424
);
2525

2626
CREATE TABLE collate_test_fail (
27-
a int COLLATE "en_US.utf8",
27+
a int COLLATE "en_US",
2828
b text
2929
);
3030

@@ -36,7 +36,7 @@ CREATE TABLE collate_test_like (
3636

3737
CREATE TABLE collate_test2 (
3838
a int,
39-
b text COLLATE "sv_SE.utf8"
39+
b text COLLATE "sv_SE"
4040
);
4141

4242
CREATE TABLE collate_test3 (
@@ -56,12 +56,11 @@ SELECT * FROM collate_test3 WHERE b >= 'BBC';
5656
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
5757
SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C";
5858
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
59-
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US.utf8";
6059
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US";
6160

6261

63-
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE.utf8";
64-
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE.utf8"; -- fails
62+
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE";
63+
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails
6564
CREATE TABLE collate_test4 (
6665
a int,
6766
b testdomain_sv
@@ -71,7 +70,7 @@ SELECT a, b FROM collate_test4 ORDER BY b;
7170

7271
CREATE TABLE collate_test5 (
7372
a int,
74-
b testdomain_sv COLLATE "en_US.utf8"
73+
b testdomain_sv COLLATE "en_US"
7574
);
7675
INSERT INTO collate_test5 SELECT * FROM collate_test1;
7776
SELECT a, b FROM collate_test5 ORDER BY b;
@@ -89,15 +88,15 @@ SELECT * FROM collate_test2 ORDER BY b;
8988
SELECT * FROM collate_test3 ORDER BY b;
9089

9190
-- constant expression folding
92-
SELECT 'bbc' COLLATE "en_US.utf8" > 'äbc' COLLATE "en_US.utf8" AS "true";
93-
SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false";
91+
SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true";
92+
SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false";
9493

9594
-- upper/lower
9695

9796
CREATE TABLE collate_test10 (
9897
a int,
99-
x text COLLATE "en_US.utf8",
100-
y text COLLATE "tr_TR.utf8"
98+
x text COLLATE "en_US",
99+
y text COLLATE "tr_TR"
101100
);
102101

103102
INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
@@ -116,18 +115,21 @@ SELECT * FROM collate_test1 WHERE b ILIKE 'abc';
116115
SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';
117116
SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
118117

119-
SELECT 'Türkiye' COLLATE "en_US.utf8" ILIKE '%KI%' AS "true";
120-
SELECT 'Türkiye' COLLATE "tr_TR.utf8" ILIKE '%KI%' AS "false";
118+
SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true";
119+
SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false";
120+
121+
SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false";
122+
SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true";
121123

122124
-- The following actually exercises the selectivity estimation for ILIKE.
123125
SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
124126

125127

126128
-- to_char
127129

128-
SET lc_time TO 'tr_TR.utf8';
130+
SET lc_time TO 'tr_TR';
129131
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
130-
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR.utf8");
132+
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");
131133

132134

133135
-- backwards parsing
@@ -268,17 +270,29 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t
268270
CREATE ROLE regress_test_role;
269271
CREATE SCHEMA test_schema;
270272

271-
CREATE COLLATION test0 (locale = 'en_US.utf8');
272-
CREATE COLLATION test0 (locale = 'en_US.utf8'); -- fail
273-
CREATE COLLATION test1 (lc_collate = 'en_US.utf8', lc_ctype = 'de_DE.utf8');
274-
CREATE COLLATION test2 (locale = 'en_US'); -- fail
275-
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail
273+
-- We need to do this this way to cope with varying names for encodings:
274+
do $$
275+
BEGIN
276+
EXECUTE 'CREATE COLLATION test0 (locale = ' ||
277+
quote_literal(current_setting('lc_collate')) || ');';
278+
END
279+
$$;
280+
CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
281+
do $$
282+
BEGIN
283+
EXECUTE 'CREATE COLLATION test1 (lc_collate = ' ||
284+
quote_literal(current_setting('lc_collate')) ||
285+
', lc_ctype = ' ||
286+
quote_literal(current_setting('lc_ctype')) || ');';
287+
END
288+
$$;
289+
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
276290
CREATE COLLATION testx (locale = 'nonsense'); -- fail
277291

278292
CREATE COLLATION test4 FROM nonsense;
279293
CREATE COLLATION test5 FROM test0;
280294

281-
SELECT collname, collencoding, collcollate, collctype FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
295+
SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
282296

283297
ALTER COLLATION test1 RENAME TO test11;
284298
ALTER COLLATION test0 RENAME TO test11; -- fail
@@ -307,7 +321,7 @@ DROP ROLE regress_test_role;
307321

308322
-- dependencies
309323

310-
CREATE COLLATION test0 (locale = 'en_US.utf8');
324+
CREATE COLLATION test0 FROM "C";
311325

312326
CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
313327
CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;

0 commit comments

Comments
 (0)