Skip to content

Commit c2e8bd2

Browse files
committed
Enable routine running of citext's UTF8-specific test cases.
These test cases have been commented out since citext was invented, because at the time we had no nice way to deal with tests that have restrictions such as requiring UTF8 encoding. But now we do have a convention for that, i.e., put them into a separate test file with an early-exit path. So let's enable these tests to run when their prerequisites are satisfied.

(We may have to tighten the prerequisites beyond the "encoding = UTF8 and locale != C" checks made here. But let's put it on the buildfarm and see what blows up.)

Dag Lem

Discussion: https://postgr.es/m/ygezgoacs4e.fsf_-_@sid.nimrod.no
1 parent 6ce1608 commit c2e8bd2

File tree

7 files changed

+207
-94
lines changed

7 files changed

+207
-94
lines changed

contrib/citext/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ DATA = citext--1.4.sql \
1111
citext--1.0--1.1.sql
1212
PGFILEDESC = "citext - case-insensitive character string data type"
1313

14-
REGRESS = citext
14+
REGRESS = citext citext_utf8
1515

1616
ifdef USE_PGXS
1717
PG_CONFIG = pg_config

contrib/citext/expected/citext.out

-29
Original file line numberDiff line numberDiff line change
@@ -48,29 +48,6 @@ SELECT 'a'::citext <> 'ab'::citext AS t;
4848
t
4949
(1 row)
5050

51-
-- Multibyte sanity tests. Uncomment to run.
52-
-- SELECT 'À'::citext = 'À'::citext AS t;
53-
-- SELECT 'À'::citext = 'à'::citext AS t;
54-
-- SELECT 'À'::text = 'à'::text AS f; -- text wins.
55-
-- SELECT 'À'::citext <> 'B'::citext AS t;
56-
-- Test combining characters making up canonically equivalent strings.
57-
-- SELECT 'Ä'::text <> 'Ä'::text AS t;
58-
-- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
59-
-- Test the Turkish dotted I. The lowercase is a single byte while the
60-
-- uppercase is multibyte. This is why the comparison code can't be optimized
61-
-- to compare string lengths.
62-
-- SELECT 'i'::citext = 'İ'::citext AS t;
63-
-- Regression.
64-
-- SELECT 'láska'::citext <> 'laská'::citext AS t;
65-
-- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
66-
-- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
67-
-- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
68-
-- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
69-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
70-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
71-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
72-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
73-
-- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
7451
-- Test > and >=
7552
SELECT 'B'::citext > 'a'::citext AS t;
7653
t
@@ -2614,8 +2591,6 @@ SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true;
26142591
t
26152592
(1 row)
26162593

2617-
-- Multi-byte tests below are disabled like the sanity tests above.
2618-
-- Uncomment to run them.
26192594
-- Test ~<~ and ~<=~
26202595
SELECT 'a'::citext ~<~ 'B'::citext AS t;
26212596
t
@@ -2629,7 +2604,6 @@ SELECT 'b'::citext ~<~ 'A'::citext AS f;
26292604
f
26302605
(1 row)
26312606

2632-
-- SELECT 'à'::citext ~<~ 'À'::citext AS f;
26332607
SELECT 'a'::citext ~<=~ 'B'::citext AS t;
26342608
t
26352609
---
@@ -2642,7 +2616,6 @@ SELECT 'a'::citext ~<=~ 'A'::citext AS t;
26422616
t
26432617
(1 row)
26442618

2645-
-- SELECT 'à'::citext ~<=~ 'À'::citext AS t;
26462619
-- Test ~>~ and ~>=~
26472620
SELECT 'B'::citext ~>~ 'a'::citext AS t;
26482621
t
@@ -2656,7 +2629,6 @@ SELECT 'b'::citext ~>~ 'A'::citext AS t;
26562629
t
26572630
(1 row)
26582631

2659-
-- SELECT 'à'::citext ~>~ 'À'::citext AS f;
26602632
SELECT 'B'::citext ~>~ 'b'::citext AS f;
26612633
f
26622634
---
@@ -2669,7 +2641,6 @@ SELECT 'B'::citext ~>=~ 'b'::citext AS t;
26692641
t
26702642
(1 row)
26712643

2672-
-- SELECT 'à'::citext ~>=~ 'À'::citext AS t;
26732644
-- Test implicit casting. citext casts to text, but not vice-versa.
26742645
SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins.
26752646
t

contrib/citext/expected/citext_1.out

-29
Original file line numberDiff line numberDiff line change
@@ -48,29 +48,6 @@ SELECT 'a'::citext <> 'ab'::citext AS t;
4848
t
4949
(1 row)
5050

51-
-- Multibyte sanity tests. Uncomment to run.
52-
-- SELECT 'À'::citext = 'À'::citext AS t;
53-
-- SELECT 'À'::citext = 'à'::citext AS t;
54-
-- SELECT 'À'::text = 'à'::text AS f; -- text wins.
55-
-- SELECT 'À'::citext <> 'B'::citext AS t;
56-
-- Test combining characters making up canonically equivalent strings.
57-
-- SELECT 'Ä'::text <> 'Ä'::text AS t;
58-
-- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
59-
-- Test the Turkish dotted I. The lowercase is a single byte while the
60-
-- uppercase is multibyte. This is why the comparison code can't be optimized
61-
-- to compare string lengths.
62-
-- SELECT 'i'::citext = 'İ'::citext AS t;
63-
-- Regression.
64-
-- SELECT 'láska'::citext <> 'laská'::citext AS t;
65-
-- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
66-
-- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
67-
-- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
68-
-- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
69-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
70-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
71-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
72-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
73-
-- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
7451
-- Test > and >=
7552
SELECT 'B'::citext > 'a'::citext AS t;
7653
t
@@ -2614,8 +2591,6 @@ SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true;
26142591
t
26152592
(1 row)
26162593

2617-
-- Multi-byte tests below are disabled like the sanity tests above.
2618-
-- Uncomment to run them.
26192594
-- Test ~<~ and ~<=~
26202595
SELECT 'a'::citext ~<~ 'B'::citext AS t;
26212596
t
@@ -2629,7 +2604,6 @@ SELECT 'b'::citext ~<~ 'A'::citext AS f;
26292604
f
26302605
(1 row)
26312606

2632-
-- SELECT 'à'::citext ~<~ 'À'::citext AS f;
26332607
SELECT 'a'::citext ~<=~ 'B'::citext AS t;
26342608
t
26352609
---
@@ -2642,7 +2616,6 @@ SELECT 'a'::citext ~<=~ 'A'::citext AS t;
26422616
t
26432617
(1 row)
26442618

2645-
-- SELECT 'à'::citext ~<=~ 'À'::citext AS t;
26462619
-- Test ~>~ and ~>=~
26472620
SELECT 'B'::citext ~>~ 'a'::citext AS t;
26482621
t
@@ -2656,7 +2629,6 @@ SELECT 'b'::citext ~>~ 'A'::citext AS t;
26562629
t
26572630
(1 row)
26582631

2659-
-- SELECT 'à'::citext ~>~ 'À'::citext AS f;
26602632
SELECT 'B'::citext ~>~ 'b'::citext AS f;
26612633
f
26622634
---
@@ -2669,7 +2641,6 @@ SELECT 'B'::citext ~>=~ 'b'::citext AS t;
26692641
t
26702642
(1 row)
26712643

2672-
-- SELECT 'à'::citext ~>=~ 'À'::citext AS t;
26732644
-- Test implicit casting. citext casts to text, but not vice-versa.
26742645
SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins.
26752646
t

contrib/citext/expected/citext_utf8.out

+146
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*
2+
* This test must be run in a database with UTF-8 encoding
3+
* and a Unicode-aware locale.
4+
*/
5+
SELECT getdatabaseencoding() <> 'UTF8' OR
6+
current_setting('lc_ctype') = 'C'
7+
AS skip_test \gset
8+
\if :skip_test
9+
\quit
10+
\endif
11+
set client_encoding = utf8;
12+
-- CREATE EXTENSION IF NOT EXISTS citext;
13+
-- Multibyte sanity tests.
14+
SELECT 'À'::citext = 'À'::citext AS t;
15+
t
16+
---
17+
t
18+
(1 row)
19+
20+
SELECT 'À'::citext = 'à'::citext AS t;
21+
t
22+
---
23+
t
24+
(1 row)
25+
26+
SELECT 'À'::text = 'à'::text AS f; -- text wins.
27+
f
28+
---
29+
f
30+
(1 row)
31+
32+
SELECT 'À'::citext <> 'B'::citext AS t;
33+
t
34+
---
35+
t
36+
(1 row)
37+
38+
-- Test combining characters making up canonically equivalent strings.
39+
SELECT 'Ä'::text <> 'Ä'::text AS t;
40+
t
41+
---
42+
t
43+
(1 row)
44+
45+
SELECT 'Ä'::citext <> 'Ä'::citext AS t;
46+
t
47+
---
48+
t
49+
(1 row)
50+
51+
-- Test the Turkish dotted I. The lowercase is a single byte while the
52+
-- uppercase is multibyte. This is why the comparison code can't be optimized
53+
-- to compare string lengths.
54+
SELECT 'i'::citext = 'İ'::citext AS t;
55+
t
56+
---
57+
t
58+
(1 row)
59+
60+
-- Regression.
61+
SELECT 'láska'::citext <> 'laská'::citext AS t;
62+
t
63+
---
64+
t
65+
(1 row)
66+
67+
SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
68+
t
69+
---
70+
t
71+
(1 row)
72+
73+
SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
74+
t
75+
---
76+
t
77+
(1 row)
78+
79+
SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
80+
t
81+
---
82+
t
83+
(1 row)
84+
85+
SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
86+
t
87+
---
88+
t
89+
(1 row)
90+
91+
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) = 0 AS t;
92+
t
93+
---
94+
t
95+
(1 row)
96+
97+
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) = 0 AS t;
98+
t
99+
---
100+
t
101+
(1 row)
102+
103+
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) = 0 AS t;
104+
t
105+
---
106+
t
107+
(1 row)
108+
109+
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) > 0 AS t;
110+
t
111+
---
112+
t
113+
(1 row)
114+
115+
SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) < 0 AS t;
116+
t
117+
---
118+
t
119+
(1 row)
120+
121+
-- Test ~<~ and ~<=~
122+
SELECT 'à'::citext ~<~ 'À'::citext AS f;
123+
f
124+
---
125+
f
126+
(1 row)
127+
128+
SELECT 'à'::citext ~<=~ 'À'::citext AS t;
129+
t
130+
---
131+
t
132+
(1 row)
133+
134+
-- Test ~>~ and ~>=~
135+
SELECT 'à'::citext ~>~ 'À'::citext AS f;
136+
f
137+
---
138+
f
139+
(1 row)
140+
141+
SELECT 'à'::citext ~>=~ 'À'::citext AS t;
142+
t
143+
---
144+
t
145+
(1 row)
146+

contrib/citext/expected/citext_utf8_1.out

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/*
2+
* This test must be run in a database with UTF-8 encoding
3+
* and a Unicode-aware locale.
4+
*/
5+
SELECT getdatabaseencoding() <> 'UTF8' OR
6+
current_setting('lc_ctype') = 'C'
7+
AS skip_test \gset
8+
\if :skip_test
9+
\quit

contrib/citext/sql/citext.sql

-35
Original file line numberDiff line numberDiff line change
@@ -19,34 +19,6 @@ SELECT 'a'::citext = 'b'::citext AS f;
1919
SELECT 'a'::citext = 'ab'::citext AS f;
2020
SELECT 'a'::citext <> 'ab'::citext AS t;
2121

22-
-- Multibyte sanity tests. Uncomment to run.
23-
-- SELECT 'À'::citext = 'À'::citext AS t;
24-
-- SELECT 'À'::citext = 'à'::citext AS t;
25-
-- SELECT 'À'::text = 'à'::text AS f; -- text wins.
26-
-- SELECT 'À'::citext <> 'B'::citext AS t;
27-
28-
-- Test combining characters making up canonically equivalent strings.
29-
-- SELECT 'Ä'::text <> 'Ä'::text AS t;
30-
-- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
31-
32-
-- Test the Turkish dotted I. The lowercase is a single byte while the
33-
-- uppercase is multibyte. This is why the comparison code can't be optimized
34-
-- to compare string lengths.
35-
-- SELECT 'i'::citext = 'İ'::citext AS t;
36-
37-
-- Regression.
38-
-- SELECT 'láska'::citext <> 'laská'::citext AS t;
39-
40-
-- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
41-
-- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
42-
-- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
43-
-- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
44-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
45-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
46-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
47-
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
48-
-- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
49-
5022
-- Test > and >=
5123
SELECT 'B'::citext > 'a'::citext AS t;
5224
SELECT 'b'::citext > 'A'::citext AS t;
@@ -811,24 +783,17 @@ SELECT citext_pattern_ge('b'::citext, 'a'::citext) AS true;
811783
SELECT citext_pattern_ge('B'::citext, 'a'::citext) AS true;
812784
SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true;
813785

814-
-- Multi-byte tests below are disabled like the sanity tests above.
815-
-- Uncomment to run them.
816-
817786
-- Test ~<~ and ~<=~
818787
SELECT 'a'::citext ~<~ 'B'::citext AS t;
819788
SELECT 'b'::citext ~<~ 'A'::citext AS f;
820-
-- SELECT 'à'::citext ~<~ 'À'::citext AS f;
821789
SELECT 'a'::citext ~<=~ 'B'::citext AS t;
822790
SELECT 'a'::citext ~<=~ 'A'::citext AS t;
823-
-- SELECT 'à'::citext ~<=~ 'À'::citext AS t;
824791

825792
-- Test ~>~ and ~>=~
826793
SELECT 'B'::citext ~>~ 'a'::citext AS t;
827794
SELECT 'b'::citext ~>~ 'A'::citext AS t;
828-
-- SELECT 'à'::citext ~>~ 'À'::citext AS f;
829795
SELECT 'B'::citext ~>~ 'b'::citext AS f;
830796
SELECT 'B'::citext ~>=~ 'b'::citext AS t;
831-
-- SELECT 'à'::citext ~>=~ 'À'::citext AS t;
832797

833798
-- Test implicit casting. citext casts to text, but not vice-versa.
834799
SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins.

0 commit comments

Comments
 (0)