Skip to content

Commit 72fe6d2

Browse files
committed
Make collation not depend on setlocale().
Now that the result of pg_newlocale_from_collation() is always non-NULL, we can move the collate_is_c and ctype_is_c flags into pg_locale_t. That simplifies the logic in lc_collate_is_c() and lc_ctype_is_c(), removing the dependence on setlocale(). This commit also eliminates the multi-stage initialization of the collation cache. As long as we have catalog access, it's now safe to call pg_newlocale_from_collation() without checking lc_collate_is_c() first. Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e57d@eisentraut.org Reviewed-by: Peter Eisentraut, Andreas Karlsson
1 parent 9b282a9 commit 72fe6d2

File tree

4 files changed

+81
-154
lines changed

4 files changed

+81
-154
lines changed

src/backend/utils/adt/pg_locale.c

Lines changed: 26 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false;
128128
typedef struct
129129
{
130130
Oid collid; /* hash key: pg_collation OID */
131-
bool collate_is_c; /* is collation's LC_COLLATE C? */
132-
bool ctype_is_c; /* is collation's LC_CTYPE C? */
133-
bool flags_valid; /* true if above flags are valid */
134131
pg_locale_t locale; /* locale_t struct, or 0 if not valid */
135132

136133
/* needed for simplehash */
@@ -1225,29 +1222,13 @@ IsoLocaleName(const char *winlocname)
12251222
/*
12261223
* Cache mechanism for collation information.
12271224
*
1228-
* We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1229-
* (or POSIX), so we can optimize a few code paths in various places.
1230-
* For the built-in C and POSIX collations, we can know that without even
1231-
* doing a cache lookup, but we want to support aliases for C/POSIX too.
1232-
* For the "default" collation, there are separate static cache variables,
1233-
* since consulting the pg_collation catalog doesn't tell us what we need.
1234-
*
1235-
* Also, if a pg_locale_t has been requested for a collation, we cache that
1236-
* for the life of a backend.
1237-
*
1238-
* Note that some code relies on the flags not reporting false negatives
1239-
* (that is, saying it's not C when it is). For example, char2wchar()
1240-
* could fail if the locale is C, so str_tolower() shouldn't call it
1241-
* in that case.
1242-
*
12431225
* Note that we currently lack any way to flush the cache. Since we don't
12441226
* support ALTER COLLATION, this is OK. The worst case is that someone
12451227
* drops a collation, and a useless cache entry hangs around in existing
12461228
* backends.
12471229
*/
1248-
12491230
static collation_cache_entry *
1250-
lookup_collation_cache(Oid collation, bool set_flags)
1231+
lookup_collation_cache(Oid collation)
12511232
{
12521233
collation_cache_entry *cache_entry;
12531234
bool found;
@@ -1271,59 +1252,9 @@ lookup_collation_cache(Oid collation, bool set_flags)
12711252
* Make sure cache entry is marked invalid, in case we fail before
12721253
* setting things.
12731254
*/
1274-
cache_entry->flags_valid = false;
12751255
cache_entry->locale = 0;
12761256
}
12771257

1278-
if (set_flags && !cache_entry->flags_valid)
1279-
{
1280-
/* Attempt to set the flags */
1281-
HeapTuple tp;
1282-
Form_pg_collation collform;
1283-
1284-
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1285-
if (!HeapTupleIsValid(tp))
1286-
elog(ERROR, "cache lookup failed for collation %u", collation);
1287-
collform = (Form_pg_collation) GETSTRUCT(tp);
1288-
1289-
if (collform->collprovider == COLLPROVIDER_BUILTIN)
1290-
{
1291-
Datum datum;
1292-
const char *colllocale;
1293-
1294-
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1295-
colllocale = TextDatumGetCString(datum);
1296-
1297-
cache_entry->collate_is_c = true;
1298-
cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
1299-
}
1300-
else if (collform->collprovider == COLLPROVIDER_LIBC)
1301-
{
1302-
Datum datum;
1303-
const char *collcollate;
1304-
const char *collctype;
1305-
1306-
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1307-
collcollate = TextDatumGetCString(datum);
1308-
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
1309-
collctype = TextDatumGetCString(datum);
1310-
1311-
cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1312-
(strcmp(collcollate, "POSIX") == 0));
1313-
cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1314-
(strcmp(collctype, "POSIX") == 0));
1315-
}
1316-
else
1317-
{
1318-
cache_entry->collate_is_c = false;
1319-
cache_entry->ctype_is_c = false;
1320-
}
1321-
1322-
cache_entry->flags_valid = true;
1323-
1324-
ReleaseSysCache(tp);
1325-
}
1326-
13271258
return cache_entry;
13281259
}
13291260

@@ -1341,47 +1272,6 @@ lc_collate_is_c(Oid collation)
13411272
if (!OidIsValid(collation))
13421273
return false;
13431274

1344-
/*
1345-
* If we're asked about the default collation, we have to inquire of the C
1346-
* library. Cache the result so we only have to compute it once.
1347-
*/
1348-
if (collation == DEFAULT_COLLATION_OID)
1349-
{
1350-
static int result = -1;
1351-
const char *localeptr;
1352-
1353-
if (result >= 0)
1354-
return (bool) result;
1355-
1356-
if (default_locale.provider == COLLPROVIDER_BUILTIN)
1357-
{
1358-
result = true;
1359-
return (bool) result;
1360-
}
1361-
else if (default_locale.provider == COLLPROVIDER_ICU)
1362-
{
1363-
result = false;
1364-
return (bool) result;
1365-
}
1366-
else if (default_locale.provider == COLLPROVIDER_LIBC)
1367-
{
1368-
localeptr = setlocale(LC_CTYPE, NULL);
1369-
if (!localeptr)
1370-
elog(ERROR, "invalid LC_CTYPE setting");
1371-
}
1372-
else
1373-
elog(ERROR, "unexpected collation provider '%c'",
1374-
default_locale.provider);
1375-
1376-
if (strcmp(localeptr, "C") == 0)
1377-
result = true;
1378-
else if (strcmp(localeptr, "POSIX") == 0)
1379-
result = true;
1380-
else
1381-
result = false;
1382-
return (bool) result;
1383-
}
1384-
13851275
/*
13861276
* If we're asked about the built-in C/POSIX collations, we know that.
13871277
*/
@@ -1392,7 +1282,7 @@ lc_collate_is_c(Oid collation)
13921282
/*
13931283
* Otherwise, we have to consult pg_collation, but we cache that.
13941284
*/
1395-
return (lookup_collation_cache(collation, true))->collate_is_c;
1285+
return pg_newlocale_from_collation(collation)->collate_is_c;
13961286
}
13971287

13981288
/*
@@ -1408,46 +1298,6 @@ lc_ctype_is_c(Oid collation)
14081298
if (!OidIsValid(collation))
14091299
return false;
14101300

1411-
/*
1412-
* If we're asked about the default collation, we have to inquire of the C
1413-
* library. Cache the result so we only have to compute it once.
1414-
*/
1415-
if (collation == DEFAULT_COLLATION_OID)
1416-
{
1417-
static int result = -1;
1418-
const char *localeptr;
1419-
1420-
if (result >= 0)
1421-
return (bool) result;
1422-
1423-
if (default_locale.provider == COLLPROVIDER_BUILTIN)
1424-
{
1425-
localeptr = default_locale.info.builtin.locale;
1426-
}
1427-
else if (default_locale.provider == COLLPROVIDER_ICU)
1428-
{
1429-
result = false;
1430-
return (bool) result;
1431-
}
1432-
else if (default_locale.provider == COLLPROVIDER_LIBC)
1433-
{
1434-
localeptr = setlocale(LC_CTYPE, NULL);
1435-
if (!localeptr)
1436-
elog(ERROR, "invalid LC_CTYPE setting");
1437-
}
1438-
else
1439-
elog(ERROR, "unexpected collation provider '%c'",
1440-
default_locale.provider);
1441-
1442-
if (strcmp(localeptr, "C") == 0)
1443-
result = true;
1444-
else if (strcmp(localeptr, "POSIX") == 0)
1445-
result = true;
1446-
else
1447-
result = false;
1448-
return (bool) result;
1449-
}
1450-
14511301
/*
14521302
* If we're asked about the built-in C/POSIX collations, we know that.
14531303
*/
@@ -1458,7 +1308,7 @@ lc_ctype_is_c(Oid collation)
14581308
/*
14591309
* Otherwise, we have to consult pg_collation, but we cache that.
14601310
*/
1461-
return (lookup_collation_cache(collation, true))->ctype_is_c;
1311+
return pg_newlocale_from_collation(collation)->ctype_is_c;
14621312
}
14631313

14641314
/* simple subroutine for reporting errors from newlocale() */
@@ -1647,6 +1497,9 @@ init_database_collation(void)
16471497

16481498
builtin_validate_locale(dbform->encoding, datlocale);
16491499

1500+
default_locale.collate_is_c = true;
1501+
default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0);
1502+
16501503
default_locale.info.builtin.locale = MemoryContextStrdup(
16511504
TopMemoryContext, datlocale);
16521505
}
@@ -1658,6 +1511,9 @@ init_database_collation(void)
16581511
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
16591512
datlocale = TextDatumGetCString(datum);
16601513

1514+
default_locale.collate_is_c = false;
1515+
default_locale.ctype_is_c = false;
1516+
16611517
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
16621518
if (!isnull)
16631519
icurules = TextDatumGetCString(datum);
@@ -1678,6 +1534,11 @@ init_database_collation(void)
16781534
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
16791535
datctype = TextDatumGetCString(datum);
16801536

1537+
default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
1538+
(strcmp(datcollate, "POSIX") == 0);
1539+
default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
1540+
(strcmp(datctype, "POSIX") == 0);
1541+
16811542
make_libc_collator(datcollate, datctype, &default_locale);
16821543
}
16831544

@@ -1712,7 +1573,7 @@ pg_newlocale_from_collation(Oid collid)
17121573
if (collid == DEFAULT_COLLATION_OID)
17131574
return &default_locale;
17141575

1715-
cache_entry = lookup_collation_cache(collid, false);
1576+
cache_entry = lookup_collation_cache(collid);
17161577

17171578
if (cache_entry->locale == 0)
17181579
{
@@ -1741,6 +1602,9 @@ pg_newlocale_from_collation(Oid collid)
17411602
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
17421603
locstr = TextDatumGetCString(datum);
17431604

1605+
result.collate_is_c = true;
1606+
result.ctype_is_c = (strcmp(locstr, "C") == 0);
1607+
17441608
builtin_validate_locale(GetDatabaseEncoding(), locstr);
17451609

17461610
result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
@@ -1756,6 +1620,11 @@ pg_newlocale_from_collation(Oid collid)
17561620
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
17571621
collctype = TextDatumGetCString(datum);
17581622

1623+
result.collate_is_c = (strcmp(collcollate, "C") == 0) ||
1624+
(strcmp(collcollate, "POSIX") == 0);
1625+
result.ctype_is_c = (strcmp(collctype, "C") == 0) ||
1626+
(strcmp(collctype, "POSIX") == 0);
1627+
17591628
make_libc_collator(collcollate, collctype, &result);
17601629
}
17611630
else if (collform->collprovider == COLLPROVIDER_ICU)
@@ -1766,6 +1635,9 @@ pg_newlocale_from_collation(Oid collid)
17661635
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
17671636
iculocstr = TextDatumGetCString(datum);
17681637

1638+
result.collate_is_c = false;
1639+
result.ctype_is_c = false;
1640+
17691641
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
17701642
if (!isnull)
17711643
icurules = TextDatumGetCString(datum);

src/include/utils/pg_locale.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,25 @@ extern void cache_locale_time(void);
6969
/*
7070
* We use a discriminated union to hold either a locale_t or an ICU collator.
7171
* pg_locale_t is occasionally checked for truth, so make it a pointer.
72+
*
73+
* Also, hold two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
74+
* (or POSIX), so we can optimize a few code paths in various places. For the
75+
* built-in C and POSIX collations, we can know that without even doing a
76+
* cache lookup, but we want to support aliases for C/POSIX too. For the
77+
* "default" collation, the flags are set in init_database_collation(), since
78+
* consulting the pg_collation catalog doesn't tell us what we need.
79+
*
80+
* Note that some code relies on the flags not reporting false negatives
81+
* (that is, saying it's not C when it is). For example, char2wchar()
82+
* could fail if the locale is C, so str_tolower() shouldn't call it
83+
* in that case.
7284
*/
7385
struct pg_locale_struct
7486
{
7587
char provider;
7688
bool deterministic;
89+
bool collate_is_c;
90+
bool ctype_is_c;
7791
union
7892
{
7993
struct

src/test/regress/expected/collate.utf8.out

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,32 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
99
\endif
1010
SET client_encoding TO UTF8;
1111
--
12+
-- Test builtin "C"
13+
--
14+
CREATE COLLATION regress_builtin_c (
15+
provider = builtin, locale = 'C');
16+
-- non-ASCII characters are unchanged
17+
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
18+
?column?
19+
----------
20+
t
21+
(1 row)
22+
23+
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
24+
?column?
25+
----------
26+
t
27+
(1 row)
28+
29+
-- non-ASCII characters are not alphabetic
30+
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
31+
?column?
32+
----------
33+
t
34+
(1 row)
35+
36+
DROP COLLATION regress_builtin_c;
37+
--
1238
-- Test PG_C_UTF8
1339
--
1440
CREATE COLLATION regress_pg_c_utf8 (

src/test/regress/sql/collate.utf8.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,21 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
1111

1212
SET client_encoding TO UTF8;
1313

14+
--
15+
-- Test builtin "C"
16+
--
17+
CREATE COLLATION regress_builtin_c (
18+
provider = builtin, locale = 'C');
19+
20+
-- non-ASCII characters are unchanged
21+
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
22+
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
23+
24+
-- non-ASCII characters are not alphabetic
25+
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
26+
27+
DROP COLLATION regress_builtin_c;
28+
1429
--
1530
-- Test PG_C_UTF8
1631
--

0 commit comments

Comments
 (0)