Skip to content

Commit a382997

Browse files
committed
Fix to_char() to use ASCII-only case-folding rules where appropriate.
formatting.c used locale-dependent case folding rules in some code paths where the result isn't supposed to be locale-dependent, for example to_char(timestamp, 'DAY'). Since the source data is always just ASCII in these cases, that usually didn't matter ... but it does matter in Turkish locales, which have unusual treatment of "i" and "I". To confuse matters even more, the misbehavior was only visible in UTF8 encoding, because in single-byte encodings we used pg_toupper/pg_tolower which don't have locale-specific behavior for ASCII characters. Fix by providing intentionally ASCII-only case-folding functions and using these where appropriate. Per bug #7913 from Adnan Dursun. Back-patch to all active branches, since it's been like this for a long time.
1 parent da5f032 commit a382997

File tree

2 files changed

+117
-25
lines changed

2 files changed

+117
-25
lines changed

src/backend/utils/adt/formatting.c

Lines changed: 113 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,87 @@ str_initcap(const char *buff, size_t nbytes)
16441644
return result;
16451645
}
16461646

1647+
/*
 * ASCII-only lower function
 *
 * Folds only the ASCII letters 'A'..'Z' to lower case.  Every other byte,
 * including bytes with the high bit set, is copied through unchanged, so
 * the result does not depend on the current locale.
 *
 * We pass the number of bytes so we can pass varlena and char*
 * to this function.  The result is a palloc'd, null-terminated string,
 * or NULL if buff is NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *p;

	if (!buff)
		return NULL;

	result = pnstrdup(buff, nbytes);

	/*
	 * Use pg_ascii_tolower, not pg_tolower: the latter falls back to the
	 * locale's tolower() for high-bit-set bytes, which would make this
	 * function locale-dependent after all.
	 */
	for (p = result; *p; p++)
		*p = pg_ascii_tolower((unsigned char) *p);

	return result;
}
1669+
1670+
/*
 * ASCII-only upper function
 *
 * Folds only the ASCII letters 'a'..'z' to upper case.  Every other byte,
 * including bytes with the high bit set, is copied through unchanged, so
 * the result does not depend on the current locale.
 *
 * We pass the number of bytes so we can pass varlena and char*
 * to this function.  The result is a palloc'd, null-terminated string,
 * or NULL if buff is NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *p;

	if (!buff)
		return NULL;

	result = pnstrdup(buff, nbytes);

	/*
	 * Use pg_ascii_toupper, not pg_toupper: the latter falls back to the
	 * locale's toupper() for high-bit-set bytes, which would make this
	 * function locale-dependent after all.
	 */
	for (p = result; *p; p++)
		*p = pg_ascii_toupper((unsigned char) *p);

	return result;
}
1692+
1693+
/*
 * ASCII-only initcap function
 *
 * Upper-cases the first character of each word and lower-cases the rest,
 * where a "word" is a run of ASCII letters and digits.  Only ASCII letters
 * are ever case-folded; all other bytes are copied through unchanged, so
 * the result does not depend on the current locale.
 *
 * We pass the number of bytes so we can pass varlena and char*
 * to this function.  The result is a palloc'd, null-terminated string,
 * or NULL if buff is NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *p;
	bool		wasalnum = false;

	if (!buff)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (p = result; *p; p++)
	{
		char		c;

		/*
		 * Use the pg_ascii_* folders rather than pg_tolower/pg_toupper: the
		 * latter consult the locale for high-bit-set bytes, which would make
		 * this function locale-dependent after all.
		 */
		if (wasalnum)
			*p = c = pg_ascii_tolower((unsigned char) *p);
		else
			*p = c = pg_ascii_toupper((unsigned char) *p);

		/* we don't trust isalnum() here */
		wasalnum = ((c >= 'A' && c <= 'Z') ||
					(c >= 'a' && c <= 'z') ||
					(c >= '0' && c <= '9'));
	}

	return result;
}
1727+
16471728
/* convenience routines for when the input is null-terminated */
16481729

16491730
static char *
@@ -1664,6 +1745,20 @@ str_initcap_z(const char *buff)
16641745
return str_initcap(buff, strlen(buff));
16651746
}
16661747

1748+
/*
 * Null-terminated-input convenience wrapper around asc_tolower().
 *
 * buff must not be NULL, since its length is measured with strlen().
 */
static char *
asc_tolower_z(const char *buff)
{
	size_t		len = strlen(buff);

	return asc_tolower(buff, len);
}
1753+
1754+
/*
 * Null-terminated-input convenience wrapper around asc_toupper().
 *
 * buff must not be NULL, since its length is measured with strlen().
 */
static char *
asc_toupper_z(const char *buff)
{
	size_t		len = strlen(buff);

	return asc_toupper(buff, len);
}
1759+
1760+
/* asc_initcap_z is not currently needed */
1761+
16671762

16681763
/* ----------
16691764
* Skip TM / th in FROM_CHAR
@@ -2151,7 +2246,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
21512246
INVALID_FOR_INTERVAL;
21522247
if (tmtcTzn(in))
21532248
{
2154-
char *p = str_tolower_z(tmtcTzn(in));
2249+
/* We assume here that timezone names aren't localized */
2250+
char *p = asc_tolower_z(tmtcTzn(in));
21552251

21562252
strcpy(s, p);
21572253
pfree(p);
@@ -2198,7 +2294,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
21982294
strcpy(s, str_toupper_z(localized_full_months[tm->tm_mon - 1]));
21992295
else
22002296
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2201-
str_toupper_z(months_full[tm->tm_mon - 1]));
2297+
asc_toupper_z(months_full[tm->tm_mon - 1]));
22022298
s += strlen(s);
22032299
break;
22042300
case DCH_Month:
@@ -2208,7 +2304,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22082304
if (S_TM(n->suffix))
22092305
strcpy(s, str_initcap_z(localized_full_months[tm->tm_mon - 1]));
22102306
else
2211-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
2307+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2308+
months_full[tm->tm_mon - 1]);
22122309
s += strlen(s);
22132310
break;
22142311
case DCH_month:
@@ -2218,10 +2315,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22182315
if (S_TM(n->suffix))
22192316
strcpy(s, str_tolower_z(localized_full_months[tm->tm_mon - 1]));
22202317
else
2221-
{
2222-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
2223-
*s = pg_tolower((unsigned char) *s);
2224-
}
2318+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2319+
asc_tolower_z(months_full[tm->tm_mon - 1]));
22252320
s += strlen(s);
22262321
break;
22272322
case DCH_MON:
@@ -2231,7 +2326,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22312326
if (S_TM(n->suffix))
22322327
strcpy(s, str_toupper_z(localized_abbrev_months[tm->tm_mon - 1]));
22332328
else
2234-
strcpy(s, str_toupper_z(months[tm->tm_mon - 1]));
2329+
strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
22352330
s += strlen(s);
22362331
break;
22372332
case DCH_Mon:
@@ -2251,10 +2346,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22512346
if (S_TM(n->suffix))
22522347
strcpy(s, str_tolower_z(localized_abbrev_months[tm->tm_mon - 1]));
22532348
else
2254-
{
2255-
strcpy(s, months[tm->tm_mon - 1]);
2256-
*s = pg_tolower((unsigned char) *s);
2257-
}
2349+
strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
22582350
s += strlen(s);
22592351
break;
22602352
case DCH_MM:
@@ -2269,34 +2361,33 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22692361
strcpy(s, str_toupper_z(localized_full_days[tm->tm_wday]));
22702362
else
22712363
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2272-
str_toupper_z(days[tm->tm_wday]));
2364+
asc_toupper_z(days[tm->tm_wday]));
22732365
s += strlen(s);
22742366
break;
22752367
case DCH_Day:
22762368
INVALID_FOR_INTERVAL;
22772369
if (S_TM(n->suffix))
22782370
strcpy(s, str_initcap_z(localized_full_days[tm->tm_wday]));
22792371
else
2280-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
2372+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2373+
days[tm->tm_wday]);
22812374
s += strlen(s);
22822375
break;
22832376
case DCH_day:
22842377
INVALID_FOR_INTERVAL;
22852378
if (S_TM(n->suffix))
22862379
strcpy(s, str_tolower_z(localized_full_days[tm->tm_wday]));
22872380
else
2288-
{
2289-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
2290-
*s = pg_tolower((unsigned char) *s);
2291-
}
2381+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2382+
asc_tolower_z(days[tm->tm_wday]));
22922383
s += strlen(s);
22932384
break;
22942385
case DCH_DY:
22952386
INVALID_FOR_INTERVAL;
22962387
if (S_TM(n->suffix))
22972388
strcpy(s, str_toupper_z(localized_abbrev_days[tm->tm_wday]));
22982389
else
2299-
strcpy(s, str_toupper_z(days_short[tm->tm_wday]));
2390+
strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
23002391
s += strlen(s);
23012392
break;
23022393
case DCH_Dy:
@@ -2312,10 +2403,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
23122403
if (S_TM(n->suffix))
23132404
strcpy(s, str_tolower_z(localized_abbrev_days[tm->tm_wday]));
23142405
else
2315-
{
2316-
strcpy(s, days_short[tm->tm_wday]);
2317-
*s = pg_tolower((unsigned char) *s);
2318-
}
2406+
strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
23192407
s += strlen(s);
23202408
break;
23212409
case DCH_DDD:
@@ -4422,12 +4510,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
44224510
case NUM_rn:
44234511
if (IS_FILLMODE(Np->Num))
44244512
{
4425-
strcpy(Np->inout_p, str_tolower_z(Np->number_p));
4513+
strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
44264514
Np->inout_p += strlen(Np->inout_p) - 1;
44274515
}
44284516
else
44294517
{
4430-
sprintf(Np->inout_p, "%15s", str_tolower_z(Np->number_p));
4518+
sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
44314519
Np->inout_p += strlen(Np->inout_p) - 1;
44324520
}
44334521
break;

src/include/utils/formatting.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ extern char *str_tolower(const char *buff, size_t nbytes);
2525
extern char *str_toupper(const char *buff, size_t nbytes);
2626
extern char *str_initcap(const char *buff, size_t nbytes);
2727

28+
extern char *asc_tolower(const char *buff, size_t nbytes);
29+
extern char *asc_toupper(const char *buff, size_t nbytes);
30+
extern char *asc_initcap(const char *buff, size_t nbytes);
31+
2832
extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
2933
extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
3034
extern Datum interval_to_char(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)