Skip to content

Commit 81e2255

Browse files
committed
Fix to_char() to use ASCII-only case-folding rules where appropriate.
formatting.c used locale-dependent case folding rules in some code paths where the result isn't supposed to be locale-dependent, for example to_char(timestamp, 'DAY'). Since the source data is always just ASCII in these cases, that usually didn't matter ... but it does matter in Turkish locales, which have unusual treatment of "i" and "I". To confuse matters even more, the misbehavior was only visible in UTF8 encoding, because in single-byte encodings we used pg_toupper/pg_tolower which don't have locale-specific behavior for ASCII characters. Fix by providing intentionally ASCII-only case-folding functions and using these where appropriate. Per bug #7913 from Adnan Dursun. Back-patch to all active branches, since it's been like this for a long time.
1 parent 3a77936 commit 81e2255

File tree

2 files changed

+120
-54
lines changed

2 files changed

+120
-54
lines changed

src/backend/utils/adt/formatting.c

Lines changed: 116 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,12 +1491,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
14911491
/* C/POSIX collations use this path regardless of database encoding */
14921492
if (lc_ctype_is_c(collid))
14931493
{
1494-
char *p;
1495-
1496-
result = pnstrdup(buff, nbytes);
1497-
1498-
for (p = result; *p; p++)
1499-
*p = pg_ascii_tolower((unsigned char) *p);
1494+
result = asc_tolower(buff, nbytes);
15001495
}
15011496
#ifdef USE_WIDE_UPPER_LOWER
15021497
else if (pg_database_encoding_max_length() > 1)
@@ -1612,12 +1607,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16121607
/* C/POSIX collations use this path regardless of database encoding */
16131608
if (lc_ctype_is_c(collid))
16141609
{
1615-
char *p;
1616-
1617-
result = pnstrdup(buff, nbytes);
1618-
1619-
for (p = result; *p; p++)
1620-
*p = pg_ascii_toupper((unsigned char) *p);
1610+
result = asc_toupper(buff, nbytes);
16211611
}
16221612
#ifdef USE_WIDE_UPPER_LOWER
16231613
else if (pg_database_encoding_max_length() > 1)
@@ -1734,23 +1724,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
17341724
/* C/POSIX collations use this path regardless of database encoding */
17351725
if (lc_ctype_is_c(collid))
17361726
{
1737-
char *p;
1738-
1739-
result = pnstrdup(buff, nbytes);
1740-
1741-
for (p = result; *p; p++)
1742-
{
1743-
char c;
1744-
1745-
if (wasalnum)
1746-
*p = c = pg_ascii_tolower((unsigned char) *p);
1747-
else
1748-
*p = c = pg_ascii_toupper((unsigned char) *p);
1749-
/* we don't trust isalnum() here */
1750-
wasalnum = ((c >= 'A' && c <= 'Z') ||
1751-
(c >= 'a' && c <= 'z') ||
1752-
(c >= '0' && c <= '9'));
1753-
}
1727+
result = asc_initcap(buff, nbytes);
17541728
}
17551729
#ifdef USE_WIDE_UPPER_LOWER
17561730
else if (pg_database_encoding_max_length() > 1)
@@ -1873,6 +1847,87 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
18731847
return result;
18741848
}
18751849

1850+
/*
1851+
* ASCII-only lower function
1852+
*
1853+
* We pass the number of bytes so we can pass varlena and char*
1854+
* to this function. The result is a palloc'd, null-terminated string.
1855+
*/
1856+
char *
1857+
asc_tolower(const char *buff, size_t nbytes)
1858+
{
1859+
char *result;
1860+
char *p;
1861+
1862+
if (!buff)
1863+
return NULL;
1864+
1865+
result = pnstrdup(buff, nbytes);
1866+
1867+
for (p = result; *p; p++)
1868+
*p = pg_ascii_tolower((unsigned char) *p);
1869+
1870+
return result;
1871+
}
1872+
1873+
/*
1874+
* ASCII-only upper function
1875+
*
1876+
* We pass the number of bytes so we can pass varlena and char*
1877+
* to this function. The result is a palloc'd, null-terminated string.
1878+
*/
1879+
char *
1880+
asc_toupper(const char *buff, size_t nbytes)
1881+
{
1882+
char *result;
1883+
char *p;
1884+
1885+
if (!buff)
1886+
return NULL;
1887+
1888+
result = pnstrdup(buff, nbytes);
1889+
1890+
for (p = result; *p; p++)
1891+
*p = pg_ascii_toupper((unsigned char) *p);
1892+
1893+
return result;
1894+
}
1895+
1896+
/*
1897+
* ASCII-only initcap function
1898+
*
1899+
* We pass the number of bytes so we can pass varlena and char*
1900+
* to this function. The result is a palloc'd, null-terminated string.
1901+
*/
1902+
char *
1903+
asc_initcap(const char *buff, size_t nbytes)
1904+
{
1905+
char *result;
1906+
char *p;
1907+
int wasalnum = false;
1908+
1909+
if (!buff)
1910+
return NULL;
1911+
1912+
result = pnstrdup(buff, nbytes);
1913+
1914+
for (p = result; *p; p++)
1915+
{
1916+
char c;
1917+
1918+
if (wasalnum)
1919+
*p = c = pg_ascii_tolower((unsigned char) *p);
1920+
else
1921+
*p = c = pg_ascii_toupper((unsigned char) *p);
1922+
/* we don't trust isalnum() here */
1923+
wasalnum = ((c >= 'A' && c <= 'Z') ||
1924+
(c >= 'a' && c <= 'z') ||
1925+
(c >= '0' && c <= '9'));
1926+
}
1927+
1928+
return result;
1929+
}
1930+
18761931
/* convenience routines for when the input is null-terminated */
18771932

18781933
static char *
@@ -1893,6 +1948,20 @@ str_initcap_z(const char *buff, Oid collid)
18931948
return str_initcap(buff, strlen(buff), collid);
18941949
}
18951950

1951+
static char *
1952+
asc_tolower_z(const char *buff)
1953+
{
1954+
return asc_tolower(buff, strlen(buff));
1955+
}
1956+
1957+
static char *
1958+
asc_toupper_z(const char *buff)
1959+
{
1960+
return asc_toupper(buff, strlen(buff));
1961+
}
1962+
1963+
/* asc_initcap_z is not currently needed */
1964+
18961965

18971966
/* ----------
18981967
* Skip TM / th in FROM_CHAR
@@ -2380,7 +2449,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
23802449
INVALID_FOR_INTERVAL;
23812450
if (tmtcTzn(in))
23822451
{
2383-
char *p = str_tolower_z(tmtcTzn(in), collid);
2452+
/* We assume here that timezone names aren't localized */
2453+
char *p = asc_tolower_z(tmtcTzn(in));
23842454

23852455
strcpy(s, p);
23862456
pfree(p);
@@ -2427,7 +2497,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24272497
strcpy(s, str_toupper_z(localized_full_months[tm->tm_mon - 1], collid));
24282498
else
24292499
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2430-
str_toupper_z(months_full[tm->tm_mon - 1], collid));
2500+
asc_toupper_z(months_full[tm->tm_mon - 1]));
24312501
s += strlen(s);
24322502
break;
24332503
case DCH_Month:
@@ -2437,7 +2507,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24372507
if (S_TM(n->suffix))
24382508
strcpy(s, str_initcap_z(localized_full_months[tm->tm_mon - 1], collid));
24392509
else
2440-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
2510+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2511+
months_full[tm->tm_mon - 1]);
24412512
s += strlen(s);
24422513
break;
24432514
case DCH_month:
@@ -2447,10 +2518,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24472518
if (S_TM(n->suffix))
24482519
strcpy(s, str_tolower_z(localized_full_months[tm->tm_mon - 1], collid));
24492520
else
2450-
{
2451-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
2452-
*s = pg_tolower((unsigned char) *s);
2453-
}
2521+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2522+
asc_tolower_z(months_full[tm->tm_mon - 1]));
24542523
s += strlen(s);
24552524
break;
24562525
case DCH_MON:
@@ -2460,7 +2529,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24602529
if (S_TM(n->suffix))
24612530
strcpy(s, str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid));
24622531
else
2463-
strcpy(s, str_toupper_z(months[tm->tm_mon - 1], collid));
2532+
strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
24642533
s += strlen(s);
24652534
break;
24662535
case DCH_Mon:
@@ -2480,10 +2549,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24802549
if (S_TM(n->suffix))
24812550
strcpy(s, str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid));
24822551
else
2483-
{
2484-
strcpy(s, months[tm->tm_mon - 1]);
2485-
*s = pg_tolower((unsigned char) *s);
2486-
}
2552+
strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
24872553
s += strlen(s);
24882554
break;
24892555
case DCH_MM:
@@ -2498,34 +2564,33 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24982564
strcpy(s, str_toupper_z(localized_full_days[tm->tm_wday], collid));
24992565
else
25002566
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2501-
str_toupper_z(days[tm->tm_wday], collid));
2567+
asc_toupper_z(days[tm->tm_wday]));
25022568
s += strlen(s);
25032569
break;
25042570
case DCH_Day:
25052571
INVALID_FOR_INTERVAL;
25062572
if (S_TM(n->suffix))
25072573
strcpy(s, str_initcap_z(localized_full_days[tm->tm_wday], collid));
25082574
else
2509-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
2575+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2576+
days[tm->tm_wday]);
25102577
s += strlen(s);
25112578
break;
25122579
case DCH_day:
25132580
INVALID_FOR_INTERVAL;
25142581
if (S_TM(n->suffix))
25152582
strcpy(s, str_tolower_z(localized_full_days[tm->tm_wday], collid));
25162583
else
2517-
{
2518-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
2519-
*s = pg_tolower((unsigned char) *s);
2520-
}
2584+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2585+
asc_tolower_z(days[tm->tm_wday]));
25212586
s += strlen(s);
25222587
break;
25232588
case DCH_DY:
25242589
INVALID_FOR_INTERVAL;
25252590
if (S_TM(n->suffix))
25262591
strcpy(s, str_toupper_z(localized_abbrev_days[tm->tm_wday], collid));
25272592
else
2528-
strcpy(s, str_toupper_z(days_short[tm->tm_wday], collid));
2593+
strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
25292594
s += strlen(s);
25302595
break;
25312596
case DCH_Dy:
@@ -2541,10 +2606,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25412606
if (S_TM(n->suffix))
25422607
strcpy(s, str_tolower_z(localized_abbrev_days[tm->tm_wday], collid));
25432608
else
2544-
{
2545-
strcpy(s, days_short[tm->tm_wday]);
2546-
*s = pg_tolower((unsigned char) *s);
2547-
}
2609+
strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
25482610
s += strlen(s);
25492611
break;
25502612
case DCH_DDD:
@@ -4651,12 +4713,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
46514713
case NUM_rn:
46524714
if (IS_FILLMODE(Np->Num))
46534715
{
4654-
strcpy(Np->inout_p, str_tolower_z(Np->number_p, collid));
4716+
strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
46554717
Np->inout_p += strlen(Np->inout_p) - 1;
46564718
}
46574719
else
46584720
{
4659-
sprintf(Np->inout_p, "%15s", str_tolower_z(Np->number_p, collid));
4721+
sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
46604722
Np->inout_p += strlen(Np->inout_p) - 1;
46614723
}
46624724
break;

src/include/utils/formatting.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ extern char *str_tolower(const char *buff, size_t nbytes, Oid collid);
2424
extern char *str_toupper(const char *buff, size_t nbytes, Oid collid);
2525
extern char *str_initcap(const char *buff, size_t nbytes, Oid collid);
2626

27+
extern char *asc_tolower(const char *buff, size_t nbytes);
28+
extern char *asc_toupper(const char *buff, size_t nbytes);
29+
extern char *asc_initcap(const char *buff, size_t nbytes);
30+
2731
extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
2832
extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
2933
extern Datum interval_to_char(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)