Skip to content

Commit 1db236c

Browse files
committed
Fix to_char() to use ASCII-only case-folding rules where appropriate.
formatting.c used locale-dependent case folding rules in some code paths where the result isn't supposed to be locale-dependent, for example to_char(timestamp, 'DAY'). Since the source data is always just ASCII in these cases, that usually didn't matter ... but it does matter in Turkish locales, which have unusual treatment of "i" and "I". To confuse matters even more, the misbehavior was only visible in UTF8 encoding, because in single-byte encodings we used pg_toupper/pg_tolower which don't have locale-specific behavior for ASCII characters. Fix by providing intentionally ASCII-only case-folding functions and using these where appropriate. Per bug #7913 from Adnan Dursun. Back-patch to all active branches, since it's been like this for a long time.
1 parent fa85230 commit 1db236c

File tree

2 files changed

+120
-54
lines changed

2 files changed

+120
-54
lines changed

src/backend/utils/adt/formatting.c

Lines changed: 116 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,12 +1493,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
14931493
/* C/POSIX collations use this path regardless of database encoding */
14941494
if (lc_ctype_is_c(collid))
14951495
{
1496-
char *p;
1497-
1498-
result = pnstrdup(buff, nbytes);
1499-
1500-
for (p = result; *p; p++)
1501-
*p = pg_ascii_tolower((unsigned char) *p);
1496+
result = asc_tolower(buff, nbytes);
15021497
}
15031498
#ifdef USE_WIDE_UPPER_LOWER
15041499
else if (pg_database_encoding_max_length() > 1)
@@ -1618,12 +1613,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16181613
/* C/POSIX collations use this path regardless of database encoding */
16191614
if (lc_ctype_is_c(collid))
16201615
{
1621-
char *p;
1622-
1623-
result = pnstrdup(buff, nbytes);
1624-
1625-
for (p = result; *p; p++)
1626-
*p = pg_ascii_toupper((unsigned char) *p);
1616+
result = asc_toupper(buff, nbytes);
16271617
}
16281618
#ifdef USE_WIDE_UPPER_LOWER
16291619
else if (pg_database_encoding_max_length() > 1)
@@ -1744,23 +1734,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
17441734
/* C/POSIX collations use this path regardless of database encoding */
17451735
if (lc_ctype_is_c(collid))
17461736
{
1747-
char *p;
1748-
1749-
result = pnstrdup(buff, nbytes);
1750-
1751-
for (p = result; *p; p++)
1752-
{
1753-
char c;
1754-
1755-
if (wasalnum)
1756-
*p = c = pg_ascii_tolower((unsigned char) *p);
1757-
else
1758-
*p = c = pg_ascii_toupper((unsigned char) *p);
1759-
/* we don't trust isalnum() here */
1760-
wasalnum = ((c >= 'A' && c <= 'Z') ||
1761-
(c >= 'a' && c <= 'z') ||
1762-
(c >= '0' && c <= '9'));
1763-
}
1737+
result = asc_initcap(buff, nbytes);
17641738
}
17651739
#ifdef USE_WIDE_UPPER_LOWER
17661740
else if (pg_database_encoding_max_length() > 1)
@@ -1887,6 +1861,87 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
18871861
return result;
18881862
}
18891863

1864+
/*
1865+
* ASCII-only lower function
1866+
*
1867+
* We pass the number of bytes so we can pass varlena and char*
1868+
* to this function. The result is a palloc'd, null-terminated string.
*
* Returns NULL when buff is NULL.  Every byte of the copy is folded with
* pg_ascii_tolower(), so the result is meant not to depend on the locale.
* Folding stops at the first zero byte found in the copy.
1869+
*/
1870+
char *
1871+
asc_tolower(const char *buff, size_t nbytes)
1872+
{
1873+
char *result;
1874+
char *p;
1875+
1876+
/* a NULL input is handed back as NULL instead of being dereferenced */
if (!buff)
1877+
return NULL;
1878+
1879+
/* fold a private copy; the caller's (const) buffer is left untouched */
result = pnstrdup(buff, nbytes);
1880+
1881+
/* convert in place, one byte at a time, until the terminating zero byte */
for (p = result; *p; p++)
1882+
*p = pg_ascii_tolower((unsigned char) *p);
1883+
1884+
return result;
1885+
}
1886+
1887+
/*
1888+
* ASCII-only upper function
1889+
*
1890+
* We pass the number of bytes so we can pass varlena and char*
1891+
* to this function. The result is a palloc'd, null-terminated string.
*
* Returns NULL when buff is NULL.  Every byte of the copy is folded with
* pg_ascii_toupper(), so the result is meant not to depend on the locale.
* Folding stops at the first zero byte found in the copy.
1892+
*/
1893+
char *
1894+
asc_toupper(const char *buff, size_t nbytes)
1895+
{
1896+
char *result;
1897+
char *p;
1898+
1899+
/* a NULL input is handed back as NULL instead of being dereferenced */
if (!buff)
1900+
return NULL;
1901+
1902+
/* fold a private copy; the caller's (const) buffer is left untouched */
result = pnstrdup(buff, nbytes);
1903+
1904+
/* convert in place, one byte at a time, until the terminating zero byte */
for (p = result; *p; p++)
1905+
*p = pg_ascii_toupper((unsigned char) *p);
1906+
1907+
return result;
1908+
}
1909+
1910+
/*
1911+
* ASCII-only initcap function
1912+
*
1913+
* We pass the number of bytes so we can pass varlena and char*
1914+
* to this function. The result is a palloc'd, null-terminated string.
*
* Returns NULL when buff is NULL.  A byte is upper-cased when it is the
* first byte or follows a byte that is not an ASCII letter or digit; every
* other byte is lower-cased.  Only pg_ascii_toupper()/pg_ascii_tolower()
* and explicit ASCII range tests are used.
1915+
*/
1916+
char *
1917+
asc_initcap(const char *buff, size_t nbytes)
1918+
{
1919+
char *result;
1920+
char *p;
1921+
/* true when the previously processed byte was an ASCII letter or digit */
int wasalnum = false;
1922+
1923+
/* a NULL input is handed back as NULL instead of being dereferenced */
if (!buff)
1924+
return NULL;
1925+
1926+
/* fold a private copy; the caller's (const) buffer is left untouched */
result = pnstrdup(buff, nbytes);
1927+
1928+
for (p = result; *p; p++)
1929+
{
1930+
char c;
1931+
1932+
/* inside a word: lower-case; at the start of a word: upper-case */
if (wasalnum)
1933+
*p = c = pg_ascii_tolower((unsigned char) *p);
1934+
else
1935+
*p = c = pg_ascii_toupper((unsigned char) *p);
1936+
/*
 * we don't trust isalnum() here: the test is spelled out as plain ASCII
 * ranges (presumably because isalnum() can vary with the locale)
 */
1937+
wasalnum = ((c >= 'A' && c <= 'Z') ||
1938+
(c >= 'a' && c <= 'z') ||
1939+
(c >= '0' && c <= '9'));
1940+
}
1941+
1942+
return result;
1943+
}
1944+
18901945
/* convenience routines for when the input is null-terminated */
18911946

18921947
static char *
@@ -1907,6 +1962,20 @@ str_initcap_z(const char *buff, Oid collid)
19071962
return str_initcap(buff, strlen(buff), collid);
19081963
}
19091964

1965+
/*
 * asc_tolower() for a null-terminated input: the length is taken with
 * strlen(), so buff must not be NULL here (strlen runs before asc_tolower's
 * own NULL check can apply).
 */
static char *
1966+
asc_tolower_z(const char *buff)
1967+
{
1968+
return asc_tolower(buff, strlen(buff));
1969+
}
1970+
1971+
/*
 * asc_toupper() for a null-terminated input: the length is taken with
 * strlen(), so buff must not be NULL here (strlen runs before asc_toupper's
 * own NULL check can apply).
 */
static char *
1972+
asc_toupper_z(const char *buff)
1973+
{
1974+
return asc_toupper(buff, strlen(buff));
1975+
}
1976+
1977+
/* asc_initcap_z is not currently needed */
1978+
19101979

19111980
/* ----------
19121981
* Skip TM / th in FROM_CHAR
@@ -2419,7 +2488,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24192488
INVALID_FOR_INTERVAL;
24202489
if (tmtcTzn(in))
24212490
{
2422-
char *p = str_tolower_z(tmtcTzn(in), collid);
2491+
/* We assume here that timezone names aren't localized */
2492+
char *p = asc_tolower_z(tmtcTzn(in));
24232493

24242494
strcpy(s, p);
24252495
pfree(p);
@@ -2466,7 +2536,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24662536
strcpy(s, str_toupper_z(localized_full_months[tm->tm_mon - 1], collid));
24672537
else
24682538
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2469-
str_toupper_z(months_full[tm->tm_mon - 1], collid));
2539+
asc_toupper_z(months_full[tm->tm_mon - 1]));
24702540
s += strlen(s);
24712541
break;
24722542
case DCH_Month:
@@ -2476,7 +2546,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24762546
if (S_TM(n->suffix))
24772547
strcpy(s, str_initcap_z(localized_full_months[tm->tm_mon - 1], collid));
24782548
else
2479-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
2549+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2550+
months_full[tm->tm_mon - 1]);
24802551
s += strlen(s);
24812552
break;
24822553
case DCH_month:
@@ -2486,10 +2557,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24862557
if (S_TM(n->suffix))
24872558
strcpy(s, str_tolower_z(localized_full_months[tm->tm_mon - 1], collid));
24882559
else
2489-
{
2490-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
2491-
*s = pg_tolower((unsigned char) *s);
2492-
}
2560+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2561+
asc_tolower_z(months_full[tm->tm_mon - 1]));
24932562
s += strlen(s);
24942563
break;
24952564
case DCH_MON:
@@ -2499,7 +2568,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24992568
if (S_TM(n->suffix))
25002569
strcpy(s, str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid));
25012570
else
2502-
strcpy(s, str_toupper_z(months[tm->tm_mon - 1], collid));
2571+
strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
25032572
s += strlen(s);
25042573
break;
25052574
case DCH_Mon:
@@ -2519,10 +2588,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25192588
if (S_TM(n->suffix))
25202589
strcpy(s, str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid));
25212590
else
2522-
{
2523-
strcpy(s, months[tm->tm_mon - 1]);
2524-
*s = pg_tolower((unsigned char) *s);
2525-
}
2591+
strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
25262592
s += strlen(s);
25272593
break;
25282594
case DCH_MM:
@@ -2537,34 +2603,33 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25372603
strcpy(s, str_toupper_z(localized_full_days[tm->tm_wday], collid));
25382604
else
25392605
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2540-
str_toupper_z(days[tm->tm_wday], collid));
2606+
asc_toupper_z(days[tm->tm_wday]));
25412607
s += strlen(s);
25422608
break;
25432609
case DCH_Day:
25442610
INVALID_FOR_INTERVAL;
25452611
if (S_TM(n->suffix))
25462612
strcpy(s, str_initcap_z(localized_full_days[tm->tm_wday], collid));
25472613
else
2548-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
2614+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2615+
days[tm->tm_wday]);
25492616
s += strlen(s);
25502617
break;
25512618
case DCH_day:
25522619
INVALID_FOR_INTERVAL;
25532620
if (S_TM(n->suffix))
25542621
strcpy(s, str_tolower_z(localized_full_days[tm->tm_wday], collid));
25552622
else
2556-
{
2557-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
2558-
*s = pg_tolower((unsigned char) *s);
2559-
}
2623+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2624+
asc_tolower_z(days[tm->tm_wday]));
25602625
s += strlen(s);
25612626
break;
25622627
case DCH_DY:
25632628
INVALID_FOR_INTERVAL;
25642629
if (S_TM(n->suffix))
25652630
strcpy(s, str_toupper_z(localized_abbrev_days[tm->tm_wday], collid));
25662631
else
2567-
strcpy(s, str_toupper_z(days_short[tm->tm_wday], collid));
2632+
strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
25682633
s += strlen(s);
25692634
break;
25702635
case DCH_Dy:
@@ -2580,10 +2645,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25802645
if (S_TM(n->suffix))
25812646
strcpy(s, str_tolower_z(localized_abbrev_days[tm->tm_wday], collid));
25822647
else
2583-
{
2584-
strcpy(s, days_short[tm->tm_wday]);
2585-
*s = pg_tolower((unsigned char) *s);
2586-
}
2648+
strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
25872649
s += strlen(s);
25882650
break;
25892651
case DCH_DDD:
@@ -4670,12 +4732,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
46704732
case NUM_rn:
46714733
if (IS_FILLMODE(Np->Num))
46724734
{
4673-
strcpy(Np->inout_p, str_tolower_z(Np->number_p, collid));
4735+
strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
46744736
Np->inout_p += strlen(Np->inout_p) - 1;
46754737
}
46764738
else
46774739
{
4678-
sprintf(Np->inout_p, "%15s", str_tolower_z(Np->number_p, collid));
4740+
sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
46794741
Np->inout_p += strlen(Np->inout_p) - 1;
46804742
}
46814743
break;

src/include/utils/formatting.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ extern char *str_tolower(const char *buff, size_t nbytes, Oid collid);
2424
extern char *str_toupper(const char *buff, size_t nbytes, Oid collid);
2525
extern char *str_initcap(const char *buff, size_t nbytes, Oid collid);
2626

27+
extern char *asc_tolower(const char *buff, size_t nbytes);
28+
extern char *asc_toupper(const char *buff, size_t nbytes);
29+
extern char *asc_initcap(const char *buff, size_t nbytes);
30+
2731
extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
2832
extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
2933
extern Datum interval_to_char(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)