|
15 | 15 | #include "mb/pg_wchar.h"
|
16 | 16 |
|
17 | 17 |
|
| 18 | +/* |
| 19 | + * In today's multibyte encodings other than UTF8, this two-byte sequence |
| 20 | + * ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0. |
| 21 | + * |
| 22 | + * For historical reasons, several verifychar implementations opt to reject |
| 23 | + * this pair specifically. Byte pair range constraints, in encoding |
| 24 | + * originator documentation, always excluded this pair. No core conversion |
| 25 | + * could translate it. However, longstanding verifychar implementations |
| 26 | + * accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate |
| 27 | + * pairs not valid per encoding originator documentation. To avoid tightening |
| 28 | + * core or non-core conversions in a security patch, we sought this one pair. |
| 29 | + * |
| 30 | + * PQescapeString() historically used spaces for BYTE1; many other values |
| 31 | + * could suffice for BYTE1. |
| 32 | + */ |
| 33 | +#define NONUTF8_INVALID_BYTE0 (0x8d) /* first byte of the invalid pair for non-UTF8 encodings */ |
| 34 | +#define NONUTF8_INVALID_BYTE1 (' ') /* second byte of the invalid pair: an ASCII space */ |
| 35 | + |
| 36 | + |
18 | 37 | /*
|
19 | 38 | * Operations on multi-byte encodings are driven by a table of helper
|
20 | 39 | * functions.
|
@@ -1532,6 +1551,11 @@ pg_big5_verifychar(const unsigned char *s, int len)
|
1532 | 1551 | if (len < l)
|
1533 | 1552 | return -1;
|
1534 | 1553 |
|
| 1554 | + if (l == 2 && |
| 1555 | + s[0] == NONUTF8_INVALID_BYTE0 && |
| 1556 | + s[1] == NONUTF8_INVALID_BYTE1) |
| 1557 | + return -1; |
| 1558 | + |
1535 | 1559 | while (--l > 0)
|
1536 | 1560 | {
|
1537 | 1561 | if (*++s == '\0')
|
@@ -1581,6 +1605,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)
|
1581 | 1605 | if (len < l)
|
1582 | 1606 | return -1;
|
1583 | 1607 |
|
| 1608 | + if (l == 2 && |
| 1609 | + s[0] == NONUTF8_INVALID_BYTE0 && |
| 1610 | + s[1] == NONUTF8_INVALID_BYTE1) |
| 1611 | + return -1; |
| 1612 | + |
1584 | 1613 | while (--l > 0)
|
1585 | 1614 | {
|
1586 | 1615 | if (*++s == '\0')
|
@@ -1630,6 +1659,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)
|
1630 | 1659 | if (len < l)
|
1631 | 1660 | return -1;
|
1632 | 1661 |
|
| 1662 | + if (l == 2 && |
| 1663 | + s[0] == NONUTF8_INVALID_BYTE0 && |
| 1664 | + s[1] == NONUTF8_INVALID_BYTE1) |
| 1665 | + return -1; |
| 1666 | + |
1633 | 1667 | while (--l > 0)
|
1634 | 1668 | {
|
1635 | 1669 | if (*++s == '\0')
|
@@ -1858,6 +1892,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
|
1858 | 1892 | }
|
1859 | 1893 |
|
1860 | 1894 |
|
| 1895 | +/* |
| 1896 | + * Fills dst, which must have room for two bytes, with a byte pair such that: |
| 1897 | + * pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0 |
| 1898 | + */ |
| 1899 | +void |
| 1900 | +pg_encoding_set_invalid(int encoding, char *dst) |
| 1901 | +{ |
| 1902 | + Assert(pg_encoding_max_length(encoding) > 1); /* caller must pass an encoding whose max length exceeds one byte */ |
| 1903 | + |
| 1904 | + dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0); /* UTF8 gets 0xc0; every other encoding gets the shared first byte */ |
| 1905 | + dst[1] = NONUTF8_INVALID_BYTE1; /* same second byte for every encoding; nothing is written past dst[1] */ |
| 1906 | +} |
| 1907 | + |
1861 | 1908 | /*
|
1862 | 1909 | *-------------------------------------------------------------------
|
1863 | 1910 | * encoding info table
|
@@ -1980,5 +2027,11 @@ pg_encoding_max_length(int encoding)
|
1980 | 2027 | {
|
1981 | 2028 | Assert(PG_VALID_ENCODING(encoding));
|
1982 | 2029 |
|
1983 |
| - return pg_wchar_table[encoding].maxmblen; |
| 2030 | + /* |
| 2031 | + * Re-check that the encoding is valid despite the assert above, because some |
| 2032 | + * mingw versions otherwise issue bogus warnings. |
| 2033 | + */ |
| 2034 | + return PG_VALID_ENCODING(encoding) ? |
| 2035 | + pg_wchar_table[encoding].maxmblen : |
| 2036 | + pg_wchar_table[PG_SQL_ASCII].maxmblen; |
1984 | 2037 | }
|
0 commit comments