Skip to content

Commit 5bc33cb

Browse files
committed
Add pg_encoding_set_invalid()
There are cases where we cannot / do not want to error out for invalidly encoded input. In such cases it can be useful to replace e.g. an incomplete multi-byte character with bytes that will trigger an error when getting validated as part of a larger string. Unfortunately, until now, for some encodings no such sequence existed. For those encodings this commit removes one previously accepted input combination - we consider that to be ok, as the chosen bytes are outside of the valid ranges for the encodings, we just previously failed to detect that. As we cannot add a new field to pg_wchar_table without breaking ABI, this is implemented "in-line" in the newly added function. Author: Noah Misch <noah@leadboat.com> Reviewed-by: Andres Freund <andres@anarazel.de> Backpatch-through: 13 Security: CVE-2025-1094
1 parent 04f31c8 commit 5bc33cb

File tree

7 files changed

+121
-2
lines changed

7 files changed

+121
-2
lines changed

src/common/wchar.c

+54-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,25 @@
1515
#include "mb/pg_wchar.h"
1616

1717

18+
/*
19+
* In today's multibyte encodings other than UTF8, this two-byte sequence
20+
* ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
21+
*
22+
* For historical reasons, several verifychar implementations opt to reject
23+
* this pair specifically. Byte pair range constraints, in encoding
24+
* originator documentation, always excluded this pair. No core conversion
25+
* could translate it. However, longstanding verifychar implementations
26+
* accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
27+
* pairs not valid per encoding originator documentation. To avoid tightening
28+
* core or non-core conversions in a security patch, we sought this one pair.
29+
*
30+
* PQescapeString() historically used spaces for BYTE1; many other values
31+
* could suffice for BYTE1.
32+
*/
33+
#define NONUTF8_INVALID_BYTE0 (0x8d)
34+
#define NONUTF8_INVALID_BYTE1 (' ')
35+
36+
1837
/*
1938
* Operations on multi-byte encodings are driven by a table of helper
2039
* functions.
@@ -1532,6 +1551,11 @@ pg_big5_verifychar(const unsigned char *s, int len)
15321551
if (len < l)
15331552
return -1;
15341553

1554+
if (l == 2 &&
1555+
s[0] == NONUTF8_INVALID_BYTE0 &&
1556+
s[1] == NONUTF8_INVALID_BYTE1)
1557+
return -1;
1558+
15351559
while (--l > 0)
15361560
{
15371561
if (*++s == '\0')
@@ -1581,6 +1605,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)
15811605
if (len < l)
15821606
return -1;
15831607

1608+
if (l == 2 &&
1609+
s[0] == NONUTF8_INVALID_BYTE0 &&
1610+
s[1] == NONUTF8_INVALID_BYTE1)
1611+
return -1;
1612+
15841613
while (--l > 0)
15851614
{
15861615
if (*++s == '\0')
@@ -1630,6 +1659,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)
16301659
if (len < l)
16311660
return -1;
16321661

1662+
if (l == 2 &&
1663+
s[0] == NONUTF8_INVALID_BYTE0 &&
1664+
s[1] == NONUTF8_INVALID_BYTE1)
1665+
return -1;
1666+
16331667
while (--l > 0)
16341668
{
16351669
if (*++s == '\0')
@@ -1858,6 +1892,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
18581892
}
18591893

18601894

1895+
/*
1896+
* Fills the provided buffer with two bytes such that:
1897+
* pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
*
* "encoding" must be a multibyte encoding; this is asserted via
* pg_encoding_max_length(encoding) > 1.
*
* Exactly dst[0] and dst[1] are written, so "dst" must have room for two
* bytes.  No terminator is stored.
1898+
*/
1899+
void
1900+
pg_encoding_set_invalid(int encoding, char *dst)
1901+
{
1902+
Assert(pg_encoding_max_length(encoding) > 1);
1903+
1904+
/*
* UTF8 gets 0xc0 as its first byte; every other encoding gets
* NONUTF8_INVALID_BYTE0.  The second byte is the same in both cases.
*/
dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
1905+
dst[1] = NONUTF8_INVALID_BYTE1;
1906+
}
1907+
18611908
/*
18621909
*-------------------------------------------------------------------
18631910
* encoding info table
@@ -1980,5 +2027,11 @@ pg_encoding_max_length(int encoding)
19802027
{
19812028
Assert(PG_VALID_ENCODING(encoding));
19822029

1983-
return pg_wchar_table[encoding].maxmblen;
2030+
/*
2031+
* Check for the encoding despite the assert, due to some mingw versions
2032+
* otherwise issuing bogus warnings.
2033+
*/
2034+
return PG_VALID_ENCODING(encoding) ?
2035+
pg_wchar_table[encoding].maxmblen :
2036+
pg_wchar_table[PG_SQL_ASCII].maxmblen;
19842037
}

src/include/mb/pg_wchar.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ typedef struct pg_enc2name
359359
#endif
360360
} pg_enc2name;
361361

362-
extern const pg_enc2name pg_enc2name_tbl[];
362+
extern PGDLLIMPORT const pg_enc2name pg_enc2name_tbl[];
363363

364364
/*
365365
* Encoding names for gettext
@@ -573,6 +573,7 @@ extern int pg_valid_server_encoding_id(int encoding);
573573
* (in addition to the ones just above). The constant tables declared
574574
* earlier in this file are also available from libpgcommon.
575575
*/
576+
extern void pg_encoding_set_invalid(int encoding, char *dst);
576577
extern int pg_encoding_mblen(int encoding, const char *mbstr);
577578
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
578579
extern int pg_encoding_dsplen(int encoding, const char *mbstr);

src/test/regress/expected/conversion.out

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
--
22
-- create user defined conversion
33
--
4+
SELECT FROM test_enc_setup();
5+
--
6+
(1 row)
7+
48
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
59
SET SESSION AUTHORIZATION regress_conversion_user;
610
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;

src/test/regress/input/create_function_0.source

+5
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ CREATE FUNCTION test_opclass_options_func(internal)
5959
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
6060
LANGUAGE C;
6161

62+
63+
CREATE FUNCTION test_enc_setup() RETURNS void
64+
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
65+
LANGUAGE C STRICT;
66+
6267
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
6368
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
6469
LANGUAGE C STRICT;

src/test/regress/output/create_function_0.source

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ CREATE FUNCTION test_opclass_options_func(internal)
4646
RETURNS void
4747
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
4848
LANGUAGE C;
49+
CREATE FUNCTION test_enc_setup() RETURNS void
50+
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
51+
LANGUAGE C STRICT;
4952
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
5053
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
5154
LANGUAGE C STRICT;

src/test/regress/regress.c

+50
Original file line numberDiff line numberDiff line change
@@ -1089,6 +1089,56 @@ test_opclass_options_func(PG_FUNCTION_ARGS)
10891089
PG_RETURN_NULL();
10901090
}
10911091

1092+
/*
* one-time tests for encoding infrastructure
*
* Walks every encoding id below _PG_LAST_ENCODING_, skips those whose maximum
* character length is 1, and checks the two bytes produced by
* pg_encoding_set_invalid().  Each failed check is reported with
* elog(WARNING), so the loop continues to the next check.  Returns void.
*/
1093+
PG_FUNCTION_INFO_V1(test_enc_setup);
1094+
Datum
1095+
test_enc_setup(PG_FUNCTION_ARGS)
1096+
{
1097+
/* Test pg_encoding_set_invalid() */
1098+
for (int i = 0; i < _PG_LAST_ENCODING_; i++)
1099+
{
1100+
char buf[2],
1101+
bigbuf[16];
1102+
int len,
1103+
mblen,
1104+
valid;
1105+
1106+
/* pg_encoding_set_invalid() asserts max length > 1, so skip the rest */
if (pg_encoding_max_length(i) == 1)
1107+
continue;
1108+
pg_encoding_set_invalid(i, buf);
1109+
/*
* strnlen() stops at the first NUL within the two bytes, so any result
* other than 2 means one of the bytes is NUL.
*/
len = strnlen(buf, 2);
1110+
if (len != 2)
1111+
elog(WARNING,
1112+
"official invalid string for encoding \"%s\" has length %d",
1113+
pg_enc2name_tbl[i].name, len);
1114+
/* the pair must be measured as a single two-byte character */
mblen = pg_encoding_mblen(i, buf);
1115+
if (mblen != 2)
1116+
elog(WARNING,
1117+
"official invalid string for encoding \"%s\" has mblen %d",
1118+
pg_enc2name_tbl[i].name, mblen);
1119+
/* verification of the pair must report no valid prefix */
valid = pg_encoding_verifymbstr(i, buf, len);
1120+
if (valid != 0)
1121+
elog(WARNING,
1122+
"official invalid string for encoding \"%s\" has valid prefix of length %d",
1123+
pg_enc2name_tbl[i].name, valid);
1124+
/* the first byte on its own must not verify either */
valid = pg_encoding_verifymbstr(i, buf, 1);
1125+
if (valid != 0)
1126+
elog(WARNING,
1127+
"first byte of official invalid string for encoding \"%s\" has valid prefix of length %d",
1128+
pg_enc2name_tbl[i].name, valid);
1129+
/*
* Follow the pair with spaces filling the rest of bigbuf, and confirm
* that the trailing bytes do not make it verify.
*/
memset(bigbuf, ' ', sizeof(bigbuf));
1130+
bigbuf[0] = buf[0];
1131+
bigbuf[1] = buf[1];
1132+
valid = pg_encoding_verifymbstr(i, bigbuf, sizeof(bigbuf));
1133+
if (valid != 0)
1134+
elog(WARNING,
1135+
"trailing data changed official invalid string for encoding \"%s\" to have valid prefix of length %d",
1136+
pg_enc2name_tbl[i].name, valid);
1137+
}
1138+
1139+
PG_RETURN_VOID();
1140+
}
1141+
10921142
/*
10931143
* Call an encoding conversion or verification function.
10941144
*

src/test/regress/sql/conversion.sql

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
--
22
-- create user defined conversion
33
--
4+
5+
SELECT FROM test_enc_setup();
6+
47
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
58
SET SESSION AUTHORIZATION regress_conversion_user;
69
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;

0 commit comments

Comments
 (0)