Skip to content

Commit db3eb0e

Browse files
committed
Add pg_encoding_set_invalid()
There are cases where we cannot / do not want to error out for invalidly encoded input. In such cases it can be useful to replace e.g. an incomplete multi-byte character with bytes that will trigger an error when getting validated as part of a larger string. Unfortunately, until now, for some encodings no such sequence existed. For those encodings this commit removes one previously accepted input combination - we consider that to be ok, as the chosen bytes are outside of the valid ranges for the encodings, we just previously failed to detect that. As we cannot add a new field to pg_wchar_table without breaking ABI, this is implemented "in-line" in the newly added function. Author: Noah Misch <noah@leadboat.com> Reviewed-by: Andres Freund <andres@anarazel.de> Backpatch-through: 13 Security: CVE-2025-1094
1 parent 00f1a1f commit db3eb0e

File tree

7 files changed

+121
-2
lines changed

7 files changed

+121
-2
lines changed

src/common/wchar.c

+54-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,25 @@
1515
#include "mb/pg_wchar.h"
1616

1717

18+
/*
19+
* In today's multibyte encodings other than UTF8, this two-byte sequence
20+
* ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
21+
*
22+
* For historical reasons, several verifychar implementations opt to reject
23+
* this pair specifically. Byte pair range constraints, in encoding
24+
* originator documentation, always excluded this pair. No core conversion
25+
* could translate it. However, longstanding verifychar implementations
26+
* accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
27+
* pairs not valid per encoding originator documentation. To avoid tightening
28+
* core or non-core conversions in a security patch, we sought this one pair.
29+
*
30+
* PQescapeString() historically used spaces for BYTE1; many other values
31+
* could suffice for BYTE1.
32+
*/
33+
#define NONUTF8_INVALID_BYTE0 (0x8d)	/* first byte of the invalid pair */
34+
#define NONUTF8_INVALID_BYTE1 (' ')	/* second byte of the invalid pair: a space */
35+
36+
1837
/*
1938
* Operations on multi-byte encodings are driven by a table of helper
2039
* functions.
@@ -1330,6 +1349,11 @@ pg_big5_verifier(const unsigned char *s, int len)
13301349
if (len < l)
13311350
return -1;
13321351

1352+
if (l == 2 &&
1353+
s[0] == NONUTF8_INVALID_BYTE0 &&
1354+
s[1] == NONUTF8_INVALID_BYTE1)
1355+
return -1;
1356+
13331357
while (--l > 0)
13341358
{
13351359
if (*++s == '\0')
@@ -1350,6 +1374,11 @@ pg_gbk_verifier(const unsigned char *s, int len)
13501374
if (len < l)
13511375
return -1;
13521376

1377+
if (l == 2 &&
1378+
s[0] == NONUTF8_INVALID_BYTE0 &&
1379+
s[1] == NONUTF8_INVALID_BYTE1)
1380+
return -1;
1381+
13531382
while (--l > 0)
13541383
{
13551384
if (*++s == '\0')
@@ -1370,6 +1399,11 @@ pg_uhc_verifier(const unsigned char *s, int len)
13701399
if (len < l)
13711400
return -1;
13721401

1402+
if (l == 2 &&
1403+
s[0] == NONUTF8_INVALID_BYTE0 &&
1404+
s[1] == NONUTF8_INVALID_BYTE1)
1405+
return -1;
1406+
13731407
while (--l > 0)
13741408
{
13751409
if (*++s == '\0')
@@ -1496,6 +1530,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
14961530
}
14971531

14981532

1533+
/*
1534+
* Fills the provided buffer with two bytes such that:
1535+
* pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
*
* "encoding" must be a multibyte encoding: the function asserts that
* pg_encoding_max_length(encoding) > 1.
*
* "dst" must have room for two bytes. Exactly dst[0] and dst[1] are
* written; no terminating NUL is stored.
*
* The first byte is 0xc0 when encoding is PG_UTF8 and NONUTF8_INVALID_BYTE0
* for every other encoding. The second byte is always
* NONUTF8_INVALID_BYTE1.
1536+
*/
1537+
void
1538+
pg_encoding_set_invalid(int encoding, char *dst)
1539+
{
1540+
Assert(pg_encoding_max_length(encoding) > 1);
1541+
1542+
dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
1543+
dst[1] = NONUTF8_INVALID_BYTE1;
1544+
}
1545+
14991546
/*
15001547
*-------------------------------------------------------------------
15011548
* encoding info table
@@ -1671,5 +1718,11 @@ pg_encoding_max_length(int encoding)
16711718
{
16721719
Assert(PG_VALID_ENCODING(encoding));
16731720

1674-
return pg_wchar_table[encoding].maxmblen;
1721+
/*
1722+
* Check for the encoding despite the assert, due to some mingw versions
1723+
* otherwise issuing bogus warnings.
1724+
*/
1725+
return PG_VALID_ENCODING(encoding) ?
1726+
pg_wchar_table[encoding].maxmblen :
1727+
pg_wchar_table[PG_SQL_ASCII].maxmblen;
16751728
}

src/include/mb/pg_wchar.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ typedef struct pg_enc2name
341341
#endif
342342
} pg_enc2name;
343343

344-
extern const pg_enc2name pg_enc2name_tbl[];
344+
extern PGDLLIMPORT const pg_enc2name pg_enc2name_tbl[];
345345

346346
/*
347347
* Encoding names for gettext
@@ -552,6 +552,7 @@ extern int pg_valid_server_encoding_id(int encoding);
552552
* (in addition to the ones just above). The constant tables declared
553553
* earlier in this file are also available from libpgcommon.
554554
*/
555+
extern void pg_encoding_set_invalid(int encoding, char *dst);
555556
extern int pg_encoding_mblen(int encoding, const char *mbstr);
556557
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
557558
extern int pg_encoding_dsplen(int encoding, const char *mbstr);

src/test/regress/expected/conversion.out

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
--
22
-- create user defined conversion
33
--
4+
SELECT FROM test_enc_setup();
5+
--
6+
(1 row)
7+
48
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
59
SET SESSION AUTHORIZATION regress_conversion_user;
610
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;

src/test/regress/input/create_function_1.source

+4
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ CREATE FUNCTION test_atomic_ops()
6262
AS '@libdir@/regress@DLSUFFIX@'
6363
LANGUAGE C;
6464

65+
CREATE FUNCTION test_enc_setup() RETURNS void
66+
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
67+
LANGUAGE C STRICT;
68+
6569
-- Tests creating a FDW handler
6670
CREATE FUNCTION test_fdw_handler()
6771
RETURNS fdw_handler

src/test/regress/output/create_function_1.source

+3
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ CREATE FUNCTION test_atomic_ops()
5555
RETURNS bool
5656
AS '@libdir@/regress@DLSUFFIX@'
5757
LANGUAGE C;
58+
CREATE FUNCTION test_enc_setup() RETURNS void
59+
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
60+
LANGUAGE C STRICT;
5861
-- Tests creating a FDW handler
5962
CREATE FUNCTION test_fdw_handler()
6063
RETURNS fdw_handler

src/test/regress/regress.c

+51
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "commands/trigger.h"
3030
#include "executor/executor.h"
3131
#include "executor/spi.h"
32+
#include "mb/pg_wchar.h"
3233
#include "miscadmin.h"
3334
#include "nodes/supportnodes.h"
3435
#include "optimizer/optimizer.h"
@@ -1088,3 +1089,53 @@ test_opclass_options_func(PG_FUNCTION_ARGS)
10881089
{
10891090
PG_RETURN_NULL();
10901091
}
1092+
1093+
/*
 * One-time tests for encoding infrastructure.
 *
 * SQL-callable function that takes no arguments and returns void. For every
 * encoding id below _PG_LAST_ENCODING_ whose maximum character length is not
 * 1, it obtains the two-byte sequence produced by pg_encoding_set_invalid()
 * and checks several properties of it. Each property that does not hold is
 * reported with elog(WARNING, ...) naming the encoding.
 */
1094+
PG_FUNCTION_INFO_V1(test_enc_setup);
1095+
Datum
1096+
test_enc_setup(PG_FUNCTION_ARGS)
1097+
{
1098+
/* Test pg_encoding_set_invalid() */
1099+
for (int i = 0; i < _PG_LAST_ENCODING_; i++)
1100+
{
1101+
char buf[2],
1102+
bigbuf[16];
1103+
int len,
1104+
mblen,
1105+
valid;
1106+
1107+
/* pg_encoding_set_invalid() asserts max length > 1, so skip these */
if (pg_encoding_max_length(i) == 1)
1108+
continue;
1109+
pg_encoding_set_invalid(i, buf);
1110+
/*
 * buf holds exactly two bytes and is not NUL-terminated, so the scan is
 * bounded to 2. A result below 2 means one of the bytes is NUL.
 */
len = strnlen(buf, 2);
1111+
if (len != 2)
1112+
elog(WARNING,
1113+
"official invalid string for encoding \"%s\" has length %d",
1114+
pg_enc2name_tbl[i].name, len);
1115+
/* the pair must be reported as one two-byte character */
mblen = pg_encoding_mblen(i, buf);
1116+
if (mblen != 2)
1117+
elog(WARNING,
1118+
"official invalid string for encoding \"%s\" has mblen %d",
1119+
pg_enc2name_tbl[i].name, mblen);
1120+
/* verifying the pair must yield a valid prefix of length 0 */
valid = pg_encoding_verifymbstr(i, buf, len);
1121+
if (valid != 0)
1122+
elog(WARNING,
1123+
"official invalid string for encoding \"%s\" has valid prefix of length %d",
1124+
pg_enc2name_tbl[i].name, valid);
1125+
/* the first byte taken alone must not verify either */
valid = pg_encoding_verifymbstr(i, buf, 1);
1126+
if (valid != 0)
1127+
elog(WARNING,
1128+
"first byte of official invalid string for encoding \"%s\" has valid prefix of length %d",
1129+
pg_enc2name_tbl[i].name, valid);
1130+
/*
 * Put the pair at the front of a 16-byte buffer otherwise filled with
 * spaces: bytes following the pair must not make any prefix verify.
 */
memset(bigbuf, ' ', sizeof(bigbuf));
1131+
bigbuf[0] = buf[0];
1132+
bigbuf[1] = buf[1];
1133+
valid = pg_encoding_verifymbstr(i, bigbuf, sizeof(bigbuf));
1134+
if (valid != 0)
1135+
elog(WARNING,
1136+
"trailing data changed official invalid string for encoding \"%s\" to have valid prefix of length %d",
1137+
pg_enc2name_tbl[i].name, valid);
1138+
}
1139+
1140+
PG_RETURN_VOID();
1141+
}

src/test/regress/sql/conversion.sql

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
--
22
-- create user defined conversion
33
--
4+
5+
SELECT FROM test_enc_setup();
6+
47
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
58
SET SESSION AUTHORIZATION regress_conversion_user;
69
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;

0 commit comments

Comments
 (0)