Skip to content

Commit 5b64368

Browse files
committed
Fix incautious handling of possibly-miscoded strings in client code.
An incorrectly-encoded multibyte character near the end of a string could cause various processing loops to run past the string's terminating NUL, with results ranging from no detectable issue to a program crash, depending on what happens to be in the following memory.

This isn't an issue in the server, because we take care to verify the encoding of strings before doing any interesting processing on them. However, that lack of care leaked into client-side code which shouldn't assume that anyone has validated the encoding of its input.

Although this is certainly a bug worth fixing, the PG security team elected not to regard it as a security issue, primarily because any untrusted text should be sanitized by PQescapeLiteral or the like before being incorporated into a SQL or psql command. (If an app fails to do so, the same technique can be used to cause SQL injection, with probably much more dire consequences than a mere client-program crash.) Those functions were already made proof against this class of problem, cf. CVE-2006-2313.

To fix, invent PQmblenBounded() which is like PQmblen() except it won't return more than the number of bytes remaining in the string. In HEAD we can make this a new libpq function, as PQmblen() is. It seems imprudent to change libpq's API in stable branches though, so in the back branches define PQmblenBounded as a macro in the files that need it. (Note that just changing PQmblen's behavior would not be a good idea; notably, it would completely break the escaping functions' defense against this exact problem. So we just want a version for those callers that don't have any better way of handling this issue.)

Per private report from houjingyi. Back-patch to all supported branches.
1 parent b4c027b commit 5b64368

File tree

11 files changed

+57
-25
lines changed

11 files changed

+57
-25
lines changed

src/bin/psql/common.c

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,8 @@
2929
#include "portability/instr_time.h"
3030
#include "settings.h"
3131

32+
#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e))
33+
3234
static bool DescribeQuery(const char *query, double *elapsed_msec);
3335
static bool ExecQueryUsingCursor(const char *query, double *elapsed_msec);
3436
static bool command_no_begin(const char *query);
@@ -1842,7 +1844,7 @@ skip_white_space(const char *query)
18421844

18431845
while (*query)
18441846
{
1845-
int mblen = PQmblen(query, pset.encoding);
1847+
int mblen = PQmblenBounded(query, pset.encoding);
18461848

18471849
/*
18481850
* Note: we assume the encoding is a superset of ASCII, so that for
@@ -1879,7 +1881,7 @@ skip_white_space(const char *query)
18791881
query++;
18801882
break;
18811883
}
1882-
query += PQmblen(query, pset.encoding);
1884+
query += PQmblenBounded(query, pset.encoding);
18831885
}
18841886
}
18851887
else if (cnestlevel > 0)
@@ -1914,7 +1916,7 @@ command_no_begin(const char *query)
19141916
*/
19151917
wordlen = 0;
19161918
while (isalpha((unsigned char) query[wordlen]))
1917-
wordlen += PQmblen(&query[wordlen], pset.encoding);
1919+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
19181920

19191921
/*
19201922
* Transaction control commands. These should include every keyword that
@@ -1945,7 +1947,7 @@ command_no_begin(const char *query)
19451947

19461948
wordlen = 0;
19471949
while (isalpha((unsigned char) query[wordlen]))
1948-
wordlen += PQmblen(&query[wordlen], pset.encoding);
1950+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
19491951

19501952
if (wordlen == 11 && pg_strncasecmp(query, "transaction", 11) == 0)
19511953
return true;
@@ -1979,7 +1981,7 @@ command_no_begin(const char *query)
19791981

19801982
wordlen = 0;
19811983
while (isalpha((unsigned char) query[wordlen]))
1982-
wordlen += PQmblen(&query[wordlen], pset.encoding);
1984+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
19831985

19841986
if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0)
19851987
return true;
@@ -1995,7 +1997,7 @@ command_no_begin(const char *query)
19951997

19961998
wordlen = 0;
19971999
while (isalpha((unsigned char) query[wordlen]))
1998-
wordlen += PQmblen(&query[wordlen], pset.encoding);
2000+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
19992001
}
20002002

20012003
if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0)
@@ -2006,7 +2008,7 @@ command_no_begin(const char *query)
20062008

20072009
wordlen = 0;
20082010
while (isalpha((unsigned char) query[wordlen]))
2009-
wordlen += PQmblen(&query[wordlen], pset.encoding);
2011+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
20102012

20112013
if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0)
20122014
return true;
@@ -2023,7 +2025,7 @@ command_no_begin(const char *query)
20232025

20242026
wordlen = 0;
20252027
while (isalpha((unsigned char) query[wordlen]))
2026-
wordlen += PQmblen(&query[wordlen], pset.encoding);
2028+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
20272029

20282030
/* ALTER SYSTEM isn't allowed in xacts */
20292031
if (wordlen == 6 && pg_strncasecmp(query, "system", 6) == 0)
@@ -2046,7 +2048,7 @@ command_no_begin(const char *query)
20462048

20472049
wordlen = 0;
20482050
while (isalpha((unsigned char) query[wordlen]))
2049-
wordlen += PQmblen(&query[wordlen], pset.encoding);
2051+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
20502052

20512053
if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0)
20522054
return true;
@@ -2061,7 +2063,7 @@ command_no_begin(const char *query)
20612063
query = skip_white_space(query);
20622064
wordlen = 0;
20632065
while (isalpha((unsigned char) query[wordlen]))
2064-
wordlen += PQmblen(&query[wordlen], pset.encoding);
2066+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
20652067

20662068
/*
20672069
* REINDEX [ TABLE | INDEX ] CONCURRENTLY are not allowed in
@@ -2080,7 +2082,7 @@ command_no_begin(const char *query)
20802082

20812083
wordlen = 0;
20822084
while (isalpha((unsigned char) query[wordlen]))
2083-
wordlen += PQmblen(&query[wordlen], pset.encoding);
2085+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
20842086

20852087
if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0)
20862088
return true;
@@ -2100,7 +2102,7 @@ command_no_begin(const char *query)
21002102

21012103
wordlen = 0;
21022104
while (isalpha((unsigned char) query[wordlen]))
2103-
wordlen += PQmblen(&query[wordlen], pset.encoding);
2105+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
21042106

21052107
if (wordlen == 3 && pg_strncasecmp(query, "all", 3) == 0)
21062108
return true;
@@ -2136,7 +2138,7 @@ is_select_command(const char *query)
21362138
*/
21372139
wordlen = 0;
21382140
while (isalpha((unsigned char) query[wordlen]))
2139-
wordlen += PQmblen(&query[wordlen], pset.encoding);
2141+
wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
21402142

21412143
if (wordlen == 6 && pg_strncasecmp(query, "select", 6) == 0)
21422144
return true;

src/bin/psql/psqlscanslash.l

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,8 @@
2828
%{
2929
#include "fe_utils/psqlscan_int.h"
3030

31+
#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e))
32+
3133
/*
3234
* We must have a typedef YYSTYPE for yylex's first argument, but this lexer
3335
* doesn't presently make use of that argument, so just declare it as int.
@@ -753,7 +755,7 @@ dequote_downcase_identifier(char *str, bool downcase, int encoding)
753755
{
754756
if (downcase && !inquotes)
755757
*cp = pg_tolower((unsigned char) *cp);
756-
cp += PQmblen(cp, encoding);
758+
cp += PQmblenBounded(cp, encoding);
757759
}
758760
}
759761
}

src/bin/psql/stringutils.c

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@
1212
#include "common.h"
1313
#include "stringutils.h"
1414

15+
#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e))
16+
1517

1618
/*
1719
* Replacement for strtok() (a.k.a. poor man's flex)
@@ -143,7 +145,7 @@ strtokx(const char *s,
143145
/* okay, we have a quoted token, now scan for the closer */
144146
char thisquote = *p++;
145147

146-
for (; *p; p += PQmblen(p, encoding))
148+
for (; *p; p += PQmblenBounded(p, encoding))
147149
{
148150
if (*p == escape && p[1] != '\0')
149151
p++; /* process escaped anything */
@@ -262,7 +264,7 @@ strip_quotes(char *source, char quote, char escape, int encoding)
262264
else if (c == escape && src[1] != '\0')
263265
src++; /* process escaped character */
264266

265-
i = PQmblen(src, encoding);
267+
i = PQmblenBounded(src, encoding);
266268
while (i--)
267269
*dst++ = *src++;
268270
}
@@ -324,7 +326,7 @@ quote_if_needed(const char *source, const char *entails_quote,
324326
else if (strchr(entails_quote, c))
325327
need_quotes = true;
326328

327-
i = PQmblen(src, encoding);
329+
i = PQmblenBounded(src, encoding);
328330
while (i--)
329331
*dst++ = *src++;
330332
}

src/bin/psql/tab-complete.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,8 @@
7373
#define USE_FILENAME_QUOTING_FUNCTIONS 1
7474
#endif
7575

76+
#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e))
77+
7678
/* word break characters */
7779
#define WORD_BREAKS "\t\n@$><=;|&{() "
7880

@@ -4140,7 +4142,7 @@ _complete_from_query(const char *simple_query,
41404142
while (*pstr)
41414143
{
41424144
char_length++;
4143-
pstr += PQmblen(pstr, pset.encoding);
4145+
pstr += PQmblenBounded(pstr, pset.encoding);
41444146
}
41454147

41464148
/* Free any prior result */

src/bin/scripts/common.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,8 @@
2525

2626
#define ERRCODE_UNDEFINED_TABLE "42P01"
2727

28+
#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e))
29+
2830
/*
2931
* Provide strictly harmonized handling of --help and --version
3032
* options.
@@ -368,7 +370,7 @@ splitTableColumnsSpec(const char *spec, int encoding,
368370
cp++;
369371
}
370372
else
371-
cp += PQmblen(cp, encoding);
373+
cp += PQmblenBounded(cp, encoding);
372374
}
373375
*table = pnstrdup(spec, cp - spec);
374376
*columns = cp;

src/common/jsonapi.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -738,7 +738,7 @@ json_lex_string(JsonLexContext *lex)
738738
ch = (ch * 16) + (*s - 'A') + 10;
739739
else
740740
{
741-
lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s);
741+
lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
742742
return JSON_UNICODE_ESCAPE_FORMAT;
743743
}
744744
}
@@ -844,7 +844,7 @@ json_lex_string(JsonLexContext *lex)
844844
default:
845845
/* Not a valid string escape, so signal error. */
846846
lex->token_start = s;
847-
lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s);
847+
lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
848848
return JSON_ESCAPING_INVALID;
849849
}
850850
}
@@ -858,7 +858,7 @@ json_lex_string(JsonLexContext *lex)
858858
* shown it's not a performance win.
859859
*/
860860
lex->token_start = s;
861-
lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s);
861+
lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
862862
return JSON_ESCAPING_INVALID;
863863
}
864864

src/common/wchar.c

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1549,6 +1549,11 @@ const pg_wchar_tbl pg_wchar_table[] = {
15491549

15501550
/*
15511551
* Returns the byte length of a multibyte character.
1552+
*
1553+
* Caution: when dealing with text that is not certainly valid in the
1554+
* specified encoding, the result may exceed the actual remaining
1555+
* string length. Callers that are not prepared to deal with that
1556+
* should use pg_encoding_mblen_bounded() instead.
15521557
*/
15531558
int
15541559
pg_encoding_mblen(int encoding, const char *mbstr)
@@ -1558,6 +1563,16 @@ pg_encoding_mblen(int encoding, const char *mbstr)
15581563
pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
15591564
}
15601565

1566+
/*
1567+
* Returns the byte length of a multibyte character; but not more than
1568+
* the distance to end of string.
1569+
*/
1570+
int
1571+
pg_encoding_mblen_bounded(int encoding, const char *mbstr)
1572+
{
1573+
return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
1574+
}
1575+
15611576
/*
15621577
* Returns the display length of a multibyte character.
15631578
*/

src/fe_utils/print.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3652,6 +3652,9 @@ strlen_max_width(unsigned char *str, int *target_width, int encoding)
36523652
curr_width += char_width;
36533653

36543654
str += PQmblen((char *) str, encoding);
3655+
3656+
if (str > end) /* Don't overrun invalid string */
3657+
str = end;
36553658
}
36563659

36573660
*target_width = curr_width;

src/include/mb/pg_wchar.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -553,6 +553,7 @@ extern int pg_valid_server_encoding_id(int encoding);
553553
* earlier in this file are also available from libpgcommon.
554554
*/
555555
extern int pg_encoding_mblen(int encoding, const char *mbstr);
556+
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
556557
extern int pg_encoding_dsplen(int encoding, const char *mbstr);
557558
extern int pg_encoding_verifymb(int encoding, const char *mbstr, int len);
558559
extern int pg_encoding_max_length(int encoding);

src/interfaces/libpq/fe-print.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
#include "libpq-fe.h"
3737
#include "libpq-int.h"
3838

39+
#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e))
3940

4041
static void do_field(const PQprintOpt *po, const PGresult *res,
4142
const int i, const int j, const int fs_len,
@@ -365,7 +366,7 @@ do_field(const PQprintOpt *po, const PGresult *res,
365366
/* Detect whether field contains non-numeric data */
366367
char ch = '0';
367368

368-
for (p = pval; *p; p += PQmblen(p, res->client_encoding))
369+
for (p = pval; *p; p += PQmblenBounded(p, res->client_encoding))
369370
{
370371
ch = *p;
371372
if (!((ch >= '0' && ch <= '9') ||

src/interfaces/libpq/fe-protocol3.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,8 @@
3939
((id) == 'T' || (id) == 'D' || (id) == 'd' || (id) == 'V' || \
4040
(id) == 'E' || (id) == 'N' || (id) == 'A')
4141

42+
#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e))
43+
4244

4345
static void handleSyncLoss(PGconn *conn, char id, int msgLength);
4446
static int getRowDescriptions(PGconn *conn, int msgLength);
@@ -1241,7 +1243,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
12411243
if (w <= 0)
12421244
w = 1;
12431245
scroffset += w;
1244-
qoffset += pg_encoding_mblen(encoding, &wquery[qoffset]);
1246+
qoffset += PQmblenBounded(&wquery[qoffset], encoding);
12451247
}
12461248
else
12471249
{
@@ -1309,7 +1311,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
13091311
* width.
13101312
*/
13111313
scroffset = 0;
1312-
for (; i < msg->len; i += pg_encoding_mblen(encoding, &msg->data[i]))
1314+
for (; i < msg->len; i += PQmblenBounded(&msg->data[i], encoding))
13131315
{
13141316
int w = pg_encoding_dsplen(encoding, &msg->data[i]);
13151317

0 commit comments

Comments
 (0)