Skip to content

Commit 4ca7a2d

Browse files
committed
Make replace(), split_part(), and string_to_array() behave somewhat sanely
when handed an invalidly encoded pattern. The previous coding could get into an infinite loop if pg_mb2wchar_with_len() returned a zero-length string after we'd tested for a nonempty pattern, which is exactly what it will do if the string consists only of an incomplete multibyte character. This led to either an out-of-memory error or a backend crash, depending on the platform. Per report from Wiktor Wodecki.
1 parent 0e5b4f0 commit 4ca7a2d

File tree

1 file changed

+53
-14
lines changed

1 file changed

+53
-14
lines changed

src/backend/utils/adt/varlena.c

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.156 2007/04/06 04:21:43 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.157 2007/07/19 20:34:20 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -2111,8 +2111,8 @@ replace_text(PG_FUNCTION_ARGS)
21112111
text *src_text = PG_GETARG_TEXT_P(0);
21122112
text *from_sub_text = PG_GETARG_TEXT_P(1);
21132113
text *to_sub_text = PG_GETARG_TEXT_P(2);
2114-
int src_text_len = TEXTLEN(src_text);
2115-
int from_sub_text_len = TEXTLEN(from_sub_text);
2114+
int src_text_len;
2115+
int from_sub_text_len;
21162116
TextPositionState state;
21172117
text *ret_text;
21182118
int start_posn;
@@ -2121,11 +2121,22 @@ replace_text(PG_FUNCTION_ARGS)
21212121
char *start_ptr;
21222122
StringInfoData str;
21232123

2124-
if (src_text_len == 0 || from_sub_text_len == 0)
2125-
PG_RETURN_TEXT_P(src_text);
2126-
21272124
text_position_setup(src_text, from_sub_text, &state);
21282125

2126+
/*
2127+
* Note: we check the converted string length, not the original, because
2128+
* they could be different if the input contained invalid encoding.
2129+
*/
2130+
src_text_len = state.len1;
2131+
from_sub_text_len = state.len2;
2132+
2133+
/* Return unmodified source string if empty source or pattern */
2134+
if (src_text_len < 1 || from_sub_text_len < 1)
2135+
{
2136+
text_position_cleanup(&state);
2137+
PG_RETURN_TEXT_P(src_text);
2138+
}
2139+
21292140
start_posn = 1;
21302141
curr_posn = text_position_next(1, &state);
21312142

@@ -2143,6 +2154,8 @@ replace_text(PG_FUNCTION_ARGS)
21432154

21442155
do
21452156
{
2157+
CHECK_FOR_INTERRUPTS();
2158+
21462159
/* copy the data skipped over by last text_position_next() */
21472160
chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
21482161
appendBinaryStringInfo(&str, start_ptr, chunk_len);
@@ -2449,8 +2462,8 @@ split_text(PG_FUNCTION_ARGS)
24492462
text *inputstring = PG_GETARG_TEXT_P(0);
24502463
text *fldsep = PG_GETARG_TEXT_P(1);
24512464
int fldnum = PG_GETARG_INT32(2);
2452-
int inputstring_len = TEXTLEN(inputstring);
2453-
int fldsep_len = TEXTLEN(fldsep);
2465+
int inputstring_len;
2466+
int fldsep_len;
24542467
TextPositionState state;
24552468
int start_posn;
24562469
int end_posn;
@@ -2462,22 +2475,33 @@ split_text(PG_FUNCTION_ARGS)
24622475
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
24632476
errmsg("field position must be greater than zero")));
24642477

2478+
text_position_setup(inputstring, fldsep, &state);
2479+
2480+
/*
2481+
* Note: we check the converted string length, not the original, because
2482+
* they could be different if the input contained invalid encoding.
2483+
*/
2484+
inputstring_len = state.len1;
2485+
fldsep_len = state.len2;
2486+
24652487
/* return empty string for empty input string */
24662488
if (inputstring_len < 1)
2489+
{
2490+
text_position_cleanup(&state);
24672491
PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2492+
}
24682493

24692494
/* empty field separator */
24702495
if (fldsep_len < 1)
24712496
{
2497+
text_position_cleanup(&state);
24722498
/* if first field, return input string, else empty string */
24732499
if (fldnum == 1)
24742500
PG_RETURN_TEXT_P(inputstring);
24752501
else
24762502
PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
24772503
}
24782504

2479-
text_position_setup(inputstring, fldsep, &state);
2480-
24812505
/* identify bounds of first field */
24822506
start_posn = 1;
24832507
end_posn = text_position_next(1, &state);
@@ -2537,8 +2561,8 @@ text_to_array(PG_FUNCTION_ARGS)
25372561
{
25382562
text *inputstring = PG_GETARG_TEXT_P(0);
25392563
text *fldsep = PG_GETARG_TEXT_P(1);
2540-
int inputstring_len = TEXTLEN(inputstring);
2541-
int fldsep_len = TEXTLEN(fldsep);
2564+
int inputstring_len;
2565+
int fldsep_len;
25422566
TextPositionState state;
25432567
int fldnum;
25442568
int start_posn;
@@ -2548,26 +2572,41 @@ text_to_array(PG_FUNCTION_ARGS)
25482572
text *result_text;
25492573
ArrayBuildState *astate = NULL;
25502574

2575+
text_position_setup(inputstring, fldsep, &state);
2576+
2577+
/*
2578+
* Note: we check the converted string length, not the original, because
2579+
* they could be different if the input contained invalid encoding.
2580+
*/
2581+
inputstring_len = state.len1;
2582+
fldsep_len = state.len2;
2583+
25512584
/* return NULL for empty input string */
25522585
if (inputstring_len < 1)
2586+
{
2587+
text_position_cleanup(&state);
25532588
PG_RETURN_NULL();
2589+
}
25542590

25552591
/*
25562592
* empty field separator return one element, 1D, array using the input
25572593
* string
25582594
*/
25592595
if (fldsep_len < 1)
2596+
{
2597+
text_position_cleanup(&state);
25602598
PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
25612599
PointerGetDatum(inputstring), 1));
2562-
2563-
text_position_setup(inputstring, fldsep, &state);
2600+
}
25642601

25652602
start_posn = 1;
25662603
/* start_ptr points to the start_posn'th character of inputstring */
25672604
start_ptr = (char *) VARDATA(inputstring);
25682605

25692606
for (fldnum = 1;; fldnum++) /* field number is 1 based */
25702607
{
2608+
CHECK_FOR_INTERRUPTS();
2609+
25712610
end_posn = text_position_next(start_posn, &state);
25722611

25732612
if (end_posn == 0)

0 commit comments

Comments
 (0)