Skip to content

Commit 6899be2

Browse files
committed
Avoid unnecessary out-of-memory errors during encoding conversion.
Encoding conversion uses the very simplistic rule that the output can't be more than 4X longer than the input, and palloc's a buffer of that size. This results in failure to convert any string longer than 1/4 GB, which is becoming an annoying limitation.

As a band-aid to improve matters, allow the allocated output buffer size to exceed 1GB. We still insist that the final result fit into MaxAllocSize (1GB), though. Perhaps it'd be safe to relax that restriction, but it'd require close analysis of all callers, which is daunting (not least because external modules might call these functions). For the moment, this should allow a 2X to 4X improvement in the longest string we can convert, which is a useful gain in return for quite a simple patch.

Also, once we have successfully converted a long string, repalloc the output down to the actual string length, returning the excess to the malloc pool. This seems worth doing since we can usually expect to give back several MB if we take this path at all.

This still leaves much to be desired, most notably that the assumption that MAX_CONVERSION_GROWTH == 4 is very fragile, and yet we have no guard code verifying that the output buffer isn't overrun. Fixing that would require significant changes in the encoding conversion APIs, so it'll have to wait for some other day.

The present patch seems safely back-patchable, so patch all supported branches.

Alvaro Herrera and Tom Lane

Discussion: https://postgr.es/m/20190816181418.GA898@alvherre.pgsql
Discussion: https://postgr.es/m/3614.1569359690@sss.pgh.pa.us
1 parent 4829576 commit 6899be2

File tree

1 file changed

+60
-10
lines changed

1 file changed

+60
-10
lines changed

src/backend/utils/mb/mbutils.c

Lines changed: 60 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -368,23 +368,51 @@ pg_do_encoding_conversion(unsigned char *src, int len,
368368
pg_encoding_to_char(dest_encoding))));
369369

370370
/*
371-
* Allocate space for conversion result, being wary of integer overflow
371+
* Allocate space for conversion result, being wary of integer overflow.
372+
*
373+
* len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
374+
* required space, so it might exceed MaxAllocSize even though the result
375+
* would actually fit. We do not want to hand back a result string that
376+
* exceeds MaxAllocSize, because callers might not cope gracefully --- but
377+
* if we just allocate more than that, and don't use it, that's fine.
372378
*/
373-
if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
379+
if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
374380
ereport(ERROR,
375381
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
376382
errmsg("out of memory"),
377-
errdetail("String of %d bytes is too long for encoding conversion.",
378-
len)));
383+
errdetail("String of %d bytes is too long for encoding conversion.",
384+
len)));
379385

380-
result = palloc(len * MAX_CONVERSION_GROWTH + 1);
386+
result = (unsigned char *)
387+
MemoryContextAllocHuge(CurrentMemoryContext,
388+
(Size) len * MAX_CONVERSION_GROWTH + 1);
381389

382390
OidFunctionCall5(proc,
383391
Int32GetDatum(src_encoding),
384392
Int32GetDatum(dest_encoding),
385393
CStringGetDatum(src),
386394
CStringGetDatum(result),
387395
Int32GetDatum(len));
396+
397+
/*
398+
* If the result is large, it's worth repalloc'ing to release any extra
399+
* space we asked for. The cutoff here is somewhat arbitrary, but we
400+
* *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
401+
*/
402+
if (len > 1000000)
403+
{
404+
Size resultlen = strlen((char *) result);
405+
406+
if (resultlen >= MaxAllocSize)
407+
ereport(ERROR,
408+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
409+
errmsg("out of memory"),
410+
errdetail("String of %d bytes is too long for encoding conversion.",
411+
len)));
412+
413+
result = (unsigned char *) repalloc(result, resultlen + 1);
414+
}
415+
388416
return result;
389417
}
390418

@@ -701,23 +729,45 @@ perform_default_encoding_conversion(const char *src, int len,
701729
return (char *) src;
702730

703731
/*
704-
* Allocate space for conversion result, being wary of integer overflow
732+
* Allocate space for conversion result, being wary of integer overflow.
733+
* See comments in pg_do_encoding_conversion.
705734
*/
706-
if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
735+
if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
707736
ereport(ERROR,
708737
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
709738
errmsg("out of memory"),
710-
errdetail("String of %d bytes is too long for encoding conversion.",
711-
len)));
739+
errdetail("String of %d bytes is too long for encoding conversion.",
740+
len)));
712741

713-
result = palloc(len * MAX_CONVERSION_GROWTH + 1);
742+
result = (char *)
743+
MemoryContextAllocHuge(CurrentMemoryContext,
744+
(Size) len * MAX_CONVERSION_GROWTH + 1);
714745

715746
FunctionCall5(flinfo,
716747
Int32GetDatum(src_encoding),
717748
Int32GetDatum(dest_encoding),
718749
CStringGetDatum(src),
719750
CStringGetDatum(result),
720751
Int32GetDatum(len));
752+
753+
/*
754+
* Release extra space if there might be a lot --- see comments in
755+
* pg_do_encoding_conversion.
756+
*/
757+
if (len > 1000000)
758+
{
759+
Size resultlen = strlen(result);
760+
761+
if (resultlen >= MaxAllocSize)
762+
ereport(ERROR,
763+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
764+
errmsg("out of memory"),
765+
errdetail("String of %d bytes is too long for encoding conversion.",
766+
len)));
767+
768+
result = (char *) repalloc(result, resultlen + 1);
769+
}
770+
721771
return result;
722772
}
723773

0 commit comments

Comments
 (0)