Skip to content

Commit 2f30b31

Browse files
committed
Avoid crashes in contrib/intarray gist__int_ops (bug #15518)
1. Integer overflow in internal_size could result in memory corruption in decompression, since a too-small array would be allocated and then written to. This leads to crashes or corruption when traversing an index which has been populated with sufficiently sparse values. Fix by using int64 for computations and checking for overflow. 2. Integer overflow in g_int_compress could cause pessimal merge choices, resulting in unnecessarily large ranges (which would in turn trigger issue 1 above). Fix by using int64 again. 3. Even without overflow, array sizes could become large enough to cause unexplained memory allocation errors. Fix by capping the sizes to a safe limit and reporting an actual error that points the user at the gist__intbig_ops opclass instead. 4. Large inputs to the compression function always consist of large runs of consecutive integers, and the compression loop was processing these one at a time in an O(N^2) manner with a lot of overhead. The expected runtime of this function could easily exceed 6 months for a single call as a result. Fix by performing a linear-time first pass, which reduces the worst case to something on the order of seconds. Backpatch all the way, since this has been wrong forever. Per bug #15518, reported by irc user "dymk"; analysis and patch by me. Discussion: https://postgr.es/m/15518-799e426c3b4f8358@postgresql.org
1 parent e341d03 commit 2f30b31

File tree

2 files changed

+71
-12
lines changed

2 files changed

+71
-12
lines changed

contrib/intarray/_int_gist.c

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,17 @@
1010

1111
#define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
1212

13+
/*
14+
* Control the maximum sparseness of compressed keys.
15+
*
16+
* The upper safe bound for this limit is half the maximum allocatable array
17+
* size. A lower bound would give more guarantees that pathological data
18+
* wouldn't eat excessive CPU and memory, but at the expense of breaking
19+
* possibly working (after a fashion) indexes.
20+
*/
21+
#define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2)
22+
/* or: #define MAXNUMELTS 1000000 */
23+
1324
/*
1425
** GiST support methods
1526
*/
@@ -139,11 +150,13 @@ g_int_compress(PG_FUNCTION_ARGS)
139150
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
140151
GISTENTRY *retval;
141152
ArrayType *r;
142-
int len;
153+
int len,
154+
lenr;
143155
int *dr;
144156
int i,
145-
min,
157+
j,
146158
cand;
159+
int64 min;
147160

148161
if (entry->leafkey)
149162
{
@@ -184,23 +197,62 @@ g_int_compress(PG_FUNCTION_ARGS)
184197

185198
dr = ARRPTR(r);
186199

187-
for (i = len - 1; i >= 0; i--)
188-
dr[2 * i] = dr[2 * i + 1] = dr[i];
200+
/*
201+
* "len" at this point is the number of ranges we will construct.
202+
* "lenr" is the number of ranges we must eventually remove by
203+
* merging, we must be careful to remove no more than this number.
204+
*/
205+
lenr = len - MAXNUMRANGE;
206+
207+
/*
208+
* Initially assume we can merge consecutive ints into a range. but we
209+
* must count every value removed and stop when lenr runs out
210+
*/
211+
for (j = i = len - 1; i > 0 && lenr > 0; i--, j--)
212+
{
213+
int r_end = dr[i];
214+
int r_start = r_end;
215+
while (i > 0 && lenr > 0 && dr[i-1] == r_start - 1)
216+
--r_start, --i, --lenr;
217+
dr[2*j] = r_start;
218+
dr[2*j+1] = r_end;
219+
}
220+
/* just copy the rest, if any, as trivial ranges */
221+
for (; i >= 0; i--, j--)
222+
dr[2*j] = dr[2*j + 1] = dr[i];
189223

190-
len *= 2;
224+
if (++j)
225+
{
226+
/*
227+
* shunt everything down to start at the right place
228+
*/
229+
memmove((void *) &dr[0], (void *) &dr[2*j], 2*(len - j) * sizeof(int32));
230+
}
231+
/*
232+
* make "len" be number of array elements, not ranges
233+
*/
234+
len = 2*(len - j);
191235
cand = 1;
192236
while (len > MAXNUMRANGE * 2)
193237
{
194-
min = 0x7fffffff;
238+
min = INT64CONST(0x7fffffffffffffff);
195239
for (i = 2; i < len; i += 2)
196-
if (min > (dr[i] - dr[i - 1]))
240+
if (min > ((int64)dr[i] - (int64)dr[i - 1]))
197241
{
198-
min = (dr[i] - dr[i - 1]);
242+
min = ((int64)dr[i] - (int64)dr[i - 1]);
199243
cand = i;
200244
}
201245
memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
202246
len -= 2;
203247
}
248+
/*
249+
* check sparseness of result
250+
*/
251+
lenr = internal_size(dr, len);
252+
if (lenr < 0 || lenr > MAXNUMELTS)
253+
ereport(ERROR,
254+
(errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead")));
255+
204256
r = resize_intArrayType(r, len);
205257
retval = palloc(sizeof(GISTENTRY));
206258
gistentryinit(*retval, PointerGetDatum(r),
@@ -258,6 +310,9 @@ g_int_decompress(PG_FUNCTION_ARGS)
258310

259311
din = ARRPTR(in);
260312
lenr = internal_size(din, lenin);
313+
if (lenr < 0 || lenr > MAXNUMELTS)
314+
ereport(ERROR,
315+
(errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead")));
261316

262317
r = new_intArrayType(lenr);
263318
dr = ARRPTR(r);

contrib/intarray/_int_tool.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
*/
44
#include "postgres.h"
55

6+
#include <limits.h>
7+
68
#include "catalog/pg_type.h"
79

810
#include "_int.h"
@@ -277,16 +279,18 @@ copy_intArrayType(ArrayType *a)
277279
/*
 * internal_size --- count the total number of integers represented by an
 * array of [start, end] range pairs.
 *
 * "a" holds len ints, interpreted as len/2 inclusive ranges
 * (a[0]..a[1], a[2]..a[3], ...).  A range whose start equals the previous
 * range's end is skipped so that shared boundary values are not counted
 * twice ("do not count repeated range").
 *
 * The sum is accumulated in 64-bit arithmetic: with ranges spanning most
 * of the int domain the count can exceed INT_MAX, and signed-int overflow
 * would be undefined behavior (and previously led to under-allocation and
 * memory corruption in decompression, per bug #15518).
 *
 * Returns the element count, or -1 if it does not fit in an int.
 */
int
internal_size(int *a, int len)
{
	int			i;
	long long	size = 0;	/* 64-bit to avoid intermediate overflow */

	for (i = 0; i < len; i += 2)
	{
		/* do not count repeated range */
		if (!i || a[i] != a[i - 1])
			size += (long long) a[i + 1] - (long long) a[i] + 1;
	}

	if (size > (long long) INT_MAX || size < (long long) INT_MIN)
		return -1;				/* overflow */
	return (int) size;
}
291295

292296
/* unique-ify elements of r in-place ... r must be sorted already */

0 commit comments

Comments
 (0)