Skip to content

Commit 757c518

Browse files
committed
Avoid crashes in contrib/intarray gist__int_ops (bug #15518)
1. Integer overflow in internal_size could result in memory corruption in decompression since a zero-length array would be allocated and then written to. This leads to crashes or corruption when traversing an index which has been populated with sufficiently sparse values. Fix by using int64 for computations and checking for overflow. 2. Integer overflow in g_int_compress could cause pessimal merge choices, resulting in unnecessarily large ranges (which would in turn trigger issue 1 above). Fix by using int64 again. 3. Even without overflow, array sizes could become large enough to cause unexplained memory allocation errors. Fix by capping the sizes to a safe limit and report actual errors pointing at gist__intbig_ops as needed. 4. Large inputs to the compression function always consist of large runs of consecutive integers, and the compression loop was processing these one at a time in an O(N^2) manner with a lot of overhead. The expected runtime of this function could easily exceed 6 months for a single call as a result. Fix by performing a linear-time first pass, which reduces the worst case to something on the order of seconds. Backpatch all the way, since this has been wrong forever. Per bug #15518 from report from irc user "dymk", analysis and patch by me. Discussion: https://postgr.es/m/15518-799e426c3b4f8358@postgresql.org
1 parent 452b637 commit 757c518

File tree

2 files changed

+73
-12
lines changed

2 files changed

+73
-12
lines changed

contrib/intarray/_int_gist.c

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,17 @@
1212

1313
#define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
1414

15+
/*
16+
* Control the maximum sparseness of compressed keys.
17+
*
18+
* The upper safe bound for this limit is half the maximum allocatable array
19+
* size. A lower bound would give more guarantees that pathological data
20+
* wouldn't eat excessive CPU and memory, but at the expense of breaking
21+
* possibly working (after a fashion) indexes.
22+
*/
23+
#define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2)
24+
/* or: #define MAXNUMELTS 1000000 */
25+
1526
/*
1627
** GiST support methods
1728
*/
@@ -141,11 +152,13 @@ g_int_compress(PG_FUNCTION_ARGS)
141152
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
142153
GISTENTRY *retval;
143154
ArrayType *r;
144-
int len;
155+
int len,
156+
lenr;
145157
int *dr;
146158
int i,
147-
min,
159+
j,
148160
cand;
161+
int64 min;
149162

150163
if (entry->leafkey)
151164
{
@@ -186,23 +199,62 @@ g_int_compress(PG_FUNCTION_ARGS)
186199

187200
dr = ARRPTR(r);
188201

189-
for (i = len - 1; i >= 0; i--)
190-
dr[2 * i] = dr[2 * i + 1] = dr[i];
202+
/*
203+
* "len" at this point is the number of ranges we will construct.
204+
* "lenr" is the number of ranges we must eventually remove by
205+
* merging, we must be careful to remove no more than this number.
206+
*/
207+
lenr = len - MAXNUMRANGE;
208+
209+
/*
210+
* Initially assume we can merge consecutive ints into a range. but we
211+
* must count every value removed and stop when lenr runs out
212+
*/
213+
for (j = i = len - 1; i > 0 && lenr > 0; i--, j--)
214+
{
215+
int r_end = dr[i];
216+
int r_start = r_end;
217+
while (i > 0 && lenr > 0 && dr[i-1] == r_start - 1)
218+
--r_start, --i, --lenr;
219+
dr[2*j] = r_start;
220+
dr[2*j+1] = r_end;
221+
}
222+
/* just copy the rest, if any, as trivial ranges */
223+
for (; i >= 0; i--, j--)
224+
dr[2*j] = dr[2*j + 1] = dr[i];
191225

192-
len *= 2;
226+
if (++j)
227+
{
228+
/*
229+
* shunt everything down to start at the right place
230+
*/
231+
memmove((void *) &dr[0], (void *) &dr[2*j], 2*(len - j) * sizeof(int32));
232+
}
233+
/*
234+
* make "len" be number of array elements, not ranges
235+
*/
236+
len = 2*(len - j);
193237
cand = 1;
194238
while (len > MAXNUMRANGE * 2)
195239
{
196-
min = INT_MAX;
240+
min = PG_INT64_MAX;
197241
for (i = 2; i < len; i += 2)
198-
if (min > (dr[i] - dr[i - 1]))
242+
if (min > ((int64)dr[i] - (int64)dr[i - 1]))
199243
{
200-
min = (dr[i] - dr[i - 1]);
244+
min = ((int64)dr[i] - (int64)dr[i - 1]);
201245
cand = i;
202246
}
203247
memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
204248
len -= 2;
205249
}
250+
/*
251+
* check sparseness of result
252+
*/
253+
lenr = internal_size(dr, len);
254+
if (lenr < 0 || lenr > MAXNUMELTS)
255+
ereport(ERROR,
256+
(errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead")));
257+
206258
r = resize_intArrayType(r, len);
207259
retval = palloc(sizeof(GISTENTRY));
208260
gistentryinit(*retval, PointerGetDatum(r),
@@ -260,6 +312,9 @@ g_int_decompress(PG_FUNCTION_ARGS)
260312

261313
din = ARRPTR(in);
262314
lenr = internal_size(din, lenin);
315+
if (lenr < 0 || lenr > MAXNUMELTS)
316+
ereport(ERROR,
317+
(errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead")));
263318

264319
r = new_intArrayType(lenr);
265320
dr = ARRPTR(r);

contrib/intarray/_int_tool.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
*/
44
#include "postgres.h"
55

6+
#include <limits.h>
7+
68
#include "catalog/pg_type.h"
79

810
#include "_int.h"
@@ -225,6 +227,7 @@ new_intArrayType(int num)
225227
/* if no elements, return a zero-dimensional array */
226228
if (num <= 0)
227229
{
230+
Assert(num == 0);
228231
r = construct_empty_array(INT4OID);
229232
return r;
230233
}
@@ -252,6 +255,7 @@ resize_intArrayType(ArrayType *a, int num)
252255
/* if no elements, return a zero-dimensional array */
253256
if (num <= 0)
254257
{
258+
Assert(num == 0);
255259
ARR_NDIM(a) = 0;
256260
return a;
257261
}
@@ -288,16 +292,18 @@ copy_intArrayType(ArrayType *a)
288292
int
289293
internal_size(int *a, int len)
290294
{
291-
int i,
292-
size = 0;
295+
int i;
296+
int64 size = 0;
293297

294298
for (i = 0; i < len; i += 2)
295299
{
296300
if (!i || a[i] != a[i - 1]) /* do not count repeated range */
297-
size += a[i + 1] - a[i] + 1;
301+
size += (int64)(a[i + 1]) - (int64)(a[i]) + 1;
298302
}
299303

300-
return size;
304+
if (size > (int64)INT_MAX || size < (int64)INT_MIN)
305+
return -1; /* overflow */
306+
return (int) size;
301307
}
302308

303309
/* unique-ify elements of r in-place ... r must be sorted already */

0 commit comments

Comments
 (0)