|
12 | 12 |
|
13 | 13 | #define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
|
14 | 14 |
|
| 15 | +/* |
| 16 | + * Control the maximum sparseness of compressed keys. |
| 17 | + * |
| 18 | + * The upper safe bound for this limit is half the maximum allocatable array |
| 19 | + * size. A lower bound would give more guarantees that pathological data |
| 20 | + * wouldn't eat excessive CPU and memory, but at the expense of breaking |
| 21 | + * possibly working (after a fashion) indexes. |
| 22 | + */ |
| 23 | +#define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2) |
| 24 | +/* or: #define MAXNUMELTS 1000000 */ |
| 25 | + |
15 | 26 | /*
|
16 | 27 | ** GiST support methods
|
17 | 28 | */
|
@@ -141,11 +152,13 @@ g_int_compress(PG_FUNCTION_ARGS)
|
141 | 152 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
142 | 153 | GISTENTRY *retval;
|
143 | 154 | ArrayType *r;
|
144 |
| - int len; |
| 155 | + int len, |
| 156 | + lenr; |
145 | 157 | int *dr;
|
146 | 158 | int i,
|
147 |
| - min, |
| 159 | + j, |
148 | 160 | cand;
|
| 161 | + int64 min; |
149 | 162 |
|
150 | 163 | if (entry->leafkey)
|
151 | 164 | {
|
@@ -186,23 +199,62 @@ g_int_compress(PG_FUNCTION_ARGS)
|
186 | 199 |
|
187 | 200 | dr = ARRPTR(r);
|
188 | 201 |
|
189 |
| - for (i = len - 1; i >= 0; i--) |
190 |
| - dr[2 * i] = dr[2 * i + 1] = dr[i]; |
| 202 | + /* |
| 203 | + * "len" at this point is the number of ranges we will construct. |
| 204 | + * "lenr" is the number of ranges we must eventually remove by |
| 205 | + * merging, we must be careful to remove no more than this number. |
| 206 | + */ |
| 207 | + lenr = len - MAXNUMRANGE; |
| 208 | + |
| 209 | + /* |
| 210 | + * Initially assume we can merge consecutive ints into a range. but we |
| 211 | + * must count every value removed and stop when lenr runs out |
| 212 | + */ |
| 213 | + for (j = i = len - 1; i > 0 && lenr > 0; i--, j--) |
| 214 | + { |
| 215 | + int r_end = dr[i]; |
| 216 | + int r_start = r_end; |
| 217 | + while (i > 0 && lenr > 0 && dr[i-1] == r_start - 1) |
| 218 | + --r_start, --i, --lenr; |
| 219 | + dr[2*j] = r_start; |
| 220 | + dr[2*j+1] = r_end; |
| 221 | + } |
| 222 | + /* just copy the rest, if any, as trivial ranges */ |
| 223 | + for (; i >= 0; i--, j--) |
| 224 | + dr[2*j] = dr[2*j + 1] = dr[i]; |
191 | 225 |
|
192 |
| - len *= 2; |
| 226 | + if (++j) |
| 227 | + { |
| 228 | + /* |
| 229 | + * shunt everything down to start at the right place |
| 230 | + */ |
| 231 | + memmove((void *) &dr[0], (void *) &dr[2*j], 2*(len - j) * sizeof(int32)); |
| 232 | + } |
| 233 | + /* |
| 234 | + * make "len" be number of array elements, not ranges |
| 235 | + */ |
| 236 | + len = 2*(len - j); |
193 | 237 | cand = 1;
|
194 | 238 | while (len > MAXNUMRANGE * 2)
|
195 | 239 | {
|
196 |
| - min = INT_MAX; |
| 240 | + min = PG_INT64_MAX; |
197 | 241 | for (i = 2; i < len; i += 2)
|
198 |
| - if (min > (dr[i] - dr[i - 1])) |
| 242 | + if (min > ((int64)dr[i] - (int64)dr[i - 1])) |
199 | 243 | {
|
200 |
| - min = (dr[i] - dr[i - 1]); |
| 244 | + min = ((int64)dr[i] - (int64)dr[i - 1]); |
201 | 245 | cand = i;
|
202 | 246 | }
|
203 | 247 | memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
|
204 | 248 | len -= 2;
|
205 | 249 | }
|
| 250 | + /* |
| 251 | + * check sparseness of result |
| 252 | + */ |
| 253 | + lenr = internal_size(dr, len); |
| 254 | + if (lenr < 0 || lenr > MAXNUMELTS) |
| 255 | + ereport(ERROR, |
| 256 | + (errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead"))); |
| 257 | + |
206 | 258 | r = resize_intArrayType(r, len);
|
207 | 259 | retval = palloc(sizeof(GISTENTRY));
|
208 | 260 | gistentryinit(*retval, PointerGetDatum(r),
|
@@ -260,6 +312,9 @@ g_int_decompress(PG_FUNCTION_ARGS)
|
260 | 312 |
|
261 | 313 | din = ARRPTR(in);
|
262 | 314 | lenr = internal_size(din, lenin);
|
| 315 | + if (lenr < 0 || lenr > MAXNUMELTS) |
| 316 | + ereport(ERROR, |
| 317 | + (errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead"))); |
263 | 318 |
|
264 | 319 | r = new_intArrayType(lenr);
|
265 | 320 | dr = ARRPTR(r);
|
|
0 commit comments