|
10 | 10 |
|
11 | 11 | #define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
|
12 | 12 |
|
| 13 | +/* |
| 14 | + * Control the maximum sparseness of compressed keys. |
| 15 | + * |
| 16 | + * The upper safe bound for this limit is half the maximum allocatable array |
| 17 | + * size. A lower bound would give more guarantees that pathological data |
| 18 | + * wouldn't eat excessive CPU and memory, but at the expense of breaking |
| 19 | + * possibly working (after a fashion) indexes. |
| 20 | + */ |
| 21 | +#define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2) |
| 22 | +/* or: #define MAXNUMELTS 1000000 */ |
| 23 | + |
13 | 24 | /*
|
14 | 25 | ** GiST support methods
|
15 | 26 | */
|
@@ -139,11 +150,13 @@ g_int_compress(PG_FUNCTION_ARGS)
|
139 | 150 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
140 | 151 | GISTENTRY *retval;
|
141 | 152 | ArrayType *r;
|
142 |
| - int len; |
| 153 | + int len, |
| 154 | + lenr; |
143 | 155 | int *dr;
|
144 | 156 | int i,
|
145 |
| - min, |
| 157 | + j, |
146 | 158 | cand;
|
| 159 | + int64 min; |
147 | 160 |
|
148 | 161 | if (entry->leafkey)
|
149 | 162 | {
|
@@ -184,23 +197,62 @@ g_int_compress(PG_FUNCTION_ARGS)
|
184 | 197 |
|
185 | 198 | dr = ARRPTR(r);
|
186 | 199 |
|
187 |
| - for (i = len - 1; i >= 0; i--) |
188 |
| - dr[2 * i] = dr[2 * i + 1] = dr[i]; |
| 200 | + /* |
| 201 | + * "len" at this point is the number of ranges we will construct. |
| 202 | + * "lenr" is the number of ranges we must eventually remove by |
| 203 | + * merging, we must be careful to remove no more than this number. |
| 204 | + */ |
| 205 | + lenr = len - MAXNUMRANGE; |
| 206 | + |
| 207 | + /* |
| 208 | + * Initially assume we can merge consecutive ints into a range. but we |
| 209 | + * must count every value removed and stop when lenr runs out |
| 210 | + */ |
| 211 | + for (j = i = len - 1; i > 0 && lenr > 0; i--, j--) |
| 212 | + { |
| 213 | + int r_end = dr[i]; |
| 214 | + int r_start = r_end; |
| 215 | + while (i > 0 && lenr > 0 && dr[i-1] == r_start - 1) |
| 216 | + --r_start, --i, --lenr; |
| 217 | + dr[2*j] = r_start; |
| 218 | + dr[2*j+1] = r_end; |
| 219 | + } |
| 220 | + /* just copy the rest, if any, as trivial ranges */ |
| 221 | + for (; i >= 0; i--, j--) |
| 222 | + dr[2*j] = dr[2*j + 1] = dr[i]; |
189 | 223 |
|
190 |
| - len *= 2; |
| 224 | + if (++j) |
| 225 | + { |
| 226 | + /* |
| 227 | + * shunt everything down to start at the right place |
| 228 | + */ |
| 229 | + memmove((void *) &dr[0], (void *) &dr[2*j], 2*(len - j) * sizeof(int32)); |
| 230 | + } |
| 231 | + /* |
| 232 | + * make "len" be number of array elements, not ranges |
| 233 | + */ |
| 234 | + len = 2*(len - j); |
191 | 235 | cand = 1;
|
192 | 236 | while (len > MAXNUMRANGE * 2)
|
193 | 237 | {
|
194 |
| - min = 0x7fffffff; |
| 238 | + min = INT64CONST(0x7fffffffffffffff); |
195 | 239 | for (i = 2; i < len; i += 2)
|
196 |
| - if (min > (dr[i] - dr[i - 1])) |
| 240 | + if (min > ((int64)dr[i] - (int64)dr[i - 1])) |
197 | 241 | {
|
198 |
| - min = (dr[i] - dr[i - 1]); |
| 242 | + min = ((int64)dr[i] - (int64)dr[i - 1]); |
199 | 243 | cand = i;
|
200 | 244 | }
|
201 | 245 | memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
|
202 | 246 | len -= 2;
|
203 | 247 | }
|
| 248 | + /* |
| 249 | + * check sparseness of result |
| 250 | + */ |
| 251 | + lenr = internal_size(dr, len); |
| 252 | + if (lenr < 0 || lenr > MAXNUMELTS) |
| 253 | + ereport(ERROR, |
| 254 | + (errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead"))); |
| 255 | + |
204 | 256 | r = resize_intArrayType(r, len);
|
205 | 257 | retval = palloc(sizeof(GISTENTRY));
|
206 | 258 | gistentryinit(*retval, PointerGetDatum(r),
|
@@ -258,6 +310,9 @@ g_int_decompress(PG_FUNCTION_ARGS)
|
258 | 310 |
|
259 | 311 | din = ARRPTR(in);
|
260 | 312 | lenr = internal_size(din, lenin);
|
| 313 | + if (lenr < 0 || lenr > MAXNUMELTS) |
| 314 | + ereport(ERROR, |
| 315 | + (errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead"))); |
261 | 316 |
|
262 | 317 | r = new_intArrayType(lenr);
|
263 | 318 | dr = ARRPTR(r);
|
|
0 commit comments