Skip to content

Commit 1da2a61

Browse files
committed
Back-patch recent fixes for gistchoose and gistRelocateBuildBuffersOnSplit.
This back-ports commits c8ba697 and e5db11c, which fix one definite and one speculative bug in gistchoose, and make the code a lot more intelligible as well. In 9.2 only, this also affects the largely-copied-and-pasted logic in gistRelocateBuildBuffersOnSplit. The impact of the bugs was that the functions might make poor decisions as to which index tree branch to push a new entry down into, resulting in GiST index bloat and poor performance. The fixes rectify these decisions for future insertions, but a REINDEX would be needed to clean up any existing index bloat. Alexander Korotkov, Robert Haas, Tom Lane
1 parent 4fb505d commit 1da2a61

File tree

1 file changed

+73
-25
lines changed

1 file changed

+73
-25
lines changed

src/backend/access/gist/gistutil.c

Lines changed: 73 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -372,36 +372,55 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
372372
}
373373

374374
/*
375-
* find entry with lowest penalty
375+
* Search an upper index page for the entry with lowest penalty for insertion
376+
* of the new index key contained in "it".
377+
*
378+
* Returns the index of the page entry to insert into.
376379
*/
377380
OffsetNumber
378381
gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
379382
GISTSTATE *giststate)
380383
{
384+
OffsetNumber result;
381385
OffsetNumber maxoff;
382386
OffsetNumber i;
383-
OffsetNumber which;
384-
float sum_grow,
385-
which_grow[INDEX_MAX_KEYS];
387+
float best_penalty[INDEX_MAX_KEYS];
386388
GISTENTRY entry,
387389
identry[INDEX_MAX_KEYS];
388390
bool isnull[INDEX_MAX_KEYS];
389391

390-
maxoff = PageGetMaxOffsetNumber(p);
391-
*which_grow = -1.0;
392-
which = InvalidOffsetNumber;
393-
sum_grow = 1;
392+
Assert(!GistPageIsLeaf(p));
393+
394394
gistDeCompressAtt(giststate, r,
395395
it, NULL, (OffsetNumber) 0,
396396
identry, isnull);
397397

398+
/* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
399+
result = FirstOffsetNumber;
400+
401+
/*
402+
* The index may have multiple columns, and there's a penalty value for
403+
* each column. The penalty associated with a column that appears earlier
404+
* in the index definition is strictly more important than the penalty of
405+
* a column that appears later in the index definition.
406+
*
407+
* best_penalty[j] is the best penalty we have seen so far for column j,
408+
* or -1 when we haven't yet examined column j. Array entries to the
409+
* right of the first -1 are undefined.
410+
*/
411+
best_penalty[0] = -1;
412+
413+
/*
414+
* Loop over tuples on page.
415+
*/
416+
maxoff = PageGetMaxOffsetNumber(p);
398417
Assert(maxoff >= FirstOffsetNumber);
399-
Assert(!GistPageIsLeaf(p));
400418

401-
for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
419+
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
402420
{
403-
int j;
404421
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
422+
bool zero_penalty;
423+
int j;
405424

406425
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(itup))
407426
{
@@ -411,41 +430,70 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
411430
continue;
412431
}
413432

414-
sum_grow = 0;
433+
zero_penalty = true;
434+
435+
/* Loop over index attributes. */
415436
for (j = 0; j < r->rd_att->natts; j++)
416437
{
417438
Datum datum;
418439
float usize;
419440
bool IsNull;
420441

442+
/* Compute penalty for this column. */
421443
datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
422444
gistdentryinit(giststate, j, &entry, datum, r, p, i,
423445
FALSE, IsNull);
424446
usize = gistpenalty(giststate, j, &entry, IsNull,
425447
&identry[j], isnull[j]);
448+
if (usize > 0)
449+
zero_penalty = false;
426450

427-
if (which_grow[j] < 0 || usize < which_grow[j])
451+
if (best_penalty[j] < 0 || usize < best_penalty[j])
452+
{
453+
/*
454+
* New best penalty for column. Tentatively select this tuple
455+
* as the target, and record the best penalty. Then reset the
456+
* next column's penalty to "unknown" (and indirectly, the
457+
* same for all the ones to its right). This will force us to
458+
* adopt this tuple's penalty values as the best for all the
459+
* remaining columns during subsequent loop iterations.
460+
*/
461+
result = i;
462+
best_penalty[j] = usize;
463+
464+
if (j < r->rd_att->natts - 1)
465+
best_penalty[j + 1] = -1;
466+
}
467+
else if (best_penalty[j] == usize)
428468
{
429-
which = i;
430-
which_grow[j] = usize;
431-
if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
432-
which_grow[j + 1] = -1;
433-
sum_grow += which_grow[j];
469+
/*
470+
* The current tuple is exactly as good for this column as the
471+
* best tuple seen so far. The next iteration of this loop
472+
* will compare the next column.
473+
*/
434474
}
435-
else if (which_grow[j] == usize)
436-
sum_grow += usize;
437475
else
438476
{
439-
sum_grow = 1;
477+
/*
478+
* The current tuple is worse for this column than the best
479+
* tuple seen so far. Skip the remaining columns and move on
480+
* to the next tuple, if any.
481+
*/
482+
zero_penalty = false; /* so outer loop won't exit */
440483
break;
441484
}
442485
}
443-
}
444486

445-
if (which == InvalidOffsetNumber)
446-
which = FirstOffsetNumber;
487+
/*
488+
* If we find a tuple with zero penalty for all columns, there's no
489+
* need to examine remaining tuples; just break out of the loop and
490+
* return it.
491+
*/
492+
if (zero_penalty)
493+
break;
494+
}
447495

448-
return which;
496+
return result;
449497
}
450498

451499
/*

0 commit comments

Comments
 (0)