@@ -162,23 +162,16 @@ findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
162
162
* Remove tuples that are marked don't-cares from the tuple index array a[]
163
163
* of length *len. This is applied separately to the spl_left and spl_right
164
164
* arrays.
165
- *
166
- * Corner case: we do not wish to reduce the index array to zero length.
167
- * (If we did, then the union key for this side would be null, and having just
168
- * one of spl_ldatum_exists and spl_rdatum_exists be TRUE might confuse
169
- * user-defined PickSplit methods.) To avoid that, we'll forcibly redefine
170
- * one tuple as non-don't-care if necessary. Hence, we must be able to adjust
171
- * caller's NumDontCare count.
172
165
*/
173
166
static void
174
- removeDontCares (OffsetNumber * a , int * len , bool * dontcare , int * NumDontCare )
167
+ removeDontCares (OffsetNumber * a , int * len , const bool * dontcare )
175
168
{
176
169
int origlen ,
177
- curlen ,
170
+ newlen ,
178
171
i ;
179
172
OffsetNumber * curwpos ;
180
173
181
- origlen = curlen = * len ;
174
+ origlen = newlen = * len ;
182
175
curwpos = a ;
183
176
for (i = 0 ; i < origlen ; i ++ )
184
177
{
@@ -190,18 +183,11 @@ removeDontCares(OffsetNumber *a, int *len, bool *dontcare, int *NumDontCare)
190
183
* curwpos = ai ;
191
184
curwpos ++ ;
192
185
}
193
- else if (curlen == 1 )
194
- {
195
- /* corner case: don't let array become empty */
196
- dontcare [ai ] = FALSE; /* mark item as non-dont-care */
197
- * NumDontCare -= 1 ;
198
- i -- ; /* reprocess item on next iteration */
199
- }
200
186
else
201
- curlen -- ;
187
+ newlen -- ;
202
188
}
203
189
204
- * len = curlen ;
190
+ * len = newlen ;
205
191
}
206
192
207
193
/*
@@ -304,8 +290,14 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
304
290
penalty2 ;
305
291
306
292
/*
307
- * there is only one previously defined union, so we just choose swap
308
- * or not by lowest penalty
293
+ * There is only one previously defined union, so we just choose swap
294
+ * or not by lowest penalty for that side. We can only get here if a
295
+ * secondary split happened to have all NULLs in its column in the
296
+ * tuples that the outer recursion level had assigned to one side.
297
+ * (Note that the null checks in gistSplitByKey don't prevent the
298
+ * case, because they'll only be checking tuples that were considered
299
+ * don't-cares at the outer recursion level, not the tuples that went
300
+ * into determining the passed-down left and right union keys.)
309
301
*/
310
302
penalty1 = gistpenalty (giststate , attno , entry1 , false, & entrySL , false);
311
303
penalty2 = gistpenalty (giststate , attno , entry1 , false, & entrySR , false);
@@ -405,13 +397,19 @@ genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC
405
397
* two vectors.
406
398
*
407
399
* Returns FALSE if split is complete (there are no more index columns, or
408
- * there is no need to consider them). Note that in this case the union
409
- * keys for all columns must be computed here.
410
- * Returns TRUE and v->spl_dontcare = NULL if left and right unions of attno
411
- * column are the same, so we should split on next column instead.
400
+ * there is no need to consider them because split is optimal already).
401
+ *
402
+ * Returns TRUE and v->spl_dontcare = NULL if the picksplit result is
403
+ * degenerate (all tuples seem to be don't-cares), so we should just
404
+ * disregard this column and split on the next column(s) instead.
405
+ *
412
406
* Returns TRUE and v->spl_dontcare != NULL if there are don't-care tuples
413
407
* that could be relocated based on the next column(s). The don't-care
414
408
* tuples have been removed from the split and must be reinserted by caller.
409
+ * There is at least one non-don't-care tuple on each side of the split,
410
+ * and union keys for all columns are updated to include just those tuples.
411
+ *
412
+ * A TRUE result implies there is at least one more index column.
415
413
*/
416
414
static bool
417
415
gistUserPicksplit (Relation r , GistEntryVector * entryvec , int attno , GistSplitVector * v ,
@@ -483,49 +481,57 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
483
481
484
482
/*
485
483
* If index columns remain, then consider whether we can improve the split
486
- * by using them. Even if we can't, we must compute union keys for those
487
- * columns before we can return FALSE.
484
+ * by using them.
488
485
*/
489
486
v -> spl_dontcare = NULL ;
490
487
491
488
if (attno + 1 < giststate -> tupdesc -> natts )
492
489
{
493
490
int NumDontCare ;
494
491
492
+ /*
493
+ * Make a quick check to see if left and right union keys are equal;
494
+ * if so, the split is certainly degenerate, so tell caller to
495
+ * re-split with the next column.
496
+ */
495
497
if (gistKeyIsEQ (giststate , attno , sv -> spl_ldatum , sv -> spl_rdatum ))
496
- {
497
- /*
498
- * Left and right union keys are equal, so we can get better split
499
- * by considering next column.
500
- */
501
498
return true;
502
- }
503
499
504
500
/*
505
- * Locate don't-care tuples, if any
501
+ * Locate don't-care tuples, if any. If there are none, the split is
502
+ * optimal, so just fall out and return false.
506
503
*/
507
504
v -> spl_dontcare = (bool * ) palloc0 (sizeof (bool ) * (entryvec -> n + 1 ));
508
505
509
506
NumDontCare = findDontCares (r , giststate , entryvec -> vector , v , attno );
510
507
511
- if (NumDontCare == 0 )
508
+ if (NumDontCare > 0 )
512
509
{
513
510
/*
514
- * There are no don't-cares, so just compute the union keys for
515
- * remaining columns and we're done.
511
+ * Remove don't-cares from spl_left[] and spl_right[].
516
512
*/
517
- gistunionsubkey (giststate , itup , v );
518
- }
519
- else
520
- {
513
+ removeDontCares (sv -> spl_left , & sv -> spl_nleft , v -> spl_dontcare );
514
+ removeDontCares (sv -> spl_right , & sv -> spl_nright , v -> spl_dontcare );
515
+
521
516
/*
522
- * Remove don't-cares from spl_left[] and spl_right[]. NOTE: this
523
- * could reduce NumDontCare to zero.
517
+ * If all tuples on either side were don't-cares, the split is
518
+ * degenerate, and we're best off to ignore it and split on the
519
+ * next column. (We used to try to press on with a secondary
520
+ * split by forcing a random tuple on each side to be treated as
521
+ * non-don't-care, but it seems unlikely that that technique
522
+ * really gives a better result. Note that we don't want to try a
523
+ * secondary split with empty left or right primary split sides,
524
+ * because then there is no union key on that side for the
525
+ * PickSplit function to try to expand, so it can have no good
526
+ * figure of merit for what it's doing. Also note that this check
527
+ * ensures we can't produce a bogus one-side-only split in the
528
+ * NumDontCare == 1 special case below.)
524
529
*/
525
- removeDontCares (sv -> spl_left , & sv -> spl_nleft ,
526
- v -> spl_dontcare , & NumDontCare );
527
- removeDontCares (sv -> spl_right , & sv -> spl_nright ,
528
- v -> spl_dontcare , & NumDontCare );
530
+ if (sv -> spl_nleft == 0 || sv -> spl_nright == 0 )
531
+ {
532
+ v -> spl_dontcare = NULL ;
533
+ return true;
534
+ }
529
535
530
536
/*
531
537
* Recompute union keys, considering only non-don't-care tuples.
@@ -541,7 +547,9 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
541
547
/*
542
548
* If there's only one don't-care tuple then we can't do a
543
549
* PickSplit on it, so just choose whether to send it left or
544
- * right by comparing penalties.
550
+ * right by comparing penalties. We needed the
551
+ * gistunionsubkey step anyway so that we have appropriate
552
+ * union keys for figuring the penalties.
545
553
*/
546
554
OffsetNumber toMove ;
547
555
@@ -556,13 +564,14 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
556
564
/* ... and assign it to cheaper side */
557
565
placeOne (r , giststate , v , itup [toMove - 1 ], toMove , attno + 1 );
558
566
559
- /* recompute the union keys including this tuple */
560
- v -> spl_dontcare = NULL ;
561
- gistunionsubkey (giststate , itup , v );
567
+ /*
568
+ * At this point the union keys are wrong, but we don't care
569
+ * because we're done splitting. The outermost recursion
570
+ * level of gistSplitByKey will fix things before returning.
571
+ */
562
572
}
563
- else if ( NumDontCare > 1 )
573
+ else
564
574
return true;
565
- /* else NumDontCare is now zero; handle same as above */
566
575
}
567
576
}
568
577
@@ -648,7 +657,7 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
648
657
*/
649
658
v -> spl_risnull [attno ] = v -> spl_lisnull [attno ] = TRUE;
650
659
651
- if (attno + 1 < r -> rd_att -> natts )
660
+ if (attno + 1 < giststate -> tupdesc -> natts )
652
661
gistSplitByKey (r , page , itup , len , giststate , v , attno + 1 );
653
662
else
654
663
gistSplitHalf (& v -> splitVector , len );
@@ -673,28 +682,31 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
673
682
else
674
683
v -> splitVector .spl_left [v -> splitVector .spl_nleft ++ ] = i ;
675
684
676
- /* Must compute union keys for this and any following columns */
677
- v -> spl_dontcare = NULL ;
678
- gistunionsubkey (giststate , itup , v );
685
+ /* Compute union keys, unless outer recursion level will handle it */
686
+ if (attno == 0 && giststate -> tupdesc -> natts == 1 )
687
+ {
688
+ v -> spl_dontcare = NULL ;
689
+ gistunionsubkey (giststate , itup , v );
690
+ }
679
691
}
680
692
else
681
693
{
682
694
/*
683
- * all keys are not-null, so apply user-defined PickSplit method
695
+ * All keys are not-null, so apply user-defined PickSplit method
684
696
*/
685
697
if (gistUserPicksplit (r , entryvec , attno , v , itup , len , giststate ))
686
698
{
687
699
/*
688
700
* Splitting on attno column is not optimal, so consider
689
701
* redistributing don't-care tuples according to the next column
690
702
*/
691
- Assert (attno + 1 < r -> rd_att -> natts );
703
+ Assert (attno + 1 < giststate -> tupdesc -> natts );
692
704
693
705
if (v -> spl_dontcare == NULL )
694
706
{
695
707
/*
696
- * Simple case: left and right keys for attno column are
697
- * equal, so just split according to the next column.
708
+ * This split was actually degenerate, so ignore it altogether
709
+ * and just split according to the next column.
698
710
*/
699
711
gistSplitByKey (r , page , itup , len , giststate , v , attno + 1 );
700
712
}
@@ -741,10 +753,27 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
741
753
backupSplit .spl_right [backupSplit .spl_nright ++ ] = map [v -> splitVector .spl_right [i ] - 1 ];
742
754
743
755
v -> splitVector = backupSplit ;
744
-
745
- /* recompute left and right union datums */
746
- gistunionsubkey (giststate , itup , v );
747
756
}
748
757
}
749
758
}
759
+
760
+ /*
761
+ * If we're handling a multicolumn index, at the end of the recursion
762
+ * recompute the left and right union datums for all index columns. This
763
+ * makes sure we hand back correct union datums in all corner cases,
764
+ * including when we haven't processed all columns to start with, or when
765
+ * a secondary split moved "don't care" tuples from one side to the other
766
+ * (we really shouldn't assume that that didn't change the union datums).
767
+ *
768
+ * Note: when we're in an internal recursion (attno > 0), we do not worry
769
+ * about whether the union datums we return with are sensible, since
770
+ * calling levels won't care. Also, in a single-column index, we expect
771
+ * that PickSplit (or the special cases above) produced correct union
772
+ * datums.
773
+ */
774
+ if (attno == 0 && giststate -> tupdesc -> natts > 1 )
775
+ {
776
+ v -> spl_dontcare = NULL ;
777
+ gistunionsubkey (giststate , itup , v );
778
+ }
750
779
}
0 commit comments