Skip to content

Commit 7e6fb5d

Browse files
committed
Improvements and fixes for e0b1ee1
e0b1ee1 introduced optimization for matching B-tree scan keys required for the directional scan. However, it incorrectly assumed that all keys required for opposite direction scan are satisfied by _bt_first(). It has been illustrated that with multiple scan keys over the same column, a lesser one (according to the scan direction) could win leaving the other one unsatisfied. Instead of relying on _bt_first() this commit introduces code that memorizes whether there was at least one match on the page. If that's true we know that keys required for opposite-direction scan are satisfied as soon as corresponding values are not NULLs. Also, this commit simplifies the description for the optimization of keys required for the current direction scan. Now the flag used for this is named continuescanPrechecked and means exactly that *continuescan flag is known to be true for the last item on the page. Reported-by: Peter Geoghegan Discussion: https://postgr.es/m/CAH2-Wzn0LeLcb1PdBnK0xisz8NpHkxRrMr3NWJ%2BKOK-WZ%2BQtTQ%40mail.gmail.com Reviewed-by: Pavel Borisov
1 parent 06b10f8 commit 7e6fb5d

File tree

3 files changed

+51
-33
lines changed

3 files changed

+51
-33
lines changed

src/backend/access/nbtree/nbtsearch.c

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,7 +1530,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
15301530
int itemIndex;
15311531
bool continuescan;
15321532
int indnatts;
1533-
bool requiredMatchedByPrecheck;
1533+
bool continuescanPrechecked;
1534+
bool haveFirstMatch = false;
15341535

15351536
/*
15361537
* We must have the buffer pinned and locked, but the usual macro can't be
@@ -1585,12 +1586,11 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
15851586
Assert(BTScanPosIsPinned(so->currPos));
15861587

15871588
/*
1588-
* Prechecking the page with scan keys required for direction scan. We
1589-
* check these keys with the last item on the page (according to our scan
1590-
* direction). If these keys are matched, we can skip checking them with
1591-
* every item on the page. Scan keys for our scan direction would
1592-
* necessarily match the previous items. Scan keys required for opposite
1593-
* direction scan are already matched by the _bt_first() call.
1589+
* Prechecking the value of the continuescan flag for the last item on the
1590+
* page (for backwards scan it will be the first item on a page). If we
1591+
* observe it to be true, then it should be true for all other items. This
1592+
* allows us to do significant optimizations in the _bt_checkkeys()
1593+
* function for all the items on the page.
15941594
*
15951595
* With the forward scan, we do this check for the last item on the page
15961596
* instead of the high key. It's relatively likely that the most
@@ -1610,17 +1610,17 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
16101610
itup = (IndexTuple) PageGetItem(page, iid);
16111611

16121612
/*
1613-
* Do the precheck. Note that we pass the pointer to
1614-
* 'requiredMatchedByPrecheck' to 'continuescan' argument. That will
1613+
* Do the precheck. Note that we pass the pointer to the
1614+
* 'continuescanPrechecked' to the 'continuescan' argument. That will
16151615
* set flag to true if all required keys are satisfied and false
16161616
* otherwise.
16171617
*/
16181618
(void) _bt_checkkeys(scan, itup, indnatts, dir,
1619-
&requiredMatchedByPrecheck, false);
1619+
&continuescanPrechecked, false, false);
16201620
}
16211621
else
16221622
{
1623-
requiredMatchedByPrecheck = false;
1623+
continuescanPrechecked = false;
16241624
}
16251625

16261626
if (ScanDirectionIsForward(dir))
@@ -1650,19 +1650,22 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
16501650
Assert(!BTreeTupleIsPivot(itup));
16511651

16521652
passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
1653-
&continuescan, requiredMatchedByPrecheck);
1653+
&continuescan,
1654+
continuescanPrechecked,
1655+
haveFirstMatch);
16541656

16551657
/*
16561658
* If the result of prechecking required keys was true, then in
16571659
* assert-enabled builds we also recheck that the _bt_checkkeys()
16581660
* result is the same.
16591661
*/
1660-
Assert(!requiredMatchedByPrecheck ||
1662+
Assert((!continuescanPrechecked && haveFirstMatch) ||
16611663
passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
1662-
&continuescan, false));
1664+
&continuescan, false, false));
16631665
if (passes_quals)
16641666
{
16651667
/* tuple passes all scan key conditions */
1668+
haveFirstMatch = true;
16661669
if (!BTreeTupleIsPosting(itup))
16671670
{
16681671
/* Remember it */
@@ -1717,7 +1720,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
17171720
int truncatt;
17181721

17191722
truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
1720-
_bt_checkkeys(scan, itup, truncatt, dir, &continuescan, false);
1723+
_bt_checkkeys(scan, itup, truncatt, dir, &continuescan, false, false);
17211724
}
17221725

17231726
if (!continuescan)
@@ -1770,19 +1773,22 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
17701773
Assert(!BTreeTupleIsPivot(itup));
17711774

17721775
passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
1773-
&continuescan, requiredMatchedByPrecheck);
1776+
&continuescan,
1777+
continuescanPrechecked,
1778+
haveFirstMatch);
17741779

17751780
/*
17761781
* If the result of prechecking required keys was true, then in
17771782
* assert-enabled builds we also recheck that the _bt_checkkeys()
17781783
* result is the same.
17791784
*/
1780-
Assert(!requiredMatchedByPrecheck ||
1785+
Assert((!continuescanPrechecked && !haveFirstMatch) ||
17811786
passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
1782-
&continuescan, false));
1787+
&continuescan, false, false));
17831788
if (passes_quals && tuple_alive)
17841789
{
17851790
/* tuple passes all scan key conditions */
1791+
haveFirstMatch = true;
17861792
if (!BTreeTupleIsPosting(itup))
17871793
{
17881794
/* Remember it */

src/backend/access/nbtree/nbtutils.c

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,13 +1364,15 @@ _bt_mark_scankey_required(ScanKey skey)
13641364
* tupnatts: number of attributes in tupnatts (high key may be truncated)
13651365
* dir: direction we are scanning in
13661366
* continuescan: output parameter (will be set correctly in all cases)
1367-
* requiredMatchedByPrecheck: indicates that scan keys required for
1368-
* direction scan are already matched
1367+
* continuescanPrechecked: indicates that *continuescan flag is known to
1368+
* be true for the last item on the page
1369+
* haveFirstMatch: indicates that we already have at least one match
1370+
* in the current page
13691371
*/
13701372
bool
13711373
_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
13721374
ScanDirection dir, bool *continuescan,
1373-
bool requiredMatchedByPrecheck)
1375+
bool continuescanPrechecked, bool haveFirstMatch)
13741376
{
13751377
TupleDesc tupdesc;
13761378
BTScanOpaque so;
@@ -1406,13 +1408,23 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
14061408
requiredOppositeDir = true;
14071409

14081410
/*
1409-
* Is the key required for scanning for either forward or backward
1410-
* direction? If so and caller told us that these types of keys are
1411-
* known to be matched, skip the check. Except for the row keys,
1412-
* where NULLs could be found in the middle of matching values.
1411+
* If the caller told us the *continuescan flag is known to be true
1412+
* for the last item on the page, then we know the keys required for
1413+
* the current direction scan should be matched. Otherwise, the
1414+
* *continuescan flag would be set for the current item and
1415+
* subsequently the last item on the page accordingly.
1416+
*
1417+
* If the key is required for the opposite direction scan, we can skip
1418+
* the check if the caller tells us there was already at least one
1419+
* matching item on the page. Also, we require the *continuescan flag
1420+
* to be true for the last item on the page to know there are no
1421+
* NULLs.
1422+
*
1423+
* Both cases above work except for the row keys, where NULLs could be
1424+
* found in the middle of matching values.
14131425
*/
1414-
if ((requiredSameDir || requiredOppositeDir) &&
1415-
!(key->sk_flags & SK_ROW_HEADER) && requiredMatchedByPrecheck)
1426+
if ((requiredSameDir || (requiredOppositeDir && haveFirstMatch)) &&
1427+
!(key->sk_flags & SK_ROW_HEADER) && continuescanPrechecked)
14161428
continue;
14171429

14181430
if (key->sk_attno > tupnatts)
@@ -1513,12 +1525,12 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
15131525
}
15141526

15151527
/*
1516-
* Apply the key checking function. When the key is required for
1517-
* opposite direction scan, it must be already satisfied by
1518-
* _bt_first() except for the NULLs checking, which have already done
1519-
* above.
1528+
* Apply the key-checking function. When the key is required for the
1529+
* opposite direction scan, it must be already satisfied as soon as
1530+
* there is already match on the page. Except for the NULLs checking,
1531+
* which have already done above.
15201532
*/
1521-
if (!requiredOppositeDir)
1533+
if (!(requiredOppositeDir && haveFirstMatch))
15221534
{
15231535
test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
15241536
datum, key->sk_argument);

src/include/access/nbtree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ extern void _bt_restore_array_keys(IndexScanDesc scan);
12511251
extern void _bt_preprocess_keys(IndexScanDesc scan);
12521252
extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
12531253
int tupnatts, ScanDirection dir, bool *continuescan,
1254-
bool requiredMatchedByPrecheck);
1254+
bool requiredMatchedByPrecheck, bool haveFirstMatch);
12551255
extern void _bt_killitems(IndexScanDesc scan);
12561256
extern BTCycleId _bt_vacuum_cycleid(Relation rel);
12571257
extern BTCycleId _bt_start_vacuum(Relation rel);

0 commit comments

Comments
 (0)