Skip to content

Commit 26917eb

Browse files
committed
Fix another bug in parent page splitting during GiST index build.
Yet another bug in the ilk of commits a7ee7c8 and 741b884. In 741b884, we took care to clear the memorized location of the downlink when we split the parent page, because splitting the parent page can move the downlink. But we missed that even *updating* a tuple on the parent can move it, because updating a tuple on a gist page is implemented as a delete+insert, so the updated tuple gets moved to the end of the page. This commit fixes the bug in two different ways (belt and suspenders): 1. Clear the downlink when we update a tuple on the parent page, even if it's not split. This is the same approach as in commits a7ee7c8 and 741b884. I also noticed that gistFindCorrectParent did not clear the 'downlinkoffnum' when it stepped to the right sibling. Fix that too, as it seems like a clear bug even though I haven't been able to find a test case to hit that. 2. Change gistFindCorrectParent so that it treats 'downlinkoffnum' merely as a hint. It now always first checks if the downlink is still at that location, and if not, it scans the page like before. That's more robust if there are still more cases where we fail to clear 'downlinkoffnum' that we haven't yet uncovered. With this, it's no longer necessary to meticulously clear 'downlinkoffnum', so this makes the previous fixes unnecessary, but I didn't revert them because it still seems nice to clear it when we know that the downlink has moved. Also add the test case using the same test data that Alexander posted. I tried to reduce it to a smaller test, and I also tried to reproduce this with different test data, but I was not able to, so let's just include what we have. Backpatch to v12, like the previous fixes. Reported-by: Alexander Lakhin Discussion: https://www.postgresql.org/message-id/18129-caca016eaf0c3702@postgresql.org
1 parent 11bb77d commit 26917eb

File tree

1 file changed

+99
-80
lines changed

1 file changed

+99
-80
lines changed

src/backend/access/gist/gist.c

Lines changed: 99 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,97 +1011,114 @@ gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
10111011
* remain so at exit, but it might not be the same page anymore.
10121012
*/
10131013
static void
1014-
gistFindCorrectParent(Relation r, GISTInsertStack *child)
1014+
gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)
10151015
{
10161016
GISTInsertStack *parent = child->parent;
1017+
ItemId iid;
1018+
IndexTuple idxtuple;
1019+
OffsetNumber maxoff;
1020+
GISTInsertStack *ptr;
10171021

10181022
gistcheckpage(r, parent->buffer);
10191023
parent->page = (Page) BufferGetPage(parent->buffer);
1024+
maxoff = PageGetMaxOffsetNumber(parent->page);
10201025

1021-
/* here we don't need to distinguish between split and page update */
1022-
if (child->downlinkoffnum == InvalidOffsetNumber ||
1023-
parent->lsn != PageGetLSN(parent->page))
1026+
/* Check if the downlink is still where it was before */
1027+
if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff)
10241028
{
1025-
/* parent is changed, look child in right links until found */
1026-
OffsetNumber i,
1027-
maxoff;
1028-
ItemId iid;
1029-
IndexTuple idxtuple;
1030-
GISTInsertStack *ptr;
1029+
iid = PageGetItemId(parent->page, child->downlinkoffnum);
1030+
idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1031+
if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1032+
return; /* still there */
1033+
}
10311034

1032-
while (true)
1033-
{
1034-
maxoff = PageGetMaxOffsetNumber(parent->page);
1035-
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1036-
{
1037-
iid = PageGetItemId(parent->page, i);
1038-
idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1039-
if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1040-
{
1041-
/* yes!!, found */
1042-
child->downlinkoffnum = i;
1043-
return;
1044-
}
1045-
}
1035+
/*
1036+
* The page has changed since we looked. During normal operation, every
1037+
* update of a page changes its LSN, so the LSN we memorized should have
1038+
* changed too. During index build, however, we don't WAL-log the changes
1039+
* until we have built the index, so the LSN doesn't change. There is no
1040+
* concurrent activity during index build, but we might have changed the
1041+
* parent ourselves.
1042+
*/
1043+
Assert(parent->lsn != PageGetLSN(parent->page) || is_build);
1044+
1045+
/*
1046+
* Scan the page to re-find the downlink. If the page was split, it might
1047+
* have moved to a different page, so follow the right links until we find
1048+
* it.
1049+
*/
1050+
while (true)
1051+
{
1052+
OffsetNumber i;
10461053

1047-
parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
1048-
UnlockReleaseBuffer(parent->buffer);
1049-
if (parent->blkno == InvalidBlockNumber)
1054+
maxoff = PageGetMaxOffsetNumber(parent->page);
1055+
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1056+
{
1057+
iid = PageGetItemId(parent->page, i);
1058+
idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1059+
if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
10501060
{
1051-
/*
1052-
* End of chain and still didn't find parent. It's a very-very
1053-
* rare situation when root splited.
1054-
*/
1055-
break;
1061+
/* yes!!, found */
1062+
child->downlinkoffnum = i;
1063+
return;
10561064
}
1057-
parent->buffer = ReadBuffer(r, parent->blkno);
1058-
LockBuffer(parent->buffer, GIST_EXCLUSIVE);
1059-
gistcheckpage(r, parent->buffer);
1060-
parent->page = (Page) BufferGetPage(parent->buffer);
10611065
}
10621066

1063-
/*
1064-
* awful!!, we need search tree to find parent ... , but before we
1065-
* should release all old parent
1066-
*/
1067-
1068-
ptr = child->parent->parent; /* child->parent already released
1069-
* above */
1070-
while (ptr)
1067+
parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
1068+
parent->downlinkoffnum = InvalidOffsetNumber;
1069+
UnlockReleaseBuffer(parent->buffer);
1070+
if (parent->blkno == InvalidBlockNumber)
10711071
{
1072-
ReleaseBuffer(ptr->buffer);
1073-
ptr = ptr->parent;
1072+
/*
1073+
* End of chain and still didn't find parent. It's a very-very
1074+
* rare situation when root splitted.
1075+
*/
1076+
break;
10741077
}
1078+
parent->buffer = ReadBuffer(r, parent->blkno);
1079+
LockBuffer(parent->buffer, GIST_EXCLUSIVE);
1080+
gistcheckpage(r, parent->buffer);
1081+
parent->page = (Page) BufferGetPage(parent->buffer);
1082+
}
10751083

1076-
/* ok, find new path */
1077-
ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
1084+
/*
1085+
* awful!!, we need search tree to find parent ... , but before we should
1086+
* release all old parent
1087+
*/
10781088

1079-
/* read all buffers as expected by caller */
1080-
/* note we don't lock them or gistcheckpage them here! */
1081-
while (ptr)
1082-
{
1083-
ptr->buffer = ReadBuffer(r, ptr->blkno);
1084-
ptr->page = (Page) BufferGetPage(ptr->buffer);
1085-
ptr = ptr->parent;
1086-
}
1089+
ptr = child->parent->parent; /* child->parent already released above */
1090+
while (ptr)
1091+
{
1092+
ReleaseBuffer(ptr->buffer);
1093+
ptr = ptr->parent;
1094+
}
10871095

1088-
/* install new chain of parents to stack */
1089-
child->parent = parent;
1096+
/* ok, find new path */
1097+
ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
10901098

1091-
/* make recursive call to normal processing */
1092-
LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
1093-
gistFindCorrectParent(r, child);
1099+
/* read all buffers as expected by caller */
1100+
/* note we don't lock them or gistcheckpage them here! */
1101+
while (ptr)
1102+
{
1103+
ptr->buffer = ReadBuffer(r, ptr->blkno);
1104+
ptr->page = (Page) BufferGetPage(ptr->buffer);
1105+
ptr = ptr->parent;
10941106
}
10951107

1096-
return;
1108+
/* install new chain of parents to stack */
1109+
child->parent = parent;
1110+
1111+
/* make recursive call to normal processing */
1112+
LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
1113+
gistFindCorrectParent(r, child, is_build);
10971114
}
10981115

10991116
/*
11001117
* Form a downlink pointer for the page in 'buf'.
11011118
*/
11021119
static IndexTuple
11031120
gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
1104-
GISTInsertStack *stack)
1121+
GISTInsertStack *stack, bool is_build)
11051122
{
11061123
Page page = BufferGetPage(buf);
11071124
OffsetNumber maxoff;
@@ -1142,7 +1159,7 @@ gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
11421159
ItemId iid;
11431160

11441161
LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
1145-
gistFindCorrectParent(rel, stack);
1162+
gistFindCorrectParent(rel, stack, is_build);
11461163
iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
11471164
downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
11481165
downlink = CopyIndexTuple(downlink);
@@ -1187,7 +1204,7 @@ gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
11871204
page = BufferGetPage(buf);
11881205

11891206
/* Form the new downlink tuples to insert to parent */
1190-
downlink = gistformdownlink(state->r, buf, giststate, stack);
1207+
downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build);
11911208

11921209
si->buf = buf;
11931210
si->downlink = downlink;
@@ -1351,7 +1368,7 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
13511368
right = (GISTPageSplitInfo *) linitial(reversed);
13521369
left = (GISTPageSplitInfo *) lsecond(reversed);
13531370

1354-
gistFindCorrectParent(state->r, stack);
1371+
gistFindCorrectParent(state->r, stack, state->is_build);
13551372
if (gistinserttuples(state, stack->parent, giststate,
13561373
&right->downlink, 1,
13571374
InvalidOffsetNumber,
@@ -1377,20 +1394,22 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
13771394
*/
13781395
tuples[0] = left->downlink;
13791396
tuples[1] = right->downlink;
1380-
gistFindCorrectParent(state->r, stack);
1381-
if (gistinserttuples(state, stack->parent, giststate,
1382-
tuples, 2,
1383-
stack->downlinkoffnum,
1384-
left->buf, right->buf,
1385-
true, /* Unlock parent */
1386-
unlockbuf /* Unlock stack->buffer if caller wants that */
1387-
))
1388-
{
1389-
/*
1390-
* If the parent page was split, the downlink might have moved.
1391-
*/
1392-
stack->downlinkoffnum = InvalidOffsetNumber;
1393-
}
1397+
gistFindCorrectParent(state->r, stack, state->is_build);
1398+
(void) gistinserttuples(state, stack->parent, giststate,
1399+
tuples, 2,
1400+
stack->downlinkoffnum,
1401+
left->buf, right->buf,
1402+
true, /* Unlock parent */
1403+
unlockbuf /* Unlock stack->buffer if caller
1404+
* wants that */
1405+
);
1406+
1407+
/*
1408+
* The downlink might have moved when we updated it. Even if the page
1409+
* wasn't split, because gistinserttuples() implements updating the old
1410+
* tuple by removing and re-inserting it!
1411+
*/
1412+
stack->downlinkoffnum = InvalidOffsetNumber;
13941413

13951414
Assert(left->buf == stack->buffer);
13961415

0 commit comments

Comments
 (0)