Skip to content

Commit 153b1db

Browse files
committed
On GiST page split, release the locks on child pages before recursing up.
When inserting the downlinks for a split gist page, we used hold the locks on the child pages until the insertion into the parent - and recursively its parent if it had to be split too - were all completed. Change that so that the locks on child pages are released after the insertion in the immediate parent is done, before recursing further up the tree. This reduces the number of lwlocks that are held simultaneously. Holding many locks is bad for concurrency, and in extreme cases you can even hit the limit of 100 simultaneously held lwlocks in a backend. If you're really unlucky, you can hit the limit while in a critical section, which brings down the whole system. This fixes bug #6629 reported by Tom Forbes. Backpatch to 9.1. The page splitting code was rewritten in 9.1, and the old code did not have this problem.
1 parent 1b48f6a commit 153b1db

File tree

1 file changed

+93
-30
lines changed

1 file changed

+93
-30
lines changed

src/backend/access/gist/gist.c

Lines changed: 93 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,15 @@ static void gistdoinsert(Relation r,
5151
Size freespace,
5252
GISTSTATE *GISTstate);
5353
static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
54+
static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
55+
GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
5456
static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
5557
GISTSTATE *giststate,
5658
IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
57-
Buffer leftchild);
59+
Buffer leftchild, Buffer rightchild,
60+
bool unlockbuf, bool unlockleftchild);
5861
static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
59-
GISTSTATE *giststate, List *splitinfo);
62+
GISTSTATE *giststate, List *splitinfo, bool releasebuf);
6063

6164

6265
#define ROTATEDIST(d) do { \
@@ -755,15 +758,15 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
755758
/*
756759
* Update the tuple.
757760
*
758-
* We still hold the lock after gistinserttuples(), but it
761+
* We still hold the lock after gistinserttuple(), but it
759762
* might have to split the page to make the updated tuple fit.
760763
* In that case the updated tuple might migrate to the other
761764
* half of the split, so we have to go back to the parent and
762765
* descend back to the half that's a better fit for the new
763766
* tuple.
764767
*/
765-
if (gistinserttuples(&state, stack, giststate, &newtup, 1,
766-
stack->childoffnum, InvalidBuffer))
768+
if (gistinserttuple(&state, stack, giststate, newtup,
769+
stack->childoffnum))
767770
{
768771
/*
769772
* If this was a root split, the root page continues to be
@@ -848,8 +851,8 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
848851

849852
/* now state.stack->(page, buffer and blkno) points to leaf page */
850853

851-
gistinserttuples(&state, stack, giststate, &itup, 1,
852-
InvalidOffsetNumber, InvalidBuffer);
854+
gistinserttuple(&state, stack, giststate, itup,
855+
InvalidOffsetNumber);
853856
LockBuffer(stack->buffer, GIST_UNLOCK);
854857

855858
/* Release any pins we might still hold before exiting */
@@ -1190,49 +1193,104 @@ gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
11901193
}
11911194

11921195
/* Insert the downlinks */
1193-
gistfinishsplit(state, stack, giststate, splitinfo);
1196+
gistfinishsplit(state, stack, giststate, splitinfo, false);
11941197
}
11951198

11961199
/*
1197-
* Insert tuples to stack->buffer. If 'oldoffnum' is valid, the new tuples
1198-
* replace an old tuple at oldoffnum. The caller must hold an exclusive lock
1199-
* on the page.
1200+
* Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
1201+
* tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
1202+
* 'stack' represents the path from the root to the page being updated.
12001203
*
1201-
* If leftchild is valid, we're inserting/updating the downlink for the
1202-
* page to the right of leftchild. We clear the F_FOLLOW_RIGHT flag and
1203-
* update NSN on leftchild, atomically with the insertion of the downlink.
1204+
* The caller must hold an exclusive lock on stack->buffer. The lock is still
1205+
* held on return, but the page might not contain the inserted tuple if the
1206+
* page was split. The function returns true if the page was split, false
1207+
* otherwise.
1208+
*/
1209+
static bool
1210+
gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
1211+
GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
1212+
{
1213+
return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
1214+
InvalidBuffer, InvalidBuffer, false, false);
1215+
}
1216+
1217+
/* ----------------
1218+
* An extended workhorse version of gistinserttuple(). This version allows
1219+
* inserting multiple tuples, or replacing a single tuple with multiple tuples.
1220+
* This is used to recursively update the downlinks in the parent when a page
1221+
* is split.
12041222
*
1205-
* Returns 'true' if the page had to be split. On return, we will continue
1206-
* to hold an exclusive lock on state->stack->buffer, but if we had to split
1207-
* the page, it might not contain the tuple we just inserted/updated.
1223+
* If leftchild and rightchild are valid, we're inserting/replacing the
1224+
* downlink for rightchild, and leftchild is its left sibling. We clear the
1225+
* F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
1226+
* insertion of the downlink.
1227+
*
1228+
* To avoid holding locks for longer than necessary, when recursing up the
1229+
* tree to update the parents, the locking is a bit peculiar here. On entry,
1230+
* the caller must hold an exclusive lock on stack->buffer, as well as
1231+
* leftchild and rightchild if given. On return:
1232+
*
1233+
* - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
1234+
* always kept pinned, however.
1235+
* - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
1236+
* is kept pinned.
1237+
* - Lock and pin on 'rightchild' are always released.
1238+
*
1239+
* Returns 'true' if the page had to be split. Note that if the page had
1240+
* be split, the inserted/updated might've been inserted to a right sibling
1241+
* of stack->buffer instead of stack->buffer itself.
12081242
*/
12091243
static bool
12101244
gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
12111245
GISTSTATE *giststate,
12121246
IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
1213-
Buffer leftchild)
1247+
Buffer leftchild, Buffer rightchild,
1248+
bool unlockbuf, bool unlockleftchild)
12141249
{
12151250
List *splitinfo;
12161251
bool is_split;
12171252

1253+
/* Insert the tuple(s) to the page, splitting the page if necessary */
12181254
is_split = gistplacetopage(state, giststate, stack->buffer,
12191255
tuples, ntup, oldoffnum,
1220-
leftchild,
1221-
&splitinfo);
1256+
leftchild, &splitinfo);
1257+
1258+
/*
1259+
* Before recursing up in case the page was split, release locks on the
1260+
* child pages. We don't need to keep them locked when updating the
1261+
* parent.
1262+
*/
1263+
if (BufferIsValid(rightchild))
1264+
UnlockReleaseBuffer(rightchild);
1265+
if (BufferIsValid(leftchild) && unlockleftchild)
1266+
LockBuffer(leftchild, GIST_UNLOCK);
1267+
1268+
/*
1269+
* If we had to split, insert/update the downlinks in the parent. If the
1270+
* caller requested us to release the lock on stack->buffer, tell
1271+
* gistfinishsplit() to do that as soon as it's safe to do so. If we
1272+
* didn't have to split, release it ourselves.
1273+
*/
12221274
if (splitinfo)
1223-
gistfinishsplit(state, stack, giststate, splitinfo);
1275+
gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
1276+
else if (unlockbuf)
1277+
LockBuffer(stack->buffer, GIST_UNLOCK);
12241278

12251279
return is_split;
12261280
}
12271281

12281282
/*
1229-
* Finish an incomplete split by inserting/updating the downlinks in
1230-
* parent page. 'splitinfo' contains all the child pages, exclusively-locked,
1231-
* involved in the split, from left-to-right.
1283+
* Finish an incomplete split by inserting/updating the downlinks in parent
1284+
* page. 'splitinfo' contains all the child pages involved in the split,
1285+
* from left-to-right.
1286+
*
1287+
* On entry, the caller must hold a lock on stack->buffer and all the child
1288+
* pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
1289+
* released on return. The child pages are always unlocked and unpinned.
12321290
*/
12331291
static void
12341292
gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
1235-
GISTSTATE *giststate, List *splitinfo)
1293+
GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
12361294
{
12371295
ListCell *lc;
12381296
List *reversed;
@@ -1261,6 +1319,10 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
12611319
LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
12621320
gistFindCorrectParent(state->r, stack);
12631321

1322+
/*
1323+
* insert downlinks for the siblings from right to left, until there are
1324+
* only two siblings left.
1325+
*/
12641326
while (list_length(reversed) > 2)
12651327
{
12661328
right = (GISTPageSplitInfo *) linitial(reversed);
@@ -1269,15 +1331,15 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
12691331
if (gistinserttuples(state, stack->parent, giststate,
12701332
&right->downlink, 1,
12711333
InvalidOffsetNumber,
1272-
left->buf))
1334+
left->buf, right->buf, false, false))
12731335
{
12741336
/*
12751337
* If the parent page was split, need to relocate the original
12761338
* parent pointer.
12771339
*/
12781340
gistFindCorrectParent(state->r, stack);
12791341
}
1280-
UnlockReleaseBuffer(right->buf);
1342+
/* gistinserttuples() released the lock on right->buf. */
12811343
reversed = list_delete_first(reversed);
12821344
}
12831345

@@ -1294,9 +1356,10 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
12941356
gistinserttuples(state, stack->parent, giststate,
12951357
tuples, 2,
12961358
stack->parent->childoffnum,
1297-
left->buf);
1298-
LockBuffer(stack->parent->buffer, GIST_UNLOCK);
1299-
UnlockReleaseBuffer(right->buf);
1359+
left->buf, right->buf,
1360+
true, /* Unlock parent */
1361+
unlockbuf /* Unlock stack->buffer if caller wants that */
1362+
);
13001363
Assert(left->buf == stack->buffer);
13011364
}
13021365

0 commit comments

Comments
 (0)