Skip to content

Commit a0696d2

Browse files
committed
Prevent GIN deleted pages from being reclaimed too early
When GIN vacuum deletes a posting tree page, it assumes that no concurrent searchers can access it, thanks to ginStepRight() locking two pages at once. However, since 9.4 searches can skip parts of posting trees descending from the root. That leads to the risk that page is deleted and reclaimed before concurrent search can access it. This commit prevents the risk of above by waiting for every transaction, which might wait to reference this page, to finish. Due to binary compatibility we can't change GinPageOpaqueData to store corresponding transaction id. Instead we reuse page header pd_prune_xid field, which is unused in index pages. Discussion: https://postgr.es/m/31a702a.14dd.166c1366ac1.Coremail.chjischj%40163.com Author: Andrey Borodin, Alexander Korotkov Reviewed-by: Alexander Korotkov Backpatch-through: 9.4
1 parent 8658703 commit a0696d2

File tree

6 files changed

+22
-13
lines changed

6 files changed

+22
-13
lines changed

src/backend/access/gin/README

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -304,12 +304,10 @@ the lock on next page has been acquired.
304304
The downlink is more tricky. A search descending the tree must release the
305305
lock on the parent page before locking the child, or it could deadlock with
306306
a concurrent split of the child page; a page split locks the parent, while
307-
already holding a lock on the child page. However, posting trees are only
308-
fully searched from left to right, starting from the leftmost leaf. (The
309-
tree-structure is only needed by insertions, to quickly find the correct
310-
insert location). So as long as we don't delete the leftmost page on each
311-
level, a search can never follow a downlink to page that's about to be
312-
deleted.
307+
already holding a lock on the child page. So, deleted page cannot be reclaimed
308+
immediately. Instead, we have to wait for every transaction, which might wait
309+
to reference this page, to finish. Corresponding processes must observe that
310+
the page is marked deleted and recover accordingly.
313311

314312
The previous paragraph's reasoning only applies to searches, and only to
315313
posting trees. To protect from inserters following a downlink to a deleted

src/backend/access/gin/ginutil.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -305,12 +305,7 @@ GinNewBuffer(Relation index)
305305
*/
306306
if (ConditionalLockBuffer(buffer))
307307
{
308-
Page page = BufferGetPage(buffer);
309-
310-
if (PageIsNew(page))
311-
return buffer; /* OK to use, if never initialized */
312-
313-
if (GinPageIsDeleted(page))
308+
if (GinPageIsRecyclable(BufferGetPage(buffer)))
314309
return buffer; /* OK to use */
315310

316311
LockBuffer(buffer, GIN_UNLOCK);

src/backend/access/gin/ginvacuum.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
159159
page = BufferGetPage(dBuffer);
160160
rightlink = GinPageGetOpaque(page)->rightlink;
161161

162+
/* For deleted page remember last xid which could knew its address */
163+
GinPageSetDeleteXid(page, ReadNewTransactionId());
164+
162165
page = BufferGetPage(lBuffer);
163166
GinPageGetOpaque(page)->rightlink = rightlink;
164167

@@ -206,6 +209,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
206209

207210
data.parentOffset = myoff;
208211
data.rightLink = GinPageGetOpaque(page)->rightlink;
212+
data.deleteXid = GinPageGetDeleteXid(page);
209213

210214
XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
211215

@@ -725,7 +729,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
725729
LockBuffer(buffer, GIN_SHARE);
726730
page = (Page) BufferGetPage(buffer);
727731

728-
if (PageIsNew(page) || GinPageIsDeleted(page))
732+
if (GinPageIsRecyclable(page))
729733
{
730734
Assert(blkno != GIN_ROOT_BLKNO);
731735
RecordFreeIndexPage(index, blkno);

src/backend/access/gin/ginxlog.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,7 @@ ginRedoDeletePage(XLogReaderState *record)
531531
page = BufferGetPage(dbuffer);
532532
Assert(GinPageIsData(page));
533533
GinPageGetOpaque(page)->flags = GIN_DELETED;
534+
GinPageSetDeleteXid(page, data->deleteXid);
534535
PageSetLSN(page, lsn);
535536
MarkBufferDirty(dbuffer);
536537
}

src/include/access/ginblock.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifndef GINBLOCK_H
1111
#define GINBLOCK_H
1212

13+
#include "access/transam.h"
1314
#include "storage/block.h"
1415
#include "storage/itemptr.h"
1516
#include "storage/off.h"
@@ -127,6 +128,15 @@ typedef struct GinMetaPageData
127128

128129
#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
129130

131+
/*
132+
* We should reclaim deleted page only once every transaction started before
133+
* its deletion is over.
134+
*/
135+
#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid )
136+
#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid)
137+
#define GinPageIsRecyclable(page) ( PageIsNew(page) || (GinPageIsDeleted(page) \
138+
&& TransactionIdPrecedes(GinPageGetDeleteXid(page), RecentGlobalXmin)))
139+
130140
/*
131141
* We use our own ItemPointerGet(BlockNumber|OffsetNumber)
132142
* to avoid Asserts, since sometimes the ip_posid isn't "valid"

src/include/access/ginxlog.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ typedef struct ginxlogDeletePage
158158
{
159159
OffsetNumber parentOffset;
160160
BlockNumber rightLink;
161+
TransactionId deleteXid; /* last Xid which could see this page in scan */
161162
} ginxlogDeletePage;
162163

163164
#define XLOG_GIN_UPDATE_META_PAGE 0x60

0 commit comments

Comments
 (0)