Skip to content

Commit fafa374

Browse files
Introduce WAL records to log reuse of btree pages, allowing conflict
resolution during Hot Standby. Page reuse interlock requested by Tom. Analysis and patch by me.
1 parent 4688869 commit fafa374

File tree

3 files changed

+111
-22
lines changed

3 files changed

+111
-22
lines changed

src/backend/access/nbtree/nbtpage.c

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $
12+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.119 2010/02/13 00:59:58 sriggs Exp $
1313
*
1414
* NOTES
1515
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -446,6 +446,48 @@ _bt_checkpage(Relation rel, Buffer buf)
446446
errhint("Please REINDEX it.")));
447447
}
448448

449+
/*
450+
* Log the reuse of a page from the FSM.
451+
*/
452+
static void
453+
_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
454+
{
455+
if (rel->rd_istemp)
456+
return;
457+
458+
/* No ereport(ERROR) until changes are logged */
459+
START_CRIT_SECTION();
460+
461+
/*
462+
* We don't do MarkBufferDirty here because we're about initialise
463+
* the page, and nobody else can see it yet.
464+
*/
465+
466+
/* XLOG stuff */
467+
{
468+
XLogRecPtr recptr;
469+
XLogRecData rdata[1];
470+
xl_btree_reuse_page xlrec_reuse;
471+
472+
xlrec_reuse.node = rel->rd_node;
473+
xlrec_reuse.block = blkno;
474+
xlrec_reuse.latestRemovedXid = latestRemovedXid;
475+
rdata[0].data = (char *) &xlrec_reuse;
476+
rdata[0].len = SizeOfBtreeReusePage;
477+
rdata[0].buffer = InvalidBuffer;
478+
rdata[0].next = NULL;
479+
480+
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);
481+
482+
/*
483+
* We don't do PageSetLSN or PageSetTLI here because
484+
* we're about initialise the page, so no need.
485+
*/
486+
}
487+
488+
END_CRIT_SECTION();
489+
}
490+
449491
/*
450492
* _bt_getbuf() -- Get a buffer by block number for read or write.
451493
*
@@ -510,7 +552,19 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
510552
{
511553
page = BufferGetPage(buf);
512554
if (_bt_page_recyclable(page))
513-
{
555+
{
556+
/*
557+
* If we are generating WAL for Hot Standby then create
558+
* a WAL record that will allow us to conflict with
559+
* queries running on standby.
560+
*/
561+
if (XLogStandbyInfoActive())
562+
{
563+
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
564+
565+
_bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
566+
}
567+
514568
/* Okay to use page. Re-initialize and return it */
515569
_bt_pageinit(page, BufferGetPageSize(buf));
516570
return buf;

src/backend/access/nbtree/nbtxlog.c

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.61 2010/02/13 00:59:58 sriggs Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -814,26 +814,48 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
814814
{
815815
uint8 info = record->xl_info & ~XLR_INFO_MASK;
816816

817-
/*
818-
* Btree delete records can conflict with standby queries. You might
819-
* think that vacuum records would conflict as well, but we've handled
820-
* that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
821-
* cleaned by the vacuum of the heap and so we can resolve any conflicts
822-
* just once when that arrives. After that any we know that no conflicts
823-
* exist from individual btree vacuum records on that index.
824-
*/
825-
if (InHotStandby && info == XLOG_BTREE_DELETE)
817+
if (InHotStandby)
826818
{
827-
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
819+
switch (info)
820+
{
821+
case XLOG_BTREE_DELETE:
822+
/*
823+
* Btree delete records can conflict with standby queries. You might
824+
* think that vacuum records would conflict as well, but we've handled
825+
* that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
826+
* cleaned by the vacuum of the heap and so we can resolve any conflicts
827+
* just once when that arrives. After that we know that no conflicts
828+
* exist from individual btree vacuum records on that index.
829+
*/
830+
{
831+
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
828832

829-
/*
830-
* XXX Currently we put everybody on death row, because
831-
* currently _bt_delitems() supplies InvalidTransactionId.
832-
* This can be fairly painful, so providing a better value
833-
* here is worth some thought and possibly some effort to
834-
* improve.
835-
*/
836-
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
833+
/*
834+
* XXX Currently we put everybody on death row, because
835+
* currently _bt_delitems() supplies InvalidTransactionId.
836+
* This can be fairly painful, so providing a better value
837+
* here is worth some thought and possibly some effort to
838+
* improve.
839+
*/
840+
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
841+
}
842+
break;
843+
844+
case XLOG_BTREE_REUSE_PAGE:
845+
/*
846+
* Btree reuse page records exist to provide a conflict point when we
847+
* reuse pages in the index via the FSM. That's all they do though.
848+
*/
849+
{
850+
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
851+
852+
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
853+
}
854+
return;
855+
856+
default:
857+
break;
858+
}
837859
}
838860

839861
/*

src/include/access/nbtree.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.128 2010/02/08 04:33:54 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.129 2010/02/13 00:59:58 sriggs Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -221,6 +221,7 @@ typedef struct BTMetaPageData
221221
#define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes
222222
* parent half-dead */
223223
#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */
224+
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from FSM */
224225

225226
/*
226227
* All that we need to find changed index tuple
@@ -321,6 +322,18 @@ typedef struct xl_btree_delete
321322

322323
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId))
323324

325+
/*
326+
* This is what we need to know about page reuse within btree.
327+
*/
328+
typedef struct xl_btree_reuse_page
329+
{
330+
RelFileNode node;
331+
BlockNumber block;
332+
TransactionId latestRemovedXid;
333+
} xl_btree_reuse_page;
334+
335+
#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
336+
324337
/*
325338
* This is what we need to know about vacuum of individual leaf index tuples.
326339
* The WAL record can represent deletion of any number of index tuples on a

0 commit comments

Comments
 (0)