Skip to content

Commit 4f70e09

Browse files
Fix index deletion latestRemovedXid bug.
The logic for determining the latest removed XID for the purposes of generating recovery conflicts in REDO routines was subtly broken. It failed to follow links from HOT chains, and so failed to consider all relevant heap tuple headers in some cases. To fix, expand the loop that deals with LP_REDIRECT line pointers to also deal with HOT chains. The new version of the loop is loosely based on a similar loop from heap_prune_chain(). The impact of this bug is probably quite limited, since the horizon code necessarily deals with heap tuples that are pointed to by LP_DEAD-set index tuples. The process of setting LP_DEAD index tuples (e.g. within the kill_prior_tuple mechanism) is highly correlated with opportunistic pruning of pointed-to heap tuples. Plus the question of generating a recovery conflict usually comes up some time after index tuple LP_DEAD bits were initially set, unlike heap pruning, where a latestRemovedXid is generated at the point of the pruning operation (heap pruning has no deferred "would-be page split" style processing that produces conflicts lazily). Only backpatch to Postgres 12, the first version where this logic runs during original execution (following commit 558a916). The index latestRemovedXid mechanism has had the same bug since it first appeared over 10 years ago (in commit a760893), but backpatching to all supported versions now seems like a bad idea on balance. Running the new improved code during recovery seems risky, especially given the lack of complaints from the field. Author: Peter Geoghegan <pg@bowt.ie> Discussion: https://postgr.es/m/CAH2-Wz=Eib393+HHcERK_9MtgNS7Ew1HY=RDC_g6GL46zM5C6Q@mail.gmail.com Backpatch: 12-
1 parent e3bfdf2 commit 4f70e09

File tree

2 files changed

+79
-56
lines changed

2 files changed

+79
-56
lines changed

src/backend/access/heap/heapam.c

Lines changed: 70 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6996,10 +6996,13 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
69966996
ItemPointerData *tids,
69976997
int nitems)
69986998
{
6999+
/* Initial assumption is that earlier pruning took care of conflict */
69997000
TransactionId latestRemovedXid = InvalidTransactionId;
7000-
BlockNumber hblkno;
7001+
BlockNumber blkno = InvalidBlockNumber;
70017002
Buffer buf = InvalidBuffer;
7002-
Page hpage;
7003+
Page page = NULL;
7004+
OffsetNumber maxoff = InvalidOffsetNumber;
7005+
TransactionId priorXmax;
70037006
#ifdef USE_PREFETCH
70047007
XidHorizonPrefetchState prefetch_state;
70057008
int io_concurrency;
@@ -7049,20 +7052,17 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70497052
#endif
70507053

70517054
/* Iterate over all tids, and check their horizon */
7052-
hblkno = InvalidBlockNumber;
7053-
hpage = NULL;
70547055
for (int i = 0; i < nitems; i++)
70557056
{
70567057
ItemPointer htid = &tids[i];
7057-
ItemId hitemid;
7058-
OffsetNumber hoffnum;
7058+
OffsetNumber offnum;
70597059

70607060
/*
70617061
* Read heap buffer, but avoid refetching if it's the same block as
70627062
* required for the last tid.
70637063
*/
7064-
if (hblkno == InvalidBlockNumber ||
7065-
ItemPointerGetBlockNumber(htid) != hblkno)
7064+
if (blkno == InvalidBlockNumber ||
7065+
ItemPointerGetBlockNumber(htid) != blkno)
70667066
{
70677067
/* release old buffer */
70687068
if (BufferIsValid(buf))
@@ -7071,9 +7071,9 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70717071
ReleaseBuffer(buf);
70727072
}
70737073

7074-
hblkno = ItemPointerGetBlockNumber(htid);
7074+
blkno = ItemPointerGetBlockNumber(htid);
70757075

7076-
buf = ReadBuffer(rel, hblkno);
7076+
buf = ReadBuffer(rel, blkno);
70777077

70787078
#ifdef USE_PREFETCH
70797079

@@ -7084,50 +7084,79 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70847084
xid_horizon_prefetch_buffer(rel, &prefetch_state, 1);
70857085
#endif
70867086

7087-
hpage = BufferGetPage(buf);
7087+
page = BufferGetPage(buf);
7088+
maxoff = PageGetMaxOffsetNumber(page);
70887089

70897090
LockBuffer(buf, BUFFER_LOCK_SHARE);
70907091
}
70917092

7092-
hoffnum = ItemPointerGetOffsetNumber(htid);
7093-
hitemid = PageGetItemId(hpage, hoffnum);
7094-
70957093
/*
7096-
* Follow any redirections until we find something useful.
7094+
* Maintain latestRemovedXid value for deletion operation as a whole
7095+
* by advancing current value using heap tuple headers. This is
7096+
* loosely based on the logic for pruning a HOT chain.
70977097
*/
7098-
while (ItemIdIsRedirected(hitemid))
7098+
offnum = ItemPointerGetOffsetNumber(htid);
7099+
priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7100+
for (;;)
70997101
{
7100-
hoffnum = ItemIdGetRedirect(hitemid);
7101-
hitemid = PageGetItemId(hpage, hoffnum);
7102-
CHECK_FOR_INTERRUPTS();
7103-
}
7102+
ItemId lp;
7103+
HeapTupleHeader htup;
71047104

7105-
/*
7106-
* If the heap item has storage, then read the header and use that to
7107-
* set latestRemovedXid.
7108-
*
7109-
* Some LP_DEAD items may not be accessible, so we ignore them.
7110-
*/
7111-
if (ItemIdHasStorage(hitemid))
7112-
{
7113-
HeapTupleHeader htuphdr;
7105+
/* Some sanity checks */
7106+
if (offnum < FirstOffsetNumber || offnum > maxoff)
7107+
{
7108+
Assert(false);
7109+
break;
7110+
}
71147111

7115-
htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid);
7112+
lp = PageGetItemId(page, offnum);
7113+
if (ItemIdIsRedirected(lp))
7114+
{
7115+
offnum = ItemIdGetRedirect(lp);
7116+
continue;
7117+
}
71167118

7117-
HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid);
7118-
}
7119-
else if (ItemIdIsDead(hitemid))
7120-
{
71217119
/*
7122-
* Conjecture: if hitemid is dead then it had xids before the xids
7123-
* marked on LP_NORMAL items. So we just ignore this item and move
7124-
* onto the next, for the purposes of calculating
7125-
* latestRemovedXid.
7120+
* We'll often encounter LP_DEAD line pointers. No need to do
7121+
* anything more with htid when that happens. This is okay
7122+
* because the earlier pruning operation that made the line
7123+
* pointer LP_DEAD in the first place must have considered the
7124+
* tuple header as part of generating its own latestRemovedXid
7125+
* value.
7126+
*
7127+
* Relying on XLOG_HEAP2_CLEANUP_INFO records like this is the
7128+
* same strategy that index vacuuming uses in all cases. Index
7129+
* VACUUM WAL records don't even have a latestRemovedXid field of
7130+
* their own for this reason.
71267131
*/
7127-
}
7128-
else
7129-
Assert(!ItemIdIsUsed(hitemid));
7132+
if (!ItemIdIsNormal(lp))
7133+
break;
7134+
7135+
htup = (HeapTupleHeader) PageGetItem(page, lp);
7136+
7137+
/*
7138+
* Check the tuple XMIN against prior XMAX, if any
7139+
*/
7140+
if (TransactionIdIsValid(priorXmax) &&
7141+
!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7142+
break;
71307143

7144+
HeapTupleHeaderAdvanceLatestRemovedXid(htup, &latestRemovedXid);
7145+
7146+
/*
7147+
* If the tuple is not HOT-updated, then we are at the end of this
7148+
* HOT-chain. No need to visit later tuples from the same update
7149+
* chain (they get their own index entries) -- just move on to
7150+
* next htid from index AM caller.
7151+
*/
7152+
if (!HeapTupleHeaderIsHotUpdated(htup))
7153+
break;
7154+
7155+
/* Advance to next HOT chain member */
7156+
Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7157+
offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7158+
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7159+
}
71317160
}
71327161

71337162
if (BufferIsValid(buf))
@@ -7136,14 +7165,6 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
71367165
ReleaseBuffer(buf);
71377166
}
71387167

7139-
/*
7140-
* If all heap tuples were LP_DEAD then we will be returning
7141-
* InvalidTransactionId here, which avoids conflicts. This matches
7142-
* existing logic which assumes that LP_DEAD tuples must already be older
7143-
* than the latestRemovedXid on the cleanup record that set them as
7144-
* LP_DEAD, hence must already have generated a conflict.
7145-
*/
7146-
71477168
return latestRemovedXid;
71487169
}
71497170

src/backend/storage/ipc/standby.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -301,13 +301,15 @@ ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode
301301
VirtualTransactionId *backends;
302302

303303
/*
304-
* If we get passed InvalidTransactionId then we are a little surprised,
305-
* but it is theoretically possible in normal running. It also happens
306-
* when replaying already applied WAL records after a standby crash or
307-
* restart, or when replaying an XLOG_HEAP2_VISIBLE record that marks as
308-
* frozen a page which was already all-visible. If latestRemovedXid is
309-
* invalid then there is no conflict. That rule applies across all record
310-
* types that suffer from this conflict.
304+
* If we get passed InvalidTransactionId then we do nothing (no conflict).
305+
*
306+
* This can happen when replaying already-applied WAL records after a
307+
* standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE
308+
* record that marks as frozen a page which was already all-visible. It's
309+
* also quite common with records generated during index deletion
310+
* (original execution of the deletion can reason that a recovery conflict
311+
* which is sufficient for the deletion operation must take place before
312+
* replay of the deletion record itself).
311313
*/
312314
if (!TransactionIdIsValid(latestRemovedXid))
313315
return;

0 commit comments

Comments
 (0)