Skip to content

Commit 7a57960

Browse files
Fix index deletion latestRemovedXid bug.
The logic for determining the latest removed XID for the purposes of generating recovery conflicts in REDO routines was subtly broken. It failed to follow links from HOT chains, and so failed to consider all relevant heap tuple headers in some cases. To fix, expand the loop that deals with LP_REDIRECT line pointers to also deal with HOT chains. The new version of the loop is loosely based on a similar loop from heap_prune_chain(). The impact of this bug is probably quite limited, since the horizon code necessarily deals with heap tuples that are pointed to by LP_DEAD-set index tuples. The process of setting LP_DEAD index tuples (e.g. within the kill_prior_tuple mechanism) is highly correlated with opportunistic pruning of pointed-to heap tuples. Plus the question of generating a recovery conflict usually comes up some time after index tuple LP_DEAD bits were initially set, unlike heap pruning, where a latestRemovedXid is generated at the point of the pruning operation (heap pruning has no deferred "would-be page split" style processing that produces conflicts lazily). Only backpatch to Postgres 12, the first version where this logic runs during original execution (following commit 558a916). The index latestRemovedXid mechanism has had the same bug since it first appeared over 10 years ago (in commit a760893), but backpatching to all supported versions now seems like a bad idea on balance. Running the new improved code during recovery seems risky, especially given the lack of complaints from the field. Author: Peter Geoghegan <pg@bowt.ie> Discussion: https://postgr.es/m/CAH2-Wz=Eib393+HHcERK_9MtgNS7Ew1HY=RDC_g6GL46zM5C6Q@mail.gmail.com Backpatch: 12-
1 parent 624fd9e commit 7a57960

File tree

2 files changed

+79
-56
lines changed

2 files changed

+79
-56
lines changed

src/backend/access/heap/heapam.c

Lines changed: 70 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6964,10 +6964,13 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
69646964
ItemPointerData *tids,
69656965
int nitems)
69666966
{
6967+
/* Initial assumption is that earlier pruning took care of conflict */
69676968
TransactionId latestRemovedXid = InvalidTransactionId;
6968-
BlockNumber hblkno;
6969+
BlockNumber blkno = InvalidBlockNumber;
69696970
Buffer buf = InvalidBuffer;
6970-
Page hpage;
6971+
Page page = NULL;
6972+
OffsetNumber maxoff = InvalidOffsetNumber;
6973+
TransactionId priorXmax;
69716974
#ifdef USE_PREFETCH
69726975
XidHorizonPrefetchState prefetch_state;
69736976
int prefetch_distance;
@@ -7007,20 +7010,17 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70077010
#endif
70087011

70097012
/* Iterate over all tids, and check their horizon */
7010-
hblkno = InvalidBlockNumber;
7011-
hpage = NULL;
70127013
for (int i = 0; i < nitems; i++)
70137014
{
70147015
ItemPointer htid = &tids[i];
7015-
ItemId hitemid;
7016-
OffsetNumber hoffnum;
7016+
OffsetNumber offnum;
70177017

70187018
/*
70197019
* Read heap buffer, but avoid refetching if it's the same block as
70207020
* required for the last tid.
70217021
*/
7022-
if (hblkno == InvalidBlockNumber ||
7023-
ItemPointerGetBlockNumber(htid) != hblkno)
7022+
if (blkno == InvalidBlockNumber ||
7023+
ItemPointerGetBlockNumber(htid) != blkno)
70247024
{
70257025
/* release old buffer */
70267026
if (BufferIsValid(buf))
@@ -7029,9 +7029,9 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70297029
ReleaseBuffer(buf);
70307030
}
70317031

7032-
hblkno = ItemPointerGetBlockNumber(htid);
7032+
blkno = ItemPointerGetBlockNumber(htid);
70337033

7034-
buf = ReadBuffer(rel, hblkno);
7034+
buf = ReadBuffer(rel, blkno);
70357035

70367036
#ifdef USE_PREFETCH
70377037

@@ -7042,50 +7042,79 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70427042
xid_horizon_prefetch_buffer(rel, &prefetch_state, 1);
70437043
#endif
70447044

7045-
hpage = BufferGetPage(buf);
7045+
page = BufferGetPage(buf);
7046+
maxoff = PageGetMaxOffsetNumber(page);
70467047

70477048
LockBuffer(buf, BUFFER_LOCK_SHARE);
70487049
}
70497050

7050-
hoffnum = ItemPointerGetOffsetNumber(htid);
7051-
hitemid = PageGetItemId(hpage, hoffnum);
7052-
70537051
/*
7054-
* Follow any redirections until we find something useful.
7052+
* Maintain latestRemovedXid value for deletion operation as a whole
7053+
* by advancing current value using heap tuple headers. This is
7054+
* loosely based on the logic for pruning a HOT chain.
70557055
*/
7056-
while (ItemIdIsRedirected(hitemid))
7056+
offnum = ItemPointerGetOffsetNumber(htid);
7057+
priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7058+
for (;;)
70577059
{
7058-
hoffnum = ItemIdGetRedirect(hitemid);
7059-
hitemid = PageGetItemId(hpage, hoffnum);
7060-
CHECK_FOR_INTERRUPTS();
7061-
}
7060+
ItemId lp;
7061+
HeapTupleHeader htup;
70627062

7063-
/*
7064-
* If the heap item has storage, then read the header and use that to
7065-
* set latestRemovedXid.
7066-
*
7067-
* Some LP_DEAD items may not be accessible, so we ignore them.
7068-
*/
7069-
if (ItemIdHasStorage(hitemid))
7070-
{
7071-
HeapTupleHeader htuphdr;
7063+
/* Some sanity checks */
7064+
if (offnum < FirstOffsetNumber || offnum > maxoff)
7065+
{
7066+
Assert(false);
7067+
break;
7068+
}
70727069

7073-
htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid);
7070+
lp = PageGetItemId(page, offnum);
7071+
if (ItemIdIsRedirected(lp))
7072+
{
7073+
offnum = ItemIdGetRedirect(lp);
7074+
continue;
7075+
}
70747076

7075-
HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid);
7076-
}
7077-
else if (ItemIdIsDead(hitemid))
7078-
{
70797077
/*
7080-
* Conjecture: if hitemid is dead then it had xids before the xids
7081-
* marked on LP_NORMAL items. So we just ignore this item and move
7082-
* onto the next, for the purposes of calculating
7083-
* latestRemovedXid.
7078+
* We'll often encounter LP_DEAD line pointers. No need to do
7079+
* anything more with htid when that happens. This is okay
7080+
* because the earlier pruning operation that made the line
7081+
* pointer LP_DEAD in the first place must have considered the
7082+
* tuple header as part of generating its own latestRemovedXid
7083+
* value.
7084+
*
7085+
* Relying on XLOG_HEAP2_CLEANUP_INFO records like this is the
7086+
* same strategy that index vacuuming uses in all cases. Index
7087+
* VACUUM WAL records don't even have a latestRemovedXid field of
7088+
* their own for this reason.
70847089
*/
7085-
}
7086-
else
7087-
Assert(!ItemIdIsUsed(hitemid));
7090+
if (!ItemIdIsNormal(lp))
7091+
break;
7092+
7093+
htup = (HeapTupleHeader) PageGetItem(page, lp);
7094+
7095+
/*
7096+
* Check the tuple XMIN against prior XMAX, if any
7097+
*/
7098+
if (TransactionIdIsValid(priorXmax) &&
7099+
!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7100+
break;
70887101

7102+
HeapTupleHeaderAdvanceLatestRemovedXid(htup, &latestRemovedXid);
7103+
7104+
/*
7105+
* If the tuple is not HOT-updated, then we are at the end of this
7106+
* HOT-chain. No need to visit later tuples from the same update
7107+
* chain (they get their own index entries) -- just move on to
7108+
* next htid from index AM caller.
7109+
*/
7110+
if (!HeapTupleHeaderIsHotUpdated(htup))
7111+
break;
7112+
7113+
/* Advance to next HOT chain member */
7114+
Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7115+
offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7116+
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7117+
}
70897118
}
70907119

70917120
if (BufferIsValid(buf))
@@ -7094,14 +7123,6 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70947123
ReleaseBuffer(buf);
70957124
}
70967125

7097-
/*
7098-
* If all heap tuples were LP_DEAD then we will be returning
7099-
* InvalidTransactionId here, which avoids conflicts. This matches
7100-
* existing logic which assumes that LP_DEAD tuples must already be older
7101-
* than the latestRemovedXid on the cleanup record that set them as
7102-
* LP_DEAD, hence must already have generated a conflict.
7103-
*/
7104-
71057126
return latestRemovedXid;
71067127
}
71077128

src/backend/storage/ipc/standby.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -306,13 +306,15 @@ ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode
306306
VirtualTransactionId *backends;
307307

308308
/*
309-
* If we get passed InvalidTransactionId then we are a little surprised,
310-
* but it is theoretically possible in normal running. It also happens
311-
* when replaying already applied WAL records after a standby crash or
312-
* restart, or when replaying an XLOG_HEAP2_VISIBLE record that marks as
313-
* frozen a page which was already all-visible. If latestRemovedXid is
314-
* invalid then there is no conflict. That rule applies across all record
315-
* types that suffer from this conflict.
309+
* If we get passed InvalidTransactionId then we do nothing (no conflict).
310+
*
311+
* This can happen when replaying already-applied WAL records after a
312+
* standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE
313+
* record that marks as frozen a page which was already all-visible. It's
314+
* also quite common with records generated during index deletion
315+
* (original execution of the deletion can reason that a recovery conflict
316+
* which is sufficient for the deletion operation must take place before
317+
* replay of the deletion record itself).
316318
*/
317319
if (!TransactionIdIsValid(latestRemovedXid))
318320
return;

0 commit comments

Comments
 (0)