Skip to content

Commit 4228817

Browse files
Fix index deletion latestRemovedXid bug.
The logic for determining the latest removed XID for the purposes of generating recovery conflicts in REDO routines was subtly broken. It failed to follow links from HOT chains, and so failed to consider all relevant heap tuple headers in some cases. To fix, expand the loop that deals with LP_REDIRECT line pointers to also deal with HOT chains. The new version of the loop is loosely based on a similar loop from heap_prune_chain(). The impact of this bug is probably quite limited, since the horizon code necessarily deals with heap tuples that are pointed to by LP_DEAD-set index tuples. The process of setting LP_DEAD index tuples (e.g. within the kill_prior_tuple mechanism) is highly correlated with opportunistic pruning of pointed-to heap tuples. Plus the question of generating a recovery conflict usually comes up some time after index tuple LP_DEAD bits were initially set, unlike heap pruning, where a latestRemovedXid is generated at the point of the pruning operation (heap pruning has no deferred "would-be page split" style processing that produces conflicts lazily). Only backpatch to Postgres 12, the first version where this logic runs during original execution (following commit 558a916). The index latestRemovedXid mechanism has had the same bug since it first appeared over 10 years ago (in commit a760893), but backpatching to all supported versions now seems like a bad idea on balance. Running the new improved code during recovery seems risky, especially given the lack of complaints from the field. Author: Peter Geoghegan <pg@bowt.ie> Discussion: https://postgr.es/m/CAH2-Wz=Eib393+HHcERK_9MtgNS7Ew1HY=RDC_g6GL46zM5C6Q@mail.gmail.com Backpatch: 12-
1 parent 319f4d5 commit 4228817

File tree

2 files changed

+79
-55
lines changed

2 files changed

+79
-55
lines changed

src/backend/access/heap/heapam.c

Lines changed: 70 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -6997,10 +6997,13 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
69976997
ItemPointerData *tids,
69986998
int nitems)
69996999
{
7000+
/* Initial assumption is that earlier pruning took care of conflict */
70007001
TransactionId latestRemovedXid = InvalidTransactionId;
7001-
BlockNumber hblkno;
7002+
BlockNumber blkno = InvalidBlockNumber;
70027003
Buffer buf = InvalidBuffer;
7003-
Page hpage;
7004+
Page page = NULL;
7005+
OffsetNumber maxoff = InvalidOffsetNumber;
7006+
TransactionId priorXmax;
70047007
#ifdef USE_PREFETCH
70057008
XidHorizonPrefetchState prefetch_state;
70067009
int prefetch_distance;
@@ -7040,20 +7043,17 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70407043
#endif
70417044

70427045
/* Iterate over all tids, and check their horizon */
7043-
hblkno = InvalidBlockNumber;
7044-
hpage = NULL;
70457046
for (int i = 0; i < nitems; i++)
70467047
{
70477048
ItemPointer htid = &tids[i];
7048-
ItemId hitemid;
7049-
OffsetNumber hoffnum;
7049+
OffsetNumber offnum;
70507050

70517051
/*
70527052
* Read heap buffer, but avoid refetching if it's the same block as
70537053
* required for the last tid.
70547054
*/
7055-
if (hblkno == InvalidBlockNumber ||
7056-
ItemPointerGetBlockNumber(htid) != hblkno)
7055+
if (blkno == InvalidBlockNumber ||
7056+
ItemPointerGetBlockNumber(htid) != blkno)
70577057
{
70587058
/* release old buffer */
70597059
if (BufferIsValid(buf))
@@ -7062,9 +7062,9 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70627062
ReleaseBuffer(buf);
70637063
}
70647064

7065-
hblkno = ItemPointerGetBlockNumber(htid);
7065+
blkno = ItemPointerGetBlockNumber(htid);
70667066

7067-
buf = ReadBuffer(rel, hblkno);
7067+
buf = ReadBuffer(rel, blkno);
70687068

70697069
#ifdef USE_PREFETCH
70707070

@@ -7075,49 +7075,79 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
70757075
xid_horizon_prefetch_buffer(rel, &prefetch_state, 1);
70767076
#endif
70777077

7078-
hpage = BufferGetPage(buf);
7078+
page = BufferGetPage(buf);
7079+
maxoff = PageGetMaxOffsetNumber(page);
70797080

70807081
LockBuffer(buf, BUFFER_LOCK_SHARE);
70817082
}
70827083

7083-
hoffnum = ItemPointerGetOffsetNumber(htid);
7084-
hitemid = PageGetItemId(hpage, hoffnum);
7085-
70867084
/*
7087-
* Follow any redirections until we find something useful.
7085+
* Maintain latestRemovedXid value for deletion operation as a whole
7086+
* by advancing current value using heap tuple headers. This is
7087+
* loosely based on the logic for pruning a HOT chain.
70887088
*/
7089-
while (ItemIdIsRedirected(hitemid))
7089+
offnum = ItemPointerGetOffsetNumber(htid);
7090+
priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7091+
for (;;)
70907092
{
7091-
hoffnum = ItemIdGetRedirect(hitemid);
7092-
hitemid = PageGetItemId(hpage, hoffnum);
7093-
}
7093+
ItemId lp;
7094+
HeapTupleHeader htup;
70947095

7095-
/*
7096-
* If the heap item has storage, then read the header and use that to
7097-
* set latestRemovedXid.
7098-
*
7099-
* Some LP_DEAD items may not be accessible, so we ignore them.
7100-
*/
7101-
if (ItemIdHasStorage(hitemid))
7102-
{
7103-
HeapTupleHeader htuphdr;
7096+
/* Some sanity checks */
7097+
if (offnum < FirstOffsetNumber || offnum > maxoff)
7098+
{
7099+
Assert(false);
7100+
break;
7101+
}
71047102

7105-
htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid);
7103+
lp = PageGetItemId(page, offnum);
7104+
if (ItemIdIsRedirected(lp))
7105+
{
7106+
offnum = ItemIdGetRedirect(lp);
7107+
continue;
7108+
}
71067109

7107-
HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid);
7108-
}
7109-
else if (ItemIdIsDead(hitemid))
7110-
{
71117110
/*
7112-
* Conjecture: if hitemid is dead then it had xids before the xids
7113-
* marked on LP_NORMAL items. So we just ignore this item and move
7114-
* onto the next, for the purposes of calculating
7115-
* latestRemovedXid.
7111+
* We'll often encounter LP_DEAD line pointers. No need to do
7112+
* anything more with htid when that happens. This is okay
7113+
* because the earlier pruning operation that made the line
7114+
* pointer LP_DEAD in the first place must have considered the
7115+
* tuple header as part of generating its own latestRemovedXid
7116+
* value.
7117+
*
7118+
* Relying on XLOG_HEAP2_CLEANUP_INFO records like this is the
7119+
* same strategy that index vacuuming uses in all cases. Index
7120+
* VACUUM WAL records don't even have a latestRemovedXid field of
7121+
* their own for this reason.
71167122
*/
7117-
}
7118-
else
7119-
Assert(!ItemIdIsUsed(hitemid));
7123+
if (!ItemIdIsNormal(lp))
7124+
break;
7125+
7126+
htup = (HeapTupleHeader) PageGetItem(page, lp);
7127+
7128+
/*
7129+
* Check the tuple XMIN against prior XMAX, if any
7130+
*/
7131+
if (TransactionIdIsValid(priorXmax) &&
7132+
!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7133+
break;
71207134

7135+
HeapTupleHeaderAdvanceLatestRemovedXid(htup, &latestRemovedXid);
7136+
7137+
/*
7138+
* If the tuple is not HOT-updated, then we are at the end of this
7139+
* HOT-chain. No need to visit later tuples from the same update
7140+
* chain (they get their own index entries) -- just move on to
7141+
* next htid from index AM caller.
7142+
*/
7143+
if (!HeapTupleHeaderIsHotUpdated(htup))
7144+
break;
7145+
7146+
/* Advance to next HOT chain member */
7147+
Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7148+
offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7149+
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7150+
}
71217151
}
71227152

71237153
if (BufferIsValid(buf))
@@ -7126,14 +7156,6 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
71267156
ReleaseBuffer(buf);
71277157
}
71287158

7129-
/*
7130-
* If all heap tuples were LP_DEAD then we will be returning
7131-
* InvalidTransactionId here, which avoids conflicts. This matches
7132-
* existing logic which assumes that LP_DEAD tuples must already be older
7133-
* than the latestRemovedXid on the cleanup record that set them as
7134-
* LP_DEAD, hence must already have generated a conflict.
7135-
*/
7136-
71377159
return latestRemovedXid;
71387160
}
71397161

src/backend/storage/ipc/standby.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -305,13 +305,15 @@ ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode
305305
VirtualTransactionId *backends;
306306

307307
/*
308-
* If we get passed InvalidTransactionId then we are a little surprised,
309-
* but it is theoretically possible in normal running. It also happens
310-
* when replaying already applied WAL records after a standby crash or
311-
* restart, or when replaying an XLOG_HEAP2_VISIBLE record that marks as
312-
* frozen a page which was already all-visible. If latestRemovedXid is
313-
* invalid then there is no conflict. That rule applies across all record
314-
* types that suffer from this conflict.
308+
* If we get passed InvalidTransactionId then we do nothing (no conflict).
309+
*
310+
* This can happen when replaying already-applied WAL records after a
311+
* standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE
312+
* record that marks as frozen a page which was already all-visible. It's
313+
* also quite common with records generated during index deletion
314+
* (original execution of the deletion can reason that a recovery conflict
315+
* which is sufficient for the deletion operation must take place before
316+
* replay of the deletion record itself).
315317
*/
316318
if (!TransactionIdIsValid(latestRemovedXid))
317319
return;

0 commit comments

Comments
 (0)