Skip to content

Commit 7aa00f1

Browse files
committed
Refactor heap_prune_chain()
Keep track of the number of deleted tuples in PruneState and record this information when recording a tuple dead, unused or redirected. This removes a special case from the traversal and chain processing logic as well as setting a precedent of recording the impact of prune actions in the record functions themselves. This paradigm will be used in future commits which move tracking of additional statistics on pruning actions from lazy_scan_prune() to heap_prune_chain(). Simplify heap_prune_chain()'s chain traversal logic by handling each case explicitly. That is, do not attempt to share code when processing different types of chains. For each category of chain, process it specifically and procedurally: first handling the root, then any intervening tuples, and, finally, the end of the chain. While we are at it, add a few new comments to heap_prune_chain() clarifying some special cases involving RECENTLY_DEAD tuples. Author: Melanie Plageman <melanieplageman@gmail.com> Discussion: https://www.postgresql.org/message-id/20240330055710.kqg6ii2cdojsxgje@liskov
1 parent 9917e79 commit 7aa00f1

File tree

1 file changed

+116
-91
lines changed

1 file changed

+116
-91
lines changed

src/backend/access/heap/pruneheap.c

Lines changed: 116 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -51,20 +51,22 @@ typedef struct
5151
* 1. Otherwise every access would need to subtract 1.
5252
*/
5353
bool marked[MaxHeapTuplesPerPage + 1];
54+
55+
int ndeleted; /* Number of tuples deleted from the page */
5456
} PruneState;
5557

5658
/* Local functions */
5759
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
5860
HeapTuple tup,
5961
Buffer buffer);
60-
static int heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
62+
static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
6163
OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate);
6264
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
6365
static void heap_prune_record_redirect(PruneState *prstate,
64-
OffsetNumber offnum, OffsetNumber rdoffnum);
65-
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
66-
static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum);
67-
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
66+
OffsetNumber offnum, OffsetNumber rdoffnum, bool was_normal);
67+
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum, bool was_normal);
68+
static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
69+
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
6870
static void page_verify_redirects(Page page);
6971

7072

@@ -241,6 +243,7 @@ heap_page_prune(Relation relation, Buffer buffer,
241243
prstate.snapshotConflictHorizon = InvalidTransactionId;
242244
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
243245
memset(prstate.marked, 0, sizeof(prstate.marked));
246+
prstate.ndeleted = 0;
244247

245248
/*
246249
* presult->htsv is not initialized here because all ntuple spots in the
@@ -321,8 +324,8 @@ heap_page_prune(Relation relation, Buffer buffer,
321324
continue;
322325

323326
/* Process this item or chain of items */
324-
presult->ndeleted += heap_prune_chain(page, blockno, maxoff, offnum,
325-
presult->htsv, &prstate);
327+
heap_prune_chain(page, blockno, maxoff,
328+
offnum, presult->htsv, &prstate);
326329
}
327330

328331
/* Clear the offset information once we have processed the given page. */
@@ -394,8 +397,9 @@ heap_page_prune(Relation relation, Buffer buffer,
394397

395398
END_CRIT_SECTION();
396399

397-
/* Record number of newly-set-LP_DEAD items for caller */
400+
/* Copy information back for caller */
398401
presult->nnewlpdead = prstate.ndead;
402+
presult->ndeleted = prstate.ndeleted;
399403
}
400404

401405

@@ -444,22 +448,23 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
444448
* to the redirected[] array (two entries per redirection); items to be set to
445449
* LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
446450
* state are added to nowunused[].
447-
*
448-
* Returns the number of tuples (to be) deleted from the page.
449451
*/
450-
static int
452+
static void
451453
heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
452454
OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate)
453455
{
454-
int ndeleted = 0;
455456
TransactionId priorXmax = InvalidTransactionId;
456457
ItemId rootlp;
457458
HeapTupleHeader htup;
458-
OffsetNumber latestdead = InvalidOffsetNumber,
459-
offnum;
459+
OffsetNumber offnum;
460460
OffsetNumber chainitems[MaxHeapTuplesPerPage];
461-
int nchain = 0,
462-
i;
461+
462+
/*
463+
* After traversing the HOT chain, ndeadchain is the index in chainitems
464+
* of the first live successor after the last dead item.
465+
*/
466+
int ndeadchain = 0,
467+
nchain = 0;
463468

464469
rootlp = PageGetItemId(page, rootoffnum);
465470

@@ -494,14 +499,12 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
494499
if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
495500
!HeapTupleHeaderIsHotUpdated(htup))
496501
{
497-
heap_prune_record_unused(prstate, rootoffnum);
502+
heap_prune_record_unused(prstate, rootoffnum, true);
498503
HeapTupleHeaderAdvanceConflictHorizon(htup,
499504
&prstate->snapshotConflictHorizon);
500-
ndeleted++;
501505
}
502506

503-
/* Nothing more to do */
504-
return ndeleted;
507+
return;
505508
}
506509
}
507510

@@ -512,8 +515,6 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
512515
for (;;)
513516
{
514517
ItemId lp;
515-
bool tupdead,
516-
recent_dead;
517518

518519
/* Sanity check (pure paranoia) */
519520
if (offnum < FirstOffsetNumber)
@@ -563,7 +564,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
563564
* the LP was already marked dead.
564565
*/
565566
if (unlikely(prstate->mark_unused_now))
566-
heap_prune_record_unused(prstate, offnum);
567+
heap_prune_record_unused(prstate, offnum, false);
567568

568569
break;
569570
}
@@ -586,23 +587,41 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
586587
/*
587588
* Check tuple's visibility status.
588589
*/
589-
tupdead = recent_dead = false;
590-
591590
switch (htsv_get_valid_status(htsv[offnum]))
592591
{
593592
case HEAPTUPLE_DEAD:
594-
tupdead = true;
593+
594+
/* Remember the last DEAD tuple seen */
595+
ndeadchain = nchain;
596+
HeapTupleHeaderAdvanceConflictHorizon(htup,
597+
&prstate->snapshotConflictHorizon);
598+
599+
/* Advance to next chain member */
595600
break;
596601

597602
case HEAPTUPLE_RECENTLY_DEAD:
598-
recent_dead = true;
599603

600604
/*
601605
* This tuple may soon become DEAD. Update the hint field so
602606
* that the page is reconsidered for pruning in future.
607+
*
608+
* We don't need to advance the conflict horizon for
609+
* RECENTLY_DEAD tuples, even if we are removing them. This
610+
* is because we only remove RECENTLY_DEAD tuples if they
611+
* precede a DEAD tuple, and the DEAD tuple must have been
612+
* inserted by a newer transaction than the RECENTLY_DEAD
613+
* tuple by virtue of being later in the chain. We will have
614+
* advanced the conflict horizon for the DEAD tuple.
603615
*/
604616
heap_prune_record_prunable(prstate,
605617
HeapTupleHeaderGetUpdateXid(htup));
618+
619+
/*
620+
* Advance past RECENTLY_DEAD tuples just in case there's a
621+
* DEAD one after them. We have to make sure that we don't
622+
* miss any DEAD tuples, since DEAD tuples that still have
623+
* tuple storage after pruning will confuse VACUUM.
624+
*/
606625
break;
607626

608627
case HEAPTUPLE_DELETE_IN_PROGRESS:
@@ -613,7 +632,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
613632
*/
614633
heap_prune_record_prunable(prstate,
615634
HeapTupleHeaderGetUpdateXid(htup));
616-
break;
635+
goto process_chain;
617636

618637
case HEAPTUPLE_LIVE:
619638
case HEAPTUPLE_INSERT_IN_PROGRESS:
@@ -624,35 +643,19 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
624643
* But we don't. See related decisions about when to mark the
625644
* page prunable in heapam.c.
626645
*/
627-
break;
646+
goto process_chain;
628647

629648
default:
630649
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
631-
break;
650+
goto process_chain;
632651
}
633652

634-
/*
635-
* Remember the last DEAD tuple seen. We will advance past
636-
* RECENTLY_DEAD tuples just in case there's a DEAD one after them;
637-
* but we can't advance past anything else. We have to make sure that
638-
* we don't miss any DEAD tuples, since DEAD tuples that still have
639-
* tuple storage after pruning will confuse VACUUM.
640-
*/
641-
if (tupdead)
642-
{
643-
latestdead = offnum;
644-
HeapTupleHeaderAdvanceConflictHorizon(htup,
645-
&prstate->snapshotConflictHorizon);
646-
}
647-
else if (!recent_dead)
648-
break;
649-
650653
/*
651654
* If the tuple is not HOT-updated, then we are at the end of this
652655
* HOT-update chain.
653656
*/
654657
if (!HeapTupleHeaderIsHotUpdated(htup))
655-
break;
658+
goto process_chain;
656659

657660
/* HOT implies it can't have moved to different partition */
658661
Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
@@ -665,57 +668,52 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
665668
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
666669
}
667670

668-
/*
669-
* If we found a DEAD tuple in the chain, adjust the HOT chain so that all
670-
* the DEAD tuples at the start of the chain are removed and the root line
671-
* pointer is appropriately redirected.
672-
*/
673-
if (OffsetNumberIsValid(latestdead))
671+
if (ItemIdIsRedirected(rootlp) && nchain < 2)
674672
{
675673
/*
676-
* Mark as unused each intermediate item that we are able to remove
677-
* from the chain.
678-
*
679-
* When the previous item is the last dead tuple seen, we are at the
680-
* right candidate for redirection.
674+
* We found a redirect item that doesn't point to a valid follow-on
675+
* item. This can happen if the loop in heap_page_prune caused us to
676+
* visit the dead successor of a redirect item before visiting the
677+
* redirect item. We can clean up by setting the redirect item to
678+
* LP_DEAD state or LP_UNUSED if the caller indicated.
681679
*/
682-
for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
683-
{
684-
heap_prune_record_unused(prstate, chainitems[i]);
685-
ndeleted++;
686-
}
680+
heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
681+
return;
682+
}
687683

688-
/*
689-
* If the root entry had been a normal tuple, we are deleting it, so
690-
* count it in the result. But changing a redirect (even to DEAD
691-
* state) doesn't count.
692-
*/
693-
if (ItemIdIsNormal(rootlp))
694-
ndeleted++;
684+
process_chain:
695685

686+
if (ndeadchain == 0)
687+
{
696688
/*
697-
* If the DEAD tuple is at the end of the chain, the entire chain is
698-
* dead and the root line pointer can be marked dead. Otherwise just
699-
* redirect the root to the correct chain member.
689+
* No DEAD tuple was found, so the chain is entirely composed of
690+
* normal, unchanged tuples. Leave it alone.
700691
*/
701-
if (i >= nchain)
702-
heap_prune_record_dead_or_unused(prstate, rootoffnum);
703-
else
704-
heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
705692
}
706-
else if (nchain < 2 && ItemIdIsRedirected(rootlp))
693+
else if (ndeadchain == nchain)
707694
{
708695
/*
709-
* We found a redirect item that doesn't point to a valid follow-on
710-
* item. This can happen if the loop in heap_page_prune caused us to
711-
* visit the dead successor of a redirect item before visiting the
712-
* redirect item. We can clean up by setting the redirect item to
713-
* DEAD state or LP_UNUSED if the caller indicated.
696+
* The entire chain is dead. Mark the root line pointer LP_DEAD, and
697+
* fully remove the other tuples in the chain.
714698
*/
715-
heap_prune_record_dead_or_unused(prstate, rootoffnum);
699+
heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp));
700+
for (int i = 1; i < nchain; i++)
701+
heap_prune_record_unused(prstate, chainitems[i], true);
716702
}
703+
else
704+
{
705+
/*
706+
* We found a DEAD tuple in the chain. Redirect the root line pointer
707+
* to the first non-DEAD tuple, and mark as unused each intermediate
708+
* item that we are able to remove from the chain.
709+
*/
710+
heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain],
711+
ItemIdIsNormal(rootlp));
712+
for (int i = 1; i < ndeadchain; i++)
713+
heap_prune_record_unused(prstate, chainitems[i], true);
717714

718-
return ndeleted;
715+
/* the rest of tuples in the chain are normal, unchanged tuples */
716+
}
719717
}
720718

721719
/* Record lowest soon-prunable XID */
@@ -735,7 +733,8 @@ heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
735733
/* Record line pointer to be redirected */
736734
static void
737735
heap_prune_record_redirect(PruneState *prstate,
738-
OffsetNumber offnum, OffsetNumber rdoffnum)
736+
OffsetNumber offnum, OffsetNumber rdoffnum,
737+
bool was_normal)
739738
{
740739
Assert(prstate->nredirected < MaxHeapTuplesPerPage);
741740
prstate->redirected[prstate->nredirected * 2] = offnum;
@@ -745,17 +744,34 @@ heap_prune_record_redirect(PruneState *prstate,
745744
prstate->marked[offnum] = true;
746745
Assert(!prstate->marked[rdoffnum]);
747746
prstate->marked[rdoffnum] = true;
747+
748+
/*
749+
* If the root entry had been a normal tuple, we are deleting it, so count
750+
* it in the result. But changing a redirect (even to DEAD state) doesn't
751+
* count.
752+
*/
753+
if (was_normal)
754+
prstate->ndeleted++;
748755
}
749756

750757
/* Record line pointer to be marked dead */
751758
static void
752-
heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
759+
heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
760+
bool was_normal)
753761
{
754762
Assert(prstate->ndead < MaxHeapTuplesPerPage);
755763
prstate->nowdead[prstate->ndead] = offnum;
756764
prstate->ndead++;
757765
Assert(!prstate->marked[offnum]);
758766
prstate->marked[offnum] = true;
767+
768+
/*
769+
* If the root entry had been a normal tuple, we are deleting it, so count
770+
* it in the result. But changing a redirect (even to DEAD state) doesn't
771+
* count.
772+
*/
773+
if (was_normal)
774+
prstate->ndeleted++;
759775
}
760776

761777
/*
@@ -765,7 +781,8 @@ heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
765781
* pointers LP_DEAD if mark_unused_now is true.
766782
*/
767783
static void
768-
heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
784+
heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
785+
bool was_normal)
769786
{
770787
/*
771788
* If the caller set mark_unused_now to true, we can remove dead tuples
@@ -774,20 +791,28 @@ heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
774791
* likely.
775792
*/
776793
if (unlikely(prstate->mark_unused_now))
777-
heap_prune_record_unused(prstate, offnum);
794+
heap_prune_record_unused(prstate, offnum, was_normal);
778795
else
779-
heap_prune_record_dead(prstate, offnum);
796+
heap_prune_record_dead(prstate, offnum, was_normal);
780797
}
781798

782799
/* Record line pointer to be marked unused */
783800
static void
784-
heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
801+
heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
785802
{
786803
Assert(prstate->nunused < MaxHeapTuplesPerPage);
787804
prstate->nowunused[prstate->nunused] = offnum;
788805
prstate->nunused++;
789806
Assert(!prstate->marked[offnum]);
790807
prstate->marked[offnum] = true;
808+
809+
/*
810+
* If the root entry had been a normal tuple, we are deleting it, so count
811+
* it in the result. But changing a redirect (even to DEAD state) doesn't
812+
* count.
813+
*/
814+
if (was_normal)
815+
prstate->ndeleted++;
791816
}
792817

793818

0 commit comments

Comments
 (0)