@@ -51,20 +51,22 @@ typedef struct
     * 1. Otherwise every access would need to subtract 1.
     */
    bool        marked[MaxHeapTuplesPerPage + 1];
+
+   int         ndeleted;       /* Number of tuples deleted from the page */
} PruneState;

/* Local functions */
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
                                               HeapTuple tup,
                                               Buffer buffer);
-static int heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
+static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
                             OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate);
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
static void heap_prune_record_redirect(PruneState *prstate,
-                                      OffsetNumber offnum, OffsetNumber rdoffnum);
-static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
-static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum);
-static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
+                                      OffsetNumber offnum, OffsetNumber rdoffnum, bool was_normal);
+static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum, bool was_normal);
+static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
+static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
static void page_verify_redirects(Page page);


@@ -241,6 +243,7 @@ heap_page_prune(Relation relation, Buffer buffer,
    prstate.snapshotConflictHorizon = InvalidTransactionId;
    prstate.nredirected = prstate.ndead = prstate.nunused = 0;
    memset(prstate.marked, 0, sizeof(prstate.marked));
+   prstate.ndeleted = 0;

    /*
     * presult->htsv is not initialized here because all ntuple spots in the
@@ -321,8 +324,8 @@ heap_page_prune(Relation relation, Buffer buffer,
            continue;

        /* Process this item or chain of items */
-       presult->ndeleted += heap_prune_chain(page, blockno, maxoff, offnum,
-                                             presult->htsv, &prstate);
+       heap_prune_chain(page, blockno, maxoff,
+                        offnum, presult->htsv, &prstate);
    }

    /* Clear the offset information once we have processed the given page. */
@@ -394,8 +397,9 @@ heap_page_prune(Relation relation, Buffer buffer,

    END_CRIT_SECTION();

-   /* Record number of newly-set-LP_DEAD items for caller */
+   /* Copy information back for caller */
    presult->nnewlpdead = prstate.ndead;
+   presult->ndeleted = prstate.ndeleted;
}


@@ -444,22 +448,23 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 * to the redirected[] array (two entries per redirection); items to be set to
 * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
 * state are added to nowunused[].
- *
- * Returns the number of tuples (to be) deleted from the page.
 */
-static int
+static void
heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
                 OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate)
{
-   int         ndeleted = 0;
    TransactionId priorXmax = InvalidTransactionId;
    ItemId      rootlp;
    HeapTupleHeader htup;
-   OffsetNumber latestdead = InvalidOffsetNumber,
-               offnum;
+   OffsetNumber offnum;
    OffsetNumber chainitems[MaxHeapTuplesPerPage];
-   int         nchain = 0,
-               i;
+
+   /*
+    * After traversing the HOT chain, ndeadchain is the index in chainitems
+    * of the first live successor after the last dead item.
+    */
+   int         ndeadchain = 0,
+               nchain = 0;

    rootlp = PageGetItemId(page, rootoffnum);

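Aside, not part of the patch: a self-contained sketch of the ndeadchain convention described in the new comment above. The dead[] array and the offsets in chainitems[] are invented for the illustration; only the indexing rule (ndeadchain points just past the last DEAD member, i.e. at the first member that must survive) mirrors what the patch does.

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
    bool        dead[] = {true, true, false, false};    /* hypothetical chain: leading members DEAD */
    int         chainitems[] = {2, 5, 7, 9};            /* made-up line pointer offsets */
    int         nchain = 4;
    int         ndeadchain = 0;

    for (int i = 0; i < nchain; i++)
    {
        if (dead[i])
            ndeadchain = i + 1;     /* index just past the last DEAD member */
        else
            break;                  /* the real traversal stops walking at live members */
    }

    if (ndeadchain == 0)
        printf("no DEAD members, chain left alone\n");
    else if (ndeadchain == nchain)
        printf("whole chain dead\n");
    else
        printf("first surviving member is at offset %d\n", chainitems[ndeadchain]);

    return 0;
}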
@@ -494,14 +499,12 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
            if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
                !HeapTupleHeaderIsHotUpdated(htup))
            {
-               heap_prune_record_unused(prstate, rootoffnum);
+               heap_prune_record_unused(prstate, rootoffnum, true);
                HeapTupleHeaderAdvanceConflictHorizon(htup,
                                                      &prstate->snapshotConflictHorizon);
-               ndeleted++;
            }

-           /* Nothing more to do */
-           return ndeleted;
+           return;
        }
    }

@@ -512,8 +515,6 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
    for (;;)
    {
        ItemId      lp;
-       bool        tupdead,
-                   recent_dead;

        /* Sanity check (pure paranoia) */
        if (offnum < FirstOffsetNumber)
@@ -563,7 +564,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
             * the LP was already marked dead.
             */
            if (unlikely(prstate->mark_unused_now))
-               heap_prune_record_unused(prstate, offnum);
+               heap_prune_record_unused(prstate, offnum, false);

            break;
        }
@@ -586,23 +587,41 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
        /*
         * Check tuple's visibility status.
         */
-       tupdead = recent_dead = false;
-
        switch (htsv_get_valid_status(htsv[offnum]))
        {
            case HEAPTUPLE_DEAD:
-               tupdead = true;
+
+               /* Remember the last DEAD tuple seen */
+               ndeadchain = nchain;
+               HeapTupleHeaderAdvanceConflictHorizon(htup,
+                                                     &prstate->snapshotConflictHorizon);
+
+               /* Advance to next chain member */
                break;

            case HEAPTUPLE_RECENTLY_DEAD:
-               recent_dead = true;

                /*
                 * This tuple may soon become DEAD. Update the hint field so
                 * that the page is reconsidered for pruning in future.
+                *
+                * We don't need to advance the conflict horizon for
+                * RECENTLY_DEAD tuples, even if we are removing them. This
+                * is because we only remove RECENTLY_DEAD tuples if they
+                * precede a DEAD tuple, and the DEAD tuple must have been
+                * inserted by a newer transaction than the RECENTLY_DEAD
+                * tuple by virtue of being later in the chain. We will have
+                * advanced the conflict horizon for the DEAD tuple.
                 */
                heap_prune_record_prunable(prstate,
                                           HeapTupleHeaderGetUpdateXid(htup));
+
+               /*
+                * Advance past RECENTLY_DEAD tuples just in case there's a
+                * DEAD one after them. We have to make sure that we don't
+                * miss any DEAD tuples, since DEAD tuples that still have
+                * tuple storage after pruning will confuse VACUUM.
+                */
                break;

            case HEAPTUPLE_DELETE_IN_PROGRESS:
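Aside, not part of the patch: a toy check of the claim in the new RECENTLY_DEAD comment that advancing the conflict horizon only for the DEAD member is enough. ToyXid and advance_horizon are stand-ins invented for this sketch; real TransactionId comparisons handle wraparound, which is ignored here.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t ToyXid;            /* simplified stand-in for TransactionId */

static ToyXid
advance_horizon(ToyXid horizon, ToyXid xid)
{
    return xid > horizon ? xid : horizon;   /* keep the newest xid seen */
}

int
main(void)
{
    /* xmins of successive chain members; later members were inserted later */
    ToyXid      chain_xmin[] = {100, 105, 110};
    ToyXid      horizon = 0;

    /* advance only for the last (DEAD) member, as the patch does ... */
    horizon = advance_horizon(horizon, chain_xmin[2]);

    /* ... and that already covers the RECENTLY_DEAD members before it */
    assert(horizon >= chain_xmin[0] && horizon >= chain_xmin[1]);
    printf("conflict horizon: %u\n", (unsigned) horizon);
    return 0;
}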
@@ -613,7 +632,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
                 */
                heap_prune_record_prunable(prstate,
                                           HeapTupleHeaderGetUpdateXid(htup));
-               break;
+               goto process_chain;

            case HEAPTUPLE_LIVE:
            case HEAPTUPLE_INSERT_IN_PROGRESS:
@@ -624,35 +643,19 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
                 * But we don't. See related decisions about when to mark the
                 * page prunable in heapam.c.
                 */
-               break;
+               goto process_chain;

            default:
                elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-               break;
+               goto process_chain;
        }

-       /*
-        * Remember the last DEAD tuple seen. We will advance past
-        * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
-        * but we can't advance past anything else. We have to make sure that
-        * we don't miss any DEAD tuples, since DEAD tuples that still have
-        * tuple storage after pruning will confuse VACUUM.
-        */
-       if (tupdead)
-       {
-           latestdead = offnum;
-           HeapTupleHeaderAdvanceConflictHorizon(htup,
-                                                 &prstate->snapshotConflictHorizon);
-       }
-       else if (!recent_dead)
-           break;
-
        /*
         * If the tuple is not HOT-updated, then we are at the end of this
         * HOT-update chain.
         */
        if (!HeapTupleHeaderIsHotUpdated(htup))
-           break;
+           goto process_chain;

        /* HOT implies it can't have moved to different partition */
        Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
@@ -665,57 +668,52 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
        priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    }

-   /*
-    * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
-    * the DEAD tuples at the start of the chain are removed and the root line
-    * pointer is appropriately redirected.
-    */
-   if (OffsetNumberIsValid(latestdead))
+   if (ItemIdIsRedirected(rootlp) && nchain < 2)
    {
        /*
-        * Mark as unused each intermediate item that we are able to remove
-        * from the chain.
-        *
-        * When the previous item is the last dead tuple seen, we are at the
-        * right candidate for redirection.
+        * We found a redirect item that doesn't point to a valid follow-on
+        * item. This can happen if the loop in heap_page_prune caused us to
+        * visit the dead successor of a redirect item before visiting the
+        * redirect item. We can clean up by setting the redirect item to
+        * LP_DEAD state or LP_UNUSED if the caller indicated.
         */
-       for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
-       {
-           heap_prune_record_unused(prstate, chainitems[i]);
-           ndeleted++;
-       }
+       heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
+       return;
+   }

-       /*
-        * If the root entry had been a normal tuple, we are deleting it, so
-        * count it in the result. But changing a redirect (even to DEAD
-        * state) doesn't count.
-        */
-       if (ItemIdIsNormal(rootlp))
-           ndeleted++;
+process_chain:

+   if (ndeadchain == 0)
+   {
        /*
-        * If the DEAD tuple is at the end of the chain, the entire chain is
-        * dead and the root line pointer can be marked dead. Otherwise just
-        * redirect the root to the correct chain member.
+        * No DEAD tuple was found, so the chain is entirely composed of
+        * normal, unchanged tuples. Leave it alone.
         */
-       if (i >= nchain)
-           heap_prune_record_dead_or_unused(prstate, rootoffnum);
-       else
-           heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
    }
-   else if (nchain < 2 && ItemIdIsRedirected(rootlp))
+   else if (ndeadchain == nchain)
    {
        /*
-        * We found a redirect item that doesn't point to a valid follow-on
-        * item. This can happen if the loop in heap_page_prune caused us to
-        * visit the dead successor of a redirect item before visiting the
-        * redirect item. We can clean up by setting the redirect item to
-        * DEAD state or LP_UNUSED if the caller indicated.
+        * The entire chain is dead. Mark the root line pointer LP_DEAD, and
+        * fully remove the other tuples in the chain.
         */
-       heap_prune_record_dead_or_unused(prstate, rootoffnum);
+       heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp));
+       for (int i = 1; i < nchain; i++)
+           heap_prune_record_unused(prstate, chainitems[i], true);
    }
+   else
+   {
+       /*
+        * We found a DEAD tuple in the chain. Redirect the root line pointer
+        * to the first non-DEAD tuple, and mark as unused each intermediate
+        * item that we are able to remove from the chain.
+        */
+       heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain],
+                                  ItemIdIsNormal(rootlp));
+       for (int i = 1; i < ndeadchain; i++)
+           heap_prune_record_unused(prstate, chainitems[i], true);

-   return ndeleted;
+       /* the rest of tuples in the chain are normal, unchanged tuples */
+   }
}

/* Record lowest soon-prunable XID */
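Aside, not part of the patch: a minimal sketch of the three-way decision made at process_chain, driven only by the ndeadchain/nchain bookkeeping shown above. decide_chain and the printf calls are invented stand-ins for the heap_prune_record_* calls.

#include <stdio.h>

static void
decide_chain(int ndeadchain, int nchain)
{
    if (ndeadchain == 0)
    {
        /* no DEAD members: leave the chain alone */
        printf("chain untouched\n");
    }
    else if (ndeadchain == nchain)
    {
        /* every member is DEAD: root becomes LP_DEAD, the rest LP_UNUSED */
        printf("root -> LP_DEAD, items 1..%d -> LP_UNUSED\n", nchain - 1);
    }
    else
    {
        /* dead prefix: redirect root to the first surviving member */
        printf("root redirected to chainitems[%d], items 1..%d -> LP_UNUSED\n",
               ndeadchain, ndeadchain - 1);
    }
}

int
main(void)
{
    decide_chain(0, 3);         /* fully live chain */
    decide_chain(3, 3);         /* fully dead chain */
    decide_chain(2, 4);         /* dead prefix of length 2 */
    return 0;
}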
@@ -735,7 +733,8 @@ heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
/* Record line pointer to be redirected */
static void
heap_prune_record_redirect(PruneState *prstate,
-                          OffsetNumber offnum, OffsetNumber rdoffnum)
+                          OffsetNumber offnum, OffsetNumber rdoffnum,
+                          bool was_normal)
{
    Assert(prstate->nredirected < MaxHeapTuplesPerPage);
    prstate->redirected[prstate->nredirected * 2] = offnum;
@@ -745,17 +744,34 @@ heap_prune_record_redirect(PruneState *prstate,
    prstate->marked[offnum] = true;
    Assert(!prstate->marked[rdoffnum]);
    prstate->marked[rdoffnum] = true;
+
+   /*
+    * If the root entry had been a normal tuple, we are deleting it, so count
+    * it in the result. But changing a redirect (even to DEAD state) doesn't
+    * count.
+    */
+   if (was_normal)
+       prstate->ndeleted++;
}

/* Record line pointer to be marked dead */
static void
-heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
+heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
+                      bool was_normal)
{
    Assert(prstate->ndead < MaxHeapTuplesPerPage);
    prstate->nowdead[prstate->ndead] = offnum;
    prstate->ndead++;
    Assert(!prstate->marked[offnum]);
    prstate->marked[offnum] = true;
+
+   /*
+    * If the root entry had been a normal tuple, we are deleting it, so count
+    * it in the result. But changing a redirect (even to DEAD state) doesn't
+    * count.
+    */
+   if (was_normal)
+       prstate->ndeleted++;
}

/*
@@ -765,7 +781,8 @@ heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
 * pointers LP_DEAD if mark_unused_now is true.
 */
static void
-heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
+heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
+                                bool was_normal)
{
    /*
     * If the caller set mark_unused_now to true, we can remove dead tuples
@@ -774,20 +791,28 @@ heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
     * likely.
     */
    if (unlikely(prstate->mark_unused_now))
-       heap_prune_record_unused(prstate, offnum);
+       heap_prune_record_unused(prstate, offnum, was_normal);
    else
-       heap_prune_record_dead(prstate, offnum);
+       heap_prune_record_dead(prstate, offnum, was_normal);
}

/* Record line pointer to be marked unused */
static void
-heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
+heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
{
    Assert(prstate->nunused < MaxHeapTuplesPerPage);
    prstate->nowunused[prstate->nunused] = offnum;
    prstate->nunused++;
    Assert(!prstate->marked[offnum]);
    prstate->marked[offnum] = true;
+
+   /*
+    * If the root entry had been a normal tuple, we are deleting it, so count
+    * it in the result. But changing a redirect (even to DEAD state) doesn't
+    * count.
+    */
+   if (was_normal)
+       prstate->ndeleted++;
}


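Aside, not part of the patch: a tiny sketch of the counting rule that the was_normal flag implements in each heap_prune_record_* helper. toy_prune_state and record_item are invented names; only the rule itself (count a deletion only when the item previously held a normal tuple) comes from the patch.

#include <stdbool.h>
#include <stdio.h>

struct toy_prune_state
{
    int         ndeleted;
};

static void
record_item(struct toy_prune_state *prstate, bool was_normal)
{
    if (was_normal)
        prstate->ndeleted++;    /* a stored tuple is going away */
    /* changing an existing redirect (even to LP_DEAD) is not a deletion */
}

int
main(void)
{
    struct toy_prune_state prstate = {0};

    record_item(&prstate, true);    /* e.g. a normal root being redirected */
    record_item(&prstate, false);   /* e.g. an existing redirect re-marked */
    printf("ndeleted = %d\n", prstate.ndeleted);    /* prints 1 */
    return 0;
}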