@@ -150,7 +150,9 @@ typedef struct HeapCheckContext
150
150
} HeapCheckContext ;
151
151
152
152
/* Internal implementation */
153
- static void check_tuple (HeapCheckContext * ctx );
153
+ static void check_tuple (HeapCheckContext * ctx ,
154
+ bool * xmin_commit_status_ok ,
155
+ XidCommitStatus * xmin_commit_status );
154
156
static void check_toast_tuple (HeapTuple toasttup , HeapCheckContext * ctx ,
155
157
ToastedAttribute * ta , int32 * expected_chunk_seq ,
156
158
uint32 extsize );
@@ -160,7 +162,9 @@ static void check_toasted_attribute(HeapCheckContext *ctx,
160
162
ToastedAttribute * ta );
161
163
162
164
static bool check_tuple_header (HeapCheckContext * ctx );
163
- static bool check_tuple_visibility (HeapCheckContext * ctx );
165
+ static bool check_tuple_visibility (HeapCheckContext * ctx ,
166
+ bool * xmin_commit_status_ok ,
167
+ XidCommitStatus * xmin_commit_status );
164
168
165
169
static void report_corruption (HeapCheckContext * ctx , char * msg );
166
170
static void report_toast_corruption (HeapCheckContext * ctx ,
@@ -399,9 +403,16 @@ verify_heapam(PG_FUNCTION_ARGS)
399
403
for (ctx .blkno = first_block ; ctx .blkno <= last_block ; ctx .blkno ++ )
400
404
{
401
405
OffsetNumber maxoff ;
406
+ OffsetNumber predecessor [MaxOffsetNumber ];
407
+ OffsetNumber successor [MaxOffsetNumber ];
408
+ bool lp_valid [MaxOffsetNumber ];
409
+ bool xmin_commit_status_ok [MaxOffsetNumber ];
410
+ XidCommitStatus xmin_commit_status [MaxOffsetNumber ];
402
411
403
412
CHECK_FOR_INTERRUPTS ();
404
413
414
+ memset (predecessor , 0 , sizeof (OffsetNumber ) * MaxOffsetNumber );
415
+
405
416
/* Optionally skip over all-frozen or all-visible blocks */
406
417
if (skip_option != SKIP_PAGES_NONE )
407
418
{
@@ -433,6 +444,12 @@ verify_heapam(PG_FUNCTION_ARGS)
433
444
for (ctx .offnum = FirstOffsetNumber ; ctx .offnum <= maxoff ;
434
445
ctx .offnum = OffsetNumberNext (ctx .offnum ))
435
446
{
447
+ BlockNumber nextblkno ;
448
+ OffsetNumber nextoffnum ;
449
+
450
+ successor [ctx .offnum ] = InvalidOffsetNumber ;
451
+ lp_valid [ctx .offnum ] = false;
452
+ xmin_commit_status_ok [ctx .offnum ] = false;
436
453
ctx .itemid = PageGetItemId (ctx .page , ctx .offnum );
437
454
438
455
/* Skip over unused/dead line pointers */
@@ -469,6 +486,14 @@ verify_heapam(PG_FUNCTION_ARGS)
469
486
report_corruption (& ctx ,
470
487
psprintf ("line pointer redirection to unused item at offset %u" ,
471
488
(unsigned ) rdoffnum ));
489
+
490
+ /*
491
+ * Record the fact that this line pointer has passed basic
492
+ * sanity checking, and also the offset number to which it
493
+ * points.
494
+ */
495
+ lp_valid [ctx .offnum ] = true;
496
+ successor [ctx .offnum ] = rdoffnum ;
472
497
continue ;
473
498
}
474
499
@@ -502,11 +527,237 @@ verify_heapam(PG_FUNCTION_ARGS)
502
527
}
503
528
504
529
/* It should be safe to examine the tuple's header, at least */
530
+ lp_valid [ctx .offnum ] = true;
505
531
ctx .tuphdr = (HeapTupleHeader ) PageGetItem (ctx .page , ctx .itemid );
506
532
ctx .natts = HeapTupleHeaderGetNatts (ctx .tuphdr );
507
533
508
534
/* Ok, ready to check this next tuple */
509
- check_tuple (& ctx );
535
+ check_tuple (& ctx ,
536
+ & xmin_commit_status_ok [ctx .offnum ],
537
+ & xmin_commit_status [ctx .offnum ]);
538
+
539
+ /*
540
+ * If the CTID field of this tuple seems to point to another tuple
541
+ * on the same page, record that tuple as the successor of this
542
+ * one.
543
+ */
544
+ nextblkno = ItemPointerGetBlockNumber (& (ctx .tuphdr )-> t_ctid );
545
+ nextoffnum = ItemPointerGetOffsetNumber (& (ctx .tuphdr )-> t_ctid );
546
+ if (nextblkno == ctx .blkno && nextoffnum != ctx .offnum )
547
+ successor [ctx .offnum ] = nextoffnum ;
548
+ }
549
+
550
+ /*
551
+ * Update chain validation. Check each line pointer that's got a valid
552
+ * successor against that successor.
553
+ */
554
+ ctx .attnum = -1 ;
555
+ for (ctx .offnum = FirstOffsetNumber ; ctx .offnum <= maxoff ;
556
+ ctx .offnum = OffsetNumberNext (ctx .offnum ))
557
+ {
558
+ ItemId curr_lp ;
559
+ ItemId next_lp ;
560
+ HeapTupleHeader curr_htup ;
561
+ HeapTupleHeader next_htup ;
562
+ TransactionId curr_xmin ;
563
+ TransactionId curr_xmax ;
564
+ TransactionId next_xmin ;
565
+ OffsetNumber nextoffnum = successor [ctx .offnum ];
566
+
567
+ /*
568
+ * The current line pointer may not have a successor, either
569
+ * because it's not valid or because it didn't point to anything.
570
+ * In either case, we have to give up.
571
+ *
572
+ * If the current line pointer does point to something, it's
573
+ * possible that the target line pointer isn't valid. We have to
574
+ * give up in that case, too.
575
+ */
576
+ if (nextoffnum == InvalidOffsetNumber || !lp_valid [nextoffnum ])
577
+ continue ;
578
+
579
+ /* We have two valid line pointers that we can examine. */
580
+ curr_lp = PageGetItemId (ctx .page , ctx .offnum );
581
+ next_lp = PageGetItemId (ctx .page , nextoffnum );
582
+
583
+ /* Handle the cases where the current line pointer is a redirect. */
584
+ if (ItemIdIsRedirected (curr_lp ))
585
+ {
586
+ /* Can't redirect to another redirect. */
587
+ if (ItemIdIsRedirected (next_lp ))
588
+ {
589
+ report_corruption (& ctx ,
590
+ psprintf ("redirected line pointer points to another redirected line pointer at offset %u" ,
591
+ (unsigned ) nextoffnum ));
592
+ continue ;
593
+ }
594
+
595
+ /* Can only redirect to a HOT tuple. */
596
+ next_htup = (HeapTupleHeader ) PageGetItem (ctx .page , next_lp );
597
+ if (!HeapTupleHeaderIsHeapOnly (next_htup ))
598
+ {
599
+ report_corruption (& ctx ,
600
+ psprintf ("redirected line pointer points to a non-heap-only tuple at offset %u" ,
601
+ (unsigned ) nextoffnum ));
602
+ }
603
+
604
+ /*
605
+ * Redirects are created by updates, so successor should be
606
+ * the result of an update.
607
+ */
608
+ if ((next_htup -> t_infomask & HEAP_UPDATED ) == 0 )
609
+ {
610
+ report_corruption (& ctx ,
611
+ psprintf ("redirected line pointer points to a non-heap-updated tuple at offset %u" ,
612
+ (unsigned ) nextoffnum ));
613
+ }
614
+
615
+ /* HOT chains should not intersect. */
616
+ if (predecessor [nextoffnum ] != InvalidOffsetNumber )
617
+ {
618
+ report_corruption (& ctx ,
619
+ psprintf ("redirect line pointer points to offset %u, but offset %u also points there" ,
620
+ (unsigned ) nextoffnum , (unsigned ) predecessor [nextoffnum ]));
621
+ continue ;
622
+ }
623
+
624
+ /*
625
+ * This redirect and the tuple to which it points seem to be
626
+ * part of an update chain.
627
+ */
628
+ predecessor [nextoffnum ] = ctx .offnum ;
629
+ continue ;
630
+ }
631
+
632
+ /*
633
+ * If the next line pointer is a redirect, or if it's a tuple
634
+ * but the XMAX of this tuple doesn't match the XMIN of the next
635
+ * tuple, then the two aren't part of the same update chain and
636
+ * there is nothing more to do.
637
+ */
638
+ if (ItemIdIsRedirected (next_lp ))
639
+ continue ;
640
+ curr_htup = (HeapTupleHeader ) PageGetItem (ctx .page , curr_lp );
641
+ curr_xmax = HeapTupleHeaderGetUpdateXid (curr_htup );
642
+ next_htup = (HeapTupleHeader ) PageGetItem (ctx .page , next_lp );
643
+ next_xmin = HeapTupleHeaderGetXmin (next_htup );
644
+ if (!TransactionIdIsValid (curr_xmax ) ||
645
+ !TransactionIdEquals (curr_xmax , next_xmin ))
646
+ continue ;
647
+
648
+ /* HOT chains should not intersect. */
649
+ if (predecessor [nextoffnum ] != InvalidOffsetNumber )
650
+ {
651
+ report_corruption (& ctx ,
652
+ psprintf ("tuple points to new version at offset %u, but offset %u also points there" ,
653
+ (unsigned ) nextoffnum , (unsigned ) predecessor [nextoffnum ]));
654
+ continue ;
655
+ }
656
+
657
+ /*
658
+ * This tuple and the tuple to which it points seem to be part
659
+ * of an update chain.
660
+ */
661
+ predecessor [nextoffnum ] = ctx .offnum ;
662
+
663
+ /*
664
+ * If the current tuple is marked as HOT-updated, then the next
665
+ * tuple should be marked as a heap-only tuple. Conversely, if the
666
+ * current tuple isn't marked as HOT-updated, then the next tuple
667
+ * shouldn't be marked as a heap-only tuple.
668
+ */
669
+ if (!HeapTupleHeaderIsHotUpdated (curr_htup ) &&
670
+ HeapTupleHeaderIsHeapOnly (next_htup ))
671
+ {
672
+ report_corruption (& ctx ,
673
+ psprintf ("non-heap-only update produced a heap-only tuple at offset %u" ,
674
+ (unsigned ) nextoffnum ));
675
+ }
676
+ if (HeapTupleHeaderIsHotUpdated (curr_htup ) &&
677
+ !HeapTupleHeaderIsHeapOnly (next_htup ))
678
+ {
679
+ report_corruption (& ctx ,
680
+ psprintf ("heap-only update produced a non-heap only tuple at offset %u" ,
681
+ (unsigned ) nextoffnum ));
682
+ }
683
+
684
+ /*
685
+ * If the current tuple's xmin is still in progress but the
686
+ * successor tuple's xmin is committed, that's corruption.
687
+ *
688
+ * NB: We recheck the commit status of the current tuple's xmin
689
+ * here, because it might have committed after we checked it and
690
+ * before we checked the commit status of the successor tuple's
691
+ * xmin. This should be safe because the xmin itself can't have
692
+ * changed, only its commit status.
693
+ */
694
+ curr_xmin = HeapTupleHeaderGetXmin (curr_htup );
695
+ if (xmin_commit_status_ok [ctx .offnum ] &&
696
+ xmin_commit_status [ctx .offnum ] == XID_IN_PROGRESS &&
697
+ xmin_commit_status_ok [nextoffnum ] &&
698
+ xmin_commit_status [nextoffnum ] == XID_COMMITTED &&
699
+ TransactionIdIsInProgress (curr_xmin ))
700
+ {
701
+ report_corruption (& ctx ,
702
+ psprintf ("tuple with in-progress xmin %u was updated to produce a tuple at offset %u with committed xmin %u" ,
703
+ (unsigned ) curr_xmin ,
704
+ (unsigned ) ctx .offnum ,
705
+ (unsigned ) next_xmin ));
706
+ }
707
+
708
+ /*
709
+ * If the current tuple's xmin is aborted but the successor tuple's
710
+ * xmin is in-progress or committed, that's corruption.
711
+ */
712
+ if (xmin_commit_status_ok [ctx .offnum ] &&
713
+ xmin_commit_status [ctx .offnum ] == XID_ABORTED &&
714
+ xmin_commit_status_ok [nextoffnum ])
715
+ {
716
+ if (xmin_commit_status [nextoffnum ] == XID_IN_PROGRESS )
717
+ report_corruption (& ctx ,
718
+ psprintf ("tuple with aborted xmin %u was updated to produce a tuple at offset %u with in-progress xmin %u" ,
719
+ (unsigned ) curr_xmin ,
720
+ (unsigned ) ctx .offnum ,
721
+ (unsigned ) next_xmin ));
722
+ else if (xmin_commit_status [nextoffnum ] == XID_COMMITTED )
723
+ report_corruption (& ctx ,
724
+ psprintf ("tuple with aborted xmin %u was updated to produce a tuple at offset %u with committed xmin %u" ,
725
+ (unsigned ) curr_xmin ,
726
+ (unsigned ) ctx .offnum ,
727
+ (unsigned ) next_xmin ));
728
+ }
729
+ }
730
+
731
+ /*
732
+ * An update chain can start either with a non-heap-only tuple or with
733
+ * a redirect line pointer, but not with a heap-only tuple.
734
+ *
735
+ * (This check is in a separate loop because we need the predecessor
736
+ * array to be fully populated before we can perform it.)
737
+ */
738
+ for (ctx .offnum = FirstOffsetNumber ;
739
+ ctx .offnum <= maxoff ;
740
+ ctx .offnum = OffsetNumberNext (ctx .offnum ))
741
+ {
742
+ if (xmin_commit_status_ok [ctx .offnum ] &&
743
+ (xmin_commit_status [ctx .offnum ] == XID_COMMITTED ||
744
+ xmin_commit_status [ctx .offnum ] == XID_IN_PROGRESS ) &&
745
+ predecessor [ctx .offnum ] == InvalidOffsetNumber )
746
+ {
747
+ ItemId curr_lp ;
748
+
749
+ curr_lp = PageGetItemId (ctx .page , ctx .offnum );
750
+ if (!ItemIdIsRedirected (curr_lp ))
751
+ {
752
+ HeapTupleHeader curr_htup ;
753
+
754
+ curr_htup = (HeapTupleHeader )
755
+ PageGetItem (ctx .page , curr_lp );
756
+ if (HeapTupleHeaderIsHeapOnly (curr_htup ))
757
+ report_corruption (& ctx ,
758
+ psprintf ("tuple is root of chain but is marked as heap-only tuple" ));
759
+ }
760
+ }
510
761
}
511
762
512
763
/* clean up */
@@ -638,6 +889,7 @@ check_tuple_header(HeapCheckContext *ctx)
638
889
{
639
890
HeapTupleHeader tuphdr = ctx -> tuphdr ;
640
891
uint16 infomask = tuphdr -> t_infomask ;
892
+ TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid (tuphdr );
641
893
bool result = true;
642
894
unsigned expected_hoff ;
643
895
@@ -663,6 +915,19 @@ check_tuple_header(HeapCheckContext *ctx)
663
915
*/
664
916
}
665
917
918
+ if (!TransactionIdIsValid (curr_xmax ) &&
919
+ HeapTupleHeaderIsHotUpdated (tuphdr ))
920
+ {
921
+ report_corruption (ctx ,
922
+ psprintf ("tuple has been HOT updated, but xmax is 0" ));
923
+
924
+ /*
925
+ * As above, even though this shouldn't happen, it's not sufficient
926
+ * justification for skipping further checks, we should still be able
927
+ * to perform sensibly.
928
+ */
929
+ }
930
+
666
931
if (infomask & HEAP_HASNULL )
667
932
expected_hoff = MAXALIGN (SizeofHeapTupleHeader + BITMAPLEN (ctx -> natts ));
668
933
else
@@ -718,9 +983,14 @@ check_tuple_header(HeapCheckContext *ctx)
718
983
* Returns true if the tuple itself should be checked, false otherwise. Sets
719
984
* ctx->tuple_could_be_pruned if the tuple -- and thus also any associated
720
985
* TOAST tuples -- are eligible for pruning.
986
+ *
987
+ * Sets *xmin_commit_status_ok to true if the commit status of xmin is known
988
+ * and false otherwise. If it's set to true, then also set *xmin_commit_status
989
+ * to the actual commit status.
721
990
*/
722
991
static bool
723
- check_tuple_visibility (HeapCheckContext * ctx )
992
+ check_tuple_visibility (HeapCheckContext * ctx , bool * xmin_commit_status_ok ,
993
+ XidCommitStatus * xmin_commit_status )
724
994
{
725
995
TransactionId xmin ;
726
996
TransactionId xvac ;
@@ -731,13 +1001,17 @@ check_tuple_visibility(HeapCheckContext *ctx)
731
1001
HeapTupleHeader tuphdr = ctx -> tuphdr ;
732
1002
733
1003
ctx -> tuple_could_be_pruned = true; /* have not yet proven otherwise */
1004
+ * xmin_commit_status_ok = false; /* have not yet proven otherwise */
734
1005
735
1006
/* If xmin is normal, it should be within valid range */
736
1007
xmin = HeapTupleHeaderGetXmin (tuphdr );
737
1008
switch (get_xid_status (xmin , ctx , & xmin_status ))
738
1009
{
739
1010
case XID_INVALID :
1011
+ break ;
740
1012
case XID_BOUNDS_OK :
1013
+ * xmin_commit_status_ok = true;
1014
+ * xmin_commit_status = xmin_status ;
741
1015
break ;
742
1016
case XID_IN_FUTURE :
743
1017
report_corruption (ctx ,
@@ -1515,9 +1789,13 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
1515
1789
/*
1516
1790
* Check the current tuple as tracked in ctx, recording any corruption found in
1517
1791
* ctx->tupstore.
1792
+ *
1793
+ * We return some information about the status of xmin to aid in validating
1794
+ * update chains.
1518
1795
*/
1519
1796
static void
1520
- check_tuple (HeapCheckContext * ctx )
1797
+ check_tuple (HeapCheckContext * ctx , bool * xmin_commit_status_ok ,
1798
+ XidCommitStatus * xmin_commit_status )
1521
1799
{
1522
1800
/*
1523
1801
* Check various forms of tuple header corruption, and if the header is
@@ -1531,7 +1809,8 @@ check_tuple(HeapCheckContext *ctx)
1531
1809
* cannot assume our relation description matches the tuple structure, and
1532
1810
* therefore cannot check it.
1533
1811
*/
1534
- if (!check_tuple_visibility (ctx ))
1812
+ if (!check_tuple_visibility (ctx , xmin_commit_status_ok ,
1813
+ xmin_commit_status ))
1535
1814
return ;
1536
1815
1537
1816
/*
0 commit comments