Skip to content

Commit bbc1376

Browse files
committed
Teach verify_heapam() to validate update chains within a page.
Prior to this commit, we only considered each tuple or line pointer on the page in isolation, but now we can do some validation of a line pointer against its successor. For example, a redirect line pointer shouldn't point to another redirect line pointer, and if a tuple is HOT-updated, the result should be a heap-only tuple. Himanshu Upadhyaya and Robert Haas, reviewed by Aleksander Alekseev, Andres Freund, and Peter Geoghegan.
1 parent d69c404 commit bbc1376

File tree

2 files changed

+524
-17
lines changed

2 files changed

+524
-17
lines changed

contrib/amcheck/verify_heapam.c

+285-6
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,9 @@ typedef struct HeapCheckContext
150150
} HeapCheckContext;
151151

152152
/* Internal implementation */
153-
static void check_tuple(HeapCheckContext *ctx);
153+
static void check_tuple(HeapCheckContext *ctx,
154+
bool *xmin_commit_status_ok,
155+
XidCommitStatus *xmin_commit_status);
154156
static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
155157
ToastedAttribute *ta, int32 *expected_chunk_seq,
156158
uint32 extsize);
@@ -160,7 +162,9 @@ static void check_toasted_attribute(HeapCheckContext *ctx,
160162
ToastedAttribute *ta);
161163

162164
static bool check_tuple_header(HeapCheckContext *ctx);
163-
static bool check_tuple_visibility(HeapCheckContext *ctx);
165+
static bool check_tuple_visibility(HeapCheckContext *ctx,
166+
bool *xmin_commit_status_ok,
167+
XidCommitStatus *xmin_commit_status);
164168

165169
static void report_corruption(HeapCheckContext *ctx, char *msg);
166170
static void report_toast_corruption(HeapCheckContext *ctx,
@@ -399,9 +403,16 @@ verify_heapam(PG_FUNCTION_ARGS)
399403
for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++)
400404
{
401405
OffsetNumber maxoff;
406+
OffsetNumber predecessor[MaxOffsetNumber];
407+
OffsetNumber successor[MaxOffsetNumber];
408+
bool lp_valid[MaxOffsetNumber];
409+
bool xmin_commit_status_ok[MaxOffsetNumber];
410+
XidCommitStatus xmin_commit_status[MaxOffsetNumber];
402411

403412
CHECK_FOR_INTERRUPTS();
404413

414+
memset(predecessor, 0, sizeof(OffsetNumber) * MaxOffsetNumber);
415+
405416
/* Optionally skip over all-frozen or all-visible blocks */
406417
if (skip_option != SKIP_PAGES_NONE)
407418
{
@@ -433,6 +444,12 @@ verify_heapam(PG_FUNCTION_ARGS)
433444
for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
434445
ctx.offnum = OffsetNumberNext(ctx.offnum))
435446
{
447+
BlockNumber nextblkno;
448+
OffsetNumber nextoffnum;
449+
450+
successor[ctx.offnum] = InvalidOffsetNumber;
451+
lp_valid[ctx.offnum] = false;
452+
xmin_commit_status_ok[ctx.offnum] = false;
436453
ctx.itemid = PageGetItemId(ctx.page, ctx.offnum);
437454

438455
/* Skip over unused/dead line pointers */
@@ -469,6 +486,14 @@ verify_heapam(PG_FUNCTION_ARGS)
469486
report_corruption(&ctx,
470487
psprintf("line pointer redirection to unused item at offset %u",
471488
(unsigned) rdoffnum));
489+
490+
/*
491+
* Record the fact that this line pointer has passed basic
492+
* sanity checking, and also the offset number to which it
493+
* points.
494+
*/
495+
lp_valid[ctx.offnum] = true;
496+
successor[ctx.offnum] = rdoffnum;
472497
continue;
473498
}
474499

@@ -502,11 +527,237 @@ verify_heapam(PG_FUNCTION_ARGS)
502527
}
503528

504529
/* It should be safe to examine the tuple's header, at least */
530+
lp_valid[ctx.offnum] = true;
505531
ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
506532
ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
507533

508534
/* Ok, ready to check this next tuple */
509-
check_tuple(&ctx);
535+
check_tuple(&ctx,
536+
&xmin_commit_status_ok[ctx.offnum],
537+
&xmin_commit_status[ctx.offnum]);
538+
539+
/*
540+
* If the CTID field of this tuple seems to point to another tuple
541+
* on the same page, record that tuple as the successor of this
542+
* one.
543+
*/
544+
nextblkno = ItemPointerGetBlockNumber(&(ctx.tuphdr)->t_ctid);
545+
nextoffnum = ItemPointerGetOffsetNumber(&(ctx.tuphdr)->t_ctid);
546+
if (nextblkno == ctx.blkno && nextoffnum != ctx.offnum)
547+
successor[ctx.offnum] = nextoffnum;
548+
}
549+
550+
/*
551+
* Update chain validation. Check each line pointer that's got a valid
552+
* successor against that successor.
553+
*/
554+
ctx.attnum = -1;
555+
for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
556+
ctx.offnum = OffsetNumberNext(ctx.offnum))
557+
{
558+
ItemId curr_lp;
559+
ItemId next_lp;
560+
HeapTupleHeader curr_htup;
561+
HeapTupleHeader next_htup;
562+
TransactionId curr_xmin;
563+
TransactionId curr_xmax;
564+
TransactionId next_xmin;
565+
OffsetNumber nextoffnum = successor[ctx.offnum];
566+
567+
/*
568+
* The current line pointer may not have a successor, either
569+
* because it's not valid or because it didn't point to anything.
570+
* In either case, we have to give up.
571+
*
572+
* If the current line pointer does point to something, it's
573+
* possible that the target line pointer isn't valid. We have to
574+
* give up in that case, too.
575+
*/
576+
if (nextoffnum == InvalidOffsetNumber || !lp_valid[nextoffnum])
577+
continue;
578+
579+
/* We have two valid line pointers that we can examine. */
580+
curr_lp = PageGetItemId(ctx.page, ctx.offnum);
581+
next_lp = PageGetItemId(ctx.page, nextoffnum);
582+
583+
/* Handle the cases where the current line pointer is a redirect. */
584+
if (ItemIdIsRedirected(curr_lp))
585+
{
586+
/* Can't redirect to another redirect. */
587+
if (ItemIdIsRedirected(next_lp))
588+
{
589+
report_corruption(&ctx,
590+
psprintf("redirected line pointer points to another redirected line pointer at offset %u",
591+
(unsigned) nextoffnum));
592+
continue;
593+
}
594+
595+
/* Can only redirect to a HOT tuple. */
596+
next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
597+
if (!HeapTupleHeaderIsHeapOnly(next_htup))
598+
{
599+
report_corruption(&ctx,
600+
psprintf("redirected line pointer points to a non-heap-only tuple at offset %u",
601+
(unsigned) nextoffnum));
602+
}
603+
604+
/*
605+
* Redirects are created by updates, so successor should be
606+
* the result of an update.
607+
*/
608+
if ((next_htup->t_infomask & HEAP_UPDATED) == 0)
609+
{
610+
report_corruption(&ctx,
611+
psprintf("redirected line pointer points to a non-heap-updated tuple at offset %u",
612+
(unsigned) nextoffnum));
613+
}
614+
615+
/* HOT chains should not intersect. */
616+
if (predecessor[nextoffnum] != InvalidOffsetNumber)
617+
{
618+
report_corruption(&ctx,
619+
psprintf("redirect line pointer points to offset %u, but offset %u also points there",
620+
(unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
621+
continue;
622+
}
623+
624+
/*
625+
* This redirect and the tuple to which it points seem to be
626+
* part of an update chain.
627+
*/
628+
predecessor[nextoffnum] = ctx.offnum;
629+
continue;
630+
}
631+
632+
/*
633+
* If the next line pointer is a redirect, or if it's a tuple
634+
* but the XMAX of this tuple doesn't match the XMIN of the next
635+
* tuple, then the two aren't part of the same update chain and
636+
* there is nothing more to do.
637+
*/
638+
if (ItemIdIsRedirected(next_lp))
639+
continue;
640+
curr_htup = (HeapTupleHeader) PageGetItem(ctx.page, curr_lp);
641+
curr_xmax = HeapTupleHeaderGetUpdateXid(curr_htup);
642+
next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
643+
next_xmin = HeapTupleHeaderGetXmin(next_htup);
644+
if (!TransactionIdIsValid(curr_xmax) ||
645+
!TransactionIdEquals(curr_xmax, next_xmin))
646+
continue;
647+
648+
/* HOT chains should not intersect. */
649+
if (predecessor[nextoffnum] != InvalidOffsetNumber)
650+
{
651+
report_corruption(&ctx,
652+
psprintf("tuple points to new version at offset %u, but offset %u also points there",
653+
(unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
654+
continue;
655+
}
656+
657+
/*
658+
* This tuple and the tuple to which it points seem to be part
659+
* of an update chain.
660+
*/
661+
predecessor[nextoffnum] = ctx.offnum;
662+
663+
/*
664+
* If the current tuple is marked as HOT-updated, then the next
665+
* tuple should be marked as a heap-only tuple. Conversely, if the
666+
* current tuple isn't marked as HOT-updated, then the next tuple
667+
* shouldn't be marked as a heap-only tuple.
668+
*/
669+
if (!HeapTupleHeaderIsHotUpdated(curr_htup) &&
670+
HeapTupleHeaderIsHeapOnly(next_htup))
671+
{
672+
report_corruption(&ctx,
673+
psprintf("non-heap-only update produced a heap-only tuple at offset %u",
674+
(unsigned) nextoffnum));
675+
}
676+
if (HeapTupleHeaderIsHotUpdated(curr_htup) &&
677+
!HeapTupleHeaderIsHeapOnly(next_htup))
678+
{
679+
report_corruption(&ctx,
680+
psprintf("heap-only update produced a non-heap only tuple at offset %u",
681+
(unsigned) nextoffnum));
682+
}
683+
684+
/*
685+
* If the current tuple's xmin is still in progress but the
686+
* successor tuple's xmin is committed, that's corruption.
687+
*
688+
* NB: We recheck the commit status of the current tuple's xmin
689+
* here, because it might have committed after we checked it and
690+
* before we checked the commit status of the successor tuple's
691+
* xmin. This should be safe because the xmin itself can't have
692+
* changed, only its commit status.
693+
*/
694+
curr_xmin = HeapTupleHeaderGetXmin(curr_htup);
695+
if (xmin_commit_status_ok[ctx.offnum] &&
696+
xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS &&
697+
xmin_commit_status_ok[nextoffnum] &&
698+
xmin_commit_status[nextoffnum] == XID_COMMITTED &&
699+
TransactionIdIsInProgress(curr_xmin))
700+
{
701+
report_corruption(&ctx,
702+
psprintf("tuple with in-progress xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
703+
(unsigned) curr_xmin,
704+
(unsigned) ctx.offnum,
705+
(unsigned) next_xmin));
706+
}
707+
708+
/*
709+
* If the current tuple's xmin is aborted but the successor tuple's
710+
* xmin is in-progress or committed, that's corruption.
711+
*/
712+
if (xmin_commit_status_ok[ctx.offnum] &&
713+
xmin_commit_status[ctx.offnum] == XID_ABORTED &&
714+
xmin_commit_status_ok[nextoffnum])
715+
{
716+
if (xmin_commit_status[nextoffnum] == XID_IN_PROGRESS)
717+
report_corruption(&ctx,
718+
psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with in-progress xmin %u",
719+
(unsigned) curr_xmin,
720+
(unsigned) ctx.offnum,
721+
(unsigned) next_xmin));
722+
else if (xmin_commit_status[nextoffnum] == XID_COMMITTED)
723+
report_corruption(&ctx,
724+
psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
725+
(unsigned) curr_xmin,
726+
(unsigned) ctx.offnum,
727+
(unsigned) next_xmin));
728+
}
729+
}
730+
731+
/*
732+
* An update chain can start either with a non-heap-only tuple or with
733+
* a redirect line pointer, but not with a heap-only tuple.
734+
*
735+
* (This check is in a separate loop because we need the predecessor
736+
* array to be fully populated before we can perform it.)
737+
*/
738+
for (ctx.offnum = FirstOffsetNumber;
739+
ctx.offnum <= maxoff;
740+
ctx.offnum = OffsetNumberNext(ctx.offnum))
741+
{
742+
if (xmin_commit_status_ok[ctx.offnum] &&
743+
(xmin_commit_status[ctx.offnum] == XID_COMMITTED ||
744+
xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS) &&
745+
predecessor[ctx.offnum] == InvalidOffsetNumber)
746+
{
747+
ItemId curr_lp;
748+
749+
curr_lp = PageGetItemId(ctx.page, ctx.offnum);
750+
if (!ItemIdIsRedirected(curr_lp))
751+
{
752+
HeapTupleHeader curr_htup;
753+
754+
curr_htup = (HeapTupleHeader)
755+
PageGetItem(ctx.page, curr_lp);
756+
if (HeapTupleHeaderIsHeapOnly(curr_htup))
757+
report_corruption(&ctx,
758+
psprintf("tuple is root of chain but is marked as heap-only tuple"));
759+
}
760+
}
510761
}
511762

512763
/* clean up */
@@ -638,6 +889,7 @@ check_tuple_header(HeapCheckContext *ctx)
638889
{
639890
HeapTupleHeader tuphdr = ctx->tuphdr;
640891
uint16 infomask = tuphdr->t_infomask;
892+
TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid(tuphdr);
641893
bool result = true;
642894
unsigned expected_hoff;
643895

@@ -663,6 +915,19 @@ check_tuple_header(HeapCheckContext *ctx)
663915
*/
664916
}
665917

918+
if (!TransactionIdIsValid(curr_xmax) &&
919+
HeapTupleHeaderIsHotUpdated(tuphdr))
920+
{
921+
report_corruption(ctx,
922+
psprintf("tuple has been HOT updated, but xmax is 0"));
923+
924+
/*
925+
* As above, even though this shouldn't happen, it's not sufficient
926+
* justification for skipping further checks; we should still be able
927+
* to perform sensibly.
928+
*/
929+
}
930+
666931
if (infomask & HEAP_HASNULL)
667932
expected_hoff = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(ctx->natts));
668933
else
@@ -718,9 +983,14 @@ check_tuple_header(HeapCheckContext *ctx)
718983
* Returns true if the tuple itself should be checked, false otherwise. Sets
719984
* ctx->tuple_could_be_pruned if the tuple -- and thus also any associated
720985
* TOAST tuples -- are eligible for pruning.
986+
*
987+
* Sets *xmin_commit_status_ok to true if the commit status of xmin is known
988+
* and false otherwise. If it's set to true, then also set *xmin_commit_status
989+
* to the actual commit status.
721990
*/
722991
static bool
723-
check_tuple_visibility(HeapCheckContext *ctx)
992+
check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
993+
XidCommitStatus *xmin_commit_status)
724994
{
725995
TransactionId xmin;
726996
TransactionId xvac;
@@ -731,13 +1001,17 @@ check_tuple_visibility(HeapCheckContext *ctx)
7311001
HeapTupleHeader tuphdr = ctx->tuphdr;
7321002

7331003
ctx->tuple_could_be_pruned = true; /* have not yet proven otherwise */
1004+
*xmin_commit_status_ok = false; /* have not yet proven otherwise */
7341005

7351006
/* If xmin is normal, it should be within valid range */
7361007
xmin = HeapTupleHeaderGetXmin(tuphdr);
7371008
switch (get_xid_status(xmin, ctx, &xmin_status))
7381009
{
7391010
case XID_INVALID:
1011+
break;
7401012
case XID_BOUNDS_OK:
1013+
*xmin_commit_status_ok = true;
1014+
*xmin_commit_status = xmin_status;
7411015
break;
7421016
case XID_IN_FUTURE:
7431017
report_corruption(ctx,
@@ -1515,9 +1789,13 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
15151789
/*
15161790
* Check the current tuple as tracked in ctx, recording any corruption found in
15171791
* ctx->tupstore.
1792+
*
1793+
* We return some information about the status of xmin to aid in validating
1794+
* update chains.
15181795
*/
15191796
static void
1520-
check_tuple(HeapCheckContext *ctx)
1797+
check_tuple(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
1798+
XidCommitStatus *xmin_commit_status)
15211799
{
15221800
/*
15231801
* Check various forms of tuple header corruption, and if the header is
@@ -1531,7 +1809,8 @@ check_tuple(HeapCheckContext *ctx)
15311809
* cannot assume our relation description matches the tuple structure, and
15321810
* therefore cannot check it.
15331811
*/
1534-
if (!check_tuple_visibility(ctx))
1812+
if (!check_tuple_visibility(ctx, xmin_commit_status_ok,
1813+
xmin_commit_status))
15351814
return;
15361815

15371816
/*

0 commit comments

Comments
 (0)