Skip to content

Commit e04509f

Browse files
committed
amcheck: Distinguish interrupted page deletion from corruption.
This prevents false-positive reports of "the first child of leftmost target page is not leftmost of its level", "block %u is not leftmost" and "left link/right link pair". These reports appeared when amcheck ran before VACUUM had cleaned up the half-dead pages left behind after a cluster exited recovery between the first-stage and second-stage WAL records of a page deletion. Back-patch to v11 (all supported versions). Reviewed by Peter Geoghegan. Discussion: https://postgr.es/m/20231005025232.c7.nmisch@google.com
1 parent c804ffb commit e04509f

File tree

1 file changed

+72
-4
lines changed

1 file changed

+72
-4
lines changed

contrib/amcheck/verify_nbtree.c

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
127127
bool readonly, bool heapallindexed);
128128
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
129129
BtreeLevel level);
130+
static bool bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
131+
BlockNumber start,
132+
BTPageOpaque start_opaque);
130133
static void bt_target_page_check(BtreeCheckState *state);
131134
static ScanKey bt_right_page_check_scankey(BtreeCheckState *state);
132135
static void bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
@@ -716,7 +719,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
716719
*/
717720
if (state->readonly)
718721
{
719-
if (!P_LEFTMOST(opaque))
722+
if (!bt_leftmost_ignoring_half_dead(state, current, opaque))
720723
ereport(ERROR,
721724
(errcode(ERRCODE_INDEX_CORRUPTED),
722725
errmsg("block %u is not leftmost in index \"%s\"",
@@ -769,10 +772,14 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
769772
}
770773

771774
/*
772-
* readonly mode can only ever land on live pages and half-dead pages,
773-
* so sibling pointers should always be in mutual agreement
775+
* Sibling links should be in mutual agreement. There arises
776+
* leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
777+
* of the parent's low-key downlink is half-dead. (A half-dead page
778+
* has no downlink from its parent.) Under heavyweight locking, the
779+
* last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
774780
*/
775-
if (state->readonly && opaque->btpo_prev != leftcurrent)
781+
if (state->readonly &&
782+
opaque->btpo_prev != leftcurrent && leftcurrent != P_NONE)
776783
ereport(ERROR,
777784
(errcode(ERRCODE_INDEX_CORRUPTED),
778785
errmsg("left link/right link pair in index \"%s\" not in agreement",
@@ -822,6 +829,67 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
822829
return nextleveldown;
823830
}
824831

832+
/*
833+
* Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
834+
* half-dead, sibling-linked pages to the left. If a half-dead page appears
835+
* under state->readonly, the database exited recovery between the first-stage
836+
* and second-stage WAL records of a deletion.
837+
*/
838+
static bool
839+
bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
840+
BlockNumber start,
841+
BTPageOpaque start_opaque)
842+
{
843+
BlockNumber reached = start_opaque->btpo_prev,
844+
reached_from = start;
845+
bool all_half_dead = true;
846+
847+
/*
848+
* To handle the !readonly case, we'd need to accept BTP_DELETED pages and
849+
* potentially observe nbtree/README "Page deletion and backwards scans".
850+
*/
851+
Assert(state->readonly);
852+
853+
while (reached != P_NONE && all_half_dead)
854+
{
855+
Page page = palloc_btree_page(state, reached);
856+
BTPageOpaque reached_opaque = (BTPageOpaque) PageGetSpecialPointer(page);
857+
858+
CHECK_FOR_INTERRUPTS();
859+
860+
/*
861+
* Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
862+
* writes that side-links will continue to point to the siblings.
863+
* Check btpo_next for that property.
864+
*/
865+
all_half_dead = P_ISHALFDEAD(reached_opaque) &&
866+
reached != start &&
867+
reached != reached_from &&
868+
reached_opaque->btpo_next == reached_from;
869+
if (all_half_dead)
870+
{
871+
XLogRecPtr pagelsn = PageGetLSN(page);
872+
873+
/* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
874+
ereport(DEBUG1,
875+
(errcode(ERRCODE_NO_DATA),
876+
errmsg_internal("harmless interrupted page deletion detected in index \"%s\"",
877+
RelationGetRelationName(state->rel)),
878+
errdetail_internal("Block=%u right block=%u page lsn=%X/%X.",
879+
reached, reached_from,
880+
(uint32) (pagelsn >> 32),
881+
(uint32) pagelsn)));
882+
883+
reached_from = reached;
884+
reached = reached_opaque->btpo_prev;
885+
}
886+
887+
pfree(page);
888+
}
889+
890+
return all_half_dead;
891+
}
892+
825893
/*
826894
* Function performs the following checks on target page, or pages ancillary to
827895
* target page:

0 commit comments

Comments
 (0)