Commit 980ae17

Tighten up VACUUM's approach to setting VM bits.
Tighten up the way that visibilitymap_set() is called: request that both
the all-visible and all-frozen bits get set whenever the all-frozen bit
is set, regardless of what we think we know about the present state of
the all-visible bit. Also make sure that the page level PD_ALL_VISIBLE
flag is set in the same code path.

In practice there doesn't seem to be a concrete scenario in which the
previous approach could lead to inconsistencies. It was almost possible
in scenarios involving concurrent HOT updates from transactions that
abort, but (unlike pruning) freezing can never remove XIDs > VACUUM's
OldestXmin, even those from transactions that are known to have aborted.
That was protective here.

These issues have been around since commit a892234, which added the
all-frozen bit to the VM fork. There is no known live bug here, so no
backpatch.

In passing, add some defensive assertions to catch the issue, and stop
reading the existing state of the VM when setting the VM in VACUUM's
final heap pass. We already know that affected pages must have had at
least one LP_DEAD item before we set it LP_UNUSED, so there is no point
in reading the VM when it is set like this.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAH2-WznuNGSzF8v6OsgjaC5aYsb3cZ6HW6MLm30X0d65cmSH6A@mail.gmail.com
Parent: 6fa66ec
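
The heart of the change is a stricter calling convention for visibilitymap_set(). The sketch below is not part of the commit: the helper name is hypothetical, and the surrounding VACUUM state (rel, blkno, buf, page, vmbuffer) is assumed to be set up as in lazy_scan_heap(). The calls and flags themselves are the backend APIs that appear in the diff.

/*
 * Illustration only: the call pattern this commit establishes when
 * VACUUM finds a page to be all-visible and all-frozen.
 */
static void
set_vm_all_frozen_sketch(Relation rel, BlockNumber blkno,
						 Buffer buf, Page page, Buffer vmbuffer)
{
	/* Set the page-level bit first; visibilitymap_set() now asserts it */
	if (!PageIsAllVisible(page))
	{
		PageSetAllVisible(page);
		MarkBufferDirty(buf);
	}

	/*
	 * Request both bits whenever setting all-frozen, regardless of what
	 * we think we know about the current state of the all-visible bit.
	 */
	visibilitymap_set(rel, blkno, buf, InvalidXLogRecPtr,
					  vmbuffer, InvalidTransactionId,
					  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
}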

2 files changed (+65, -40)

src/backend/access/heap/vacuumlazy.c (59 additions, 37 deletions)
@@ -260,7 +260,7 @@ static void lazy_vacuum(LVRelState *vacrel);
 static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
 static void lazy_vacuum_heap_rel(LVRelState *vacrel);
 static int	lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
-								  Buffer buffer, int index, Buffer *vmbuffer);
+								  Buffer buffer, int index, Buffer vmbuffer);
 static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
 static void lazy_cleanup_all_indexes(LVRelState *vacrel);
 static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
@@ -945,17 +945,15 @@ lazy_scan_heap(LVRelState *vacrel)
		 */
		visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
 
-		/* Finished preparatory checks.  Actually scan the page. */
-		buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno,
-								 RBM_NORMAL, vacrel->bstrategy);
-		page = BufferGetPage(buf);
-
		/*
		 * We need a buffer cleanup lock to prune HOT chains and defragment
		 * the page in lazy_scan_prune.  But when it's not possible to acquire
		 * a cleanup lock right away, we may be able to settle for reduced
		 * processing using lazy_scan_noprune.
		 */
+		buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
+								 vacrel->bstrategy);
+		page = BufferGetPage(buf);
		if (!ConditionalLockBufferForCleanup(buf))
		{
			bool		hastup,
@@ -1040,7 +1038,7 @@ lazy_scan_heap(LVRelState *vacrel)
			{
				Size		freespace;
 
-				lazy_vacuum_heap_page(vacrel, blkno, buf, 0, &vmbuffer);
+				lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer);
 
				/* Forget the LP_DEAD items that we just vacuumed */
				dead_items->num_items = 0;
@@ -1092,7 +1090,10 @@ lazy_scan_heap(LVRelState *vacrel)
			uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
 
			if (prunestate.all_frozen)
+			{
+				Assert(!TransactionIdIsValid(prunestate.visibility_cutoff_xid));
				flags |= VISIBILITYMAP_ALL_FROZEN;
+			}
 
			/*
			 * It should never be the case that the visibility map page is set
@@ -1120,8 +1121,8 @@ lazy_scan_heap(LVRelState *vacrel)
		 * got cleared after lazy_scan_skip() was called, so we must recheck
		 * with buffer lock before concluding that the VM is corrupt.
		 */
-		else if (all_visible_according_to_vm && !PageIsAllVisible(page)
-				 && VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer))
+		else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
+				 visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
		{
			elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
				 vacrel->relname, blkno);
@@ -1164,12 +1165,27 @@ lazy_scan_heap(LVRelState *vacrel)
				 !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
		{
			/*
-			 * We can pass InvalidTransactionId as the cutoff XID here,
-			 * because setting the all-frozen bit doesn't cause recovery
-			 * conflicts.
+			 * Avoid relying on all_visible_according_to_vm as a proxy for the
+			 * page-level PD_ALL_VISIBLE bit being set, since it might have
+			 * become stale -- even when all_visible is set in prunestate
+			 */
+			if (!PageIsAllVisible(page))
+			{
+				PageSetAllVisible(page);
+				MarkBufferDirty(buf);
+			}
+
+			/*
+			 * Set the page all-frozen (and all-visible) in the VM.
+			 *
+			 * We can pass InvalidTransactionId as our visibility_cutoff_xid,
+			 * since a snapshotConflictHorizon sufficient to make everything
+			 * safe for REDO was logged when the page's tuples were frozen.
			 */
+			Assert(!TransactionIdIsValid(prunestate.visibility_cutoff_xid));
			visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
							  vmbuffer, InvalidTransactionId,
+							  VISIBILITYMAP_ALL_VISIBLE |
							  VISIBILITYMAP_ALL_FROZEN);
		}
 
@@ -1311,7 +1327,11 @@ lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
 
		/* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
		if (!vacrel->skipwithvm)
+		{
+			/* Caller shouldn't rely on all_visible_according_to_vm */
+			*next_unskippable_allvis = false;
			break;
+		}
 
		/*
		 * Aggressive VACUUM caller can't skip pages just because they are
@@ -1807,8 +1827,6 @@ lazy_scan_prune(LVRelState *vacrel,
	{
		TransactionId snapshotConflictHorizon;
 
-		Assert(prunestate->hastup);
-
		vacrel->frozen_pages++;
 
		/*
@@ -1818,7 +1836,11 @@ lazy_scan_prune(LVRelState *vacrel,
		 * cutoff by stepping back from OldestXmin.
		 */
		if (prunestate->all_visible && prunestate->all_frozen)
+		{
+			/* Using same cutoff when setting VM is now unnecessary */
			snapshotConflictHorizon = prunestate->visibility_cutoff_xid;
+			prunestate->visibility_cutoff_xid = InvalidTransactionId;
+		}
		else
		{
			/* Avoids false conflicts when hot_standby_feedback in use */
@@ -2417,10 +2439,19 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 
		blkno = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]);
		vacrel->blkno = blkno;
+
+		/*
+		 * Pin the visibility map page in case we need to mark the page
+		 * all-visible.  In most cases this will be very cheap, because we'll
+		 * already have the correct page pinned anyway.
+		 */
+		visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
+
+		/* We need a non-cleanup exclusive lock to mark dead_items unused */
		buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
								 vacrel->bstrategy);
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
-		index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, &vmbuffer);
+		index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, vmbuffer);
 
		/* Now that we've vacuumed the page, record its available space */
		page = BufferGetPage(buf);
@@ -2457,15 +2488,16 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 * vacrel->dead_items array.
 *
 * Caller must have an exclusive buffer lock on the buffer (though a full
- * cleanup lock is also acceptable).
+ * cleanup lock is also acceptable).  vmbuffer must be valid and already have
+ * a pin on blkno's visibility map page.
 *
 * index is an offset into the vacrel->dead_items array for the first listed
 * LP_DEAD item on the page.  The return value is the first index immediately
 * after all LP_DEAD items for the same page in the array.
 */
 static int
 lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
-					  int index, Buffer *vmbuffer)
+					  int index, Buffer vmbuffer)
 {
	VacDeadItems *dead_items = vacrel->dead_items;
	Page		page = BufferGetPage(buffer);
@@ -2546,31 +2578,21 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
	 * dirty, exclusively locked, and, if needed, a full page image has been
	 * emitted.
	 */
+	Assert(!PageIsAllVisible(page));
	if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
								 &all_frozen))
-		PageSetAllVisible(page);
-
-	/*
-	 * All the changes to the heap page have been done.  If the all-visible
-	 * flag is now set, also set the VM all-visible bit (and, if possible, the
-	 * all-frozen bit) unless this has already been done previously.
-	 */
-	if (PageIsAllVisible(page))
	{
-		uint8		flags = 0;
-		uint8		vm_status = visibilitymap_get_status(vacrel->rel,
-														 blkno, vmbuffer);
-
-		/* Set the VM all-frozen bit to flag, if needed */
-		if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
-			flags |= VISIBILITYMAP_ALL_VISIBLE;
-		if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
+		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
+
+		if (all_frozen)
+		{
+			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
			flags |= VISIBILITYMAP_ALL_FROZEN;
+		}
 
-		Assert(BufferIsValid(*vmbuffer));
-		if (flags != 0)
-			visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
-							  *vmbuffer, visibility_cutoff_xid, flags);
+		PageSetAllVisible(page);
+		visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
+						  vmbuffer, visibility_cutoff_xid, flags);
	}
 
	/* Revert to the previous phase information for error traceback */
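
A note on the lazy_vacuum_heap_page() signature change above: the VM pin is now the caller's responsibility, so vmbuffer is passed by value. A condensed sketch of the new division of labor, with the loop control and error handling of lazy_vacuum_heap_rel() omitted:

	/* Caller pins the right VM page before locking the heap buffer */
	visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);

	buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
							 vacrel->bstrategy);
	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	/* Callee receives the pinned vmbuffer and never reads the VM itself */
	index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, vmbuffer);

Since every page reaching lazy_vacuum_heap_page() had at least one LP_DEAD item, its VM bits cannot have been set, which is why the function can now assert !PageIsAllVisible(page) instead of calling visibilitymap_get_status().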

src/backend/access/heap/visibilitymap.c (6 additions, 3 deletions)
@@ -146,7 +146,9 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags
	char	   *map;
	bool		cleared = false;
 
+	/* Must never clear all_visible bit while leaving all_frozen bit set */
	Assert(flags & VISIBILITYMAP_VALID_BITS);
+	Assert(flags != VISIBILITYMAP_ALL_VISIBLE);
 
 #ifdef TRACE_VISIBILITYMAP
	elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
@@ -256,8 +258,11 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 #endif
 
	Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
-	Assert(InRecovery || BufferIsValid(heapBuf));
+	Assert(InRecovery || PageIsAllVisible((Page) BufferGetPage(heapBuf)));
+
+	/* Must never set all_frozen bit without also setting all_visible bit */
	Assert(flags & VISIBILITYMAP_VALID_BITS);
+	Assert(flags != VISIBILITYMAP_ALL_FROZEN);
 
	/* Check that we have the right heap page pinned, if present */
	if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
@@ -299,8 +304,6 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
		{
			Page		heapPage = BufferGetPage(heapBuf);
 
-			/* caller is expected to set PD_ALL_VISIBLE first */
-			Assert(PageIsAllVisible(heapPage));
			PageSetLSN(heapPage, recptr);
		}
	}
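
To make the two new assertion pairs concrete, here is a small standalone program (illustration only, not backend code; the bit values match those in PostgreSQL's visibilitymapdefs.h) that models which flag combinations visibilitymap_set() and visibilitymap_clear() now accept:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Bit values as defined in visibilitymapdefs.h */
#define VISIBILITYMAP_ALL_VISIBLE	0x01
#define VISIBILITYMAP_ALL_FROZEN	0x02
#define VISIBILITYMAP_VALID_BITS	0x03

/* Mirrors the assertions added to visibilitymap_set() */
static void
check_set_flags(uint8_t flags)
{
	assert(flags & VISIBILITYMAP_VALID_BITS);
	/* Must never set all_frozen bit without also setting all_visible bit */
	assert(flags != VISIBILITYMAP_ALL_FROZEN);
}

/* Mirrors the assertions added to visibilitymap_clear() */
static void
check_clear_flags(uint8_t flags)
{
	assert(flags & VISIBILITYMAP_VALID_BITS);
	/* Must never clear all_visible bit while leaving all_frozen bit set */
	assert(flags != VISIBILITYMAP_ALL_VISIBLE);
}

int
main(void)
{
	/* Legal sets: both bits together, or all_visible alone */
	check_set_flags(VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
	check_set_flags(VISIBILITYMAP_ALL_VISIBLE);

	/* Legal clears: both bits, or all_frozen alone */
	check_clear_flags(VISIBILITYMAP_VALID_BITS);
	check_clear_flags(VISIBILITYMAP_ALL_FROZEN);

	puts("flag invariants hold");
	return 0;
}

Together the two assertions enforce a single invariant: the all-frozen bit can never be set, or left set, without the all-visible bit also being set.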
