Skip to content

Commit 40708ea

Browse files
Fix undercounting in VACUUM VERBOSE output.
The logic for determining how many nbtree pages in an index are deleted pages sometimes undercounted pages. Pages that were deleted by the current VACUUM operation (as opposed to some previous VACUUM operation whose deleted pages have yet to be reused) were sometimes overlooked. The final count is exposed to users through VACUUM VERBOSE's "%u index pages have been deleted" output. btvacuumpage() avoided double-counting when _bt_pagedel() deleted more than one page by assuming that only one page was deleted, and that the additional deleted pages would get picked up during a future call to btvacuumpage() by the same VACUUM operation. _bt_pagedel() can legitimately delete pages that the btvacuumscan() scan will not visit again, though, so that assumption was slightly faulty. Fix the accounting by teaching _bt_pagedel() about its caller's requirements. It now only reports on pages that it knows btvacuumscan() won't visit again (including the current btvacuumpage() page), so everything works out in the end. This bug has been around forever. Only backpatch to v11, though, to keep _bt_pagedel() is sync on the branches that have today's bugfix commit b0229f2. Note that this commit changes the signature of _bt_pagedel(), just like commit b0229f2. Author: Peter Geoghegan Reviewed-By: Masahiko Sawada Discussion: https://postgr.es/m/CAH2-WzkrXBcMQWAYUJMFTTvzx_r4q=pYSjDe07JnUXhe+OZnJA@mail.gmail.com Backpatch: 11-
1 parent 05b7326 commit 40708ea

File tree

3 files changed

+38
-18
lines changed

3 files changed

+38
-18
lines changed

src/backend/access/nbtree/nbtpage.c

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
3737
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
3838
BTStack stack);
3939
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
40+
BlockNumber scanblkno,
4041
bool *rightsib_empty,
41-
TransactionId *oldestBtpoXact);
42+
TransactionId *oldestBtpoXact,
43+
uint32 *ndeleted);
4244
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
4345
BTStack stack, Buffer *topparent, OffsetNumber *topoff,
4446
BlockNumber *target, BlockNumber *rightsib);
@@ -1301,7 +1303,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
13011303
*
13021304
* Returns the number of pages successfully deleted (zero if page cannot
13031305
* be deleted now; could be more than one if parent or right sibling pages
1304-
* were deleted too).
1306+
* were deleted too). Note that this does not include pages that we delete
1307+
* that the btvacuumscan scan has yet to reach; they'll get counted later
1308+
* instead.
13051309
*
13061310
* Maintains *oldestBtpoXact for any pages that get deleted. Caller is
13071311
* responsible for maintaining *oldestBtpoXact in the case of pages that were
@@ -1311,15 +1315,21 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
13111315
* carefully, it's better to run it in a temp context that can be reset
13121316
* frequently.
13131317
*/
1314-
int
1318+
uint32
13151319
_bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
13161320
{
1317-
int ndeleted = 0;
1321+
uint32 ndeleted = 0;
13181322
BlockNumber rightsib;
13191323
bool rightsib_empty;
13201324
Page page;
13211325
BTPageOpaque opaque;
13221326

1327+
/*
1328+
* Save original leafbuf block number from caller. Only deleted blocks
1329+
* that are <= scanblkno get counted in ndeleted return value.
1330+
*/
1331+
BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
1332+
13231333
/*
13241334
* "stack" is a search stack leading (approximately) to the target page.
13251335
* It is initially NULL, but when iterating, we keep it to avoid
@@ -1370,8 +1380,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
13701380
if (P_ISDELETED(opaque))
13711381
ereport(LOG,
13721382
(errcode(ERRCODE_INDEX_CORRUPTED),
1373-
errmsg_internal("found deleted block %u while following right link in index \"%s\"",
1383+
errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
13741384
BufferGetBlockNumber(leafbuf),
1385+
scanblkno,
13751386
RelationGetRelationName(rel))));
13761387

13771388
_bt_relbuf(rel, leafbuf);
@@ -1521,13 +1532,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
15211532
while (P_ISHALFDEAD(opaque))
15221533
{
15231534
/* Check for interrupts in _bt_unlink_halfdead_page */
1524-
if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty,
1525-
oldestBtpoXact))
1535+
if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
1536+
&rightsib_empty, oldestBtpoXact,
1537+
&ndeleted))
15261538
{
15271539
/* _bt_unlink_halfdead_page failed, released buffer */
15281540
return ndeleted;
15291541
}
1530-
ndeleted++;
15311542
}
15321543

15331544
Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
@@ -1779,8 +1790,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
17791790
* to avoid having to reacquire a lock we already released).
17801791
*/
17811792
static bool
1782-
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
1783-
TransactionId *oldestBtpoXact)
1793+
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
1794+
bool *rightsib_empty, TransactionId *oldestBtpoXact,
1795+
uint32 *ndeleted)
17841796
{
17851797
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
17861798
BlockNumber leafleftsib;
@@ -2166,6 +2178,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
21662178
TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
21672179
*oldestBtpoXact = opaque->btpo.xact;
21682180

2181+
/*
2182+
* If btvacuumscan won't revisit this page in a future btvacuumpage call
2183+
* and count it as deleted then, we count it as deleted by current
2184+
* btvacuumpage call
2185+
*/
2186+
if (target <= scanblkno)
2187+
(*ndeleted)++;
2188+
21692189
/*
21702190
* Release the target, if it was not the leaf block. The leaf is always
21712191
* kept locked.

src/backend/access/nbtree/nbtree.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,17 +1349,17 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
13491349
if (delete_now)
13501350
{
13511351
MemoryContext oldcontext;
1352-
int ndel;
13531352

13541353
/* Run pagedel in a temp context to avoid memory leakage */
13551354
MemoryContextReset(vstate->pagedelcontext);
13561355
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
13571356

1358-
ndel = _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
1359-
1360-
/* count only this page, else may double-count parent */
1361-
if (ndel)
1362-
stats->pages_deleted++;
1357+
/*
1358+
* We trust the _bt_pagedel return value because it does not include
1359+
* any page that a future call here from btvacuumscan is expected to
1360+
* count. There will be no double-counting.
1361+
*/
1362+
stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
13631363

13641364
MemoryContextSwitchTo(oldcontext);
13651365
/* pagedel released buffer, so we shouldn't */

src/include/access/nbtree.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -764,8 +764,8 @@ extern void _bt_delitems_delete(Relation rel, Buffer buf,
764764
extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
765765
OffsetNumber *itemnos, int nitems,
766766
BlockNumber lastBlockVacuumed);
767-
extern int _bt_pagedel(Relation rel, Buffer leafbuf,
768-
TransactionId *oldestBtpoXact);
767+
extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf,
768+
TransactionId *oldestBtpoXact);
769769

770770
/*
771771
* prototypes for functions in nbtsearch.c

0 commit comments

Comments
 (0)