Skip to content

Commit 2376361

Browse files
VACUUM VERBOSE: Count "newly deleted" index pages.
Teach VACUUM VERBOSE to report on pages deleted by the _current_ VACUUM operation -- these are newly deleted pages. VACUUM VERBOSE continues to report on the total number of deleted pages in the entire index (no change there). The former is a subset of the latter.

The distinction between each category of deleted index page only arises with index AMs where page deletion is supported and is decoupled from page recycling for performance reasons.

This is follow-up work to commit e5d8a99, which made nbtree store 64-bit XIDs (not 32-bit XIDs) in pages at the point at which they're deleted. Note that the btm_last_cleanup_num_delpages metapage field added by that commit usually gets set to pages_newly_deleted. The exceptions (the scenarios in which they're not equal) all seem to be tricky cases for the implementation (of page deletion and recycling) in general.

Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAH2-WznpdHvujGUwYZ8sihX%3Dd5u-tRYhi-F4wnV2uN2zHpMUXw%40mail.gmail.com
1 parent 301ed88 commit 2376361

File tree

8 files changed

+85
-53
lines changed

8 files changed

+85
-53
lines changed

src/backend/access/gin/ginvacuum.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
231231

232232
END_CRIT_SECTION();
233233

234+
gvs->result->pages_newly_deleted++;
234235
gvs->result->pages_deleted++;
235236
}
236237

src/backend/access/gist/gistvacuum.c

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -133,9 +133,21 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
133133
MemoryContext oldctx;
134134

135135
/*
136-
* Reset counts that will be incremented during the scan; needed in case
137-
* of multiple scans during a single VACUUM command.
136+
* Reset fields that track information about the entire index now. This
137+
* avoids double-counting in the case where a single VACUUM command
138+
* requires multiple scans of the index.
139+
*
140+
* Avoid resetting the tuples_removed and pages_newly_deleted fields here,
141+
* since they track information about the VACUUM command, and so must last
142+
* across each call to gistvacuumscan().
143+
*
144+
* (Note that pages_free is treated as state about the whole index, not
145+
* the current VACUUM. This is appropriate because RecordFreeIndexPage()
146+
* calls are idempotent, and get repeated for the same deleted pages in
147+
* some scenarios. The point for us is to track the number of recyclable
148+
* pages in the index at the end of the VACUUM command.)
138149
*/
150+
stats->num_pages = 0;
139151
stats->estimated_count = false;
140152
stats->num_index_tuples = 0;
141153
stats->pages_deleted = 0;
@@ -281,8 +293,8 @@ gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
281293
{
282294
/* Okay to recycle this page */
283295
RecordFreeIndexPage(rel, blkno);
284-
vstate->stats->pages_free++;
285296
vstate->stats->pages_deleted++;
297+
vstate->stats->pages_free++;
286298
}
287299
else if (GistPageIsDeleted(page))
288300
{
@@ -636,6 +648,7 @@ gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
636648
/* mark the page as deleted */
637649
MarkBufferDirty(leafBuffer);
638650
GistPageSetDeleted(leafPage, txid);
651+
stats->pages_newly_deleted++;
639652
stats->pages_deleted++;
640653

641654
/* remove the downlink from the parent */

src/backend/access/heap/vacuumlazy.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2521,9 +2521,11 @@ lazy_cleanup_index(Relation indrel,
25212521
(*stats)->num_index_tuples,
25222522
(*stats)->num_pages),
25232523
errdetail("%.0f index row versions were removed.\n"
2524-
"%u index pages have been deleted, %u are currently reusable.\n"
2524+
"%u index pages were newly deleted.\n"
2525+
"%u index pages are currently deleted, of which %u are currently reusable.\n"
25252526
"%s.",
25262527
(*stats)->tuples_removed,
2528+
(*stats)->pages_newly_deleted,
25272529
(*stats)->pages_deleted, (*stats)->pages_free,
25282530
pg_rusage_show(&ru0))));
25292531
}

src/backend/access/nbtree/nbtpage.c

Lines changed: 30 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
5050
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
5151
BlockNumber scanblkno,
5252
bool *rightsib_empty,
53-
uint32 *ndeleted);
53+
BTVacState *vstate);
5454
static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child,
5555
BTStack stack,
5656
Buffer *subtreeparent,
@@ -1760,28 +1760,31 @@ _bt_rightsib_halfdeadflag(Relation rel, BlockNumber leafrightsib)
17601760
* should never pass a buffer containing an existing deleted page here. The
17611761
* lock and pin on caller's buffer will be dropped before we return.
17621762
*
1763-
* Returns the number of pages successfully deleted (zero if page cannot
1764-
* be deleted now; could be more than one if parent or right sibling pages
1765-
* were deleted too). Note that this does not include pages that we delete
1766-
* that the btvacuumscan scan has yet to reach; they'll get counted later
1767-
* instead.
1763+
* Maintains bulk delete stats for caller, which are taken from vstate. We
1764+
* need to cooperate closely with caller here so that whole VACUUM operation
1765+
* reliably avoids any double counting of subsidiary-to-leafbuf pages that we
1766+
* delete in passing. If such pages happen to be from a block number that is
1767+
* ahead of the current scanblkno position, then caller is expected to count
1768+
* them directly later on. It's simpler for us to understand caller's
1769+
* requirements than it would be for caller to understand when or how a
1770+
* deleted page became deleted after the fact.
17681771
*
17691772
* NOTE: this leaks memory. Rather than trying to clean up everything
17701773
* carefully, it's better to run it in a temp context that can be reset
17711774
* frequently.
17721775
*/
1773-
uint32
1774-
_bt_pagedel(Relation rel, Buffer leafbuf)
1776+
void
1777+
_bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
17751778
{
1776-
uint32 ndeleted = 0;
17771779
BlockNumber rightsib;
17781780
bool rightsib_empty;
17791781
Page page;
17801782
BTPageOpaque opaque;
17811783

17821784
/*
17831785
* Save original leafbuf block number from caller. Only deleted blocks
1784-
* that are <= scanblkno get counted in ndeleted return value.
1786+
* that are <= scanblkno are added to bulk delete stat's pages_deleted
1787+
* count.
17851788
*/
17861789
BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
17871790

@@ -1843,7 +1846,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
18431846
RelationGetRelationName(rel))));
18441847

18451848
_bt_relbuf(rel, leafbuf);
1846-
return ndeleted;
1849+
return;
18471850
}
18481851

18491852
/*
@@ -1873,7 +1876,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
18731876
Assert(!P_ISHALFDEAD(opaque));
18741877

18751878
_bt_relbuf(rel, leafbuf);
1876-
return ndeleted;
1879+
return;
18771880
}
18781881

18791882
/*
@@ -1922,8 +1925,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
19221925
if (_bt_leftsib_splitflag(rel, leftsib, leafblkno))
19231926
{
19241927
ReleaseBuffer(leafbuf);
1925-
Assert(ndeleted == 0);
1926-
return ndeleted;
1928+
return;
19271929
}
19281930

19291931
/* we need an insertion scan key for the search, so build one */
@@ -1964,7 +1966,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
19641966
if (!_bt_mark_page_halfdead(rel, leafbuf, stack))
19651967
{
19661968
_bt_relbuf(rel, leafbuf);
1967-
return ndeleted;
1969+
return;
19681970
}
19691971
}
19701972

@@ -1979,7 +1981,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
19791981
{
19801982
/* Check for interrupts in _bt_unlink_halfdead_page */
19811983
if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
1982-
&rightsib_empty, &ndeleted))
1984+
&rightsib_empty, vstate))
19831985
{
19841986
/*
19851987
* _bt_unlink_halfdead_page should never fail, since we
@@ -1990,7 +1992,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
19901992
* lock and pin on leafbuf for us.
19911993
*/
19921994
Assert(false);
1993-
return ndeleted;
1995+
return;
19941996
}
19951997
}
19961998

@@ -2026,8 +2028,6 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
20262028

20272029
leafbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
20282030
}
2029-
2030-
return ndeleted;
20312031
}
20322032

20332033
/*
@@ -2262,9 +2262,10 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
22622262
*/
22632263
static bool
22642264
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
2265-
bool *rightsib_empty, uint32 *ndeleted)
2265+
bool *rightsib_empty, BTVacState *vstate)
22662266
{
22672267
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
2268+
IndexBulkDeleteResult *stats = vstate->stats;
22682269
BlockNumber leafleftsib;
22692270
BlockNumber leafrightsib;
22702271
BlockNumber target;
@@ -2674,12 +2675,17 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
26742675
_bt_relbuf(rel, buf);
26752676

26762677
/*
2677-
* If btvacuumscan won't revisit this page in a future btvacuumpage call
2678-
* and count it as deleted then, we count it as deleted by current
2679-
* btvacuumpage call
2678+
* Maintain pages_newly_deleted, which is simply the number of pages
2679+
* deleted by the ongoing VACUUM operation.
2680+
*
2681+
* Maintain pages_deleted in a way that takes into account how
2682+
* btvacuumpage() will count deleted pages that have yet to become
2683+
* scanblkno -- only count page when it's not going to get that treatment
2684+
* later on.
26802685
*/
2686+
stats->pages_newly_deleted++;
26812687
if (target <= scanblkno)
2682-
(*ndeleted)++;
2688+
stats->pages_deleted++;
26832689

26842690
return true;
26852691
}

src/backend/access/nbtree/nbtree.c

Lines changed: 12 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -38,17 +38,6 @@
3838
#include "utils/memutils.h"
3939

4040

41-
/* Working state needed by btvacuumpage */
42-
typedef struct
43-
{
44-
IndexVacuumInfo *info;
45-
IndexBulkDeleteResult *stats;
46-
IndexBulkDeleteCallback callback;
47-
void *callback_state;
48-
BTCycleId cycleid;
49-
MemoryContext pagedelcontext;
50-
} BTVacState;
51-
5241
/*
5342
* BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started.
5443
*
@@ -1016,9 +1005,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
10161005
* avoids double-counting in the case where a single VACUUM command
10171006
* requires multiple scans of the index.
10181007
*
1019-
* Avoid resetting the tuples_removed field here, since it tracks
1020-
* information about the VACUUM command, and so must last across each call
1021-
* to btvacuumscan().
1008+
* Avoid resetting the tuples_removed and pages_newly_deleted fields here,
1009+
* since they track information about the VACUUM command, and so must last
1010+
* across each call to btvacuumscan().
10221011
*
10231012
* (Note that pages_free is treated as state about the whole index, not
10241013
* the current VACUUM. This is appropriate because RecordFreeIndexPage()
@@ -1237,11 +1226,13 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
12371226
}
12381227
else if (P_ISHALFDEAD(opaque))
12391228
{
1229+
/* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1230+
attempt_pagedel = true;
1231+
12401232
/*
1241-
* Half-dead leaf page. Try to delete now. Might update
1242-
* pages_deleted below.
1233+
* _bt_pagedel() will increment both pages_newly_deleted and
1234+
* pages_deleted stats in all cases (barring corruption)
12431235
*/
1244-
attempt_pagedel = true;
12451236
}
12461237
else if (P_ISLEAF(opaque))
12471238
{
@@ -1451,12 +1442,12 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
14511442
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
14521443

14531444
/*
1454-
* We trust the _bt_pagedel return value because it does not include
1455-
* any page that a future call here from btvacuumscan is expected to
1456-
* count. There will be no double-counting.
1445+
* _bt_pagedel maintains the bulk delete stats on our behalf;
1446+
* pages_newly_deleted and pages_deleted are likely to be incremented
1447+
* during call
14571448
*/
14581449
Assert(blkno == scanblkno);
1459-
stats->pages_deleted += _bt_pagedel(rel, buf);
1450+
_bt_pagedel(rel, buf, vstate);
14601451

14611452
MemoryContextSwitchTo(oldcontext);
14621453
/* pagedel released buffer, so we shouldn't */

src/backend/access/spgist/spgvacuum.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -891,6 +891,7 @@ spgvacuumscan(spgBulkDeleteState *bds)
891891

892892
/* Report final stats */
893893
bds->stats->num_pages = num_pages;
894+
bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
894895
bds->stats->pages_free = bds->stats->pages_deleted;
895896
}
896897

src/include/access/genam.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,11 @@ typedef struct IndexVacuumInfo
6363
* of which this is just the first field; this provides a way for ambulkdelete
6464
* to communicate additional private data to amvacuumcleanup.
6565
*
66-
* Note: pages_deleted and pages_free refer to free space within the index
67-
* file. Some index AMs may compute num_index_tuples by reference to
66+
* Note: pages_newly_deleted is the number of pages in the index that were
67+
* deleted by the current vacuum operation. pages_deleted and pages_free
68+
* refer to free space within the index file.
69+
*
70+
* Note: Some index AMs may compute num_index_tuples by reference to
6871
* num_heap_tuples, in which case they should copy the estimated_count field
6972
* from IndexVacuumInfo.
7073
*/
@@ -74,7 +77,8 @@ typedef struct IndexBulkDeleteResult
7477
bool estimated_count; /* num_index_tuples is an estimate */
7578
double num_index_tuples; /* tuples remaining */
7679
double tuples_removed; /* # removed during vacuum operation */
77-
BlockNumber pages_deleted; /* # unused pages in index */
80+
BlockNumber pages_newly_deleted; /* # pages marked deleted by us */
81+
BlockNumber pages_deleted; /* # pages marked deleted (could be by us) */
7882
BlockNumber pages_free; /* # pages available for reuse */
7983
} IndexBulkDeleteResult;
8084

src/include/access/nbtree.h

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -312,6 +312,20 @@ BTPageIsRecyclable(Page page)
312312
return false;
313313
}
314314

315+
/*
316+
* BTVacState is private nbtree.c state used during VACUUM. It is exported
317+
* for use by page deletion related code in nbtpage.c.
318+
*/
319+
typedef struct BTVacState
320+
{
321+
IndexVacuumInfo *info;
322+
IndexBulkDeleteResult *stats;
323+
IndexBulkDeleteCallback callback;
324+
void *callback_state;
325+
BTCycleId cycleid;
326+
MemoryContext pagedelcontext;
327+
} BTVacState;
328+
315329
/*
316330
* Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
317331
* page. The high key is not a tuple that is used to visit the heap. It is
@@ -1181,7 +1195,7 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
11811195
extern void _bt_delitems_delete_check(Relation rel, Buffer buf,
11821196
Relation heapRel,
11831197
TM_IndexDeleteOp *delstate);
1184-
extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf);
1198+
extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate);
11851199

11861200
/*
11871201
* prototypes for functions in nbtsearch.c

0 commit comments

Comments
 (0)