Skip to content

Commit c120550

Browse files
committed
Optimize vacuuming of relations with no indexes.
If there are no indexes on a relation, items can be marked LP_UNUSED instead
of LP_DEAD when pruning. This significantly reduces WAL volume, since we no
longer need to emit one WAL record for pruning and a second to change the
LP_DEAD line pointers thus created to LP_UNUSED.

Melanie Plageman, reviewed by Andres Freund, Peter Geoghegan, and me

Discussion: https://postgr.es/m/CAAKRu_bgvb_k0gKOXWzNKWHt560R0smrGe3E8zewKPs8fiMKkw%40mail.gmail.com
1 parent 7b1dbf0 commit c120550

File tree

3 files changed

+108
-102
lines changed

3 files changed

+108
-102
lines changed

src/backend/access/heap/pruneheap.c

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ typedef struct
3535

3636
/* tuple visibility test, initialized for the relation */
3737
GlobalVisState *vistest;
38+
/* whether or not dead items can be set LP_UNUSED during pruning */
39+
bool mark_unused_now;
3840

3941
TransactionId new_prune_xid; /* new prune hint value for page */
4042
TransactionId snapshotConflictHorizon; /* latest xid removed */
@@ -67,6 +69,7 @@ static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
6769
static void heap_prune_record_redirect(PruneState *prstate,
6870
OffsetNumber offnum, OffsetNumber rdoffnum);
6971
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
72+
static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum);
7073
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
7174
static void page_verify_redirects(Page page);
7275

@@ -148,7 +151,13 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
148151
{
149152
PruneResult presult;
150153

151-
heap_page_prune(relation, buffer, vistest, &presult, NULL);
154+
/*
155+
* For now, pass mark_unused_now as false regardless of whether or
156+
* not the relation has indexes, since we cannot safely determine
157+
* that during on-access pruning with the current implementation.
158+
*/
159+
heap_page_prune(relation, buffer, vistest, false,
160+
&presult, NULL);
152161

153162
/*
154163
* Report the number of tuples reclaimed to pgstats. This is
@@ -193,6 +202,9 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
193202
* (see heap_prune_satisfies_vacuum and
194203
* HeapTupleSatisfiesVacuum).
195204
*
205+
* mark_unused_now indicates whether or not dead items can be set LP_UNUSED during
206+
* pruning.
207+
*
196208
* off_loc is the offset location required by the caller to use in error
197209
* callback.
198210
*
@@ -203,6 +215,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
203215
void
204216
heap_page_prune(Relation relation, Buffer buffer,
205217
GlobalVisState *vistest,
218+
bool mark_unused_now,
206219
PruneResult *presult,
207220
OffsetNumber *off_loc)
208221
{
@@ -227,6 +240,7 @@ heap_page_prune(Relation relation, Buffer buffer,
227240
prstate.new_prune_xid = InvalidTransactionId;
228241
prstate.rel = relation;
229242
prstate.vistest = vistest;
243+
prstate.mark_unused_now = mark_unused_now;
230244
prstate.snapshotConflictHorizon = InvalidTransactionId;
231245
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
232246
memset(prstate.marked, 0, sizeof(prstate.marked));
@@ -306,9 +320,9 @@ heap_page_prune(Relation relation, Buffer buffer,
306320
if (off_loc)
307321
*off_loc = offnum;
308322

309-
/* Nothing to do if slot is empty or already dead */
323+
/* Nothing to do if slot is empty */
310324
itemid = PageGetItemId(page, offnum);
311-
if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
325+
if (!ItemIdIsUsed(itemid))
312326
continue;
313327

314328
/* Process this item or chain of items */
@@ -581,7 +595,17 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
581595
* function.)
582596
*/
583597
if (ItemIdIsDead(lp))
598+
{
599+
/*
600+
* If the caller set mark_unused_now true, we can set dead line
601+
* pointers LP_UNUSED now. We don't increment ndeleted here since
602+
* the LP was already marked dead.
603+
*/
604+
if (unlikely(prstate->mark_unused_now))
605+
heap_prune_record_unused(prstate, offnum);
606+
584607
break;
608+
}
585609

586610
Assert(ItemIdIsNormal(lp));
587611
htup = (HeapTupleHeader) PageGetItem(dp, lp);
@@ -715,7 +739,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
715739
* redirect the root to the correct chain member.
716740
*/
717741
if (i >= nchain)
718-
heap_prune_record_dead(prstate, rootoffnum);
742+
heap_prune_record_dead_or_unused(prstate, rootoffnum);
719743
else
720744
heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
721745
}
@@ -726,9 +750,9 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
726750
* item. This can happen if the loop in heap_page_prune caused us to
727751
* visit the dead successor of a redirect item before visiting the
728752
* redirect item. We can clean up by setting the redirect item to
729-
* DEAD state.
753+
* DEAD state or LP_UNUSED if the caller indicated.
730754
*/
731-
heap_prune_record_dead(prstate, rootoffnum);
755+
heap_prune_record_dead_or_unused(prstate, rootoffnum);
732756
}
733757

734758
return ndeleted;
@@ -774,6 +798,27 @@ heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
774798
prstate->marked[offnum] = true;
775799
}
776800

801+
/*
802+
* Depending on whether or not the caller set mark_unused_now to true, record that a
803+
* line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
804+
* which we will mark line pointers LP_UNUSED, but we will not mark line
805+
* pointers LP_DEAD if mark_unused_now is true.
806+
*/
807+
static void
808+
heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
809+
{
810+
/*
811+
* If the caller set mark_unused_now to true, we can remove dead tuples
812+
* during pruning instead of marking their line pointers dead. Set this
813+
* tuple's line pointer LP_UNUSED. We hint that this option is less
814+
* likely.
815+
*/
816+
if (unlikely(prstate->mark_unused_now))
817+
heap_prune_record_unused(prstate, offnum);
818+
else
819+
heap_prune_record_dead(prstate, offnum);
820+
}
821+
777822
/* Record line pointer to be marked unused */
778823
static void
779824
heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
@@ -903,13 +948,24 @@ heap_page_prune_execute(Buffer buffer,
903948
#ifdef USE_ASSERT_CHECKING
904949

905950
/*
906-
* Only heap-only tuples can become LP_UNUSED during pruning. They
907-
* don't need to be left in place as LP_DEAD items until VACUUM gets
908-
* around to doing index vacuuming.
951+
* When heap_page_prune() was called, mark_unused_now may have been
952+
* passed as true, which allows would-be LP_DEAD items to be made
953+
* LP_UNUSED instead. This is only possible if the relation has no
954+
* indexes. If there are any dead items, then mark_unused_now was not
955+
* true and every item being marked LP_UNUSED must refer to a
956+
* heap-only tuple.
909957
*/
910-
Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
911-
htup = (HeapTupleHeader) PageGetItem(page, lp);
912-
Assert(HeapTupleHeaderIsHeapOnly(htup));
958+
if (ndead > 0)
959+
{
960+
Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
961+
htup = (HeapTupleHeader) PageGetItem(page, lp);
962+
Assert(HeapTupleHeaderIsHeapOnly(htup));
963+
}
964+
else
965+
{
966+
Assert(ItemIdIsUsed(lp));
967+
}
968+
913969
#endif
914970

915971
ItemIdSetUnused(lp);

src/backend/access/heap/vacuumlazy.c

Lines changed: 39 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,69 +1036,6 @@ lazy_scan_heap(LVRelState *vacrel)
10361036

10371037
Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
10381038

1039-
if (vacrel->nindexes == 0)
1040-
{
1041-
/*
1042-
* Consider the need to do page-at-a-time heap vacuuming when
1043-
* using the one-pass strategy now.
1044-
*
1045-
* The one-pass strategy will never call lazy_vacuum(). The steps
1046-
* performed here can be thought of as the one-pass equivalent of
1047-
* a call to lazy_vacuum().
1048-
*/
1049-
if (prunestate.has_lpdead_items)
1050-
{
1051-
Size freespace;
1052-
1053-
lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer);
1054-
1055-
/* Forget the LP_DEAD items that we just vacuumed */
1056-
dead_items->num_items = 0;
1057-
1058-
/*
1059-
* Now perform FSM processing for blkno, and move on to next
1060-
* page.
1061-
*
1062-
* Our call to lazy_vacuum_heap_page() will have considered if
1063-
* it's possible to set all_visible/all_frozen independently
1064-
* of lazy_scan_prune(). Note that prunestate was invalidated
1065-
* by lazy_vacuum_heap_page() call.
1066-
*/
1067-
freespace = PageGetHeapFreeSpace(page);
1068-
1069-
UnlockReleaseBuffer(buf);
1070-
RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1071-
1072-
/*
1073-
* Periodically perform FSM vacuuming to make newly-freed
1074-
* space visible on upper FSM pages. FreeSpaceMapVacuumRange()
1075-
* vacuums the portion of the freespace map covering heap
1076-
* pages from start to end - 1. Include the block we just
1077-
* vacuumed by passing it blkno + 1. Overflow isn't an issue
1078-
* because MaxBlockNumber + 1 is InvalidBlockNumber which
1079-
* causes FreeSpaceMapVacuumRange() to vacuum freespace map
1080-
* pages covering the remainder of the relation.
1081-
*/
1082-
if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1083-
{
1084-
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1085-
blkno + 1);
1086-
next_fsm_block_to_vacuum = blkno + 1;
1087-
}
1088-
1089-
continue;
1090-
}
1091-
1092-
/*
1093-
* There was no call to lazy_vacuum_heap_page() because pruning
1094-
* didn't encounter/create any LP_DEAD items that needed to be
1095-
* vacuumed. Prune state has not been invalidated, so proceed
1096-
* with prunestate-driven visibility map and FSM steps (just like
1097-
* the two-pass strategy).
1098-
*/
1099-
Assert(dead_items->num_items == 0);
1100-
}
1101-
11021039
/*
11031040
* Handle setting visibility map bit based on information from the VM
11041041
* (as of last lazy_scan_skip() call), and from prunestate
@@ -1209,38 +1146,45 @@ lazy_scan_heap(LVRelState *vacrel)
12091146

12101147
/*
12111148
* Final steps for block: drop cleanup lock, record free space in the
1212-
* FSM
1149+
* FSM.
1150+
*
1151+
* If we will likely do index vacuuming, wait until
1152+
* lazy_vacuum_heap_rel() to save free space. This doesn't just save
1153+
* us some cycles; it also allows us to record any additional free
1154+
* space that lazy_vacuum_heap_page() will make available in cases
1155+
* where it's possible to truncate the page's line pointer array.
1156+
*
1157+
* Note: It's not in fact 100% certain that we really will call
1158+
* lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip index
1159+
* vacuuming (and so must skip heap vacuuming). This is deemed okay
1160+
* because it only happens in emergencies, or when there is very
1161+
* little free space anyway. (Besides, we start recording free space
1162+
* in the FSM once index vacuuming has been abandoned.)
12131163
*/
1214-
if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
1215-
{
1216-
/*
1217-
* Wait until lazy_vacuum_heap_rel() to save free space. This
1218-
* doesn't just save us some cycles; it also allows us to record
1219-
* any additional free space that lazy_vacuum_heap_page() will
1220-
* make available in cases where it's possible to truncate the
1221-
* page's line pointer array.
1222-
*
1223-
* Note: It's not in fact 100% certain that we really will call
1224-
* lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
1225-
* index vacuuming (and so must skip heap vacuuming). This is
1226-
* deemed okay because it only happens in emergencies, or when
1227-
* there is very little free space anyway. (Besides, we start
1228-
* recording free space in the FSM once index vacuuming has been
1229-
* abandoned.)
1230-
*
1231-
* Note: The one-pass (no indexes) case is only supposed to make
1232-
* it this far when there were no LP_DEAD items during pruning.
1233-
*/
1234-
Assert(vacrel->nindexes > 0);
1235-
UnlockReleaseBuffer(buf);
1236-
}
1237-
else
1164+
if (vacrel->nindexes == 0
1165+
|| !vacrel->do_index_vacuuming
1166+
|| !prunestate.has_lpdead_items)
12381167
{
12391168
Size freespace = PageGetHeapFreeSpace(page);
12401169

12411170
UnlockReleaseBuffer(buf);
12421171
RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1172+
1173+
/*
1174+
* Periodically perform FSM vacuuming to make newly-freed space
1175+
* visible on upper FSM pages. This is done after vacuuming if the
1176+
* table has indexes.
1177+
*/
1178+
if (vacrel->nindexes == 0 && prunestate.has_lpdead_items &&
1179+
blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1180+
{
1181+
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1182+
blkno);
1183+
next_fsm_block_to_vacuum = blkno;
1184+
}
12431185
}
1186+
else
1187+
UnlockReleaseBuffer(buf);
12441188
}
12451189

12461190
vacrel->blkno = InvalidBlockNumber;
@@ -1596,8 +1540,13 @@ lazy_scan_prune(LVRelState *vacrel,
15961540
* in presult.ndeleted. It should not be confused with lpdead_items;
15971541
* lpdead_items's final value can be thought of as the number of tuples
15981542
* that were deleted from indexes.
1543+
*
1544+
* If the relation has no indexes, we can immediately mark would-be dead
1545+
* items LP_UNUSED, so mark_unused_now should be true if no indexes and
1546+
* false otherwise.
15991547
*/
1600-
heap_page_prune(rel, buf, vacrel->vistest, &presult, &vacrel->offnum);
1548+
heap_page_prune(rel, buf, vacrel->vistest, vacrel->nindexes == 0,
1549+
&presult, &vacrel->offnum);
16011550

16021551
/*
16031552
* Now scan the page to collect LP_DEAD items and check for tuples
@@ -2520,7 +2469,7 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
25202469
bool all_frozen;
25212470
LVSavedErrInfo saved_err_info;
25222471

2523-
Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
2472+
Assert(vacrel->do_index_vacuuming);
25242473

25252474
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
25262475

src/include/access/heapam.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ struct GlobalVisState;
320320
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
321321
extern void heap_page_prune(Relation relation, Buffer buffer,
322322
struct GlobalVisState *vistest,
323+
bool mark_unused_now,
323324
PruneResult *presult,
324325
OffsetNumber *off_loc);
325326
extern void heap_page_prune_execute(Buffer buffer,

0 commit comments

Comments (0)