Skip to content

Commit 73bd34c

Browse files
committed
Fix VACUUM so that it always updates pg_class.reltuples/relpages.
When we added the ability for vacuum to skip heap pages by consulting the visibility map, we made it just not update the reltuples/relpages statistics if it skipped any pages. But this could leave us with extremely out-of-date stats for a table that contains any unchanging areas, especially for TOAST tables which never get processed by ANALYZE. In particular this could result in autovacuum making poor decisions about when to process the table, as in recent report from Florian Helmberger. And in general it's a bad idea to not update the stats at all. Instead, use the previous values of reltuples/relpages as an estimate of the tuple density in unvisited pages. This approach results in a "moving average" estimate of reltuples, which should converge to the correct value over multiple VACUUM and ANALYZE cycles even when individual measurements aren't very good. This new method for updating reltuples is used by both VACUUM and ANALYZE, with the result that we no longer need the grotty interconnections that caused ANALYZE to not update the stats depending on what had happened in the parent VACUUM command. Also, fix the logic for skipping all-visible pages during VACUUM so that it looks ahead rather than behind to decide what to do, as per a suggestion from Greg Stark. This eliminates useless scanning of all-visible pages at the start of the relation or just after a not-all-visible page. In particular, the first few pages of the relation will not be invariably included in the scanned pages, which seems to help in not overweighting them in the reltuples estimate. Back-patch to 8.4, where the visibility map was introduced.
1 parent ab7c5a9 commit 73bd34c

File tree

6 files changed

+228
-129
lines changed

6 files changed

+228
-129
lines changed

src/backend/commands/analyze.c

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,7 @@ static MemoryContext anl_context = NULL;
8282
static BufferAccessStrategy vac_strategy;
8383

8484

85-
static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
86-
bool update_reltuples, bool inh);
85+
static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh);
8786
static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
8887
int samplesize);
8988
static bool BlockSampler_HasMore(BlockSampler bs);
@@ -113,18 +112,9 @@ static bool std_typanalyze(VacAttrStats *stats);
113112

114113
/*
115114
* analyze_rel() -- analyze one relation
116-
*
117-
* If update_reltuples is true, we update reltuples and relpages columns
118-
* in pg_class. Caller should pass false if we're part of VACUUM ANALYZE,
119-
* and the VACUUM didn't skip any pages. We only have an approximate count,
120-
* so we don't want to overwrite the accurate values already inserted by the
121-
* VACUUM in that case. VACUUM always scans all indexes, however, so the
122-
* pg_class entries for indexes are never updated if we're part of VACUUM
123-
* ANALYZE.
124115
*/
125116
void
126-
analyze_rel(Oid relid, VacuumStmt *vacstmt,
127-
BufferAccessStrategy bstrategy, bool update_reltuples)
117+
analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
128118
{
129119
Relation onerel;
130120

@@ -224,13 +214,13 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
224214
/*
225215
* Do the normal non-recursive ANALYZE.
226216
*/
227-
do_analyze_rel(onerel, vacstmt, update_reltuples, false);
217+
do_analyze_rel(onerel, vacstmt, false);
228218

229219
/*
230220
* If there are child tables, do recursive ANALYZE.
231221
*/
232222
if (onerel->rd_rel->relhassubclass)
233-
do_analyze_rel(onerel, vacstmt, false, true);
223+
do_analyze_rel(onerel, vacstmt, true);
234224

235225
/*
236226
* Close source relation now, but keep lock so that no one deletes it
@@ -253,8 +243,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
253243
* do_analyze_rel() -- analyze one relation, recursively or not
254244
*/
255245
static void
256-
do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
257-
bool update_reltuples, bool inh)
246+
do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
258247
{
259248
int attr_cnt,
260249
tcnt,
@@ -423,9 +412,9 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
423412
}
424413

425414
/*
426-
* Quit if no analyzable columns and no pg_class update needed.
415+
* Quit if no analyzable columns.
427416
*/
428-
if (attr_cnt <= 0 && !analyzableindex && !update_reltuples)
417+
if (attr_cnt <= 0 && !analyzableindex)
429418
goto cleanup;
430419

431420
/*
@@ -535,10 +524,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
535524
}
536525

537526
/*
538-
* Update pages/tuples stats in pg_class, but not if we're inside a VACUUM
539-
* that got a more precise number.
527+
* Update pages/tuples stats in pg_class ... but not if we're doing
528+
* inherited stats.
540529
*/
541-
if (update_reltuples)
530+
if (!inh)
542531
vac_update_relstats(onerel,
543532
RelationGetNumberOfBlocks(onerel),
544533
totalrows, hasindex, InvalidTransactionId);
@@ -548,7 +537,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
548537
* VACUUM ANALYZE, don't overwrite the accurate count already inserted by
549538
* VACUUM.
550539
*/
551-
if (!(vacstmt->options & VACOPT_VACUUM))
540+
if (!inh && !(vacstmt->options & VACOPT_VACUUM))
552541
{
553542
for (ind = 0; ind < nindexes; ind++)
554543
{
@@ -563,13 +552,12 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
563552
}
564553

565554
/*
566-
* Report ANALYZE to the stats collector, too; likewise, tell it to adopt
567-
* these numbers only if we're not inside a VACUUM that got a better
568-
* number. However, a call with inh = true shouldn't reset the stats.
555+
* Report ANALYZE to the stats collector, too. However, if doing
556+
* inherited stats we shouldn't report, because the stats collector only
557+
* tracks per-table stats.
569558
*/
570559
if (!inh)
571-
pgstat_report_analyze(onerel, update_reltuples,
572-
totalrows, totaldeadrows);
560+
pgstat_report_analyze(onerel, totalrows, totaldeadrows);
573561

574562
/* We skip to here if there were no analyzable columns */
575563
cleanup:
@@ -1229,18 +1217,19 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
12291217
qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
12301218

12311219
/*
1232-
* Estimate total numbers of rows in relation.
1220+
* Estimate total numbers of rows in relation. For live rows, use
1221+
* vac_estimate_reltuples; for dead rows, we have no source of old
1222+
* information, so we have to assume the density is the same in unseen
1223+
* pages as in the pages we scanned.
12331224
*/
1225+
*totalrows = vac_estimate_reltuples(onerel, true,
1226+
totalblocks,
1227+
bs.m,
1228+
liverows);
12341229
if (bs.m > 0)
1235-
{
1236-
*totalrows = floor((liverows * totalblocks) / bs.m + 0.5);
1237-
*totaldeadrows = floor((deadrows * totalblocks) / bs.m + 0.5);
1238-
}
1230+
*totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
12391231
else
1240-
{
1241-
*totalrows = 0.0;
12421232
*totaldeadrows = 0.0;
1243-
}
12441233

12451234
/*
12461235
* Emit some interesting relation info

src/backend/commands/vacuum.c

Lines changed: 82 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
*/
2121
#include "postgres.h"
2222

23+
#include <math.h>
24+
2325
#include "access/clog.h"
2426
#include "access/genam.h"
2527
#include "access/heapam.h"
@@ -63,7 +65,7 @@ static List *get_rel_oids(Oid relid, const RangeVar *vacrel,
6365
const char *stmttype);
6466
static void vac_truncate_clog(TransactionId frozenXID);
6567
static void vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
66-
bool for_wraparound, bool *scanned_all);
68+
bool for_wraparound);
6769

6870

6971
/*
@@ -224,11 +226,9 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
224226
foreach(cur, relations)
225227
{
226228
Oid relid = lfirst_oid(cur);
227-
bool scanned_all = false;
228229

229230
if (vacstmt->options & VACOPT_VACUUM)
230-
vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
231-
&scanned_all);
231+
vacuum_rel(relid, vacstmt, do_toast, for_wraparound);
232232

233233
if (vacstmt->options & VACOPT_ANALYZE)
234234
{
@@ -243,7 +243,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
243243
PushActiveSnapshot(GetTransactionSnapshot());
244244
}
245245

246-
analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
246+
analyze_rel(relid, vacstmt, vac_strategy);
247247

248248
if (use_own_xacts)
249249
{
@@ -455,6 +455,79 @@ vacuum_set_xid_limits(int freeze_min_age,
455455
}
456456

457457

458+
/*
459+
* vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
460+
*
461+
* If we scanned the whole relation then we should just use the count of
462+
* live tuples seen; but if we did not, we should not trust the count
463+
* unreservedly, especially not in VACUUM, which may have scanned a quite
464+
* nonrandom subset of the table. When we have only partial information,
465+
* we take the old value of pg_class.reltuples as a measurement of the
466+
* tuple density in the unscanned pages.
467+
*
468+
* This routine is shared by VACUUM and ANALYZE.
469+
*/
470+
double
471+
vac_estimate_reltuples(Relation relation, bool is_analyze,
472+
BlockNumber total_pages,
473+
BlockNumber scanned_pages,
474+
double scanned_tuples)
475+
{
476+
BlockNumber old_rel_pages = relation->rd_rel->relpages;
477+
double old_rel_tuples = relation->rd_rel->reltuples;
478+
double old_density;
479+
double new_density;
480+
double multiplier;
481+
double updated_density;
482+
483+
/* If we did scan the whole table, just use the count as-is */
484+
if (scanned_pages >= total_pages)
485+
return scanned_tuples;
486+
487+
/*
488+
* If scanned_pages is zero but total_pages isn't, keep the existing
489+
* value of reltuples.
490+
*/
491+
if (scanned_pages == 0)
492+
return old_rel_tuples;
493+
494+
/*
495+
* If old value of relpages is zero, old density is indeterminate; we
496+
* can't do much except scale up scanned_tuples to match total_pages.
497+
*/
498+
if (old_rel_pages == 0)
499+
return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
500+
501+
/*
502+
* Okay, we've covered the corner cases. The normal calculation is to
503+
* convert the old measurement to a density (tuples per page), then
504+
* update the density using an exponential-moving-average approach,
505+
* and finally compute reltuples as updated_density * total_pages.
506+
*
507+
* For ANALYZE, the moving average multiplier is just the fraction of
508+
* the table's pages we scanned. This is equivalent to assuming
509+
* that the tuple density in the unscanned pages didn't change. Of
510+
* course, it probably did, if the new density measurement is different.
511+
* But over repeated cycles, the value of reltuples will converge towards
512+
* the correct value, if repeated measurements show the same new density.
513+
*
514+
* For VACUUM, the situation is a bit different: we have looked at a
515+
* nonrandom sample of pages, but we know for certain that the pages we
516+
* didn't look at are precisely the ones that haven't changed lately.
517+
* Thus, there is a reasonable argument for doing exactly the same thing
518+
* as for the ANALYZE case, that is use the old density measurement as
519+
* the value for the unscanned pages.
520+
*
521+
* This logic could probably use further refinement.
522+
*/
523+
old_density = old_rel_tuples / old_rel_pages;
524+
new_density = scanned_tuples / scanned_pages;
525+
multiplier = (double) scanned_pages / (double) total_pages;
526+
updated_density = old_density + (new_density - old_density) * multiplier;
527+
return floor(updated_density * total_pages + 0.5);
528+
}
529+
530+
458531
/*
459532
* vac_update_relstats() -- update statistics for one relation
460533
*
@@ -482,7 +555,7 @@ vacuum_set_xid_limits(int freeze_min_age,
482555
* somebody vacuuming pg_class might think they could delete a tuple
483556
* marked with xmin = our xid.
484557
*
485-
* This routine is shared by VACUUM and stand-alone ANALYZE.
558+
* This routine is shared by VACUUM and ANALYZE.
486559
*/
487560
void
488561
vac_update_relstats(Relation relation,
@@ -769,14 +842,10 @@ vac_truncate_clog(TransactionId frozenXID)
769842
* many small transactions. Otherwise, two-phase locking would require
770843
* us to lock the entire database during one pass of the vacuum cleaner.
771844
*
772-
* We'll return true in *scanned_all if the vacuum scanned all heap
773-
* pages, and updated pg_class.
774-
*
775845
* At entry and exit, we are not inside a transaction.
776846
*/
777847
static void
778-
vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
779-
bool *scanned_all)
848+
vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
780849
{
781850
LOCKMODE lmode;
782851
Relation onerel;
@@ -786,9 +855,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
786855
int save_sec_context;
787856
int save_nestlevel;
788857

789-
if (scanned_all)
790-
*scanned_all = false;
791-
792858
/* Begin a transaction for vacuuming this relation */
793859
StartTransactionCommand();
794860

@@ -967,7 +1033,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
9671033
vacstmt->freeze_min_age, vacstmt->freeze_table_age);
9681034
}
9691035
else
970-
lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
1036+
lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
9711037

9721038
/* Roll back any GUC changes executed by index functions */
9731039
AtEOXact_GUC(false, save_nestlevel);
@@ -993,7 +1059,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
9931059
* totally unimportant for toast relations.
9941060
*/
9951061
if (toast_relid != InvalidOid)
996-
vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
1062+
vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
9971063

9981064
/*
9991065
* Now release the session-level lock on the master table.

0 commit comments

Comments
 (0)