Skip to content

Commit 9c02e3a

Browse files
pg_dump: Retrieve attribute statistics in batches.
Currently, pg_dump gathers attribute statistics with a query per relation, which can cause pg_dump to take significantly longer, especially when there are many relations. This commit addresses this by teaching pg_dump to gather attribute statistics for 64 relations at a time. Some simple tests showed this was the optimal batch size, but performance may vary depending on the workload. Our lookahead code determines the next batch of relations by searching the TOC sequentially for relevant entries. This approach assumes that we will dump all such entries in TOC order, which unfortunately isn't true for dump formats that use RestoreArchive(). RestoreArchive() does multiple passes through the TOC and selectively dumps certain groups of entries each time. This is particularly problematic for index stats and a subset of matview stats; both are in SECTION_POST_DATA, but matview stats that depend on matview data are dumped in RESTORE_PASS_POST_ACL, while all other stats are dumped in RESTORE_PASS_MAIN. To handle this, this commit moves all statistics data entries in SECTION_POST_DATA to RESTORE_PASS_POST_ACL, which ensures that we always dump them in TOC order. A convenient side effect of this change is that we can revert a decent chunk of commit a0a4601, but that is left for a follow-up commit. Author: Corey Huinker <corey.huinker@gmail.com> Co-authored-by: Nathan Bossart <nathandbossart@gmail.com> Reviewed-by: Jeff Davis <pgsql@j-davis.com> Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com
1 parent 7d5c83b commit 9c02e3a

File tree

3 files changed

+142
-39
lines changed

3 files changed

+142
-39
lines changed

src/bin/pg_dump/pg_backup.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,10 @@ typedef int DumpId;
285285
* Function pointer prototypes for assorted callback methods.
286286
*/
287287

288-
typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg);
288+
/* forward declaration to avoid including pg_backup_archiver.h here */
289+
typedef struct _tocEntry TocEntry;
290+
291+
typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg, const TocEntry *te);
289292
typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
290293

291294
typedef void (*SetupWorkerPtrType) (Archive *AH);

src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,7 +2655,7 @@ WriteToc(ArchiveHandle *AH)
26552655
}
26562656
else if (te->defnDumper)
26572657
{
2658-
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
2658+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
26592659

26602660
te->defnLen = WriteStr(AH, defn);
26612661
pg_free(defn);
@@ -3284,23 +3284,16 @@ _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
32843284

32853285
/*
32863286
* If statistics data is dependent on materialized view data, it must be
3287-
* deferred to RESTORE_PASS_POST_ACL.
3287+
* deferred to RESTORE_PASS_POST_ACL. Those entries are already marked as
3288+
* SECTION_POST_DATA, and some other stats entries (e.g., index stats)
3289+
* will also be marked as SECTION_POST_DATA. Additionally, our lookahead
3290+
* code in fetchAttributeStats() assumes that we dump all statistics data
3291+
* entries in TOC order. To ensure this assumption holds, we move all
3292+
* statistics data entries in SECTION_POST_DATA to RESTORE_PASS_POST_ACL.
32883293
*/
3289-
if (strcmp(te->desc, "STATISTICS DATA") == 0)
3290-
{
3291-
for (int i = 0; i < te->nDeps; i++)
3292-
{
3293-
DumpId depid = te->dependencies[i];
3294-
3295-
if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
3296-
{
3297-
TocEntry *otherte = AH->tocsByDumpId[depid];
3298-
3299-
if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0)
3300-
return RESTORE_PASS_POST_ACL;
3301-
}
3302-
}
3303-
}
3294+
if (strcmp(te->desc, "STATISTICS DATA") == 0 &&
3295+
te->section == SECTION_POST_DATA)
3296+
return RESTORE_PASS_POST_ACL;
33043297

33053298
/* All else can be handled in the main pass. */
33063299
return RESTORE_PASS_MAIN;
@@ -3951,7 +3944,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
39513944
}
39523945
else if (te->defnDumper)
39533946
{
3954-
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
3947+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
39553948

39563949
te->defnLen = ahprintf(AH, "%s\n\n", defn);
39573950
pg_free(defn);

src/bin/pg_dump/pg_dump.c

Lines changed: 127 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ static int nbinaryUpgradeClassOids = 0;
209209
static SequenceItem *sequences = NULL;
210210
static int nsequences = 0;
211211

212+
/* Maximum number of relations to fetch in a fetchAttributeStats() call. */
213+
#define MAX_ATTR_STATS_RELS 64
214+
212215
/*
213216
* The default number of rows per INSERT when
214217
* --inserts is specified without --rows-per-insert
@@ -10553,6 +10556,77 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
1055310556
appendPQExpBuffer(out, "::%s", argtype);
1055410557
}
1055510558

10559+
/*
10560+
* fetchAttributeStats --
10561+
*
10562+
* Fetch next batch of attribute statistics for dumpRelationStats_dumper().
10563+
*/
10564+
static PGresult *
10565+
fetchAttributeStats(Archive *fout)
10566+
{
10567+
ArchiveHandle *AH = (ArchiveHandle *) fout;
10568+
PQExpBuffer nspnames = createPQExpBuffer();
10569+
PQExpBuffer relnames = createPQExpBuffer();
10570+
int count = 0;
10571+
PGresult *res = NULL;
10572+
static TocEntry *te;
10573+
static bool restarted;
10574+
10575+
/* If we're just starting, set our TOC pointer. */
10576+
if (!te)
10577+
te = AH->toc->next;
10578+
10579+
/*
10580+
* We can't easily avoid a second TOC scan for the tar format because it
10581+
* writes restore.sql separately, which means we must execute the queries
10582+
* twice. This feels risky, but there is no known reason it should
10583+
* generate different output than the first pass. Even if it does, the
10584+
* worst-case scenario is that restore.sql might have different statistics
10585+
* data than the archive.
10586+
*/
10587+
if (!restarted && te == AH->toc && AH->format == archTar)
10588+
{
10589+
te = AH->toc->next;
10590+
restarted = true;
10591+
}
10592+
10593+
/*
10594+
* Scan the TOC for the next set of relevant stats entries. We assume
10595+
* that statistics are dumped in the order they are listed in the TOC.
10596+
* This is perhaps not the sturdiest assumption, so we verify it matches
10597+
* reality in dumpRelationStats_dumper().
10598+
*/
10599+
for (; te != AH->toc && count < MAX_ATTR_STATS_RELS; te = te->next)
10600+
{
10601+
if ((te->reqs & REQ_STATS) != 0 &&
10602+
strcmp(te->desc, "STATISTICS DATA") == 0)
10603+
{
10604+
appendPQExpBuffer(nspnames, "%s%s", count ? "," : "",
10605+
fmtId(te->namespace));
10606+
appendPQExpBuffer(relnames, "%s%s", count ? "," : "",
10607+
fmtId(te->tag));
10608+
count++;
10609+
}
10610+
}
10611+
10612+
/* Execute the query for the next batch of relations. */
10613+
if (count > 0)
10614+
{
10615+
PQExpBuffer query = createPQExpBuffer();
10616+
10617+
appendPQExpBuffer(query, "EXECUTE getAttributeStats("
10618+
"'{%s}'::pg_catalog.name[],"
10619+
"'{%s}'::pg_catalog.name[])",
10620+
nspnames->data, relnames->data);
10621+
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
10622+
destroyPQExpBuffer(query);
10623+
}
10624+
10625+
destroyPQExpBuffer(nspnames);
10626+
destroyPQExpBuffer(relnames);
10627+
return res;
10628+
}
10629+
1055610630
/*
1055710631
* dumpRelationStats_dumper --
1055810632
*
@@ -10561,14 +10635,16 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
1056110635
* dumped.
1056210636
*/
1056310637
static char *
10564-
dumpRelationStats_dumper(Archive *fout, const void *userArg)
10638+
dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te)
1056510639
{
1056610640
const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
10567-
const DumpableObject *dobj = &rsinfo->dobj;
10568-
PGresult *res;
10641+
static PGresult *res;
10642+
static int rownum;
1056910643
PQExpBuffer query;
1057010644
PQExpBufferData out_data;
1057110645
PQExpBuffer out = &out_data;
10646+
int i_schemaname;
10647+
int i_tablename;
1057210648
int i_attname;
1057310649
int i_inherited;
1057410650
int i_null_frac;
@@ -10584,13 +10660,31 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1058410660
int i_range_length_histogram;
1058510661
int i_range_empty_frac;
1058610662
int i_range_bounds_histogram;
10663+
static TocEntry *expected_te;
10664+
10665+
/*
10666+
* fetchAttributeStats() assumes that the statistics are dumped in the
10667+
* order they are listed in the TOC. We verify that here for safety.
10668+
*/
10669+
if (!expected_te)
10670+
expected_te = ((ArchiveHandle *) fout)->toc;
10671+
10672+
expected_te = expected_te->next;
10673+
while ((expected_te->reqs & REQ_STATS) == 0 ||
10674+
strcmp(expected_te->desc, "STATISTICS DATA") != 0)
10675+
expected_te = expected_te->next;
10676+
10677+
if (te != expected_te)
10678+
pg_fatal("stats dumped out of order (current: %d %s %s) (expected: %d %s %s)",
10679+
te->dumpId, te->desc, te->tag,
10680+
expected_te->dumpId, expected_te->desc, expected_te->tag);
1058710681

1058810682
query = createPQExpBuffer();
1058910683
if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
1059010684
{
1059110685
appendPQExpBufferStr(query,
10592-
"PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n"
10593-
"SELECT s.attname, s.inherited, "
10686+
"PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n"
10687+
"SELECT s.schemaname, s.tablename, s.attname, s.inherited, "
1059410688
"s.null_frac, s.avg_width, s.n_distinct, "
1059510689
"s.most_common_vals, s.most_common_freqs, "
1059610690
"s.histogram_bounds, s.correlation, "
@@ -10608,11 +10702,21 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1060810702
"NULL AS range_empty_frac,"
1060910703
"NULL AS range_bounds_histogram ");
1061010704

10705+
/*
10706+
* The results must be in the order of the relations supplied in the
10707+
* parameters to ensure we remain in sync as we walk through the TOC.
10708+
* The redundant filter clause on s.tablename = ANY(...) seems
10709+
* sufficient to convince the planner to use
10710+
* pg_class_relname_nsp_index, which avoids a full scan of pg_stats.
10711+
* This may not work for all versions.
10712+
*/
1061110713
appendPQExpBufferStr(query,
1061210714
"FROM pg_catalog.pg_stats s "
10613-
"WHERE s.schemaname = $1 "
10614-
"AND s.tablename = $2 "
10615-
"ORDER BY s.attname, s.inherited");
10715+
"JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) "
10716+
"ON s.schemaname = u.schemaname "
10717+
"AND s.tablename = u.tablename "
10718+
"WHERE s.tablename = ANY($2) "
10719+
"ORDER BY u.ord, s.attname, s.inherited");
1061610720

1061710721
ExecuteSqlStatement(fout, query->data);
1061810722

@@ -10642,16 +10746,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1064210746

1064310747
appendPQExpBufferStr(out, "\n);\n");
1064410748

10749+
/* Fetch the next batch of attribute statistics if needed. */
10750+
if (rownum >= PQntuples(res))
10751+
{
10752+
PQclear(res);
10753+
res = fetchAttributeStats(fout);
10754+
rownum = 0;
10755+
}
1064510756

10646-
/* fetch attribute stats */
10647-
appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
10648-
appendStringLiteralAH(query, dobj->namespace->dobj.name, fout);
10649-
appendPQExpBufferStr(query, ", ");
10650-
appendStringLiteralAH(query, dobj->name, fout);
10651-
appendPQExpBufferStr(query, ");");
10652-
10653-
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
10654-
10757+
i_schemaname = PQfnumber(res, "schemaname");
10758+
i_tablename = PQfnumber(res, "tablename");
1065510759
i_attname = PQfnumber(res, "attname");
1065610760
i_inherited = PQfnumber(res, "inherited");
1065710761
i_null_frac = PQfnumber(res, "null_frac");
@@ -10669,10 +10773,15 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1066910773
i_range_bounds_histogram = PQfnumber(res, "range_bounds_histogram");
1067010774

1067110775
/* restore attribute stats */
10672-
for (int rownum = 0; rownum < PQntuples(res); rownum++)
10776+
for (; rownum < PQntuples(res); rownum++)
1067310777
{
1067410778
const char *attname;
1067510779

10780+
/* Stop if the next stat row in our cache isn't for this relation. */
10781+
if (strcmp(te->tag, PQgetvalue(res, rownum, i_tablename)) != 0 ||
10782+
strcmp(te->namespace, PQgetvalue(res, rownum, i_schemaname)) != 0)
10783+
break;
10784+
1067610785
appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n");
1067710786
appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
1067810787
fout->remoteVersion);
@@ -10762,8 +10871,6 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1076210871
appendPQExpBufferStr(out, "\n);\n");
1076310872
}
1076410873

10765-
PQclear(res);
10766-
1076710874
destroyPQExpBuffer(query);
1076810875
return out->data;
1076910876
}

0 commit comments

Comments
 (0)