Skip to content

Commit d681703

Browse files
committed
Build inherited extended stats on partitioned tables
Commit 859b300 disabled building of extended stats for inheritance trees, to prevent updating the same catalog row twice. While that resolved the issue, it also means there are no extended stats for declaratively partitioned tables, because there are no data in the non-leaf relations. That also means declaratively partitioned tables were not affected by the issue 859b300 addressed, which means this is a regression affecting queries that calculate estimates for the whole inheritance tree as a whole (which includes e.g. GROUP BY queries). But because partitioned tables are empty, we can invert the condition and build statistics only for the case with inheritance, without losing anything. And we can consider them when calculating estimates. It may be necessary to run ANALYZE on partitioned tables, to collect proper statistics. For declarative partitioning there should no prior statistics, and it might take time before autoanalyze is triggered. For tables partitioned by inheritance the statistics may include data from child relations (if built 859b300), contradicting the current code. Report and patch by Justin Pryzby, minor fixes and cleanup by me. Backpatch all the way back to PostgreSQL 10, where extended statistics were introduced (same as 859b300). Author: Justin Pryzby Reported-by: Justin Pryzby Backpatch-through: 10 Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
1 parent acfde7c commit d681703

File tree

6 files changed

+70
-15
lines changed

6 files changed

+70
-15
lines changed

src/backend/commands/analyze.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
533533
{
534534
MemoryContext col_context,
535535
old_context;
536+
bool build_ext_stats;
536537

537538
pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
538539
PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);
@@ -596,13 +597,28 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
596597
thisdata->attr_cnt, thisdata->vacattrstats);
597598
}
598599

600+
/*
601+
* Should we build extended statistics for this relation?
602+
*
603+
* The extended statistics catalog does not include an inheritance
604+
* flag, so we can't store statistics built both with and without
605+
* data from child relations. We can store just one set of statistics
606+
* per relation. For plain relations that's fine, but for inheritance
607+
* trees we have to pick whether to store statistics for just the
608+
* one relation or the whole tree. For plain inheritance we store
609+
* the (!inh) version, mostly for backwards compatibility reasons.
610+
* For partitioned tables that's pointless (the non-leaf tables are
611+
* always empty), so we store stats representing the whole tree.
612+
*/
613+
build_ext_stats = (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) ? inh : (!inh);
614+
599615
/*
600616
* Build extended statistics (if there are any).
601617
*
602618
* For now we only build extended statistics on individual relations,
603619
* not for relations representing inheritance trees.
604620
*/
605-
if (!inh)
621+
if (build_ext_stats)
606622
BuildRelationExtStatistics(onerel, totalrows, numrows, rows,
607623
attr_cnt, vacattrstats);
608624
}

src/backend/statistics/dependencies.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1217,10 +1217,13 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
12171217
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
12181218

12191219
/*
1220-
* When dealing with inheritance trees, ignore extended stats (which were
1221-
* built without data from child rels, and thus do not represent them).
1220+
* When dealing with regular inheritance trees, ignore extended stats
1221+
* (which were built without data from child rels, and thus do not
1222+
* represent them). For partitioned tables data there's no data in the
1223+
* non-leaf relations, so we build stats only for the inheritance tree.
1224+
* So for partitioned tables we do consider extended stats.
12221225
*/
1223-
if (rte->inh)
1226+
if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
12241227
return 1.0;
12251228

12261229
/* check if there's any stats that might be useful for us. */

src/backend/statistics/extended_stats.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,10 +1298,13 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
12981298
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
12991299

13001300
/*
1301-
* When dealing with inheritance trees, ignore extended stats (which were
1302-
* built without data from child rels, and thus do not represent them).
1301+
* When dealing with regular inheritance trees, ignore extended stats
1302+
* (which were built without data from child rels, and thus do not
1303+
* represent them). For partitioned tables data there's no data in the
1304+
* non-leaf relations, so we build stats only for the inheritance tree.
1305+
* So for partitioned tables we do consider extended stats.
13031306
*/
1304-
if (rte->inh)
1307+
if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
13051308
return 1.0;
13061309

13071310
/* check if there's any stats that might be useful for us. */

src/backend/utils/adt/selfuncs.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3887,19 +3887,23 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
38873887
Oid statOid = InvalidOid;
38883888
MVNDistinct *stats;
38893889
Bitmapset *matched = NULL;
3890-
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
3891-
3892-
/*
3893-
* When dealing with inheritance trees, ignore extended stats (which were
3894-
* built without data from child rels, and thus do not represent them).
3895-
*/
3896-
if (rte->inh)
3897-
return false;
3890+
RangeTblEntry *rte;
38983891

38993892
/* bail out immediately if the table has no extended statistics */
39003893
if (!rel->statlist)
39013894
return false;
39023895

3896+
/*
3897+
* When dealing with regular inheritance trees, ignore extended stats
3898+
* (which were built without data from child rels, and thus do not
3899+
* represent them). For partitioned tables data there's no data in the
3900+
* non-leaf relations, so we build stats only for the inheritance tree.
3901+
* So for partitioned tables we do consider extended stats.
3902+
*/
3903+
rte = planner_rt_fetch(rel->relid, root);
3904+
if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
3905+
return false;
3906+
39033907
/* Determine the attnums we're looking for */
39043908
foreach(lc, *varinfos)
39053909
{

src/test/regress/expected/stats_ext.out

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,25 @@ SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b
202202
(1 row)
203203

204204
DROP TABLE stxdinh, stxdinh1, stxdinh2;
205+
-- Ensure inherited stats ARE applied to inherited query in partitioned table
206+
CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i);
207+
CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1) TO (100);
208+
INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1, 999) a;
209+
CREATE STATISTICS stxdinp ON a, b FROM stxdinp;
210+
VACUUM ANALYZE stxdinp; -- partitions are processed recursively
211+
SELECT 1 FROM pg_statistic_ext WHERE stxrelid = 'stxdinp'::regclass;
212+
?column?
213+
----------
214+
1
215+
(1 row)
216+
217+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1, 2');
218+
estimated | actual
219+
-----------+--------
220+
10 | 10
221+
(1 row)
222+
223+
DROP TABLE stxdinp;
205224
-- Verify supported object types for extended statistics
206225
CREATE schema tststats;
207226
CREATE TABLE tststats.t (a int, b int, c text);

src/test/regress/sql/stats_ext.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,16 @@ SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
128128
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
129129
DROP TABLE stxdinh, stxdinh1, stxdinh2;
130130

131+
-- Ensure inherited stats ARE applied to inherited query in partitioned table
132+
CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i);
133+
CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1) TO (100);
134+
INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1, 999) a;
135+
CREATE STATISTICS stxdinp ON a, b FROM stxdinp;
136+
VACUUM ANALYZE stxdinp; -- partitions are processed recursively
137+
SELECT 1 FROM pg_statistic_ext WHERE stxrelid = 'stxdinp'::regclass;
138+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1, 2');
139+
DROP TABLE stxdinp;
140+
131141
-- Verify supported object types for extended statistics
132142
CREATE schema tststats;
133143

0 commit comments

Comments
 (0)