Skip to content

Commit acfde7c

Browse files
committed
Ignore extended statistics for inheritance trees
Since commit 859b300 we only build extended statistics for individual relations, ignoring the child relations. This resolved the issue with updating catalog tuple twice, but we still tried to use the statistics when calculating estimates for the whole inheritance tree. When the relations contain very distinct data, it may produce bogus estimates. This is roughly the same issue 427c6b5 addressed ~15 years ago, and we fix it the same way - by ignoring extended statistics when calculating estimates for the inheritance tree as a whole. We still consider extended statistics when calculating estimates for individual child relations, of course. This may result in plan changes due to different estimates, but if the old statistics were not describing the inheritance tree particularly well it's quite likely the new plans is actually better. Report and patch by Justin Pryzby, minor fixes and cleanup by me. Backpatch all the way back to PostgreSQL 10, where extended statistics were introduced (same as 859b300). Author: Justin Pryzby Reported-by: Justin Pryzby Backpatch-through: 10 Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
1 parent ca14c41 commit acfde7c

File tree

5 files changed

+89
-0
lines changed

5 files changed

+89
-0
lines changed

src/backend/statistics/dependencies.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "nodes/pathnodes.h"
2525
#include "optimizer/clauses.h"
2626
#include "optimizer/optimizer.h"
27+
#include "parser/parsetree.h"
2728
#include "statistics/extended_stats_internal.h"
2829
#include "statistics/statistics.h"
2930
#include "utils/bytea.h"
@@ -1213,6 +1214,14 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
12131214
MVDependency **dependencies;
12141215
int ndependencies;
12151216
int i;
1217+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1218+
1219+
/*
1220+
* When dealing with inheritance trees, ignore extended stats (which were
1221+
* built without data from child rels, and thus do not represent them).
1222+
*/
1223+
if (rte->inh)
1224+
return 1.0;
12161225

12171226
/* check if there's any stats that might be useful for us. */
12181227
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))

src/backend/statistics/extended_stats.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "nodes/nodeFuncs.h"
3030
#include "optimizer/clauses.h"
3131
#include "optimizer/optimizer.h"
32+
#include "parser/parsetree.h"
3233
#include "pgstat.h"
3334
#include "postmaster/autovacuum.h"
3435
#include "statistics/extended_stats_internal.h"
@@ -1294,6 +1295,14 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
12941295
Bitmapset **list_attnums;
12951296
int listidx;
12961297
Selectivity sel = 1.0;
1298+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1299+
1300+
/*
1301+
* When dealing with inheritance trees, ignore extended stats (which were
1302+
* built without data from child rels, and thus do not represent them).
1303+
*/
1304+
if (rte->inh)
1305+
return 1.0;
12971306

12981307
/* check if there's any stats that might be useful for us. */
12991308
if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))

src/backend/utils/adt/selfuncs.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3887,6 +3887,14 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
38873887
Oid statOid = InvalidOid;
38883888
MVNDistinct *stats;
38893889
Bitmapset *matched = NULL;
3890+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
3891+
3892+
/*
3893+
* When dealing with inheritance trees, ignore extended stats (which were
3894+
* built without data from child rels, and thus do not represent them).
3895+
*/
3896+
if (rte->inh)
3897+
return false;
38903898

38913899
/* bail out immediately if the table has no extended statistics */
38923900
if (!rel->statlist)

src/test/regress/expected/stats_ext.out

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,47 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
161161
ANALYZE ab1;
162162
DROP TABLE ab1 CASCADE;
163163
NOTICE: drop cascades to table ab1c
164+
-- Tests for stats with inheritance
165+
CREATE TABLE stxdinh(a int, b int);
166+
CREATE TABLE stxdinh1() INHERITS(stxdinh);
167+
CREATE TABLE stxdinh2() INHERITS(stxdinh);
168+
INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a;
169+
INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
170+
INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
171+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
172+
-- Ensure non-inherited stats are not applied to inherited query
173+
-- Without stats object, it looks like this
174+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
175+
estimated | actual
176+
-----------+--------
177+
400 | 150
178+
(1 row)
179+
180+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
181+
estimated | actual
182+
-----------+--------
183+
3 | 40
184+
(1 row)
185+
186+
CREATE STATISTICS stxdinh ON a, b FROM stxdinh;
187+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
188+
-- Since the stats object does not include inherited stats, it should not
189+
-- affect the estimates
190+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
191+
estimated | actual
192+
-----------+--------
193+
400 | 150
194+
(1 row)
195+
196+
-- Dependencies are applied at individual relations (within append), so
197+
-- this estimate changes a bit because we improve estimates for the parent
198+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
199+
estimated | actual
200+
-----------+--------
201+
22 | 40
202+
(1 row)
203+
204+
DROP TABLE stxdinh, stxdinh1, stxdinh2;
164205
-- Verify supported object types for extended statistics
165206
CREATE schema tststats;
166207
CREATE TABLE tststats.t (a int, b int, c text);

src/test/regress/sql/stats_ext.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,28 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
106106
ANALYZE ab1;
107107
DROP TABLE ab1 CASCADE;
108108

109+
-- Tests for stats with inheritance
110+
CREATE TABLE stxdinh(a int, b int);
111+
CREATE TABLE stxdinh1() INHERITS(stxdinh);
112+
CREATE TABLE stxdinh2() INHERITS(stxdinh);
113+
INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a;
114+
INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
115+
INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
116+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
117+
-- Ensure non-inherited stats are not applied to inherited query
118+
-- Without stats object, it looks like this
119+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
120+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
121+
CREATE STATISTICS stxdinh ON a, b FROM stxdinh;
122+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
123+
-- Since the stats object does not include inherited stats, it should not
124+
-- affect the estimates
125+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
126+
-- Dependencies are applied at individual relations (within append), so
127+
-- this estimate changes a bit because we improve estimates for the parent
128+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
129+
DROP TABLE stxdinh, stxdinh1, stxdinh2;
130+
109131
-- Verify supported object types for extended statistics
110132
CREATE schema tststats;
111133

0 commit comments

Comments
 (0)