Skip to content

Commit 76569ad

Browse files
committed
Ignore extended statistics for inheritance trees
Since commit 859b300 we only build extended statistics for individual relations, ignoring the child relations. This resolved the issue with updating catalog tuple twice, but we still tried to use the statistics when calculating estimates for the whole inheritance tree. When the relations contain very distinct data, it may produce bogus estimates. This is roughly the same issue 427c6b5 addressed ~15 years ago, and we fix it the same way - by ignoring extended statistics when calculating estimates for the inheritance tree as a whole. We still consider extended statistics when calculating estimates for individual child relations, of course. This may result in plan changes due to different estimates, but if the old statistics were not describing the inheritance tree particularly well it's quite likely the new plans is actually better. Report and patch by Justin Pryzby, minor fixes and cleanup by me. Backpatch all the way back to PostgreSQL 10, where extended statistics were introduced (same as 859b300). Author: Justin Pryzby Reported-by: Justin Pryzby Backpatch-through: 10 Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
1 parent 45a3cef commit 76569ad

File tree

5 files changed

+89
-0
lines changed

5 files changed

+89
-0
lines changed

src/backend/statistics/dependencies.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "optimizer/optimizer.h"
2525
#include "nodes/nodes.h"
2626
#include "nodes/pathnodes.h"
27+
#include "parser/parsetree.h"
2728
#include "statistics/extended_stats_internal.h"
2829
#include "statistics/statistics.h"
2930
#include "utils/bytea.h"
@@ -963,6 +964,14 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
963964
MVDependencies *dependencies;
964965
Bitmapset **list_attnums;
965966
int listidx;
967+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
968+
969+
/*
970+
* When dealing with inheritance trees, ignore extended stats (which were
971+
* built without data from child rels, and thus do not represent them).
972+
*/
973+
if (rte->inh)
974+
return 1.0;
966975

967976
/* check if there's any stats that might be useful for us. */
968977
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))

src/backend/statistics/extended_stats.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "nodes/nodeFuncs.h"
2929
#include "optimizer/clauses.h"
3030
#include "optimizer/optimizer.h"
31+
#include "parser/parsetree.h"
3132
#include "postmaster/autovacuum.h"
3233
#include "statistics/extended_stats_internal.h"
3334
#include "statistics/statistics.h"
@@ -1075,6 +1076,14 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
10751076
mcv_totalsel,
10761077
other_sel,
10771078
sel;
1079+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1080+
1081+
/*
1082+
* When dealing with inheritance trees, ignore extended stats (which were
1083+
* built without data from child rels, and thus do not represent them).
1084+
*/
1085+
if (rte->inh)
1086+
return 1.0;
10781087

10791088
/* check if there's any stats that might be useful for us. */
10801089
if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))

src/backend/utils/adt/selfuncs.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3641,6 +3641,14 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
36413641
Oid statOid = InvalidOid;
36423642
MVNDistinct *stats;
36433643
Bitmapset *matched = NULL;
3644+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
3645+
3646+
/*
3647+
* When dealing with inheritance trees, ignore extended stats (which were
3648+
* built without data from child rels, and thus do not represent them).
3649+
*/
3650+
if (rte->inh)
3651+
return false;
36443652

36453653
/* bail out immediately if the table has no extended statistics */
36463654
if (!rel->statlist)

src/test/regress/expected/stats_ext.out

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,47 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
124124
ANALYZE ab1;
125125
DROP TABLE ab1 CASCADE;
126126
NOTICE: drop cascades to table ab1c
127+
-- Tests for stats with inheritance
128+
CREATE TABLE stxdinh(a int, b int);
129+
CREATE TABLE stxdinh1() INHERITS(stxdinh);
130+
CREATE TABLE stxdinh2() INHERITS(stxdinh);
131+
INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a;
132+
INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
133+
INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
134+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
135+
-- Ensure non-inherited stats are not applied to inherited query
136+
-- Without stats object, it looks like this
137+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
138+
estimated | actual
139+
-----------+--------
140+
400 | 150
141+
(1 row)
142+
143+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
144+
estimated | actual
145+
-----------+--------
146+
3 | 40
147+
(1 row)
148+
149+
CREATE STATISTICS stxdinh ON a, b FROM stxdinh;
150+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
151+
-- Since the stats object does not include inherited stats, it should not
152+
-- affect the estimates
153+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
154+
estimated | actual
155+
-----------+--------
156+
400 | 150
157+
(1 row)
158+
159+
-- Dependencies are applied at individual relations (within append), so
160+
-- this estimate changes a bit because we improve estimates for the parent
161+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
162+
estimated | actual
163+
-----------+--------
164+
22 | 40
165+
(1 row)
166+
167+
DROP TABLE stxdinh, stxdinh1, stxdinh2;
127168
-- Verify supported object types for extended statistics
128169
CREATE schema tststats;
129170
CREATE TABLE tststats.t (a int, b int, c text);

src/test/regress/sql/stats_ext.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,28 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
9292
ANALYZE ab1;
9393
DROP TABLE ab1 CASCADE;
9494

95+
-- Tests for stats with inheritance
96+
CREATE TABLE stxdinh(a int, b int);
97+
CREATE TABLE stxdinh1() INHERITS(stxdinh);
98+
CREATE TABLE stxdinh2() INHERITS(stxdinh);
99+
INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a;
100+
INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
101+
INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
102+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
103+
-- Ensure non-inherited stats are not applied to inherited query
104+
-- Without stats object, it looks like this
105+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
106+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
107+
CREATE STATISTICS stxdinh ON a, b FROM stxdinh;
108+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
109+
-- Since the stats object does not include inherited stats, it should not
110+
-- affect the estimates
111+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
112+
-- Dependencies are applied at individual relations (within append), so
113+
-- this estimate changes a bit because we improve estimates for the parent
114+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
115+
DROP TABLE stxdinh, stxdinh1, stxdinh2;
116+
95117
-- Verify supported object types for extended statistics
96118
CREATE schema tststats;
97119

0 commit comments

Comments
 (0)