Skip to content

Commit b3cac25

Browse files
committed
Ignore extended statistics for inheritance trees
Since commit 859b300 we only build extended statistics for individual relations, ignoring the child relations. This resolved the issue with updating catalog tuple twice, but we still tried to use the statistics when calculating estimates for the whole inheritance tree. When the relations contain very distinct data, it may produce bogus estimates. This is roughly the same issue 427c6b5 addressed ~15 years ago, and we fix it the same way - by ignoring extended statistics when calculating estimates for the inheritance tree as a whole. We still consider extended statistics when calculating estimates for individual child relations, of course. This may result in plan changes due to different estimates, but if the old statistics were not describing the inheritance tree particularly well it's quite likely the new plans is actually better. Report and patch by Justin Pryzby, minor fixes and cleanup by me. Backpatch all the way back to PostgreSQL 10, where extended statistics were introduced (same as 859b300). Author: Justin Pryzby Reported-by: Justin Pryzby Backpatch-through: 10 Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
1 parent 3a1bfe2 commit b3cac25

File tree

5 files changed

+122
-0
lines changed

5 files changed

+122
-0
lines changed

src/backend/statistics/dependencies.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "optimizer/var.h"
2424
#include "nodes/nodes.h"
2525
#include "nodes/relation.h"
26+
#include "parser/parsetree.h"
2627
#include "statistics/extended_stats_internal.h"
2728
#include "statistics/statistics.h"
2829
#include "utils/bytea.h"
@@ -961,10 +962,18 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
961962
MVDependencies *dependencies;
962963
AttrNumber *list_attnums;
963964
int listidx;
965+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
964966

965967
/* initialize output argument */
966968
*estimatedclauses = NULL;
967969

970+
/*
971+
* When dealing with inheritance trees, ignore extended stats (which were
972+
* built without data from child rels, and thus do not represent them).
973+
*/
974+
if (rte->inh)
975+
return 1.0;
976+
968977
/* check if there's any stats that might be useful for us. */
969978
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
970979
return 1.0;

src/backend/statistics/extended_stats.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "catalog/pg_collation.h"
2424
#include "catalog/pg_statistic_ext.h"
2525
#include "nodes/relation.h"
26+
#include "parser/parsetree.h"
2627
#include "postmaster/autovacuum.h"
2728
#include "statistics/extended_stats_internal.h"
2829
#include "statistics/statistics.h"

src/backend/utils/adt/selfuncs.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3923,6 +3923,14 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
39233923
Oid statOid = InvalidOid;
39243924
MVNDistinct *stats;
39253925
Bitmapset *matched = NULL;
3926+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
3927+
3928+
/*
3929+
* When dealing with inheritance trees, ignore extended stats (which were
3930+
* built without data from child rels, and thus do not represent them).
3931+
*/
3932+
if (rte->inh)
3933+
return false;
39263934

39273935
/* bail out immediately if the table has no extended statistics */
39283936
if (!rel->statlist)

src/test/regress/expected/stats_ext.out

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,24 @@
11
-- Generic extended statistics support
2+
-- check the number of estimated/actual rows in the top node
3+
create function check_estimated_rows(text) returns table (estimated int, actual int)
4+
language plpgsql as
5+
$$
6+
declare
7+
ln text;
8+
tmp text[];
9+
first_row bool := true;
10+
begin
11+
for ln in
12+
execute format('explain analyze %s', $1)
13+
loop
14+
if first_row then
15+
first_row := false;
16+
tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
17+
return query select tmp[1]::int, tmp[2]::int;
18+
end if;
19+
end loop;
20+
end;
21+
$$;
222
-- We will be checking execution plans without/with statistics, so
323
-- let's make sure we get simple non-parallel plans. Also set the
424
-- work_mem low so that we can use small amounts of data.
@@ -107,6 +127,47 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
107127
ANALYZE ab1;
108128
DROP TABLE ab1 CASCADE;
109129
NOTICE: drop cascades to table ab1c
130+
-- Tests for stats with inheritance
131+
CREATE TABLE stxdinh(a int, b int);
132+
CREATE TABLE stxdinh1() INHERITS(stxdinh);
133+
CREATE TABLE stxdinh2() INHERITS(stxdinh);
134+
INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a;
135+
INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
136+
INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
137+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
138+
-- Ensure non-inherited stats are not applied to inherited query
139+
-- Without stats object, it looks like this
140+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
141+
estimated | actual
142+
-----------+--------
143+
400 | 150
144+
(1 row)
145+
146+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
147+
estimated | actual
148+
-----------+--------
149+
3 | 40
150+
(1 row)
151+
152+
CREATE STATISTICS stxdinh ON a, b FROM stxdinh;
153+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
154+
-- Since the stats object does not include inherited stats, it should not
155+
-- affect the estimates
156+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
157+
estimated | actual
158+
-----------+--------
159+
400 | 150
160+
(1 row)
161+
162+
-- Dependencies are applied at individual relations (within append), so
163+
-- this estimate changes a bit because we improve estimates for the parent
164+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
165+
estimated | actual
166+
-----------+--------
167+
22 | 40
168+
(1 row)
169+
170+
DROP TABLE stxdinh, stxdinh1, stxdinh2;
110171
-- Verify supported object types for extended statistics
111172
CREATE schema tststats;
112173
CREATE TABLE tststats.t (a int, b int, c text);

src/test/regress/sql/stats_ext.sql

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,26 @@
11
-- Generic extended statistics support
22

3+
-- check the number of estimated/actual rows in the top node
4+
create function check_estimated_rows(text) returns table (estimated int, actual int)
5+
language plpgsql as
6+
$$
7+
declare
8+
ln text;
9+
tmp text[];
10+
first_row bool := true;
11+
begin
12+
for ln in
13+
execute format('explain analyze %s', $1)
14+
loop
15+
if first_row then
16+
first_row := false;
17+
tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
18+
return query select tmp[1]::int, tmp[2]::int;
19+
end if;
20+
end loop;
21+
end;
22+
$$;
23+
324
-- We will be checking execution plans without/with statistics, so
425
-- let's make sure we get simple non-parallel plans. Also set the
526
-- work_mem low so that we can use small amounts of data.
@@ -74,6 +95,28 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
7495
ANALYZE ab1;
7596
DROP TABLE ab1 CASCADE;
7697

98+
-- Tests for stats with inheritance
99+
CREATE TABLE stxdinh(a int, b int);
100+
CREATE TABLE stxdinh1() INHERITS(stxdinh);
101+
CREATE TABLE stxdinh2() INHERITS(stxdinh);
102+
INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a;
103+
INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
104+
INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
105+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
106+
-- Ensure non-inherited stats are not applied to inherited query
107+
-- Without stats object, it looks like this
108+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
109+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
110+
CREATE STATISTICS stxdinh ON a, b FROM stxdinh;
111+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
112+
-- Since the stats object does not include inherited stats, it should not
113+
-- affect the estimates
114+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
115+
-- Dependencies are applied at individual relations (within append), so
116+
-- this estimate changes a bit because we improve estimates for the parent
117+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
118+
DROP TABLE stxdinh, stxdinh1, stxdinh2;
119+
77120
-- Verify supported object types for extended statistics
78121
CREATE schema tststats;
79122

0 commit comments

Comments
 (0)