Skip to content

Commit bed9ef5

Browse files
committed
Rework the stats_ext test
As suggested by Tom Lane, avoid printing specific estimated cost values, because they vary across architectures; instead, verify plan shapes (in this case, HashAggregate vs. GroupAggregate), as we do in other planner tests. We can now remove expected/stats_ext_1.out. Author: Tomas Vondra
1 parent 70ec3f1 commit bed9ef5

File tree

3 files changed

+262
-207
lines changed

3 files changed

+262
-207
lines changed

src/test/regress/expected/stats_ext.out

+191-40
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
11
-- Generic extended statistics support
2+
-- We will be checking execution plans without/with statistics, so
3+
-- let's make sure we get simple non-parallel plans. Also set the
4+
-- work_mem low so that we can use small amounts of data.
5+
SET max_parallel_workers = 0;
6+
SET max_parallel_workers_per_gather = 0;
7+
SET work_mem = '128kB';
28
-- Ensure stats are dropped sanely
39
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
410
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
@@ -49,6 +55,67 @@ CREATE TABLE ndistinct (
4955
c INT,
5056
d INT
5157
);
58+
-- over-estimates when using only per-column statistics
59+
INSERT INTO ndistinct (a, b, c, filler1)
60+
SELECT i/100, i/100, i/100, cash_words((i/100)::money)
61+
FROM generate_series(1,30000) s(i);
62+
ANALYZE ndistinct;
63+
-- Group Aggregate, due to over-estimate of the number of groups
64+
EXPLAIN (COSTS off)
65+
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
66+
QUERY PLAN
67+
-----------------------------------
68+
GroupAggregate
69+
Group Key: a, b
70+
-> Sort
71+
Sort Key: a, b
72+
-> Seq Scan on ndistinct
73+
(5 rows)
74+
75+
EXPLAIN (COSTS off)
76+
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
77+
QUERY PLAN
78+
-----------------------------------
79+
GroupAggregate
80+
Group Key: b, c
81+
-> Sort
82+
Sort Key: b, c
83+
-> Seq Scan on ndistinct
84+
(5 rows)
85+
86+
EXPLAIN (COSTS off)
87+
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
88+
QUERY PLAN
89+
-----------------------------------
90+
GroupAggregate
91+
Group Key: a, b, c
92+
-> Sort
93+
Sort Key: a, b, c
94+
-> Seq Scan on ndistinct
95+
(5 rows)
96+
97+
EXPLAIN (COSTS off)
98+
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
99+
QUERY PLAN
100+
-----------------------------------
101+
GroupAggregate
102+
Group Key: a, b, c, d
103+
-> Sort
104+
Sort Key: a, b, c, d
105+
-> Seq Scan on ndistinct
106+
(5 rows)
107+
108+
EXPLAIN (COSTS off)
109+
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
110+
QUERY PLAN
111+
-----------------------------------
112+
GroupAggregate
113+
Group Key: b, c, d
114+
-> Sort
115+
Sort Key: b, c, d
116+
-> Seq Scan on ndistinct
117+
(5 rows)
118+
52119
-- unknown column
53120
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
54121
ERROR: column "unknown_column" referenced in statistics does not exist
@@ -63,18 +130,15 @@ CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
63130
ERROR: duplicate column name in statistics definition
64131
-- correct command
65132
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
66-
-- perfectly correlated groups
67-
INSERT INTO ndistinct (a, b, c, filler1)
68-
SELECT i/100, i/100, i/100, cash_words(i::money)
69-
FROM generate_series(1,10000) s(i);
70133
ANALYZE ndistinct;
71134
SELECT staenabled, standistinct
72135
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
73136
staenabled | standistinct
74137
------------+------------------------------------------------------------------------------------------------
75-
{d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
138+
{d} | [{(b 3 4), 301.000000}, {(b 3 6), 301.000000}, {(b 4 6), 301.000000}, {(b 3 4 6), 301.000000}]
76139
(1 row)
77140

141+
-- Hash Aggregate, thanks to estimates improved by the statistic
78142
EXPLAIN (COSTS off)
79143
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
80144
QUERY PLAN
@@ -85,78 +149,165 @@ EXPLAIN (COSTS off)
85149
(3 rows)
86150

87151
EXPLAIN (COSTS off)
88-
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
152+
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
89153
QUERY PLAN
90154
-----------------------------
91155
HashAggregate
92-
Group Key: a, b, c
156+
Group Key: b, c
93157
-> Seq Scan on ndistinct
94158
(3 rows)
95159

96160
EXPLAIN (COSTS off)
97-
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
161+
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
98162
QUERY PLAN
99163
-----------------------------
100164
HashAggregate
101-
Group Key: a, b, c, d
165+
Group Key: a, b, c
102166
-> Seq Scan on ndistinct
103167
(3 rows)
104168

169+
-- last two plans keep using Group Aggregate, because 'd' is not covered
170+
-- by the statistic and while it's NULL-only we assume 200 values for it
171+
EXPLAIN (COSTS off)
172+
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
173+
QUERY PLAN
174+
-----------------------------------
175+
GroupAggregate
176+
Group Key: a, b, c, d
177+
-> Sort
178+
Sort Key: a, b, c, d
179+
-> Seq Scan on ndistinct
180+
(5 rows)
181+
182+
EXPLAIN (COSTS off)
183+
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
184+
QUERY PLAN
185+
-----------------------------------
186+
GroupAggregate
187+
Group Key: b, c, d
188+
-> Sort
189+
Sort Key: b, c, d
190+
-> Seq Scan on ndistinct
191+
(5 rows)
192+
105193
TRUNCATE TABLE ndistinct;
106-
-- partially correlated groups
107-
INSERT INTO ndistinct (a, b, c)
108-
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
194+
-- under-estimates when using only per-column statistics
195+
INSERT INTO ndistinct (a, b, c, filler1)
196+
SELECT mod(i,50), mod(i,51), mod(i,32),
197+
cash_words(mod(i,33)::int::money)
198+
FROM generate_series(1,10000) s(i);
109199
ANALYZE ndistinct;
110200
SELECT staenabled, standistinct
111201
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
112-
staenabled | standistinct
113-
------------+------------------------------------------------------------------------------------------------
114-
{d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
202+
staenabled | standistinct
203+
------------+----------------------------------------------------------------------------------------------------
204+
{d} | [{(b 3 4), 2550.000000}, {(b 3 6), 800.000000}, {(b 4 6), 1632.000000}, {(b 3 4 6), 10000.000000}]
115205
(1 row)
116206

117-
EXPLAIN
207+
-- plans using Group Aggregate, thanks to using correct esimates
208+
EXPLAIN (COSTS off)
118209
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
119-
QUERY PLAN
120-
---------------------------------------------------------------------
121-
HashAggregate (cost=230.00..232.01 rows=201 width=16)
210+
QUERY PLAN
211+
-----------------------------------
212+
GroupAggregate
213+
Group Key: a, b
214+
-> Sort
215+
Sort Key: a, b
216+
-> Seq Scan on ndistinct
217+
(5 rows)
218+
219+
EXPLAIN (COSTS off)
220+
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
221+
QUERY PLAN
222+
-----------------------------------
223+
GroupAggregate
224+
Group Key: a, b, c
225+
-> Sort
226+
Sort Key: a, b, c
227+
-> Seq Scan on ndistinct
228+
(5 rows)
229+
230+
EXPLAIN (COSTS off)
231+
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
232+
QUERY PLAN
233+
-----------------------------------
234+
GroupAggregate
235+
Group Key: a, b, c, d
236+
-> Sort
237+
Sort Key: a, b, c, d
238+
-> Seq Scan on ndistinct
239+
(5 rows)
240+
241+
EXPLAIN (COSTS off)
242+
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
243+
QUERY PLAN
244+
-----------------------------
245+
HashAggregate
246+
Group Key: b, c, d
247+
-> Seq Scan on ndistinct
248+
(3 rows)
249+
250+
EXPLAIN (COSTS off)
251+
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
252+
QUERY PLAN
253+
-----------------------------
254+
HashAggregate
255+
Group Key: a, d
256+
-> Seq Scan on ndistinct
257+
(3 rows)
258+
259+
DROP STATISTICS s10;
260+
SELECT staenabled, standistinct
261+
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
262+
staenabled | standistinct
263+
------------+--------------
264+
(0 rows)
265+
266+
-- dropping the statistics switches the plans to Hash Aggregate,
267+
-- due to under-estimates
268+
EXPLAIN (COSTS off)
269+
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
270+
QUERY PLAN
271+
-----------------------------
272+
HashAggregate
122273
Group Key: a, b
123-
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
274+
-> Seq Scan on ndistinct
124275
(3 rows)
125276

126-
EXPLAIN
277+
EXPLAIN (COSTS off)
127278
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
128-
QUERY PLAN
129-
----------------------------------------------------------------------
130-
HashAggregate (cost=255.00..257.01 rows=201 width=20)
279+
QUERY PLAN
280+
-----------------------------
281+
HashAggregate
131282
Group Key: a, b, c
132-
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
283+
-> Seq Scan on ndistinct
133284
(3 rows)
134285

135-
EXPLAIN
286+
EXPLAIN (COSTS off)
136287
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
137-
QUERY PLAN
138-
----------------------------------------------------------------------
139-
HashAggregate (cost=280.00..290.00 rows=1000 width=24)
288+
QUERY PLAN
289+
-----------------------------
290+
HashAggregate
140291
Group Key: a, b, c, d
141-
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16)
292+
-> Seq Scan on ndistinct
142293
(3 rows)
143294

144-
EXPLAIN
295+
EXPLAIN (COSTS off)
145296
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
146-
QUERY PLAN
147-
----------------------------------------------------------------------
148-
HashAggregate (cost=255.00..265.00 rows=1000 width=20)
297+
QUERY PLAN
298+
-----------------------------
299+
HashAggregate
149300
Group Key: b, c, d
150-
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
301+
-> Seq Scan on ndistinct
151302
(3 rows)
152303

153-
EXPLAIN
304+
EXPLAIN (COSTS off)
154305
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
155-
QUERY PLAN
156-
---------------------------------------------------------------------
157-
HashAggregate (cost=230.00..240.00 rows=1000 width=16)
306+
QUERY PLAN
307+
-----------------------------
308+
HashAggregate
158309
Group Key: a, d
159-
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
310+
-> Seq Scan on ndistinct
160311
(3 rows)
161312

162313
DROP TABLE ndistinct;

0 commit comments

Comments
 (0)