1
1
-- Generic extended statistics support
2
+ -- We will be checking execution plans without/with statistics, so
3
+ -- let's make sure we get simple non-parallel plans. Also set the
4
+ -- work_mem low so that we can use small amounts of data.
5
+ SET max_parallel_workers = 0;
6
+ SET max_parallel_workers_per_gather = 0;
7
+ SET work_mem = '128kB';
2
8
-- Ensure stats are dropped sanely
3
9
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
4
10
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
@@ -49,6 +55,67 @@ CREATE TABLE ndistinct (
49
55
c INT,
50
56
d INT
51
57
);
58
+ -- over-estimates when using only per-column statistics
59
+ INSERT INTO ndistinct (a, b, c, filler1)
60
+ SELECT i/100, i/100, i/100, cash_words((i/100)::money)
61
+ FROM generate_series(1,30000) s(i);
62
+ ANALYZE ndistinct;
63
+ -- Group Aggregate, due to over-estimate of the number of groups
64
+ EXPLAIN (COSTS off)
65
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
66
+ QUERY PLAN
67
+ -----------------------------------
68
+ GroupAggregate
69
+ Group Key: a, b
70
+ -> Sort
71
+ Sort Key: a, b
72
+ -> Seq Scan on ndistinct
73
+ (5 rows)
74
+
75
+ EXPLAIN (COSTS off)
76
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
77
+ QUERY PLAN
78
+ -----------------------------------
79
+ GroupAggregate
80
+ Group Key: b, c
81
+ -> Sort
82
+ Sort Key: b, c
83
+ -> Seq Scan on ndistinct
84
+ (5 rows)
85
+
86
+ EXPLAIN (COSTS off)
87
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
88
+ QUERY PLAN
89
+ -----------------------------------
90
+ GroupAggregate
91
+ Group Key: a, b, c
92
+ -> Sort
93
+ Sort Key: a, b, c
94
+ -> Seq Scan on ndistinct
95
+ (5 rows)
96
+
97
+ EXPLAIN (COSTS off)
98
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
99
+ QUERY PLAN
100
+ -----------------------------------
101
+ GroupAggregate
102
+ Group Key: a, b, c, d
103
+ -> Sort
104
+ Sort Key: a, b, c, d
105
+ -> Seq Scan on ndistinct
106
+ (5 rows)
107
+
108
+ EXPLAIN (COSTS off)
109
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
110
+ QUERY PLAN
111
+ -----------------------------------
112
+ GroupAggregate
113
+ Group Key: b, c, d
114
+ -> Sort
115
+ Sort Key: b, c, d
116
+ -> Seq Scan on ndistinct
117
+ (5 rows)
118
+
52
119
-- unknown column
53
120
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
54
121
ERROR: column "unknown_column" referenced in statistics does not exist
@@ -63,18 +130,15 @@ CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
63
130
ERROR: duplicate column name in statistics definition
64
131
-- correct command
65
132
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
66
- -- perfectly correlated groups
67
- INSERT INTO ndistinct (a, b, c, filler1)
68
- SELECT i/100, i/100, i/100, cash_words(i::money)
69
- FROM generate_series(1,10000) s(i);
70
133
ANALYZE ndistinct;
71
134
SELECT staenabled, standistinct
72
135
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
73
136
staenabled | standistinct
74
137
------------+------------------------------------------------------------------------------------------------
75
- {d} | [{(b 3 4), 101 .000000}, {(b 3 6), 101 .000000}, {(b 4 6), 101 .000000}, {(b 3 4 6), 101 .000000}]
138
+ {d} | [{(b 3 4), 301 .000000}, {(b 3 6), 301 .000000}, {(b 4 6), 301 .000000}, {(b 3 4 6), 301 .000000}]
76
139
(1 row)
77
140
141
+ -- Hash Aggregate, thanks to estimates improved by the statistic
78
142
EXPLAIN (COSTS off)
79
143
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
80
144
QUERY PLAN
@@ -85,78 +149,165 @@ EXPLAIN (COSTS off)
85
149
(3 rows)
86
150
87
151
EXPLAIN (COSTS off)
88
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
152
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
89
153
QUERY PLAN
90
154
-----------------------------
91
155
HashAggregate
92
- Group Key: a, b, c
156
+ Group Key: b, c
93
157
-> Seq Scan on ndistinct
94
158
(3 rows)
95
159
96
160
EXPLAIN (COSTS off)
97
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d ;
161
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
98
162
QUERY PLAN
99
163
-----------------------------
100
164
HashAggregate
101
- Group Key: a, b, c, d
165
+ Group Key: a, b, c
102
166
-> Seq Scan on ndistinct
103
167
(3 rows)
104
168
169
+ -- last two plans keep using Group Aggregate, because 'd' is not covered
170
+ -- by the statistic and while it's NULL-only we assume 200 values for it
171
+ EXPLAIN (COSTS off)
172
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
173
+ QUERY PLAN
174
+ -----------------------------------
175
+ GroupAggregate
176
+ Group Key: a, b, c, d
177
+ -> Sort
178
+ Sort Key: a, b, c, d
179
+ -> Seq Scan on ndistinct
180
+ (5 rows)
181
+
182
+ EXPLAIN (COSTS off)
183
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
184
+ QUERY PLAN
185
+ -----------------------------------
186
+ GroupAggregate
187
+ Group Key: b, c, d
188
+ -> Sort
189
+ Sort Key: b, c, d
190
+ -> Seq Scan on ndistinct
191
+ (5 rows)
192
+
105
193
TRUNCATE TABLE ndistinct;
106
- -- partially correlated groups
107
- INSERT INTO ndistinct (a, b, c)
108
- SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
194
+ -- under-estimates when using only per-column statistics
195
+ INSERT INTO ndistinct (a, b, c, filler1)
196
+ SELECT mod(i,50), mod(i,51), mod(i,32),
197
+ cash_words(mod(i,33)::int::money)
198
+ FROM generate_series(1,10000) s(i);
109
199
ANALYZE ndistinct;
110
200
SELECT staenabled, standistinct
111
201
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
112
- staenabled | standistinct
113
- ------------+------------------------------------------------------------------------------------------------
114
- {d} | [{(b 3 4), 201 .000000}, {(b 3 6), 201 .000000}, {(b 4 6), 101 .000000}, {(b 3 4 6), 201 .000000}]
202
+ staenabled | standistinct
203
+ ------------+----------------------------------------------------------------------------------------------------
204
+ {d} | [{(b 3 4), 2550 .000000}, {(b 3 6), 800 .000000}, {(b 4 6), 1632 .000000}, {(b 3 4 6), 10000 .000000}]
115
205
(1 row)
116
206
117
- EXPLAIN
207
+ -- plans using Group Aggregate, thanks to using correct esimates
208
+ EXPLAIN (COSTS off)
118
209
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
119
- QUERY PLAN
120
- ---------------------------------------------------------------------
121
- HashAggregate (cost=230.00..232.01 rows=201 width=16)
210
+ QUERY PLAN
211
+ -----------------------------------
212
+ GroupAggregate
213
+ Group Key: a, b
214
+ -> Sort
215
+ Sort Key: a, b
216
+ -> Seq Scan on ndistinct
217
+ (5 rows)
218
+
219
+ EXPLAIN (COSTS off)
220
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
221
+ QUERY PLAN
222
+ -----------------------------------
223
+ GroupAggregate
224
+ Group Key: a, b, c
225
+ -> Sort
226
+ Sort Key: a, b, c
227
+ -> Seq Scan on ndistinct
228
+ (5 rows)
229
+
230
+ EXPLAIN (COSTS off)
231
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
232
+ QUERY PLAN
233
+ -----------------------------------
234
+ GroupAggregate
235
+ Group Key: a, b, c, d
236
+ -> Sort
237
+ Sort Key: a, b, c, d
238
+ -> Seq Scan on ndistinct
239
+ (5 rows)
240
+
241
+ EXPLAIN (COSTS off)
242
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
243
+ QUERY PLAN
244
+ -----------------------------
245
+ HashAggregate
246
+ Group Key: b, c, d
247
+ -> Seq Scan on ndistinct
248
+ (3 rows)
249
+
250
+ EXPLAIN (COSTS off)
251
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
252
+ QUERY PLAN
253
+ -----------------------------
254
+ HashAggregate
255
+ Group Key: a, d
256
+ -> Seq Scan on ndistinct
257
+ (3 rows)
258
+
259
+ DROP STATISTICS s10;
260
+ SELECT staenabled, standistinct
261
+ FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
262
+ staenabled | standistinct
263
+ ------------+--------------
264
+ (0 rows)
265
+
266
+ -- dropping the statistics switches the plans to Hash Aggregate,
267
+ -- due to under-estimates
268
+ EXPLAIN (COSTS off)
269
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
270
+ QUERY PLAN
271
+ -----------------------------
272
+ HashAggregate
122
273
Group Key: a, b
123
- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
274
+ -> Seq Scan on ndistinct
124
275
(3 rows)
125
276
126
- EXPLAIN
277
+ EXPLAIN (COSTS off)
127
278
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
128
- QUERY PLAN
129
- ----------------------------------------------------------------------
130
- HashAggregate (cost=255.00..257.01 rows=201 width=20)
279
+ QUERY PLAN
280
+ -----------------------------
281
+ HashAggregate
131
282
Group Key: a, b, c
132
- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
283
+ -> Seq Scan on ndistinct
133
284
(3 rows)
134
285
135
- EXPLAIN
286
+ EXPLAIN (COSTS off)
136
287
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
137
- QUERY PLAN
138
- ----------------------------------------------------------------------
139
- HashAggregate (cost=280.00..290.00 rows=1000 width=24)
288
+ QUERY PLAN
289
+ -----------------------------
290
+ HashAggregate
140
291
Group Key: a, b, c, d
141
- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16)
292
+ -> Seq Scan on ndistinct
142
293
(3 rows)
143
294
144
- EXPLAIN
295
+ EXPLAIN (COSTS off)
145
296
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
146
- QUERY PLAN
147
- ----------------------------------------------------------------------
148
- HashAggregate (cost=255.00..265.00 rows=1000 width=20)
297
+ QUERY PLAN
298
+ -----------------------------
299
+ HashAggregate
149
300
Group Key: b, c, d
150
- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
301
+ -> Seq Scan on ndistinct
151
302
(3 rows)
152
303
153
- EXPLAIN
304
+ EXPLAIN (COSTS off)
154
305
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
155
- QUERY PLAN
156
- ---------------------------------------------------------------------
157
- HashAggregate (cost=230.00..240.00 rows=1000 width=16)
306
+ QUERY PLAN
307
+ -----------------------------
308
+ HashAggregate
158
309
Group Key: a, d
159
- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
310
+ -> Seq Scan on ndistinct
160
311
(3 rows)
161
312
162
313
DROP TABLE ndistinct;
0 commit comments