
Commit 92c58fd

Rework HashAgg GUCs.
Eliminate enable_groupingsets_hash_disk, which was primarily useful for testing grouping sets that use HashAgg and spill. Instead, hack the table stats to convince the planner to choose hashed aggregation for grouping sets that will spill to disk. Suggested by Melanie Plageman.

Rename enable_hashagg_disk to hashagg_avoid_disk_plan, and invert the meaning of on/off. The new name indicates more strongly that it only affects the planner. Also, the word "avoid" is less definite, which should avoid surprises when HashAgg still needs to use the disk. Change suggested by Justin Pryzby, though I chose a different GUC name.

Discussion: https://postgr.es/m/CAAKRu_aisiENMsPM2gC4oUY1hHG3yrCwY-fXUg22C6_MJUwQdA%40mail.gmail.com
Discussion: https://postgr.es/m/20200610021544.GA14879@telsasoft.com
Backpatch-through: 13
1 parent 5940ffb commit 92c58fd
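The stats hack mentioned above works the way the updated groupingsets regression test does: build the data set, then understate reltuples so the planner expects a tiny hash table and chooses hashed aggregation for the grouping sets, which then spills at execution time under a small work_mem. A condensed sketch of that setup, taken from the new test (a testing trick, not a recommended production technique):

-- Build the data, then understate the table size so the planner
-- estimates a small hash table.
create table gs_data_1 as
  select g%1000 as g1000, g%100 as g100, g%10 as g10, g
  from generate_series(0,1999) g;
analyze gs_data_1;
alter table gs_data_1 set (autovacuum_enabled = 'false');
update pg_class set reltuples = 10 where relname = 'gs_data_1';

set work_mem = '64kB';
set enable_hashagg = true;
set enable_sort = false;
-- A hashed (MixedAggregate) plan is chosen, but the real 2000 rows
-- force the hash tables to spill to disk at execution time.
explain (costs off)
select g100, g10, sum(g::numeric), count(*), max(g::text)
  from gs_data_1 group by cube (g1000, g100, g10);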

10 files changed, +119 −158 lines changed

doc/src/sgml/config.sgml

+8 −23

@@ -4546,23 +4546,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
      </listitem>
     </varlistentry>
 
-    <varlistentry id="guc-enable-groupingsets-hash-disk" xreflabel="enable_groupingsets_hash_disk">
-     <term><varname>enable_groupingsets_hash_disk</varname> (<type>boolean</type>)
-      <indexterm>
-       <primary><varname>enable_groupingsets_hash_disk</varname> configuration parameter</primary>
-      </indexterm>
-     </term>
-     <listitem>
-      <para>
-       Enables or disables the query planner's use of hashed aggregation plan
-       types for grouping sets when the total size of the hash tables is
-       expected to exceed <varname>work_mem</varname>. See <xref
-       linkend="queries-grouping-sets"/>. The default is
-       <literal>off</literal>.
-      </para>
-     </listitem>
-    </varlistentry>
-
     <varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg">
      <term><varname>enable_hashagg</varname> (<type>boolean</type>)
       <indexterm>
@@ -4577,17 +4560,19 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
      </listitem>
     </varlistentry>
 
-    <varlistentry id="guc-enable-hashagg-disk" xreflabel="enable_hashagg_disk">
-     <term><varname>enable_hashagg_disk</varname> (<type>boolean</type>)
+    <varlistentry id="guc-hashagg-avoid-disk-plan" xreflabel="hashagg_avoid_disk_plan">
+     <term><varname>hashagg_avoid_disk_plan</varname> (<type>boolean</type>)
      <indexterm>
-      <primary><varname>enable_hashagg_disk</varname> configuration parameter</primary>
+      <primary><varname>hashagg_avoid_disk_plan</varname> configuration parameter</primary>
      </indexterm>
     </term>
     <listitem>
     <para>
-      Enables or disables the query planner's use of hashed aggregation plan
-      types when the memory usage is expected to exceed
-      <varname>work_mem</varname>. The default is <literal>on</literal>.
+      If set to <literal>on</literal>, causes the planner to avoid choosing
+      hashed aggregation plans that are expected to use the disk. If hashed
+      aggregation is chosen, it may still require the use of disk at
+      execution time, even if this parameter is enabled. The default is
+      <literal>off</literal>.
     </para>
     </listitem>
    </varlistentry>

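As the new documentation text above stresses, hashagg_avoid_disk_plan is purely a planner setting: turning it on steers the planner away from hashed aggregation plans that are expected to exceed work_mem, but a HashAgg that is chosen anyway can still spill at execution time. A minimal illustrative session (the query below is made up for demonstration and is not part of this commit):

set hashagg_avoid_disk_plan = on;  -- planner avoids HashAgg plans expected to spill
set work_mem = '64kB';
explain (costs off)
select g % 10000, sum(g::numeric)
  from generate_series(0, 19999) g
  group by g % 10000;
-- With the setting on and a low work_mem, a sort-based GroupAggregate
-- plan is the expected outcome here.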
src/backend/optimizer/path/costsize.c

+1 −2

@@ -130,8 +130,7 @@ bool enable_tidscan = true;
 bool		enable_sort = true;
 bool		enable_incrementalsort = true;
 bool		enable_hashagg = true;
-bool		enable_hashagg_disk = true;
-bool		enable_groupingsets_hash_disk = false;
+bool		hashagg_avoid_disk_plan = true;
 bool		enable_nestloop = true;
 bool		enable_material = true;
 bool		enable_mergejoin = true;

src/backend/optimizer/plan/planner.c

+10 −10

@@ -4256,12 +4256,11 @@ consider_groupingsets_paths(PlannerInfo *root,
 							 dNumGroups - exclude_groups);
 
 		/*
-		 * If we have sortable columns to work with (gd->rollups is non-empty)
-		 * and enable_groupingsets_hash_disk is disabled, don't generate
-		 * hash-based paths that will exceed work_mem.
+		 * gd->rollups is empty if we have only unsortable columns to work
+		 * with. Override work_mem in that case; otherwise, we'll rely on the
+		 * sorted-input case to generate usable mixed paths.
 		 */
-		if (!enable_groupingsets_hash_disk &&
-			hashsize > work_mem * 1024L && gd->rollups)
+		if (hashsize > work_mem * 1024L && gd->rollups)
 			return;				/* nope, won't fit */
 
 		/*
@@ -4868,7 +4867,7 @@ create_distinct_paths(PlannerInfo *root,
 		{
 			Size		hashentrysize = hash_agg_entry_size(0, cheapest_input_path->pathtarget->width, 0);
 
-			allow_hash = enable_hashagg_disk ||
+			allow_hash = !hashagg_avoid_disk_plan ||
 				(hashentrysize * numDistinctRows <= work_mem * 1024L);
 		}
 
@@ -6773,7 +6772,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
			 * were unable to sort above, then we'd better generate a Path, so
			 * that we at least have one.
			 */
-			if (enable_hashagg_disk ||
+			if (!hashagg_avoid_disk_plan ||
				hashaggtablesize < work_mem * 1024L ||
				grouped_rel->pathlist == NIL)
			{
@@ -6807,7 +6806,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
											 agg_final_costs,
											 dNumGroups);
 
-			if (enable_hashagg_disk ||
+			if (!hashagg_avoid_disk_plan ||
				hashaggtablesize < work_mem * 1024L)
				add_path(grouped_rel, (Path *)
						 create_agg_path(root,
@@ -7188,7 +7187,7 @@ create_partial_grouping_paths(PlannerInfo *root,
		 * Tentatively produce a partial HashAgg Path, depending on if it
		 * looks as if the hash table will fit in work_mem.
		 */
-		if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) &&
+		if ((!hashagg_avoid_disk_plan || hashaggtablesize < work_mem * 1024L) &&
			cheapest_total_path != NULL)
		{
			add_path(partially_grouped_rel, (Path *)
@@ -7215,7 +7214,8 @@ create_partial_grouping_paths(PlannerInfo *root,
											dNumPartialPartialGroups);
 
		/* Do the same for partial paths. */
-		if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) &&
+		if ((!hashagg_avoid_disk_plan ||
+			 hashaggtablesize < work_mem * 1024L) &&
			cheapest_partial_path != NULL)
		{
			add_partial_path(partially_grouped_rel, (Path *)

src/backend/utils/misc/guc.c

+3 −13

@@ -1002,22 +1002,12 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 	{
-		{"enable_hashagg_disk", PGC_USERSET, QUERY_TUNING_METHOD,
-			gettext_noop("Enables the planner's use of hashed aggregation plans that are expected to exceed work_mem."),
+		{"hashagg_avoid_disk_plan", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Causes the planner to avoid hashed aggregation plans that are expected to use the disk."),
 			NULL,
 			GUC_EXPLAIN
 		},
-		&enable_hashagg_disk,
-		true,
-		NULL, NULL, NULL
-	},
-	{
-		{"enable_groupingsets_hash_disk", PGC_USERSET, QUERY_TUNING_METHOD,
-			gettext_noop("Enables the planner's use of hashed aggregation plans for groupingsets when the total size of the hash tables is expected to exceed work_mem."),
-			NULL,
-			GUC_EXPLAIN
-		},
-		&enable_groupingsets_hash_disk,
+		&hashagg_avoid_disk_plan,
 		false,
 		NULL, NULL, NULL
 	},

src/include/optimizer/cost.h

+1 −2

@@ -55,8 +55,7 @@ extern PGDLLIMPORT bool enable_tidscan;
 extern PGDLLIMPORT bool enable_sort;
 extern PGDLLIMPORT bool enable_incrementalsort;
 extern PGDLLIMPORT bool enable_hashagg;
-extern PGDLLIMPORT bool enable_hashagg_disk;
-extern PGDLLIMPORT bool enable_groupingsets_hash_disk;
+extern PGDLLIMPORT bool hashagg_avoid_disk_plan;
 extern PGDLLIMPORT bool enable_nestloop;
 extern PGDLLIMPORT bool enable_material;
 extern PGDLLIMPORT bool enable_mergejoin;

src/test/regress/expected/aggregates.out

+22 −24

@@ -2449,83 +2449,81 @@ set enable_sort to default;
 -- aggregation. Force spilling in both cases by setting work_mem low.
 --
 set work_mem='64kB';
+create table agg_data_2k as
+  select g from generate_series(0, 1999) g;
+analyze agg_data_2k;
+create table agg_data_20k as
+  select g from generate_series(0, 19999) g;
+analyze agg_data_20k;
 -- Produce results with sorting.
 set enable_hashagg = false;
 set jit_above_cost = 0;
 explain (costs off)
 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
-  from generate_series(0, 19999) g
-  group by g%10000;
-                   QUERY PLAN
-------------------------------------------------
+  from agg_data_20k group by g%10000;
+              QUERY PLAN
+--------------------------------------
 GroupAggregate
   Group Key: ((g % 10000))
   ->  Sort
         Sort Key: ((g % 10000))
-        ->  Function Scan on generate_series g
+        ->  Seq Scan on agg_data_20k
 (5 rows)
 
 create table agg_group_1 as
 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
-  from generate_series(0, 19999) g
-  group by g%10000;
+  from agg_data_20k group by g%10000;
 create table agg_group_2 as
 select * from
   (values (100), (300), (500)) as r(a),
   lateral (
     select (g/2)::numeric as c1,
            array_agg(g::numeric) as c2,
            count(*) as c3
-    from generate_series(0, 1999) g
+    from agg_data_2k
     where g < r.a
     group by g/2) as s;
 set jit_above_cost to default;
 create table agg_group_3 as
 select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
-  from generate_series(0, 1999) g
-  group by g/2;
+  from agg_data_2k group by g/2;
 create table agg_group_4 as
 select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
-  from generate_series(0, 1999) g
-  group by g/2;
+  from agg_data_2k group by g/2;
 -- Produce results with hash aggregation
 set enable_hashagg = true;
 set enable_sort = false;
 set jit_above_cost = 0;
 explain (costs off)
 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
-  from generate_series(0, 19999) g
-  group by g%10000;
-                QUERY PLAN
-------------------------------------------
+  from agg_data_20k group by g%10000;
+           QUERY PLAN
+--------------------------------
 HashAggregate
   Group Key: (g % 10000)
-  ->  Function Scan on generate_series g
+  ->  Seq Scan on agg_data_20k
 (3 rows)
 
 create table agg_hash_1 as
 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
-  from generate_series(0, 19999) g
-  group by g%10000;
+  from agg_data_20k group by g%10000;
 create table agg_hash_2 as
 select * from
   (values (100), (300), (500)) as r(a),
   lateral (
     select (g/2)::numeric as c1,
            array_agg(g::numeric) as c2,
            count(*) as c3
-    from generate_series(0, 1999) g
+    from agg_data_2k
     where g < r.a
     group by g/2) as s;
 set jit_above_cost to default;
 create table agg_hash_3 as
 select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
-  from generate_series(0, 1999) g
-  group by g/2;
+  from agg_data_2k group by g/2;
 create table agg_hash_4 as
 select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
-  from generate_series(0, 1999) g
-  group by g/2;
+  from agg_data_2k group by g/2;
 set enable_sort = true;
 set work_mem to default;
 -- Compare group aggregation results to hash aggregation results
src/test/regress/expected/groupingsets.out

+38 −42

@@ -1636,68 +1636,65 @@ select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*)
 --
 -- Compare results between plans using sorting and plans using hash
 -- aggregation. Force spilling in both cases by setting work_mem low
--- and turning on enable_groupingsets_hash_disk.
+-- and altering the statistics.
 --
-SET enable_groupingsets_hash_disk = true;
+create table gs_data_1 as
+  select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+  from generate_series(0,1999) g;
+analyze gs_data_1;
+alter table gs_data_1 set (autovacuum_enabled = 'false');
+update pg_class set reltuples = 10 where relname='gs_data_1';
 SET work_mem='64kB';
 -- Produce results with sorting.
 set enable_hashagg = false;
 set jit_above_cost = 0;
 explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
-  (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
-     from generate_series(0,1999) g) s
- group by cube (g1000, g100,g10);
-                          QUERY PLAN
----------------------------------------------------------------
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+  from gs_data_1 group by cube (g1000, g100,g10);
+             QUERY PLAN
+------------------------------------
 GroupAggregate
-   Group Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
-   Group Key: ((g.g % 1000)), ((g.g % 100))
-   Group Key: ((g.g % 1000))
+   Group Key: g1000, g100, g10
+   Group Key: g1000, g100
+   Group Key: g1000
   Group Key: ()
-   Sort Key: ((g.g % 100)), ((g.g % 10))
-     Group Key: ((g.g % 100)), ((g.g % 10))
-     Group Key: ((g.g % 100))
-   Sort Key: ((g.g % 10)), ((g.g % 1000))
-     Group Key: ((g.g % 10)), ((g.g % 1000))
-     Group Key: ((g.g % 10))
+   Sort Key: g100, g10
+     Group Key: g100, g10
+     Group Key: g100
+   Sort Key: g10, g1000
+     Group Key: g10, g1000
+     Group Key: g10
   ->  Sort
-        Sort Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
-        ->  Function Scan on generate_series g
+        Sort Key: g1000, g100, g10
+        ->  Seq Scan on gs_data_1
 (14 rows)
 
 create table gs_group_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
-  (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
-     from generate_series(0,1999) g) s
- group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+  from gs_data_1 group by cube (g1000, g100,g10);
 -- Produce results with hash aggregation.
 set enable_hashagg = true;
 set enable_sort = false;
 explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
-  (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
-     from generate_series(0,1999) g) s
- group by cube (g1000, g100,g10);
-                     QUERY PLAN
----------------------------------------------------
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+  from gs_data_1 group by cube (g1000, g100,g10);
+          QUERY PLAN
+------------------------------
 MixedAggregate
-   Hash Key: (g.g % 1000), (g.g % 100), (g.g % 10)
-   Hash Key: (g.g % 1000), (g.g % 100)
-   Hash Key: (g.g % 1000)
-   Hash Key: (g.g % 100), (g.g % 10)
-   Hash Key: (g.g % 100)
-   Hash Key: (g.g % 10), (g.g % 1000)
-   Hash Key: (g.g % 10)
+   Hash Key: g1000, g100, g10
+   Hash Key: g1000, g100
+   Hash Key: g1000
+   Hash Key: g100, g10
+   Hash Key: g100
+   Hash Key: g10, g1000
+   Hash Key: g10
   Group Key: ()
-   ->  Function Scan on generate_series g
+   ->  Seq Scan on gs_data_1
 (10 rows)
 
 create table gs_hash_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
-  (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
-     from generate_series(0,1999) g) s
- group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+  from gs_data_1 group by cube (g1000, g100,g10);
 set enable_sort = true;
 set work_mem to default;
 -- Compare results
@@ -1710,5 +1707,4 @@ set work_mem to default;
 
 drop table gs_group_1;
 drop table gs_hash_1;
-SET enable_groupingsets_hash_disk TO DEFAULT;
 -- end

src/test/regress/expected/sysviews.out

+1 −3

@@ -74,9 +74,7 @@ select name, setting from pg_settings where name like 'enable%';
 --------------------------------+---------
  enable_bitmapscan              | on
  enable_gathermerge             | on
- enable_groupingsets_hash_disk  | off
  enable_hashagg                 | on
- enable_hashagg_disk            | on
  enable_hashjoin                | on
  enable_incrementalsort         | on
  enable_indexonlyscan           | on
@@ -92,7 +90,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(20 rows)
+(18 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working. We can't test their contents in any great detail
