Skip to content

Commit 0c7d537

Browse files
committed
Move estimate_hashagg_tablesize to selfuncs.c, and widen result to double.
It seems to make more sense for this to be in selfuncs.c, since it's largely a statistical-estimation thing, and it's related to other functions like estimate_hash_bucket_stats that are there. While at it, change the result type from Size to double. Perhaps at one point it was impossible for the result to overflow an integer, but I've got no confidence in that proposition anymore. Nothing's actually done with the result except to compare it to a work_mem-based limit, so as long as we don't get an overflow on the way to that comparison, things should be fine even with very large dNumGroups. Code movement proposed by Antonin Houska, type change by me Discussion: https://postgr.es/m/25767.1549359615@localhost
1 parent f9692a7 commit 0c7d537

File tree

3 files changed

+48
-44
lines changed

3 files changed

+48
-44
lines changed

src/backend/optimizer/plan/planner.c

Lines changed: 7 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,6 @@ static double get_number_of_groups(PlannerInfo *root,
148148
double path_rows,
149149
grouping_sets_data *gd,
150150
List *target_list);
151-
static Size estimate_hashagg_tablesize(Path *path,
152-
const AggClauseCosts *agg_costs,
153-
double dNumGroups);
154151
static RelOptInfo *create_grouping_paths(PlannerInfo *root,
155152
RelOptInfo *input_rel,
156153
PathTarget *target,
@@ -3659,40 +3656,6 @@ get_number_of_groups(PlannerInfo *root,
36593656
return dNumGroups;
36603657
}
36613658

3662-
/*
3663-
* estimate_hashagg_tablesize
3664-
* estimate the number of bytes that a hash aggregate hashtable will
3665-
* require based on the agg_costs, path width and dNumGroups.
3666-
*
3667-
* XXX this may be over-estimating the size now that hashagg knows to omit
3668-
* unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3669-
* grouping columns not in the hashed set are counted here even though hashagg
3670-
* won't store them. Is this a problem?
3671-
*/
3672-
static Size
3673-
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
3674-
double dNumGroups)
3675-
{
3676-
Size hashentrysize;
3677-
3678-
/* Estimate per-hash-entry space at tuple width... */
3679-
hashentrysize = MAXALIGN(path->pathtarget->width) +
3680-
MAXALIGN(SizeofMinimalTupleHeader);
3681-
3682-
/* plus space for pass-by-ref transition values... */
3683-
hashentrysize += agg_costs->transitionSpace;
3684-
/* plus the per-hash-entry overhead */
3685-
hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3686-
3687-
/*
3688-
* Note that this disregards the effect of fill-factor and growth policy
3689-
* of the hash-table. That's probably ok, given default the default
3690-
* fill-factor is relatively high. It'd be hard to meaningfully factor in
3691-
* "double-in-size" growth policies here.
3692-
*/
3693-
return hashentrysize * dNumGroups;
3694-
}
3695-
36963659
/*
36973660
* create_grouping_paths
36983661
*
@@ -4130,7 +4093,7 @@ consider_groupingsets_paths(PlannerInfo *root,
41304093
ListCell *lc;
41314094
ListCell *l_start = list_head(gd->rollups);
41324095
AggStrategy strat = AGG_HASHED;
4133-
Size hashsize;
4096+
double hashsize;
41344097
double exclude_groups = 0.0;
41354098

41364099
Assert(can_hash);
@@ -4297,9 +4260,9 @@ consider_groupingsets_paths(PlannerInfo *root,
42974260
/*
42984261
* Account first for space needed for groups we can't sort at all.
42994262
*/
4300-
availspace -= (double) estimate_hashagg_tablesize(path,
4301-
agg_costs,
4302-
gd->dNumHashGroups);
4263+
availspace -= estimate_hashagg_tablesize(path,
4264+
agg_costs,
4265+
gd->dNumHashGroups);
43034266

43044267
if (availspace > 0 && list_length(gd->rollups) > 1)
43054268
{
@@ -6424,7 +6387,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
64246387

64256388
if (can_hash)
64266389
{
6427-
Size hashaggtablesize;
6390+
double hashaggtablesize;
64286391

64296392
if (parse->groupingSets)
64306393
{
@@ -6735,7 +6698,7 @@ create_partial_grouping_paths(PlannerInfo *root,
67356698

67366699
if (can_hash && cheapest_total_path != NULL)
67376700
{
6738-
Size hashaggtablesize;
6701+
double hashaggtablesize;
67396702

67406703
/* Checked above */
67416704
Assert(parse->hasAggs || parse->groupClause);
@@ -6768,7 +6731,7 @@ create_partial_grouping_paths(PlannerInfo *root,
67686731

67696732
if (can_hash && cheapest_partial_path != NULL)
67706733
{
6771-
Size hashaggtablesize;
6734+
double hashaggtablesize;
67726735

67736736
hashaggtablesize =
67746737
estimate_hashagg_tablesize(cheapest_partial_path,

src/backend/utils/adt/selfuncs.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@
113113
#include "catalog/pg_statistic.h"
114114
#include "catalog/pg_statistic_ext.h"
115115
#include "executor/executor.h"
116+
#include "executor/nodeAgg.h"
116117
#include "miscadmin.h"
117118
#include "nodes/makefuncs.h"
118119
#include "nodes/nodeFuncs.h"
@@ -3428,6 +3429,43 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
34283429
ReleaseVariableStats(vardata);
34293430
}
34303431

3432+
/*
3433+
* estimate_hashagg_tablesize
3434+
* estimate the number of bytes that a hash aggregate hashtable will
3435+
* require based on the agg_costs, path width and number of groups.
3436+
*
3437+
* We return the result as "double" to forestall any possible overflow
3438+
* problem in the multiplication by dNumGroups.
3439+
*
3440+
* XXX this may be over-estimating the size now that hashagg knows to omit
3441+
* unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3442+
* grouping columns not in the hashed set are counted here even though hashagg
3443+
* won't store them. Is this a problem?
3444+
*/
3445+
double
3446+
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
3447+
double dNumGroups)
3448+
{
3449+
Size hashentrysize;
3450+
3451+
/* Estimate per-hash-entry space at tuple width... */
3452+
hashentrysize = MAXALIGN(path->pathtarget->width) +
3453+
MAXALIGN(SizeofMinimalTupleHeader);
3454+
3455+
/* plus space for pass-by-ref transition values... */
3456+
hashentrysize += agg_costs->transitionSpace;
3457+
/* plus the per-hash-entry overhead */
3458+
hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3459+
3460+
/*
3461+
* Note that this disregards the effect of fill-factor and growth policy
3462+
* of the hash table. That's probably ok, given that the default
3463+
* fill-factor is relatively high. It'd be hard to meaningfully factor in
3464+
* "double-in-size" growth policies here.
3465+
*/
3466+
return hashentrysize * dNumGroups;
3467+
}
3468+
34313469

34323470
/*-------------------------------------------------------------------------
34333471
*

src/include/utils/selfuncs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ extern void estimate_hash_bucket_stats(PlannerInfo *root,
186186
Node *hashkey, double nbuckets,
187187
Selectivity *mcv_freq,
188188
Selectivity *bucketsize_frac);
189+
extern double estimate_hashagg_tablesize(Path *path,
190+
const AggClauseCosts *agg_costs,
191+
double dNumGroups);
189192

190193
extern List *get_quals_from_indexclauses(List *indexclauses);
191194
extern Cost index_other_operands_eval_cost(PlannerInfo *root,

0 commit comments

Comments
 (0)