Skip to content

Commit ed934d4

Browse files
committed
Allow estimate_num_groups() to pass back further details about the estimation
Here we add a new output parameter to estimate_num_groups() to allow it to inform the caller of additional, possibly useful information about the estimation. The new output parameter is a struct that currently contains just a single field with a set of flags. This was done rather than having the flags as an output parameter to allow future fields to be added without having to change the signature of the function at a later date when we want to pass back further information that might not be suitable to store in the flags field. It seems reasonable that one day in the future the planner would want to know more about the estimation. For example, how many individual sets of statistics was the estimation generated from? The planner may want to take that into account if we ever want to consider risks as well as costs when generating plans. For now, there's only 1 flag we set in the flags field. This is to indicate if the estimation fell back on using the hard-coded constants in any part of the estimation. Callers may like to change their behavior if this is set, and this gives them the ability to do so. Callers may pass the estinfo pointer as NULL if they have no interest in obtaining any additional information about the estimate. We're not adding any actual usages of these flags here. Some follow-up commits will make use of this feature. Additionally, we're also not making any changes to add support for clauselist_selectivity() and clauselist_selectivity_ext(). However, if this is required in the future then the same struct being added here should be fine to use as a new output argument for those functions too. Author: David Rowley Discussion: https://postgr.es/m/CAApHDvqQqpk=1W-G_ds7A9CsXX3BggWj_7okinzkLVhDubQzjA@mail.gmail.com
1 parent efd9d92 commit ed934d4

File tree

8 files changed

+49
-8
lines changed

8 files changed

+49
-8
lines changed

contrib/postgres_fdw/postgres_fdw.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3087,7 +3087,7 @@ estimate_path_cost_size(PlannerInfo *root,
30873087
numGroups = estimate_num_groups(root,
30883088
get_sortgrouplist_exprs(root->parse->groupClause,
30893089
fpinfo->grouped_tlist),
3090-
input_rows, NULL);
3090+
input_rows, NULL, NULL);
30913091

30923092
/*
30933093
* Get the retrieved_rows and rows estimates. If there are HAVING

src/backend/optimizer/path/costsize.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1969,7 +1969,8 @@ cost_incremental_sort(Path *path,
19691969

19701970
/* Estimate number of groups with equal presorted keys. */
19711971
if (!unknown_varno)
1972-
input_groups = estimate_num_groups(root, presortedExprs, input_tuples, NULL);
1972+
input_groups = estimate_num_groups(root, presortedExprs, input_tuples,
1973+
NULL, NULL);
19731974

19741975
group_tuples = input_tuples / input_groups;
19751976
group_input_run_cost = input_run_cost / input_groups;

src/backend/optimizer/path/indxpath.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,6 +1990,7 @@ adjust_rowcount_for_semijoins(PlannerInfo *root,
19901990
nunique = estimate_num_groups(root,
19911991
sjinfo->semi_rhs_exprs,
19921992
nraw,
1993+
NULL,
19931994
NULL);
19941995
if (rowcount > nunique)
19951996
rowcount = nunique;

src/backend/optimizer/plan/planner.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3702,7 +3702,8 @@ get_number_of_groups(PlannerInfo *root,
37023702
double numGroups = estimate_num_groups(root,
37033703
groupExprs,
37043704
path_rows,
3705-
&gset);
3705+
&gset,
3706+
NULL);
37063707

37073708
gs->numGroups = numGroups;
37083709
rollup->numGroups += numGroups;
@@ -3727,7 +3728,8 @@ get_number_of_groups(PlannerInfo *root,
37273728
double numGroups = estimate_num_groups(root,
37283729
groupExprs,
37293730
path_rows,
3730-
&gset);
3731+
&gset,
3732+
NULL);
37313733

37323734
gs->numGroups = numGroups;
37333735
gd->dNumHashGroups += numGroups;
@@ -3743,7 +3745,7 @@ get_number_of_groups(PlannerInfo *root,
37433745
target_list);
37443746

37453747
dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3746-
NULL);
3748+
NULL, NULL);
37473749
}
37483750
}
37493751
else if (parse->groupingSets)
@@ -4792,7 +4794,7 @@ create_distinct_paths(PlannerInfo *root,
47924794
parse->targetList);
47934795
numDistinctRows = estimate_num_groups(root, distinctExprs,
47944796
cheapest_input_path->rows,
4795-
NULL);
4797+
NULL, NULL);
47964798
}
47974799

47984800
/*

src/backend/optimizer/prep/prepunion.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
338338
*pNumGroups = estimate_num_groups(subroot,
339339
get_tlist_exprs(subquery->targetList, false),
340340
subpath->rows,
341+
NULL,
341342
NULL);
342343
}
343344
}

src/backend/optimizer/util/pathnode.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1713,6 +1713,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
17131713
pathnode->path.rows = estimate_num_groups(root,
17141714
sjinfo->semi_rhs_exprs,
17151715
rel->rows,
1716+
NULL,
17161717
NULL);
17171718
numCols = list_length(sjinfo->semi_rhs_exprs);
17181719

src/backend/utils/adt/selfuncs.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3241,6 +3241,7 @@ typedef struct
32413241
Node *var; /* might be an expression, not just a Var */
32423242
RelOptInfo *rel; /* relation it belongs to */
32433243
double ndistinct; /* # distinct values */
3244+
bool isdefault; /* true if DEFAULT_NUM_DISTINCT was used */
32443245
} GroupVarInfo;
32453246

32463247
static List *
@@ -3287,6 +3288,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
32873288
varinfo->var = var;
32883289
varinfo->rel = vardata->rel;
32893290
varinfo->ndistinct = ndistinct;
3291+
varinfo->isdefault = isdefault;
32903292
varinfos = lappend(varinfos, varinfo);
32913293
return varinfos;
32923294
}
@@ -3311,6 +3313,12 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
33113313
* pgset - NULL, or a List** pointing to a grouping set to filter the
33123314
* groupExprs against
33133315
*
3316+
* Outputs:
3317+
* estinfo - When passed as non-NULL, the function will set bits in the
3318+
* "flags" field in order to provide callers with additional information
3319+
* about the estimation. Currently, we only set the SELFLAG_USED_DEFAULT
3320+
* bit if we used any default values in the estimation.
3321+
*
33143322
* Given the lack of any cross-correlation statistics in the system, it's
33153323
* impossible to do anything really trustworthy with GROUP BY conditions
33163324
* involving multiple Vars. We should however avoid assuming the worst
@@ -3358,14 +3366,18 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
33583366
*/
33593367
double
33603368
estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
3361-
List **pgset)
3369+
List **pgset, EstimationInfo *estinfo)
33623370
{
33633371
List *varinfos = NIL;
33643372
double srf_multiplier = 1.0;
33653373
double numdistinct;
33663374
ListCell *l;
33673375
int i;
33683376

3377+
/* Zero the estinfo output parameter, if non-NULL */
3378+
if (estinfo != NULL)
3379+
memset(estinfo, 0, sizeof(EstimationInfo));
3380+
33693381
/*
33703382
* We don't ever want to return an estimate of zero groups, as that tends
33713383
* to lead to division-by-zero and other unpleasantness. The input_rows
@@ -3577,6 +3589,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
35773589
if (relmaxndistinct < varinfo2->ndistinct)
35783590
relmaxndistinct = varinfo2->ndistinct;
35793591
relvarcount++;
3592+
3593+
/*
3594+
* When varinfo2's isdefault is set then we'd better set
3595+
* the SELFLAG_USED_DEFAULT bit in the EstimationInfo.
3596+
*/
3597+
if (estinfo != NULL && varinfo2->isdefault)
3598+
estinfo->flags |= SELFLAG_USED_DEFAULT;
3599+
35803600
}
35813601

35823602
/* we're done with this relation */

src/include/utils/selfuncs.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,20 @@
6868
p = 1.0; \
6969
} while (0)
7070

71+
/*
72+
* A set of flags which some selectivity estimation functions can pass back to
73+
* callers to provide further details about some assumptions which were made
74+
* during the estimation.
75+
*/
76+
#define SELFLAG_USED_DEFAULT (1 << 0) /* Estimation fell back on one
77+
* of the DEFAULTs as defined
78+
* above. */
79+
80+
typedef struct EstimationInfo
81+
{
82+
uint32 flags; /* Flags, as defined above to mark special
83+
* properties of the estimation. */
84+
} EstimationInfo;
7185

7286
/* Return data from examine_variable and friends */
7387
typedef struct VariableStatData
@@ -197,7 +211,8 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause,
197211
Selectivity *rightstart, Selectivity *rightend);
198212

199213
extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
200-
double input_rows, List **pgset);
214+
double input_rows, List **pgset,
215+
EstimationInfo *estinfo);
201216

202217
extern void estimate_hash_bucket_stats(PlannerInfo *root,
203218
Node *hashkey, double nbuckets,

0 commit comments

Comments
 (0)