Skip to content

Commit 53434c6

Browse files
committed
Improve eqjoinsel's ndistinct clamping to work for multiple levels of join.
This patch fixes an oversight in my commit 7f3eba3 of 2008-10-23. That patch accounted for baserel restriction clauses that reduced the number of rows coming out of a table (and hence the number of possibly-distinct values of a join variable), but not for join restriction clauses that might have been applied at a lower level of join. To account for the latter, look up the sizes of the min_lefthand and min_righthand inputs of the current join, and clamp with those in the same way as for the base relations. Noted while investigating a complaint from Ben Chobot, although this in itself doesn't seem to explain his report. Back-patch to 8.4; previous versions used different estimation methods for which this heuristic isn't relevant.
1 parent 047f205 commit 53434c6

File tree

1 file changed

+73
-8
lines changed

1 file changed

+73
-8
lines changed

src/backend/utils/adt/selfuncs.c

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
139139
FmgrInfo *opproc, bool isgt,
140140
Datum constval, Oid consttype);
141141
static double eqjoinsel_inner(Oid operator,
142-
VariableStatData *vardata1, VariableStatData *vardata2);
142+
VariableStatData *vardata1, VariableStatData *vardata2,
143+
RelOptInfo *rel1, RelOptInfo *rel2);
143144
static double eqjoinsel_semi(Oid operator,
144-
VariableStatData *vardata1, VariableStatData *vardata2);
145+
VariableStatData *vardata1, VariableStatData *vardata2,
146+
RelOptInfo *rel1, RelOptInfo *rel2);
145147
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
146148
Datum lobound, Datum hibound, Oid boundstypid,
147149
double *scaledlobound, double *scaledhibound);
@@ -170,6 +172,7 @@ static bool get_actual_variable_range(PlannerInfo *root,
170172
VariableStatData *vardata,
171173
Oid sortop,
172174
Datum *min, Datum *max);
175+
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
173176
static Selectivity prefix_selectivity(PlannerInfo *root,
174177
VariableStatData *vardata,
175178
Oid vartype, Oid opfamily, Const *prefixcon);
@@ -1990,24 +1993,47 @@ eqjoinsel(PG_FUNCTION_ARGS)
19901993
VariableStatData vardata1;
19911994
VariableStatData vardata2;
19921995
bool join_is_reversed;
1996+
RelOptInfo *rel1;
1997+
RelOptInfo *rel2;
19931998

19941999
get_join_variables(root, args, sjinfo,
19952000
&vardata1, &vardata2, &join_is_reversed);
19962001

2002+
/*
2003+
* Identify the join's direct input relations. We use the min lefthand
2004+
* and min righthand as the inputs, even though the join might actually
2005+
* get done with larger input relations. The min inputs are guaranteed to
2006+
* have been formed by now, though, and always using them ensures
2007+
* consistency of estimates.
2008+
*/
2009+
if (!join_is_reversed)
2010+
{
2011+
rel1 = find_join_input_rel(root, sjinfo->min_lefthand);
2012+
rel2 = find_join_input_rel(root, sjinfo->min_righthand);
2013+
}
2014+
else
2015+
{
2016+
rel1 = find_join_input_rel(root, sjinfo->min_righthand);
2017+
rel2 = find_join_input_rel(root, sjinfo->min_lefthand);
2018+
}
2019+
19972020
switch (sjinfo->jointype)
19982021
{
19992022
case JOIN_INNER:
20002023
case JOIN_LEFT:
20012024
case JOIN_FULL:
2002-
selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
2025+
selec = eqjoinsel_inner(operator, &vardata1, &vardata2,
2026+
rel1, rel2);
20032027
break;
20042028
case JOIN_SEMI:
20052029
case JOIN_ANTI:
20062030
if (!join_is_reversed)
2007-
selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
2031+
selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
2032+
rel1, rel2);
20082033
else
20092034
selec = eqjoinsel_semi(get_commutator(operator),
2010-
&vardata2, &vardata1);
2035+
&vardata2, &vardata1,
2036+
rel2, rel1);
20112037
break;
20122038
default:
20132039
/* other values not expected here */
@@ -2033,7 +2059,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
20332059
*/
20342060
static double
20352061
eqjoinsel_inner(Oid operator,
2036-
VariableStatData *vardata1, VariableStatData *vardata2)
2062+
VariableStatData *vardata1, VariableStatData *vardata2,
2063+
RelOptInfo *rel1, RelOptInfo *rel2)
20372064
{
20382065
double selec;
20392066
double nd1;
@@ -2233,15 +2260,19 @@ eqjoinsel_inner(Oid operator,
22332260
* be, providing a crude correction for the selectivity of restriction
22342261
* clauses on those relations. (We don't do that in the other path
22352262
* since there we are comparing the nd values to stats for the whole
2236-
* relations.)
2263+
* relations.) We can apply this clamp both with respect to the base
2264+
* relations from which the join variables come, and to the immediate
2265+
* input relations of the current join.
22372266
*/
22382267
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
22392268
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
22402269

22412270
if (vardata1->rel)
22422271
nd1 = Min(nd1, vardata1->rel->rows);
2272+
nd1 = Min(nd1, rel1->rows);
22432273
if (vardata2->rel)
22442274
nd2 = Min(nd2, vardata2->rel->rows);
2275+
nd2 = Min(nd2, rel2->rows);
22452276

22462277
selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
22472278
if (nd1 > nd2)
@@ -2268,7 +2299,8 @@ eqjoinsel_inner(Oid operator,
22682299
*/
22692300
static double
22702301
eqjoinsel_semi(Oid operator,
2271-
VariableStatData *vardata1, VariableStatData *vardata2)
2302+
VariableStatData *vardata1, VariableStatData *vardata2,
2303+
RelOptInfo *rel1, RelOptInfo *rel2)
22722304
{
22732305
double selec;
22742306
double nd1;
@@ -2417,8 +2449,10 @@ eqjoinsel_semi(Oid operator,
24172449
{
24182450
if (vardata1->rel)
24192451
nd1 = Min(nd1, vardata1->rel->rows);
2452+
nd1 = Min(nd1, rel1->rows);
24202453
if (vardata2->rel)
24212454
nd2 = Min(nd2, vardata2->rel->rows);
2455+
nd2 = Min(nd2, rel2->rows);
24222456

24232457
if (nd1 <= nd2 || nd2 <= 0)
24242458
selec = 1.0 - nullfrac1;
@@ -4722,6 +4756,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
47224756
return have_data;
47234757
}
47244758

4759+
/*
4760+
* find_join_input_rel
4761+
* Look up the input relation for a join.
4762+
*
4763+
* We assume that the input relation's RelOptInfo must have been constructed
4764+
* already.
4765+
*/
4766+
static RelOptInfo *
4767+
find_join_input_rel(PlannerInfo *root, Relids relids)
4768+
{
4769+
RelOptInfo *rel = NULL;
4770+
4771+
switch (bms_membership(relids))
4772+
{
4773+
case BMS_EMPTY_SET:
4774+
/* should not happen */
4775+
break;
4776+
case BMS_SINGLETON:
4777+
rel = find_base_rel(root, bms_singleton_member(relids));
4778+
break;
4779+
case BMS_MULTIPLE:
4780+
rel = find_join_rel(root, relids);
4781+
break;
4782+
}
4783+
4784+
if (rel == NULL)
4785+
elog(ERROR, "could not find RelOptInfo for given relids");
4786+
4787+
return rel;
4788+
}
4789+
47254790

47264791
/*-------------------------------------------------------------------------
47274792
*

0 commit comments

Comments
 (0)