Skip to content

Commit 9391f71

Browse files
committed
Teach estimate_array_length() to use statistics where available.
If we have DECHIST statistics about the argument expression, use the average number of distinct elements as the array length estimate. (It'd be better to use the average total number of elements, but that is not currently calculated by compute_array_stats(), and it's unclear that it'd be worth extra effort to get.) To do this, we have to change the signature of estimate_array_length to pass the "root" pointer. While at it, also change its result type to "double". That's probably not really necessary, but it avoids any risk of overflow of the value extracted from DECHIST. All existing callers are going to use the result in a "double" calculation anyway. Paul Jungwirth, reviewed by Jian He and myself Discussion: https://postgr.es/m/CA+renyUnM2d+SmrxKpDuAdpiq6FOM=FByvi6aS6yi__qyf6j9A@mail.gmail.com
1 parent 14dd0f2 commit 9391f71

File tree

4 files changed

+45
-16
lines changed

4 files changed

+45
-16
lines changed

src/backend/optimizer/path/costsize.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,7 +1256,7 @@ cost_tidscan(Path *path, PlannerInfo *root,
12561256
QualCost qpqual_cost;
12571257
Cost cpu_per_tuple;
12581258
QualCost tid_qual_cost;
1259-
int ntuples;
1259+
double ntuples;
12601260
ListCell *l;
12611261
double spc_random_page_cost;
12621262

@@ -1283,7 +1283,7 @@ cost_tidscan(Path *path, PlannerInfo *root,
12831283
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual;
12841284
Node *arraynode = (Node *) lsecond(saop->args);
12851285

1286-
ntuples += estimate_array_length(arraynode);
1286+
ntuples += estimate_array_length(root, arraynode);
12871287
}
12881288
else if (IsA(qual, CurrentOfExpr))
12891289
{
@@ -4770,7 +4770,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
47704770
Node *arraynode = (Node *) lsecond(saop->args);
47714771
QualCost sacosts;
47724772
QualCost hcosts;
4773-
int estarraylen = estimate_array_length(arraynode);
4773+
double estarraylen = estimate_array_length(context->root, arraynode);
47744774

47754775
set_sa_opfuncid(saop);
47764776
sacosts.startup = sacosts.per_tuple = 0;
@@ -4808,7 +4808,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
48084808
*/
48094809
context->total.startup += sacosts.startup;
48104810
context->total.per_tuple += sacosts.per_tuple *
4811-
estimate_array_length(arraynode) * 0.5;
4811+
estimate_array_length(context->root, arraynode) * 0.5;
48124812
}
48134813
}
48144814
else if (IsA(node, Aggref) ||
@@ -4859,7 +4859,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
48594859
context->total.startup += perelemcost.startup;
48604860
if (perelemcost.per_tuple > 0)
48614861
context->total.per_tuple += perelemcost.per_tuple *
4862-
estimate_array_length((Node *) acoerce->arg);
4862+
estimate_array_length(context->root, (Node *) acoerce->arg);
48634863
}
48644864
else if (IsA(node, RowCompareExpr))
48654865
{

src/backend/utils/adt/arrayfuncs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6340,7 +6340,7 @@ array_unnest_support(PG_FUNCTION_ARGS)
63406340
/* We can use estimated argument values here */
63416341
arg1 = estimate_expression_value(req->root, linitial(args));
63426342

6343-
req->rows = estimate_array_length(arg1);
6343+
req->rows = estimate_array_length(req->root, arg1);
63446344
ret = (Node *) req;
63456345
}
63466346
}

src/backend/utils/adt/selfuncs.c

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2128,10 +2128,11 @@ scalararraysel(PlannerInfo *root,
21282128
/*
21292129
* Estimate number of elements in the array yielded by an expression.
21302130
*
2131-
* It's important that this agree with scalararraysel.
2131+
* Note: the result is integral, but we use "double" to avoid overflow
2132+
* concerns. Most callers will use it in double-type expressions anyway.
21322133
*/
2133-
int
2134-
estimate_array_length(Node *arrayexpr)
2134+
double
2135+
estimate_array_length(PlannerInfo *root, Node *arrayexpr)
21352136
{
21362137
/* look through any binary-compatible relabeling of arrayexpr */
21372138
arrayexpr = strip_array_coercion(arrayexpr);
@@ -2152,11 +2153,39 @@ estimate_array_length(Node *arrayexpr)
21522153
{
21532154
return list_length(((ArrayExpr *) arrayexpr)->elements);
21542155
}
2155-
else
2156+
else if (arrayexpr)
21562157
{
2157-
/* default guess --- see also scalararraysel */
2158-
return 10;
2158+
/* See if we can find any statistics about it */
2159+
VariableStatData vardata;
2160+
AttStatsSlot sslot;
2161+
double nelem = 0;
2162+
2163+
examine_variable(root, arrayexpr, 0, &vardata);
2164+
if (HeapTupleIsValid(vardata.statsTuple))
2165+
{
2166+
/*
2167+
* Found stats, so use the average element count, which is stored
2168+
* in the last stanumbers element of the DECHIST statistics.
2169+
* Actually that is the average count of *distinct* elements;
2170+
* perhaps we should scale it up somewhat?
2171+
*/
2172+
if (get_attstatsslot(&sslot, vardata.statsTuple,
2173+
STATISTIC_KIND_DECHIST, InvalidOid,
2174+
ATTSTATSSLOT_NUMBERS))
2175+
{
2176+
if (sslot.nnumbers > 0)
2177+
nelem = clamp_row_est(sslot.numbers[sslot.nnumbers - 1]);
2178+
free_attstatsslot(&sslot);
2179+
}
2180+
}
2181+
ReleaseVariableStats(vardata);
2182+
2183+
if (nelem > 0)
2184+
return nelem;
21592185
}
2186+
2187+
/* Else use a default guess --- this should match scalararraysel */
2188+
return 10;
21602189
}
21612190

21622191
/*
@@ -6540,7 +6569,7 @@ genericcostestimate(PlannerInfo *root,
65406569
if (IsA(rinfo->clause, ScalarArrayOpExpr))
65416570
{
65426571
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6543-
int alength = estimate_array_length(lsecond(saop->args));
6572+
double alength = estimate_array_length(root, lsecond(saop->args));
65446573

65456574
if (alength > 1)
65466575
num_sa_scans *= alength;
@@ -6820,7 +6849,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
68206849
{
68216850
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
68226851
Node *other_operand = (Node *) lsecond(saop->args);
6823-
int alength = estimate_array_length(other_operand);
6852+
double alength = estimate_array_length(root, other_operand);
68246853

68256854
clause_op = saop->opno;
68266855
found_saop = true;
@@ -7414,7 +7443,7 @@ gincost_scalararrayopexpr(PlannerInfo *root,
74147443
{
74157444
counts->exactEntries++;
74167445
counts->searchEntries++;
7417-
counts->arrayScans *= estimate_array_length(rightop);
7446+
counts->arrayScans *= estimate_array_length(root, rightop);
74187447
return true;
74197448
}
74207449

src/include/utils/selfuncs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ extern Selectivity scalararraysel(PlannerInfo *root,
200200
ScalarArrayOpExpr *clause,
201201
bool is_join_clause,
202202
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
203-
extern int estimate_array_length(Node *arrayexpr);
203+
extern double estimate_array_length(PlannerInfo *root, Node *arrayexpr);
204204
extern Selectivity rowcomparesel(PlannerInfo *root,
205205
RowCompareExpr *clause,
206206
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);

0 commit comments

Comments
 (0)