Skip to content

Commit 2c2161a

Browse files
committed
Improve planner's estimation of the size of an append relation: rather than
taking the maximum of any child rel's width, we should weight the widths proportionally to the number of rows expected from each child. In hindsight this is obviously correct because row width is really a proxy for the total physical size of the relation. Per discussion with Scott Carey (bug #4264).
1 parent f95b533 commit 2c2161a

File tree

2 files changed

+66
-27
lines changed

2 files changed

+66
-27
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.170 2008/04/01 00:48:33 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.171 2008/06/27 03:56:55 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
1515

1616
#include "postgres.h"
1717

18+
#include <math.h>
19+
1820
#ifdef OPTIMIZER_DEBUG
1921
#include "nodes/print.h"
2022
#endif
@@ -263,6 +265,10 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
263265
{
264266
int parentRTindex = rti;
265267
List *subpaths = NIL;
268+
double parent_rows;
269+
double parent_size;
270+
double *parent_attrsizes;
271+
int nattrs;
266272
ListCell *l;
267273

268274
/*
@@ -277,10 +283,23 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
277283
errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
278284

279285
/*
280-
* Initialize to compute size estimates for whole append relation
286+
* Initialize to compute size estimates for whole append relation.
287+
*
288+
* We handle width estimates by weighting the widths of different
289+
* child rels proportionally to their number of rows. This is sensible
290+
* because the use of width estimates is mainly to compute the total
291+
* relation "footprint" if we have to sort or hash it. To do this,
292+
* we sum the total equivalent size (in "double" arithmetic) and then
293+
* divide by the total rowcount estimate. This is done separately for
294+
* the total rel width and each attribute.
295+
*
296+
* Note: if you consider changing this logic, beware that child rels could
297+
* have zero rows and/or width, if they were excluded by constraints.
281298
*/
282-
rel->rows = 0;
283-
rel->width = 0;
299+
parent_rows = 0;
300+
parent_size = 0;
301+
nattrs = rel->max_attr - rel->min_attr + 1;
302+
parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
284303

285304
/*
286305
* Generate access paths for each member relation, and pick the cheapest
@@ -379,38 +398,53 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
379398
subpaths = lappend(subpaths, childpath);
380399

381400
/*
382-
* Propagate size information from the child back to the parent. For
383-
* simplicity, we use the largest widths from any child as the parent
384-
* estimates. (If you want to change this, beware of child
385-
* attr_widths[] entries that haven't been set and are still 0.)
401+
* Accumulate size information from each child.
386402
*/
387-
rel->rows += childrel->rows;
388-
if (childrel->width > rel->width)
389-
rel->width = childrel->width;
390-
391-
forboth(parentvars, rel->reltargetlist,
392-
childvars, childrel->reltargetlist)
403+
if (childrel->rows > 0)
393404
{
394-
Var *parentvar = (Var *) lfirst(parentvars);
395-
Var *childvar = (Var *) lfirst(childvars);
405+
parent_rows += childrel->rows;
406+
parent_size += childrel->width * childrel->rows;
396407

397-
if (IsA(parentvar, Var) &&
398-
IsA(childvar, Var))
408+
forboth(parentvars, rel->reltargetlist,
409+
childvars, childrel->reltargetlist)
399410
{
400-
int pndx = parentvar->varattno - rel->min_attr;
401-
int cndx = childvar->varattno - childrel->min_attr;
411+
Var *parentvar = (Var *) lfirst(parentvars);
412+
Var *childvar = (Var *) lfirst(childvars);
402413

403-
if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
404-
rel->attr_widths[pndx] = childrel->attr_widths[cndx];
414+
if (IsA(parentvar, Var) &&
415+
IsA(childvar, Var))
416+
{
417+
int pndx = parentvar->varattno - rel->min_attr;
418+
int cndx = childvar->varattno - childrel->min_attr;
419+
420+
parent_attrsizes[pndx] += childrel->attr_widths[cndx] * childrel->rows;
421+
}
405422
}
406423
}
407424
}
408425

426+
/*
427+
* Save the finished size estimates.
428+
*/
429+
rel->rows = parent_rows;
430+
if (parent_rows > 0)
431+
{
432+
int i;
433+
434+
rel->width = rint(parent_size / parent_rows);
435+
for (i = 0; i < nattrs; i++)
436+
rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
437+
}
438+
else
439+
rel->width = 0; /* attr_widths should be zero already */
440+
409441
/*
410442
* Set "raw tuples" count equal to "rows" for the appendrel; needed
411443
* because some places assume rel->tuples is valid for any baserel.
412444
*/
413-
rel->tuples = rel->rows;
445+
rel->tuples = parent_rows;
446+
447+
pfree(parent_attrsizes);
414448

415449
/*
416450
* Finally, build Append path and install it as the only access path for

src/backend/optimizer/plan/createplan.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,14 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.240 2008/04/17 21:22:14 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.241 2008/06/27 03:56:55 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
1717
#include "postgres.h"
1818

1919
#include <limits.h>
20+
#include <math.h>
2021

2122
#include "access/skey.h"
2223
#include "nodes/makefuncs.h"
@@ -2312,6 +2313,7 @@ make_append(List *appendplans, bool isTarget, List *tlist)
23122313
{
23132314
Append *node = makeNode(Append);
23142315
Plan *plan = &node->plan;
2316+
double total_size;
23152317
ListCell *subnode;
23162318

23172319
/*
@@ -2322,7 +2324,7 @@ make_append(List *appendplans, bool isTarget, List *tlist)
23222324
plan->startup_cost = 0;
23232325
plan->total_cost = 0;
23242326
plan->plan_rows = 0;
2325-
plan->plan_width = 0;
2327+
total_size = 0;
23262328
foreach(subnode, appendplans)
23272329
{
23282330
Plan *subplan = (Plan *) lfirst(subnode);
@@ -2331,9 +2333,12 @@ make_append(List *appendplans, bool isTarget, List *tlist)
23312333
plan->startup_cost = subplan->startup_cost;
23322334
plan->total_cost += subplan->total_cost;
23332335
plan->plan_rows += subplan->plan_rows;
2334-
if (plan->plan_width < subplan->plan_width)
2335-
plan->plan_width = subplan->plan_width;
2336+
total_size += subplan->plan_width * subplan->plan_rows;
23362337
}
2338+
if (plan->plan_rows > 0)
2339+
plan->plan_width = rint(total_size / plan->plan_rows);
2340+
else
2341+
plan->plan_width = 0;
23372342

23382343
plan->targetlist = tlist;
23392344
plan->qual = NIL;

0 commit comments

Comments
 (0)