Skip to content

Commit 9fd8843

Browse files
committed
Fix mergejoin cost estimation so that we consider the statistical ranges of
the two join variables at both ends: not only trailing rows that need not be scanned because there cannot be a match on the other side, but initial rows that will be scanned without possibly having a match. This allows a more realistic estimate of startup cost to be made, per recent pgsql-performance discussion. In passing, fix a couple of bugs that had crept into mergejoinscansel: it was not quite up to speed for the task of estimating descending-order scans, which is a new requirement in 8.3.
1 parent 8821612 commit 9fd8843

File tree

4 files changed

+345
-148
lines changed

4 files changed

+345
-148
lines changed

src/backend/optimizer/path/costsize.c

Lines changed: 78 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
* Portions Copyright (c) 1994, Regents of the University of California
5555
*
5656
* IDENTIFICATION
57-
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.189 2007/11/15 22:25:15 momjian Exp $
57+
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.190 2007/12/08 21:05:11 tgl Exp $
5858
*
5959
*-------------------------------------------------------------------------
6060
*/
@@ -1372,12 +1372,16 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
13721372
double outer_path_rows = PATH_ROWS(outer_path);
13731373
double inner_path_rows = PATH_ROWS(inner_path);
13741374
double outer_rows,
1375-
inner_rows;
1375+
inner_rows,
1376+
outer_skip_rows,
1377+
inner_skip_rows;
13761378
double mergejointuples,
13771379
rescannedtuples;
13781380
double rescanratio;
1379-
Selectivity outerscansel,
1380-
innerscansel;
1381+
Selectivity outerstartsel,
1382+
outerendsel,
1383+
innerstartsel,
1384+
innerendsel;
13811385
Selectivity joininfactor;
13821386
Path sort_path; /* dummy for result of cost_sort */
13831387

@@ -1444,10 +1448,12 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
14441448
* A merge join will stop as soon as it exhausts either input stream
14451449
* (unless it's an outer join, in which case the outer side has to be
14461450
* scanned all the way anyway). Estimate fraction of the left and right
1447-
* inputs that will actually need to be scanned. We use only the first
1448-
* (most significant) merge clause for this purpose. Since
1449-
* mergejoinscansel() is a fairly expensive computation, we cache the
1450-
* results in the merge clause RestrictInfo.
1451+
* inputs that will actually need to be scanned. Likewise, we can
1452+
* estimate the number of rows that will be skipped before the first
1453+
* join pair is found, which should be factored into startup cost.
1454+
* We use only the first (most significant) merge clause for this purpose.
1455+
* Since mergejoinscansel() is a fairly expensive computation, we cache
1456+
* the results in the merge clause RestrictInfo.
14511457
*/
14521458
if (mergeclauses && path->jpath.jointype != JOIN_FULL)
14531459
{
@@ -1478,37 +1484,61 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
14781484
outer_path->parent->relids))
14791485
{
14801486
/* left side of clause is outer */
1481-
outerscansel = cache->leftscansel;
1482-
innerscansel = cache->rightscansel;
1487+
outerstartsel = cache->leftstartsel;
1488+
outerendsel = cache->leftendsel;
1489+
innerstartsel = cache->rightstartsel;
1490+
innerendsel = cache->rightendsel;
14831491
}
14841492
else
14851493
{
14861494
/* left side of clause is inner */
1487-
outerscansel = cache->rightscansel;
1488-
innerscansel = cache->leftscansel;
1495+
outerstartsel = cache->rightstartsel;
1496+
outerendsel = cache->rightendsel;
1497+
innerstartsel = cache->leftstartsel;
1498+
innerendsel = cache->leftendsel;
14891499
}
14901500
if (path->jpath.jointype == JOIN_LEFT)
1491-
outerscansel = 1.0;
1501+
{
1502+
outerstartsel = 0.0;
1503+
outerendsel = 1.0;
1504+
}
14921505
else if (path->jpath.jointype == JOIN_RIGHT)
1493-
innerscansel = 1.0;
1506+
{
1507+
innerstartsel = 0.0;
1508+
innerendsel = 1.0;
1509+
}
14941510
}
14951511
else
14961512
{
14971513
/* cope with clauseless or full mergejoin */
1498-
outerscansel = innerscansel = 1.0;
1514+
outerstartsel = innerstartsel = 0.0;
1515+
outerendsel = innerendsel = 1.0;
14991516
}
15001517

1501-
/* convert selectivity to row count; must scan at least one row */
1502-
outer_rows = clamp_row_est(outer_path_rows * outerscansel);
1503-
inner_rows = clamp_row_est(inner_path_rows * innerscansel);
1518+
/*
1519+
* Convert selectivities to row counts. We force outer_rows and
1520+
* inner_rows to be at least 1, but the skip_rows estimates can be zero.
1521+
*/
1522+
outer_skip_rows = rint(outer_path_rows * outerstartsel);
1523+
inner_skip_rows = rint(inner_path_rows * innerstartsel);
1524+
outer_rows = clamp_row_est(outer_path_rows * outerendsel);
1525+
inner_rows = clamp_row_est(inner_path_rows * innerendsel);
1526+
1527+
Assert(outer_skip_rows <= outer_rows);
1528+
Assert(inner_skip_rows <= inner_rows);
15041529

15051530
/*
15061531
* Readjust scan selectivities to account for above rounding. This is
15071532
* normally an insignificant effect, but when there are only a few rows in
15081533
* the inputs, failing to do this makes for a large percentage error.
15091534
*/
1510-
outerscansel = outer_rows / outer_path_rows;
1511-
innerscansel = inner_rows / inner_path_rows;
1535+
outerstartsel = outer_skip_rows / outer_path_rows;
1536+
innerstartsel = inner_skip_rows / inner_path_rows;
1537+
outerendsel = outer_rows / outer_path_rows;
1538+
innerendsel = inner_rows / inner_path_rows;
1539+
1540+
Assert(outerstartsel <= outerendsel);
1541+
Assert(innerstartsel <= innerendsel);
15121542

15131543
/* cost of source data */
15141544

@@ -1522,14 +1552,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
15221552
outer_path->parent->width,
15231553
-1.0);
15241554
startup_cost += sort_path.startup_cost;
1555+
startup_cost += (sort_path.total_cost - sort_path.startup_cost)
1556+
* outerstartsel;
15251557
run_cost += (sort_path.total_cost - sort_path.startup_cost)
1526-
* outerscansel;
1558+
* (outerendsel - outerstartsel);
15271559
}
15281560
else
15291561
{
15301562
startup_cost += outer_path->startup_cost;
1563+
startup_cost += (outer_path->total_cost - outer_path->startup_cost)
1564+
* outerstartsel;
15311565
run_cost += (outer_path->total_cost - outer_path->startup_cost)
1532-
* outerscansel;
1566+
* (outerendsel - outerstartsel);
15331567
}
15341568

15351569
if (innersortkeys) /* do we need to sort inner? */
@@ -1542,14 +1576,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
15421576
inner_path->parent->width,
15431577
-1.0);
15441578
startup_cost += sort_path.startup_cost;
1579+
startup_cost += (sort_path.total_cost - sort_path.startup_cost)
1580+
* innerstartsel * rescanratio;
15451581
run_cost += (sort_path.total_cost - sort_path.startup_cost)
1546-
* innerscansel * rescanratio;
1582+
* (innerendsel - innerstartsel) * rescanratio;
15471583
}
15481584
else
15491585
{
15501586
startup_cost += inner_path->startup_cost;
1587+
startup_cost += (inner_path->total_cost - inner_path->startup_cost)
1588+
* innerstartsel * rescanratio;
15511589
run_cost += (inner_path->total_cost - inner_path->startup_cost)
1552-
* innerscansel * rescanratio;
1590+
* (innerendsel - innerstartsel) * rescanratio;
15531591
}
15541592

15551593
/* CPU costs */
@@ -1571,8 +1609,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
15711609
* joininfactor.
15721610
*/
15731611
startup_cost += merge_qual_cost.startup;
1612+
startup_cost += merge_qual_cost.per_tuple *
1613+
(outer_skip_rows + inner_skip_rows * rescanratio);
15741614
run_cost += merge_qual_cost.per_tuple *
1575-
(outer_rows + inner_rows * rescanratio);
1615+
((outer_rows - outer_skip_rows) +
1616+
(inner_rows - inner_skip_rows) * rescanratio);
15761617

15771618
/*
15781619
* For each tuple that gets through the mergejoin proper, we charge
@@ -1597,8 +1638,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
15971638
{
15981639
MergeScanSelCache *cache;
15991640
ListCell *lc;
1600-
Selectivity leftscansel,
1601-
rightscansel;
1641+
Selectivity leftstartsel,
1642+
leftendsel,
1643+
rightstartsel,
1644+
rightendsel;
16021645
MemoryContext oldcontext;
16031646

16041647
/* Do we have this result already? */
@@ -1617,8 +1660,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
16171660
pathkey->pk_opfamily,
16181661
pathkey->pk_strategy,
16191662
pathkey->pk_nulls_first,
1620-
&leftscansel,
1621-
&rightscansel);
1663+
&leftstartsel,
1664+
&leftendsel,
1665+
&rightstartsel,
1666+
&rightendsel);
16221667

16231668
/* Cache the result in suitably long-lived workspace */
16241669
oldcontext = MemoryContextSwitchTo(root->planner_cxt);
@@ -1627,8 +1672,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
16271672
cache->opfamily = pathkey->pk_opfamily;
16281673
cache->strategy = pathkey->pk_strategy;
16291674
cache->nulls_first = pathkey->pk_nulls_first;
1630-
cache->leftscansel = leftscansel;
1631-
cache->rightscansel = rightscansel;
1675+
cache->leftstartsel = leftstartsel;
1676+
cache->leftendsel = leftendsel;
1677+
cache->rightstartsel = rightstartsel;
1678+
cache->rightendsel = rightendsel;
16321679

16331680
rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache);
16341681

0 commit comments

Comments
 (0)