Skip to content

Commit c4d0ff3

Browse files
committed
Make estimation of mergejoin scan selectivities more robust, per recent
example from RaÇl GutiÅrrez.
1 parent c7b4047 commit c4d0ff3

File tree

2 files changed

+41
-11
lines changed

2 files changed

+41
-11
lines changed

src/backend/optimizer/path/costsize.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
* Portions Copyright (c) 1994, Regents of the University of California
4343
*
4444
* IDENTIFICATION
45-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.101 2003/01/20 18:54:49 tgl Exp $
45+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.102 2003/01/22 20:16:40 tgl Exp $
4646
*
4747
*-------------------------------------------------------------------------
4848
*/
@@ -791,8 +791,22 @@ cost_mergejoin(Path *path, Query *root,
791791
innerscansel = firstclause->left_mergescansel;
792792
}
793793

794+
/* convert selectivity to row count; must scan at least one row */
795+
794796
outer_rows = ceil(outer_path->parent->rows * outerscansel);
797+
if (outer_rows < 1)
798+
outer_rows = 1;
795799
inner_rows = ceil(inner_path->parent->rows * innerscansel);
800+
if (inner_rows < 1)
801+
inner_rows = 1;
802+
803+
/*
804+
* Readjust scan selectivities to account for above rounding. This is
805+
* normally an insignificant effect, but when there are only a few rows
806+
* in the inputs, failing to do this makes for a large percentage error.
807+
*/
808+
outerscansel = outer_rows / outer_path->parent->rows;
809+
innerscansel = inner_rows / inner_path->parent->rows;
796810

797811
/* cost of source data */
798812

src/backend/utils/adt/selfuncs.c

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.127 2003/01/20 18:54:59 tgl Exp $
18+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.128 2003/01/22 20:16:42 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -1742,7 +1742,9 @@ mergejoinscansel(Query *root, Node *clause,
17421742
rsortop,
17431743
ltop,
17441744
gtop,
1745-
revltop;
1745+
leop,
1746+
revgtop,
1747+
revleop;
17461748
Datum leftmax,
17471749
rightmax;
17481750
double selec;
@@ -1780,35 +1782,49 @@ mergejoinscansel(Query *root, Node *clause,
17801782
/* Look up the "left < right" and "left > right" operators */
17811783
op_mergejoin_crossops(opno, &ltop, &gtop, NULL, NULL);
17821784

1783-
/* Look up the "right < left" operator */
1784-
revltop = get_commutator(gtop);
1785-
if (!OidIsValid(revltop))
1786-
return; /* shouldn't happen */
1785+
/* Look up the "left <= right" operator */
1786+
leop = get_negator(gtop);
1787+
if (!OidIsValid(leop))
1788+
return; /* insufficient info in catalogs */
1789+
1790+
/* Look up the "right > left" operator */
1791+
revgtop = get_commutator(ltop);
1792+
if (!OidIsValid(revgtop))
1793+
return; /* insufficient info in catalogs */
1794+
1795+
/* Look up the "right <= left" operator */
1796+
revleop = get_negator(revgtop);
1797+
if (!OidIsValid(revleop))
1798+
return; /* insufficient info in catalogs */
17871799

17881800
/*
17891801
* Now, the fraction of the left variable that will be scanned is the
17901802
* fraction that's <= the right-side maximum value. But only believe
17911803
* non-default estimates, else stick with our 1.0.
17921804
*/
1793-
selec = scalarineqsel(root, ltop, false, left,
1805+
selec = scalarineqsel(root, leop, false, left,
17941806
rightmax, right->vartype);
17951807
if (selec != DEFAULT_INEQ_SEL)
17961808
*leftscan = selec;
17971809

17981810
/* And similarly for the right variable. */
1799-
selec = scalarineqsel(root, revltop, false, right,
1811+
selec = scalarineqsel(root, revleop, false, right,
18001812
leftmax, left->vartype);
18011813
if (selec != DEFAULT_INEQ_SEL)
18021814
*rightscan = selec;
18031815

18041816
/*
18051817
* Only one of the two fractions can really be less than 1.0; believe
1806-
* the smaller estimate and reset the other one to exactly 1.0.
1818+
* the smaller estimate and reset the other one to exactly 1.0. If we
1819+
* get exactly equal estimates (as can easily happen with self-joins),
1820+
* believe neither.
18071821
*/
18081822
if (*leftscan > *rightscan)
18091823
*leftscan = 1.0;
1810-
else
1824+
else if (*leftscan < *rightscan)
18111825
*rightscan = 1.0;
1826+
else
1827+
*leftscan = *rightscan = 1.0;
18121828
}
18131829

18141830
/*

0 commit comments

Comments
 (0)