Skip to content

Commit fb18055

Browse files
committed
Repair bug #4926 "too few pathkeys for mergeclauses". This example shows
that the sanity checking I added to create_mergejoin_plan() in 8.3 was a few bricks shy of a load: the mergeclauses could reference pathkeys in a noncanonical order such as x,y,x, not only cases like x,x,y which is all that the code had allowed for. The odd cases only turn up when using redundant clauses in an outer join condition, which is why no one had noticed before.
1 parent f5bc741 commit fb18055

File tree

4 files changed

+147
-32
lines changed

4 files changed

+147
-32
lines changed

src/backend/optimizer/path/pathkeys.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Portions Copyright (c) 1994, Regents of the University of California
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.97 2009/02/28 03:51:05 tgl Exp $
14+
* $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.98 2009/07/17 23:19:34 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -988,12 +988,21 @@ find_mergeclauses_for_pathkeys(PlannerInfo *root,
988988
* no two members have the same EC, so it's not possible for this
989989
* code to enter the same mergeclause into the result list twice.
990990
*
991-
* XXX it's possible that multiple matching clauses might have
992-
* different ECs on the other side, in which case the order we put
993-
* them into our result makes a difference in the pathkeys required
994-
* for the other input path. However this routine hasn't got any info
995-
* about which order would be best, so for now we disregard that case
996-
* (which is probably a corner case anyway).
991+
* It's possible that multiple matching clauses might have different
992+
* ECs on the other side, in which case the order we put them into our
993+
* result makes a difference in the pathkeys required for the other
994+
* input path. However this routine hasn't got any info about which
995+
* order would be best, so we don't worry about that.
996+
*
997+
* It's also possible that the selected mergejoin clauses produce
998+
* a noncanonical ordering of pathkeys for the other side, ie, we
999+
* might select clauses that reference b.v1, b.v2, b.v1 in that
1000+
* order. This is not harmful in itself, though it suggests that
1001+
* the clauses are partially redundant. Since it happens only with
1002+
* redundant query conditions, we don't bother to eliminate it.
1003+
* make_inner_pathkeys_for_merge() has to delete duplicates when
1004+
* it constructs the canonical pathkeys list, and we also have to
1005+
* deal with the case in create_mergejoin_plan().
9971006
*----------
9981007
*/
9991008
foreach(j, restrictinfos)

src/backend/optimizer/plan/createplan.c

Lines changed: 99 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.260 2009/06/11 14:48:59 momjian Exp $
13+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.261 2009/07/17 23:19:34 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -1620,10 +1620,6 @@ create_mergejoin_plan(PlannerInfo *root,
16201620
bool *mergenullsfirst;
16211621
MergeJoin *join_plan;
16221622
int i;
1623-
EquivalenceClass *lastoeclass;
1624-
EquivalenceClass *lastieclass;
1625-
PathKey *opathkey;
1626-
PathKey *ipathkey;
16271623
ListCell *lc;
16281624
ListCell *lop;
16291625
ListCell *lip;
@@ -1729,10 +1725,6 @@ create_mergejoin_plan(PlannerInfo *root,
17291725
mergestrategies = (int *) palloc(nClauses * sizeof(int));
17301726
mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool));
17311727

1732-
lastoeclass = NULL;
1733-
lastieclass = NULL;
1734-
opathkey = NULL;
1735-
ipathkey = NULL;
17361728
lop = list_head(outerpathkeys);
17371729
lip = list_head(innerpathkeys);
17381730
i = 0;
@@ -1741,6 +1733,11 @@ create_mergejoin_plan(PlannerInfo *root,
17411733
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
17421734
EquivalenceClass *oeclass;
17431735
EquivalenceClass *ieclass;
1736+
PathKey *opathkey;
1737+
PathKey *ipathkey;
1738+
EquivalenceClass *opeclass;
1739+
EquivalenceClass *ipeclass;
1740+
ListCell *l2;
17441741

17451742
/* fetch outer/inner eclass from mergeclause */
17461743
Assert(IsA(rinfo, RestrictInfo));
@@ -1757,28 +1754,100 @@ create_mergejoin_plan(PlannerInfo *root,
17571754
Assert(oeclass != NULL);
17581755
Assert(ieclass != NULL);
17591756

1760-
/* should match current or next pathkeys */
1761-
/* we check this carefully for debugging reasons */
1762-
if (oeclass != lastoeclass)
1757+
/*
1758+
* For debugging purposes, we check that the eclasses match the
1759+
* paths' pathkeys. In typical cases the merge clauses are one-to-one
1760+
* with the pathkeys, but when dealing with partially redundant query
1761+
* conditions, we might have clauses that re-reference earlier path
1762+
* keys. The case that we need to reject is where a pathkey is
1763+
* entirely skipped over.
1764+
*
1765+
* lop and lip reference the first as-yet-unused pathkey elements;
1766+
* it's okay to match them, or any element before them. If they're
1767+
* NULL then we have found all pathkey elements to be used.
1768+
*/
1769+
if (lop)
17631770
{
1764-
if (!lop)
1765-
elog(ERROR, "too few pathkeys for mergeclauses");
17661771
opathkey = (PathKey *) lfirst(lop);
1767-
lop = lnext(lop);
1768-
lastoeclass = opathkey->pk_eclass;
1769-
if (oeclass != lastoeclass)
1770-
elog(ERROR, "outer pathkeys do not match mergeclause");
1772+
opeclass = opathkey->pk_eclass;
1773+
if (oeclass == opeclass)
1774+
{
1775+
/* fast path for typical case */
1776+
lop = lnext(lop);
1777+
}
1778+
else
1779+
{
1780+
/* redundant clauses ... must match something before lop */
1781+
foreach(l2, outerpathkeys)
1782+
{
1783+
if (l2 == lop)
1784+
break;
1785+
opathkey = (PathKey *) lfirst(l2);
1786+
opeclass = opathkey->pk_eclass;
1787+
if (oeclass == opeclass)
1788+
break;
1789+
}
1790+
if (oeclass != opeclass)
1791+
elog(ERROR, "outer pathkeys do not match mergeclauses");
1792+
}
17711793
}
1772-
if (ieclass != lastieclass)
1794+
else
1795+
{
1796+
/* redundant clauses ... must match some already-used pathkey */
1797+
opathkey = NULL;
1798+
opeclass = NULL;
1799+
foreach(l2, outerpathkeys)
1800+
{
1801+
opathkey = (PathKey *) lfirst(l2);
1802+
opeclass = opathkey->pk_eclass;
1803+
if (oeclass == opeclass)
1804+
break;
1805+
}
1806+
if (l2 == NULL)
1807+
elog(ERROR, "outer pathkeys do not match mergeclauses");
1808+
}
1809+
1810+
if (lip)
17731811
{
1774-
if (!lip)
1775-
elog(ERROR, "too few pathkeys for mergeclauses");
17761812
ipathkey = (PathKey *) lfirst(lip);
1777-
lip = lnext(lip);
1778-
lastieclass = ipathkey->pk_eclass;
1779-
if (ieclass != lastieclass)
1780-
elog(ERROR, "inner pathkeys do not match mergeclause");
1813+
ipeclass = ipathkey->pk_eclass;
1814+
if (ieclass == ipeclass)
1815+
{
1816+
/* fast path for typical case */
1817+
lip = lnext(lip);
1818+
}
1819+
else
1820+
{
1821+
/* redundant clauses ... must match something before lip */
1822+
foreach(l2, innerpathkeys)
1823+
{
1824+
if (l2 == lip)
1825+
break;
1826+
ipathkey = (PathKey *) lfirst(l2);
1827+
ipeclass = ipathkey->pk_eclass;
1828+
if (ieclass == ipeclass)
1829+
break;
1830+
}
1831+
if (ieclass != ipeclass)
1832+
elog(ERROR, "inner pathkeys do not match mergeclauses");
1833+
}
1834+
}
1835+
else
1836+
{
1837+
/* redundant clauses ... must match some already-used pathkey */
1838+
ipathkey = NULL;
1839+
ipeclass = NULL;
1840+
foreach(l2, innerpathkeys)
1841+
{
1842+
ipathkey = (PathKey *) lfirst(l2);
1843+
ipeclass = ipathkey->pk_eclass;
1844+
if (ieclass == ipeclass)
1845+
break;
1846+
}
1847+
if (l2 == NULL)
1848+
elog(ERROR, "inner pathkeys do not match mergeclauses");
17811849
}
1850+
17821851
/* pathkeys should match each other too (more debugging) */
17831852
if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
17841853
opathkey->pk_strategy != ipathkey->pk_strategy ||
@@ -1792,6 +1861,11 @@ create_mergejoin_plan(PlannerInfo *root,
17921861
i++;
17931862
}
17941863

1864+
/*
1865+
* Note: it is not an error if we have additional pathkey elements
1866+
* (i.e., lop or lip isn't NULL here). The input paths might be
1867+
* better-sorted than we need for the current mergejoin.
1868+
*/
17951869

17961870
/*
17971871
* Now we can build the mergejoin node.

src/test/regress/expected/join.out

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2352,3 +2352,18 @@ execute foo(false);
23522352
10000
23532353
(1 row)
23542354

2355+
--
2356+
-- test for sane behavior with noncanonical merge clauses, per bug #4926
2357+
--
2358+
begin;
2359+
set enable_mergejoin = 1;
2360+
set enable_hashjoin = 0;
2361+
set enable_nestloop = 0;
2362+
create temp table a (i integer);
2363+
create temp table b (x integer, y integer);
2364+
select * from a left join b on i = x and i = y and x = i;
2365+
i | x | y
2366+
---+---+---
2367+
(0 rows)
2368+
2369+
rollback;

src/test/regress/sql/join.sql

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,3 +505,20 @@ prepare foo(bool) as
505505
(select 1 from tenk1 c where c.thousand = b.unique2 and $1));
506506
execute foo(true);
507507
execute foo(false);
508+
509+
--
510+
-- test for sane behavior with noncanonical merge clauses, per bug #4926
511+
--
512+
513+
begin;
514+
515+
set enable_mergejoin = 1;
516+
set enable_hashjoin = 0;
517+
set enable_nestloop = 0;
518+
519+
create temp table a (i integer);
520+
create temp table b (x integer, y integer);
521+
522+
select * from a left join b on i = x and i = y and x = i;
523+
524+
rollback;

0 commit comments

Comments
 (0)