Skip to content

Commit 730e2ff

Browse files
committed
Back-patch code to deduce implied equalities from transitivity of
mergejoin clauses, and add these equalities to the given WHERE clauses. This is necessary to ensure that sort keys we think are equivalent really are equivalent as soon as their rels have been joined. Without this, 7.0 may create an incorrect mergejoin plan.
1 parent 783af51 commit 730e2ff

File tree

5 files changed

+207
-10
lines changed

5 files changed

+207
-10
lines changed

src/backend/optimizer/path/pathkeys.c

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.21 2000/04/12 17:15:20 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.21.2.1 2000/09/23 23:50:47 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -19,6 +19,7 @@
1919
#include "optimizer/joininfo.h"
2020
#include "optimizer/pathnode.h"
2121
#include "optimizer/paths.h"
22+
#include "optimizer/planmain.h"
2223
#include "optimizer/tlist.h"
2324
#include "optimizer/var.h"
2425
#include "parser/parsetree.h"
@@ -227,35 +228,107 @@ add_equijoined_keys(Query *root, RestrictInfo *restrictinfo)
227228
* into our new set. When done, we add the new set to the front of
228229
* equi_key_list.
229230
*
231+
* It may well be that the two items we're given are already known to
232+
* be equijoin-equivalent, in which case we don't need to change our
233+
* data structure. If we find both of them in the same equivalence
234+
* set to start with, we can quit immediately.
235+
*
230236
* This is a standard UNION-FIND problem, for which there exist better
231237
* data structures than simple lists. If this code ever proves to be
232238
* a bottleneck then it could be sped up --- but for now, simple is
233239
* beautiful.
234240
*/
235-
newset = lcons(item1, lcons(item2, NIL));
241+
newset = NIL;
236242

237243
foreach(cursetlink, root->equi_key_list)
238244
{
239245
List *curset = lfirst(cursetlink);
246+
bool item1here = member(item1, curset);
247+
bool item2here = member(item2, curset);
240248

241-
if (member(item1, curset) || member(item2, curset))
249+
if (item1here || item2here)
242250
{
251+
/* If find both in same equivalence set, no need to do any more */
252+
if (item1here && item2here)
253+
{
254+
/* Better not have seen only one in an earlier set... */
255+
Assert(newset == NIL);
256+
return;
257+
}
258+
259+
/* Build the new set only when we know we must */
260+
if (newset == NIL)
261+
newset = lcons(item1, lcons(item2, NIL));
262+
243263
/* Found a set to merge into our new set */
244264
newset = LispUnion(newset, curset);
245265

246266
/*
247267
* Remove old set from equi_key_list. NOTE this does not
248-
* change lnext(cursetlink), so the outer foreach doesn't
249-
* break.
268+
* change lnext(cursetlink), so the foreach loop doesn't break.
250269
*/
251270
root->equi_key_list = lremove(curset, root->equi_key_list);
252271
freeList(curset); /* might as well recycle old cons cells */
253272
}
254273
}
255274

275+
/* Build the new set only when we know we must */
276+
if (newset == NIL)
277+
newset = lcons(item1, lcons(item2, NIL));
278+
256279
root->equi_key_list = lcons(newset, root->equi_key_list);
257280
}
258281

282+
/*
283+
* generate_implied_equalities
284+
* Scan the completed equi_key_list for the query, and generate explicit
285+
* qualifications (WHERE clauses) for all the pairwise equalities not
286+
* already mentioned in the quals. This is useful because the additional
287+
* clauses help the selectivity-estimation code, and in fact it's
288+
* *necessary* to ensure that sort keys we think are equivalent really
289+
* are (see src/backend/optimizer/README for more info).
290+
*
291+
* This routine just walks the equi_key_list to find all pairwise equalities.
292+
* We call process_implied_equality (in plan/initsplan.c) to determine whether
293+
* each is already known and add it to the proper restrictinfo list if not.
294+
*/
295+
void
296+
generate_implied_equalities(Query *root)
297+
{
298+
List *cursetlink;
299+
300+
foreach(cursetlink, root->equi_key_list)
301+
{
302+
List *curset = lfirst(cursetlink);
303+
List *ptr1;
304+
305+
/*
306+
* A set containing only two items cannot imply any equalities
307+
* beyond the one that created the set, so we can skip it.
308+
*/
309+
if (length(curset) < 3)
310+
continue;
311+
312+
/*
313+
* Match each item in the set with all that appear after it
314+
* (it's sufficient to generate A=B, need not process B=A too).
315+
*/
316+
foreach(ptr1, curset)
317+
{
318+
PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1);
319+
List *ptr2;
320+
321+
foreach(ptr2, lnext(ptr1))
322+
{
323+
PathKeyItem *item2 = (PathKeyItem *) lfirst(ptr2);
324+
325+
process_implied_equality(root, item1->key, item2->key,
326+
item1->sortop, item2->sortop);
327+
}
328+
}
329+
}
330+
}
331+
259332
/*
260333
* make_canonical_pathkey
261334
* Given a PathKeyItem, find the equi_key_list subset it is a member of,

src/backend/optimizer/plan/initsplan.c

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.46 2000/04/12 17:15:21 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.46.2.1 2000/09/23 23:50:47 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
1515
#include <sys/types.h>
1616

1717
#include "postgres.h"
18+
#include "catalog/pg_operator.h"
1819
#include "catalog/pg_type.h"
1920
#include "nodes/makefuncs.h"
2021
#include "optimizer/clauses.h"
@@ -25,6 +26,9 @@
2526
#include "optimizer/planmain.h"
2627
#include "optimizer/tlist.h"
2728
#include "optimizer/var.h"
29+
#include "parser/parse_expr.h"
30+
#include "parser/parse_oper.h"
31+
#include "parser/parse_type.h"
2832
#include "utils/lsyscache.h"
2933

3034

@@ -280,6 +284,113 @@ add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo,
280284
}
281285
}
282286

287+
/*
288+
* process_implied_equality
289+
* Check to see whether we already have a restrictinfo item that says
290+
* item1 = item2, and create one if not. This is a consequence of
291+
* transitivity of mergejoin equality: if we have mergejoinable
292+
* clauses A = B and B = C, we can deduce A = C (where = is an
293+
* appropriate mergejoinable operator).
294+
*/
295+
void
296+
process_implied_equality(Query *root, Node *item1, Node *item2,
297+
Oid sortop1, Oid sortop2)
298+
{
299+
Index irel1;
300+
Index irel2;
301+
RelOptInfo *rel1;
302+
List *restrictlist;
303+
List *itm;
304+
Oid ltype,
305+
rtype;
306+
Operator eq_operator;
307+
Form_pg_operator pgopform;
308+
Expr *clause;
309+
310+
/*
311+
* Currently, since check_mergejoinable only accepts Var = Var clauses,
312+
* we should only see Var nodes here. Would have to work a little
313+
* harder to locate the right rel(s) if more-general mergejoin clauses
314+
* were accepted.
315+
*/
316+
Assert(IsA(item1, Var));
317+
irel1 = ((Var *) item1)->varno;
318+
Assert(IsA(item2, Var));
319+
irel2 = ((Var *) item2)->varno;
320+
/*
321+
* If both vars belong to same rel, we need to look at that rel's
322+
* baserestrictinfo list. If different rels, each will have a
323+
* joininfo node for the other, and we can scan either list.
324+
*/
325+
rel1 = get_base_rel(root, irel1);
326+
if (irel1 == irel2)
327+
restrictlist = rel1->baserestrictinfo;
328+
else
329+
{
330+
JoinInfo *joininfo = find_joininfo_node(rel1,
331+
lconsi(irel2, NIL));
332+
333+
restrictlist = joininfo->jinfo_restrictinfo;
334+
}
335+
/*
336+
* Scan to see if equality is already known.
337+
*/
338+
foreach(itm, restrictlist)
339+
{
340+
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(itm);
341+
Node *left,
342+
*right;
343+
344+
if (restrictinfo->mergejoinoperator == InvalidOid)
345+
continue; /* ignore non-mergejoinable clauses */
346+
/* We now know the restrictinfo clause is a binary opclause */
347+
left = (Node *) get_leftop(restrictinfo->clause);
348+
right = (Node *) get_rightop(restrictinfo->clause);
349+
if ((equal(item1, left) && equal(item2, right)) ||
350+
(equal(item2, left) && equal(item1, right)))
351+
return; /* found a matching clause */
352+
}
353+
/*
354+
* This equality is new information, so construct a clause
355+
* representing it to add to the query data structures.
356+
*/
357+
ltype = exprType(item1);
358+
rtype = exprType(item2);
359+
eq_operator = oper("=", ltype, rtype, true);
360+
if (!HeapTupleIsValid(eq_operator))
361+
{
362+
/*
363+
* Would it be safe to just not add the equality to the query if
364+
* we have no suitable equality operator for the combination of
365+
* datatypes? NO, because sortkey selection may screw up anyway.
366+
*/
367+
elog(ERROR, "Unable to identify an equality operator for types '%s' and '%s'",
368+
typeidTypeName(ltype), typeidTypeName(rtype));
369+
}
370+
pgopform = (Form_pg_operator) GETSTRUCT(eq_operator);
371+
/*
372+
* Let's just make sure this appears to be a compatible operator.
373+
*/
374+
if (pgopform->oprlsortop != sortop1 ||
375+
pgopform->oprrsortop != sortop2 ||
376+
pgopform->oprresult != BOOLOID)
377+
elog(ERROR, "Equality operator for types '%s' and '%s' should be mergejoinable, but isn't",
378+
typeidTypeName(ltype), typeidTypeName(rtype));
379+
380+
clause = makeNode(Expr);
381+
clause->typeOid = BOOLOID;
382+
clause->opType = OP_EXPR;
383+
clause->oper = (Node *) makeOper(oprid(eq_operator), /* opno */
384+
InvalidOid, /* opid */
385+
BOOLOID, /* operator result type */
386+
0,
387+
NULL);
388+
clause->args = lcons(item1, lcons(item2, NIL));
389+
390+
add_restrict_and_join_to_rel(root, (Node *) clause);
391+
}
392+
393+
283394
/*****************************************************************************
284395
*
285396
* CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES

src/backend/optimizer/plan/planmain.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*
1515
*
1616
* IDENTIFICATION
17-
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.55 2000/04/12 17:15:22 momjian Exp $
17+
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.55.2.1 2000/09/23 23:50:47 tgl Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -184,7 +184,7 @@ subplanner(Query *root,
184184
* base_rel_list as relation references are found (e.g., in the
185185
* qualification, the targetlist, etc.). Restrict and join clauses
186186
* are added to appropriate lists belonging to the mentioned
187-
* relations, and we also build lists of equijoined keys for pathkey
187+
* relations. We also build lists of equijoined keys for pathkey
188188
* construction.
189189
*/
190190
root->base_rel_list = NIL;
@@ -193,8 +193,18 @@ subplanner(Query *root,
193193

194194
make_var_only_tlist(root, flat_tlist);
195195
add_restrict_and_join_to_rels(root, qual);
196+
197+
/*
198+
* Make sure we have RelOptInfo nodes for all relations used.
199+
*/
196200
add_missing_rels_to_query(root);
197201

202+
/*
203+
* Use the completed lists of equijoined keys to deduce any implied
204+
* but unstated equalities (for example, A=B and B=C imply A=C).
205+
*/
206+
generate_implied_equalities(root);
207+
198208
/*
199209
* We should now have all the pathkey equivalence sets built, so it's
200210
* now possible to convert the requested query_pathkeys to canonical

src/include/optimizer/paths.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
99
* Portions Copyright (c) 1994, Regents of the University of California
1010
*
11-
* $Id: paths.h,v 1.44 2000/04/12 17:16:42 momjian Exp $
11+
* $Id: paths.h,v 1.44.2.1 2000/09/23 23:50:46 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -90,6 +90,7 @@ typedef enum
9090
} PathKeysComparison;
9191

9292
extern void add_equijoined_keys(Query *root, RestrictInfo *restrictinfo);
93+
extern void generate_implied_equalities(Query *root);
9394
extern List *canonicalize_pathkeys(Query *root, List *pathkeys);
9495
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
9596
extern bool pathkeys_contained_in(List *keys1, List *keys2);

src/include/optimizer/planmain.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $Id: planmain.h,v 1.39 2000/04/12 17:16:42 momjian Exp $
10+
* $Id: planmain.h,v 1.39.2.1 2000/09/23 23:50:46 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -43,6 +43,8 @@ extern Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
4343
extern void make_var_only_tlist(Query *root, List *tlist);
4444
extern void add_restrict_and_join_to_rels(Query *root, List *clauses);
4545
extern void add_missing_rels_to_query(Query *root);
46+
extern void process_implied_equality(Query *root, Node *item1, Node *item2,
47+
Oid sortop1, Oid sortop2);
4648

4749
/*
4850
* prototypes for plan/setrefs.c

0 commit comments

Comments
 (0)