Skip to content

Commit bdfbfde

Browse files
committed
IN clauses appearing at top level of WHERE can now be handled as joins.
There are two implementation techniques: the executor understands a new JOIN_IN jointype, which emits at most one matching row per left-hand row, or the result of the IN's sub-select can be fed through a DISTINCT filter and then joined as an ordinary relation. Along the way, some minor code cleanup in the optimizer; notably, break out most of the jointree-rearrangement preprocessing in planner.c and put it in a new file prep/prepjointree.c.
1 parent be2b660 commit bdfbfde

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2076
-876
lines changed

doc/src/sgml/release.sgml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.178 2003/01/11 21:02:49 momjian Exp $
2+
$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.179 2003/01/20 18:54:44 tgl Exp $
33
-->
44

55
<appendix id="release">
@@ -24,6 +24,7 @@ CDATA means the content is "SGML-free", so you can write without
2424
worries about funny characters.
2525
-->
2626
<literallayout><![CDATA[
27+
Performance of "foo IN (SELECT ...)" queries has been considerably improved
2728
FETCH 0 now re-fetches cursor's current row, per SQL spec
2829
Revised executor state representation; plan trees are read-only to executor now
2930
Information schema

src/backend/executor/nodeHashjoin.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.46 2002/12/30 15:21:20 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.47 2003/01/20 18:54:45 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -95,6 +95,15 @@ ExecHashJoin(HashJoinState *node)
9595
node->js.ps.ps_TupFromTlist = false;
9696
}
9797

98+
/*
99+
* If we're doing an IN join, we want to return at most one row per
100+
* outer tuple; so we can stop scanning the inner scan if we matched on
101+
* the previous try.
102+
*/
103+
if (node->js.jointype == JOIN_IN &&
104+
node->hj_MatchedOuter)
105+
node->hj_NeedNewOuter = true;
106+
98107
/*
99108
* Reset per-tuple memory context to free any expression evaluation
100109
* storage allocated in the previous tuple cycle. Note this can't
@@ -353,6 +362,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
353362
switch (node->join.jointype)
354363
{
355364
case JOIN_INNER:
365+
case JOIN_IN:
356366
break;
357367
case JOIN_LEFT:
358368
hjstate->hj_NullInnerTupleSlot =

src/backend/executor/nodeMergejoin.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.55 2002/12/15 16:17:46 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.56 2003/01/20 18:54:45 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -381,6 +381,7 @@ ExecMergeJoin(MergeJoinState *node)
381381
switch (node->js.jointype)
382382
{
383383
case JOIN_INNER:
384+
case JOIN_IN:
384385
doFillOuter = false;
385386
doFillInner = false;
386387
break;
@@ -581,9 +582,15 @@ ExecMergeJoin(MergeJoinState *node)
581582
* the econtext's tuple pointers were set up before
582583
* checking the merge qual, so we needn't do it again.
583584
*/
584-
qualResult = (joinqual == NIL ||
585-
ExecQual(joinqual, econtext, false));
586-
MJ_DEBUG_QUAL(joinqual, qualResult);
585+
if (node->js.jointype == JOIN_IN &&
586+
node->mj_MatchedOuter)
587+
qualResult = false;
588+
else
589+
{
590+
qualResult = (joinqual == NIL ||
591+
ExecQual(joinqual, econtext, false));
592+
MJ_DEBUG_QUAL(joinqual, qualResult);
593+
}
587594

588595
if (qualResult)
589596
{
@@ -1452,6 +1459,7 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate)
14521459
switch (node->join.jointype)
14531460
{
14541461
case JOIN_INNER:
1462+
case JOIN_IN:
14551463
break;
14561464
case JOIN_LEFT:
14571465
mergestate->mj_NullInnerTupleSlot =

src/backend/executor/nodeNestloop.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v 1.29 2002/12/15 16:17:46 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v 1.30 2003/01/20 18:54:46 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -101,6 +101,15 @@ ExecNestLoop(NestLoopState *node)
101101
node->js.ps.ps_TupFromTlist = false;
102102
}
103103

104+
/*
105+
* If we're doing an IN join, we want to return at most one row per
106+
* outer tuple; so we can stop scanning the inner scan if we matched on
107+
* the previous try.
108+
*/
109+
if (node->js.jointype == JOIN_IN &&
110+
node->nl_MatchedOuter)
111+
node->nl_NeedNewOuter = true;
112+
104113
/*
105114
* Reset per-tuple memory context to free any expression evaluation
106115
* storage allocated in the previous tuple cycle. Note this can't
@@ -312,6 +321,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate)
312321
switch (node->join.jointype)
313322
{
314323
case JOIN_INNER:
324+
case JOIN_IN:
315325
break;
316326
case JOIN_LEFT:
317327
nlstate->nl_NullInnerTupleSlot =

src/backend/nodes/copyfuncs.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Portions Copyright (c) 1994, Regents of the University of California
1616
*
1717
* IDENTIFICATION
18-
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.236 2003/01/15 19:35:35 tgl Exp $
18+
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.237 2003/01/20 18:54:46 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -1095,6 +1095,21 @@ _copyJoinInfo(JoinInfo *from)
10951095
return newnode;
10961096
}
10971097

1098+
/*
1099+
* _copyInClauseInfo
1100+
*/
1101+
static InClauseInfo *
1102+
_copyInClauseInfo(InClauseInfo *from)
1103+
{
1104+
InClauseInfo *newnode = makeNode(InClauseInfo);
1105+
1106+
COPY_INTLIST_FIELD(lefthand);
1107+
COPY_INTLIST_FIELD(righthand);
1108+
COPY_NODE_FIELD(sub_targetlist);
1109+
1110+
return newnode;
1111+
}
1112+
10981113
/* ****************************************************************
10991114
* parsenodes.h copy functions
11001115
* ****************************************************************
@@ -1424,9 +1439,9 @@ _copyQuery(Query *from)
14241439

14251440
/*
14261441
* We do not copy the planner internal fields: base_rel_list,
1427-
* other_rel_list, join_rel_list, equi_key_list, query_pathkeys,
1428-
* hasJoinRTEs. That would get us into copying RelOptInfo/Path
1429-
* trees, which we don't want to do.
1442+
* other_rel_list, join_rel_list, equi_key_list, in_info_list,
1443+
* query_pathkeys, hasJoinRTEs. That would get us into copying
1444+
* RelOptInfo/Path trees, which we don't want to do.
14301445
*/
14311446

14321447
return newnode;
@@ -2490,6 +2505,9 @@ copyObject(void *from)
24902505
case T_JoinInfo:
24912506
retval = _copyJoinInfo(from);
24922507
break;
2508+
case T_InClauseInfo:
2509+
retval = _copyInClauseInfo(from);
2510+
break;
24932511

24942512
/*
24952513
* VALUE NODES

src/backend/nodes/equalfuncs.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* Portions Copyright (c) 1994, Regents of the University of California
1919
*
2020
* IDENTIFICATION
21-
* $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.180 2003/01/15 19:35:37 tgl Exp $
21+
* $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.181 2003/01/20 18:54:46 tgl Exp $
2222
*
2323
*-------------------------------------------------------------------------
2424
*/
@@ -486,6 +486,16 @@ _equalJoinInfo(JoinInfo *a, JoinInfo *b)
486486
return true;
487487
}
488488

489+
static bool
490+
_equalInClauseInfo(InClauseInfo *a, InClauseInfo *b)
491+
{
492+
COMPARE_INTLIST_FIELD(lefthand);
493+
COMPARE_INTLIST_FIELD(righthand);
494+
COMPARE_NODE_FIELD(sub_targetlist);
495+
496+
return true;
497+
}
498+
489499

490500
/*
491501
* Stuff from parsenodes.h
@@ -518,9 +528,9 @@ _equalQuery(Query *a, Query *b)
518528

519529
/*
520530
* We do not check the internal-to-the-planner fields: base_rel_list,
521-
* other_rel_list, join_rel_list, equi_key_list, query_pathkeys,
522-
* hasJoinRTEs. They might not be set yet, and in any case they should
523-
* be derivable from the other fields.
531+
* other_rel_list, join_rel_list, equi_key_list, in_info_list,
532+
* query_pathkeys, hasJoinRTEs. They might not be set yet, and in any
533+
* case they should be derivable from the other fields.
524534
*/
525535
return true;
526536
}
@@ -1618,6 +1628,9 @@ equal(void *a, void *b)
16181628
case T_JoinInfo:
16191629
retval = _equalJoinInfo(a, b);
16201630
break;
1631+
case T_InClauseInfo:
1632+
retval = _equalInClauseInfo(a, b);
1633+
break;
16211634

16221635
/*
16231636
* LIST NODES

src/backend/nodes/list.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/nodes/list.c,v 1.43 2002/12/17 01:18:18 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/nodes/list.c,v 1.44 2003/01/20 18:54:47 tgl Exp $
1212
*
1313
* NOTES
1414
* XXX a few of the following functions are duplicated to handle
@@ -638,10 +638,10 @@ lreverse(List *l)
638638
}
639639

640640
/*
641-
* Return t if two integer lists have no members in common.
641+
* Return t if two integer lists have any members in common.
642642
*/
643643
bool
644-
nonoverlap_setsi(List *list1, List *list2)
644+
overlap_setsi(List *list1, List *list2)
645645
{
646646
List *x;
647647

@@ -650,9 +650,9 @@ nonoverlap_setsi(List *list1, List *list2)
650650
int e = lfirsti(x);
651651

652652
if (intMember(e, list2))
653-
return false;
653+
return true;
654654
}
655-
return true;
655+
return false;
656656
}
657657

658658
/*

src/backend/nodes/outfuncs.c

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.193 2003/01/15 19:35:39 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.194 2003/01/20 18:54:47 tgl Exp $
1212
*
1313
* NOTES
1414
* Every node type that can appear in stored rules' parsetrees *must*
@@ -905,6 +905,18 @@ _outMaterialPath(StringInfo str, MaterialPath *node)
905905
WRITE_NODE_FIELD(subpath);
906906
}
907907

908+
static void
909+
_outUniquePath(StringInfo str, UniquePath *node)
910+
{
911+
WRITE_NODE_TYPE("UNIQUEPATH");
912+
913+
_outPathInfo(str, (Path *) node);
914+
915+
WRITE_NODE_FIELD(subpath);
916+
WRITE_BOOL_FIELD(use_hash);
917+
WRITE_FLOAT_FIELD(rows, "%.0f");
918+
}
919+
908920
static void
909921
_outNestPath(StringInfo str, NestPath *node)
910922
{
@@ -969,6 +981,16 @@ _outJoinInfo(StringInfo str, JoinInfo *node)
969981
WRITE_NODE_FIELD(jinfo_restrictinfo);
970982
}
971983

984+
static void
985+
_outInClauseInfo(StringInfo str, InClauseInfo *node)
986+
{
987+
WRITE_NODE_TYPE("INCLAUSEINFO");
988+
989+
WRITE_INTLIST_FIELD(lefthand);
990+
WRITE_INTLIST_FIELD(righthand);
991+
WRITE_NODE_FIELD(sub_targetlist);
992+
}
993+
972994
/*****************************************************************************
973995
*
974996
* Stuff from parsenodes.h.
@@ -1563,6 +1585,9 @@ _outNode(StringInfo str, void *obj)
15631585
case T_MaterialPath:
15641586
_outMaterialPath(str, obj);
15651587
break;
1588+
case T_UniquePath:
1589+
_outUniquePath(str, obj);
1590+
break;
15661591
case T_NestPath:
15671592
_outNestPath(str, obj);
15681593
break;
@@ -1581,6 +1606,9 @@ _outNode(StringInfo str, void *obj)
15811606
case T_JoinInfo:
15821607
_outJoinInfo(str, obj);
15831608
break;
1609+
case T_InClauseInfo:
1610+
_outInClauseInfo(str, obj);
1611+
break;
15841612

15851613
case T_CreateStmt:
15861614
_outCreateStmt(str, obj);

src/backend/optimizer/README

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ RelOptInfo - a relation or joined relations
263263
AppendPath - append multiple subpaths together
264264
ResultPath - a Result plan node (used for variable-free tlist or qual)
265265
MaterialPath - a Material plan node
266+
UniquePath - remove duplicate rows
266267
NestPath - nested-loop joins
267268
MergePath - merge joins
268269
HashPath - hash joins

src/backend/optimizer/geqo/geqo_eval.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.60 2002/12/16 21:30:29 tgl Exp $
9+
* $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.61 2003/01/20 18:54:49 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -22,8 +22,8 @@
2222
#include "postgres.h"
2323

2424
#include <float.h>
25-
#include <math.h>
2625
#include <limits.h>
26+
#include <math.h>
2727

2828
#include "optimizer/geqo.h"
2929
#include "optimizer/pathnode.h"
@@ -91,7 +91,10 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
9191
* XXX geqo does not currently support optimization for partial result
9292
* retrieval --- how to fix?
9393
*/
94-
fitness = joinrel->cheapest_total_path->total_cost;
94+
if (joinrel)
95+
fitness = joinrel->cheapest_total_path->total_cost;
96+
else
97+
fitness = DBL_MAX;
9598

9699
/* restore join_rel_list */
97100
root->join_rel_list = savelist;
@@ -113,7 +116,7 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
113116
* 'tour' is the proposed join order, of length 'num_gene'
114117
*
115118
* Returns a new join relation whose cheapest path is the best plan for
116-
* this join order.
119+
* this join order. NB: will return NULL if join order is invalid.
117120
*
118121
* Note that at each step we consider using the next rel as both left and
119122
* right side of a join. However, we cannot build general ("bushy") plan
@@ -154,6 +157,10 @@ gimme_tree(Query *root, List *initial_rels,
154157
*/
155158
new_rel = make_join_rel(root, joinrel, inner_rel, JOIN_INNER);
156159

160+
/* Fail if join order is not valid */
161+
if (new_rel == NULL)
162+
return NULL;
163+
157164
/* Find and save the cheapest paths for this rel */
158165
set_cheapest(new_rel);
159166

0 commit comments

Comments
 (0)