Skip to content

Commit ff673f5

Browse files
committed
Fix convert_IN_to_join to properly handle the case where the subselect's
output is not of the same type that's needed for the IN comparison (ie, where the parser inserted an implicit coercion above the subselect result). We should record the coerced expression, not just a raw Var referencing the subselect output, as the quantity that needs to be unique-ified if we choose to implement the IN as Unique followed by a plain join. As of 8.3 this error was causing crashes, as seen in bug #4113 from Javier Hernandez, because the executor was being told to hash or sort the raw subselect output column using operators appropriate to the coerced type. In prior versions there was no crash because the executor chose the hash or sort operators for itself based on the column type it saw. However, that's still not really right, because what's unique for one data type might not be unique for another. In corner cases we could get multiple outputs of a row that should appear only once, as demonstrated by the regression test case included in this commit. However, this patch doesn't apply cleanly to 8.2 or before, and the code involved has shifted enough over time that I'm hesitant to try to back-patch. Given the lack of complaints from the field about such corner cases, I think the bug may not be important enough to risk breaking other things with a back-patch.
1 parent a31b03b commit ff673f5

File tree

5 files changed

+104
-25
lines changed

5 files changed

+104
-25
lines changed

src/backend/optimizer/plan/subselect.c

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.129 2008/01/17 20:35:27 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.130 2008/04/21 20:54:15 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -725,10 +725,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
725725
Query *parse = root->parse;
726726
Query *subselect = (Query *) sublink->subselect;
727727
List *in_operators;
728+
List *left_exprs;
729+
List *right_exprs;
728730
Relids left_varnos;
729731
int rtindex;
730732
RangeTblEntry *rte;
731733
RangeTblRef *rtr;
734+
List *subquery_vars;
732735
InClauseInfo *ininfo;
733736
Node *result;
734737

@@ -744,28 +747,37 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
744747
return NULL;
745748
if (sublink->testexpr && IsA(sublink->testexpr, OpExpr))
746749
{
747-
Oid opno = ((OpExpr *) sublink->testexpr)->opno;
750+
OpExpr *op = (OpExpr *) sublink->testexpr;
751+
Oid opno = op->opno;
748752
List *opfamilies;
749753
List *opstrats;
750754

755+
if (list_length(op->args) != 2)
756+
return NULL; /* not binary operator? */
751757
get_op_btree_interpretation(opno, &opfamilies, &opstrats);
752758
if (!list_member_int(opstrats, ROWCOMPARE_EQ))
753759
return NULL;
754760
in_operators = list_make1_oid(opno);
761+
left_exprs = list_make1(linitial(op->args));
762+
right_exprs = list_make1(lsecond(op->args));
755763
}
756764
else if (and_clause(sublink->testexpr))
757765
{
758766
ListCell *lc;
759767

760-
/* OK, but we need to extract the per-column operator OIDs */
761-
in_operators = NIL;
768+
/* OK, but we need to extract the per-column info */
769+
in_operators = left_exprs = right_exprs = NIL;
762770
foreach(lc, ((BoolExpr *) sublink->testexpr)->args)
763771
{
764772
OpExpr *op = (OpExpr *) lfirst(lc);
765773

766774
if (!IsA(op, OpExpr)) /* probably shouldn't happen */
767775
return NULL;
776+
if (list_length(op->args) != 2)
777+
return NULL; /* not binary operator? */
768778
in_operators = lappend_oid(in_operators, op->opno);
779+
left_exprs = lappend(left_exprs, linitial(op->args));
780+
right_exprs = lappend(right_exprs, lsecond(op->args));
769781
}
770782
}
771783
else
@@ -782,10 +794,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
782794
* The left-hand expressions must contain some Vars of the current query,
783795
* else it's not gonna be a join.
784796
*/
785-
left_varnos = pull_varnos(sublink->testexpr);
797+
left_varnos = pull_varnos((Node *) left_exprs);
786798
if (bms_is_empty(left_varnos))
787799
return NULL;
788800

801+
/* ... and the right-hand expressions better not contain Vars at all */
802+
Assert(!contain_var_clause((Node *) right_exprs));
803+
789804
/*
790805
* The combining operators and left-hand expressions mustn't be volatile.
791806
*/
@@ -810,6 +825,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
810825
rtr->rtindex = rtindex;
811826
parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);
812827

828+
/*
829+
* Build a list of Vars representing the subselect outputs.
830+
*/
831+
subquery_vars = generate_subquery_vars(root,
832+
subselect->targetList,
833+
rtindex);
834+
835+
/*
836+
* Build the result qual expression, replacing Params with these Vars.
837+
*/
838+
result = convert_testexpr(root,
839+
sublink->testexpr,
840+
subquery_vars);
841+
813842
/*
814843
* Now build the InClauseInfo node.
815844
*/
@@ -819,24 +848,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
819848
ininfo->in_operators = in_operators;
820849

821850
/*
822-
* ininfo->sub_targetlist is filled with a list of Vars representing the
823-
* subselect outputs.
851+
* ininfo->sub_targetlist must be filled with a list of expressions that
852+
* would need to be unique-ified if we try to implement the IN using a
853+
* regular join to unique-ified subquery output. This is most easily done
854+
* by applying convert_testexpr to just the RHS inputs of the testexpr
855+
* operators. That handles cases like type coercions of the subquery
856+
* outputs, clauses dropped due to const-simplification, etc.
824857
*/
825-
ininfo->sub_targetlist = generate_subquery_vars(root,
826-
subselect->targetList,
827-
rtindex);
828-
Assert(list_length(in_operators) == list_length(ininfo->sub_targetlist));
858+
ininfo->sub_targetlist = (List *) convert_testexpr(root,
859+
(Node *) right_exprs,
860+
subquery_vars);
829861

830862
/* Add the completed node to the query's list */
831863
root->in_info_list = lappend(root->in_info_list, ininfo);
832864

833-
/*
834-
* Build the result qual expression.
835-
*/
836-
result = convert_testexpr(root,
837-
sublink->testexpr,
838-
ininfo->sub_targetlist);
839-
840865
return result;
841866
}
842867

src/backend/optimizer/util/pathnode.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.142 2008/01/01 19:45:50 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.143 2008/04/21 20:54:15 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -898,7 +898,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
898898
/*
899899
* translate_sub_tlist - get subquery column numbers represented by tlist
900900
*
901-
* The given targetlist should contain only Vars referencing the given relid.
901+
* The given targetlist usually contains only Vars referencing the given relid.
902902
* Extract their varattnos (ie, the column numbers of the subquery) and return
903903
* as an integer List.
904904
*

src/include/nodes/relation.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.155 2008/03/24 21:53:04 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.156 2008/04/21 20:54:15 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -1107,17 +1107,19 @@ typedef struct OuterJoinInfo
11071107
* We record information about each such IN clause in an InClauseInfo struct.
11081108
* These structs are kept in the PlannerInfo node's in_info_list.
11091109
*
1110-
* Note: sub_targetlist is just a list of Vars or expressions; it does not
1111-
* contain TargetEntry nodes.
1110+
* Note: sub_targetlist is a bit misnamed; it is a list of the expressions
1111+
* on the RHS of the IN's join clauses. (This normally starts out as a list
1112+
* of Vars referencing the subquery outputs, but can get mutated if the
1113+
* subquery is flattened into the main query.)
11121114
*/
11131115

11141116
typedef struct InClauseInfo
11151117
{
11161118
NodeTag type;
11171119
Relids lefthand; /* base relids in lefthand expressions */
11181120
Relids righthand; /* base relids coming from the subselect */
1119-
List *sub_targetlist; /* targetlist of original RHS subquery */
1120-
List *in_operators; /* OIDs of the IN's equality operator(s) */
1121+
List *sub_targetlist; /* RHS expressions of the IN's comparisons */
1122+
List *in_operators; /* OIDs of the IN's equality operators */
11211123
} InClauseInfo;
11221124

11231125
/*

src/test/regress/expected/subselect.out

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,3 +408,34 @@ select * from (
408408
0
409409
(1 row)
410410

411+
--
412+
-- Test that an IN implemented using a UniquePath does unique-ification
413+
-- with the right semantics, as per bug #4113. (Unfortunately we have
414+
-- no simple way to ensure that this test case actually chooses that type
415+
-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference
416+
-- here might mean that some other plan type is being used, rendering the test
417+
-- pointless.)
418+
--
419+
create temp table numeric_table (num_col numeric);
420+
insert into numeric_table values (1), (1.000000000000000000001), (2), (3);
421+
create temp table float_table (float_col float8);
422+
insert into float_table values (1), (2), (3);
423+
select * from float_table
424+
where float_col in (select num_col from numeric_table);
425+
float_col
426+
-----------
427+
1
428+
2
429+
3
430+
(3 rows)
431+
432+
select * from numeric_table
433+
where num_col in (select float_col from float_table);
434+
num_col
435+
-------------------------
436+
1
437+
1.000000000000000000001
438+
2
439+
3
440+
(4 rows)
441+

src/test/regress/sql/subselect.sql

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,3 +251,24 @@ select * from (
251251
select min(unique1) from tenk1 as a
252252
where not exists (select 1 from tenk1 as b where b.unique2 = 10000)
253253
) ss;
254+
255+
--
256+
-- Test that an IN implemented using a UniquePath does unique-ification
257+
-- with the right semantics, as per bug #4113. (Unfortunately we have
258+
-- no simple way to ensure that this test case actually chooses that type
259+
-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference
260+
-- here might mean that some other plan type is being used, rendering the test
261+
-- pointless.)
262+
--
263+
264+
create temp table numeric_table (num_col numeric);
265+
insert into numeric_table values (1), (1.000000000000000000001), (2), (3);
266+
267+
create temp table float_table (float_col float8);
268+
insert into float_table values (1), (2), (3);
269+
270+
select * from float_table
271+
where float_col in (select num_col from numeric_table);
272+
273+
select * from numeric_table
274+
where num_col in (select float_col from float_table);

0 commit comments

Comments
 (0)