Skip to content

Commit 51b6ae6

Browse files
committed
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr must be able to compute valid em_nullable_relids for any new equivalence class members it creates. I'd worried about this in the commit message for db9f0e1, but claimed that it wasn't a problem because multi-member ECs should already exist when it runs. That is transparently wrong, though, because this function is also called by initialize_mergeclause_eclasses, which runs during deconstruct_jointree. The example given in the bug report (which the new regression test item is based upon) fails because the COALESCE() expression is first seen by initialize_mergeclause_eclasses rather than process_equivalence. Fixing this requires passing the appropriate nullable_relids set to get_eclass_for_sort_expr, and it requires new code to compute that set for top-level expressions such as ORDER BY, GROUP BY, etc. We store the top-level nullable_relids in a new field in PlannerInfo to avoid computing it many times. In the back branches, I've added the new field at the end of the struct to minimize ABI breakage for planner plugins. There doesn't seem to be a good alternative to changing get_eclass_for_sort_expr's API signature, though. There probably aren't any third-party extensions calling that function directly; moreover, if there are, they probably need to think about what to pass for nullable_relids anyway. Back-patch to 9.2, like the previous patch in this area.
1 parent 42f8e26 commit 51b6ae6

File tree

8 files changed

+114
-7
lines changed

8 files changed

+114
-7
lines changed

src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1690,6 +1690,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
16901690
WRITE_UINT_FIELD(query_level);
16911691
WRITE_NODE_FIELD(plan_params);
16921692
WRITE_BITMAPSET_FIELD(all_baserels);
1693+
WRITE_BITMAPSET_FIELD(nullable_baserels);
16931694
WRITE_NODE_FIELD(join_rel_list);
16941695
WRITE_INT_FIELD(join_cur_level);
16951696
WRITE_NODE_FIELD(init_plans);

src/backend/optimizer/path/equivclass.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,13 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
510510
* equivalence class it is a member of; if none, optionally build a new
511511
* single-member EquivalenceClass for it.
512512
*
513+
* expr is the expression, and nullable_relids is the set of base relids
514+
* that are potentially nullable below it. We actually only care about
515+
* the set of such relids that are used in the expression; but for caller
516+
* convenience, we perform that intersection step here. The caller need
517+
* only be sure that nullable_relids doesn't omit any nullable rels that
518+
* might appear in the expr.
519+
*
513520
* sortref is the SortGroupRef of the originating SortGroupClause, if any,
514521
* or zero if not. (It should never be zero if the expression is volatile!)
515522
*
@@ -538,13 +545,15 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
538545
EquivalenceClass *
539546
get_eclass_for_sort_expr(PlannerInfo *root,
540547
Expr *expr,
548+
Relids nullable_relids,
541549
List *opfamilies,
542550
Oid opcintype,
543551
Oid collation,
544552
Index sortref,
545553
Relids rel,
546554
bool create_it)
547555
{
556+
Relids expr_relids;
548557
EquivalenceClass *newec;
549558
EquivalenceMember *newem;
550559
ListCell *lc1;
@@ -555,6 +564,12 @@ get_eclass_for_sort_expr(PlannerInfo *root,
555564
*/
556565
expr = canonicalize_ec_expression(expr, opcintype, collation);
557566

567+
/*
568+
* Get the precise set of nullable relids appearing in the expression.
569+
*/
570+
expr_relids = pull_varnos((Node *) expr);
571+
nullable_relids = bms_intersect(nullable_relids, expr_relids);
572+
558573
/*
559574
* Scan through the existing EquivalenceClasses for a match
560575
*/
@@ -629,8 +644,8 @@ get_eclass_for_sort_expr(PlannerInfo *root,
629644
if (newec->ec_has_volatile && sortref == 0) /* should not happen */
630645
elog(ERROR, "volatile EquivalenceClass has no sortref");
631646

632-
newem = add_eq_member(newec, copyObject(expr), pull_varnos((Node *) expr),
633-
NULL, false, opcintype);
647+
newem = add_eq_member(newec, copyObject(expr), expr_relids,
648+
nullable_relids, false, opcintype);
634649

635650
/*
636651
* add_eq_member doesn't check for volatile functions, set-returning

src/backend/optimizer/path/pathkeys.c

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,9 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
170170
* Given an expression and sort-order information, create a PathKey.
171171
* The result is always a "canonical" PathKey, but it might be redundant.
172172
*
173+
* expr is the expression, and nullable_relids is the set of base relids
174+
* that are potentially nullable below it.
175+
*
173176
* If the PathKey is being generated from a SortGroupClause, sortref should be
174177
* the SortGroupClause's SortGroupRef; otherwise zero.
175178
*
@@ -185,6 +188,7 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
185188
static PathKey *
186189
make_pathkey_from_sortinfo(PlannerInfo *root,
187190
Expr *expr,
191+
Relids nullable_relids,
188192
Oid opfamily,
189193
Oid opcintype,
190194
Oid collation,
@@ -223,8 +227,8 @@ make_pathkey_from_sortinfo(PlannerInfo *root,
223227
equality_op);
224228

225229
/* Now find or (optionally) create a matching EquivalenceClass */
226-
eclass = get_eclass_for_sort_expr(root, expr, opfamilies,
227-
opcintype, collation,
230+
eclass = get_eclass_for_sort_expr(root, expr, nullable_relids,
231+
opfamilies, opcintype, collation,
228232
sortref, rel, create_it);
229233

230234
/* Fail if no EC and !create_it */
@@ -246,6 +250,7 @@ make_pathkey_from_sortinfo(PlannerInfo *root,
246250
static PathKey *
247251
make_pathkey_from_sortop(PlannerInfo *root,
248252
Expr *expr,
253+
Relids nullable_relids,
249254
Oid ordering_op,
250255
bool nulls_first,
251256
Index sortref,
@@ -268,6 +273,7 @@ make_pathkey_from_sortop(PlannerInfo *root,
268273

269274
return make_pathkey_from_sortinfo(root,
270275
expr,
276+
nullable_relids,
271277
opfamily,
272278
opcintype,
273279
collation,
@@ -482,9 +488,13 @@ build_index_pathkeys(PlannerInfo *root,
482488
nulls_first = index->nulls_first[i];
483489
}
484490

485-
/* OK, try to make a canonical pathkey for this sort key */
491+
/*
492+
* OK, try to make a canonical pathkey for this sort key. Note we're
493+
* underneath any outer joins, so nullable_relids should be NULL.
494+
*/
486495
cpathkey = make_pathkey_from_sortinfo(root,
487496
indexkey,
497+
NULL,
488498
index->sortopfamily[i],
489499
index->opcintype[i],
490500
index->indexcollations[i],
@@ -573,11 +583,14 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
573583
* expression is *not* volatile in the outer query: it's just
574584
* a Var referencing whatever the subquery emitted. (IOW, the
575585
* outer query isn't going to re-execute the volatile
576-
* expression itself.) So this is okay.
586+
* expression itself.) So this is okay. Likewise, it's
587+
* correct to pass nullable_relids = NULL, because we're
588+
* underneath any outer joins appearing in the outer query.
577589
*/
578590
outer_ec =
579591
get_eclass_for_sort_expr(root,
580592
outer_expr,
593+
NULL,
581594
sub_eclass->ec_opfamilies,
582595
sub_member->em_datatype,
583596
sub_eclass->ec_collation,
@@ -665,6 +678,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
665678
/* See if we have a matching EC for that */
666679
outer_ec = get_eclass_for_sort_expr(root,
667680
outer_expr,
681+
NULL,
668682
sub_eclass->ec_opfamilies,
669683
sub_expr_type,
670684
sub_expr_coll,
@@ -770,6 +784,13 @@ build_join_pathkeys(PlannerInfo *root,
770784
* The resulting PathKeys are always in canonical form. (Actually, there
771785
* is no longer any code anywhere that creates non-canonical PathKeys.)
772786
*
787+
* We assume that root->nullable_baserels is the set of base relids that could
788+
* have gone to NULL below the SortGroupClause expressions. This is okay if
789+
* the expressions came from the query's top level (ORDER BY, DISTINCT, etc)
790+
* and if this function is only invoked after deconstruct_jointree. In the
791+
* future we might have to make callers pass in the appropriate
792+
* nullable-relids set, but for now it seems unnecessary.
793+
*
773794
* 'sortclauses' is a list of SortGroupClause nodes
774795
* 'tlist' is the targetlist to find the referenced tlist entries in
775796
*/
@@ -794,6 +815,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
794815
Assert(OidIsValid(sortcl->sortop));
795816
pathkey = make_pathkey_from_sortop(root,
796817
sortkey,
818+
root->nullable_baserels,
797819
sortcl->sortop,
798820
sortcl->nulls_first,
799821
sortcl->tleSortGroupRef,
@@ -850,6 +872,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
850872
restrictinfo->left_ec =
851873
get_eclass_for_sort_expr(root,
852874
(Expr *) get_leftop(clause),
875+
restrictinfo->nullable_relids,
853876
restrictinfo->mergeopfamilies,
854877
lefttype,
855878
((OpExpr *) clause)->inputcollid,
@@ -859,6 +882,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
859882
restrictinfo->right_ec =
860883
get_eclass_for_sort_expr(root,
861884
(Expr *) get_rightop(clause),
885+
restrictinfo->nullable_relids,
862886
restrictinfo->mergeopfamilies,
863887
righttype,
864888
((OpExpr *) clause)->inputcollid,

src/backend/optimizer/plan/initsplan.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,9 @@ deconstruct_jointree(PlannerInfo *root)
250250
Assert(root->parse->jointree != NULL &&
251251
IsA(root->parse->jointree, FromExpr));
252252

253+
/* this is filled as we scan the jointree */
254+
root->nullable_baserels = NULL;
255+
253256
return deconstruct_recurse(root, (Node *) root->parse->jointree, false,
254257
&qualscope, &inner_join_rels);
255258
}
@@ -361,6 +364,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
361364
left_inners,
362365
right_inners,
363366
nonnullable_rels,
367+
nullable_rels,
364368
ojscope;
365369
List *leftjoinlist,
366370
*rightjoinlist;
@@ -392,6 +396,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
392396
*inner_join_rels = *qualscope;
393397
/* Inner join adds no restrictions for quals */
394398
nonnullable_rels = NULL;
399+
/* and it doesn't force anything to null, either */
400+
nullable_rels = NULL;
395401
break;
396402
case JOIN_LEFT:
397403
case JOIN_ANTI:
@@ -404,6 +410,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
404410
*qualscope = bms_union(leftids, rightids);
405411
*inner_join_rels = bms_union(left_inners, right_inners);
406412
nonnullable_rels = leftids;
413+
nullable_rels = rightids;
407414
break;
408415
case JOIN_SEMI:
409416
leftjoinlist = deconstruct_recurse(root, j->larg,
@@ -416,6 +423,13 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
416423
*inner_join_rels = bms_union(left_inners, right_inners);
417424
/* Semi join adds no restrictions for quals */
418425
nonnullable_rels = NULL;
426+
427+
/*
428+
* Theoretically, a semijoin would null the RHS; but since the
429+
* RHS can't be accessed above the join, this is immaterial
430+
* and we needn't account for it.
431+
*/
432+
nullable_rels = NULL;
419433
break;
420434
case JOIN_FULL:
421435
leftjoinlist = deconstruct_recurse(root, j->larg,
@@ -428,16 +442,22 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
428442
*inner_join_rels = bms_union(left_inners, right_inners);
429443
/* each side is both outer and inner */
430444
nonnullable_rels = *qualscope;
445+
nullable_rels = *qualscope;
431446
break;
432447
default:
433448
/* JOIN_RIGHT was eliminated during reduce_outer_joins() */
434449
elog(ERROR, "unrecognized join type: %d",
435450
(int) j->jointype);
436451
nonnullable_rels = NULL; /* keep compiler quiet */
452+
nullable_rels = NULL;
437453
leftjoinlist = rightjoinlist = NIL;
438454
break;
439455
}
440456

457+
/* Report all rels that will be nulled anywhere in the jointree */
458+
root->nullable_baserels = bms_add_members(root->nullable_baserels,
459+
nullable_rels);
460+
441461
/*
442462
* For an OJ, form the SpecialJoinInfo now, because we need the OJ's
443463
* semantic scope (ojscope) to pass to distribute_qual_to_rels. But

src/include/nodes/relation.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ typedef struct PlannerInfo
151151
/*
152152
* all_baserels is a Relids set of all base relids (but not "other"
153153
* relids) in the query; that is, the Relids identifier of the final join
154-
* we need to form.
154+
* we need to form. This is computed in make_one_rel, just before we
155+
* start making Paths.
155156
*/
156157
Relids all_baserels;
157158

@@ -244,6 +245,9 @@ typedef struct PlannerInfo
244245

245246
/* Added post-release, will be in a saner place in 9.3: */
246247
List *plan_params; /* list of PlannerParamItems, see below */
248+
249+
/* This will be in a saner place in 9.4: */
250+
Relids nullable_baserels;
247251
} PlannerInfo;
248252

249253

src/include/optimizer/paths.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ extern Expr *canonicalize_ec_expression(Expr *expr,
106106
extern void reconsider_outer_join_clauses(PlannerInfo *root);
107107
extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
108108
Expr *expr,
109+
Relids nullable_relids,
109110
List *opfamilies,
110111
Oid opcintype,
111112
Oid collation,

src/test/regress/expected/join.out

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2905,6 +2905,35 @@ select f1, unique2, case when unique2 is null then f1 else 0 end
29052905
0 | 0 | 0
29062906
(1 row)
29072907

2908+
--
2909+
-- another case with equivalence clauses above outer joins (bug #8591)
2910+
--
2911+
explain (costs off)
2912+
select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
2913+
from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
2914+
where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44;
2915+
QUERY PLAN
2916+
---------------------------------------------------------------------------------------------
2917+
Nested Loop Left Join
2918+
-> Nested Loop Left Join
2919+
Filter: (COALESCE(b.twothousand, a.twothousand) = 44)
2920+
-> Index Scan using tenk1_unique2 on tenk1 a
2921+
Index Cond: (unique2 = 5530)
2922+
-> Bitmap Heap Scan on tenk1 b
2923+
Recheck Cond: (thousand = a.unique1)
2924+
-> Bitmap Index Scan on tenk1_thous_tenthous
2925+
Index Cond: (thousand = a.unique1)
2926+
-> Index Scan using tenk1_unique2 on tenk1 c
2927+
Index Cond: ((unique2 = COALESCE(b.twothousand, a.twothousand)) AND (unique2 = 44))
2928+
(11 rows)
2929+
2930+
select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
2931+
from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
2932+
where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44;
2933+
unique1 | unique1 | unique1 | coalesce
2934+
---------+---------+---------+----------
2935+
(0 rows)
2936+
29082937
--
29092938
-- test ability to push constants through outer join clauses
29102939
--

src/test/regress/sql/join.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,19 @@ select f1, unique2, case when unique2 is null then f1 else 0 end
788788
from int4_tbl a left join tenk1 b on f1 = unique2
789789
where (case when unique2 is null then f1 else 0 end) = 0;
790790

791+
--
792+
-- another case with equivalence clauses above outer joins (bug #8591)
793+
--
794+
795+
explain (costs off)
796+
select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
797+
from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
798+
where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44;
799+
800+
select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
801+
from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
802+
where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44;
803+
791804
--
792805
-- test ability to push constants through outer join clauses
793806
--

0 commit comments

Comments
 (0)