Skip to content

Commit 946cdf9

Browse files
committed
Repair issues with faulty generation of merge-append plans.
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag: it would generate the expected targetlist but then it felt free to add resjunk sort targets to it. This demonstrably leads to assertion failures in v11 and HEAD, and it's probably just accidental that we don't see the same in older branches. I've not looked into whether there would be any real-world consequences in non-assert builds. In HEAD, create_append_plan has sprouted the same problem, so fix that too (although we do not have any test cases that seem able to reach that bug). This is an oversight in commit 3fc6e2d which invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that came in. convert_subquery_pathkeys would create pathkeys for subquery output values if they match any EquivalenceClass known in the outer query and are available in the subquery's syntactic targetlist. However, the second part of that condition is wrong, because such values might not appear in the subquery relation's reltarget list, which would mean that they couldn't be accessed above the level of the subquery scan. We must check that they appear in the reltarget list, instead. This can lead to dropping knowledge about the subquery's sort ordering, but I believe it's okay, because any sort key that the outer query actually has any interest in would appear in the reltarget list. This second issue is of very long standing, but right now there's no evidence that it causes observable problems before 9.6, so I refrained from back-patching further than that. We can revisit that choice if somebody finds a way to make it cause problems in older branches. (Developing useful test cases for these issues is really problematic; fixing convert_subquery_pathkeys removes the only known way to exhibit the create_merge_append_plan bug, and neither of the test cases added by this patch causes a problem in all branches, even when considering the issues separately.) The second issue explains bug #15795 from Suresh Kumar R ("could not find pathkey item to sort" with nested DISTINCT queries). I stumbled across the first issue while investigating that. Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
1 parent db8802a commit 946cdf9

File tree

4 files changed

+180
-25
lines changed

4 files changed

+180
-25
lines changed

src/backend/optimizer/path/pathkeys.c

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030

3131
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
32+
static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle);
3233
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
3334

3435

@@ -599,9 +600,11 @@ build_expression_pathkey(PlannerInfo *root,
599600
* 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
600601
* 'subquery_tlist': the subquery's output targetlist, in its terms.
601602
*
602-
* It is not necessary for caller to do truncate_useless_pathkeys(),
603-
* because we select keys in a way that takes usefulness of the keys into
604-
* account.
603+
* We intentionally don't do truncate_useless_pathkeys() here, because there
604+
* are situations where seeing the raw ordering of the subquery is helpful.
605+
* For example, if it returns ORDER BY x DESC, that may prompt us to
606+
* construct a mergejoin using DESC order rather than ASC order; but the
607+
* right_merge_direction heuristic would have us throw the knowledge away.
605608
*/
606609
List *
607610
convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
@@ -627,22 +630,22 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
627630
* that same targetlist entry.
628631
*/
629632
TargetEntry *tle;
633+
Var *outer_var;
630634

631635
if (sub_eclass->ec_sortref == 0) /* can't happen */
632636
elog(ERROR, "volatile EquivalenceClass has no sortref");
633637
tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
634638
Assert(tle);
635-
/* resjunk items aren't visible to outer query */
636-
if (!tle->resjunk)
639+
/* Is TLE actually available to the outer query? */
640+
outer_var = find_var_for_subquery_tle(rel, tle);
641+
if (outer_var)
637642
{
638643
/* We can represent this sub_pathkey */
639644
EquivalenceMember *sub_member;
640-
Expr *outer_expr;
641645
EquivalenceClass *outer_ec;
642646

643647
Assert(list_length(sub_eclass->ec_members) == 1);
644648
sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
645-
outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle);
646649

647650
/*
648651
* Note: it might look funny to be setting sortref = 0 for a
@@ -656,7 +659,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
656659
*/
657660
outer_ec =
658661
get_eclass_for_sort_expr(root,
659-
outer_expr,
662+
(Expr *) outer_var,
660663
NULL,
661664
sub_eclass->ec_opfamilies,
662665
sub_member->em_datatype,
@@ -713,14 +716,15 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
713716
foreach(k, subquery_tlist)
714717
{
715718
TargetEntry *tle = (TargetEntry *) lfirst(k);
719+
Var *outer_var;
716720
Expr *tle_expr;
717-
Expr *outer_expr;
718721
EquivalenceClass *outer_ec;
719722
PathKey *outer_pk;
720723
int score;
721724

722-
/* resjunk items aren't visible to outer query */
723-
if (tle->resjunk)
725+
/* Is TLE actually available to the outer query? */
726+
outer_var = find_var_for_subquery_tle(rel, tle);
727+
if (!outer_var)
724728
continue;
725729

726730
/*
@@ -735,16 +739,9 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
735739
if (!equal(tle_expr, sub_expr))
736740
continue;
737741

738-
/*
739-
* Build a representation of this targetlist entry as an
740-
* outer Var.
741-
*/
742-
outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid,
743-
tle);
744-
745-
/* See if we have a matching EC for that */
742+
/* See if we have a matching EC for the TLE */
746743
outer_ec = get_eclass_for_sort_expr(root,
747-
outer_expr,
744+
(Expr *) outer_var,
748745
NULL,
749746
sub_eclass->ec_opfamilies,
750747
sub_expr_type,
@@ -801,6 +798,41 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
801798
return retval;
802799
}
803800

801+
/*
802+
* find_var_for_subquery_tle
803+
*
804+
* If the given subquery tlist entry is due to be emitted by the subquery's
805+
* scan node, return a Var for it, else return NULL.
806+
*
807+
* We need this to ensure that we don't return pathkeys describing values
808+
* that are unavailable above the level of the subquery scan.
809+
*/
810+
static Var *
811+
find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
812+
{
813+
ListCell *lc;
814+
815+
/* If the TLE is resjunk, it's certainly not visible to the outer query */
816+
if (tle->resjunk)
817+
return NULL;
818+
819+
/* Search the rel's targetlist to see what it will return */
820+
foreach(lc, rel->reltarget->exprs)
821+
{
822+
Var *var = (Var *) lfirst(lc);
823+
824+
/* Ignore placeholders */
825+
if (!IsA(var, Var))
826+
continue;
827+
Assert(var->varno == rel->relid);
828+
829+
/* If we find a Var referencing this TLE, we're good */
830+
if (var->varattno == tle->resno)
831+
return copyObject(var); /* Make a copy for safety */
832+
}
833+
return NULL;
834+
}
835+
804836
/*
805837
* build_join_pathkeys
806838
* Build the path keys for a join relation constructed by mergejoin or

src/backend/optimizer/plan/createplan.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ static Plan *create_gating_plan(PlannerInfo *root, Path *path, Plan *plan,
7878
List *gating_quals);
7979
static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path);
8080
static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path);
81-
static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path);
81+
static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
82+
int flags);
8283
static Result *create_result_plan(PlannerInfo *root, ResultPath *best_path);
8384
static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path);
8485
static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path,
@@ -386,7 +387,8 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
386387
break;
387388
case T_MergeAppend:
388389
plan = create_merge_append_plan(root,
389-
(MergeAppendPath *) best_path);
390+
(MergeAppendPath *) best_path,
391+
flags);
390392
break;
391393
case T_Result:
392394
if (IsA(best_path, ProjectionPath))
@@ -1064,11 +1066,14 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
10641066
* Returns a Plan node.
10651067
*/
10661068
static Plan *
1067-
create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
1069+
create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
1070+
int flags)
10681071
{
10691072
MergeAppend *node = makeNode(MergeAppend);
10701073
Plan *plan = &node->plan;
10711074
List *tlist = build_path_tlist(root, &best_path->path);
1075+
int orig_tlist_length = list_length(tlist);
1076+
bool tlist_was_changed;
10721077
List *pathkeys = best_path->path.pathkeys;
10731078
List *subplans = NIL;
10741079
ListCell *subpaths;
@@ -1085,7 +1090,12 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
10851090
plan->lefttree = NULL;
10861091
plan->righttree = NULL;
10871092

1088-
/* Compute sort column info, and adjust MergeAppend's tlist as needed */
1093+
/*
1094+
* Compute sort column info, and adjust MergeAppend's tlist as needed.
1095+
* Because we pass adjust_tlist_in_place = true, we may ignore the
1096+
* function result; it must be the same plan node. However, we then need
1097+
* to detect whether any tlist entries were added.
1098+
*/
10891099
(void) prepare_sort_from_pathkeys(plan, pathkeys,
10901100
best_path->path.parent->relids,
10911101
NULL,
@@ -1095,6 +1105,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
10951105
&node->sortOperators,
10961106
&node->collations,
10971107
&node->nullsFirst);
1108+
tlist_was_changed = (orig_tlist_length != list_length(plan->targetlist));
10981109

10991110
/*
11001111
* Now prepare the child plans. We must apply prepare_sort_from_pathkeys
@@ -1160,7 +1171,18 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
11601171
node->partitioned_rels = best_path->partitioned_rels;
11611172
node->mergeplans = subplans;
11621173

1163-
return (Plan *) node;
1174+
/*
1175+
* If prepare_sort_from_pathkeys added sort columns, but we were told to
1176+
* produce either the exact tlist or a narrow tlist, we should get rid of
1177+
* the sort columns again. We must inject a projection node to do so.
1178+
*/
1179+
if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST)))
1180+
{
1181+
tlist = list_truncate(list_copy(plan->targetlist), orig_tlist_length);
1182+
return inject_projection_plan(plan, tlist, plan->parallel_safe);
1183+
}
1184+
else
1185+
return plan;
11641186
}
11651187

11661188
/*

src/test/regress/expected/union.out

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,79 @@ ORDER BY x;
916916
2 | 4
917917
(1 row)
918918

919+
-- Test cases where the native ordering of a sub-select has more pathkeys
920+
-- than the outer query cares about
921+
explain (costs off)
922+
select distinct q1 from
923+
(select distinct * from int8_tbl i81
924+
union all
925+
select distinct * from int8_tbl i82) ss
926+
where q2 = q2;
927+
QUERY PLAN
928+
--------------------------------------------------------
929+
Unique
930+
-> Merge Append
931+
Sort Key: "*SELECT* 1".q1
932+
-> Subquery Scan on "*SELECT* 1"
933+
-> Unique
934+
-> Sort
935+
Sort Key: i81.q1, i81.q2
936+
-> Seq Scan on int8_tbl i81
937+
Filter: (q2 = q2)
938+
-> Subquery Scan on "*SELECT* 2"
939+
-> Unique
940+
-> Sort
941+
Sort Key: i82.q1, i82.q2
942+
-> Seq Scan on int8_tbl i82
943+
Filter: (q2 = q2)
944+
(15 rows)
945+
946+
select distinct q1 from
947+
(select distinct * from int8_tbl i81
948+
union all
949+
select distinct * from int8_tbl i82) ss
950+
where q2 = q2;
951+
q1
952+
------------------
953+
123
954+
4567890123456789
955+
(2 rows)
956+
957+
explain (costs off)
958+
select distinct q1 from
959+
(select distinct * from int8_tbl i81
960+
union all
961+
select distinct * from int8_tbl i82) ss
962+
where -q1 = q2;
963+
QUERY PLAN
964+
--------------------------------------------------------
965+
Unique
966+
-> Merge Append
967+
Sort Key: "*SELECT* 1".q1
968+
-> Subquery Scan on "*SELECT* 1"
969+
-> Unique
970+
-> Sort
971+
Sort Key: i81.q1, i81.q2
972+
-> Seq Scan on int8_tbl i81
973+
Filter: ((- q1) = q2)
974+
-> Subquery Scan on "*SELECT* 2"
975+
-> Unique
976+
-> Sort
977+
Sort Key: i82.q1, i82.q2
978+
-> Seq Scan on int8_tbl i82
979+
Filter: ((- q1) = q2)
980+
(15 rows)
981+
982+
select distinct q1 from
983+
(select distinct * from int8_tbl i81
984+
union all
985+
select distinct * from int8_tbl i82) ss
986+
where -q1 = q2;
987+
q1
988+
------------------
989+
4567890123456789
990+
(1 row)
991+
919992
-- Test proper handling of parameterized appendrel paths when the
920993
-- potential join qual is expensive
921994
create function expensivefunc(int) returns int

src/test/regress/sql/union.sql

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,34 @@ SELECT * FROM
379379
WHERE x > 3
380380
ORDER BY x;
381381

382+
-- Test cases where the native ordering of a sub-select has more pathkeys
383+
-- than the outer query cares about
384+
explain (costs off)
385+
select distinct q1 from
386+
(select distinct * from int8_tbl i81
387+
union all
388+
select distinct * from int8_tbl i82) ss
389+
where q2 = q2;
390+
391+
select distinct q1 from
392+
(select distinct * from int8_tbl i81
393+
union all
394+
select distinct * from int8_tbl i82) ss
395+
where q2 = q2;
396+
397+
explain (costs off)
398+
select distinct q1 from
399+
(select distinct * from int8_tbl i81
400+
union all
401+
select distinct * from int8_tbl i82) ss
402+
where -q1 = q2;
403+
404+
select distinct q1 from
405+
(select distinct * from int8_tbl i81
406+
union all
407+
select distinct * from int8_tbl i82) ss
408+
where -q1 = q2;
409+
382410
-- Test proper handling of parameterized appendrel paths when the
383411
-- potential join qual is expensive
384412
create function expensivefunc(int) returns int

0 commit comments

Comments
 (0)