Skip to content

Commit e7eed0b

Browse files
committed
Repair issues with faulty generation of merge-append plans.
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag: it would generate the expected targetlist but then it felt free to add resjunk sort targets to it. This demonstrably leads to assertion failures in v11 and HEAD, and it's probably just accidental that we don't see the same in older branches. I've not looked into whether there would be any real-world consequences in non-assert builds. In HEAD, create_append_plan has sprouted the same problem, so fix that too (although we do not have any test cases that seem able to reach that bug). This is an oversight in commit 3fc6e2d which invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that came in. convert_subquery_pathkeys would create pathkeys for subquery output values if they match any EquivalenceClass known in the outer query and are available in the subquery's syntactic targetlist. However, the second part of that condition is wrong, because such values might not appear in the subquery relation's reltarget list, which would mean that they couldn't be accessed above the level of the subquery scan. We must check that they appear in the reltarget list, instead. This can lead to dropping knowledge about the subquery's sort ordering, but I believe it's okay, because any sort key that the outer query actually has any interest in would appear in the reltarget list. This second issue is of very long standing, but right now there's no evidence that it causes observable problems before 9.6, so I refrained from back-patching further than that. We can revisit that choice if somebody finds a way to make it cause problems in older branches. (Developing useful test cases for these issues is really problematic; fixing convert_subquery_pathkeys removes the only known way to exhibit the create_merge_append_plan bug, and neither of the test cases added by this patch causes a problem in all branches, even when considering the issues separately.) The second issue explains bug #15795 from Suresh Kumar R ("could not find pathkey item to sort" with nested DISTINCT queries). I stumbled across the first issue while investigating that. Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
1 parent 25f12ac commit e7eed0b

File tree

4 files changed

+180
-25
lines changed

4 files changed

+180
-25
lines changed

src/backend/optimizer/path/pathkeys.c

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030

3131
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
32+
static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle);
3233
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
3334

3435

@@ -608,9 +609,11 @@ build_expression_pathkey(PlannerInfo *root,
608609
* 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
609610
* 'subquery_tlist': the subquery's output targetlist, in its terms.
610611
*
611-
* It is not necessary for caller to do truncate_useless_pathkeys(),
612-
* because we select keys in a way that takes usefulness of the keys into
613-
* account.
612+
* We intentionally don't do truncate_useless_pathkeys() here, because there
613+
* are situations where seeing the raw ordering of the subquery is helpful.
614+
* For example, if it returns ORDER BY x DESC, that may prompt us to
615+
* construct a mergejoin using DESC order rather than ASC order; but the
616+
* right_merge_direction heuristic would have us throw the knowledge away.
614617
*/
615618
List *
616619
convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
@@ -636,22 +639,22 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
636639
* that same targetlist entry.
637640
*/
638641
TargetEntry *tle;
642+
Var *outer_var;
639643

640644
if (sub_eclass->ec_sortref == 0) /* can't happen */
641645
elog(ERROR, "volatile EquivalenceClass has no sortref");
642646
tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
643647
Assert(tle);
644-
/* resjunk items aren't visible to outer query */
645-
if (!tle->resjunk)
648+
/* Is TLE actually available to the outer query? */
649+
outer_var = find_var_for_subquery_tle(rel, tle);
650+
if (outer_var)
646651
{
647652
/* We can represent this sub_pathkey */
648653
EquivalenceMember *sub_member;
649-
Expr *outer_expr;
650654
EquivalenceClass *outer_ec;
651655

652656
Assert(list_length(sub_eclass->ec_members) == 1);
653657
sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
654-
outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle);
655658

656659
/*
657660
* Note: it might look funny to be setting sortref = 0 for a
@@ -665,7 +668,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
665668
*/
666669
outer_ec =
667670
get_eclass_for_sort_expr(root,
668-
outer_expr,
671+
(Expr *) outer_var,
669672
NULL,
670673
sub_eclass->ec_opfamilies,
671674
sub_member->em_datatype,
@@ -722,14 +725,15 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
722725
foreach(k, subquery_tlist)
723726
{
724727
TargetEntry *tle = (TargetEntry *) lfirst(k);
728+
Var *outer_var;
725729
Expr *tle_expr;
726-
Expr *outer_expr;
727730
EquivalenceClass *outer_ec;
728731
PathKey *outer_pk;
729732
int score;
730733

731-
/* resjunk items aren't visible to outer query */
732-
if (tle->resjunk)
734+
/* Is TLE actually available to the outer query? */
735+
outer_var = find_var_for_subquery_tle(rel, tle);
736+
if (!outer_var)
733737
continue;
734738

735739
/*
@@ -744,16 +748,9 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
744748
if (!equal(tle_expr, sub_expr))
745749
continue;
746750

747-
/*
748-
* Build a representation of this targetlist entry as an
749-
* outer Var.
750-
*/
751-
outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid,
752-
tle);
753-
754-
/* See if we have a matching EC for that */
751+
/* See if we have a matching EC for the TLE */
755752
outer_ec = get_eclass_for_sort_expr(root,
756-
outer_expr,
753+
(Expr *) outer_var,
757754
NULL,
758755
sub_eclass->ec_opfamilies,
759756
sub_expr_type,
@@ -810,6 +807,41 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
810807
return retval;
811808
}
812809

810+
/*
811+
* find_var_for_subquery_tle
812+
*
813+
* If the given subquery tlist entry is due to be emitted by the subquery's
814+
* scan node, return a Var for it, else return NULL.
815+
*
816+
* We need this to ensure that we don't return pathkeys describing values
817+
* that are unavailable above the level of the subquery scan.
818+
*/
819+
static Var *
820+
find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
821+
{
822+
ListCell *lc;
823+
824+
/* If the TLE is resjunk, it's certainly not visible to the outer query */
825+
if (tle->resjunk)
826+
return NULL;
827+
828+
/* Search the rel's targetlist to see what it will return */
829+
foreach(lc, rel->reltarget->exprs)
830+
{
831+
Var *var = (Var *) lfirst(lc);
832+
833+
/* Ignore placeholders */
834+
if (!IsA(var, Var))
835+
continue;
836+
Assert(var->varno == rel->relid);
837+
838+
/* If we find a Var referencing this TLE, we're good */
839+
if (var->varattno == tle->resno)
840+
return copyObject(var); /* Make a copy for safety */
841+
}
842+
return NULL;
843+
}
844+
813845
/*
814846
* build_join_pathkeys
815847
* Build the path keys for a join relation constructed by mergejoin or

src/backend/optimizer/plan/createplan.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ static Plan *create_gating_plan(PlannerInfo *root, Path *path, Plan *plan,
8383
List *gating_quals);
8484
static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path);
8585
static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path);
86-
static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path);
86+
static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
87+
int flags);
8788
static Result *create_result_plan(PlannerInfo *root, ResultPath *best_path);
8889
static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path);
8990
static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path,
@@ -391,7 +392,8 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
391392
break;
392393
case T_MergeAppend:
393394
plan = create_merge_append_plan(root,
394-
(MergeAppendPath *) best_path);
395+
(MergeAppendPath *) best_path,
396+
flags);
395397
break;
396398
case T_Result:
397399
if (IsA(best_path, ProjectionPath))
@@ -1130,11 +1132,14 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
11301132
* Returns a Plan node.
11311133
*/
11321134
static Plan *
1133-
create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
1135+
create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
1136+
int flags)
11341137
{
11351138
MergeAppend *node = makeNode(MergeAppend);
11361139
Plan *plan = &node->plan;
11371140
List *tlist = build_path_tlist(root, &best_path->path);
1141+
int orig_tlist_length = list_length(tlist);
1142+
bool tlist_was_changed;
11381143
List *pathkeys = best_path->path.pathkeys;
11391144
List *subplans = NIL;
11401145
ListCell *subpaths;
@@ -1151,7 +1156,12 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
11511156
plan->lefttree = NULL;
11521157
plan->righttree = NULL;
11531158

1154-
/* Compute sort column info, and adjust MergeAppend's tlist as needed */
1159+
/*
1160+
* Compute sort column info, and adjust MergeAppend's tlist as needed.
1161+
* Because we pass adjust_tlist_in_place = true, we may ignore the
1162+
* function result; it must be the same plan node. However, we then need
1163+
* to detect whether any tlist entries were added.
1164+
*/
11551165
(void) prepare_sort_from_pathkeys(plan, pathkeys,
11561166
best_path->path.parent->relids,
11571167
NULL,
@@ -1161,6 +1171,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
11611171
&node->sortOperators,
11621172
&node->collations,
11631173
&node->nullsFirst);
1174+
tlist_was_changed = (orig_tlist_length != list_length(plan->targetlist));
11641175

11651176
/*
11661177
* Now prepare the child plans. We must apply prepare_sort_from_pathkeys
@@ -1227,7 +1238,18 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
12271238
flatten_partitioned_rels(best_path->partitioned_rels);
12281239
node->mergeplans = subplans;
12291240

1230-
return (Plan *) node;
1241+
/*
1242+
* If prepare_sort_from_pathkeys added sort columns, but we were told to
1243+
* produce either the exact tlist or a narrow tlist, we should get rid of
1244+
* the sort columns again. We must inject a projection node to do so.
1245+
*/
1246+
if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST)))
1247+
{
1248+
tlist = list_truncate(list_copy(plan->targetlist), orig_tlist_length);
1249+
return inject_projection_plan(plan, tlist, plan->parallel_safe);
1250+
}
1251+
else
1252+
return plan;
12311253
}
12321254

12331255
/*

src/test/regress/expected/union.out

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,79 @@ ORDER BY x;
916916
2 | 4
917917
(1 row)
918918

919+
-- Test cases where the native ordering of a sub-select has more pathkeys
920+
-- than the outer query cares about
921+
explain (costs off)
922+
select distinct q1 from
923+
(select distinct * from int8_tbl i81
924+
union all
925+
select distinct * from int8_tbl i82) ss
926+
where q2 = q2;
927+
QUERY PLAN
928+
----------------------------------------------------------
929+
Unique
930+
-> Merge Append
931+
Sort Key: "*SELECT* 1".q1
932+
-> Subquery Scan on "*SELECT* 1"
933+
-> Unique
934+
-> Sort
935+
Sort Key: i81.q1, i81.q2
936+
-> Seq Scan on int8_tbl i81
937+
Filter: (q2 IS NOT NULL)
938+
-> Subquery Scan on "*SELECT* 2"
939+
-> Unique
940+
-> Sort
941+
Sort Key: i82.q1, i82.q2
942+
-> Seq Scan on int8_tbl i82
943+
Filter: (q2 IS NOT NULL)
944+
(15 rows)
945+
946+
select distinct q1 from
947+
(select distinct * from int8_tbl i81
948+
union all
949+
select distinct * from int8_tbl i82) ss
950+
where q2 = q2;
951+
q1
952+
------------------
953+
123
954+
4567890123456789
955+
(2 rows)
956+
957+
explain (costs off)
958+
select distinct q1 from
959+
(select distinct * from int8_tbl i81
960+
union all
961+
select distinct * from int8_tbl i82) ss
962+
where -q1 = q2;
963+
QUERY PLAN
964+
--------------------------------------------------------
965+
Unique
966+
-> Merge Append
967+
Sort Key: "*SELECT* 1".q1
968+
-> Subquery Scan on "*SELECT* 1"
969+
-> Unique
970+
-> Sort
971+
Sort Key: i81.q1, i81.q2
972+
-> Seq Scan on int8_tbl i81
973+
Filter: ((- q1) = q2)
974+
-> Subquery Scan on "*SELECT* 2"
975+
-> Unique
976+
-> Sort
977+
Sort Key: i82.q1, i82.q2
978+
-> Seq Scan on int8_tbl i82
979+
Filter: ((- q1) = q2)
980+
(15 rows)
981+
982+
select distinct q1 from
983+
(select distinct * from int8_tbl i81
984+
union all
985+
select distinct * from int8_tbl i82) ss
986+
where -q1 = q2;
987+
q1
988+
------------------
989+
4567890123456789
990+
(1 row)
991+
919992
-- Test proper handling of parameterized appendrel paths when the
920993
-- potential join qual is expensive
921994
create function expensivefunc(int) returns int

src/test/regress/sql/union.sql

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,34 @@ SELECT * FROM
379379
WHERE x > 3
380380
ORDER BY x;
381381

382+
-- Test cases where the native ordering of a sub-select has more pathkeys
383+
-- than the outer query cares about
384+
explain (costs off)
385+
select distinct q1 from
386+
(select distinct * from int8_tbl i81
387+
union all
388+
select distinct * from int8_tbl i82) ss
389+
where q2 = q2;
390+
391+
select distinct q1 from
392+
(select distinct * from int8_tbl i81
393+
union all
394+
select distinct * from int8_tbl i82) ss
395+
where q2 = q2;
396+
397+
explain (costs off)
398+
select distinct q1 from
399+
(select distinct * from int8_tbl i81
400+
union all
401+
select distinct * from int8_tbl i82) ss
402+
where -q1 = q2;
403+
404+
select distinct q1 from
405+
(select distinct * from int8_tbl i81
406+
union all
407+
select distinct * from int8_tbl i82) ss
408+
where -q1 = q2;
409+
382410
-- Test proper handling of parameterized appendrel paths when the
383411
-- potential join qual is expensive
384412
create function expensivefunc(int) returns int

0 commit comments

Comments
 (0)