Skip to content

Commit 5372275

Browse files
committed
Fix planning of parameterized appendrel paths with expensive join quals.
The code in set_append_rel_pathlist() for building parameterized paths for append relations (inheritance and UNION ALL combinations) supposed that the cheapest regular path for a child relation would still be cheapest when reparameterized. Which might not be the case, particularly if the added join conditions are expensive to compute, as in a recent example from Jeff Janes. Fix it to compare child path costs *after* reparameterizing. We can short-circuit that if the cheapest pre-existing path is already parameterized correctly, which seems likely to be true often enough to be worth checking for. Back-patch to 9.2 where parameterized paths were introduced.
1 parent 9b2543a commit 5372275

File tree

3 files changed

+140
-19
lines changed

3 files changed

+140
-19
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 85 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
6868
static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
6969
List *live_childrels,
7070
List *all_child_pathkeys);
71+
static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
72+
RelOptInfo *rel,
73+
Relids required_outer);
7174
static List *accumulate_append_subpath(List *subpaths, Path *path);
7275
static void set_dummy_rel_pathlist(RelOptInfo *rel);
7376
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -831,28 +834,18 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
831834
foreach(lcr, live_childrels)
832835
{
833836
RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
834-
Path *cheapest_total;
837+
Path *subpath;
835838

836-
cheapest_total =
837-
get_cheapest_path_for_pathkeys(childrel->pathlist,
838-
NIL,
839-
required_outer,
840-
TOTAL_COST);
841-
Assert(cheapest_total != NULL);
842-
843-
/* Children must have exactly the desired parameterization */
844-
if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer))
839+
subpath = get_cheapest_parameterized_child_path(root,
840+
childrel,
841+
required_outer);
842+
if (subpath == NULL)
845843
{
846-
cheapest_total = reparameterize_path(root, cheapest_total,
847-
required_outer, 1.0);
848-
if (cheapest_total == NULL)
849-
{
850-
subpaths_valid = false;
851-
break;
852-
}
844+
/* failed to make a suitable path for this child */
845+
subpaths_valid = false;
846+
break;
853847
}
854-
855-
subpaths = accumulate_append_subpath(subpaths, cheapest_total);
848+
subpaths = accumulate_append_subpath(subpaths, subpath);
856849
}
857850

858851
if (subpaths_valid)
@@ -962,6 +955,79 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
962955
}
963956
}
964957

958+
/*
959+
* get_cheapest_parameterized_child_path
960+
* Get cheapest path for this relation that has exactly the requested
961+
* parameterization.
962+
*
963+
* Returns NULL if unable to create such a path.
964+
*/
965+
static Path *
966+
get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
967+
Relids required_outer)
968+
{
969+
Path *cheapest;
970+
ListCell *lc;
971+
972+
/*
973+
* Look up the cheapest existing path with no more than the needed
974+
* parameterization. If it has exactly the needed parameterization, we're
975+
* done.
976+
*/
977+
cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
978+
NIL,
979+
required_outer,
980+
TOTAL_COST);
981+
Assert(cheapest != NULL);
982+
if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
983+
return cheapest;
984+
985+
/*
986+
* Otherwise, we can "reparameterize" an existing path to match the given
987+
* parameterization, which effectively means pushing down additional
988+
* joinquals to be checked within the path's scan. However, some existing
989+
* paths might check the available joinquals already while others don't;
990+
* therefore, it's not clear which existing path will be cheapest after
991+
* reparameterization. We have to go through them all and find out.
992+
*/
993+
cheapest = NULL;
994+
foreach(lc, rel->pathlist)
995+
{
996+
Path *path = (Path *) lfirst(lc);
997+
998+
/* Can't use it if it needs more than requested parameterization */
999+
if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
1000+
continue;
1001+
1002+
/*
1003+
* Reparameterization can only increase the path's cost, so if it's
1004+
* already more expensive than the current cheapest, forget it.
1005+
*/
1006+
if (cheapest != NULL &&
1007+
compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
1008+
continue;
1009+
1010+
/* Reparameterize if needed, then recheck cost */
1011+
if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
1012+
{
1013+
path = reparameterize_path(root, path, required_outer, 1.0);
1014+
if (path == NULL)
1015+
continue; /* failed to reparameterize this one */
1016+
Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
1017+
1018+
if (cheapest != NULL &&
1019+
compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
1020+
continue;
1021+
}
1022+
1023+
/* We have a new best path */
1024+
cheapest = path;
1025+
}
1026+
1027+
/* Return the best path, or NULL if we found no suitable candidate */
1028+
return cheapest;
1029+
}
1030+
9651031
/*
9661032
* accumulate_append_subpath
9671033
* Add a subpath to the list being built for an Append or MergeAppend

src/test/regress/expected/union.out

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,3 +603,37 @@ WHERE x > 3;
603603
2 | 4
604604
(1 row)
605605

606+
-- Test proper handling of parameterized appendrel paths when the
607+
-- potential join qual is expensive
608+
create function expensivefunc(int) returns int
609+
language plpgsql immutable strict cost 10000
610+
as $$begin return $1; end$$;
611+
create temp table t3 as select generate_series(-1000,1000) as x;
612+
create index t3i on t3 (expensivefunc(x));
613+
analyze t3;
614+
explain (costs off)
615+
select * from
616+
(select * from t3 a union all select * from t3 b) ss
617+
join int4_tbl on f1 = expensivefunc(x);
618+
QUERY PLAN
619+
------------------------------------------------------------
620+
Nested Loop
621+
-> Seq Scan on int4_tbl
622+
-> Append
623+
-> Index Scan using t3i on t3 a
624+
Index Cond: (expensivefunc(x) = int4_tbl.f1)
625+
-> Index Scan using t3i on t3 b
626+
Index Cond: (expensivefunc(x) = int4_tbl.f1)
627+
(7 rows)
628+
629+
select * from
630+
(select * from t3 a union all select * from t3 b) ss
631+
join int4_tbl on f1 = expensivefunc(x);
632+
x | f1
633+
---+----
634+
0 | 0
635+
0 | 0
636+
(2 rows)
637+
638+
drop table t3;
639+
drop function expensivefunc(int);

src/test/regress/sql/union.sql

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,3 +249,24 @@ SELECT * FROM
249249
UNION
250250
SELECT 2 AS t, 4 AS x) ss
251251
WHERE x > 3;
252+
253+
-- Test proper handling of parameterized appendrel paths when the
254+
-- potential join qual is expensive
255+
create function expensivefunc(int) returns int
256+
language plpgsql immutable strict cost 10000
257+
as $$begin return $1; end$$;
258+
259+
create temp table t3 as select generate_series(-1000,1000) as x;
260+
create index t3i on t3 (expensivefunc(x));
261+
analyze t3;
262+
263+
explain (costs off)
264+
select * from
265+
(select * from t3 a union all select * from t3 b) ss
266+
join int4_tbl on f1 = expensivefunc(x);
267+
select * from
268+
(select * from t3 a union all select * from t3 b) ss
269+
join int4_tbl on f1 = expensivefunc(x);
270+
271+
drop table t3;
272+
drop function expensivefunc(int);

0 commit comments

Comments
 (0)