Skip to content

Commit 1bd25c0

Browse files
committed
Fix planning of parameterized appendrel paths with expensive join quals.
The code in set_append_rel_pathlist() for building parameterized paths for append relations (inheritance and UNION ALL combinations) supposed that the cheapest regular path for a child relation would still be cheapest when reparameterized. Which might not be the case, particularly if the added join conditions are expensive to compute, as in a recent example from Jeff Janes. Fix it to compare child path costs *after* reparameterizing. We can short-circuit that if the cheapest pre-existing path is already parameterized correctly, which seems likely to be true often enough to be worth checking for. Back-patch to 9.2 where parameterized paths were introduced.
1 parent 62ba0c1 commit 1bd25c0

File tree

3 files changed

+142
-21
lines changed

3 files changed

+142
-21
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 87 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
6868
static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
6969
List *live_childrels,
7070
List *all_child_pathkeys);
71+
static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
72+
RelOptInfo *rel,
73+
Relids required_outer);
7174
static List *accumulate_append_subpath(List *subpaths, Path *path);
7275
static void set_dummy_rel_pathlist(RelOptInfo *rel);
7376
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -803,39 +806,29 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
803806
foreach(l, all_child_outers)
804807
{
805808
Relids required_outer = (Relids) lfirst(l);
806-
bool ok = true;
809+
bool subpaths_valid = true;
807810
ListCell *lcr;
808811

809812
/* Select the child paths for an Append with this parameterization */
810813
subpaths = NIL;
811814
foreach(lcr, live_childrels)
812815
{
813816
RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
814-
Path *cheapest_total;
817+
Path *subpath;
815818

816-
cheapest_total =
817-
get_cheapest_path_for_pathkeys(childrel->pathlist,
818-
NIL,
819-
required_outer,
820-
TOTAL_COST);
821-
Assert(cheapest_total != NULL);
822-
823-
/* Children must have exactly the desired parameterization */
824-
if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer))
819+
subpath = get_cheapest_parameterized_child_path(root,
820+
childrel,
821+
required_outer);
822+
if (subpath == NULL)
825823
{
826-
cheapest_total = reparameterize_path(root, cheapest_total,
827-
required_outer, 1.0);
828-
if (cheapest_total == NULL)
829-
{
830-
ok = false;
831-
break;
832-
}
824+
/* failed to make a suitable path for this child */
825+
subpaths_valid = false;
826+
break;
833827
}
834-
835-
subpaths = accumulate_append_subpath(subpaths, cheapest_total);
828+
subpaths = accumulate_append_subpath(subpaths, subpath);
836829
}
837830

838-
if (ok)
831+
if (subpaths_valid)
839832
add_path(rel, (Path *)
840833
create_append_path(rel, subpaths, required_outer));
841834
}
@@ -941,6 +934,79 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
941934
}
942935
}
943936

937+
/*
938+
* get_cheapest_parameterized_child_path
939+
* Get cheapest path for this relation that has exactly the requested
940+
* parameterization.
941+
*
942+
* Returns NULL if unable to create such a path.
943+
*/
944+
static Path *
945+
get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
946+
Relids required_outer)
947+
{
948+
Path *cheapest;
949+
ListCell *lc;
950+
951+
/*
952+
* Look up the cheapest existing path with no more than the needed
953+
* parameterization. If it has exactly the needed parameterization, we're
954+
* done.
955+
*/
956+
cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
957+
NIL,
958+
required_outer,
959+
TOTAL_COST);
960+
Assert(cheapest != NULL);
961+
if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
962+
return cheapest;
963+
964+
/*
965+
* Otherwise, we can "reparameterize" an existing path to match the given
966+
* parameterization, which effectively means pushing down additional
967+
* joinquals to be checked within the path's scan. However, some existing
968+
* paths might check the available joinquals already while others don't;
969+
* therefore, it's not clear which existing path will be cheapest after
970+
* reparameterization. We have to go through them all and find out.
971+
*/
972+
cheapest = NULL;
973+
foreach(lc, rel->pathlist)
974+
{
975+
Path *path = (Path *) lfirst(lc);
976+
977+
/* Can't use it if it needs more than requested parameterization */
978+
if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
979+
continue;
980+
981+
/*
982+
* Reparameterization can only increase the path's cost, so if it's
983+
* already more expensive than the current cheapest, forget it.
984+
*/
985+
if (cheapest != NULL &&
986+
compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
987+
continue;
988+
989+
/* Reparameterize if needed, then recheck cost */
990+
if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
991+
{
992+
path = reparameterize_path(root, path, required_outer, 1.0);
993+
if (path == NULL)
994+
continue; /* failed to reparameterize this one */
995+
Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
996+
997+
if (cheapest != NULL &&
998+
compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
999+
continue;
1000+
}
1001+
1002+
/* We have a new best path */
1003+
cheapest = path;
1004+
}
1005+
1006+
/* Return the best path, or NULL if we found no suitable candidate */
1007+
return cheapest;
1008+
}
1009+
9441010
/*
9451011
* accumulate_append_subpath
9461012
* Add a subpath to the list being built for an Append or MergeAppend

src/test/regress/expected/union.out

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,3 +605,37 @@ WHERE x > 3;
605605
2 | 4
606606
(1 row)
607607

608+
-- Test proper handling of parameterized appendrel paths when the
609+
-- potential join qual is expensive
610+
create function expensivefunc(int) returns int
611+
language plpgsql immutable strict cost 10000
612+
as $$begin return $1; end$$;
613+
create temp table t3 as select generate_series(-1000,1000) as x;
614+
create index t3i on t3 (expensivefunc(x));
615+
analyze t3;
616+
explain (costs off)
617+
select * from
618+
(select * from t3 a union all select * from t3 b) ss
619+
join int4_tbl on f1 = expensivefunc(x);
620+
QUERY PLAN
621+
------------------------------------------------------------
622+
Nested Loop
623+
-> Seq Scan on int4_tbl
624+
-> Append
625+
-> Index Scan using t3i on t3 a
626+
Index Cond: (expensivefunc(x) = int4_tbl.f1)
627+
-> Index Scan using t3i on t3 b
628+
Index Cond: (expensivefunc(x) = int4_tbl.f1)
629+
(7 rows)
630+
631+
select * from
632+
(select * from t3 a union all select * from t3 b) ss
633+
join int4_tbl on f1 = expensivefunc(x);
634+
x | f1
635+
---+----
636+
0 | 0
637+
0 | 0
638+
(2 rows)
639+
640+
drop table t3;
641+
drop function expensivefunc(int);

src/test/regress/sql/union.sql

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,3 +249,24 @@ SELECT * FROM
249249
UNION
250250
SELECT 2 AS t, 4 AS x) ss
251251
WHERE x > 3;
252+
253+
-- Test proper handling of parameterized appendrel paths when the
254+
-- potential join qual is expensive
255+
create function expensivefunc(int) returns int
256+
language plpgsql immutable strict cost 10000
257+
as $$begin return $1; end$$;
258+
259+
create temp table t3 as select generate_series(-1000,1000) as x;
260+
create index t3i on t3 (expensivefunc(x));
261+
analyze t3;
262+
263+
explain (costs off)
264+
select * from
265+
(select * from t3 a union all select * from t3 b) ss
266+
join int4_tbl on f1 = expensivefunc(x);
267+
select * from
268+
(select * from t3 a union all select * from t3 b) ss
269+
join int4_tbl on f1 = expensivefunc(x);
270+
271+
drop table t3;
272+
drop function expensivefunc(int);

0 commit comments

Comments
 (0)