Skip to content

Commit 6b94e7a

Browse files
committed
Consider fractional paths in generate_orderedappend_paths
When building append paths, we've been looking only at startup and total costs for the paths. When building fractional paths that may eliminate the cheapest one, because it may be dominated by two separate paths (one for startup, one for total cost). This extends generate_orderedappend_paths() to also consider which paths have lowest fractional cost. Currently we only consider paths matching pathkeys - in the future this may be improved by also considering paths that are only partially sorted, with an incremental sort on top. Original report of an issue by Arne Roland, patch by me (based on a suggestion by Tom Lane). Reviewed-by: Arne Roland, Zhihong Yu Discussion: https://postgr.es/m/e8f9ec90-546d-e948-acce-0525f3e92773%40enterprisedb.com Discussion: https://postgr.es/m/1581042da8044e71ada2d6e3a51bf7bb%40index.de
1 parent 025b920 commit 6b94e7a

File tree

3 files changed

+141
-1
lines changed

3 files changed

+141
-1
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1716,6 +1716,7 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
17161716
List *pathkeys = (List *) lfirst(lcp);
17171717
List *startup_subpaths = NIL;
17181718
List *total_subpaths = NIL;
1719+
List *fractional_subpaths = NIL;
17191720
bool startup_neq_total = false;
17201721
ListCell *lcr;
17211722
bool match_partition_order;
@@ -1745,7 +1746,8 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
17451746
{
17461747
RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
17471748
Path *cheapest_startup,
1748-
*cheapest_total;
1749+
*cheapest_total,
1750+
*cheapest_fractional = NULL;
17491751

17501752
/* Locate the right paths, if they are available. */
17511753
cheapest_startup =
@@ -1773,6 +1775,37 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
17731775
Assert(cheapest_total->param_info == NULL);
17741776
}
17751777

1778+
/*
1779+
* When building a fractional path, determine a cheapest fractional
1780+
* path for each child relation too. Looking at startup and total
1781+
* costs is not enough, because the cheapest fractional path may be
1782+
* dominated by two separate paths (one for startup, one for total).
1783+
*
1784+
* When needed (building fractional path), determine the cheapest
1785+
* fractional path too.
1786+
*/
1787+
if (root->tuple_fraction > 0)
1788+
{
1789+
double path_fraction = (1.0 / root->tuple_fraction);
1790+
1791+
cheapest_fractional =
1792+
get_cheapest_fractional_path_for_pathkeys(childrel->pathlist,
1793+
pathkeys,
1794+
NULL,
1795+
path_fraction);
1796+
1797+
/*
1798+
* If we found no path with matching pathkeys, use the cheapest
1799+
* total path instead.
1800+
*
1801+
* XXX We might consider partially sorted paths too (with an
1802+
* incremental sort on top). But we'd have to build all the
1803+
* incremental paths, do the costing etc.
1804+
*/
1805+
if (!cheapest_fractional)
1806+
cheapest_fractional = cheapest_total;
1807+
}
1808+
17761809
/*
17771810
* Notice whether we actually have different paths for the
17781811
* "cheapest" and "total" cases; frequently there will be no point
@@ -1799,6 +1832,12 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
17991832

18001833
startup_subpaths = lappend(startup_subpaths, cheapest_startup);
18011834
total_subpaths = lappend(total_subpaths, cheapest_total);
1835+
1836+
if (cheapest_fractional)
1837+
{
1838+
cheapest_fractional = get_singleton_append_subpath(cheapest_fractional);
1839+
fractional_subpaths = lappend(fractional_subpaths, cheapest_fractional);
1840+
}
18021841
}
18031842
else if (match_partition_order_desc)
18041843
{
@@ -1812,6 +1851,12 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
18121851

18131852
startup_subpaths = lcons(cheapest_startup, startup_subpaths);
18141853
total_subpaths = lcons(cheapest_total, total_subpaths);
1854+
1855+
if (cheapest_fractional)
1856+
{
1857+
cheapest_fractional = get_singleton_append_subpath(cheapest_fractional);
1858+
fractional_subpaths = lcons(cheapest_fractional, fractional_subpaths);
1859+
}
18151860
}
18161861
else
18171862
{
@@ -1823,6 +1868,10 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
18231868
&startup_subpaths, NULL);
18241869
accumulate_append_subpath(cheapest_total,
18251870
&total_subpaths, NULL);
1871+
1872+
if (cheapest_fractional)
1873+
accumulate_append_subpath(cheapest_fractional,
1874+
&fractional_subpaths, NULL);
18261875
}
18271876
}
18281877

@@ -1849,6 +1898,17 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
18491898
0,
18501899
false,
18511900
-1));
1901+
1902+
if (fractional_subpaths)
1903+
add_path(rel, (Path *) create_append_path(root,
1904+
rel,
1905+
fractional_subpaths,
1906+
NIL,
1907+
pathkeys,
1908+
NULL,
1909+
0,
1910+
false,
1911+
-1));
18521912
}
18531913
else
18541914
{
@@ -1864,6 +1924,13 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
18641924
total_subpaths,
18651925
pathkeys,
18661926
NULL));
1927+
1928+
if (fractional_subpaths)
1929+
add_path(rel, (Path *) create_merge_append_path(root,
1930+
rel,
1931+
fractional_subpaths,
1932+
pathkeys,
1933+
NULL));
18671934
}
18681935
}
18691936
}

src/test/regress/expected/partition_join.out

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4862,3 +4862,51 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2
48624862
1 | 209 | 0009 | 1 | 209 | 0009
48634863
(8 rows)
48644864

4865+
-- partitionwise join with fractional paths
4866+
CREATE TABLE fract_t (id BIGINT, PRIMARY KEY (id)) PARTITION BY RANGE (id);
4867+
CREATE TABLE fract_t0 PARTITION OF fract_t FOR VALUES FROM ('0') TO ('1000');
4868+
CREATE TABLE fract_t1 PARTITION OF fract_t FOR VALUES FROM ('1000') TO ('2000');
4869+
-- insert data
4870+
INSERT INTO fract_t (id) (SELECT generate_series(0, 1999));
4871+
ANALYZE fract_t;
4872+
-- verify plan; nested index only scans
4873+
SET max_parallel_workers_per_gather = 0;
4874+
SET enable_partitionwise_join = on;
4875+
EXPLAIN (COSTS OFF)
4876+
SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id ASC LIMIT 10;
4877+
QUERY PLAN
4878+
-----------------------------------------------------------------------
4879+
Limit
4880+
-> Merge Append
4881+
Sort Key: x.id
4882+
-> Merge Left Join
4883+
Merge Cond: (x_1.id = y_1.id)
4884+
-> Index Only Scan using fract_t0_pkey on fract_t0 x_1
4885+
-> Index Only Scan using fract_t0_pkey on fract_t0 y_1
4886+
-> Merge Left Join
4887+
Merge Cond: (x_2.id = y_2.id)
4888+
-> Index Only Scan using fract_t1_pkey on fract_t1 x_2
4889+
-> Index Only Scan using fract_t1_pkey on fract_t1 y_2
4890+
(11 rows)
4891+
4892+
EXPLAIN (COSTS OFF)
4893+
SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id DESC LIMIT 10;
4894+
QUERY PLAN
4895+
--------------------------------------------------------------------------------
4896+
Limit
4897+
-> Merge Append
4898+
Sort Key: x.id DESC
4899+
-> Nested Loop Left Join
4900+
-> Index Only Scan Backward using fract_t0_pkey on fract_t0 x_1
4901+
-> Index Only Scan using fract_t0_pkey on fract_t0 y_1
4902+
Index Cond: (id = x_1.id)
4903+
-> Nested Loop Left Join
4904+
-> Index Only Scan Backward using fract_t1_pkey on fract_t1 x_2
4905+
-> Index Only Scan using fract_t1_pkey on fract_t1 y_2
4906+
Index Cond: (id = x_2.id)
4907+
(11 rows)
4908+
4909+
-- cleanup
4910+
DROP TABLE fract_t;
4911+
RESET max_parallel_workers_per_gather;
4912+
RESET enable_partitionwise_join;

src/test/regress/sql/partition_join.sql

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,3 +1142,28 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.c = t2
11421142
EXPLAIN (COSTS OFF)
11431143
SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b;
11441144
SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b;
1145+
1146+
-- partitionwise join with fractional paths
1147+
CREATE TABLE fract_t (id BIGINT, PRIMARY KEY (id)) PARTITION BY RANGE (id);
1148+
CREATE TABLE fract_t0 PARTITION OF fract_t FOR VALUES FROM ('0') TO ('1000');
1149+
CREATE TABLE fract_t1 PARTITION OF fract_t FOR VALUES FROM ('1000') TO ('2000');
1150+
1151+
-- insert data
1152+
INSERT INTO fract_t (id) (SELECT generate_series(0, 1999));
1153+
ANALYZE fract_t;
1154+
1155+
-- verify plan; nested index only scans
1156+
SET max_parallel_workers_per_gather = 0;
1157+
SET enable_partitionwise_join = on;
1158+
1159+
EXPLAIN (COSTS OFF)
1160+
SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id ASC LIMIT 10;
1161+
1162+
EXPLAIN (COSTS OFF)
1163+
SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id DESC LIMIT 10;
1164+
1165+
-- cleanup
1166+
DROP TABLE fract_t;
1167+
1168+
RESET max_parallel_workers_per_gather;
1169+
RESET enable_partitionwise_join;

0 commit comments

Comments
 (0)