Skip to content

Commit 828e94c

Browse files
author
Richard Guo
committed
Consider explicit incremental sort for mergejoins
For a mergejoin, if the given outer path or inner path is not already well enough ordered, we need to do an explicit sort. Currently, we only consider explicit full sort and do not account for incremental sort. In this patch, for the outer path of a mergejoin, we choose to use explicit incremental sort if it is enabled and there are presorted keys. For the inner path, though, we cannot use incremental sort because it does not support mark/restore at present. The rationale is based on the assumption that incremental sort is always faster than full sort when there are presorted keys, a premise that has been applied in various parts of the code. In addition, the current cost model tends to favor incremental sort as being cheaper than full sort in the presence of presorted keys, making it reasonable not to consider full sort in such cases. It could be argued that what if a mergejoin with an incremental sort as the outer path is selected as the inner path of another mergejoin. However, this should not be a problem, because mergejoin itself does not support mark/restore either, and we will add a Material node on top of it anyway in this case (see final_cost_mergejoin). There is one ensuing plan change in the regression tests, and we have to modify that test case to ensure that it continues to test what it is intended to. No backpatch as this could result in plan changes. Author: Richard Guo Reviewed-by: David Rowley, Tomas Vondra Discussion: https://postgr.es/m/CAMbWs49x425QrX7h=Ux05WEnt8GS757H-jOP3_xsX5t1FoUsZw@mail.gmail.com
1 parent c4528fd commit 828e94c

File tree

6 files changed

+184
-41
lines changed

6 files changed

+184
-41
lines changed

src/backend/optimizer/path/costsize.c

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3532,7 +3532,8 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
35323532
* join quals here, except for obtaining the scan selectivity estimate which
35333533
* is really essential (but fortunately, use of caching keeps the cost of
35343534
* getting that down to something reasonable).
3535-
* We also assume that cost_sort is cheap enough to use here.
3535+
* We also assume that cost_sort/cost_incremental_sort is cheap enough to use
3536+
* here.
35363537
*
35373538
* 'workspace' is to be filled with startup_cost, total_cost, and perhaps
35383539
* other data to be used by final_cost_mergejoin
@@ -3569,7 +3570,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
35693570
outerendsel,
35703571
innerstartsel,
35713572
innerendsel;
3572-
Path sort_path; /* dummy for result of cost_sort */
3573+
Path sort_path; /* dummy for result of
3574+
* cost_sort/cost_incremental_sort */
35733575

35743576
/* Protect some assumptions below that rowcounts aren't zero */
35753577
if (outer_path_rows <= 0)
@@ -3682,16 +3684,54 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
36823684

36833685
if (outersortkeys) /* do we need to sort outer? */
36843686
{
3685-
cost_sort(&sort_path,
3686-
root,
3687-
outersortkeys,
3688-
outer_path->disabled_nodes,
3689-
outer_path->total_cost,
3690-
outer_path_rows,
3691-
outer_path->pathtarget->width,
3692-
0.0,
3693-
work_mem,
3694-
-1.0);
3687+
bool use_incremental_sort = false;
3688+
int presorted_keys;
3689+
3690+
/*
3691+
* We choose to use incremental sort if it is enabled and there are
3692+
* presorted keys; otherwise we use full sort.
3693+
*/
3694+
if (enable_incremental_sort)
3695+
{
3696+
bool is_sorted PG_USED_FOR_ASSERTS_ONLY;
3697+
3698+
is_sorted = pathkeys_count_contained_in(outersortkeys,
3699+
outer_path->pathkeys,
3700+
&presorted_keys);
3701+
Assert(!is_sorted);
3702+
3703+
if (presorted_keys > 0)
3704+
use_incremental_sort = true;
3705+
}
3706+
3707+
if (!use_incremental_sort)
3708+
{
3709+
cost_sort(&sort_path,
3710+
root,
3711+
outersortkeys,
3712+
outer_path->disabled_nodes,
3713+
outer_path->total_cost,
3714+
outer_path_rows,
3715+
outer_path->pathtarget->width,
3716+
0.0,
3717+
work_mem,
3718+
-1.0);
3719+
}
3720+
else
3721+
{
3722+
cost_incremental_sort(&sort_path,
3723+
root,
3724+
outersortkeys,
3725+
presorted_keys,
3726+
outer_path->disabled_nodes,
3727+
outer_path->startup_cost,
3728+
outer_path->total_cost,
3729+
outer_path_rows,
3730+
outer_path->pathtarget->width,
3731+
0.0,
3732+
work_mem,
3733+
-1.0);
3734+
}
36953735
disabled_nodes += sort_path.disabled_nodes;
36963736
startup_cost += sort_path.startup_cost;
36973737
startup_cost += (sort_path.total_cost - sort_path.startup_cost)
@@ -3711,6 +3751,11 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
37113751

37123752
if (innersortkeys) /* do we need to sort inner? */
37133753
{
3754+
/*
3755+
* We do not consider incremental sort for inner path, because
3756+
* incremental sort does not support mark/restore.
3757+
*/
3758+
37143759
cost_sort(&sort_path,
37153760
root,
37163761
innersortkeys,

src/backend/optimizer/plan/createplan.c

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ static void copy_generic_path_info(Plan *dest, Path *src);
179179
static void copy_plan_costsize(Plan *dest, Plan *src);
180180
static void label_sort_with_costsize(PlannerInfo *root, Sort *plan,
181181
double limit_tuples);
182+
static void label_incrementalsort_with_costsize(PlannerInfo *root, IncrementalSort *plan,
183+
List *pathkeys, double limit_tuples);
182184
static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
183185
static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid,
184186
TableSampleClause *tsc);
@@ -4523,19 +4525,63 @@ create_mergejoin_plan(PlannerInfo *root,
45234525
if (best_path->outersortkeys)
45244526
{
45254527
Relids outer_relids = outer_path->parent->relids;
4526-
Sort *sort = make_sort_from_pathkeys(outer_plan,
4528+
Plan *sort_plan;
4529+
bool use_incremental_sort = false;
4530+
int presorted_keys;
4531+
4532+
/*
4533+
* We choose to use incremental sort if it is enabled and there are
4534+
* presorted keys; otherwise we use full sort.
4535+
*/
4536+
if (enable_incremental_sort)
4537+
{
4538+
bool is_sorted PG_USED_FOR_ASSERTS_ONLY;
4539+
4540+
is_sorted = pathkeys_count_contained_in(best_path->outersortkeys,
4541+
outer_path->pathkeys,
4542+
&presorted_keys);
4543+
Assert(!is_sorted);
4544+
4545+
if (presorted_keys > 0)
4546+
use_incremental_sort = true;
4547+
}
4548+
4549+
if (!use_incremental_sort)
4550+
{
4551+
sort_plan = (Plan *)
4552+
make_sort_from_pathkeys(outer_plan,
4553+
best_path->outersortkeys,
4554+
outer_relids);
4555+
4556+
label_sort_with_costsize(root, (Sort *) sort_plan, -1.0);
4557+
}
4558+
else
4559+
{
4560+
sort_plan = (Plan *)
4561+
make_incrementalsort_from_pathkeys(outer_plan,
45274562
best_path->outersortkeys,
4528-
outer_relids);
4563+
outer_relids,
4564+
presorted_keys);
45294565

4530-
label_sort_with_costsize(root, sort, -1.0);
4531-
outer_plan = (Plan *) sort;
4566+
label_incrementalsort_with_costsize(root,
4567+
(IncrementalSort *) sort_plan,
4568+
best_path->outersortkeys,
4569+
-1.0);
4570+
}
4571+
4572+
outer_plan = sort_plan;
45324573
outerpathkeys = best_path->outersortkeys;
45334574
}
45344575
else
45354576
outerpathkeys = best_path->jpath.outerjoinpath->pathkeys;
45364577

45374578
if (best_path->innersortkeys)
45384579
{
4580+
/*
4581+
* We do not consider incremental sort for inner path, because
4582+
* incremental sort does not support mark/restore.
4583+
*/
4584+
45394585
Relids inner_relids = inner_path->parent->relids;
45404586
Sort *sort = make_sort_from_pathkeys(inner_plan,
45414587
best_path->innersortkeys,
@@ -5447,10 +5493,6 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples)
54475493
Plan *lefttree = plan->plan.lefttree;
54485494
Path sort_path; /* dummy for result of cost_sort */
54495495

5450-
/*
5451-
* This function shouldn't have to deal with IncrementalSort plans because
5452-
* they are only created from corresponding Path nodes.
5453-
*/
54545496
Assert(IsA(plan, Sort));
54555497

54565498
cost_sort(&sort_path, root, NIL,
@@ -5469,6 +5511,37 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples)
54695511
plan->plan.parallel_safe = lefttree->parallel_safe;
54705512
}
54715513

5514+
/*
5515+
* Same as label_sort_with_costsize, but labels the IncrementalSort node
5516+
* instead.
5517+
*/
5518+
static void
5519+
label_incrementalsort_with_costsize(PlannerInfo *root, IncrementalSort *plan,
5520+
List *pathkeys, double limit_tuples)
5521+
{
5522+
Plan *lefttree = plan->sort.plan.lefttree;
5523+
Path sort_path; /* dummy for result of cost_incremental_sort */
5524+
5525+
Assert(IsA(plan, IncrementalSort));
5526+
5527+
cost_incremental_sort(&sort_path, root, pathkeys,
5528+
plan->nPresortedCols,
5529+
plan->sort.plan.disabled_nodes,
5530+
lefttree->startup_cost,
5531+
lefttree->total_cost,
5532+
lefttree->plan_rows,
5533+
lefttree->plan_width,
5534+
0.0,
5535+
work_mem,
5536+
limit_tuples);
5537+
plan->sort.plan.startup_cost = sort_path.startup_cost;
5538+
plan->sort.plan.total_cost = sort_path.total_cost;
5539+
plan->sort.plan.plan_rows = lefttree->plan_rows;
5540+
plan->sort.plan.plan_width = lefttree->plan_width;
5541+
plan->sort.plan.parallel_aware = false;
5542+
plan->sort.plan.parallel_safe = lefttree->parallel_safe;
5543+
}
5544+
54725545
/*
54735546
* bitmap_subplan_mark_shared
54745547
* Set isshared flag in bitmap subplan so that it will be created in

src/test/regress/expected/aggregates.out

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2858,29 +2858,27 @@ GROUP BY w, x, z, y;
28582858
-> Index Scan using btg_x_y_idx on btg
28592859
(6 rows)
28602860

2861-
-- Utilize the ordering of merge join to avoid a full Sort operation
2861+
-- Utilize the ordering of merge join to avoid a Sort operation
28622862
SET enable_hashjoin = off;
28632863
SET enable_nestloop = off;
28642864
EXPLAIN (COSTS OFF)
28652865
SELECT count(*)
2866-
FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
2867-
GROUP BY t1.x, t1.y, t1.z, t1.w;
2868-
QUERY PLAN
2869-
-------------------------------------------------------------------------------
2866+
FROM btg t1 JOIN btg t2 ON t1.w = t2.w AND t1.x = t2.x AND t1.z = t2.z
2867+
GROUP BY t1.w, t1.z, t1.x;
2868+
QUERY PLAN
2869+
-------------------------------------------------------------------------
28702870
GroupAggregate
2871-
Group Key: t1.z, t1.w, t1.x, t1.y
2872-
-> Incremental Sort
2873-
Sort Key: t1.z, t1.w, t1.x, t1.y
2874-
Presorted Key: t1.z, t1.w, t1.x
2875-
-> Merge Join
2876-
Merge Cond: ((t1.z = t2.z) AND (t1.w = t2.w) AND (t1.x = t2.x))
2877-
-> Sort
2878-
Sort Key: t1.z, t1.w, t1.x
2879-
-> Index Scan using btg_x_y_idx on btg t1
2880-
-> Sort
2881-
Sort Key: t2.z, t2.w, t2.x
2882-
-> Index Scan using btg_x_y_idx on btg t2
2883-
(13 rows)
2871+
Group Key: t1.x, t1.w, t1.z
2872+
-> Merge Join
2873+
Merge Cond: ((t1.x = t2.x) AND (t1.w = t2.w) AND (t1.z = t2.z))
2874+
-> Incremental Sort
2875+
Sort Key: t1.x, t1.w, t1.z
2876+
Presorted Key: t1.x
2877+
-> Index Scan using btg_x_y_idx on btg t1
2878+
-> Sort
2879+
Sort Key: t2.x, t2.w, t2.z
2880+
-> Index Scan using btg_x_y_idx on btg t2
2881+
(11 rows)
28842882

28852883
RESET enable_nestloop;
28862884
RESET enable_hashjoin;

src/test/regress/expected/incremental_sort.out

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,3 +1701,24 @@ explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order b
17011701
Order By: (a <-> '(5,5)'::point)
17021702
(6 rows)
17031703

1704+
-- Ensure we get an incremental sort on the outer side of the mergejoin
1705+
explain (costs off)
1706+
select * from
1707+
(select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
1708+
order by t1.four, t1.two limit 1;
1709+
QUERY PLAN
1710+
-----------------------------------------------------------------------
1711+
Limit
1712+
-> Merge Join
1713+
Merge Cond: ((tenk1.four = t2.four) AND (tenk1.two = t2.two))
1714+
-> Incremental Sort
1715+
Sort Key: tenk1.four, tenk1.two
1716+
Presorted Key: tenk1.four
1717+
-> Sort
1718+
Sort Key: tenk1.four
1719+
-> Seq Scan on tenk1
1720+
-> Sort
1721+
Sort Key: t2.four, t2.two
1722+
-> Seq Scan on tenk1 t2
1723+
(12 rows)
1724+

src/test/regress/sql/aggregates.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,13 +1232,13 @@ EXPLAIN (COSTS OFF) SELECT count(*)
12321232
FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1
12331233
GROUP BY w, x, z, y;
12341234

1235-
-- Utilize the ordering of merge join to avoid a full Sort operation
1235+
-- Utilize the ordering of merge join to avoid a Sort operation
12361236
SET enable_hashjoin = off;
12371237
SET enable_nestloop = off;
12381238
EXPLAIN (COSTS OFF)
12391239
SELECT count(*)
1240-
FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
1241-
GROUP BY t1.x, t1.y, t1.z, t1.w;
1240+
FROM btg t1 JOIN btg t2 ON t1.w = t2.w AND t1.x = t2.x AND t1.z = t2.z
1241+
GROUP BY t1.w, t1.z, t1.x;
12421242
RESET enable_nestloop;
12431243
RESET enable_hashjoin;
12441244

src/test/regress/sql/incremental_sort.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,3 +292,9 @@ create index point_table_a_idx on point_table using gist(a);
292292
-- Ensure we get an incremental sort plan for both of the following queries
293293
explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b limit 1;
294294
explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b desc limit 1;
295+
296+
-- Ensure we get an incremental sort on the outer side of the mergejoin
297+
explain (costs off)
298+
select * from
299+
(select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
300+
order by t1.four, t1.two limit 1;

0 commit comments

Comments
 (0)