Skip to content

Commit b588cad

Browse files
committed
Consider the "LIMIT 1" optimization with parallel DISTINCT
Similar to what was done in 5543677 for non-parallel DISTINCT, apply the same optimization when the distinct_pathkeys are empty for the partial paths too. This can be faster than the non-parallel version when the first row matching the WHERE clause of the query takes a while to find. Parallel workers could speed that process up considerably. Author: Richard Guo Reviewed-by: David Rowley Discussion: https://postgr.es/m/CAMbWs49JC0qvfUbzs-TVzgMpSSBiMJ_6sN=BaA9iohBgYkr=LA@mail.gmail.com
1 parent 3e91dba commit b588cad

File tree

3 files changed

+72
-5
lines changed

3 files changed

+72
-5
lines changed

src/backend/optimizer/plan/planner.c

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4737,11 +4737,45 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
47374737
-1.0);
47384738
}
47394739

4740-
add_partial_path(partial_distinct_rel, (Path *)
4741-
create_upper_unique_path(root, partial_distinct_rel,
4742-
sorted_path,
4743-
list_length(root->distinct_pathkeys),
4744-
numDistinctRows));
4740+
/*
4741+
* An empty distinct_pathkeys means all tuples have the same value
4742+
* for the DISTINCT clause. See create_final_distinct_paths()
4743+
*/
4744+
if (root->distinct_pathkeys == NIL)
4745+
{
4746+
Node *limitCount;
4747+
4748+
limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4749+
sizeof(int64),
4750+
Int64GetDatum(1), false,
4751+
FLOAT8PASSBYVAL);
4752+
4753+
/*
4754+
* Apply a LimitPath onto the partial path to restrict the
4755+
* tuples from each worker to 1. create_final_distinct_paths
4756+
* will need to apply an additional LimitPath to restrict this
4757+
* to a single row after the Gather node. If the query
4758+
* already has a LIMIT clause, then we could end up with three
4759+
* Limit nodes in the final plan. Consolidating the top two
4760+
* of these could be done, but does not seem worth troubling
4761+
* over.
4762+
*/
4763+
add_partial_path(partial_distinct_rel, (Path *)
4764+
create_limit_path(root, partial_distinct_rel,
4765+
sorted_path,
4766+
NULL,
4767+
limitCount,
4768+
LIMIT_OPTION_COUNT,
4769+
0, 1));
4770+
}
4771+
else
4772+
{
4773+
add_partial_path(partial_distinct_rel, (Path *)
4774+
create_upper_unique_path(root, partial_distinct_rel,
4775+
sorted_path,
4776+
list_length(root->distinct_pathkeys),
4777+
numDistinctRows));
4778+
}
47454779
}
47464780
}
47474781

src/test/regress/expected/select_distinct.out

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,26 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
348348
0 | 1 | 2 | 3
349349
(1 row)
350350

351+
SET parallel_setup_cost=0;
352+
SET min_parallel_table_scan_size=0;
353+
SET max_parallel_workers_per_gather=2;
354+
-- Ensure we get a plan with a Limit 1 in both partial distinct and final
355+
-- distinct
356+
EXPLAIN (COSTS OFF)
357+
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
358+
QUERY PLAN
359+
----------------------------------------------
360+
Limit
361+
-> Gather
362+
Workers Planned: 2
363+
-> Limit
364+
-> Parallel Seq Scan on tenk1
365+
Filter: (four = 10)
366+
(6 rows)
367+
368+
RESET max_parallel_workers_per_gather;
369+
RESET min_parallel_table_scan_size;
370+
RESET parallel_setup_cost;
351371
--
352372
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
353373
-- very own regression file.

src/test/regress/sql/select_distinct.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,19 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
180180
-- Ensure we only get 1 row
181181
SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
182182

183+
SET parallel_setup_cost=0;
184+
SET min_parallel_table_scan_size=0;
185+
SET max_parallel_workers_per_gather=2;
186+
187+
-- Ensure we get a plan with a Limit 1 in both partial distinct and final
188+
-- distinct
189+
EXPLAIN (COSTS OFF)
190+
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
191+
192+
RESET max_parallel_workers_per_gather;
193+
RESET min_parallel_table_scan_size;
194+
RESET parallel_setup_cost;
195+
183196
--
184197
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
185198
-- very own regression file.

0 commit comments

Comments
 (0)