Skip to content

Commit 80558c1

Browse files
committed
Generate parallel sequential scan plans in simple cases.
Add a new flag, consider_parallel, to each RelOptInfo, indicating whether a plan for that relation could conceivably be run inside of a parallel worker. Right now, we're pretty conservative: for example, it might be possible to defer applying a parallel-restricted qual in a worker, and later do it in the leader, but right now we just don't try to parallelize access to that relation. That's probably the right decision in most cases, anyway. Using the new flag, generate parallel sequential scan plans for plain baserels, meaning that we now have parallel sequential scan in PostgreSQL. The logic here is pretty unsophisticated right now: the costing model probably isn't right in detail, and we can't push joins beneath Gather nodes, so the number of plans that can actually benefit from this is pretty limited right now. Lots more work is needed. Nevertheless, it seems time to enable this functionality so that all this code can actually be tested easily by users and developers. Note that, if you wish to test this functionality, it will be necessary to set max_parallel_degree to a value greater than the default of 0. Once a few more loose ends have been tidied up here, we might want to consider changing the default value of this GUC, but I'm leaving it alone for now. Along the way, fix a bug in cost_gather: the previous coding thought that a Gather node's transfer overhead should be costed on the basis of the relation size rather than the number of tuples that actually need to be passed off to the leader. Patch by me, reviewed in earlier versions by Amit Kapila.
1 parent f0661c4 commit 80558c1

File tree

11 files changed

+400
-44
lines changed

11 files changed

+400
-44
lines changed

src/backend/nodes/outfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -1882,6 +1882,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
18821882
WRITE_INT_FIELD(width);
18831883
WRITE_BOOL_FIELD(consider_startup);
18841884
WRITE_BOOL_FIELD(consider_param_startup);
1885+
WRITE_BOOL_FIELD(consider_parallel);
18851886
WRITE_NODE_FIELD(reltargetlist);
18861887
WRITE_NODE_FIELD(pathlist);
18871888
WRITE_NODE_FIELD(ppilist);

src/backend/optimizer/path/allpaths.c

+187-3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "access/tsmapi.h"
2222
#include "catalog/pg_class.h"
2323
#include "catalog/pg_operator.h"
24+
#include "catalog/pg_proc.h"
2425
#include "foreign/fdwapi.h"
2526
#include "nodes/makefuncs.h"
2627
#include "nodes/nodeFuncs.h"
@@ -71,6 +72,9 @@ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
7172
Index rti, RangeTblEntry *rte);
7273
static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
7374
RangeTblEntry *rte);
75+
static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
76+
RangeTblEntry *rte);
77+
static bool function_rte_parallel_ok(RangeTblEntry *rte);
7478
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
7579
RangeTblEntry *rte);
7680
static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
@@ -158,7 +162,8 @@ make_one_rel(PlannerInfo *root, List *joinlist)
158162
set_base_rel_consider_startup(root);
159163

160164
/*
161-
* Generate access paths for the base rels.
165+
* Generate access paths for the base rels. set_base_rel_sizes also
166+
* sets the consider_parallel flag for each baserel, if appropriate.
162167
*/
163168
set_base_rel_sizes(root);
164169
set_base_rel_pathlists(root);
@@ -222,9 +227,12 @@ set_base_rel_consider_startup(PlannerInfo *root)
222227
/*
223228
* set_base_rel_sizes
224229
* Set the size estimates (rows and widths) for each base-relation entry.
230+
* Also determine whether to consider parallel paths for base relations.
225231
*
226232
* We do this in a separate pass over the base rels so that rowcount
227-
* estimates are available for parameterized path generation.
233+
* estimates are available for parameterized path generation, and also so
234+
* that the consider_parallel flag is set correctly before we begin to
235+
* generate paths.
228236
*/
229237
static void
230238
set_base_rel_sizes(PlannerInfo *root)
@@ -234,6 +242,7 @@ set_base_rel_sizes(PlannerInfo *root)
234242
for (rti = 1; rti < root->simple_rel_array_size; rti++)
235243
{
236244
RelOptInfo *rel = root->simple_rel_array[rti];
245+
RangeTblEntry *rte;
237246

238247
/* there may be empty slots corresponding to non-baserel RTEs */
239248
if (rel == NULL)
@@ -245,7 +254,19 @@ set_base_rel_sizes(PlannerInfo *root)
245254
if (rel->reloptkind != RELOPT_BASEREL)
246255
continue;
247256

248-
set_rel_size(root, rel, rti, root->simple_rte_array[rti]);
257+
rte = root->simple_rte_array[rti];
258+
259+
/*
260+
* If parallelism is allowable for this query in general, see whether
261+
* it's allowable for this rel in particular. We have to do this
262+
* before set_rel_size, because if this is an inheritance parent,
263+
* set_append_rel_size will pass the consider_parallel flag down to
264+
* inheritance children.
265+
*/
266+
if (root->glob->parallelModeOK)
267+
set_rel_consider_parallel(root, rel, rte);
268+
269+
set_rel_size(root, rel, rti, rte);
249270
}
250271
}
251272

@@ -458,6 +479,131 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
458479
set_baserel_size_estimates(root, rel);
459480
}
460481

482+
/*
483+
* If this relation could possibly be scanned from within a worker, then set
484+
* the consider_parallel flag. The flag has previously been initialized to
485+
* false, so we just bail out if it becomes clear that we can't safely set it.
486+
*/
487+
static void
488+
set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
489+
RangeTblEntry *rte)
490+
{
491+
/* Don't call this if parallelism is disallowed for the entire query. */
492+
Assert(root->glob->parallelModeOK);
493+
494+
/* Don't call this for non-baserels. */
495+
Assert(rel->reloptkind == RELOPT_BASEREL);
496+
497+
/* Assorted checks based on rtekind. */
498+
switch (rte->rtekind)
499+
{
500+
case RTE_RELATION:
501+
/*
502+
* Currently, parallel workers can't access the leader's temporary
503+
* tables. We could possibly relax this if the wrote all of its
504+
* local buffers at the start of the query and made no changes
505+
* thereafter (maybe we could allow hint bit changes), and if we
506+
* taught the workers to read them. Writing a large number of
507+
* temporary buffers could be expensive, though, and we don't have
508+
* the rest of the necessary infrastructure right now anyway. So
509+
* for now, bail out if we see a temporary table.
510+
*/
511+
if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP)
512+
return;
513+
514+
/*
515+
* Table sampling can be pushed down to workers if the sample
516+
* function and its arguments are safe.
517+
*/
518+
if (rte->tablesample != NULL)
519+
{
520+
Oid proparallel = func_parallel(rte->tablesample->tsmhandler);
521+
522+
if (proparallel != PROPARALLEL_SAFE)
523+
return;
524+
if (has_parallel_hazard((Node *) rte->tablesample->args,
525+
false))
526+
return;
527+
return;
528+
}
529+
break;
530+
531+
case RTE_SUBQUERY:
532+
/*
533+
* Subplans currently aren't passed to workers. Even if they
534+
* were, the subplan might be using parallelism internally, and
535+
* we can't support nested Gather nodes at present. Finally,
536+
* we don't have a good way of knowing whether the subplan
537+
* involves any parallel-restricted operations. It would be
538+
* nice to relax this restriction some day, but it's going to
539+
* take a fair amount of work.
540+
*/
541+
return;
542+
543+
case RTE_JOIN:
544+
/* Shouldn't happen; we're only considering baserels here. */
545+
Assert(false);
546+
return;
547+
548+
case RTE_FUNCTION:
549+
/* Check for parallel-restricted functions. */
550+
if (!function_rte_parallel_ok(rte))
551+
return;
552+
break;
553+
554+
case RTE_VALUES:
555+
/*
556+
* The data for a VALUES clause is stored in the plan tree itself,
557+
* so scanning it in a worker is fine.
558+
*/
559+
break;
560+
561+
case RTE_CTE:
562+
/*
563+
* CTE tuplestores aren't shared among parallel workers, so we
564+
* force all CTE scans to happen in the leader. Also, populating
565+
* the CTE would require executing a subplan that's not available
566+
* in the worker, might be parallel-restricted, and must get
567+
* executed only once.
568+
*/
569+
return;
570+
}
571+
572+
/*
573+
* If there's anything in baserestrictinfo that's parallel-restricted,
574+
* we give up on parallelizing access to this relation. We could consider
575+
* instead postponing application of the restricted quals until we're
576+
* above all the parallelism in the plan tree, but it's not clear that
577+
* this would be a win in very many cases, and it might be tricky to make
578+
* outer join clauses work correctly.
579+
*/
580+
if (has_parallel_hazard((Node *) rel->baserestrictinfo, false))
581+
return;
582+
583+
/* We have a winner. */
584+
rel->consider_parallel = true;
585+
}
586+
587+
/*
588+
* Check whether a function RTE is scanning something parallel-restricted.
589+
*/
590+
static bool
591+
function_rte_parallel_ok(RangeTblEntry *rte)
592+
{
593+
ListCell *lc;
594+
595+
foreach(lc, rte->functions)
596+
{
597+
RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
598+
599+
Assert(IsA(rtfunc, RangeTblFunction));
600+
if (has_parallel_hazard(rtfunc->funcexpr, false))
601+
return false;
602+
}
603+
604+
return true;
605+
}
606+
461607
/*
462608
* set_plain_rel_pathlist
463609
* Build access paths for a plain relation (no subquery, no inheritance)
@@ -466,6 +612,7 @@ static void
466612
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
467613
{
468614
Relids required_outer;
615+
int parallel_threshold = 1000;
469616

470617
/*
471618
* We don't support pushing join clauses into the quals of a seqscan, but
@@ -477,6 +624,40 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
477624
/* Consider sequential scan */
478625
add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
479626

627+
/* Consider parallel sequential scan */
628+
if (rel->consider_parallel && rel->pages > parallel_threshold &&
629+
required_outer == NULL)
630+
{
631+
Path *path;
632+
int parallel_degree = 1;
633+
634+
/*
635+
* Limit the degree of parallelism logarithmically based on the size
636+
* of the relation. This probably needs to be a good deal more
637+
* sophisticated, but we need something here for now.
638+
*/
639+
while (rel->pages > parallel_threshold * 3 &&
640+
parallel_degree < max_parallel_degree)
641+
{
642+
parallel_degree++;
643+
parallel_threshold *= 3;
644+
if (parallel_threshold >= PG_INT32_MAX / 3)
645+
break;
646+
}
647+
648+
/*
649+
* Ideally we should consider postponing the gather operation until
650+
* much later, after we've pushed joins and so on atop the parallel
651+
* sequential scan path. But we don't have the infrastructure for
652+
* that yet, so just do this for now.
653+
*/
654+
path = create_seqscan_path(root, rel, required_outer, parallel_degree);
655+
path = (Path *)
656+
create_gather_path(root, rel, path, required_outer,
657+
parallel_degree);
658+
add_path(rel, path);
659+
}
660+
480661
/* Consider index scans */
481662
create_index_paths(root, rel);
482663

@@ -714,6 +895,9 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
714895
continue;
715896
}
716897

898+
/* Copy consider_parallel flag from parent. */
899+
childrel->consider_parallel = rel->consider_parallel;
900+
717901
/*
718902
* CE failed, so finish copying/modifying targetlist and join quals.
719903
*

src/backend/optimizer/path/costsize.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ cost_gather(GatherPath *path, PlannerInfo *root,
334334

335335
/* Parallel setup and communication cost. */
336336
startup_cost += parallel_setup_cost;
337-
run_cost += parallel_tuple_cost * rel->tuples;
337+
run_cost += parallel_tuple_cost * path->path.rows;
338338

339339
path->path.startup_cost = startup_cost;
340340
path->path.total_cost = (startup_cost + run_cost);

src/backend/optimizer/plan/planmain.c

+12
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
#include "postgres.h"
2222

23+
#include "optimizer/clauses.h"
2324
#include "optimizer/orclauses.h"
2425
#include "optimizer/pathnode.h"
2526
#include "optimizer/paths.h"
@@ -70,6 +71,17 @@ query_planner(PlannerInfo *root, List *tlist,
7071
/* We need a dummy joinrel to describe the empty set of baserels */
7172
final_rel = build_empty_join_rel(root);
7273

74+
/*
75+
* If query allows parallelism in general, check whether the quals
76+
* are parallel-restricted. There's currently no real benefit to
77+
* setting this flag correctly because we can't yet reference subplans
78+
* from parallel workers. But that might change someday, so set this
79+
* correctly anyway.
80+
*/
81+
if (root->glob->parallelModeOK)
82+
final_rel->consider_parallel =
83+
!has_parallel_hazard(parse->jointree->quals, false);
84+
7385
/* The only path for it is a trivial Result path */
7486
add_path(final_rel, (Path *)
7587
create_result_path((List *) parse->jointree->quals));

src/backend/optimizer/plan/planner.c

+5-4
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
204204
/*
205205
* Assess whether it's feasible to use parallel mode for this query.
206206
* We can't do this in a standalone backend, or if the command will
207-
* try to modify any data, or if this is a cursor operation, or if any
207+
* try to modify any data, or if this is a cursor operation, or if
208+
* GUCs are set to values that don't permit parallelism, or if
208209
* parallel-unsafe functions are present in the query tree.
209210
*
210211
* For now, we don't try to use parallel mode if we're running inside
@@ -223,9 +224,9 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
223224
glob->parallelModeOK = (cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
224225
IsUnderPostmaster && dynamic_shared_memory_type != DSM_IMPL_NONE &&
225226
parse->commandType == CMD_SELECT && !parse->hasModifyingCTE &&
226-
parse->utilityStmt == NULL && !IsParallelWorker() &&
227-
!IsolationIsSerializable() &&
228-
!contain_parallel_unsafe((Node *) parse);
227+
parse->utilityStmt == NULL && max_parallel_degree > 0 &&
228+
!IsParallelWorker() && !IsolationIsSerializable() &&
229+
!has_parallel_hazard((Node *) parse, true);
229230

230231
/*
231232
* glob->parallelModeOK should tell us whether it's necessary to impose

0 commit comments

Comments
 (0)