Skip to content

Commit e211481

Browse files
committed
Implement choice between hash-based and sort-based grouping for doing
DISTINCT processing on the output of an IN sub-select.
1 parent a4482f4 commit e211481

File tree

2 files changed

+105
-12
lines changed

2 files changed

+105
-12
lines changed

src/backend/optimizer/plan/createplan.c

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.132 2003/01/20 18:54:52 tgl Exp $
13+
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.133 2003/01/22 00:07:00 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
1717
#include "postgres.h"
1818

19+
#include <limits.h>
1920

2021
#include "nodes/makefuncs.h"
2122
#include "nodes/nodeFuncs.h"
@@ -418,6 +419,7 @@ create_unique_plan(Query *root, UniquePath *best_path)
418419
Plan *plan;
419420
Plan *subplan;
420421
List *sub_targetlist;
422+
List *my_tlist;
421423
List *l;
422424

423425
subplan = create_plan(root, best_path->subpath);
@@ -474,21 +476,39 @@ create_unique_plan(Query *root, UniquePath *best_path)
474476
subplan->targetlist = newtlist;
475477
}
476478

479+
my_tlist = new_unsorted_tlist(subplan->targetlist);
480+
477481
if (best_path->use_hash)
478482
{
479-
elog(ERROR, "create_unique_plan: hash case not implemented yet");
480-
plan = NULL;
483+
int numGroupCols = length(my_tlist);
484+
long numGroups;
485+
AttrNumber *groupColIdx;
486+
int i;
487+
488+
numGroups = (long) Min(best_path->rows, (double) LONG_MAX);
489+
490+
groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
491+
for (i = 0; i < numGroupCols; i++)
492+
groupColIdx[i] = i+1;
493+
494+
plan = (Plan *) make_agg(root,
495+
my_tlist,
496+
NIL,
497+
AGG_HASHED,
498+
numGroupCols,
499+
groupColIdx,
500+
numGroups,
501+
0,
502+
subplan);
481503
}
482504
else
483505
{
484-
List *sort_tlist;
485506
List *sortList;
486507

487-
sort_tlist = new_unsorted_tlist(subplan->targetlist);
488-
sortList = addAllTargetsToSortList(NIL, sort_tlist);
489-
plan = (Plan *) make_sort_from_sortclauses(root, sort_tlist,
508+
sortList = addAllTargetsToSortList(NIL, my_tlist);
509+
plan = (Plan *) make_sort_from_sortclauses(root, my_tlist,
490510
subplan, sortList);
491-
plan = (Plan *) make_unique(sort_tlist, plan, sortList);
511+
plan = (Plan *) make_unique(my_tlist, plan, sortList);
492512
}
493513

494514
plan->plan_rows = best_path->rows;

src/backend/optimizer/util/pathnode.c

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,30 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.84 2003/01/20 18:54:56 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.85 2003/01/22 00:07:00 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
1515
#include "postgres.h"
1616

1717
#include <math.h>
1818

19+
#include "catalog/pg_operator.h"
1920
#include "executor/executor.h"
21+
#include "miscadmin.h"
2022
#include "nodes/plannodes.h"
2123
#include "optimizer/cost.h"
2224
#include "optimizer/pathnode.h"
2325
#include "optimizer/paths.h"
2426
#include "optimizer/restrictinfo.h"
27+
#include "parser/parse_expr.h"
28+
#include "parser/parse_oper.h"
2529
#include "utils/memutils.h"
2630
#include "utils/selfuncs.h"
31+
#include "utils/syscache.h"
32+
33+
34+
static bool hash_safe_tlist(List *tlist);
2735

2836

2937
/*****************************************************************************
@@ -506,6 +514,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
506514
{
507515
UniquePath *pathnode;
508516
Path sort_path; /* dummy for result of cost_sort */
517+
Path agg_path; /* dummy for result of cost_agg */
509518
MemoryContext oldcontext;
510519
List *sub_targetlist;
511520
List *l;
@@ -587,16 +596,80 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
587596
*/
588597
sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
589598

590-
pathnode->use_hash = false; /* for now */
599+
/*
600+
* Is it safe to use a hashed implementation? If so, estimate and
601+
* compare costs. We only try this if we know the targetlist for
602+
* sure (else we can't be sure about the datatypes involved).
603+
*/
604+
pathnode->use_hash = false;
605+
if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
606+
{
607+
/*
608+
* Estimate the overhead per hashtable entry at 64 bytes (same
609+
* as in planner.c).
610+
*/
611+
int hashentrysize = rel->width + 64;
591612

592-
pathnode->path.startup_cost = sort_path.startup_cost;
593-
pathnode->path.total_cost = sort_path.total_cost;
613+
if (hashentrysize * pathnode->rows <= SortMem * 1024L)
614+
{
615+
cost_agg(&agg_path, root,
616+
AGG_HASHED, 0,
617+
numCols, pathnode->rows,
618+
subpath->startup_cost,
619+
subpath->total_cost,
620+
rel->rows);
621+
if (agg_path.total_cost < sort_path.total_cost)
622+
pathnode->use_hash = true;
623+
}
624+
}
625+
626+
if (pathnode->use_hash)
627+
{
628+
pathnode->path.startup_cost = agg_path.startup_cost;
629+
pathnode->path.total_cost = agg_path.total_cost;
630+
}
631+
else
632+
{
633+
pathnode->path.startup_cost = sort_path.startup_cost;
634+
pathnode->path.total_cost = sort_path.total_cost;
635+
}
594636

595637
rel->cheapest_unique_path = (Path *) pathnode;
596638

597639
return pathnode;
598640
}
599641

642+
/*
643+
* hash_safe_tlist - can datatypes of given tlist be hashed?
644+
*
645+
* We assume hashed aggregation will work if the datatype's equality operator
646+
* is marked hashjoinable.
647+
*
648+
* XXX this probably should be somewhere else. See also hash_safe_grouping
649+
* in plan/planner.c.
650+
*/
651+
static bool
652+
hash_safe_tlist(List *tlist)
653+
{
654+
List *tl;
655+
656+
foreach(tl, tlist)
657+
{
658+
Node *expr = (Node *) lfirst(tl);
659+
Operator optup;
660+
bool oprcanhash;
661+
662+
optup = equality_oper(exprType(expr), true);
663+
if (!optup)
664+
return false;
665+
oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;
666+
ReleaseSysCache(optup);
667+
if (!oprcanhash)
668+
return false;
669+
}
670+
return true;
671+
}
672+
600673
/*
601674
* create_subqueryscan_path
602675
* Creates a path corresponding to a sequential scan of a subquery,

0 commit comments

Comments
 (0)