Skip to content

Commit 2103b7b

Browse files
committed
Phase 2 of hashed-aggregation project. nodeAgg.c now knows how to do
hashed aggregation, but there's not yet planner support for it.
1 parent fc9814d commit 2103b7b

File tree

12 files changed

+696
-266
lines changed

12 files changed

+696
-266
lines changed

src/backend/executor/nodeAgg.c

Lines changed: 569 additions & 215 deletions
Large diffs are not rendered by default.

src/backend/executor/nodeGroup.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* locate group boundaries.
1616
*
1717
* IDENTIFICATION
18-
* $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.48 2002/11/06 00:00:43 tgl Exp $
18+
* $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.49 2002/11/06 22:31:23 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -151,9 +151,8 @@ ExecInitGroup(Group *node, EState *estate, Plan *parent)
151151
*/
152152
grpstate = makeNode(GroupState);
153153
node->grpstate = grpstate;
154-
grpstate->grp_useFirstTuple = FALSE;
155-
grpstate->grp_done = FALSE;
156154
grpstate->grp_firstTuple = NULL;
155+
grpstate->grp_done = FALSE;
157156

158157
/*
159158
* create expression context
@@ -236,7 +235,6 @@ ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent)
236235
{
237236
GroupState *grpstate = node->grpstate;
238237

239-
grpstate->grp_useFirstTuple = FALSE;
240238
grpstate->grp_done = FALSE;
241239
if (grpstate->grp_firstTuple != NULL)
242240
{

src/backend/executor/nodeHash.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
*
10-
* $Id: nodeHash.c,v 1.66 2002/09/04 20:31:18 momjian Exp $
10+
* IDENTIFICATION
11+
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.67 2002/11/06 22:31:23 tgl Exp $
1112
*
1213
*-------------------------------------------------------------------------
1314
*/
@@ -31,8 +32,6 @@
3132
#include "utils/lsyscache.h"
3233

3334

34-
static uint32 hashFunc(Datum key, int typLen, bool byVal);
35-
3635
/* ----------------------------------------------------------------
3736
* ExecHash
3837
*
@@ -532,7 +531,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
532531

533532
/*
534533
* We reset the eval context each time to reclaim any memory leaked in
535-
* the hashkey expression or hashFunc itself.
534+
* the hashkey expression or ComputeHashFunc itself.
536535
*/
537536
ResetExprContext(econtext);
538537

@@ -550,9 +549,9 @@ ExecHashGetBucket(HashJoinTable hashtable,
550549
bucketno = 0;
551550
else
552551
{
553-
bucketno = hashFunc(keyval,
554-
(int) hashtable->typLen,
555-
hashtable->typByVal)
552+
bucketno = ComputeHashFunc(keyval,
553+
(int) hashtable->typLen,
554+
hashtable->typByVal)
556555
% (uint32) hashtable->totalbuckets;
557556
}
558557

@@ -622,16 +621,16 @@ ExecScanHashBucket(HashJoinState *hjstate,
622621
}
623622

624623
/* ----------------------------------------------------------------
625-
* hashFunc
624+
* ComputeHashFunc
626625
*
627-
* the hash function for hash joins
626+
* the hash function for hash joins (also used for hash aggregation)
628627
*
629628
* XXX this probably ought to be replaced with datatype-specific
630629
* hash functions, such as those already implemented for hash indexes.
631630
* ----------------------------------------------------------------
632631
*/
633-
static uint32
634-
hashFunc(Datum key, int typLen, bool byVal)
632+
uint32
633+
ComputeHashFunc(Datum key, int typLen, bool byVal)
635634
{
636635
unsigned char *k;
637636

@@ -681,7 +680,7 @@ hashFunc(Datum key, int typLen, bool byVal)
681680
}
682681
else
683682
{
684-
elog(ERROR, "hashFunc: Invalid typLen %d", typLen);
683+
elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
685684
k = NULL; /* keep compiler quiet */
686685
}
687686
}

src/backend/nodes/copyfuncs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Portions Copyright (c) 1994, Regents of the University of California
1616
*
1717
* IDENTIFICATION
18-
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.215 2002/11/06 00:00:43 tgl Exp $
18+
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.216 2002/11/06 22:31:23 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -524,6 +524,7 @@ _copyAgg(Agg *from)
524524
memcpy(newnode->grpColIdx, from->grpColIdx,
525525
from->numCols * sizeof(AttrNumber));
526526
}
527+
newnode->numGroups = from->numGroups;
527528

528529
return newnode;
529530
}

src/backend/nodes/outfuncs.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
66
* Portions Copyright (c) 1994, Regents of the University of California
77
*
8-
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.177 2002/11/06 00:00:44 tgl Exp $
8+
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.178 2002/11/06 22:31:24 tgl Exp $
99
*
1010
* NOTES
1111
* Every (plan) node in POSTGRES has an associated "out" routine which
@@ -597,8 +597,8 @@ _outAgg(StringInfo str, Agg *node)
597597
{
598598
appendStringInfo(str, " AGG ");
599599
_outPlanInfo(str, (Plan *) node);
600-
appendStringInfo(str, " :aggstrategy %d :numCols %d ",
601-
(int) node->aggstrategy, node->numCols);
600+
appendStringInfo(str, " :aggstrategy %d :numCols %d :numGroups %ld ",
601+
(int) node->aggstrategy, node->numCols, node->numGroups);
602602
}
603603

604604
static void

src/backend/optimizer/plan/createplan.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.120 2002/11/06 00:00:44 tgl Exp $
13+
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.121 2002/11/06 22:31:24 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -1675,6 +1675,7 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
16751675
plan->plan_rows *= 0.1;
16761676
if (plan->plan_rows < 1)
16771677
plan->plan_rows = 1;
1678+
node->numGroups = (long) plan->plan_rows;
16781679
}
16791680

16801681
plan->state = (EState *) NULL;

src/backend/optimizer/plan/planner.c

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.126 2002/11/06 00:00:44 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.127 2002/11/06 22:31:24 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -931,6 +931,7 @@ grouping_planner(Query *parse, double tuple_fraction)
931931
AttrNumber *groupColIdx = NULL;
932932
Path *cheapest_path;
933933
Path *sorted_path;
934+
bool use_hashed_grouping = false;
934935

935936
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
936937
tlist = preprocess_targetlist(tlist,
@@ -1209,6 +1210,29 @@ grouping_planner(Query *parse, double tuple_fraction)
12091210
group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys);
12101211
sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
12111212

1213+
/*
1214+
* Consider whether we might want to use hashed grouping.
1215+
*/
1216+
if (parse->groupClause)
1217+
{
1218+
/*
1219+
* Executor doesn't support hashed aggregation with DISTINCT
1220+
* aggregates. (Doing so would imply storing *all* the input
1221+
* values in the hash table, which seems like a certain loser.)
1222+
*/
1223+
if (parse->hasAggs &&
1224+
(contain_distinct_agg_clause((Node *) tlist) ||
1225+
contain_distinct_agg_clause(parse->havingQual)))
1226+
use_hashed_grouping = false;
1227+
else
1228+
{
1229+
#if 0 /* much more to do here */
1230+
/* TEMPORARY HOTWIRE FOR TESTING */
1231+
use_hashed_grouping = true;
1232+
#endif
1233+
}
1234+
}
1235+
12121236
/*
12131237
* Select the best path and create a plan to execute it.
12141238
*
@@ -1279,22 +1303,30 @@ grouping_planner(Query *parse, double tuple_fraction)
12791303
}
12801304

12811305
/*
1282-
* If any aggregate is present, insert the Agg node, plus an explicit
1283-
* sort if necessary.
1306+
* Insert AGG or GROUP node if needed, plus an explicit sort step
1307+
* if necessary.
12841308
*
12851309
* HAVING clause, if any, becomes qual of the Agg node
12861310
*/
1287-
if (parse->hasAggs)
1311+
if (use_hashed_grouping)
12881312
{
1313+
/* Hashed aggregate plan --- no sort needed */
1314+
result_plan = (Plan *) make_agg(tlist,
1315+
(List *) parse->havingQual,
1316+
AGG_HASHED,
1317+
length(parse->groupClause),
1318+
groupColIdx,
1319+
result_plan);
1320+
/* Hashed aggregation produces randomly-ordered results */
1321+
current_pathkeys = NIL;
1322+
}
1323+
else if (parse->hasAggs)
1324+
{
1325+
/* Plain aggregate plan --- sort if needed */
12891326
AggStrategy aggstrategy;
12901327

12911328
if (parse->groupClause)
12921329
{
1293-
aggstrategy = AGG_SORTED;
1294-
/*
1295-
* Add an explicit sort if we couldn't make the path come out
1296-
* the way the AGG node needs it.
1297-
*/
12981330
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
12991331
{
13001332
result_plan = make_groupsortplan(parse,
@@ -1303,20 +1335,25 @@ grouping_planner(Query *parse, double tuple_fraction)
13031335
result_plan);
13041336
current_pathkeys = group_pathkeys;
13051337
}
1338+
aggstrategy = AGG_SORTED;
1339+
/*
1340+
* The AGG node will not change the sort ordering of its
1341+
* groups, so current_pathkeys describes the result too.
1342+
*/
13061343
}
13071344
else
1345+
{
13081346
aggstrategy = AGG_PLAIN;
1347+
/* Result will be only one row anyway; no sort order */
1348+
current_pathkeys = NIL;
1349+
}
13091350

13101351
result_plan = (Plan *) make_agg(tlist,
13111352
(List *) parse->havingQual,
13121353
aggstrategy,
13131354
length(parse->groupClause),
13141355
groupColIdx,
13151356
result_plan);
1316-
/*
1317-
* Note: plain or grouped Agg does not affect any existing
1318-
* sort order of the tuples
1319-
*/
13201357
}
13211358
else
13221359
{

src/backend/optimizer/util/clauses.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.109 2002/09/11 14:48:54 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.110 2002/11/06 22:31:24 tgl Exp $
1212
*
1313
* HISTORY
1414
* AUTHOR DATE MAJOR EVENT
@@ -46,6 +46,7 @@ typedef struct
4646
} check_subplans_for_ungrouped_vars_context;
4747

4848
static bool contain_agg_clause_walker(Node *node, void *context);
49+
static bool contain_distinct_agg_clause_walker(Node *node, void *context);
4950
static bool pull_agg_clause_walker(Node *node, List **listptr);
5051
static bool expression_returns_set_walker(Node *node, void *context);
5152
static bool contain_subplans_walker(Node *node, void *context);
@@ -410,6 +411,32 @@ contain_agg_clause_walker(Node *node, void *context)
410411
return expression_tree_walker(node, contain_agg_clause_walker, context);
411412
}
412413

414+
/*
415+
* contain_distinct_agg_clause
416+
* Recursively search for DISTINCT Aggref nodes within a clause.
417+
*
418+
* Returns true if any DISTINCT aggregate found.
419+
*/
420+
bool
421+
contain_distinct_agg_clause(Node *clause)
422+
{
423+
return contain_distinct_agg_clause_walker(clause, NULL);
424+
}
425+
426+
static bool
427+
contain_distinct_agg_clause_walker(Node *node, void *context)
428+
{
429+
if (node == NULL)
430+
return false;
431+
if (IsA(node, Aggref))
432+
{
433+
if (((Aggref *) node)->aggdistinct)
434+
return true; /* abort the tree traversal and return
435+
* true */
436+
}
437+
return expression_tree_walker(node, contain_distinct_agg_clause_walker, context);
438+
}
439+
413440
/*
414441
* pull_agg_clause
415442
* Recursively pulls all Aggref nodes from an expression tree.

src/include/executor/nodeHash.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $Id: nodeHash.h,v 1.24 2002/06/20 20:29:49 momjian Exp $
10+
* $Id: nodeHash.h,v 1.25 2002/11/06 22:31:24 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -36,5 +36,6 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
3636
int *virtualbuckets,
3737
int *physicalbuckets,
3838
int *numbatches);
39+
extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal);
3940

4041
#endif /* NODEHASH_H */

0 commit comments

Comments
 (0)