Skip to content

Commit e3a33a9

Browse files
committed
Marginal hack to avoid spending a lot of time in find_join_rel during
large planning problems: when the list of join rels gets too long, make an auxiliary hash table that hashes on the identifying Bitmapset.
1 parent 77c168a commit e3a33a9

File tree

9 files changed

+194
-25
lines changed

9 files changed

+194
-25
lines changed

src/backend/nodes/bitmapset.c

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2003-2005, PostgreSQL Global Development Group
1515
*
1616
* IDENTIFICATION
17-
* $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.7 2005/01/01 20:44:15 tgl Exp $
17+
* $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.8 2005/06/08 23:02:04 tgl Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -763,3 +763,28 @@ bms_first_member(Bitmapset *a)
763763
}
764764
return -1;
765765
}
766+
767+
/*
768+
* bms_hash_value - compute a hash key for a Bitmapset
769+
*
770+
* Note: we must ensure that any two bitmapsets that are bms_equal() will
771+
* hash to the same value; in practice this means that trailing all-zero
772+
* words cannot affect the result. Longitudinal XOR provides a reasonable
773+
* hash value that has this property.
774+
*/
775+
uint32
776+
bms_hash_value(const Bitmapset *a)
777+
{
778+
bitmapword result = 0;
779+
int nwords;
780+
int wordnum;
781+
782+
if (a == NULL)
783+
return 0; /* All empty sets hash to 0 */
784+
nwords = a->nwords;
785+
for (wordnum = 0; wordnum < nwords; wordnum++)
786+
{
787+
result ^= a->words[wordnum];
788+
}
789+
return (uint32) result;
790+
}

src/backend/optimizer/geqo/geqo_eval.c

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.74 2005/06/05 22:32:55 tgl Exp $
9+
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.75 2005/06/08 23:02:04 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -47,7 +47,8 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
4747
MemoryContext oldcxt;
4848
RelOptInfo *joinrel;
4949
Cost fitness;
50-
List *savelist;
50+
int savelength;
51+
struct HTAB *savehash;
5152

5253
/*
5354
* Because gimme_tree considers both left- and right-sided trees,
@@ -83,13 +84,19 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
8384
* gimme_tree will add entries to root->join_rel_list, which may or may
8485
* not already contain some entries. The newly added entries will be
8586
* recycled by the MemoryContextDelete below, so we must ensure that
86-
* the list is restored to its former state before exiting. With the
87-
* new List implementation, the easiest way is to make a duplicate list
88-
* that gimme_tree can modify.
87+
* the list is restored to its former state before exiting. We can
88+
* do this by truncating the list to its original length. NOTE this
89+
* assumes that any added entries are appended at the end!
90+
*
91+
* We also must take care not to mess up the outer join_rel_hash,
92+
* if there is one. We can do this by just temporarily setting the
93+
* link to NULL. (If we are dealing with enough join rels, which we
94+
* very likely are, a new hash table will get built and used locally.)
8995
*/
90-
savelist = evaldata->root->join_rel_list;
96+
savelength = list_length(evaldata->root->join_rel_list);
97+
savehash = evaldata->root->join_rel_hash;
9198

92-
evaldata->root->join_rel_list = list_copy(savelist);
99+
evaldata->root->join_rel_hash = NULL;
93100

94101
/* construct the best path for the given combination of relations */
95102
joinrel = gimme_tree(tour, num_gene, evaldata);
@@ -105,8 +112,13 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
105112
else
106113
fitness = DBL_MAX;
107114

108-
/* restore join_rel_list */
109-
evaldata->root->join_rel_list = savelist;
115+
/*
116+
* Restore join_rel_list to its former state, and put back original
117+
* hashtable if any.
118+
*/
119+
evaldata->root->join_rel_list = list_truncate(evaldata->root->join_rel_list,
120+
savelength);
121+
evaldata->root->join_rel_hash = savehash;
110122

111123
/* release all the memory acquired within gimme_tree */
112124
MemoryContextSwitchTo(oldcxt);

src/backend/optimizer/geqo/geqo_main.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.49 2005/06/05 22:32:55 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.50 2005/06/08 23:02:04 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -252,7 +252,6 @@ geqo(PlannerInfo *root, int number_of_rels, List *initial_rels)
252252
*/
253253
best_tour = (Gene *) pool->data[0].string;
254254

255-
/* root->join_rel_list will be modified during this ! */
256255
best_rel = gimme_tree(best_tour, pool->string_length, &evaldata);
257256

258257
if (best_rel == NULL)

src/backend/optimizer/plan/planmain.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*
1515
*
1616
* IDENTIFICATION
17-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.83 2005/06/06 04:13:35 tgl Exp $
17+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.84 2005/06/08 23:02:04 tgl Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -116,6 +116,7 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
116116
root->base_rel_array = (RelOptInfo **)
117117
palloc0(root->base_rel_array_size * sizeof(RelOptInfo *));
118118
root->join_rel_list = NIL;
119+
root->join_rel_hash = NULL;
119120
root->equi_key_list = NIL;
120121

121122
/*

src/backend/optimizer/util/relnode.c

Lines changed: 101 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.68 2005/06/06 04:13:36 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.69 2005/06/08 23:02:05 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -21,8 +21,15 @@
2121
#include "optimizer/restrictinfo.h"
2222
#include "optimizer/tlist.h"
2323
#include "parser/parsetree.h"
24+
#include "utils/hsearch.h"
2425

2526

27+
typedef struct JoinHashEntry
28+
{
29+
Relids join_relids; /* hash key --- MUST BE FIRST */
30+
RelOptInfo *join_rel;
31+
} JoinHashEntry;
32+
2633
static RelOptInfo *make_reloptinfo(PlannerInfo *root, int relid,
2734
RelOptKind reloptkind);
2835
static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
@@ -197,6 +204,47 @@ find_base_rel(PlannerInfo *root, int relid)
197204
return NULL; /* keep compiler quiet */
198205
}
199206

207+
/*
208+
* build_join_rel_hash
209+
* Construct the auxiliary hash table for join relations.
210+
*/
211+
static void
212+
build_join_rel_hash(PlannerInfo *root)
213+
{
214+
HTAB *hashtab;
215+
HASHCTL hash_ctl;
216+
ListCell *l;
217+
218+
/* Create the hash table */
219+
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
220+
hash_ctl.keysize = sizeof(Relids);
221+
hash_ctl.entrysize = sizeof(JoinHashEntry);
222+
hash_ctl.hash = bitmap_hash;
223+
hash_ctl.match = bitmap_match;
224+
hash_ctl.hcxt = CurrentMemoryContext;
225+
hashtab = hash_create("JoinRelHashTable",
226+
256L,
227+
&hash_ctl,
228+
HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
229+
230+
/* Insert all the already-existing joinrels */
231+
foreach(l, root->join_rel_list)
232+
{
233+
RelOptInfo *rel = (RelOptInfo *) lfirst(l);
234+
JoinHashEntry *hentry;
235+
bool found;
236+
237+
hentry = (JoinHashEntry *) hash_search(hashtab,
238+
&(rel->relids),
239+
HASH_ENTER,
240+
&found);
241+
Assert(!found);
242+
hentry->join_rel = rel;
243+
}
244+
245+
root->join_rel_hash = hashtab;
246+
}
247+
200248
/*
201249
* find_join_rel
202250
* Returns relation entry corresponding to 'relids' (a set of RT indexes),
@@ -205,14 +253,44 @@ find_base_rel(PlannerInfo *root, int relid)
205253
RelOptInfo *
206254
find_join_rel(PlannerInfo *root, Relids relids)
207255
{
208-
ListCell *l;
256+
/*
257+
* Switch to using hash lookup when list grows "too long". The threshold
258+
* is arbitrary and is known only here.
259+
*/
260+
if (!root->join_rel_hash && list_length(root->join_rel_list) > 32)
261+
build_join_rel_hash(root);
209262

210-
foreach(l, root->join_rel_list)
263+
/*
264+
* Use either hashtable lookup or linear search, as appropriate.
265+
*
266+
* Note: the seemingly redundant hashkey variable is used to avoid
267+
* taking the address of relids; unless the compiler is exceedingly
268+
* smart, doing so would force relids out of a register and thus
269+
* probably slow down the list-search case.
270+
*/
271+
if (root->join_rel_hash)
211272
{
212-
RelOptInfo *rel = (RelOptInfo *) lfirst(l);
273+
Relids hashkey = relids;
274+
JoinHashEntry *hentry;
275+
276+
hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
277+
&hashkey,
278+
HASH_FIND,
279+
NULL);
280+
if (hentry)
281+
return hentry->join_rel;
282+
}
283+
else
284+
{
285+
ListCell *l;
213286

214-
if (bms_equal(rel->relids, relids))
215-
return rel;
287+
foreach(l, root->join_rel_list)
288+
{
289+
RelOptInfo *rel = (RelOptInfo *) lfirst(l);
290+
291+
if (bms_equal(rel->relids, relids))
292+
return rel;
293+
}
216294
}
217295

218296
return NULL;
@@ -329,9 +407,24 @@ build_join_rel(PlannerInfo *root,
329407
jointype, restrictlist);
330408

331409
/*
332-
* Add the joinrel to the query's joinrel list.
410+
* Add the joinrel to the query's joinrel list, and store it into
411+
* the auxiliary hashtable if there is one. NB: GEQO requires us
412+
* to append the new joinrel to the end of the list!
333413
*/
334-
root->join_rel_list = lcons(joinrel, root->join_rel_list);
414+
root->join_rel_list = lappend(root->join_rel_list, joinrel);
415+
416+
if (root->join_rel_hash)
417+
{
418+
JoinHashEntry *hentry;
419+
bool found;
420+
421+
hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
422+
&(joinrel->relids),
423+
HASH_ENTER,
424+
&found);
425+
Assert(!found);
426+
hentry->join_rel = joinrel;
427+
}
335428

336429
return joinrel;
337430
}

src/backend/utils/hash/hashfn.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.23 2005/04/14 20:32:43 tgl Exp $
12+
* $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.24 2005/06/08 23:02:05 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
1616
#include "postgres.h"
1717

1818
#include "access/hash.h"
19+
#include "nodes/bitmapset.h"
1920
#include "utils/hsearch.h"
2021

2122

@@ -53,3 +54,26 @@ oid_hash(const void *key, Size keysize)
5354
/* We don't actually bother to do anything to the OID value ... */
5455
return (uint32) *((const Oid *) key);
5556
}
57+
58+
/*
59+
* bitmap_hash: hash function for keys that are (pointers to) Bitmapsets
60+
*
61+
* Note: don't forget to specify bitmap_match as the match function!
62+
*/
63+
uint32
64+
bitmap_hash(const void *key, Size keysize)
65+
{
66+
Assert(keysize == sizeof(Bitmapset *));
67+
return bms_hash_value(*((const Bitmapset * const *) key));
68+
}
69+
70+
/*
71+
* bitmap_match: match function to use with bitmap_hash
72+
*/
73+
int
74+
bitmap_match(const void *key1, const void *key2, Size keysize)
75+
{
76+
Assert(keysize == sizeof(Bitmapset *));
77+
return !bms_equal(*((const Bitmapset * const *) key1),
78+
*((const Bitmapset * const *) key2));
79+
}

src/include/nodes/bitmapset.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*
1414
* Copyright (c) 2003-2005, PostgreSQL Global Development Group
1515
*
16-
* $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.6 2005/01/01 20:44:28 tgl Exp $
16+
* $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.7 2005/06/08 23:02:05 tgl Exp $
1717
*
1818
*-------------------------------------------------------------------------
1919
*/
@@ -80,4 +80,7 @@ extern Bitmapset *bms_join(Bitmapset *a, Bitmapset *b);
8080
/* support for iterating through the integer elements of a set: */
8181
extern int bms_first_member(Bitmapset *a);
8282

83+
/* support for hashtables using Bitmapsets as keys: */
84+
extern uint32 bms_hash_value(const Bitmapset *a);
85+
8386
#endif /* BITMAPSET_H */

src/include/nodes/relation.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.111 2005/06/06 04:13:36 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.112 2005/06/08 23:02:05 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -72,7 +72,17 @@ typedef struct PlannerInfo
7272
struct RelOptInfo **base_rel_array; /* All one-relation RelOptInfos */
7373
int base_rel_array_size; /* current allocated array len */
7474

75+
/*
76+
* join_rel_list is a list of all join-relation RelOptInfos we have
77+
* considered in this planning run. For small problems we just scan
78+
* the list to do lookups, but when there are many join relations we
79+
* build a hash table for faster lookups. The hash table is present
80+
* and valid when join_rel_hash is not NULL. Note that we still maintain
81+
* the list even when using the hash table for lookups; this simplifies
82+
* life for GEQO.
83+
*/
7584
List *join_rel_list; /* list of join-relation RelOptInfos */
85+
struct HTAB *join_rel_hash; /* optional hashtable for join relations */
7686

7787
List *equi_key_list; /* list of lists of equijoined
7888
* PathKeyItems */

src/include/utils/hsearch.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.36 2005/05/29 04:23:06 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.37 2005/06/08 23:02:05 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -184,5 +184,7 @@ extern long hash_select_dirsize(long num_entries);
184184
extern uint32 string_hash(const void *key, Size keysize);
185185
extern uint32 tag_hash(const void *key, Size keysize);
186186
extern uint32 oid_hash(const void *key, Size keysize);
187+
extern uint32 bitmap_hash(const void *key, Size keysize);
188+
extern int bitmap_match(const void *key1, const void *key2, Size keysize);
187189

188190
#endif /* HSEARCH_H */

0 commit comments

Comments
 (0)