Skip to content

Commit 84c0e4b

Browse files
committed
Improve programmer docs for simplehash and dynahash.
When reading the code it's not obvious when one should prefer dynahash over simplehash and vice-versa, so, for programmer-friendliness, add comments to inform that decision. Show sample simplehash method signatures. Author: James Coleman <jtc331@gmail.com> Discussion: https://postgr.es/m/CAAaqYe_dOF39gAJ8rL-a3YO3Qo96MHMRQ2whFjK5ZcU6YvMQSA%40mail.gmail.com
1 parent c79aed4 commit 84c0e4b

File tree

2 files changed

+80
-5
lines changed

2 files changed

+80
-5
lines changed

src/backend/utils/hash/dynahash.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*-------------------------------------------------------------------------
22
*
33
* dynahash.c
4-
* dynamic hash tables
4+
* dynamic chained hash tables
55
*
66
* dynahash.c supports both local-to-a-backend hash tables and hash tables in
77
* shared memory. For shared hash tables, it is the caller's responsibility
@@ -41,6 +41,16 @@
4141
* function must be supplied; comparison defaults to memcmp() and key copying
4242
* to memcpy() when a user-defined hashing function is selected.
4343
*
44+
* Compared to simplehash, dynahash has the following benefits:
45+
*
46+
* - It supports partitioning, which is useful for shared memory access using
47+
* locks.
48+
* - Shared memory hashes are allocated in a fixed size area at startup and
49+
* are discoverable by name from other processes.
50+
* - Because entries don't need to be moved in the case of hash conflicts, has
51+
* better performance for large entries
52+
* - Guarantees stable pointers to entries.
53+
*
4454
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
4555
* Portions Copyright (c) 1994, Regents of the University of California
4656
*

src/include/lib/simplehash.h

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,27 @@
11
/*
22
* simplehash.h
33
*
4-
* Hash table implementation which will be specialized to user-defined
5-
* types, by including this file to generate the required code. It's
6-
* probably not worthwhile to do so for hash tables that aren't performance
7-
* or space sensitive.
4+
* When included this file generates a "templated" (by way of macros)
5+
* open-addressing hash table implementation specialized to user-defined
6+
* types.
7+
*
8+
* It's probably not worthwhile to generate such a specialized implementation
9+
* for hash tables that aren't performance or space sensitive.
10+
*
11+
* Compared to dynahash, simplehash has the following benefits:
12+
*
13+
* - Due to the "templated" code generation has known structure sizes and no
14+
* indirect function calls (which show up substantially in dynahash
15+
* profiles). These features considerably increase speed for small
16+
* entries.
17+
* - Open addressing has better CPU cache behavior than dynahash's chained
18+
* hashtables.
19+
* - The generated interface is type-safe and easier to use than dynahash,
20+
* though at the cost of more complex setup.
21+
* - Allocates memory in a MemoryContext or another allocator with a
22+
* malloc/free style interface (which isn't easily usable in a shared
23+
* memory context)
24+
* - Does not require the overhead of a separate memory context.
825
*
926
* Usage notes:
1027
*
@@ -34,6 +51,19 @@
3451
* - SH_STORE_HASH - if defined the hash is stored in the elements
3552
* - SH_GET_HASH(tb, a) - return the field to store the hash in
3653
*
54+
* The element type is required to contain a "uint32 status" member.
55+
*
56+
* While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
57+
* the hash table implementation needs to compare hashes to move elements
58+
* (particularly when growing the hash), it's preferable, if possible, to
59+
* store the element's hash in the element's data type. If the hash is so
60+
* stored, the hash table will also compare hashes before calling SH_EQUAL
61+
* when comparing two keys.
62+
*
63+
* For convenience the hash table create functions accept a void pointer
64+
* that will be stored in the hash table type's member private_data. This
65+
* allows callbacks to reference caller provided data.
66+
*
3767
* For examples of usage look at tidbitmap.c (file local definition) and
3868
* execnodes.h/execGrouping.c (exposed declaration, file local
3969
* implementation).
@@ -149,24 +179,59 @@ typedef struct SH_ITERATOR
149179

150180
/* externally visible function prototypes */
151181
#ifdef SH_RAW_ALLOCATOR
182+
/* <prefix>_hash <prefix>_create(uint32 nelements, void *private_data) */
152183
SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
153184
#else
185+
/*
186+
* <prefix>_hash <prefix>_create(MemoryContext ctx, uint32 nelements,
187+
* void *private_data)
188+
*/
154189
SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
155190
void *private_data);
156191
#endif
192+
193+
/* void <prefix>_destroy(<prefix>_hash *tb) */
157194
SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
195+
196+
/* void <prefix>_reset(<prefix>_hash *tb) */
158197
SH_SCOPE void SH_RESET(SH_TYPE * tb);
198+
199+
/* void <prefix>_grow(<prefix>_hash *tb) */
159200
SH_SCOPE void SH_GROW(SH_TYPE * tb, uint32 newsize);
201+
202+
/* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
160203
SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
204+
205+
/*
206+
* <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
207+
* bool *found)
208+
*/
161209
SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
162210
uint32 hash, bool *found);
211+
212+
/* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
163213
SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
214+
215+
/* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
164216
SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
165217
uint32 hash);
218+
219+
/* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
166220
SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
221+
222+
/* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
167223
SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
224+
225+
/*
226+
* void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
227+
* uint32 at)
228+
*/
168229
SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
230+
231+
/* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
169232
SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
233+
234+
/* void <prefix>_stat(<prefix>_hash *tb */
170235
SH_SCOPE void SH_STAT(SH_TYPE * tb);
171236

172237
#endif /* SH_DECLARE */

0 commit comments

Comments
 (0)