
Commit 0aba255

Add optimized C string hashing
Given an already-initialized hash state and a NUL-terminated string, accumulate the hash of the string into the hash state and return the length for the caller to (optionally) save for the finalizer. This avoids a strlen call.

If the string pointer is aligned, we can use a word-at-a-time algorithm for NUL lookahead. The aligned case is only used on 64-bit platforms, since it's not worth the extra complexity for 32-bit.

Handling the tail of the string after finishing the word-wise loop was inspired by NetBSD's strlen(), but no code was taken since that is written in assembly language.

As demonstration, use this in the search path cache. This brings the general case performance closer to the special case optimization done in commit a86c61c. There are other places that could benefit, but that is left for future work.

Jeff Davis and John Naylor
Reviewed by Heikki Linnakangas, Jian He, Junwang Zhao
Discussion: https://postgr.es/m/3820f030fd008ff14134b3e9ce5cc6dd623ed479.camel%40j-davis.com
Discussion: https://postgr.es/m/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
1 parent e97b672 commit 0aba255
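
Since the commit message leans on the word-at-a-time NUL lookahead trick, here is a small, self-contained sketch (not taken from the patch) of how a zero byte can be located inside an aligned 64-bit load. It assumes a little-endian platform and a GCC/Clang compiler, and uses __builtin_ctzll() where the patch itself uses pg_rightmost_one_pos64().

/*
 * Sketch only: locate the NUL terminator with one aligned 64-bit load,
 * using the same zero-byte bit trick the patch applies (haszero64).
 * Assumes little-endian; GCC/Clang for __builtin_ctzll().
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* set the high bit of each byte that was zero in the input word */
#define HASZERO64(v) \
	(((v) - UINT64_C(0x0101010101010101)) & ~(v) & UINT64_C(0x8080808080808080))

int
main(void)
{
	uint64_t	buf[2];			/* 8-byte-aligned storage for the string */
	uint64_t	chunk;
	uint64_t	zero_bytes;
	int			nul_offset;

	memcpy(buf, "abc\0-padding...", 16);	/* "abc" plus its terminator */
	chunk = buf[0];				/* one word-at-a-time load */

	zero_bytes = HASZERO64(chunk);	/* nonzero: this word holds a NUL */
	/* first zero byte = count of trailing zero bits / 8 (little-endian) */
	nul_offset = __builtin_ctzll(zero_bytes) / 8;

	printf("NUL found at byte offset %d\n", nul_offset);	/* prints 3 */
	return 0;
}

In the patch, the same arithmetic appears as pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE when handling the final word of the string.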

2 files changed: +145, -5 lines
src/backend/catalog/namespace.c

Lines changed: 15 additions & 5 deletions
@@ -41,7 +41,7 @@
 #include "catalog/pg_ts_template.h"
 #include "catalog/pg_type.h"
 #include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
@@ -253,11 +253,21 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
 static inline uint32
 spcachekey_hash(SearchPathCacheKey key)
 {
-	const unsigned char *bytes = (const unsigned char *) key.searchPath;
-	int			blen = strlen(key.searchPath);
+	fasthash_state hs;
+	int			sp_len;
 
-	return hash_combine(hash_bytes(bytes, blen),
-						hash_uint32(key.roleid));
+	fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+
+	hs.accum = key.roleid;
+	fasthash_combine(&hs);
+
+	/*
+	 * Combine search path into the hash and save the length for tweaking the
+	 * final mix.
+	 */
+	sp_len = fasthash_accum_cstring(&hs, key.searchPath);
+
+	return fasthash_final32(&hs, sp_len);
 }
 
 static inline bool

src/include/common/hashfn_unstable.h

Lines changed: 130 additions & 0 deletions
@@ -58,6 +58,24 @@
  * 2) Incremental interface. This can be used for incorporating multiple
  * inputs. The standalone functions use this internally, so see fasthash64()
  * for an example of how this works.
+ *
+ * The incremental interface is especially useful if any of the inputs
+ * are NUL-terminated C strings, since the length is not needed ahead
+ * of time. This avoids needing to call strlen(). This case is optimized
+ * in fasthash_accum_cstring():
+ *
+ * fasthash_state hs;
+ * fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ * len = fasthash_accum_cstring(&hs, str);
+ * ...
+ * return fasthash_final32(&hs, len);
+ *
+ * Here we pass FH_UNKNOWN_LENGTH as a convention, since passing zero
+ * would zero out the internal seed as well. fasthash_accum_cstring()
+ * returns the length of the string, which is computed on-the-fly while
+ * mixing the string into the hash. Experimentation has found that
+ * SMHasher fails unless we incorporate the length, so it is passed to
+ * the finalizer as a tweak.
  */
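
To make the pattern documented in the comment above concrete, here is a minimal caller sketch. It is hypothetical (the function name is invented and not part of the patch) and assumes compilation inside the PostgreSQL backend, where common/hashfn_unstable.h and the fasthash routines shown in this commit are available.

#include "postgres.h"
#include "common/hashfn_unstable.h"

/*
 * Hash a NUL-terminated string without a separate strlen() call:
 * seed the state with FH_UNKNOWN_LENGTH, mix the string in, and pass
 * the length computed along the way to the finalizer as the tweak.
 */
static inline uint32
demo_hash_cstring(const char *str)
{
	fasthash_state hs;
	int			len;

	fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
	len = fasthash_accum_cstring(&hs, str);

	return fasthash_final32(&hs, len);
}

The spcachekey_hash() change in namespace.c above follows the same shape, with an extra fasthash_combine() step to fold in the role OID before the string.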

@@ -151,6 +169,118 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
 	fasthash_combine(hs);
 }
 
+/*
+ * Set high bit in lowest byte where the input is zero, from:
+ * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ */
+#define haszero64(v) \
+	(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
+
+/*
+ * all-purpose workhorse for fasthash_accum_cstring
+ */
+static inline int
+fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
+{
+	const char *const start = str;
+
+	while (*str)
+	{
+		int			chunk_len = 0;
+
+		while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
+			chunk_len++;
+
+		fasthash_accum(hs, str, chunk_len);
+		str += chunk_len;
+	}
+
+	return str - start;
+}
+
+/*
+ * specialized workhorse for fasthash_accum_cstring
+ *
+ * With an aligned pointer, we consume the string a word at a time.
+ * Loading the word containing the NUL terminator cannot segfault since
+ * allocation boundaries are suitably aligned.
+ */
+static inline int
+fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
+{
+	const char *const start = str;
+	int			remainder;
+	uint64		zero_bytes_le;
+
+	Assert(PointerIsAligned(start, uint64));
+	for (;;)
+	{
+		uint64		chunk = *(uint64 *) str;
+
+		/*
+		 * With little-endian representation, we can use this calculation,
+		 * which sets bits in the first byte in the result word that
+		 * corresponds to a zero byte in the original word. The rest of the
+		 * bytes are indeterminate, so cannot be used on big-endian machines
+		 * without either swapping or a bytewise check.
+		 */
+#ifdef WORDS_BIGENDIAN
+		zero_bytes_le = haszero64(pg_bswap64(chunk));
+#else
+		zero_bytes_le = haszero64(chunk);
+#endif
+		if (zero_bytes_le)
+			break;
+
+		hs->accum = chunk;
+		fasthash_combine(hs);
+		str += FH_SIZEOF_ACCUM;
+	}
+
+	/*
+	 * For the last word, only use bytes up to the NUL for the hash. Bytes
+	 * with set bits will be 0x80, so calculate the first occurrence of a zero
+	 * byte within the input word by counting the number of trailing (because
+	 * little-endian) zeros and dividing the result by 8.
+	 */
+	remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+	fasthash_accum(hs, str, remainder);
+	str += remainder;
+
+	return str - start;
+}
+
+/*
+ * Mix 'str' into the hash state and return the length of the string.
+ */
+static inline int
+fasthash_accum_cstring(fasthash_state *hs, const char *str)
+{
+#if SIZEOF_VOID_P >= 8
+
+	int			len;
+#ifdef USE_ASSERT_CHECKING
+	int			len_check;
+	fasthash_state hs_check;
+
+	memcpy(&hs_check, hs, sizeof(fasthash_state));
+	len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
+#endif
+	if (PointerIsAligned(str, uint64))
+	{
+		len = fasthash_accum_cstring_aligned(hs, str);
+		Assert(hs_check.hash == hs->hash && len_check == len);
+		return len;
+	}
+#endif							/* SIZEOF_VOID_P */
+
+	/*
+	 * It's not worth it to try to make the word-at-a-time optimization work
+	 * on 32-bit platforms.
+	 */
+	return fasthash_accum_cstring_unaligned(hs, str);
+}
+
 /*
  * The finalizer
  *
