
Commit 0aba255

Add optimized C string hashing
Given an already-initialized hash state and a NUL-terminated string, accumulate the hash of the string into the hash state and return the length for the caller to (optionally) save for the finalizer. This avoids a strlen call.

If the string pointer is aligned, we can use a word-at-a-time algorithm for NUL lookahead. The aligned case is only used on 64-bit platforms, since it's not worth the extra complexity for 32-bit.

Handling the tail of the string after finishing the word-wise loop was inspired by NetBSD's strlen(), but no code was taken since that is written in assembly language.

As demonstration, use this in the search path cache. This brings the general case performance closer to the special case optimization done in commit a86c61c. There are other places that could benefit, but that is left for future work.

Jeff Davis and John Naylor
Reviewed by Heikki Linnakangas, Jian He, Junwang Zhao
Discussion: https://postgr.es/m/3820f030fd008ff14134b3e9ce5cc6dd623ed479.camel%40j-davis.com
Discussion: https://postgr.es/m/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
1 parent e97b672 commit 0aba255
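
Since the commit message leans on the word-at-a-time NUL lookahead trick, here is a small, self-contained sketch (not taken from the patch) of how a zero byte can be located inside an aligned 64-bit load. It assumes a little-endian platform and a GCC/Clang compiler, and uses __builtin_ctzll() where the patch itself uses pg_rightmost_one_pos64().

/*
 * Sketch only: locate the NUL terminator with one aligned 64-bit load,
 * using the same zero-byte bit trick the patch applies (haszero64).
 * Assumes little-endian; GCC/Clang for __builtin_ctzll().
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* set the high bit of each byte that was zero in the input word */
#define HASZERO64(v) \
	(((v) - UINT64_C(0x0101010101010101)) & ~(v) & UINT64_C(0x8080808080808080))

int
main(void)
{
	uint64_t	buf[2];			/* 8-byte-aligned storage for the string */
	uint64_t	chunk;
	uint64_t	zero_bytes;
	int			nul_offset;

	memcpy(buf, "abc\0-padding...", 16);	/* "abc" plus its terminator */
	chunk = buf[0];				/* one word-at-a-time load */

	zero_bytes = HASZERO64(chunk);	/* nonzero: this word holds a NUL */
	/* first zero byte = count of trailing zero bits / 8 (little-endian) */
	nul_offset = __builtin_ctzll(zero_bytes) / 8;

	printf("NUL found at byte offset %d\n", nul_offset);	/* prints 3 */
	return 0;
}

In the patch, the same arithmetic appears as pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE when handling the final word of the string.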

2 files changed: +145, -5 lines
src/backend/catalog/namespace.c

Lines changed: 15 additions & 5 deletions
@@ -41,7 +41,7 @@
 #include "catalog/pg_ts_template.h"
 #include "catalog/pg_type.h"
 #include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
@@ -253,11 +253,21 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
 static inline uint32
 spcachekey_hash(SearchPathCacheKey key)
 {
-	const unsigned char *bytes = (const unsigned char *) key.searchPath;
-	int			blen = strlen(key.searchPath);
+	fasthash_state hs;
+	int			sp_len;
 
-	return hash_combine(hash_bytes(bytes, blen),
-						hash_uint32(key.roleid));
+	fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+
+	hs.accum = key.roleid;
+	fasthash_combine(&hs);
+
+	/*
+	 * Combine search path into the hash and save the length for tweaking the
+	 * final mix.
+	 */
+	sp_len = fasthash_accum_cstring(&hs, key.searchPath);
+
+	return fasthash_final32(&hs, sp_len);
 }
 
 static inline bool

src/include/common/hashfn_unstable.h

Lines changed: 130 additions & 0 deletions
@@ -58,6 +58,24 @@
  * 2) Incremental interface. This can be used for incorporating multiple
  * inputs. The standalone functions use this internally, so see fasthash64()
  * for an example of how this works.
+ *
+ * The incremental interface is especially useful if any of the inputs
+ * are NUL-terminated C strings, since the length is not needed ahead
+ * of time. This avoids needing to call strlen(). This case is optimized
+ * in fasthash_accum_cstring():
+ *
+ * fasthash_state hs;
+ * fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ * len = fasthash_accum_cstring(&hs, str);
+ * ...
+ * return fasthash_final32(&hs, len);
+ *
+ * Here we pass FH_UNKNOWN_LENGTH as a convention, since passing zero
+ * would zero out the internal seed as well. fasthash_accum_cstring()
+ * returns the length of the string, which is computed on-the-fly while
+ * mixing the string into the hash. Experimentation has found that
+ * SMHasher fails unless we incorporate the length, so it is passed to
+ * the finalizer as a tweak.
  */
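
To make the pattern documented in the comment above concrete, here is a minimal caller sketch. It is hypothetical (the function name is invented and not part of the patch) and assumes compilation inside the PostgreSQL backend, where common/hashfn_unstable.h and the fasthash routines shown in this commit are available.

#include "postgres.h"
#include "common/hashfn_unstable.h"

/*
 * Hash a NUL-terminated string without a separate strlen() call:
 * seed the state with FH_UNKNOWN_LENGTH, mix the string in, and pass
 * the length computed along the way to the finalizer as the tweak.
 */
static inline uint32
demo_hash_cstring(const char *str)
{
	fasthash_state hs;
	int			len;

	fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
	len = fasthash_accum_cstring(&hs, str);

	return fasthash_final32(&hs, len);
}

The spcachekey_hash() change in namespace.c above follows the same shape, with an extra fasthash_combine() step to fold in the role OID before the string.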

@@ -151,6 +169,118 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
 	fasthash_combine(hs);
 }
 
+/*
+ * Set high bit in lowest byte where the input is zero, from:
+ * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ */
+#define haszero64(v) \
+	(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
+
+/*
+ * all-purpose workhorse for fasthash_accum_cstring
+ */
+static inline int
+fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
+{
+	const char *const start = str;
+
+	while (*str)
+	{
+		int			chunk_len = 0;
+
+		while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
+			chunk_len++;
+
+		fasthash_accum(hs, str, chunk_len);
+		str += chunk_len;
+	}
+
+	return str - start;
+}
+
+/*
+ * specialized workhorse for fasthash_accum_cstring
+ *
+ * With an aligned pointer, we consume the string a word at a time.
+ * Loading the word containing the NUL terminator cannot segfault since
+ * allocation boundaries are suitably aligned.
+ */
+static inline int
+fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
+{
+	const char *const start = str;
+	int			remainder;
+	uint64		zero_bytes_le;
+
+	Assert(PointerIsAligned(start, uint64));
+	for (;;)
+	{
+		uint64		chunk = *(uint64 *) str;
+
+		/*
+		 * With little-endian representation, we can use this calculation,
+		 * which sets bits in the first byte in the result word that
+		 * corresponds to a zero byte in the original word. The rest of the
+		 * bytes are indeterminate, so cannot be used on big-endian machines
+		 * without either swapping or a bytewise check.
+		 */
+#ifdef WORDS_BIGENDIAN
+		zero_bytes_le = haszero64(pg_bswap64(chunk));
+#else
+		zero_bytes_le = haszero64(chunk);
+#endif
+		if (zero_bytes_le)
+			break;
+
+		hs->accum = chunk;
+		fasthash_combine(hs);
+		str += FH_SIZEOF_ACCUM;
+	}
+
+	/*
+	 * For the last word, only use bytes up to the NUL for the hash. Bytes
+	 * with set bits will be 0x80, so calculate the first occurrence of a zero
+	 * byte within the input word by counting the number of trailing (because
+	 * little-endian) zeros and dividing the result by 8.
+	 */
+	remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+	fasthash_accum(hs, str, remainder);
+	str += remainder;
+
+	return str - start;
+}
+
+/*
+ * Mix 'str' into the hash state and return the length of the string.
+ */
+static inline int
+fasthash_accum_cstring(fasthash_state *hs, const char *str)
+{
+#if SIZEOF_VOID_P >= 8
+
+	int			len;
+#ifdef USE_ASSERT_CHECKING
+	int			len_check;
+	fasthash_state hs_check;
+
+	memcpy(&hs_check, hs, sizeof(fasthash_state));
+	len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
+#endif
+	if (PointerIsAligned(str, uint64))
+	{
+		len = fasthash_accum_cstring_aligned(hs, str);
+		Assert(hs_check.hash == hs->hash && len_check == len);
+		return len;
+	}
+#endif							/* SIZEOF_VOID_P */
+
+	/*
+	 * It's not worth it to try to make the word-at-a-time optimization work
+	 * on 32-bit platforms.
+	 */
+	return fasthash_accum_cstring_unaligned(hs, str);
+}
+
 /*
  * The finalizer
  *
