8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.31 2002/02/25 04:06:47 momjian Exp $
11
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
12
12
*
13
13
* NOTES
14
14
* These functions are stored in pg_amproc. For each operator class
@@ -95,6 +95,8 @@ hashname(PG_FUNCTION_ARGS)
95
95
{
96
96
char * key = NameStr (* PG_GETARG_NAME (0 ));
97
97
98
+ Assert (strlen (key ) <= NAMEDATALEN );
99
+
98
100
return hash_any (key , strlen (key ));
99
101
}
100
102
@@ -116,61 +118,89 @@ hashvarlena(PG_FUNCTION_ARGS)
116
118
return result ;
117
119
}
118
120
121
+ /* This hash function was written by Bob Jenkins
122
+ * (bob_jenkins@burtleburtle.net), and superficially adapted
123
+ * for PostgreSQL by Neil Conway. For more information on this
124
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html
125
+ */
119
126
120
127
/*
121
- * hash_any --- compute a hash function for any specified chunk of memory
122
- *
123
- * This can be used as the underlying hash function for any pass-by-reference
124
- * data type in which there are no non-significant bits.
125
- *
126
- * (Comment from the original db3 hashing code: )
127
- *
128
- * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte
129
- * units. On the first time through the loop we get the 'leftover bytes'
130
- * (strlen % 8). On every later iteration, we perform 8 HASHC's so we handle
131
- * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If
132
- * this routine is heavily used enough, it's worth the ugly coding.
133
- *
134
- * "OZ's original sdbm hash"
128
+ * mix -- mix 3 32-bit values reversibly.
129
+ * For every delta with one or two bits set, and the deltas of all three
130
+ * high bits or all three low bits, whether the original value of a,b,c
131
+ * is almost all zero or is uniformly distributed,
132
+ * - If mix() is run forward or backward, at least 32 bits in a,b,c
133
+ * have at least 1/4 probability of changing.
134
+ * - If mix() is run forward, every bit of c will change between 1/3 and
135
+ * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
136
+ */
137
+ #define mix (a ,b ,c ) \
138
+ { \
139
+ a -= b; a -= c; a ^= (c>>13); \
140
+ b -= c; b -= a; b ^= (a<<8); \
141
+ c -= a; c -= b; c ^= (b>>13); \
142
+ a -= b; a -= c; a ^= (c>>12); \
143
+ b -= c; b -= a; b ^= (a<<16); \
144
+ c -= a; c -= b; c ^= (b>>5); \
145
+ a -= b; a -= c; a ^= (c>>3); \
146
+ b -= c; b -= a; b ^= (a<<10); \
147
+ c -= a; c -= b; c ^= (b>>15); \
148
+ }
149
+
150
+ /*
151
+ * hash_any() -- hash a variable-length key into a 32-bit value
152
+ * k : the key (the unaligned variable-length array of bytes)
153
+ * len : the length of the key, counting by bytes
154
+ * Returns a 32-bit value. Every bit of the key affects every bit of
155
+ * the return value. Every 1-bit and 2-bit delta achieves avalanche.
156
+ * About 6*len+35 instructions. The best hash table sizes are powers
157
+ * of 2. There is no need to do mod a prime (mod is sooo slow!).
158
+ * If you need less than 32 bits, use a bitmask.
135
159
*/
136
160
Datum
137
- hash_any (const char * keydata , int keylen )
161
+ hash_any (register const char * k , register int keylen )
138
162
{
139
- uint32 n ;
140
- int loop ;
141
-
142
- #define HASHC n = *keydata++ + 65599 * n
143
-
144
- n = 0 ;
145
- if (keylen > 0 )
146
- {
147
- loop = (keylen + 8 - 1 ) >> 3 ;
148
-
149
- switch (keylen & (8 - 1 ))
150
- {
151
- case 0 :
152
- do
153
- { /* All fall throughs */
154
- HASHC ;
155
- case 7 :
156
- HASHC ;
157
- case 6 :
158
- HASHC ;
159
- case 5 :
160
- HASHC ;
161
- case 4 :
162
- HASHC ;
163
- case 3 :
164
- HASHC ;
165
- case 2 :
166
- HASHC ;
167
- case 1 :
168
- HASHC ;
169
- } while (-- loop );
170
- }
171
- }
172
-
173
- #undef HASHC
174
-
175
- PG_RETURN_UINT32 (n );
163
+ register Datum a ,b ,c ,len ;
164
+
165
+ /* Set up the internal state */
166
+ len = keylen ;
167
+ a = b = 0x9e3779b9 ; /* the golden ratio; an arbitrary value */
168
+ /* Another arbitrary value. If the hash function is called
169
+ * multiple times, this could be the previously generated
170
+ * hash value; however, the interface currently doesn't allow
171
+ * this. AFAIK this isn't a big deal.
172
+ */
173
+ c = 3923095 ;
174
+
175
+ /* handle most of the key */
176
+ while (len >= 12 )
177
+ {
178
+ a += (k [0 ] + ((Datum )k [1 ]<<8 ) + ((Datum )k [2 ]<<16 ) + ((Datum )k [3 ]<<24 ));
179
+ b += (k [4 ] + ((Datum )k [5 ]<<8 ) + ((Datum )k [6 ]<<16 ) + ((Datum )k [7 ]<<24 ));
180
+ c += (k [8 ] + ((Datum )k [9 ]<<8 ) + ((Datum )k [10 ]<<16 )+ ((Datum )k [11 ]<<24 ));
181
+ mix (a ,b ,c );
182
+ k += 12 ; len -= 12 ;
183
+ }
184
+
185
+ /* handle the last 11 bytes */
186
+ c += keylen ;
187
+ switch (len ) /* all the case statements fall through */
188
+ {
189
+ case 11 : c += ((Datum )k [10 ]<<24 );
190
+ case 10 : c += ((Datum )k [9 ]<<16 );
191
+ case 9 : c += ((Datum )k [8 ]<<8 );
192
+ /* the first byte of c is reserved for the length */
193
+ case 8 : b += ((Datum )k [7 ]<<24 );
194
+ case 7 : b += ((Datum )k [6 ]<<16 );
195
+ case 6 : b += ((Datum )k [5 ]<<8 );
196
+ case 5 : b += k [4 ];
197
+ case 4 : a += ((Datum )k [3 ]<<24 );
198
+ case 3 : a += ((Datum )k [2 ]<<16 );
199
+ case 2 : a += ((Datum )k [1 ]<<8 );
200
+ case 1 : a += k [0 ];
201
+ /* case 0: nothing left to add */
202
+ }
203
+ mix (a ,b ,c );
204
+ /* report the result */
205
+ return c ;
176
206
}
0 commit comments