Skip to content

Commit 96de9bb

Browse files
author
Andi Gutmans
committed
- Significantly improve hash table performance by using djb's hash function
instead of hashpjw() and by using power of two sizes of hash tables (this saves the % and isn't necessary with a good hash function). Please try this patch.
1 parent 710d927 commit 96de9bb

File tree

2 files changed

+42
-53
lines changed

2 files changed

+42
-53
lines changed

Zend/zend_hash.c

Lines changed: 40 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ static void _zend_is_inconsistent(HashTable *ht, char *file, int line)
117117
(ht)->nApplyCount--;
118118

119119

120-
/* Generated on an Octa-ALPHA 300MHz CPU & 2.5GB RAM monster */
121120
static uint PrimeNumbers[] =
122121
{5, 11, 19, 53, 107, 223, 463, 983, 1979, 3907, 7963, 16229, 32531, 65407, 130987, 262237, 524521, 1048793, 2097397, 4194103, 8388857, 16777447, 33554201, 67108961, 134217487, 268435697, 536870683, 1073741621, 2147483399};
123122
static uint nNumPrimeNumbers = sizeof(PrimeNumbers) / sizeof(uint);
@@ -129,22 +128,23 @@ static uint nNumPrimeNumbers = sizeof(PrimeNumbers) / sizeof(uint);
129128

130129
static int zend_hash_do_resize(HashTable *ht);
131130

132-
133-
ZEND_API ulong hashpjw(char *arKey, uint nKeyLength)
131+
static inline ulong zend_inline_hash_func(char *arKey, uint nKeyLength)
134132
{
135-
ulong h = 0, g;
136-
char *arEnd=arKey+nKeyLength;
133+
ulong h = 5381;
134+
char *arEnd = arKey + nKeyLength;
137135

138136
while (arKey < arEnd) {
139-
h = (h << 4) + *arKey++;
140-
if ((g = (h & 0xF0000000))) {
141-
h = h ^ (g >> 24);
142-
h = h ^ g;
143-
}
137+
h += (h << 5);
138+
h ^= (ulong) *arKey++;
144139
}
145140
return h;
146141
}
147142

143+
ZEND_API ulong zend_hash_func(char *arKey, uint nKeyLength)
144+
{
145+
return zend_inline_hash_func(arKey, nKeyLength);
146+
}
147+
148148

149149
#define UPDATE_DATA(ht, p, pData, nDataSize) \
150150
if (nDataSize == sizeof(void*)) { \
@@ -178,35 +178,25 @@ ZEND_API ulong hashpjw(char *arKey, uint nKeyLength)
178178

179179
ZEND_API int zend_hash_init(HashTable *ht, uint nSize, hash_func_t pHashFunction, dtor_func_t pDestructor, int persistent)
180180
{
181-
uint i;
181+
uint i = 3;
182182

183183
SET_INCONSISTENT(HT_OK);
184-
185-
for (i = 0; i < nNumPrimeNumbers; i++) {
186-
if (nSize <= PrimeNumbers[i]) {
187-
nSize = PrimeNumbers[i];
188-
ht->nHashSizeIndex = i;
189-
break;
190-
}
191-
}
192-
if (i == nNumPrimeNumbers) { /* This shouldn't really happen unless the ask for a ridiculous size */
193-
nSize = PrimeNumbers[i - 1];
194-
ht->nHashSizeIndex = i - 1;
184+
185+
while ((1 << i) < nSize) {
186+
i++;
195187
}
188+
189+
ht->nTableSize = 1 << i;
190+
ht->nTableMask = ht->nTableSize - 1;
196191

197192
/* Uses ecalloc() so that Bucket* == NULL */
198-
ht->arBuckets = (Bucket **) pecalloc(nSize, sizeof(Bucket *), persistent);
193+
ht->arBuckets = (Bucket **) pecalloc(ht->nTableSize, sizeof(Bucket *), persistent);
199194

200195
if (!ht->arBuckets) {
201196
return FAILURE;
202197
}
203-
if (pHashFunction == NULL) {
204-
ht->pHashFunction = hashpjw;
205-
} else {
206-
ht->pHashFunction = pHashFunction;
207-
}
198+
208199
ht->pDestructor = pDestructor;
209-
ht->nTableSize = nSize;
210200
ht->pListHead = NULL;
211201
ht->pListTail = NULL;
212202
ht->nNumOfElements = 0;
@@ -252,8 +242,8 @@ ZEND_API int zend_hash_add_or_update(HashTable *ht, char *arKey, uint nKeyLength
252242

253243
HANDLE_NUMERIC(arKey, nKeyLength, zend_hash_index_update_or_next_insert(ht, idx, pData, nDataSize, pDest, flag));
254244

255-
h = ht->pHashFunction(arKey, nKeyLength);
256-
nIndex = h % ht->nTableSize;
245+
h = zend_inline_hash_func(arKey, nKeyLength);
246+
nIndex = h & ht->nTableMask;
257247

258248
p = ht->arBuckets[nIndex];
259249
while (p != NULL) {
@@ -321,7 +311,7 @@ ZEND_API int zend_hash_quick_add_or_update(HashTable *ht, char *arKey, uint nKey
321311
return FAILURE;
322312
}
323313

324-
nIndex = h % ht->nTableSize;
314+
nIndex = h & ht->nTableMask;
325315

326316
p = ht->arBuckets[nIndex];
327317
while (p != NULL) {
@@ -397,7 +387,7 @@ ZEND_API int zend_hash_index_update_or_next_insert(HashTable *ht, ulong h, void
397387
if (flag & HASH_NEXT_INSERT) {
398388
h = ht->nNextFreeElement;
399389
}
400-
nIndex = h % ht->nTableSize;
390+
nIndex = h & ht->nTableMask;
401391

402392
p = ht->arBuckets[nIndex];
403393
while (p != NULL) {
@@ -461,13 +451,13 @@ static int zend_hash_do_resize(HashTable *ht)
461451

462452
IS_CONSISTENT(ht);
463453

464-
if ((ht->nHashSizeIndex < nNumPrimeNumbers - 1)) { /* Let's double the table size */
465-
t = (Bucket **) perealloc_recoverable(ht->arBuckets, PrimeNumbers[ht->nHashSizeIndex + 1] * sizeof(Bucket *), ht->persistent);
454+
if ((ht->nTableSize << 1) > 0) { /* Let's double the table size */
455+
t = (Bucket **) perealloc_recoverable(ht->arBuckets, (ht->nTableSize << 1) * sizeof(Bucket *), ht->persistent);
466456
if (t) {
467457
HANDLE_BLOCK_INTERRUPTIONS();
468458
ht->arBuckets = t;
469-
ht->nTableSize = PrimeNumbers[ht->nHashSizeIndex + 1];
470-
ht->nHashSizeIndex++;
459+
ht->nTableSize = (ht->nTableSize << 1);
460+
ht->nTableMask = ht->nTableSize - 1;
471461
zend_hash_rehash(ht);
472462
HANDLE_UNBLOCK_INTERRUPTIONS();
473463
return SUCCESS;
@@ -484,10 +474,10 @@ ZEND_API int zend_hash_rehash(HashTable *ht)
484474

485475
IS_CONSISTENT(ht);
486476

487-
memset(ht->arBuckets, 0, PrimeNumbers[ht->nHashSizeIndex] * sizeof(Bucket *));
477+
memset(ht->arBuckets, 0, ht->nTableSize * sizeof(Bucket *));
488478
p = ht->pListHead;
489479
while (p != NULL) {
490-
nIndex = p->h % ht->nTableSize;
480+
nIndex = p->h & ht->nTableMask;
491481
CONNECT_TO_BUCKET_DLLIST(p, ht->arBuckets[nIndex]);
492482
ht->arBuckets[nIndex] = p;
493483
p = p->pListNext;
@@ -504,9 +494,9 @@ ZEND_API int zend_hash_del_key_or_index(HashTable *ht, char *arKey, uint nKeyLen
504494

505495
if (flag == HASH_DEL_KEY) {
506496
HANDLE_NUMERIC(arKey, nKeyLength, zend_hash_del_key_or_index(ht, arKey, nKeyLength, idx, HASH_DEL_INDEX));
507-
h = ht->pHashFunction(arKey, nKeyLength);
497+
h = zend_inline_hash_func(arKey, nKeyLength);
508498
}
509-
nIndex = h % ht->nTableSize;
499+
nIndex = h & ht->nTableMask;
510500

511501
p = ht->arBuckets[nIndex];
512502
while (p != NULL) {
@@ -632,7 +622,7 @@ static Bucket *zend_hash_apply_deleter(HashTable *ht, Bucket *p)
632622
} else {
633623
uint nIndex;
634624

635-
nIndex = p->h % ht->nTableSize;
625+
nIndex = p->h & ht->nTableMask;
636626
ht->arBuckets[nIndex] = p->pNext;
637627
}
638628
if (p->pNext) {
@@ -837,7 +827,7 @@ ZEND_API ulong zend_get_hash_value(HashTable *ht, char *arKey, uint nKeyLength)
837827
{
838828
IS_CONSISTENT(ht);
839829

840-
return ht->pHashFunction(arKey, nKeyLength);
830+
return zend_inline_hash_func(arKey, nKeyLength);
841831
}
842832

843833

@@ -855,8 +845,8 @@ ZEND_API int zend_hash_find(HashTable *ht, char *arKey, uint nKeyLength, void **
855845

856846
HANDLE_NUMERIC(arKey, nKeyLength, zend_hash_index_find(ht, idx, pData));
857847

858-
h = ht->pHashFunction(arKey, nKeyLength);
859-
nIndex = h % ht->nTableSize;
848+
h = zend_inline_hash_func(arKey, nKeyLength);
849+
nIndex = h & ht->nTableMask;
860850

861851
p = ht->arBuckets[nIndex];
862852
while (p != NULL) {
@@ -879,7 +869,7 @@ ZEND_API int zend_hash_quick_find(HashTable *ht, char *arKey, uint nKeyLength, u
879869

880870
IS_CONSISTENT(ht);
881871

882-
nIndex = h % ht->nTableSize;
872+
nIndex = h & ht->nTableMask;
883873

884874
p = ht->arBuckets[nIndex];
885875
while (p != NULL) {
@@ -905,8 +895,8 @@ ZEND_API int zend_hash_exists(HashTable *ht, char *arKey, uint nKeyLength)
905895

906896
HANDLE_NUMERIC(arKey, nKeyLength, zend_hash_index_exists(ht, idx));
907897

908-
h = ht->pHashFunction(arKey, nKeyLength);
909-
nIndex = h % ht->nTableSize;
898+
h = zend_inline_hash_func(arKey, nKeyLength);
899+
nIndex = h & ht->nTableMask;
910900

911901
p = ht->arBuckets[nIndex];
912902
while (p != NULL) {
@@ -928,7 +918,7 @@ ZEND_API int zend_hash_index_find(HashTable *ht, ulong h, void **pData)
928918

929919
IS_CONSISTENT(ht);
930920

931-
nIndex = h % ht->nTableSize;
921+
nIndex = h & ht->nTableMask;
932922

933923
p = ht->arBuckets[nIndex];
934924
while (p != NULL) {
@@ -949,7 +939,7 @@ ZEND_API int zend_hash_index_exists(HashTable *ht, ulong h)
949939

950940
IS_CONSISTENT(ht);
951941

952-
nIndex = h % ht->nTableSize;
942+
nIndex = h & ht->nTableMask;
953943

954944
p = ht->arBuckets[nIndex];
955945
while (p != NULL) {

Zend/zend_hash.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,12 @@
3333
#define HASH_DEL_KEY 0
3434
#define HASH_DEL_INDEX 1
3535

36+
typedef ulong (*hash_func_t)(char *arKey, uint nKeyLength);
3637
typedef int (*compare_func_t)(const void *, const void *);
3738
typedef void (*sort_func_t)(void *, size_t, register size_t, compare_func_t);
3839
typedef void (*dtor_func_t)(void *pDest);
3940
typedef int (*apply_func_t)(void *pDest);
4041
typedef int (*apply_func_arg_t)(void *pDest, void *argument);
41-
typedef ulong (*hash_func_t)(char *arKey, uint nKeyLength);
4242
typedef void (*copy_ctor_func_t)(void *pElement);
4343

4444
struct _hashtable;
@@ -57,10 +57,9 @@ typedef struct bucket {
5757

5858
typedef struct _hashtable {
5959
uint nTableSize;
60-
uint nHashSizeIndex;
60+
uint nTableMask;
6161
uint nNumOfElements;
6262
ulong nNextFreeElement;
63-
hash_func_t pHashFunction;
6463
Bucket *pInternalPointer; /* Used for element traversal */
6564
Bucket *pListHead;
6665
Bucket *pListTail;

0 commit comments

Comments
 (0)