Skip to content

Commit 2604359

Browse files
committed
Improve hash_any() to use word-wide fetches when hashing suitably aligned
data. This makes for a significant speedup at the cost that the results now vary between little-endian and big-endian machines; which forces us to add explicit ORDER BYs in a couple of regression tests to preserve machine-independent comparison results. Also, force initdb by bumping catversion, since the contents of hash indexes will change (at least on big-endian machines). Kenneth Marshall and Tom Lane, based on work from Bob Jenkins. This commit does not adopt Bob's new faster mix() algorithm, however, since we still need to convince ourselves that that doesn't degrade the quality of the hashing.
1 parent d785b39 commit 2604359

File tree

6 files changed

+227
-55
lines changed

6 files changed

+227
-55
lines changed

contrib/dblink/expected/dblink.out

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -711,11 +711,19 @@ UNION
711711
UNION
712712
(SELECT * from dblink_get_result('dtest3') as t3(f1 int, f2 text, f3 text[]))
713713
ORDER by f1;
714-
SELECT dblink_get_connections();
715-
dblink_get_connections
716-
------------------------
717-
{dtest1,dtest2,dtest3}
718-
(1 row)
714+
-- dblink_get_connections returns an array with elements in a machine-dependent
715+
-- ordering, so we must resort to unnesting and sorting for a stable result
716+
create function unnest(anyarray) returns setof anyelement
717+
language sql strict immutable as $$
718+
select $1[i] from generate_series(array_lower($1,1), array_upper($1,1)) as i
719+
$$;
720+
SELECT * FROM unnest(dblink_get_connections()) ORDER BY 1;
721+
unnest
722+
--------
723+
dtest1
724+
dtest2
725+
dtest3
726+
(3 rows)
719727

720728
SELECT dblink_is_busy('dtest1');
721729
dblink_is_busy

contrib/dblink/sql/dblink.sql

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,15 @@ UNION
340340
(SELECT * from dblink_get_result('dtest3') as t3(f1 int, f2 text, f3 text[]))
341341
ORDER by f1;
342342

343-
SELECT dblink_get_connections();
343+
-- dblink_get_connections returns an array with elements in a machine-dependent
344+
-- ordering, so we must resort to unnesting and sorting for a stable result
345+
create function unnest(anyarray) returns setof anyelement
346+
language sql strict immutable as $$
347+
select $1[i] from generate_series(array_lower($1,1), array_upper($1,1)) as i
348+
$$;
349+
350+
SELECT * FROM unnest(dblink_get_connections()) ORDER BY 1;
351+
344352
SELECT dblink_is_busy('dtest1');
345353

346354
SELECT dblink_disconnect('dtest1');

src/backend/access/hash/hashfunc.c

Lines changed: 193 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.55 2008/01/01 19:45:46 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.56 2008/04/06 16:54:48 tgl Exp $
1212
*
1313
* NOTES
1414
* These functions are stored in pg_amproc. For each operator class
@@ -199,8 +199,18 @@ hashvarlena(PG_FUNCTION_ARGS)
199199
* for PostgreSQL by Neil Conway. For more information on this
200200
* hash function, see http://burtleburtle.net/bob/hash/doobs.html,
201201
* or Bob's article in Dr. Dobb's Journal, Sept. 1997.
202+
*
203+
* In the current code, we have adopted an idea from Bob's 2006 update
204+
* of his hash function, which is to fetch the data a word at a time when
205+
* it is suitably aligned. This makes for a useful speedup, at the cost
206+
* of having to maintain four code paths (aligned vs unaligned, and
207+
* little-endian vs big-endian). Note that we have NOT adopted his newer
208+
* mix() function, which is faster but may sacrifice some randomness.
202209
*/
203210

211+
/* Get a bit mask of the bits set in non-uint32 aligned addresses */
212+
#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
213+
204214
/*----------
205215
* mix -- mix 3 32-bit values reversibly.
206216
* For every delta with one or two bits set, and the deltas of all three
@@ -235,6 +245,10 @@ hashvarlena(PG_FUNCTION_ARGS)
235245
* About 6*len+35 instructions. The best hash table sizes are powers
236246
* of 2. There is no need to do mod a prime (mod is sooo slow!).
237247
* If you need less than 32 bits, use a bitmask.
248+
*
249+
* Note: we could easily change this function to return a 64-bit hash value
250+
* by using the final values of both b and c. b is perhaps a little less
251+
* well mixed than c, however.
238252
*/
239253
Datum
240254
hash_any(register const unsigned char *k, register int keylen)
@@ -249,46 +263,188 @@ hash_any(register const unsigned char *k, register int keylen)
249263
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
250264
c = 3923095; /* initialize with an arbitrary value */
251265

252-
/* handle most of the key */
253-
while (len >= 12)
266+
/* If the source pointer is word-aligned, we use word-wide fetches */
267+
if (((long) k & UINT32_ALIGN_MASK) == 0)
254268
{
255-
a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
256-
b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
257-
c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
258-
mix(a, b, c);
259-
k += 12;
260-
len -= 12;
269+
/* Code path for aligned source data */
270+
register const uint32 *ka = (const uint32 *) k;
271+
272+
/* handle most of the key */
273+
while (len >= 12)
274+
{
275+
a += ka[0];
276+
b += ka[1];
277+
c += ka[2];
278+
mix(a, b, c);
279+
ka += 3;
280+
len -= 12;
281+
}
282+
283+
/* handle the last 11 bytes */
284+
k = (const unsigned char *) ka;
285+
c += keylen;
286+
#ifdef WORDS_BIGENDIAN
287+
switch (len)
288+
{
289+
case 11:
290+
c += ((uint32) k[10] << 8);
291+
/* fall through */
292+
case 10:
293+
c += ((uint32) k[9] << 16);
294+
/* fall through */
295+
case 9:
296+
c += ((uint32) k[8] << 24);
297+
/* the lowest byte of c is reserved for the length */
298+
/* fall through */
299+
case 8:
300+
b += ka[1];
301+
a += ka[0];
302+
break;
303+
case 7:
304+
b += ((uint32) k[6] << 8);
305+
/* fall through */
306+
case 6:
307+
b += ((uint32) k[5] << 16);
308+
/* fall through */
309+
case 5:
310+
b += ((uint32) k[4] << 24);
311+
/* fall through */
312+
case 4:
313+
a += ka[0];
314+
break;
315+
case 3:
316+
a += ((uint32) k[2] << 8);
317+
/* fall through */
318+
case 2:
319+
a += ((uint32) k[1] << 16);
320+
/* fall through */
321+
case 1:
322+
a += ((uint32) k[0] << 24);
323+
/* case 0: nothing left to add */
324+
}
325+
#else /* !WORDS_BIGENDIAN */
326+
switch (len)
327+
{
328+
case 11:
329+
c += ((uint32) k[10] << 24);
330+
/* fall through */
331+
case 10:
332+
c += ((uint32) k[9] << 16);
333+
/* fall through */
334+
case 9:
335+
c += ((uint32) k[8] << 8);
336+
/* the lowest byte of c is reserved for the length */
337+
/* fall through */
338+
case 8:
339+
b += ka[1];
340+
a += ka[0];
341+
break;
342+
case 7:
343+
b += ((uint32) k[6] << 16);
344+
/* fall through */
345+
case 6:
346+
b += ((uint32) k[5] << 8);
347+
/* fall through */
348+
case 5:
349+
b += k[4];
350+
/* fall through */
351+
case 4:
352+
a += ka[0];
353+
break;
354+
case 3:
355+
a += ((uint32) k[2] << 16);
356+
/* fall through */
357+
case 2:
358+
a += ((uint32) k[1] << 8);
359+
/* fall through */
360+
case 1:
361+
a += k[0];
362+
/* case 0: nothing left to add */
363+
}
364+
#endif /* WORDS_BIGENDIAN */
261365
}
262-
263-
/* handle the last 11 bytes */
264-
c += keylen;
265-
switch (len) /* all the case statements fall through */
366+
else
266367
{
267-
case 11:
268-
c += ((uint32) k[10] << 24);
269-
case 10:
270-
c += ((uint32) k[9] << 16);
271-
case 9:
272-
c += ((uint32) k[8] << 8);
273-
/* the first byte of c is reserved for the length */
274-
case 8:
275-
b += ((uint32) k[7] << 24);
276-
case 7:
277-
b += ((uint32) k[6] << 16);
278-
case 6:
279-
b += ((uint32) k[5] << 8);
280-
case 5:
281-
b += k[4];
282-
case 4:
283-
a += ((uint32) k[3] << 24);
284-
case 3:
285-
a += ((uint32) k[2] << 16);
286-
case 2:
287-
a += ((uint32) k[1] << 8);
288-
case 1:
289-
a += k[0];
368+
/* Code path for non-aligned source data */
369+
370+
/* handle most of the key */
371+
while (len >= 12)
372+
{
373+
#ifdef WORDS_BIGENDIAN
374+
a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
375+
b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
376+
c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
377+
#else /* !WORDS_BIGENDIAN */
378+
a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
379+
b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
380+
c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
381+
#endif /* WORDS_BIGENDIAN */
382+
mix(a, b, c);
383+
k += 12;
384+
len -= 12;
385+
}
386+
387+
/* handle the last 11 bytes */
388+
c += keylen;
389+
#ifdef WORDS_BIGENDIAN
390+
switch (len) /* all the case statements fall through */
391+
{
392+
case 11:
393+
c += ((uint32) k[10] << 8);
394+
case 10:
395+
c += ((uint32) k[9] << 16);
396+
case 9:
397+
c += ((uint32) k[8] << 24);
398+
/* the lowest byte of c is reserved for the length */
399+
case 8:
400+
b += k[7];
401+
case 7:
402+
b += ((uint32) k[6] << 8);
403+
case 6:
404+
b += ((uint32) k[5] << 16);
405+
case 5:
406+
b += ((uint32) k[4] << 24);
407+
case 4:
408+
a += k[3];
409+
case 3:
410+
a += ((uint32) k[2] << 8);
411+
case 2:
412+
a += ((uint32) k[1] << 16);
413+
case 1:
414+
a += ((uint32) k[0] << 24);
290415
/* case 0: nothing left to add */
416+
}
417+
#else /* !WORDS_BIGENDIAN */
418+
switch (len) /* all the case statements fall through */
419+
{
420+
case 11:
421+
c += ((uint32) k[10] << 24);
422+
case 10:
423+
c += ((uint32) k[9] << 16);
424+
case 9:
425+
c += ((uint32) k[8] << 8);
426+
/* the lowest byte of c is reserved for the length */
427+
case 8:
428+
b += ((uint32) k[7] << 24);
429+
case 7:
430+
b += ((uint32) k[6] << 16);
431+
case 6:
432+
b += ((uint32) k[5] << 8);
433+
case 5:
434+
b += k[4];
435+
case 4:
436+
a += ((uint32) k[3] << 24);
437+
case 3:
438+
a += ((uint32) k[2] << 16);
439+
case 2:
440+
a += ((uint32) k[1] << 8);
441+
case 1:
442+
a += k[0];
443+
/* case 0: nothing left to add */
444+
}
445+
#endif /* WORDS_BIGENDIAN */
291446
}
447+
292448
mix(a, b, c);
293449

294450
/* report the result */
@@ -298,7 +454,7 @@ hash_any(register const unsigned char *k, register int keylen)
298454
/*
299455
* hash_uint32() -- hash a 32-bit value
300456
*
301-
* This has the same result (at least on little-endian machines) as
457+
* This has the same result as
302458
* hash_any(&k, sizeof(uint32))
303459
* but is faster and doesn't force the caller to store k into memory.
304460
*/

src/include/catalog/catversion.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
3838
* Portions Copyright (c) 1994, Regents of the University of California
3939
*
40-
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.445 2008/04/04 18:45:36 tgl Exp $
40+
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.446 2008/04/06 16:54:48 tgl Exp $
4141
*
4242
*-------------------------------------------------------------------------
4343
*/
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 200804041
56+
#define CATALOG_VERSION_NO 200804051
5757

5858
#endif

src/test/regress/expected/portals.out

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -678,20 +678,20 @@ CLOSE foo12;
678678
-- leave some cursors open, to test that auto-close works.
679679
-- record this in the system view as well (don't query the time field there
680680
-- however)
681-
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
681+
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors ORDER BY 1;
682682
name | statement | is_holdable | is_binary | is_scrollable
683683
-------+-----------------------------------------------------------------------+-------------+-----------+---------------
684684
foo13 | DECLARE foo13 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f | f | t
685+
foo14 | DECLARE foo14 SCROLL CURSOR FOR SELECT * FROM tenk2; | f | f | t
685686
foo15 | DECLARE foo15 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f | f | t
686-
foo19 | DECLARE foo19 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f | f | t
687+
foo16 | DECLARE foo16 SCROLL CURSOR FOR SELECT * FROM tenk2; | f | f | t
687688
foo17 | DECLARE foo17 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f | f | t
688-
foo14 | DECLARE foo14 SCROLL CURSOR FOR SELECT * FROM tenk2; | f | f | t
689-
foo21 | DECLARE foo21 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f | f | t
690-
foo23 | DECLARE foo23 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f | f | t
691689
foo18 | DECLARE foo18 SCROLL CURSOR FOR SELECT * FROM tenk2; | f | f | t
690+
foo19 | DECLARE foo19 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f | f | t
692691
foo20 | DECLARE foo20 SCROLL CURSOR FOR SELECT * FROM tenk2; | f | f | t
692+
foo21 | DECLARE foo21 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f | f | t
693693
foo22 | DECLARE foo22 SCROLL CURSOR FOR SELECT * FROM tenk2; | f | f | t
694-
foo16 | DECLARE foo16 SCROLL CURSOR FOR SELECT * FROM tenk2; | f | f | t
694+
foo23 | DECLARE foo23 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f | f | t
695695
(11 rows)
696696

697697
END;
@@ -851,11 +851,11 @@ SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
851851
(1 row)
852852

853853
DECLARE bc BINARY CURSOR FOR SELECT * FROM tenk1;
854-
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
854+
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors ORDER BY 1;
855855
name | statement | is_holdable | is_binary | is_scrollable
856856
------+----------------------------------------------------------------------+-------------+-----------+---------------
857-
c2 | declare c2 cursor with hold for select count_tt1_v(), count_tt1_s(); | t | f | f
858857
bc | DECLARE bc BINARY CURSOR FOR SELECT * FROM tenk1; | f | t | t
858+
c2 | declare c2 cursor with hold for select count_tt1_v(), count_tt1_s(); | t | f | f
859859
(2 rows)
860860

861861
ROLLBACK;

src/test/regress/sql/portals.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ CLOSE foo12;
170170

171171
-- record this in the system view as well (don't query the time field there
172172
-- however)
173-
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
173+
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors ORDER BY 1;
174174

175175
END;
176176

@@ -295,7 +295,7 @@ drop function count_tt1_s();
295295
BEGIN;
296296
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
297297
DECLARE bc BINARY CURSOR FOR SELECT * FROM tenk1;
298-
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
298+
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors ORDER BY 1;
299299
ROLLBACK;
300300

301301
-- We should not see the portal that is created internally to

0 commit comments

Comments
 (0)