Skip to content

Commit b6ef167

Browse files
committed
Introduce optimized routine for linear searches of arrays
Use SSE2 intrinsics to speed up the search, where available. Otherwise, use a simple 'for' loop. The motivation to add this now is to speed up XidInMVCCSnapshot(), which is the reason only unsigned 32-bit integer arrays are optimized. Other types are left for future work, as is the extension of this technique to non-x86 platforms. Nathan Bossart Reviewed by: Andres Freund, Bharath Rupireddy, Masahiko Sawada Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13
1 parent 356dd2c commit b6ef167

File tree

9 files changed

+215
-0
lines changed

9 files changed

+215
-0
lines changed

src/include/port/pg_lfind.h

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* pg_lfind.h
4+
* Optimized linear search routines.
5+
*
6+
* Copyright (c) 2022, PostgreSQL Global Development Group
7+
*
8+
* IDENTIFICATION
9+
* src/include/port/pg_lfind.h
10+
*
11+
*-------------------------------------------------------------------------
12+
*/
13+
#ifndef PG_LFIND_H
14+
#define PG_LFIND_H
15+
16+
#include "port/simd.h"
17+
18+
/*
19+
* pg_lfind32
20+
*
21+
* Return true if there is an element in 'base' that equals 'key', otherwise
22+
* return false.
23+
*/
24+
static inline bool
25+
pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
26+
{
27+
uint32 i = 0;
28+
29+
/* Use SIMD intrinsics where available. */
30+
#ifdef USE_SSE2
31+
32+
/*
33+
* A 16-byte register only has four 4-byte lanes. For better
34+
* instruction-level parallelism, each loop iteration operates on a block
35+
* of four registers. Testing has showed this is ~40% faster than using a
36+
* block of two registers.
37+
*/
38+
const __m128i keys = _mm_set1_epi32(key); /* load 4 copies of key */
39+
uint32 iterations = nelem & ~0xF; /* round down to multiple of 16 */
40+
41+
#if defined(USE_ASSERT_CHECKING)
42+
bool assert_result = false;
43+
44+
/* pre-compute the result for assert checking */
45+
for (i = 0; i < nelem; i++)
46+
{
47+
if (key == base[i])
48+
{
49+
assert_result = true;
50+
break;
51+
}
52+
}
53+
#endif
54+
55+
for (i = 0; i < iterations; i += 16)
56+
{
57+
/* load the next block into 4 registers holding 4 values each */
58+
const __m128i vals1 = _mm_loadu_si128((__m128i *) & base[i]);
59+
const __m128i vals2 = _mm_loadu_si128((__m128i *) & base[i + 4]);
60+
const __m128i vals3 = _mm_loadu_si128((__m128i *) & base[i + 8]);
61+
const __m128i vals4 = _mm_loadu_si128((__m128i *) & base[i + 12]);
62+
63+
/* compare each value to the key */
64+
const __m128i result1 = _mm_cmpeq_epi32(keys, vals1);
65+
const __m128i result2 = _mm_cmpeq_epi32(keys, vals2);
66+
const __m128i result3 = _mm_cmpeq_epi32(keys, vals3);
67+
const __m128i result4 = _mm_cmpeq_epi32(keys, vals4);
68+
69+
/* combine the results into a single variable */
70+
const __m128i tmp1 = _mm_or_si128(result1, result2);
71+
const __m128i tmp2 = _mm_or_si128(result3, result4);
72+
const __m128i result = _mm_or_si128(tmp1, tmp2);
73+
74+
/* see if there was a match */
75+
if (_mm_movemask_epi8(result) != 0)
76+
{
77+
#if defined(USE_ASSERT_CHECKING)
78+
Assert(assert_result == true);
79+
#endif
80+
return true;
81+
}
82+
}
83+
#endif /* USE_SSE2 */
84+
85+
/* Process the remaining elements one at a time. */
86+
for (; i < nelem; i++)
87+
{
88+
if (key == base[i])
89+
{
90+
#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING)
91+
Assert(assert_result == true);
92+
#endif
93+
return true;
94+
}
95+
}
96+
97+
#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING)
98+
Assert(assert_result == false);
99+
#endif
100+
return false;
101+
}
102+
103+
#endif /* PG_LFIND_H */

src/test/modules/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ SUBDIRS = \
1919
test_extensions \
2020
test_ginpostinglist \
2121
test_integerset \
22+
test_lfind \
2223
test_misc \
2324
test_oat_hooks \
2425
test_parser \
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Generated subdirectories
2+
/log/
3+
/results/
4+
/tmp_check/

src/test/modules/test_lfind/Makefile

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# src/test/modules/test_lfind/Makefile
2+
3+
MODULE_big = test_lfind
4+
OBJS = \
5+
$(WIN32RES) \
6+
test_lfind.o
7+
PGFILEDESC = "test_lfind - test code for optimized linear search functions"
8+
9+
EXTENSION = test_lfind
10+
DATA = test_lfind--1.0.sql
11+
12+
REGRESS = test_lfind
13+
14+
ifdef USE_PGXS
15+
PG_CONFIG = pg_config
16+
PGXS := $(shell $(PG_CONFIG) --pgxs)
17+
include $(PGXS)
18+
else
19+
subdir = src/test/modules/test_lfind
20+
top_builddir = ../../../..
21+
include $(top_builddir)/src/Makefile.global
22+
include $(top_srcdir)/contrib/contrib-global.mk
23+
endif
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
CREATE EXTENSION test_lfind;
2+
--
3+
-- These tests don't produce any interesting output. We're checking that
4+
-- the operations complete without crashing or hanging and that none of their
5+
-- internal sanity tests fail.
6+
--
7+
SELECT test_lfind();
8+
test_lfind
9+
------------
10+
11+
(1 row)
12+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CREATE EXTENSION test_lfind;
2+
3+
--
4+
-- These tests don't produce any interesting output. We're checking that
5+
-- the operations complete without crashing or hanging and that none of their
6+
-- internal sanity tests fail.
7+
--
8+
SELECT test_lfind();
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/* src/test/modules/test_lfind/test_lfind--1.0.sql */
2+
3+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
4+
\echo Use "CREATE EXTENSION test_lfind" to load this file. \quit
5+
6+
CREATE FUNCTION test_lfind()
7+
RETURNS pg_catalog.void
8+
AS 'MODULE_PATHNAME' LANGUAGE C;
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*--------------------------------------------------------------------------
2+
*
3+
* test_lfind.c
4+
* Test correctness of optimized linear search functions.
5+
*
6+
* Copyright (c) 2022, PostgreSQL Global Development Group
7+
*
8+
* IDENTIFICATION
9+
* src/test/modules/test_lfind/test_lfind.c
10+
*
11+
* -------------------------------------------------------------------------
12+
*/
13+
14+
#include "postgres.h"
15+
16+
#include "fmgr.h"
17+
#include "port/pg_lfind.h"
18+
19+
PG_MODULE_MAGIC;
20+
21+
PG_FUNCTION_INFO_V1(test_lfind);
22+
23+
Datum
24+
test_lfind(PG_FUNCTION_ARGS)
25+
{
26+
#define TEST_ARRAY_SIZE 135
27+
uint32 test_array[TEST_ARRAY_SIZE] = {0};
28+
29+
test_array[8] = 1;
30+
test_array[64] = 2;
31+
test_array[TEST_ARRAY_SIZE - 1] = 3;
32+
33+
if (pg_lfind32(1, test_array, 4))
34+
elog(ERROR, "pg_lfind32() found nonexistent element");
35+
if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE))
36+
elog(ERROR, "pg_lfind32() did not find existing element");
37+
38+
if (pg_lfind32(2, test_array, 32))
39+
elog(ERROR, "pg_lfind32() found nonexistent element");
40+
if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE))
41+
elog(ERROR, "pg_lfind32() did not find existing element");
42+
43+
if (pg_lfind32(3, test_array, 96))
44+
elog(ERROR, "pg_lfind32() found nonexistent element");
45+
if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE))
46+
elog(ERROR, "pg_lfind32() did not find existing element");
47+
48+
if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE))
49+
elog(ERROR, "pg_lfind32() found nonexistent element");
50+
51+
PG_RETURN_VOID();
52+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
comment = 'Test code for optimized linear search functions'
2+
default_version = '1.0'
3+
module_pathname = '$libdir/test_lfind'
4+
relocatable = true

0 commit comments

Comments
 (0)