Introduce optimized routine for linear searches of arrays

Use SSE2 intrinsics to speed up the search, where available.  Otherwise,
use a simple 'for' loop.  The motivation to add this now is to speed up
XidInMVCCSnapshot(), which is the reason only unsigned 32-bit integer
arrays are optimized. Other types are left for future work, as is the
extension of this technique to non-x86 platforms.

Nathan Bossart

Reviewed by: Andres Freund, Bharath Rupireddy, Masahiko Sawada
Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13
This commit is contained in:
John Naylor 2022-08-03 09:49:04 -07:00
parent 356dd2ce5b
commit b6ef167564
9 changed files with 215 additions and 0 deletions

103
src/include/port/pg_lfind.h Normal file
View File

@ -0,0 +1,103 @@
/*-------------------------------------------------------------------------
*
* pg_lfind.h
* Optimized linear search routines.
*
* Copyright (c) 2022, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/include/port/pg_lfind.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_LFIND_H
#define PG_LFIND_H
#include "port/simd.h"
/*
 * pg_lfind32
 *
 * Return true if there is an element in 'base' that equals 'key', otherwise
 * return false.
 *
 * 'base' must point to at least 'nelem' readable elements.  The array is only
 * read, never modified, so it is taken as a pointer to const.  If 'nelem' is
 * zero, no element is examined and the result is false.
 */
static inline bool
pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
{
	uint32		i = 0;

	/* Use SIMD intrinsics where available. */
#ifdef USE_SSE2

	/*
	 * A 16-byte register only has four 4-byte lanes. For better
	 * instruction-level parallelism, each loop iteration operates on a block
	 * of four registers. Testing has shown this is ~40% faster than using a
	 * block of two registers.
	 */
	const __m128i keys = _mm_set1_epi32(key);	/* load 4 copies of key */

	/* round down to multiple of 16; the remainder is handled one by one */
	uint32		iterations = nelem & ~((uint32) 0xF);

#if defined(USE_ASSERT_CHECKING)
	bool		assert_result = false;

	/*
	 * Pre-compute the result with a plain loop so that the vectorized path
	 * can be cross-checked against it.  'i' is reset before the real search.
	 */
	for (i = 0; i < nelem; i++)
	{
		if (key == base[i])
		{
			assert_result = true;
			break;
		}
	}
#endif

	for (i = 0; i < iterations; i += 16)
	{
		/* load the next block into 4 registers holding 4 values each */
		const __m128i vals1 = _mm_loadu_si128((const __m128i *) &base[i]);
		const __m128i vals2 = _mm_loadu_si128((const __m128i *) &base[i + 4]);
		const __m128i vals3 = _mm_loadu_si128((const __m128i *) &base[i + 8]);
		const __m128i vals4 = _mm_loadu_si128((const __m128i *) &base[i + 12]);

		/* compare each value to the key */
		const __m128i result1 = _mm_cmpeq_epi32(keys, vals1);
		const __m128i result2 = _mm_cmpeq_epi32(keys, vals2);
		const __m128i result3 = _mm_cmpeq_epi32(keys, vals3);
		const __m128i result4 = _mm_cmpeq_epi32(keys, vals4);

		/* combine the results into a single variable */
		const __m128i tmp1 = _mm_or_si128(result1, result2);
		const __m128i tmp2 = _mm_or_si128(result3, result4);
		const __m128i result = _mm_or_si128(tmp1, tmp2);

		/* any set bit in the mask means at least one lane matched */
		if (_mm_movemask_epi8(result) != 0)
		{
#if defined(USE_ASSERT_CHECKING)
			Assert(assert_result == true);
#endif
			return true;
		}
	}
#endif							/* USE_SSE2 */

	/*
	 * Process the remaining elements one at a time.  Without SSE2 this loop
	 * starts at 0 and scans the whole array.
	 */
	for (; i < nelem; i++)
	{
		if (key == base[i])
		{
#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING)
			Assert(assert_result == true);
#endif
			return true;
		}
	}

#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING)
	Assert(assert_result == false);
#endif
	return false;
}
#endif /* PG_LFIND_H */

View File

@ -19,6 +19,7 @@ SUBDIRS = \
test_extensions \
test_ginpostinglist \
test_integerset \
test_lfind \
test_misc \
test_oat_hooks \
test_parser \

View File

@ -0,0 +1,4 @@
# Generated subdirectories
/log/
/results/
/tmp_check/

View File

@ -0,0 +1,23 @@
# src/test/modules/test_lfind/Makefile
#
# Builds the test_lfind loadable module from test_lfind.o, installs the
# extension script listed in DATA, and runs the test_lfind regression test.
MODULE_big = test_lfind
OBJS = \
$(WIN32RES) \
test_lfind.o
PGFILEDESC = "test_lfind - test code for optimized linear search functions"
EXTENSION = test_lfind
DATA = test_lfind--1.0.sql
REGRESS = test_lfind
# When USE_PGXS is set, locate the PGXS makefile through pg_config;
# otherwise use the makefiles found relative to top_builddir.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = src/test/modules/test_lfind
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

View File

@ -0,0 +1,12 @@
CREATE EXTENSION test_lfind;
--
-- These tests don't produce any interesting output. We're checking that
-- the operations complete without crashing or hanging and that none of their
-- internal sanity tests fail.
--
SELECT test_lfind();
test_lfind
------------
(1 row)

View File

@ -0,0 +1,8 @@
CREATE EXTENSION test_lfind;
--
-- These tests don't produce any interesting output. We're checking that
-- the operations complete without crashing or hanging and that none of their
-- internal sanity tests fail.
--
SELECT test_lfind();

View File

@ -0,0 +1,8 @@
/* src/test/modules/test_lfind/test_lfind--1.0.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION test_lfind" to load this file. \quit
-- SQL-callable wrapper for the C function test_lfind in the extension's
-- shared library; it takes no arguments and returns void.
CREATE FUNCTION test_lfind()
RETURNS pg_catalog.void
AS 'MODULE_PATHNAME' LANGUAGE C;

View File

@ -0,0 +1,52 @@
/*--------------------------------------------------------------------------
*
* test_lfind.c
* Test correctness of optimized linear search functions.
*
* Copyright (c) 2022, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/test/modules/test_lfind/test_lfind.c
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "port/pg_lfind.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(test_lfind);
Datum
test_lfind(PG_FUNCTION_ARGS)
{
#define TEST_ARRAY_SIZE 135
uint32 test_array[TEST_ARRAY_SIZE] = {0};
test_array[8] = 1;
test_array[64] = 2;
test_array[TEST_ARRAY_SIZE - 1] = 3;
if (pg_lfind32(1, test_array, 4))
elog(ERROR, "pg_lfind32() found nonexistent element");
if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE))
elog(ERROR, "pg_lfind32() did not find existing element");
if (pg_lfind32(2, test_array, 32))
elog(ERROR, "pg_lfind32() found nonexistent element");
if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE))
elog(ERROR, "pg_lfind32() did not find existing element");
if (pg_lfind32(3, test_array, 96))
elog(ERROR, "pg_lfind32() found nonexistent element");
if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE))
elog(ERROR, "pg_lfind32() did not find existing element");
if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE))
elog(ERROR, "pg_lfind32() found nonexistent element");
PG_RETURN_VOID();
}

View File

@ -0,0 +1,4 @@
# test_lfind extension control file
comment = 'Test code for optimized linear search functions'
default_version = '1.0'
module_pathname = '$libdir/test_lfind'
relocatable = true