Skip to content

Commit f78667b

Browse files
Use __attribute__((target(...))) for AVX-512 support.
Presently, we check for compiler support for the required intrinsics both with and without extra compiler flags (e.g., -mxsave), and then depending on the results of those checks, we pick which files to compile with which flags. This is tedious and complicated, and it results in unsustainable coding patterns such as separate files for each portion of code may need to be built with different compiler flags. This commit introduces support for __attribute__((target(...))) and uses it for the AVX-512 code. This simplifies both the configure-time checks and the build scripts, and it allows us to place the functions that use the intrinsics in files that we otherwise do not want to build with special CPU instructions. We are careful to avoid using __attribute__((target(...))) on compilers that do not understand it, but we still perform the configure-time checks in case the compiler allows using the intrinsics without it (e.g., MSVC). A similar change could likely be made for some of the CRC-32C code, but that is left as a future exercise. Suggested-by: Andres Freund Reviewed-by: Raghuveer Devulapalli, Andres Freund Discussion: https://postgr.es/m/20240731205254.vfpap7uxwmebqeaf%40awork3.anarazel.de
1 parent f56a01e commit f78667b

11 files changed

+185
-312
lines changed

config/c-compiler.m4

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -700,20 +700,22 @@ undefine([Ac_cachevar])dnl
700700
# Check if the compiler supports the XSAVE instructions using the _xgetbv
701701
# intrinsic function.
702702
#
703-
# An optional compiler flag can be passed as argument (e.g., -mxsave). If the
704-
# intrinsic is supported, sets pgac_xsave_intrinsics and CFLAGS_XSAVE.
703+
# If the intrinsics are supported, sets pgac_xsave_intrinsics.
705704
AC_DEFUN([PGAC_XSAVE_INTRINSICS],
706-
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_xsave_intrinsics_$1])])dnl
707-
AC_CACHE_CHECK([for _xgetbv with CFLAGS=$1], [Ac_cachevar],
708-
[pgac_save_CFLAGS=$CFLAGS
709-
CFLAGS="$pgac_save_CFLAGS $1"
710-
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>],
711-
[return _xgetbv(0) & 0xe0;])],
705+
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_xsave_intrinsics])])dnl
706+
AC_CACHE_CHECK([for _xgetbv], [Ac_cachevar],
707+
[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
708+
#if defined(__has_attribute) && __has_attribute (target)
709+
__attribute__((target("xsave")))
710+
#endif
711+
static int xsave_test(void)
712+
{
713+
return _xgetbv(0) & 0xe0;
714+
}],
715+
[return xsave_test();])],
712716
[Ac_cachevar=yes],
713-
[Ac_cachevar=no])
714-
CFLAGS="$pgac_save_CFLAGS"])
717+
[Ac_cachevar=no])])
715718
if test x"$Ac_cachevar" = x"yes"; then
716-
CFLAGS_XSAVE="$1"
717719
pgac_xsave_intrinsics=yes
718720
fi
719721
undefine([Ac_cachevar])dnl
@@ -725,29 +727,29 @@ undefine([Ac_cachevar])dnl
725727
# _mm512_setzero_si512, _mm512_maskz_loadu_epi8, _mm512_popcnt_epi64,
726728
# _mm512_add_epi64, and _mm512_reduce_add_epi64 intrinsic functions.
727729
#
728-
# Optional compiler flags can be passed as argument (e.g., -mavx512vpopcntdq
729-
# -mavx512bw). If the intrinsics are supported, sets
730-
# pgac_avx512_popcnt_intrinsics and CFLAGS_POPCNT.
730+
# If the intrinsics are supported, sets pgac_avx512_popcnt_intrinsics.
731731
AC_DEFUN([PGAC_AVX512_POPCNT_INTRINSICS],
732-
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics_$1])])dnl
733-
AC_CACHE_CHECK([for _mm512_popcnt_epi64 with CFLAGS=$1], [Ac_cachevar],
734-
[pgac_save_CFLAGS=$CFLAGS
735-
CFLAGS="$pgac_save_CFLAGS $1"
736-
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>],
737-
[const char buf@<:@sizeof(__m512i)@:>@;
738-
PG_INT64_TYPE popcnt = 0;
739-
__m512i accum = _mm512_setzero_si512();
740-
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
741-
const __m512i cnt = _mm512_popcnt_epi64(val);
742-
accum = _mm512_add_epi64(accum, cnt);
743-
popcnt = _mm512_reduce_add_epi64(accum);
744-
/* return computed value, to prevent the above being optimized away */
745-
return popcnt == 0;])],
732+
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics])])dnl
733+
AC_CACHE_CHECK([for _mm512_popcnt_epi64], [Ac_cachevar],
734+
[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
735+
#if defined(__has_attribute) && __has_attribute (target)
736+
__attribute__((target("avx512vpopcntdq","avx512bw")))
737+
#endif
738+
static int popcount_test(void)
739+
{
740+
const char buf@<:@sizeof(__m512i)@:>@;
741+
PG_INT64_TYPE popcnt = 0;
742+
__m512i accum = _mm512_setzero_si512();
743+
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
744+
const __m512i cnt = _mm512_popcnt_epi64(val);
745+
accum = _mm512_add_epi64(accum, cnt);
746+
popcnt = _mm512_reduce_add_epi64(accum);
747+
return (int) popcnt;
748+
}],
749+
[return popcount_test();])],
746750
[Ac_cachevar=yes],
747-
[Ac_cachevar=no])
748-
CFLAGS="$pgac_save_CFLAGS"])
751+
[Ac_cachevar=no])])
749752
if test x"$Ac_cachevar" = x"yes"; then
750-
CFLAGS_POPCNT="$1"
751753
pgac_avx512_popcnt_intrinsics=yes
752754
fi
753755
undefine([Ac_cachevar])dnl

configure

Lines changed: 41 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -647,9 +647,6 @@ MSGFMT_FLAGS
647647
MSGFMT
648648
PG_CRC32C_OBJS
649649
CFLAGS_CRC
650-
PG_POPCNT_OBJS
651-
CFLAGS_POPCNT
652-
CFLAGS_XSAVE
653650
LIBOBJS
654651
OPENSSL
655652
ZSTD
@@ -17272,185 +17269,103 @@ fi
1727217269

1727317270
# Check for XSAVE intrinsics
1727417271
#
17275-
CFLAGS_XSAVE=""
17276-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv with CFLAGS=" >&5
17277-
$as_echo_n "checking for _xgetbv with CFLAGS=... " >&6; }
17278-
if ${pgac_cv_xsave_intrinsics_+:} false; then :
17272+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv" >&5
17273+
$as_echo_n "checking for _xgetbv... " >&6; }
17274+
if ${pgac_cv_xsave_intrinsics+:} false; then :
1727917275
$as_echo_n "(cached) " >&6
1728017276
else
17281-
pgac_save_CFLAGS=$CFLAGS
17282-
CFLAGS="$pgac_save_CFLAGS "
17283-
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
17284-
/* end confdefs.h. */
17285-
#include <immintrin.h>
17286-
int
17287-
main ()
17288-
{
17289-
return _xgetbv(0) & 0xe0;
17290-
;
17291-
return 0;
17292-
}
17293-
_ACEOF
17294-
if ac_fn_c_try_link "$LINENO"; then :
17295-
pgac_cv_xsave_intrinsics_=yes
17296-
else
17297-
pgac_cv_xsave_intrinsics_=no
17298-
fi
17299-
rm -f core conftest.err conftest.$ac_objext \
17300-
conftest$ac_exeext conftest.$ac_ext
17301-
CFLAGS="$pgac_save_CFLAGS"
17302-
fi
17303-
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics_" >&5
17304-
$as_echo "$pgac_cv_xsave_intrinsics_" >&6; }
17305-
if test x"$pgac_cv_xsave_intrinsics_" = x"yes"; then
17306-
CFLAGS_XSAVE=""
17307-
pgac_xsave_intrinsics=yes
17308-
fi
17309-
17310-
if test x"$pgac_xsave_intrinsics" != x"yes"; then
17311-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv with CFLAGS=-mxsave" >&5
17312-
$as_echo_n "checking for _xgetbv with CFLAGS=-mxsave... " >&6; }
17313-
if ${pgac_cv_xsave_intrinsics__mxsave+:} false; then :
17314-
$as_echo_n "(cached) " >&6
17315-
else
17316-
pgac_save_CFLAGS=$CFLAGS
17317-
CFLAGS="$pgac_save_CFLAGS -mxsave"
17318-
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
17277+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
1731917278
/* end confdefs.h. */
1732017279
#include <immintrin.h>
17280+
#if defined(__has_attribute) && __has_attribute (target)
17281+
__attribute__((target("xsave")))
17282+
#endif
17283+
static int xsave_test(void)
17284+
{
17285+
return _xgetbv(0) & 0xe0;
17286+
}
1732117287
int
1732217288
main ()
1732317289
{
17324-
return _xgetbv(0) & 0xe0;
17290+
return xsave_test();
1732517291
;
1732617292
return 0;
1732717293
}
1732817294
_ACEOF
1732917295
if ac_fn_c_try_link "$LINENO"; then :
17330-
pgac_cv_xsave_intrinsics__mxsave=yes
17296+
pgac_cv_xsave_intrinsics=yes
1733117297
else
17332-
pgac_cv_xsave_intrinsics__mxsave=no
17298+
pgac_cv_xsave_intrinsics=no
1733317299
fi
1733417300
rm -f core conftest.err conftest.$ac_objext \
1733517301
conftest$ac_exeext conftest.$ac_ext
17336-
CFLAGS="$pgac_save_CFLAGS"
1733717302
fi
17338-
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics__mxsave" >&5
17339-
$as_echo "$pgac_cv_xsave_intrinsics__mxsave" >&6; }
17340-
if test x"$pgac_cv_xsave_intrinsics__mxsave" = x"yes"; then
17341-
CFLAGS_XSAVE="-mxsave"
17303+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics" >&5
17304+
$as_echo "$pgac_cv_xsave_intrinsics" >&6; }
17305+
if test x"$pgac_cv_xsave_intrinsics" = x"yes"; then
1734217306
pgac_xsave_intrinsics=yes
1734317307
fi
1734417308

17345-
fi
1734617309
if test x"$pgac_xsave_intrinsics" = x"yes"; then
1734717310

1734817311
$as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
1734917312

1735017313
fi
1735117314

17352-
1735317315
# Check for AVX-512 popcount intrinsics
1735417316
#
17355-
CFLAGS_POPCNT=""
17356-
PG_POPCNT_OBJS=""
1735717317
if test x"$host_cpu" = x"x86_64"; then
17358-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=" >&5
17359-
$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=... " >&6; }
17360-
if ${pgac_cv_avx512_popcnt_intrinsics_+:} false; then :
17318+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64" >&5
17319+
$as_echo_n "checking for _mm512_popcnt_epi64... " >&6; }
17320+
if ${pgac_cv_avx512_popcnt_intrinsics+:} false; then :
1736117321
$as_echo_n "(cached) " >&6
1736217322
else
17363-
pgac_save_CFLAGS=$CFLAGS
17364-
CFLAGS="$pgac_save_CFLAGS "
17365-
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
17366-
/* end confdefs.h. */
17367-
#include <immintrin.h>
17368-
int
17369-
main ()
17370-
{
17371-
const char buf[sizeof(__m512i)];
17372-
PG_INT64_TYPE popcnt = 0;
17373-
__m512i accum = _mm512_setzero_si512();
17374-
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
17375-
const __m512i cnt = _mm512_popcnt_epi64(val);
17376-
accum = _mm512_add_epi64(accum, cnt);
17377-
popcnt = _mm512_reduce_add_epi64(accum);
17378-
/* return computed value, to prevent the above being optimized away */
17379-
return popcnt == 0;
17380-
;
17381-
return 0;
17382-
}
17383-
_ACEOF
17384-
if ac_fn_c_try_link "$LINENO"; then :
17385-
pgac_cv_avx512_popcnt_intrinsics_=yes
17386-
else
17387-
pgac_cv_avx512_popcnt_intrinsics_=no
17388-
fi
17389-
rm -f core conftest.err conftest.$ac_objext \
17390-
conftest$ac_exeext conftest.$ac_ext
17391-
CFLAGS="$pgac_save_CFLAGS"
17392-
fi
17393-
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics_" >&5
17394-
$as_echo "$pgac_cv_avx512_popcnt_intrinsics_" >&6; }
17395-
if test x"$pgac_cv_avx512_popcnt_intrinsics_" = x"yes"; then
17396-
CFLAGS_POPCNT=""
17397-
pgac_avx512_popcnt_intrinsics=yes
17398-
fi
17399-
17400-
if test x"$pgac_avx512_popcnt_intrinsics" != x"yes"; then
17401-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512bw" >&5
17402-
$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512bw... " >&6; }
17403-
if ${pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw+:} false; then :
17404-
$as_echo_n "(cached) " >&6
17405-
else
17406-
pgac_save_CFLAGS=$CFLAGS
17407-
CFLAGS="$pgac_save_CFLAGS -mavx512vpopcntdq -mavx512bw"
17408-
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
17323+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
1740917324
/* end confdefs.h. */
1741017325
#include <immintrin.h>
17326+
#if defined(__has_attribute) && __has_attribute (target)
17327+
__attribute__((target("avx512vpopcntdq","avx512bw")))
17328+
#endif
17329+
static int popcount_test(void)
17330+
{
17331+
const char buf[sizeof(__m512i)];
17332+
PG_INT64_TYPE popcnt = 0;
17333+
__m512i accum = _mm512_setzero_si512();
17334+
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
17335+
const __m512i cnt = _mm512_popcnt_epi64(val);
17336+
accum = _mm512_add_epi64(accum, cnt);
17337+
popcnt = _mm512_reduce_add_epi64(accum);
17338+
return (int) popcnt;
17339+
}
1741117340
int
1741217341
main ()
1741317342
{
17414-
const char buf[sizeof(__m512i)];
17415-
PG_INT64_TYPE popcnt = 0;
17416-
__m512i accum = _mm512_setzero_si512();
17417-
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
17418-
const __m512i cnt = _mm512_popcnt_epi64(val);
17419-
accum = _mm512_add_epi64(accum, cnt);
17420-
popcnt = _mm512_reduce_add_epi64(accum);
17421-
/* return computed value, to prevent the above being optimized away */
17422-
return popcnt == 0;
17343+
return popcount_test();
1742317344
;
1742417345
return 0;
1742517346
}
1742617347
_ACEOF
1742717348
if ac_fn_c_try_link "$LINENO"; then :
17428-
pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw=yes
17349+
pgac_cv_avx512_popcnt_intrinsics=yes
1742917350
else
17430-
pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw=no
17351+
pgac_cv_avx512_popcnt_intrinsics=no
1743117352
fi
1743217353
rm -f core conftest.err conftest.$ac_objext \
1743317354
conftest$ac_exeext conftest.$ac_ext
17434-
CFLAGS="$pgac_save_CFLAGS"
1743517355
fi
17436-
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" >&5
17437-
$as_echo "$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" >&6; }
17438-
if test x"$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" = x"yes"; then
17439-
CFLAGS_POPCNT="-mavx512vpopcntdq -mavx512bw"
17356+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics" >&5
17357+
$as_echo "$pgac_cv_avx512_popcnt_intrinsics" >&6; }
17358+
if test x"$pgac_cv_avx512_popcnt_intrinsics" = x"yes"; then
1744017359
pgac_avx512_popcnt_intrinsics=yes
1744117360
fi
1744217361

17443-
fi
1744417362
if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then
17445-
PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o"
1744617363

1744717364
$as_echo "#define USE_AVX512_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
1744817365

1744917366
fi
1745017367
fi
1745117368

17452-
17453-
1745417369
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
1745517370
#
1745617371
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used

configure.ac

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2050,32 +2050,19 @@ fi
20502050

20512051
# Check for XSAVE intrinsics
20522052
#
2053-
CFLAGS_XSAVE=""
2054-
PGAC_XSAVE_INTRINSICS([])
2055-
if test x"$pgac_xsave_intrinsics" != x"yes"; then
2056-
PGAC_XSAVE_INTRINSICS([-mxsave])
2057-
fi
2053+
PGAC_XSAVE_INTRINSICS()
20582054
if test x"$pgac_xsave_intrinsics" = x"yes"; then
20592055
AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
20602056
fi
2061-
AC_SUBST(CFLAGS_XSAVE)
20622057

20632058
# Check for AVX-512 popcount intrinsics
20642059
#
2065-
CFLAGS_POPCNT=""
2066-
PG_POPCNT_OBJS=""
20672060
if test x"$host_cpu" = x"x86_64"; then
2068-
PGAC_AVX512_POPCNT_INTRINSICS([])
2069-
if test x"$pgac_avx512_popcnt_intrinsics" != x"yes"; then
2070-
PGAC_AVX512_POPCNT_INTRINSICS([-mavx512vpopcntdq -mavx512bw])
2071-
fi
2061+
PGAC_AVX512_POPCNT_INTRINSICS()
20722062
if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then
2073-
PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o"
20742063
AC_DEFINE(USE_AVX512_POPCNT_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX-512 popcount instructions with a runtime check.])
20752064
fi
20762065
fi
2077-
AC_SUBST(CFLAGS_POPCNT)
2078-
AC_SUBST(PG_POPCNT_OBJS)
20792066

20802067
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
20812068
#

0 commit comments

Comments
 (0)