Skip to content

Commit fdb5dd6

Browse files
committed
Be more paranoid in configure's checks for CRC and POPCNT intrinsics.
In these tests, we need to verify not only that the compiler has heard of these intrinsics, but that lower-level tools cope with them too. (For example, the assembler must also know the instructions, and on some platforms there might be library support involved.) The hazard is that the compiler might optimize away the calls altogether, allowing the configure check to succeed only to have the build fail later if lower-level support is missing. The existing code tried to prevent that by ensuring that the result of the intrinsic is used for something, but that's really insufficient because we were feeding constant input to it. So the compiler would be perfectly entitled to optimize away the calls anyway. Fix by making the inputs into global variables. (Hypothetically, LTO optimization could still remove the code --- but that's well past where we'd be likely to hit trouble.) It is not known that any current compiler would actually optimize away these calls, and even if that happened it would be unlikely that any problem would manifest. Our concern for this stems from largely-bygone days when it was common to install gcc on platforms with some other native compiler, so that a compiler-vs-library support discrepancy was more probable. Still, there's little point in defending against such cases in a way that is visibly incomplete. I'm content to fix this in master for now; we can back-patch if any indication appears that it's a live problem for someone. Discussion: https://postgr.es/m/3368102.1741993462@sss.pgh.pa.us
1 parent 50ba65e commit fdb5dd6

File tree

3 files changed

+36
-31
lines changed

3 files changed

+36
-31
lines changed

config/c-compiler.m4

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -553,16 +553,20 @@ fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
553553
# the other ones are, on x86-64 platforms)
554554
#
555555
# If the intrinsics are supported, sets pgac_sse42_crc32_intrinsics.
556+
#
557+
# To detect the case where the compiler knows the function but library support
558+
# is missing, we must link not just compile, and store the results in global
559+
# variables so the compiler doesn't optimize away the call.
556560
AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS],
557561
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics])])dnl
558562
AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u32], [Ac_cachevar],
559563
[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <nmmintrin.h>
564+
unsigned int crc;
560565
#if defined(__has_attribute) && __has_attribute (target)
561566
__attribute__((target("sse4.2")))
562567
#endif
563568
static int crc32_sse42_test(void)
564569
{
565-
unsigned int crc = 0;
566570
crc = _mm_crc32_u8(crc, 0);
567571
crc = _mm_crc32_u32(crc, 0);
568572
/* return computed value, to prevent the above being optimized away */
@@ -593,9 +597,9 @@ AC_DEFUN([PGAC_ARMV8_CRC32C_INTRINSICS],
593597
AC_CACHE_CHECK([for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=$1], [Ac_cachevar],
594598
[pgac_save_CFLAGS=$CFLAGS
595599
CFLAGS="$pgac_save_CFLAGS $1"
596-
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h>],
597-
[unsigned int crc = 0;
598-
crc = __crc32cb(crc, 0);
600+
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h>
601+
unsigned int crc;],
602+
[crc = __crc32cb(crc, 0);
599603
crc = __crc32ch(crc, 0);
600604
crc = __crc32cw(crc, 0);
601605
crc = __crc32cd(crc, 0);
@@ -628,9 +632,8 @@ AC_DEFUN([PGAC_LOONGARCH_CRC32C_INTRINSICS],
628632
AC_CACHE_CHECK(
629633
[for __builtin_loongarch_crcc_w_b_w, __builtin_loongarch_crcc_w_h_w, __builtin_loongarch_crcc_w_w_w and __builtin_loongarch_crcc_w_d_w],
630634
[Ac_cachevar],
631-
[AC_LINK_IFELSE([AC_LANG_PROGRAM([],
632-
[unsigned int crc = 0;
633-
crc = __builtin_loongarch_crcc_w_b_w(0, crc);
635+
[AC_LINK_IFELSE([AC_LANG_PROGRAM([unsigned int crc;],
636+
[crc = __builtin_loongarch_crcc_w_b_w(0, crc);
634637
crc = __builtin_loongarch_crcc_w_h_w(0, crc);
635638
crc = __builtin_loongarch_crcc_w_w_w(0, crc);
636639
crc = __builtin_loongarch_crcc_w_d_w(0, crc);
@@ -680,22 +683,23 @@ undefine([Ac_cachevar])dnl
680683
AC_DEFUN([PGAC_AVX512_POPCNT_INTRINSICS],
681684
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics])])dnl
682685
AC_CACHE_CHECK([for _mm512_popcnt_epi64], [Ac_cachevar],
683-
[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
686+
[AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <immintrin.h>
684687
#include <stdint.h>
688+
char buf[sizeof(__m512i)];
689+
685690
#if defined(__has_attribute) && __has_attribute (target)
686691
__attribute__((target("avx512vpopcntdq,avx512bw")))
687692
#endif
688693
static int popcount_test(void)
689694
{
690-
const char buf@<:@sizeof(__m512i)@:>@;
691695
int64_t popcnt = 0;
692696
__m512i accum = _mm512_setzero_si512();
693-
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
694-
const __m512i cnt = _mm512_popcnt_epi64(val);
697+
__m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
698+
__m512i cnt = _mm512_popcnt_epi64(val);
695699
accum = _mm512_add_epi64(accum, cnt);
696700
popcnt = _mm512_reduce_add_epi64(accum);
697701
return (int) popcnt;
698-
}],
702+
}]],
699703
[return popcount_test();])],
700704
[Ac_cachevar=yes],
701705
[Ac_cachevar=no])])

configure

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17334,16 +17334,17 @@ else
1733417334
/* end confdefs.h. */
1733517335
#include <immintrin.h>
1733617336
#include <stdint.h>
17337+
char buf[sizeof(__m512i)];
17338+
1733717339
#if defined(__has_attribute) && __has_attribute (target)
1733817340
__attribute__((target("avx512vpopcntdq,avx512bw")))
1733917341
#endif
1734017342
static int popcount_test(void)
1734117343
{
17342-
const char buf[sizeof(__m512i)];
1734317344
int64_t popcnt = 0;
1734417345
__m512i accum = _mm512_setzero_si512();
17345-
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
17346-
const __m512i cnt = _mm512_popcnt_epi64(val);
17346+
__m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
17347+
__m512i cnt = _mm512_popcnt_epi64(val);
1734717348
accum = _mm512_add_epi64(accum, cnt);
1734817349
popcnt = _mm512_reduce_add_epi64(accum);
1734917350
return (int) popcnt;
@@ -17387,12 +17388,12 @@ else
1738717388
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
1738817389
/* end confdefs.h. */
1738917390
#include <nmmintrin.h>
17391+
unsigned int crc;
1739017392
#if defined(__has_attribute) && __has_attribute (target)
1739117393
__attribute__((target("sse4.2")))
1739217394
#endif
1739317395
static int crc32_sse42_test(void)
1739417396
{
17395-
unsigned int crc = 0;
1739617397
crc = _mm_crc32_u8(crc, 0);
1739717398
crc = _mm_crc32_u32(crc, 0);
1739817399
/* return computed value, to prevent the above being optimized away */
@@ -17459,11 +17460,11 @@ CFLAGS="$pgac_save_CFLAGS "
1745917460
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
1746017461
/* end confdefs.h. */
1746117462
#include <arm_acle.h>
17463+
unsigned int crc;
1746217464
int
1746317465
main ()
1746417466
{
17465-
unsigned int crc = 0;
17466-
crc = __crc32cb(crc, 0);
17467+
crc = __crc32cb(crc, 0);
1746717468
crc = __crc32ch(crc, 0);
1746817469
crc = __crc32cw(crc, 0);
1746917470
crc = __crc32cd(crc, 0);
@@ -17500,11 +17501,11 @@ CFLAGS="$pgac_save_CFLAGS -march=armv8-a+crc+simd"
1750017501
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
1750117502
/* end confdefs.h. */
1750217503
#include <arm_acle.h>
17504+
unsigned int crc;
1750317505
int
1750417506
main ()
1750517507
{
17506-
unsigned int crc = 0;
17507-
crc = __crc32cb(crc, 0);
17508+
crc = __crc32cb(crc, 0);
1750817509
crc = __crc32ch(crc, 0);
1750917510
crc = __crc32cw(crc, 0);
1751017511
crc = __crc32cd(crc, 0);
@@ -17541,11 +17542,11 @@ CFLAGS="$pgac_save_CFLAGS -march=armv8-a+crc"
1754117542
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
1754217543
/* end confdefs.h. */
1754317544
#include <arm_acle.h>
17545+
unsigned int crc;
1754417546
int
1754517547
main ()
1754617548
{
17547-
unsigned int crc = 0;
17548-
crc = __crc32cb(crc, 0);
17549+
crc = __crc32cb(crc, 0);
1754917550
crc = __crc32ch(crc, 0);
1755017551
crc = __crc32cw(crc, 0);
1755117552
crc = __crc32cd(crc, 0);
@@ -17585,12 +17586,11 @@ if ${pgac_cv_loongarch_crc32c_intrinsics+:} false; then :
1758517586
else
1758617587
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
1758717588
/* end confdefs.h. */
17588-
17589+
unsigned int crc;
1758917590
int
1759017591
main ()
1759117592
{
17592-
unsigned int crc = 0;
17593-
crc = __builtin_loongarch_crcc_w_b_w(0, crc);
17593+
crc = __builtin_loongarch_crcc_w_b_w(0, crc);
1759417594
crc = __builtin_loongarch_crcc_w_h_w(0, crc);
1759517595
crc = __builtin_loongarch_crcc_w_w_w(0, crc);
1759617596
crc = __builtin_loongarch_crcc_w_d_w(0, crc);

meson.build

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2259,17 +2259,17 @@ if host_cpu == 'x86_64'
22592259
prog = '''
22602260
#include <immintrin.h>
22612261
#include <stdint.h>
2262+
char buf[sizeof(__m512i)];
22622263
22632264
#if defined(__has_attribute) && __has_attribute (target)
22642265
__attribute__((target("avx512vpopcntdq,avx512bw")))
22652266
#endif
22662267
int main(void)
22672268
{
2268-
const char buf[sizeof(__m512i)];
22692269
int64_t popcnt = 0;
22702270
__m512i accum = _mm512_setzero_si512();
2271-
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
2272-
const __m512i cnt = _mm512_popcnt_epi64(val);
2271+
__m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
2272+
__m512i cnt = _mm512_popcnt_epi64(val);
22732273
accum = _mm512_add_epi64(accum, cnt);
22742274
popcnt = _mm512_reduce_add_epi64(accum);
22752275
/* return computed value, to prevent the above being optimized away */
@@ -2317,13 +2317,13 @@ if host_cpu == 'x86' or host_cpu == 'x86_64'
23172317

23182318
prog = '''
23192319
#include <nmmintrin.h>
2320+
unsigned int crc;
23202321
23212322
#if defined(__has_attribute) && __has_attribute (target)
23222323
__attribute__((target("sse4.2")))
23232324
#endif
23242325
int main(void)
23252326
{
2326-
unsigned int crc = 0;
23272327
crc = _mm_crc32_u8(crc, 0);
23282328
crc = _mm_crc32_u32(crc, 0);
23292329
/* return computed value, to prevent the above being optimized away */
@@ -2352,10 +2352,10 @@ elif host_cpu == 'arm' or host_cpu == 'aarch64'
23522352

23532353
prog = '''
23542354
#include <arm_acle.h>
2355+
unsigned int crc;
23552356
23562357
int main(void)
23572358
{
2358-
unsigned int crc = 0;
23592359
crc = __crc32cb(crc, 0);
23602360
crc = __crc32ch(crc, 0);
23612361
crc = __crc32cw(crc, 0);
@@ -2390,9 +2390,10 @@ int main(void)
23902390
elif host_cpu == 'loongarch64'
23912391

23922392
prog = '''
2393+
unsigned int crc;
2394+
23932395
int main(void)
23942396
{
2395-
unsigned int crc = 0;
23962397
crc = __builtin_loongarch_crcc_w_b_w(0, crc);
23972398
crc = __builtin_loongarch_crcc_w_h_w(0, crc);
23982399
crc = __builtin_loongarch_crcc_w_w_w(0, crc);

0 commit comments

Comments
 (0)