-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[Headers][X86] Allow AVX512 integer min/max mask/maskz variants intrinsics to be used in constexpr #156901
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[Headers][X86] Allow AVX512 integer min/max mask/maskz variants intrinsics to be used in constexpr #156901
Conversation
38e6025
to
ee8fe12
Compare
…nsics to be used in constexpr Update the AVX512 integer min/max mask/maskz variants intrinsics to be constexpr compatible. This is the last in the 'Allow MMX/SSE/AVX2/AVX512 integer min/max intrinsics to be used in constexpr' series.
ee8fe12
to
2b91d9f
Compare
@llvm/pr-subscribers-clang Author: Bhasawut Singhaphan (markbhasawut) ChangesUpdate the AVX512 integer min/max mask/maskz variants intrinsics to be constexpr compatible. This is a follow-up to #156833, and the last in the 'Allow MMX/SSE/AVX2/AVX512 integer min/max intrinsics to be used in constexpr' series. Closes #153153 Patch is 46.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156901.diff 4 Files Affected:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index a08735b937704..e0d3e676ac470 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -742,17 +742,15 @@ _mm512_max_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_max_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_max_epi8(__A, __B),
(__v64qi)__W);
@@ -763,18 +761,15 @@ _mm512_max_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_max_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_max_epi16(__A, __B),
(__v32hi)__W);
@@ -785,17 +780,15 @@ _mm512_max_epu8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_max_epu8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_max_epu8(__A, __B),
(__v64qi)__W);
@@ -806,17 +799,15 @@ _mm512_max_epu16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_max_epu16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_max_epu16(__A, __B),
(__v32hi)__W);
@@ -827,17 +818,15 @@ _mm512_min_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_min_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_min_epi8(__A, __B),
(__v64qi)__W);
@@ -848,17 +837,15 @@ _mm512_min_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_min_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_min_epi16(__A, __B),
(__v32hi)__W);
@@ -869,17 +856,15 @@ _mm512_min_epu8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_min_epu8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_min_epu8(__A, __B),
(__v64qi)__W);
@@ -890,17 +875,15 @@ _mm512_min_epu16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_min_epu16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_min_epu16(__A, __B),
(__v32hi)__W);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 3e285f031ffe8..b9475d3f1d410 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1091,17 +1091,15 @@ static __inline __m512i
return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_max_epi32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_max_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
@@ -1112,17 +1110,15 @@ _mm512_max_epu32(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_max_epu32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_max_epu32(__A, __B),
(__v16si)_mm512_setzero_si512());
@@ -1133,17 +1129,15 @@ _mm512_max_epi64(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_max_epi64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_max_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
@@ -1154,17 +1148,15 @@ _mm512_max_epu64(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_max_epu64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_max_epu64(__A, __B),
(__v8di)_mm512_setzero_si512());
@@ -1321,17 +1313,15 @@ static __inline __m512i
return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_min_epi32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_min_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
@@ -1342,17 +1332,15 @@ _mm512_min_epu32(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_min_epu32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_min_epu32(__A, __B),
(__v16si)_mm512_setzero_si512());
@@ -1363,17 +1351,15 @@ _mm512_min_epi64(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_min_epi64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_min_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
@@ -1384,17 +1370,15 @@ _mm512_min_epu64(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_min_epu64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_min_epu64(__A, __B),
(__v8di)_mm512_setzero_si512());
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index f9330a1d914bc..1f67a9e4d2e53 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1103,12 +1103,18 @@ __m512i test_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}}
return _mm512_maskz_max_epi8(__M,__A,__B);
}
+
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_max_epi8(0x00000000FFFFFFFF, (__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+
__m512i test_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_max_epi8
// CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}})
// CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}}
return _mm512_mask_max_epi8(__W,__M,__A,__B);
}
+
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_max_epi8((__m512i)(__v64qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63, +64}, 0x00000000FFFFFFFF, (__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -...
[truncated]
|
@llvm/pr-subscribers-backend-x86 Author: Bhasawut Singhaphan (markbhasawut) ChangesUpdate the AVX512 integer min/max mask/maskz variants intrinsics to be constexpr compatible. This is a follow-up to #156833, and the last in the 'Allow MMX/SSE/AVX2/AVX512 integer min/max intrinsics to be used in constexpr' series. Closes #153153 Patch is 46.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156901.diff 4 Files Affected:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index a08735b937704..e0d3e676ac470 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -742,17 +742,15 @@ _mm512_max_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_max_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_max_epi8(__A, __B),
(__v64qi)__W);
@@ -763,18 +761,15 @@ _mm512_max_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_max_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_max_epi16(__A, __B),
(__v32hi)__W);
@@ -785,17 +780,15 @@ _mm512_max_epu8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_max_epu8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_max_epu8(__A, __B),
(__v64qi)__W);
@@ -806,17 +799,15 @@ _mm512_max_epu16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_max_epu16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_max_epu16(__A, __B),
(__v32hi)__W);
@@ -827,17 +818,15 @@ _mm512_min_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_min_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_min_epi8(__A, __B),
(__v64qi)__W);
@@ -848,17 +837,15 @@ _mm512_min_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_min_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_min_epi16(__A, __B),
(__v32hi)__W);
@@ -869,17 +856,15 @@ _mm512_min_epu8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_min_epu8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_min_epu8(__A, __B),
(__v64qi)__W);
@@ -890,17 +875,15 @@ _mm512_min_epu16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_min_epu16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_min_epu16(__A, __B),
(__v32hi)__W);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 3e285f031ffe8..b9475d3f1d410 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1091,17 +1091,15 @@ static __inline __m512i
return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_max_epi32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_max_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
@@ -1112,17 +1110,15 @@ _mm512_max_epu32(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_max_epu32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_max_epu32(__A, __B),
(__v16si)_mm512_setzero_si512());
@@ -1133,17 +1129,15 @@ _mm512_max_epi64(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_max_epi64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_max_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
@@ -1154,17 +1148,15 @@ _mm512_max_epu64(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_max_epu64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_max_epu64(__A, __B),
(__v8di)_mm512_setzero_si512());
@@ -1321,17 +1313,15 @@ static __inline __m512i
return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_min_epi32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_min_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
@@ -1342,17 +1332,15 @@ _mm512_min_epu32(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_min_epu32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_min_epu32(__A, __B),
(__v16si)_mm512_setzero_si512());
@@ -1363,17 +1351,15 @@ _mm512_min_epi64(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_min_epi64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_min_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
@@ -1384,17 +1370,15 @@ _mm512_min_epu64(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_min_epu64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_min_epu64(__A, __B),
(__v8di)_mm512_setzero_si512());
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index f9330a1d914bc..1f67a9e4d2e53 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1103,12 +1103,18 @@ __m512i test_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}}
return _mm512_maskz_max_epi8(__M,__A,__B);
}
+
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_max_epi8(0x00000000FFFFFFFF, (__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+
__m512i test_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_max_epi8
// CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}})
// CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}}
return _mm512_mask_max_epi8(__W,__M,__A,__B);
}
+
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_max_epi8((__m512i)(__v64qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63, +64}, 0x00000000FFFFFFFF, (__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -...
[truncated]
|
Update the AVX512 integer min/max mask/maskz variants intrinsics to be constexpr compatible.
This is a follow-up to #156833, and the last in the 'Allow MMX/SSE/AVX2/AVX512 integer min/max intrinsics to be used in constexpr' series.
Closes #153153