-
Notifications
You must be signed in to change notification settings - Fork 15k
[Headers][X86] Allow AVX512 masked blend intrinsics to be used in constexpr #156234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-backend-x86 Author: Harsh Tiwary (notnotharsh) ChangesThis patch enables AVX-512 masked blend intrinsics to be usable in constant expressions ( Fixes #155796. Patch is 25.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156234.diff 10 Files Affected:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 2d2bf59fc5b76..5ad2856f8b996 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -465,17 +465,15 @@ _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
(__v64qi) __W,
(__v64qi) __A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
(__v32hi) __W,
(__v32hi) __A);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e23b1c0381ab1..1ce468c22bdd6 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3366,33 +3366,29 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
/* Vector Blend */
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
(__v8df) __W,
(__v8df) __A);
}
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
(__v16sf) __W,
(__v16sf) __A);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
(__v8di) __W,
(__v8di) __A);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
(__v16si) __W,
(__v16si) __A);
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 6989b86a7b68c..a6b0820de90cf 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -3306,7 +3306,7 @@ _mm512_reduce_min_ph(__m512h __V) {
return __builtin_ia32_reduce_fmin_ph512(__V);
}
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W,
(__v32hf)__A);
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index f96468c56c392..81a77fa573a90 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -454,33 +454,29 @@ _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
(__v16qi) __W,
(__v16qi) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
(__v32qi) __W,
(__v32qi) __A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
(__v8hi) __W,
(__v8hi) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
(__v16hi) __W,
(__v16hi) __A);
diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
index 98ad9b54eef39..6be559d9a1a33 100644
--- a/clang/lib/Headers/avx512vlfp16intrin.h
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -1984,14 +1984,13 @@ _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
(__v8sf)__C, (__mmask8)__U);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
- __m128h __A,
- __m128h __W) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
(__v8hf)__A);
}
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
(__v16hf)__A);
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 5a5b09e274563..f92a01293d1e9 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -1631,57 +1631,57 @@ _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
(__v4si) __W,
(__v4si) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
(__v8si) __W,
(__v8si) __A);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
(__v2df) __W,
(__v2df) __A);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
(__v4df) __W,
(__v4df) __A);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
(__v4sf) __W,
(__v4sf) __A);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
(__v8sf) __W,
(__v8sf) __A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
(__v2di) __W,
(__v2di) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
(__v4di) __W,
(__v4di) __A);
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 6e10af2c282e0..0d01a26077bf9 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -847,11 +847,57 @@ __m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_blend_epi8(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v64qi(
+ _mm512_mask_blend_epi8(
+ (__mmask64) 0x00000001,
+ (__m512i)(__v64qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v64qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25, 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
__m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
// CHECK-LABEL: test_mm512_mask_blend_epi16
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_blend_epi16(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_mask_blend_epi16(
+ (__mmask32) 0x00000001,
+ (__m512i)(__v32hi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v32hi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
+
+__m512i test_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_epi32
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_mask_blend_epi32(__U, __A, __W);
+}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_blend_epi32(
+ (__mmask16) 0x0001,
+ (__m512i)(__v16si) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v16si){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
+
+__m512i test_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_epi64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_mask_blend_epi64(__U, __A, __W);
+}
+
+TEST_CONSTEXPR(match_v8di(
+ _mm512_mask_blend_epi64(
+ (__mmask8)0x01,
+ (__m512i)(__v8di){2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2
+));
+
__m512i test_mm512_abs_epi8(__m512i __A) {
// CHECK-LABEL: test_mm512_abs_epi8
// CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false)
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 6858f114c29c4..55a1d7fc184fe 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -3521,41 +3521,140 @@ __m128i test_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_blend_epi32(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_blend_epi32(
+ (__mmask8)0x01,
+ (__m128i)(__v4si){2, 2, 2, 2},
+ (__m128i)(__v4si){ 10,11,12,13 }
+ ),
+ 10, 2, 2, 2
+));
__m256i test_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi32
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_blend_epi32(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_blend_epi32(
+ (__mmask8)0x01,
+ (__m256i)(__v8si){2, 2, 2, 2, 2, 2, 2, 2},
+ (__m256i)(__v8si){ 10,11,12,13,14,15,16,17 }
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2
+));
__m128d test_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
// CHECK-LABEL: test_mm_mask_blend_pd
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_blend_pd(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m128d)(__v2df){2.0, 2.0},
+ (__m128d)(__v2df){10.0, 20.0}
+ ),
+ 10.0, 2.0
+));
__m256d test_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
// CHECK-LABEL: test_mm256_mask_blend_pd
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_blend_pd(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m256d)(__v4df){2.0, 2.0, 2.0, 2.0},
+ (__m256d)(__v4df){10.0, 11.0, 12.0, 13.0}
+ ),
+ 10.0, 2.0, 2.0, 2.0
+));
+
+__m512d test_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_pd
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_mask_blend_pd(__U, __A, __W);
+}
+
+TEST_CONSTEXPR(match_m512d(
+ _mm512_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m512d)(__v8df){2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0},
+ (__m512d)(__v8df){10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0}
+ ),
+ 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0
+));
+
__m128 test_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
// CHECK-LABEL: test_mm_mask_blend_ps
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_blend_ps(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_mask_blend_ps(
+ (__mmask8)0x01,
+ (__m128)(__v4sf){2.0f, 2.0f, 2.0f, 2.0f},
+ (__m128)(__v4sf){10.0f, 11.0f, 12.0f, 13.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f
+));
+
__m256 test_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
// CHECK-LABEL: test_mm256_mask_blend_ps
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_blend_ps(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_mask_blend_ps(
+ (__mmask8)0x01,
+ (__m256)(__v8sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f},
+ (__m256)(__v8sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f
+));
+
+__m512 test_mm512_mask_blend_ps(__mmask8 __U, __m512 __A, __m512 __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_ps
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask_blend_ps(__U, __A, __W);
+}
+TEST_CONSTEXPR(match_m512(
+ _mm512_mask_blend_ps(
+ (__mmask16)0x01,
+ (__m512)(__v16sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f},
+ (__m512)(__v16sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
+ 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f
+));
+
__m128i test_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK-LABEL: test_mm_mask_blend_epi64
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_blend_epi64(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_blend_epi64(
+ (__mmask8)0x01,
+ (__m128i)(__v2di){2, 2},
+ (__m128i)(__v2di){ 10,11 }
+ ),
+ 10, 2
+));
__m256i test_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi64
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_blend_epi64(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask_blend_epi64(
+ (__mmask8)0x01,
+ (__m256i)(__v4di){2, 2, 2, 2},
+ (__m256i)(__v4di){ 10,11,12,13 }
+ ),
+ 10, 2, 2, 2
+));
__m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CHECK-LABEL: test_mm_mask_compress_pd
// CHECK: @llvm.x86.avx512.mask.compress
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 5d697440bfc1b..59e37c47e2eb3 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -882,23 +882,56 @@ __m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_blend_epi8(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v16qi(
+ _mm_mask_blend_epi8(
+ (__mmask16)0x0001,
+ (__m128i)(__v16qi){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m128i)(__v16qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25 }
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
+
__m256i test_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_blend_epi8(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v32qi(
+ _mm256_mask_blend_epi8(
+ (__mmask32) 0x00000001,
+ (__m256i)(__v32qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m256i)(__v32qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
__m128i test_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK-LABEL: test_mm_mask_blend_epi16
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_blend_epi16(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v8hi(
+ _mm_mask_blend_epi16(
+ (__mmask8)0x01,
+ (__m128i)(__v8hi){2, 2, 2, 2, 2, 2, 2, 2},
+ (__m128i)(__v8hi){ 10,11,12,13,14,15,16,17 }
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2
+));
__m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_blend_epi16(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v16hi(
+ _mm256_m...
[truncated]
|
@llvm/pr-subscribers-clang Author: Harsh Tiwary (notnotharsh) ChangesThis patch enables AVX-512 masked blend intrinsics to be usable in constant expressions ( Fixes #155796. Patch is 25.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156234.diff 10 Files Affected:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 2d2bf59fc5b76..5ad2856f8b996 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -465,17 +465,15 @@ _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
(__v64qi) __W,
(__v64qi) __A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
(__v32hi) __W,
(__v32hi) __A);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e23b1c0381ab1..1ce468c22bdd6 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3366,33 +3366,29 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
/* Vector Blend */
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
(__v8df) __W,
(__v8df) __A);
}
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
(__v16sf) __W,
(__v16sf) __A);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
(__v8di) __W,
(__v8di) __A);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
(__v16si) __W,
(__v16si) __A);
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 6989b86a7b68c..a6b0820de90cf 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -3306,7 +3306,7 @@ _mm512_reduce_min_ph(__m512h __V) {
return __builtin_ia32_reduce_fmin_ph512(__V);
}
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W,
(__v32hf)__A);
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index f96468c56c392..81a77fa573a90 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -454,33 +454,29 @@ _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
(__v16qi) __W,
(__v16qi) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
(__v32qi) __W,
(__v32qi) __A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
(__v8hi) __W,
(__v8hi) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
(__v16hi) __W,
(__v16hi) __A);
diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
index 98ad9b54eef39..6be559d9a1a33 100644
--- a/clang/lib/Headers/avx512vlfp16intrin.h
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -1984,14 +1984,13 @@ _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
(__v8sf)__C, (__mmask8)__U);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
- __m128h __A,
- __m128h __W) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
(__v8hf)__A);
}
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
(__v16hf)__A);
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 5a5b09e274563..f92a01293d1e9 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -1631,57 +1631,57 @@ _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
(__v4si) __W,
(__v4si) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
(__v8si) __W,
(__v8si) __A);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
(__v2df) __W,
(__v2df) __A);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
(__v4df) __W,
(__v4df) __A);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
(__v4sf) __W,
(__v4sf) __A);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
(__v8sf) __W,
(__v8sf) __A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
(__v2di) __W,
(__v2di) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
(__v4di) __W,
(__v4di) __A);
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 6e10af2c282e0..0d01a26077bf9 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -847,11 +847,57 @@ __m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_blend_epi8(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v64qi(
+ _mm512_mask_blend_epi8(
+ (__mmask64) 0x00000001,
+ (__m512i)(__v64qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v64qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25, 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
__m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
// CHECK-LABEL: test_mm512_mask_blend_epi16
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_blend_epi16(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_mask_blend_epi16(
+ (__mmask32) 0x00000001,
+ (__m512i)(__v32hi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v32hi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
+
+__m512i test_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_epi32
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_mask_blend_epi32(__U, __A, __W);
+}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_blend_epi32(
+ (__mmask16) 0x0001,
+ (__m512i)(__v16si) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v16si){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
+
+__m512i test_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_epi64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_mask_blend_epi64(__U, __A, __W);
+}
+
+TEST_CONSTEXPR(match_v8di(
+ _mm512_mask_blend_epi64(
+ (__mmask8)0x01,
+ (__m512i)(__v8di){2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2
+));
+
__m512i test_mm512_abs_epi8(__m512i __A) {
// CHECK-LABEL: test_mm512_abs_epi8
// CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false)
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 6858f114c29c4..55a1d7fc184fe 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -3521,41 +3521,140 @@ __m128i test_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_blend_epi32(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_blend_epi32(
+ (__mmask8)0x01,
+ (__m128i)(__v4si){2, 2, 2, 2},
+ (__m128i)(__v4si){ 10,11,12,13 }
+ ),
+ 10, 2, 2, 2
+));
__m256i test_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi32
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_blend_epi32(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_blend_epi32(
+ (__mmask8)0x01,
+ (__m256i)(__v8si){2, 2, 2, 2, 2, 2, 2, 2},
+ (__m256i)(__v8si){ 10,11,12,13,14,15,16,17 }
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2
+));
__m128d test_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
// CHECK-LABEL: test_mm_mask_blend_pd
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_blend_pd(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m128d)(__v2df){2.0, 2.0},
+ (__m128d)(__v2df){10.0, 20.0}
+ ),
+ 10.0, 2.0
+));
__m256d test_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
// CHECK-LABEL: test_mm256_mask_blend_pd
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_blend_pd(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m256d)(__v4df){2.0, 2.0, 2.0, 2.0},
+ (__m256d)(__v4df){10.0, 11.0, 12.0, 13.0}
+ ),
+ 10.0, 2.0, 2.0, 2.0
+));
+
+__m512d test_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_pd
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_mask_blend_pd(__U, __A, __W);
+}
+
+TEST_CONSTEXPR(match_m512d(
+ _mm512_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m512d)(__v8df){2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0},
+ (__m512d)(__v8df){10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0}
+ ),
+ 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0
+));
+
__m128 test_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
// CHECK-LABEL: test_mm_mask_blend_ps
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_blend_ps(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_mask_blend_ps(
+ (__mmask8)0x01,
+ (__m128)(__v4sf){2.0f, 2.0f, 2.0f, 2.0f},
+ (__m128)(__v4sf){10.0f, 11.0f, 12.0f, 13.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f
+));
+
__m256 test_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
// CHECK-LABEL: test_mm256_mask_blend_ps
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_blend_ps(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_mask_blend_ps(
+ (__mmask8)0x01,
+ (__m256)(__v8sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f},
+ (__m256)(__v8sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f
+));
+
+__m512 test_mm512_mask_blend_ps(__mmask8 __U, __m512 __A, __m512 __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_ps
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask_blend_ps(__U, __A, __W);
+}
+TEST_CONSTEXPR(match_m512(
+ _mm512_mask_blend_ps(
+ (__mmask16)0x01,
+ (__m512)(__v16sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f},
+ (__m512)(__v16sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
+ 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f
+));
+
__m128i test_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK-LABEL: test_mm_mask_blend_epi64
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_blend_epi64(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_blend_epi64(
+ (__mmask8)0x01,
+ (__m128i)(__v2di){2, 2},
+ (__m128i)(__v2di){ 10,11 }
+ ),
+ 10, 2
+));
__m256i test_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi64
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_blend_epi64(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask_blend_epi64(
+ (__mmask8)0x01,
+ (__m256i)(__v4di){2, 2, 2, 2},
+ (__m256i)(__v4di){ 10,11,12,13 }
+ ),
+ 10, 2, 2, 2
+));
__m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CHECK-LABEL: test_mm_mask_compress_pd
// CHECK: @llvm.x86.avx512.mask.compress
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 5d697440bfc1b..59e37c47e2eb3 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -882,23 +882,56 @@ __m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_blend_epi8(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v16qi(
+ _mm_mask_blend_epi8(
+ (__mmask16)0x0001,
+ (__m128i)(__v16qi){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m128i)(__v16qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25 }
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
+
__m256i test_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_blend_epi8(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v32qi(
+ _mm256_mask_blend_epi8(
+ (__mmask32) 0x00000001,
+ (__m256i)(__v32qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m256i)(__v32qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
__m128i test_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK-LABEL: test_mm_mask_blend_epi16
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_blend_epi16(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v8hi(
+ _mm_mask_blend_epi16(
+ (__mmask8)0x01,
+ (__m128i)(__v8hi){2, 2, 2, 2, 2, 2, 2, 2},
+ (__m128i)(__v8hi){ 10,11,12,13,14,15,16,17 }
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2
+));
__m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_blend_epi16(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v16hi(
+ _mm256_m...
[truncated]
|
@@ -915,7 +948,14 @@ __m128i test_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { | |||
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[ABS]], <16 x i8> %{{.*}} | |||
return _mm_mask_abs_epi8(__W,__U,__A); | |||
} | |||
TEST_CONSTEXPR(match_v16qi(_mm_mask_abs_epi8((__m128i)(__v16qi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, (__mmask16)0x0001, (__m128i)(__v16qi){(char)-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); | |||
TEST_CONSTEXPR(match_v16qi( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) don't touch/format lines unrelated to a patch
This patch enables AVX-512 masked blend intrinsics to be usable in constant expressions (
constexpr
) across various vector widths (128-bit, 256-bit, 512-bit). It updates the respective Clang headers to include the__DEFAULT_FN_ATTRS_CONSTEXPR
annotation where applicable, and supplements the change with thoroughTEST_CONSTEXPR
checks in the X86 CodeGen test suite to validate constexpr evaluation.Fixes #155796.