-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[Headers][X86] Allow FMA intrinsics to be used in constexpr #156385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Fixes llvm#155265 Add constexpr support for the following: _mm512_fmadd_pd _mm512_mask_fmadd_pd _mm512_mask3_fmadd_pd _mm512_maskz_fmadd_pd _mm512_fmadd_ps _mm512_mask_fmadd_ps _mm512_mask3_fmadd_ps _mm512_maskz_fmadd_ps _mm_mask_fmadd_pd _mm_mask3_fmadd_pd _mm_maskz_fmadd_pd _mm_mask_fmadd_ps _mm_mask3_fmadd_ps _mm_maskz_fmadd_ps _mm256_mask_fmadd_pd _mm256_mask3_fmadd_pd _mm256_maskz_fmadd_pd _mm256_mask_fmadd_ps _mm256_mask3_fmadd_ps _mm256_maskz_fmadd_ps _mm512_fmsub_pd _mm512_mask_fmsub_pd _mm512_mask3_fmsub_pd _mm512_maskz_fmsub_pd _mm512_fmsub_ps _mm512_mask_fmsub_ps _mm512_mask3_fmsub_ps _mm512_maskz_fmsub_ps _mm_mask_fmsub_pd _mm_mask3_fmsub_pd _mm_maskz_fmsub_pd _mm_mask_fmsub_ps _mm_mask3_fmsub_ps _mm_maskz_fmsub_ps _mm256_mask_fmsub_pd _mm256_mask3_fmsub_pd _mm256_maskz_fmsub_pd _mm256_mask_fmsub_ps _mm256_mask3_fmsub_ps _mm256_maskz_fmsub_ps _mm512_fnmadd_pd _mm512_mask_fnmadd_pd _mm512_mask3_fnmadd_pd _mm512_maskz_fnmadd_pd _mm512_fnmsub_pd _mm512_mask_fnmsub_pd _mm512_mask3_fnmsub_pd _mm512_maskz_fnmsub_pd _mm_mask_fnmadd_pd _mm_mask3_fnmadd_pd _mm_maskz_fnmadd_pd _mm_mask_fnmadd_ps _mm_mask3_fnmadd_ps _mm_maskz_fnmadd_ps _mm256_mask_fnmadd_pd _mm256_mask3_fnmadd_pd _mm256_maskz_fnmadd_pd _mm256_mask_fnmadd_ps _mm256_mask3_fnmadd_ps _mm256_maskz_fnmadd_ps _mm512_fnmadd_ps _mm512_mask_fnmadd_ps _mm512_mask3_fnmadd_ps _mm512_maskz_fnmadd_ps _mm512_fnmsub_ps _mm512_mask_fnmsub_ps _mm512_mask3_fnmsub_ps _mm512_maskz_fnmsub_ps _mm_mask_fnmsub_pd _mm_mask3_fnmsub_pd _mm_maskz_fnmsub_pd _mm_mask_fnmsub_ps _mm_mask3_fnmsub_ps _mm_maskz_fnmsub_ps _mm256_mask_fnmsub_pd _mm256_mask3_fnmsub_pd _mm256_maskz_fnmsub_pd _mm256_mask_fnmsub_ps _mm256_mask3_fnmsub_ps _mm256_maskz_fnmsub_ps
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Chaitanya Koparkar (ckoparkar) ChangesFixes #155265 Add constexpr support for the following: _mm512_fmadd_pd _mm512_mask_fmadd_pd _mm512_mask3_fmadd_pd _mm512_maskz_fmadd_pd _mm512_fmadd_ps _mm512_mask_fmadd_ps _mm512_mask3_fmadd_ps _mm512_maskz_fmadd_ps _mm_mask_fmadd_pd _mm_mask3_fmadd_pd _mm_maskz_fmadd_pd _mm_mask_fmadd_ps _mm_mask3_fmadd_ps _mm_maskz_fmadd_ps _mm256_mask_fmadd_pd _mm256_mask3_fmadd_pd _mm256_maskz_fmadd_pd _mm256_mask_fmadd_ps _mm256_mask3_fmadd_ps _mm256_maskz_fmadd_ps _mm512_fmsub_pd _mm512_mask_fmsub_pd _mm512_mask3_fmsub_pd _mm512_maskz_fmsub_pd _mm512_fmsub_ps _mm512_mask_fmsub_ps _mm512_mask3_fmsub_ps _mm512_maskz_fmsub_ps _mm_mask_fmsub_pd _mm_mask3_fmsub_pd _mm_maskz_fmsub_pd _mm_mask_fmsub_ps _mm_mask3_fmsub_ps _mm_maskz_fmsub_ps _mm256_mask_fmsub_pd _mm256_mask3_fmsub_pd _mm256_maskz_fmsub_pd _mm256_mask_fmsub_ps _mm256_mask3_fmsub_ps _mm256_maskz_fmsub_ps _mm512_fnmadd_pd _mm512_mask_fnmadd_pd _mm512_mask3_fnmadd_pd _mm512_maskz_fnmadd_pd _mm512_fnmsub_pd _mm512_mask_fnmsub_pd _mm512_mask3_fnmsub_pd _mm512_maskz_fnmsub_pd _mm_mask_fnmadd_pd _mm_mask3_fnmadd_pd _mm_maskz_fnmadd_pd _mm_mask_fnmadd_ps _mm_mask3_fnmadd_ps _mm_maskz_fnmadd_ps _mm256_mask_fnmadd_pd _mm256_mask3_fnmadd_pd _mm256_maskz_fnmadd_pd _mm256_mask_fnmadd_ps _mm256_mask3_fnmadd_ps _mm256_maskz_fnmadd_ps _mm512_fnmadd_ps _mm512_mask_fnmadd_ps _mm512_mask3_fnmadd_ps _mm512_maskz_fnmadd_ps _mm512_fnmsub_ps _mm512_mask_fnmsub_ps _mm512_mask3_fnmsub_ps _mm512_maskz_fnmsub_ps _mm_mask_fnmsub_pd _mm_mask3_fnmsub_pd _mm_maskz_fnmsub_pd _mm_mask_fnmsub_ps _mm_mask3_fnmsub_ps _mm_maskz_fnmsub_ps _mm256_mask_fnmsub_pd _mm256_mask3_fnmsub_pd _mm256_maskz_fnmsub_pd _mm256_mask_fnmsub_ps _mm256_mask3_fnmsub_ps _mm256_maskz_fnmsub_ps Patch is 121.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156385.diff 4 Files Affected:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e23b1c0381ab1..741ce26aaa043 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -2502,124 +2502,136 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) __A);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) _mm512_setzero_pd());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) __A);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) __C);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) _mm512_setzero_pd());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- -(__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) __A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) _mm512_setzero_pd());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- -(__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) __A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) __C);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) _mm512_setzero_pd());
}
#define _mm512_fmadd_round_ps(A, B, C, R) \
@@ -2706,124 +2718,136 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
(__mmask16)(U), (int)(R)))
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512 ) __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)__A);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)__A);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)__C);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- -(__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)__A);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- -(__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)__A);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)__C);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)_mm512_setzero_ps());
}
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
@@ -3071,15 +3095,6 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
- return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
@@ -3087,16 +3102,6 @@ _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
e546bec
to
b9bc673
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - the mask3 intrinsics seem to be mixed up randomly and could do with sorting out at some point to be consistently with their siblings, but that's not relevant for this patch.
Indeed. I considered rearranging them, and the tests too, but chose to leave it for another time to not make this patch even longer. |
Thanks for the review and merge! |
Fixes #155265
Add constexpr support for the following:
_mm512_fmadd_pd _mm512_mask_fmadd_pd _mm512_mask3_fmadd_pd _mm512_maskz_fmadd_pd _mm512_fmadd_ps _mm512_mask_fmadd_ps _mm512_mask3_fmadd_ps _mm512_maskz_fmadd_ps _mm_mask_fmadd_pd _mm_mask3_fmadd_pd _mm_maskz_fmadd_pd _mm_mask_fmadd_ps _mm_mask3_fmadd_ps _mm_maskz_fmadd_ps _mm256_mask_fmadd_pd _mm256_mask3_fmadd_pd _mm256_maskz_fmadd_pd _mm256_mask_fmadd_ps _mm256_mask3_fmadd_ps _mm256_maskz_fmadd_ps
_mm512_fmsub_pd _mm512_mask_fmsub_pd _mm512_mask3_fmsub_pd _mm512_maskz_fmsub_pd _mm512_fmsub_ps _mm512_mask_fmsub_ps _mm512_mask3_fmsub_ps _mm512_maskz_fmsub_ps _mm_mask_fmsub_pd _mm_mask3_fmsub_pd _mm_maskz_fmsub_pd _mm_mask_fmsub_ps _mm_mask3_fmsub_ps _mm_maskz_fmsub_ps _mm256_mask_fmsub_pd _mm256_mask3_fmsub_pd _mm256_maskz_fmsub_pd _mm256_mask_fmsub_ps _mm256_mask3_fmsub_ps _mm256_maskz_fmsub_ps
_mm512_fnmadd_pd _mm512_mask_fnmadd_pd _mm512_mask3_fnmadd_pd _mm512_maskz_fnmadd_pd _mm512_fnmsub_pd _mm512_mask_fnmsub_pd _mm512_mask3_fnmsub_pd _mm512_maskz_fnmsub_pd _mm_mask_fnmadd_pd _mm_mask3_fnmadd_pd _mm_maskz_fnmadd_pd _mm_mask_fnmadd_ps _mm_mask3_fnmadd_ps _mm_maskz_fnmadd_ps _mm256_mask_fnmadd_pd _mm256_mask3_fnmadd_pd _mm256_maskz_fnmadd_pd _mm256_mask_fnmadd_ps _mm256_mask3_fnmadd_ps _mm256_maskz_fnmadd_ps
_mm512_fnmadd_ps _mm512_mask_fnmadd_ps _mm512_mask3_fnmadd_ps _mm512_maskz_fnmadd_ps _mm512_fnmsub_ps _mm512_mask_fnmsub_ps _mm512_mask3_fnmsub_ps _mm512_maskz_fnmsub_ps _mm_mask_fnmsub_pd _mm_mask3_fnmsub_pd _mm_maskz_fnmsub_pd _mm_mask_fnmsub_ps _mm_mask3_fnmsub_ps _mm_maskz_fnmsub_ps _mm256_mask_fnmsub_pd _mm256_mask3_fnmsub_pd _mm256_maskz_fnmsub_pd _mm256_mask_fnmsub_ps _mm256_mask3_fnmsub_ps _mm256_maskz_fnmsub_ps