-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[Headers][X86] Allow MMX/SSE integer min/max intrinsics to be used in constexpr #156678
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Headers][X86] Allow MMX/SSE integer min/max intrinsics to be used in constexpr #156678
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
803db31
to
b25aeee
Compare
… constexpr Update the MMX/SSE integer min/max intrinsics to be constexpr compatible.
b25aeee
to
5bdb914
Compare
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Bhasawut Singhaphan (markbhasawut) ChangesUpdate the MMX/SSE integer min/max intrinsics to be constexpr compatible. This is a part of #153153. Full diff: https://github.com/llvm/llvm-project/pull/156678.diff 7 Files Affected:
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index e15a260514f2d..482632157d9b4 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2316,8 +2316,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a,
/// A 128-bit signed [8 x i16] vector.
/// \returns A 128-bit signed [8 x i16] vector containing the greater value of
/// each comparison.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epi16(__m128i __a, __m128i __b) {
return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b);
}
@@ -2335,8 +2335,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a,
/// A 128-bit unsigned [16 x i8] vector.
/// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of
/// each comparison.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epu8(__m128i __a, __m128i __b) {
return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b);
}
@@ -2354,8 +2354,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a,
/// A 128-bit signed [8 x i16] vector.
/// \returns A 128-bit signed [8 x i16] vector containing the smaller value of
/// each comparison.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epi16(__m128i __a, __m128i __b) {
return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b);
}
@@ -2373,8 +2373,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a,
/// A 128-bit unsigned [16 x i8] vector.
/// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of
/// each comparison.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epu8(__m128i __a, __m128i __b) {
return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index e8f1f57c97c08..06b3da8b48dfe 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -675,8 +675,8 @@ _mm_stream_load_si128(const void *__V) {
/// \param __V2
/// A 128-bit vector of [16 x i8]
/// \returns A 128-bit vector of [16 x i8] containing the lesser values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epi8(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_elementwise_min((__v16qs)__V1, (__v16qs)__V2);
}
@@ -693,8 +693,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1,
/// \param __V2
/// A 128-bit vector of [16 x i8].
/// \returns A 128-bit vector of [16 x i8] containing the greater values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epi8(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_elementwise_max((__v16qs)__V1, (__v16qs)__V2);
}
@@ -711,8 +711,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1,
/// \param __V2
/// A 128-bit vector of [8 x u16].
/// \returns A 128-bit vector of [8 x u16] containing the lesser values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epu16(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_elementwise_min((__v8hu)__V1, (__v8hu)__V2);
}
@@ -729,8 +729,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1,
/// \param __V2
/// A 128-bit vector of [8 x u16].
/// \returns A 128-bit vector of [8 x u16] containing the greater values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epu16(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_elementwise_max((__v8hu)__V1, (__v8hu)__V2);
}
@@ -747,8 +747,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1,
/// \param __V2
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [4 x i32] containing the lesser values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epi32(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_elementwise_min((__v4si)__V1, (__v4si)__V2);
}
@@ -765,8 +765,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1,
/// \param __V2
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [4 x i32] containing the greater values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epi32(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_elementwise_max((__v4si)__V1, (__v4si)__V2);
}
@@ -783,8 +783,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1,
/// \param __V2
/// A 128-bit vector of [4 x u32].
/// \returns A 128-bit vector of [4 x u32] containing the lesser values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epu32(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_elementwise_min((__v4su)__V1, (__v4su)__V2);
}
@@ -801,8 +801,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1,
/// \param __V2
/// A 128-bit vector of [4 x u32].
/// \returns A 128-bit vector of [4 x u32] containing the greater values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epu32(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_elementwise_max((__v4su)__V1, (__v4su)__V2);
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 6d44cff46661f..08f562cc38bc8 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2353,9 +2353,8 @@ void _mm_sfence(void);
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_max_pi16(__m64 __a, __m64 __b)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_max_pi16(__m64 __a, __m64 __b) {
return (__m64)__builtin_elementwise_max((__v4hi)__a, (__v4hi)__b);
}
@@ -2391,9 +2390,8 @@ _mm_max_pu8(__m64 __a, __m64 __b)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_min_pi16(__m64 __a, __m64 __b)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_min_pi16(__m64 __a, __m64 __b) {
return (__m64)__builtin_elementwise_min((__v4hi)__a, (__v4hi)__b);
}
diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
index 2476a2b7c723a..fcaf360626a2d 100644
--- a/clang/test/CodeGen/X86/builtin_test_helpers.h
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -230,6 +230,11 @@ constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int f,
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
}
+constexpr bool match_v16su(__m512i _v, unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, unsigned int f, unsigned int g, unsigned int h, unsigned int i, unsigned int j, unsigned int k, unsigned int l, unsigned int m, unsigned int n, unsigned int o, unsigned int p) {
+ __v16su v = (__v16su)_v;
+ return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
constexpr bool match_v32hi(__m512i _v, short __e00, short __e01, short __e02, short __e03, short __e04, short __e05, short __e06, short __e07,
short __e08, short __e09, short __e10, short __e11, short __e12, short __e13, short __e14, short __e15,
short __e16, short __e17, short __e18, short __e19, short __e20, short __e21, short __e22, short __e23,
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 7bd2475399bf9..b19e82383cbfd 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -370,6 +370,8 @@ __m64 test_mm_max_pi16(__m64 a, __m64 b) {
return _mm_max_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_max_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, 2, -3, 4}), 1, 2, 3, 4));
+
__m64 test_mm_max_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_max_pu8
// CHECK: call <8 x i8> @llvm.umax.v8i8(
@@ -382,6 +384,8 @@ __m64 test_mm_min_pi16(__m64 a, __m64 b) {
return _mm_min_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_min_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, 2, -3, 4}), -1, -2, -3, -4));
+
__m64 test_mm_min_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_min_pu8
// CHECK: call <8 x i8> @llvm.umin.v8i8(
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 69a6d89715359..f5de5069c0046 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -855,12 +855,16 @@ __m128i test_mm_max_epi16(__m128i A, __m128i B) {
return _mm_max_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_max_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 5, 8, 12, 20, 32));
+
__m128i test_mm_max_epu8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_max_epu8
// CHECK: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_max_epu8(A, B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_max_epu8((__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 9, 10, 11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16));
+
__m128d test_mm_max_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_max_pd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
@@ -885,12 +889,16 @@ __m128i test_mm_min_epi16(__m128i A, __m128i B) {
return _mm_min_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_min_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 4, 5, 6, 7, 8));
+
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_min_epu8
// CHECK: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_min_epu8(A, B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_min_epu8((__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
+
__m128d test_mm_min_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_min_pd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 5f53614872604..06c992e20baeb 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -291,48 +291,64 @@ __m128i test_mm_max_epi8(__m128i x, __m128i y) {
return _mm_max_epi8(x, y);
}
+TEST_CONSTEXPR(match_v16qi(_mm_max_epi8((__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16));
+
__m128i test_mm_max_epi32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_max_epi32
// CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_max_epi32(x, y);
}
+TEST_CONSTEXPR(match_v4si(_mm_max_epi32((__m128i)(__v4si){-1, +2, -3, +4}, (__m128i)(__v4si){+1, -2, +3, -4}), +1, +2, +3, +4 ));
+
__m128i test_mm_max_epu16(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_max_epu16
// CHECK: call <8 x i16> @llvm.umax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_max_epu16(x, y);
}
+TEST_CONSTEXPR(match_v8hu(_mm_max_epu16((__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 3, 4, 5, 7, 9, 11, 13, 15));
+
__m128i test_mm_max_epu32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_max_epu32
// CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_max_epu32(x, y);
}
+TEST_CONSTEXPR(match_v4su(_mm_max_epu32((__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 3, 4, 5, 7));
+
__m128i test_mm_min_epi8(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_min_epi8
// CHECK: call <16 x i8> @llvm.smin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_min_epi8(x, y);
}
+TEST_CONSTEXPR(match_v16qi(_mm_min_epi8((__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16));
+
__m128i test_mm_min_epi32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_min_epi32
// CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_min_epi32(x, y);
}
+TEST_CONSTEXPR(match_v4si(_mm_min_epi32((__m128i)(__v4si){-1, +2, -3, +4}, (__m128i)(__v4si){+1, -2, +3, -4}), -1, -2, -3, -4 ));
+
__m128i test_mm_min_epu16(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_min_epu16
// CHECK: call <8 x i16> @llvm.umin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_min_epu16(x, y);
}
+TEST_CONSTEXPR(match_v8hu(_mm_min_epu16((__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 1, 3, 5, 6, 7, 8, 9, 10));
+
__m128i test_mm_min_epu32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_min_epu32
// CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_min_epu32(x, y);
}
+TEST_CONSTEXPR(match_v4su(_mm_min_epu32((__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 1, 3, 5, 6));
+
__m128i test_mm_minpos_epu16(__m128i x) {
// CHECK-LABEL: test_mm_minpos_epu16
// CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}})
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
Thanks for reviewing! @RKSimon |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/26639 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/154/builds/21029 Here is the relevant piece of the build log for the reference
|
Update the MMX/SSE integer min/max intrinsics to be constexpr compatible.
This is a part of #153153.