Skip to content

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Aug 21, 2025

This handles constant folding for the AVX2 per-element shift intrinsics, which handle out of bounds shift amounts (logical result = 0, arithmetic result = signbit splat)

AVX512 intrinsics will follow in follow up patches

First stage of #154287

This handles constant folding for the AVX2 per-element shift intrinsics, which handle out of bounds shift amounts (logical result = 0, arithmetic result = signbit splat)

AVX512 intrinsics will follow in follow up patches

First stage of llvm#154287
@RKSimon RKSimon requested a review from phoebewang August 21, 2025 15:05
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics labels Aug 21, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 21, 2025

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

This handles constant folding for the AVX2 per-element shift intrinsics, which handle out of bounds shift amounts (logical result = 0, arithmetic result = signbit splat)

AVX512 intrinsics will follow in follow up patches

First stage of #154287


Patch is 20.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154780.diff

4 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsX86.td (+14-42)
  • (modified) clang/lib/AST/ExprConstant.cpp (+48-3)
  • (modified) clang/lib/Headers/avx2intrin.h (+10-10)
  • (modified) clang/test/CodeGen/X86/avx2-builtins.c (+10-12)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cc1da937455a2..527acd9ef086e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -627,11 +627,23 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
 let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
   def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
-}
 
-let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
   def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+
+  def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+  def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+  def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+  def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+  def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+  def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
 }
 
 let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
@@ -654,46 +666,6 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
   def maskstoreq : X86Builtin<"void(_Vector<2, long long int *>, _Vector<2, long long int>, _Vector<2, long long int>)">;
 }
 
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
 let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
   def gatherd_pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, double const *, _Vector<4, int>, _Vector<2, double>, _Constant char)">;
 }
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a03e64fcffde2..9b934753bcc3c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11669,13 +11669,24 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case clang::X86::BI__builtin_ia32_pmulhuw512:
   case clang::X86::BI__builtin_ia32_pmulhw128:
   case clang::X86::BI__builtin_ia32_pmulhw256:
-  case clang::X86::BI__builtin_ia32_pmulhw512: {
+  case clang::X86::BI__builtin_ia32_pmulhw512:
+  case clang::X86::BI__builtin_ia32_psllv2di:
+  case clang::X86::BI__builtin_ia32_psllv4di:
+  case clang::X86::BI__builtin_ia32_psllv4si:
+  case clang::X86::BI__builtin_ia32_psllv8si:
+  case clang::X86::BI__builtin_ia32_psrav4si:
+  case clang::X86::BI__builtin_ia32_psrav8si:
+  case clang::X86::BI__builtin_ia32_psrlv2di:
+  case clang::X86::BI__builtin_ia32_psrlv4di:
+  case clang::X86::BI__builtin_ia32_psrlv4si:
+  case clang::X86::BI__builtin_ia32_psrlv8si:{
     APValue SourceLHS, SourceRHS;
     if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
         !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
       return false;
 
     QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
     unsigned SourceLen = SourceLHS.getVectorLength();
     SmallVector<APValue, 4> ResultElements;
     ResultElements.reserve(SourceLen);
@@ -11687,12 +11698,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
       case Builtin::BI__builtin_elementwise_add_sat:
         ResultElements.push_back(APValue(
             APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS),
-                   DestEltTy->isUnsignedIntegerOrEnumerationType())));
+                   DestUnsigned)));
         break;
       case Builtin::BI__builtin_elementwise_sub_sat:
         ResultElements.push_back(APValue(
             APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
-                   DestEltTy->isUnsignedIntegerOrEnumerationType())));
+                   DestUnsigned)));
         break;
       case clang::X86::BI__builtin_ia32_pmulhuw128:
       case clang::X86::BI__builtin_ia32_pmulhuw256:
@@ -11706,6 +11717,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
         ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS),
                                                 /*isUnsigned=*/false)));
         break;
+      case clang::X86::BI__builtin_ia32_psllv2di:
+      case clang::X86::BI__builtin_ia32_psllv4di:
+      case clang::X86::BI__builtin_ia32_psllv4si:
+      case clang::X86::BI__builtin_ia32_psllv8si:
+        if (RHS.uge(RHS.getBitWidth())) {
+          ResultElements.push_back(
+              APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned)));
+          break;
+        }
+        ResultElements.push_back(
+            APValue(APSInt(LHS.shl(RHS.getZExtValue()), DestUnsigned)));
+        break;
+      case clang::X86::BI__builtin_ia32_psrav4si:
+      case clang::X86::BI__builtin_ia32_psrav8si:
+        if (RHS.uge(RHS.getBitWidth())) {
+          ResultElements.push_back(
+              APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned)));
+          break;
+        }
+        ResultElements.push_back(
+            APValue(APSInt(LHS.ashr(RHS.getZExtValue()), DestUnsigned)));
+        break;
+      case clang::X86::BI__builtin_ia32_psrlv2di:
+      case clang::X86::BI__builtin_ia32_psrlv4di:
+      case clang::X86::BI__builtin_ia32_psrlv4si:
+      case clang::X86::BI__builtin_ia32_psrlv8si:
+        if (RHS.uge(RHS.getBitWidth())) {
+          ResultElements.push_back(
+              APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned)));
+          break;
+        }
+        ResultElements.push_back(
+            APValue(APSInt(LHS.lshr(RHS.getZExtValue()), DestUnsigned)));
+        break;
       }
     }
 
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index c7e1c4446e85d..ce5b2b7544d8c 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3721,7 +3721,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sllv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
@@ -3743,7 +3743,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_sllv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
@@ -3765,7 +3765,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sllv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);
@@ -3787,7 +3787,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [2 x i64] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_sllv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);
@@ -3810,7 +3810,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srav_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
@@ -3833,7 +3833,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srav_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
@@ -3855,7 +3855,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srlv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
@@ -3877,7 +3877,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srlv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
@@ -3899,7 +3899,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srlv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);
@@ -3921,7 +3921,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [2 x i64] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srlv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 5b252fa315ef8..29cb3e8860be9 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -327,7 +327,6 @@ __m256i test_mm256_cvtepi8_epi16(__m128i a) {
   // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
   return _mm256_cvtepi8_epi16(a);
 }
-
 TEST_CONSTEXPR(match_v16hi(_mm256_cvtepi8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12));
 
 __m256i test_mm256_cvtepi8_epi32(__m128i a) {
@@ -336,7 +335,6 @@ __m256i test_mm256_cvtepi8_epi32(__m128i a) {
   // CHECK: sext <8 x i8> %{{.*}} to <8 x i32>
   return _mm256_cvtepi8_epi32(a);
 }
-
 TEST_CONSTEXPR(match_v8si(_mm256_cvtepi8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4));
 
 __m256i test_mm256_cvtepi8_epi64(__m128i a) {
@@ -345,7 +343,6 @@ __m256i test_mm256_cvtepi8_epi64(__m128i a) {
   // CHECK: sext <4 x i8> %{{.*}} to <4 x i64>
   return _mm256_cvtepi8_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepi8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0));
 
 __m256i test_mm256_cvtepi16_epi32(__m128i a) {
@@ -353,7 +350,6 @@ __m256i test_mm256_cvtepi16_epi32(__m128i a) {
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
   return _mm256_cvtepi16_epi32(a);
 }
-
 TEST_CONSTEXPR(match_v8si(_mm256_cvtepi16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0, 1, -2, 3, -4));
 
 __m256i test_mm256_cvtepi16_epi64(__m128i a) {
@@ -362,7 +358,6 @@ __m256i test_mm256_cvtepi16_epi64(__m128i a) {
   // CHECK: sext <4 x i16> %{{.*}} to <4 x i64>
   return _mm256_cvtepi16_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepi16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0));
 
 __m256i test_mm256_cvtepi32_epi64(__m128i a) {
@@ -370,7 +365,6 @@ __m256i test_mm256_cvtepi32_epi64(__m128i a) {
   // CHECK: sext <4 x i32> %{{.*}} to <4 x i64>
   return _mm256_cvtepi32_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepi32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), -70000, 2, -1, 0));
 
 __m256i test_mm256_cvtepu8_epi16(__m128i a) {
@@ -378,7 +372,6 @@ __m256i test_mm256_cvtepu8_epi16(__m128i a) {
   // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
   return _mm256_cvtepu8_epi16(a);
 }
-
 TEST_CONSTEXPR(match_v16hi(_mm256_cvtepu8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252, 5, 250, 7, 248, 9, 246, 11, 244));
 
 __m256i test_mm256_cvtepu8_epi32(__m128i a) {
@@ -387,7 +380,6 @@ __m256i test_mm256_cvtepu8_epi32(__m128i a) {
   // CHECK: zext <8 x i8> %{{.*}} to <8 x i32>
   return _mm256_cvtepu8_epi32(a);
 }
-
 TEST_CONSTEXPR(match_v8si(_mm256_cvtepu8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252));
 
 __m256i test_mm256_cvtepu8_epi64(__m128i a) {
@@ -396,7 +388,6 @@ __m256i test_mm256_cvtepu8_epi64(__m128i a) {
   // CHECK: zext <4 x i8> %{{.*}} to <4 x i64>
   return _mm256_cvtepu8_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepu8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0));
 
 __m256i test_mm256_cvtepu16_epi32(__m128i a) {
@@ -404,7 +395,6 @@ __m256i test_mm256_cvtepu16_epi32(__m128i a) {
   // CHECK: zext <8 x i16> {{.*}} to <8 x i32>
   return _mm256_cvtepu16_epi32(a);
 }
-
 TEST_CONSTEXPR(match_v8si(_mm256_cvtepu16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));
 
 __m256i test_mm256_cvtepu16_epi64(__m128i a) {
@@ -413,7 +403,6 @@ __m256i test_mm256_cvtepu16_epi64(__m128i a) {
   // CHECK: zext <4 x i16> %{{.*}} to <4 x i64>
   return _mm256_cvtepu16_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0));
 
 __m256i test_mm256_cvtepu32_epi64(__m128i a) {
@@ -421,7 +410,6 @@ __m256i test_mm256_cvtepu32_epi64(__m128i a) {
   // CHECK: zext <4 x i32> %{{.*}} to <4 x i64>
   return _mm256_cvtepu32_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepu32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), 4294897296, 2, 4294967295, 0));
 
 __m128i test0_mm256_extracti128_si256_0(__m256i a) {
@@ -1120,24 +1108,28 @@ __m128i test_mm_sllv_epi32(__m128i a, __m128i b) {
   // CHECK: call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   return _mm_sllv_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v4si(_mm_sllv_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 2, -8, 24, 0));
 
 __m256i test_mm256_sllv_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_sllv_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
   return _mm256_sllv_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_sllv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 2, -8, 24, -64, 0, 0, 0, 0));
 
 __m128i test_mm_sllv_epi64(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_sllv_epi64
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_sllv_epi64(a, b);
 }
+TEST_CONSTEXPR(match_m128i(_mm_sllv_epi64((__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 256, 0x8000000000000000ULL));
 
 __m256i test_mm256_sllv_epi64(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_sllv_epi64
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_sllv_epi64(a, b);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_sllv_epi64((__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 2, -8, 24, 0));
 
 __m256i test_mm256_sra_epi16(__m256i a, __m128i b) {
   // CHECK-LABEL: test_mm256_sra_epi16
@@ -1180,12 +1172,14 @@ __m128i test_mm_srav_epi32(__m128i a, __m128i b) {
   // CHECK: call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   return _mm_srav_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v4si(_mm_srav_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, -1, 0, -1));
 
 __m256i test_mm256_srav_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_srav_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
   return _mm256_srav_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_srav_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, -1, 0, -1, 0, -1, 0, -1));
 
 __m256i test_mm256_srl_epi16(__m256i a, __m128i b) {
   // CHECK-LABEL: test_mm256_srl_epi16
@@ -1252,24 +1246,28 @@ __m128i test_mm_srlv_epi32(__m128i a, __m128i b) {
   // CHECK: call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   return _mm_srlv_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v4si(_mm_srlv_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, 1073741823, 0, 0));
 
 __m256i test_mm256_srlv_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_srlv_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
   return _mm256_srlv_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_srlv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3,...
[truncated]

Copy link

github-actions bot commented Aug 21, 2025

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions c,cpp,h -- clang/lib/AST/ExprConstant.cpp clang/lib/Headers/avx2intrin.h clang/test/CodeGen/X86/avx2-builtins.c
View the diff from clang-format here.
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 9b934753b..261b3f490 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11679,7 +11679,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case clang::X86::BI__builtin_ia32_psrlv2di:
   case clang::X86::BI__builtin_ia32_psrlv4di:
   case clang::X86::BI__builtin_ia32_psrlv4si:
-  case clang::X86::BI__builtin_ia32_psrlv8si:{
+  case clang::X86::BI__builtin_ia32_psrlv8si: {
     APValue SourceLHS, SourceRHS;
     if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
         !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index ce5b2b754..a23b8e0bb 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3722,8 +3722,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sllv_epi32(__m256i __X, __m256i __Y)
-{
+_mm256_sllv_epi32(__m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
 }
 
@@ -3744,8 +3743,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y)
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_sllv_epi32(__m128i __X, __m128i __Y)
-{
+_mm_sllv_epi32(__m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
 }
 
@@ -3766,8 +3764,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y)
 ///    bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sllv_epi64(__m256i __X, __m256i __Y)
-{
+_mm256_sllv_epi64(__m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);
 }
 
@@ -3788,8 +3785,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y)
 ///    bits).
 /// \returns A 128-bit vector of [2 x i64] containing the result.
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_sllv_epi64(__m128i __X, __m128i __Y)
-{
+_mm_sllv_epi64(__m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);
 }
 
@@ -3811,8 +3807,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y)
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_srav_epi32(__m256i __X, __m256i __Y)
-{
+_mm256_srav_epi32(__m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
 }
 
@@ -3834,8 +3829,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y)
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_srav_epi32(__m128i __X, __m128i __Y)
-{
+_mm_srav_epi32(__m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
 }
 
@@ -3856,8 +3850,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y)
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_srlv_epi32(__m256i __X, __m256i __Y)
-{
+_mm256_srlv_epi32(__m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
 }
 
@@ -3878,8 +3871,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y)
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_srlv_epi32(__m128i __X, __m128i __Y)
-{
+_mm_srlv_epi32(__m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
 }
 
@@ -3900,8 +3892,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y)
 ///    bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_srlv_epi64(__m256i __X, __m256i __Y)
-{
+_mm256_srlv_epi64(__m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);
 }
 
@@ -3922,8 +3913,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y)
 ///    bits).
 /// \returns A 128-bit vector of [2 x i64] containing the result.
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_srlv_epi64(__m128i __X, __m128i __Y)
-{
+_mm_srlv_epi64(__m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);
 }
 

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@RKSimon RKSimon merged commit 8d7df8b into llvm:main Aug 22, 2025
8 of 9 checks passed
@RKSimon RKSimon deleted the x86-avx2-shift-constexpr branch August 22, 2025 08:24
RKSimon added a commit to RKSimon/llvm-project that referenced this pull request Aug 29, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants