[DAG] Constant fold ISD::FSHL/FSHR nodes #154480
Conversation
@llvm/pr-subscribers-backend-x86 Author: XChy (XChy)

Changes: Fixes #153612. Full diff: https://github.com/llvm/llvm-project/pull/154480.diff (2 files affected):
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 68ea72c732e1e..533eb8bffe8f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6740,6 +6740,18 @@ static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
}
return std::nullopt;
}
+
+static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
+ const APInt &C2, const APInt &C3) {
+  switch (Opcode) {
+  case ISD::FSHL:
+    return APIntOps::fshl(C1, C2, C3);
+  case ISD::FSHR:
+    return APIntOps::fshr(C1, C2, C3);
+  }
+ return std::nullopt;
+}
+
// Handle constant folding with UNDEF.
// TODO: Handle more cases.
static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1,
@@ -7054,6 +7066,30 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
}
+ // Handle trinary-op special cases.
+ if (NumOps == 3) {
+ // TODO: Handle FP math if ternary support is added to foldConstantFPMath
+ auto *C1 = dyn_cast<ConstantSDNode>(Ops[0]);
+ auto *C2 = dyn_cast<ConstantSDNode>(Ops[1]);
+ auto *C3 = dyn_cast<ConstantSDNode>(Ops[2]);
+
+ if (C1 && C2 && C3) {
+ if (C1->isOpaque() || C2->isOpaque() || C3->isOpaque())
+ return SDValue();
+
+ std::optional<APInt> FoldAttempt =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue(),
+ C3->getAPIntValue());
+ if (!FoldAttempt)
+ return SDValue();
+
+ SDValue Folded = getConstant(*FoldAttempt, DL, VT);
+ assert((!Folded || !VT.isVector()) &&
+ "Can't fold vectors ops with scalar operands");
+ return Folded;
+ }
+ }
+
// This is for vector folding only from here on.
if (!VT.isVector())
return SDValue();
diff --git a/llvm/test/CodeGen/X86/fshl-fshr-constant.ll b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
new file mode 100644
index 0000000000000..e2a72799d8872
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK-EXPAND
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi2 | FileCheck %s --check-prefix=CHECK-UNEXPAND
+
+define <4 x i32> @test_fshl_constants() {
+; CHECK-EXPAND-LABEL: test_fshl_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vmovaps {{.*#+}} xmm0 = [0,512,2048,6144]
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,1,2,3]
+; CHECK-UNEXPAND-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_splat_constants() {
+; CHECK-EXPAND-LABEL: test_fshl_splat_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vbroadcastss {{.*#+}} xmm0 = [256,256,256,256]
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_splat_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vbroadcastss {{.*#+}} xmm0 = [256,256,256,256]
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_two_constants(<4 x i32> %a) {
+; CHECK-EXPAND-LABEL: test_fshl_two_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_two_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_one_constant(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-EXPAND-LABEL: test_fshl_one_constant:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_one_constant:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_none_constant(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-EXPAND-LABEL: test_fshl_none_constant:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; CHECK-EXPAND-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; CHECK-EXPAND-NEXT: vpsrld $1, %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpand %xmm3, %xmm2, %xmm2
+; CHECK-EXPAND-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_none_constant:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpshldvd %xmm2, %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_constants() {
+; CHECK-EXPAND-LABEL: test_fshr_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vmovaps {{.*#+}} xmm0 = [0,8388608,8388608,6291456]
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,1,2,3]
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_two_constants(<4 x i32> %a) {
+; CHECK-EXPAND-LABEL: test_fshr_two_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_two_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-UNEXPAND-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_one_constant(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-EXPAND-LABEL: test_fshr_one_constant:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_one_constant:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-UNEXPAND-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_none_constant(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-EXPAND-LABEL: test_fshr_none_constant:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; CHECK-EXPAND-NEXT: vpand %xmm3, %xmm2, %xmm4
+; CHECK-EXPAND-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; CHECK-EXPAND-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_none_constant:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpshrdvd %xmm2, %xmm0, %xmm1
+; CHECK-UNEXPAND-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_splat_constants() {
+; CHECK-EXPAND-LABEL: test_fshr_splat_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vbroadcastss {{.*#+}} xmm0 = [16777216,16777216,16777216,16777216]
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_splat_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vbroadcastss {{.*#+}} xmm0 = [16777216,16777216,16777216,16777216]
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+ ret <4 x i32> %res
+}
@llvm/pr-subscribers-llvm-selectiondag Author: XChy (XChy)

Changes: Fixes #153612. Full diff: https://github.com/llvm/llvm-project/pull/154480.diff
✅ With the latest revision this PR passed the C/C++ code formatter.
// Constant folding.
if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2, N3}))
  return V;
break;
better to do this in DAGCombiner::visitFunnelShift
IIUC, it doesn't work for constant vectors without the modification in getNode. The constant fold for vectors depends on the constant fold in getNode:

SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
How about adding it to the bottom of getNode before memoization - like we do for the 2-op getNode? Keep it in visitFunnelShift as well if you can.
Thanks. That sounds good to me.
Hmm, a quick try shows that some operators like VECTOR_COMPRESS cause the assertion in FoldConstantArithmetic. I will look into the reason later.
FoldConstantArithmetic would try folding the non-arithmetic node vector_compress <1, 2>, <3, 4>, <5, 6> to a scalar version <(vector_compress 1, 3, 5), (vector_compress 2, 4, 6)>. That's illegal.
For other vector operators like VECTOR_SPLICE and VSELECT, it's illegal as well. Should we handle specific arithmetic operators at the bottom?
Cheers - ideally we can get this dealt with so we can pull out the constant folding for ISD::FMA/FMAD as well.

Sorry for not noticing this response. What do you mean by "pull out the constant folding ISD::FMA/FMAD"? Is it moving the fold in getNode to FoldConstantArithmetic?
Yes. I don't want to derail this patch. So if you have to add back FSHL/R cases and constant fold there, then do so and we can look at this and FMA/FMAD later.
Thanks, though I have refactored and pulled out the FMA/FMAD fold yesterday. Feel free to review the current implementation if you have any problem.
LGTM with a couple of minors
LGTM
Fixes #153612.

This patch handles trinary scalar integer constants for FSHL/FSHR in FoldConstantArithmetic. Pending until #153790 is merged.