
Conversation

XChy (Member) commented Sep 4, 2025

Address the TODO and compute the known bits from the intermediate multiply result.

llvmbot (Member) commented Sep 4, 2025

@llvm/pr-subscribers-backend-x86

Author: Hongyu Chen (XChy)

Changes

Address the TODO and compute the known bits from the intermediate multiply result.
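
For context, each 64-bit lane of VPMADD52L/VPMADD52H multiplies the low 52 bits of the two multiplicand operands into a 104-bit product, then adds the product's low (L) or high (H) 52 bits to the accumulator operand. A minimal standalone scalar sketch of that semantics (an illustration, not the LLVM implementation), checked against the test1_* cases in the diff below:

#include <cassert>
#include <cstdint>

// Scalar model of one 64-bit lane: the low 52 bits of a and b form a
// 104-bit product; the L form adds the product's low 52 bits to acc,
// the H form adds the high 52 bits.
static uint64_t vpmadd52l(uint64_t acc, uint64_t a, uint64_t b) {
  const uint64_t M52 = (1ULL << 52) - 1;
  unsigned __int128 prod = (unsigned __int128)(a & M52) * (b & M52);
  return acc + ((uint64_t)prod & M52);
}

static uint64_t vpmadd52h(uint64_t acc, uint64_t a, uint64_t b) {
  const uint64_t M52 = (1ULL << 52) - 1;
  unsigned __int128 prod = (unsigned __int128)(a & M52) * (b & M52);
  return acc + (uint64_t)(prod >> 52);
}

int main() {
  const uint64_t vals[] = {0, ~0ULL};
  for (uint64_t x0 : vals) {
    for (uint64_t x1 : vals) {
      // test1_knownbits_vpmadd52l: the product is 0 or 16, so bit 0 of
      // the result is the accumulator's bit 0 and (madd & 1) folds to 1.
      assert((vpmadd52l(1, x0 & 4, x1 & 4) & 1) == 1);
      // test1_knownbits_vpmadd52h: the high half contributes 0 or 256,
      // both even, so (madd & 3) folds to 3.
      assert((vpmadd52h(3, x0 & (1ULL << 30), x1 & (1ULL << 30)) & 3) == 3);
    }
  }
}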


Full diff: https://github.com/llvm/llvm-project/pull/156847.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+7-3)
  • (modified) llvm/test/CodeGen/X86/combine-vpmadd52.ll (+108)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 572cfdad3c93b..923af28c8cf34 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44964,7 +44964,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
   }
   case X86ISD::VPMADD52L:
   case X86ISD::VPMADD52H: {
-    KnownBits KnownOp0, KnownOp1;
+    KnownBits KnownOp0, KnownOp1, KnownOp2;
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
     SDValue Op2 = Op.getOperand(2);
@@ -44979,6 +44979,10 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
                              TLO, Depth + 1))
       return true;
 
+    if (SimplifyDemandedBits(Op2, APInt::getAllOnes(64), OriginalDemandedElts,
+                             KnownOp2, TLO, Depth + 1))
+      return true;
+
     KnownBits KnownMul;
     KnownOp0 = KnownOp0.trunc(52);
     KnownOp1 = KnownOp1.trunc(52);
@@ -44993,8 +44997,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ADD, DL, VT, C, Op2));
     }
 
-    // TODO: Compute the known bits for VPMADD52L/VPMADD52H.
-    break;
+    Known = KnownBits::add(KnownMul, KnownOp2);
+    return false;
   }
   }
 
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
index 9afc1119267ec..2cb060ea92b14 100644
--- a/llvm/test/CodeGen/X86/combine-vpmadd52.ll
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
@@ -290,3 +290,111 @@ define <2 x i64> @test_vpmadd52h_mul_hi52_negative(<2 x i64> %x0, <2 x i64> %x1,
   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2)
   ret <2 x i64> %1
 }
+
+define <2 x i64> @test1_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test1_knownbits_vpmadd52l:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 4)
+  %and2 = and <2 x i64> %x1, splat (i64 4)
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test1_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test1_knownbits_vpmadd52h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [3,3]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
+  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 3), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 3)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test2_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test2_knownbits_vpmadd52l:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1234,1234]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 67108864) ; 1LL << 26
+  %and2 = and <2 x i64> %x1, splat (i64 33554432) ; 1LL << 25
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1234), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1234)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test2_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test2_knownbits_vpmadd52h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
+  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+  ; accumulator 1025 = (1LL << 10) + 1
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1025), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test3_knownbits_vpmadd52l_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_knownbits_vpmadd52l_negative:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [1,1]
+; AVX512-NEXT:    vpor %xmm2, %xmm1, %xmm1
+; AVX512-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX512-NEXT:    vpmadd52luq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test3_knownbits_vpmadd52l_negative:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX-NEXT:    {vex} vpmadd52luq %xmm1, %xmm0, %xmm3
+; AVX-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 67108865) ; (1LL << 26) + 1
+  %or = or <2 x i64> %x1, splat (i64 1)
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %or)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test3_knownbits_vpmadd52h_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_knownbits_vpmadd52h_negative:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [1,1]
+; AVX512-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX512-NEXT:    vpmadd52huq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test3_knownbits_vpmadd52h_negative:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX-NEXT:    {vex} vpmadd52huq %xmm1, %xmm0, %xmm3
+; AVX-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 4194304) ; 1LL << 22
+  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+  ; high 52 bits of the product are 0 or 1, so bit 0 of the sum is unknown
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
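
The *_negative tests above are expected to keep the intrinsic: after the masks, bit 0 of the result still depends on the inputs, so the new known-bits computation must not fold the final and. In test3_knownbits_vpmadd52l_negative, for example, bit 0 of %and1 is unknown while bit 0 of %or is known set, so bit 0 of the 52x52 product tracks bit 0 of %x0. A quick standalone check with the same scalar model as above (again a sketch, not part of the PR):

#include <cassert>
#include <cstdint>

int main() {
  // Low-half madd as before: acc + low 52 bits of (a[51:0] * b[51:0]).
  auto madd52l = [](uint64_t acc, uint64_t a, uint64_t b) {
    const uint64_t M52 = (1ULL << 52) - 1;
    return acc + (uint64_t)(((unsigned __int128)(a & M52) * (b & M52)) & M52);
  };
  // With the multiplier's bit 0 forced on, the result's bit 0 flips
  // with bit 0 of the other operand -- no constant to fold to.
  assert((madd52l(1, 1ULL << 26, 1) & 1) == 1);       // %x0 bit 0 clear
  assert((madd52l(1, (1ULL << 26) | 1, 1) & 1) == 0); // %x0 bit 0 set
}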

RKSimon (Collaborator) left a comment


LGTM - cheers

XChy merged commit 379e121 into llvm:main Sep 4, 2025
11 checks passed