-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[SelectionDAG] Add computeKnownBits for ISD::ROTL/ROTR. #156142
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag Author: Craig Topper (topperc) ChangesFull diff: https://github.com/llvm/llvm-project/pull/156142.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 967306ae37f45..3a3bcb77f5932 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3850,6 +3850,21 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false,
Op->getFlags().hasExact());
break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
+ unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // Canonicalize to ROTR.
+ if (Opcode == ISD::ROTL)
+ Amt = BitWidth - Amt;
+
+ Known.Zero = Known.Zero.rotr(Amt);
+ Known.One = Known.One.rotr(Amt);
+ }
+ break;
case ISD::FSHL:
case ISD::FSHR:
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) {
diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
index 9e398096bfcc5..693d1992091b4 100644
--- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
+++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
@@ -93,10 +93,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; SSE2-NEXT: psrld $1, %xmm0
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: por %xmm0, %xmm3
-; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm3, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [715827883,715827883,715827883,715827883]
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: p4_vector_urem_by_const__splat:
@@ -104,9 +102,9 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT: psrld $1, %xmm0
-; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882]
-; SSE4-NEXT: pminud %xmm0, %xmm1
-; SSE4-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
+; SSE4-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE4-NEXT: movdqa %xmm1, %xmm0
; SSE4-NEXT: retq
;
; AVX2-LABEL: p4_vector_urem_by_const__splat:
@@ -116,9 +114,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882]
-; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
+; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
%t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
%t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
diff --git a/llvm/test/CodeGen/X86/pr67333.ll b/llvm/test/CodeGen/X86/pr67333.ll
index 946380971988c..cbb730857506d 100644
--- a/llvm/test/CodeGen/X86/pr67333.ll
+++ b/llvm/test/CodeGen/X86/pr67333.ll
@@ -14,12 +14,12 @@ define void @SHA256_Compress_Generic(ptr noundef %ctx) #1 {
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2
; CHECK-NEXT: vpsrld $17, %xmm2, %xmm0
; CHECK-NEXT: vpslld $15, %xmm2, %xmm3
-; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vpsrld $19, %xmm2, %xmm3
+; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm3
+; CHECK-NEXT: vpsrld $19, %xmm2, %xmm0
; CHECK-NEXT: vpslld $13, %xmm2, %xmm4
-; CHECK-NEXT: vpor %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm3
-; CHECK-NEXT: vpxor %xmm2, %xmm3, %xmm0
+; CHECK-NEXT: vpor %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm4
; CHECK-NEXT: vpshufb %xmm1, %xmm4, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm1
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
||
// Canonicalize to ROTR. | ||
if (Opcode == ISD::ROTL) | ||
Amt = BitWidth - Amt; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I need to check that Amy is nonzero first
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
May it make sense to move this to KnownBits? This could be used in the middle end with the corresponding intrinsics.
|
||
// Canonicalize to ROTR. | ||
if (Opcode == ISD::ROTL) | ||
Amt = BitWidth - Amt; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1
Known.Zero = Known.Zero.rotr(Amt); | ||
Known.One = Known.One.rotr(Amt); | ||
} | ||
break; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it worth adding a general KnownBits::rotl/r handler do you think? Pow2 bitwidths in particular might be known in the bottom log2 bits (all that we care about for urem) but the upper bits might be unknown.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/125/builds/10125 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/52/builds/10811 Here is the relevant piece of the build log for the reference
|
No description provided.