-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[DAGCombine] Propagate nuw when evaluating sub with narrower types #156710
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86
Author: Yingwei Zheng (dtcxzyw)
Changes:
Proof: https://alive2.llvm.org/ce/z/cdbzSL
Full diff: https://github.com/llvm/llvm-project/pull/156710.diff
2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bed3c42473e27..6310f7270ceaf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16317,7 +16317,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
- return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+ SDNodeFlags Flags;
+ // Propagate nuw for sub.
+ if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
+ DAG.MaskedValueIsZero(
+ N0->getOperand(0),
+ APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+ VT.getScalarSizeInBits())))
+ Flags.setNoUnsignedWrap(true);
+ return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
}
}
break;
diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 9323cd5b1917f..3909744793dde 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -938,3 +938,228 @@ define i128 @lshr_shl_mask(i128 %a0) {
%2 = lshr i128 %1, 1
ret i128 %2
}
+
+define i128 @shift_i128_limited_shamt(i128 noundef %a, i32 noundef %b) {
+; i686-LABEL: shift_i128_limited_shamt:
+; i686: # %bb.0: # %start
+; i686-NEXT: pushl %ebp
+; i686-NEXT: .cfi_def_cfa_offset 8
+; i686-NEXT: .cfi_offset %ebp, -8
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: .cfi_def_cfa_register %ebp
+; i686-NEXT: pushl %ebx
+; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: subl $16, %esp
+; i686-NEXT: .cfi_offset %esi, -20
+; i686-NEXT: .cfi_offset %edi, -16
+; i686-NEXT: .cfi_offset %ebx, -12
+; i686-NEXT: movl 28(%ebp), %esi
+; i686-NEXT: movl 32(%ebp), %eax
+; i686-NEXT: movb $6, %dl
+; i686-NEXT: subb 40(%ebp), %dl
+; i686-NEXT: movl %edx, %ecx
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: movl %esi, %ebx
+; i686-NEXT: movl %esi, %edi
+; i686-NEXT: shrl %ebx
+; i686-NEXT: notb %cl
+; i686-NEXT: shrl %cl, %ebx
+; i686-NEXT: orl %eax, %ebx
+; i686-NEXT: movl 24(%ebp), %esi
+; i686-NEXT: movl %esi, %eax
+; i686-NEXT: movl %edx, %ecx
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: shldl %cl, %esi, %edi
+; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT: movl 8(%ebp), %edi
+; i686-NEXT: movl 36(%ebp), %esi
+; i686-NEXT: movl 32(%ebp), %edx
+; i686-NEXT: shldl %cl, %edx, %esi
+; i686-NEXT: movl %esi, 12(%edi)
+; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT: movl %ecx, 4(%edi)
+; i686-NEXT: movl %eax, (%edi)
+; i686-NEXT: movl %ebx, 8(%edi)
+; i686-NEXT: movl %edi, %eax
+; i686-NEXT: leal -12(%ebp), %esp
+; i686-NEXT: popl %esi
+; i686-NEXT: popl %edi
+; i686-NEXT: popl %ebx
+; i686-NEXT: popl %ebp
+; i686-NEXT: .cfi_def_cfa %esp, 4
+; i686-NEXT: retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt:
+; x86_64: # %bb.0: # %start
+; x86_64-NEXT: movq %rdi, %rax
+; x86_64-NEXT: movb $6, %cl
+; x86_64-NEXT: subb %dl, %cl
+; x86_64-NEXT: shldq %cl, %rdi, %rsi
+; x86_64-NEXT: shlq %cl, %rax
+; x86_64-NEXT: movq %rsi, %rdx
+; x86_64-NEXT: retq
+start:
+ %shamt = sub nuw nsw i32 6, %b
+ %ext = zext nneg i32 %shamt to i128
+ %res = shl i128 %a, %ext
+ ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_no_nuw(i128 noundef %a, i32 noundef %b) {
+; i686-LABEL: shift_i128_limited_shamt_no_nuw:
+; i686: # %bb.0: # %start
+; i686-NEXT: pushl %ebp
+; i686-NEXT: .cfi_def_cfa_offset 8
+; i686-NEXT: .cfi_offset %ebp, -8
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: .cfi_def_cfa_register %ebp
+; i686-NEXT: pushl %ebx
+; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: subl $48, %esp
+; i686-NEXT: .cfi_offset %esi, -20
+; i686-NEXT: .cfi_offset %edi, -16
+; i686-NEXT: .cfi_offset %ebx, -12
+; i686-NEXT: movzbl 40(%ebp), %eax
+; i686-NEXT: movl 24(%ebp), %ecx
+; i686-NEXT: movl 28(%ebp), %edx
+; i686-NEXT: movl 32(%ebp), %esi
+; i686-NEXT: movl 36(%ebp), %edi
+; i686-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; i686-NEXT: movb $6, %cl
+; i686-NEXT: subb %al, %cl
+; i686-NEXT: movl %ecx, %eax
+; i686-NEXT: shrb $3, %al
+; i686-NEXT: andb $12, %al
+; i686-NEXT: negb %al
+; i686-NEXT: movsbl %al, %eax
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, (%esp)
+; i686-NEXT: movl 20(%esp,%eax), %edx
+; i686-NEXT: movl 24(%esp,%eax), %ebx
+; i686-NEXT: movl %ebx, %edi
+; i686-NEXT: shldl %cl, %edx, %edi
+; i686-NEXT: movl 16(%esp,%eax), %esi
+; i686-NEXT: movl 28(%esp,%eax), %eax
+; i686-NEXT: shldl %cl, %ebx, %eax
+; i686-NEXT: movl 8(%ebp), %ebx
+; i686-NEXT: movl %eax, 12(%ebx)
+; i686-NEXT: movl %edi, 8(%ebx)
+; i686-NEXT: movl %esi, %eax
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: shldl %cl, %esi, %edx
+; i686-NEXT: movl %edx, 4(%ebx)
+; i686-NEXT: movl %eax, (%ebx)
+; i686-NEXT: movl %ebx, %eax
+; i686-NEXT: leal -12(%ebp), %esp
+; i686-NEXT: popl %esi
+; i686-NEXT: popl %edi
+; i686-NEXT: popl %ebx
+; i686-NEXT: popl %ebp
+; i686-NEXT: .cfi_def_cfa %esp, 4
+; i686-NEXT: retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_no_nuw:
+; x86_64: # %bb.0: # %start
+; x86_64-NEXT: movb $6, %cl
+; x86_64-NEXT: subb %dl, %cl
+; x86_64-NEXT: shldq %cl, %rdi, %rsi
+; x86_64-NEXT: shlq %cl, %rdi
+; x86_64-NEXT: xorl %eax, %eax
+; x86_64-NEXT: testb $64, %cl
+; x86_64-NEXT: cmovneq %rdi, %rsi
+; x86_64-NEXT: cmoveq %rdi, %rax
+; x86_64-NEXT: movq %rsi, %rdx
+; x86_64-NEXT: retq
+start:
+ %shamt = sub nsw i32 6, %b
+ %ext = zext nneg i32 %shamt to i128
+ %res = shl i128 %a, %ext
+ ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_unknown_lhs(i128 noundef %a, i32 noundef %b, i32 noundef %c) {
+; i686-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; i686: # %bb.0: # %start
+; i686-NEXT: pushl %ebp
+; i686-NEXT: .cfi_def_cfa_offset 8
+; i686-NEXT: .cfi_offset %ebp, -8
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: .cfi_def_cfa_register %ebp
+; i686-NEXT: pushl %ebx
+; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: subl $48, %esp
+; i686-NEXT: .cfi_offset %esi, -20
+; i686-NEXT: .cfi_offset %edi, -16
+; i686-NEXT: .cfi_offset %ebx, -12
+; i686-NEXT: movl 24(%ebp), %eax
+; i686-NEXT: movl 28(%ebp), %edx
+; i686-NEXT: movl 32(%ebp), %esi
+; i686-NEXT: movl 36(%ebp), %edi
+; i686-NEXT: movl 44(%ebp), %ecx
+; i686-NEXT: subl 40(%ebp), %ecx
+; i686-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, (%esp)
+; i686-NEXT: movl %ecx, %eax
+; i686-NEXT: shrb $3, %al
+; i686-NEXT: andb $12, %al
+; i686-NEXT: negb %al
+; i686-NEXT: movsbl %al, %eax
+; i686-NEXT: movl 20(%esp,%eax), %edx
+; i686-NEXT: movl 24(%esp,%eax), %ebx
+; i686-NEXT: movl %ebx, %edi
+; i686-NEXT: shldl %cl, %edx, %edi
+; i686-NEXT: movl 16(%esp,%eax), %esi
+; i686-NEXT: movl 28(%esp,%eax), %eax
+; i686-NEXT: shldl %cl, %ebx, %eax
+; i686-NEXT: movl 8(%ebp), %ebx
+; i686-NEXT: movl %eax, 12(%ebx)
+; i686-NEXT: movl %edi, 8(%ebx)
+; i686-NEXT: movl %esi, %eax
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: # kill: def $cl killed $cl killed $ecx
+; i686-NEXT: shldl %cl, %esi, %edx
+; i686-NEXT: movl %edx, 4(%ebx)
+; i686-NEXT: movl %eax, (%ebx)
+; i686-NEXT: movl %ebx, %eax
+; i686-NEXT: leal -12(%ebp), %esp
+; i686-NEXT: popl %esi
+; i686-NEXT: popl %edi
+; i686-NEXT: popl %ebx
+; i686-NEXT: popl %ebp
+; i686-NEXT: .cfi_def_cfa %esp, 4
+; i686-NEXT: retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; x86_64: # %bb.0: # %start
+; x86_64-NEXT: subl %edx, %ecx
+; x86_64-NEXT: shldq %cl, %rdi, %rsi
+; x86_64-NEXT: shlq %cl, %rdi
+; x86_64-NEXT: xorl %eax, %eax
+; x86_64-NEXT: testb $64, %cl
+; x86_64-NEXT: cmovneq %rdi, %rsi
+; x86_64-NEXT: cmoveq %rdi, %rax
+; x86_64-NEXT: movq %rsi, %rdx
+; x86_64-NEXT: retq
+start:
+ %shamt = sub nuw nsw i32 %c, %b
+ %ext = zext nneg i32 %shamt to i128
+ %res = shl i128 %a, %ext
+ ret i128 %res
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one trivial
LLVM Buildbot has detected a new failure on a builder.
Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/26672
Here is the relevant piece of the build log for reference:
|
Proof: https://alive2.llvm.org/ce/z/cdbzSL
Closes #156559.