Skip to content

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Sep 3, 2025

@dtcxzyw dtcxzyw requested review from RKSimon and topperc September 3, 2025 16:27
@llvmbot llvmbot added backend:X86 llvm:SelectionDAG SelectionDAGISel as well labels Sep 3, 2025
@llvmbot
Copy link
Member

llvmbot commented Sep 3, 2025

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: Yingwei Zheng (dtcxzyw)

Changes

Proof: https://alive2.llvm.org/ce/z/cdbzSL
Closes #156559.


Full diff: https://github.com/llvm/llvm-project/pull/156710.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+9-1)
  • (modified) llvm/test/CodeGen/X86/shift-i128.ll (+225)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bed3c42473e27..6310f7270ceaf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16317,7 +16317,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
-        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+        SDNodeFlags Flags;
+        // Propagate nuw for sub.
+        if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
+            DAG.MaskedValueIsZero(
+                N0->getOperand(0),
+                APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+                                      VT.getScalarSizeInBits())))
+          Flags.setNoUnsignedWrap(true);
+        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
       }
     }
     break;
diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 9323cd5b1917f..3909744793dde 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -938,3 +938,228 @@ define i128 @lshr_shl_mask(i128 %a0) {
   %2 = lshr i128 %1, 1
   ret i128 %2
 }
+
+define i128 @shift_i128_limited_shamt(i128 noundef %a, i32 noundef %b) {
+; i686-LABEL: shift_i128_limited_shamt:
+; i686:       # %bb.0: # %start
+; i686-NEXT:    pushl %ebp
+; i686-NEXT:    .cfi_def_cfa_offset 8
+; i686-NEXT:    .cfi_offset %ebp, -8
+; i686-NEXT:    movl %esp, %ebp
+; i686-NEXT:    .cfi_def_cfa_register %ebp
+; i686-NEXT:    pushl %ebx
+; i686-NEXT:    pushl %edi
+; i686-NEXT:    pushl %esi
+; i686-NEXT:    andl $-16, %esp
+; i686-NEXT:    subl $16, %esp
+; i686-NEXT:    .cfi_offset %esi, -20
+; i686-NEXT:    .cfi_offset %edi, -16
+; i686-NEXT:    .cfi_offset %ebx, -12
+; i686-NEXT:    movl 28(%ebp), %esi
+; i686-NEXT:    movl 32(%ebp), %eax
+; i686-NEXT:    movb $6, %dl
+; i686-NEXT:    subb 40(%ebp), %dl
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    movl %esi, %ebx
+; i686-NEXT:    movl %esi, %edi
+; i686-NEXT:    shrl %ebx
+; i686-NEXT:    notb %cl
+; i686-NEXT:    shrl %cl, %ebx
+; i686-NEXT:    orl %eax, %ebx
+; i686-NEXT:    movl 24(%ebp), %esi
+; i686-NEXT:    movl %esi, %eax
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    shldl %cl, %esi, %edi
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl 8(%ebp), %edi
+; i686-NEXT:    movl 36(%ebp), %esi
+; i686-NEXT:    movl 32(%ebp), %edx
+; i686-NEXT:    shldl %cl, %edx, %esi
+; i686-NEXT:    movl %esi, 12(%edi)
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:    movl %ecx, 4(%edi)
+; i686-NEXT:    movl %eax, (%edi)
+; i686-NEXT:    movl %ebx, 8(%edi)
+; i686-NEXT:    movl %edi, %eax
+; i686-NEXT:    leal -12(%ebp), %esp
+; i686-NEXT:    popl %esi
+; i686-NEXT:    popl %edi
+; i686-NEXT:    popl %ebx
+; i686-NEXT:    popl %ebp
+; i686-NEXT:    .cfi_def_cfa %esp, 4
+; i686-NEXT:    retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt:
+; x86_64:       # %bb.0: # %start
+; x86_64-NEXT:    movq %rdi, %rax
+; x86_64-NEXT:    movb $6, %cl
+; x86_64-NEXT:    subb %dl, %cl
+; x86_64-NEXT:    shldq %cl, %rdi, %rsi
+; x86_64-NEXT:    shlq %cl, %rax
+; x86_64-NEXT:    movq %rsi, %rdx
+; x86_64-NEXT:    retq
+start:
+  %shamt = sub nuw nsw i32 6, %b
+  %ext = zext nneg i32 %shamt to i128
+  %res = shl i128 %a, %ext
+  ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_no_nuw(i128 noundef %a, i32 noundef %b) {
+; i686-LABEL: shift_i128_limited_shamt_no_nuw:
+; i686:       # %bb.0: # %start
+; i686-NEXT:    pushl %ebp
+; i686-NEXT:    .cfi_def_cfa_offset 8
+; i686-NEXT:    .cfi_offset %ebp, -8
+; i686-NEXT:    movl %esp, %ebp
+; i686-NEXT:    .cfi_def_cfa_register %ebp
+; i686-NEXT:    pushl %ebx
+; i686-NEXT:    pushl %edi
+; i686-NEXT:    pushl %esi
+; i686-NEXT:    andl $-16, %esp
+; i686-NEXT:    subl $48, %esp
+; i686-NEXT:    .cfi_offset %esi, -20
+; i686-NEXT:    .cfi_offset %edi, -16
+; i686-NEXT:    .cfi_offset %ebx, -12
+; i686-NEXT:    movzbl 40(%ebp), %eax
+; i686-NEXT:    movl 24(%ebp), %ecx
+; i686-NEXT:    movl 28(%ebp), %edx
+; i686-NEXT:    movl 32(%ebp), %esi
+; i686-NEXT:    movl 36(%ebp), %edi
+; i686-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; i686-NEXT:    movb $6, %cl
+; i686-NEXT:    subb %al, %cl
+; i686-NEXT:    movl %ecx, %eax
+; i686-NEXT:    shrb $3, %al
+; i686-NEXT:    andb $12, %al
+; i686-NEXT:    negb %al
+; i686-NEXT:    movsbl %al, %eax
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, (%esp)
+; i686-NEXT:    movl 20(%esp,%eax), %edx
+; i686-NEXT:    movl 24(%esp,%eax), %ebx
+; i686-NEXT:    movl %ebx, %edi
+; i686-NEXT:    shldl %cl, %edx, %edi
+; i686-NEXT:    movl 16(%esp,%eax), %esi
+; i686-NEXT:    movl 28(%esp,%eax), %eax
+; i686-NEXT:    shldl %cl, %ebx, %eax
+; i686-NEXT:    movl 8(%ebp), %ebx
+; i686-NEXT:    movl %eax, 12(%ebx)
+; i686-NEXT:    movl %edi, 8(%ebx)
+; i686-NEXT:    movl %esi, %eax
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    shldl %cl, %esi, %edx
+; i686-NEXT:    movl %edx, 4(%ebx)
+; i686-NEXT:    movl %eax, (%ebx)
+; i686-NEXT:    movl %ebx, %eax
+; i686-NEXT:    leal -12(%ebp), %esp
+; i686-NEXT:    popl %esi
+; i686-NEXT:    popl %edi
+; i686-NEXT:    popl %ebx
+; i686-NEXT:    popl %ebp
+; i686-NEXT:    .cfi_def_cfa %esp, 4
+; i686-NEXT:    retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_no_nuw:
+; x86_64:       # %bb.0: # %start
+; x86_64-NEXT:    movb $6, %cl
+; x86_64-NEXT:    subb %dl, %cl
+; x86_64-NEXT:    shldq %cl, %rdi, %rsi
+; x86_64-NEXT:    shlq %cl, %rdi
+; x86_64-NEXT:    xorl %eax, %eax
+; x86_64-NEXT:    testb $64, %cl
+; x86_64-NEXT:    cmovneq %rdi, %rsi
+; x86_64-NEXT:    cmoveq %rdi, %rax
+; x86_64-NEXT:    movq %rsi, %rdx
+; x86_64-NEXT:    retq
+start:
+  %shamt = sub nsw i32 6, %b
+  %ext = zext nneg i32 %shamt to i128
+  %res = shl i128 %a, %ext
+  ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_unknown_lhs(i128 noundef %a, i32 noundef %b, i32 noundef %c) {
+; i686-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; i686:       # %bb.0: # %start
+; i686-NEXT:    pushl %ebp
+; i686-NEXT:    .cfi_def_cfa_offset 8
+; i686-NEXT:    .cfi_offset %ebp, -8
+; i686-NEXT:    movl %esp, %ebp
+; i686-NEXT:    .cfi_def_cfa_register %ebp
+; i686-NEXT:    pushl %ebx
+; i686-NEXT:    pushl %edi
+; i686-NEXT:    pushl %esi
+; i686-NEXT:    andl $-16, %esp
+; i686-NEXT:    subl $48, %esp
+; i686-NEXT:    .cfi_offset %esi, -20
+; i686-NEXT:    .cfi_offset %edi, -16
+; i686-NEXT:    .cfi_offset %ebx, -12
+; i686-NEXT:    movl 24(%ebp), %eax
+; i686-NEXT:    movl 28(%ebp), %edx
+; i686-NEXT:    movl 32(%ebp), %esi
+; i686-NEXT:    movl 36(%ebp), %edi
+; i686-NEXT:    movl 44(%ebp), %ecx
+; i686-NEXT:    subl 40(%ebp), %ecx
+; i686-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, (%esp)
+; i686-NEXT:    movl %ecx, %eax
+; i686-NEXT:    shrb $3, %al
+; i686-NEXT:    andb $12, %al
+; i686-NEXT:    negb %al
+; i686-NEXT:    movsbl %al, %eax
+; i686-NEXT:    movl 20(%esp,%eax), %edx
+; i686-NEXT:    movl 24(%esp,%eax), %ebx
+; i686-NEXT:    movl %ebx, %edi
+; i686-NEXT:    shldl %cl, %edx, %edi
+; i686-NEXT:    movl 16(%esp,%eax), %esi
+; i686-NEXT:    movl 28(%esp,%eax), %eax
+; i686-NEXT:    shldl %cl, %ebx, %eax
+; i686-NEXT:    movl 8(%ebp), %ebx
+; i686-NEXT:    movl %eax, 12(%ebx)
+; i686-NEXT:    movl %edi, 8(%ebx)
+; i686-NEXT:    movl %esi, %eax
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    # kill: def $cl killed $cl killed $ecx
+; i686-NEXT:    shldl %cl, %esi, %edx
+; i686-NEXT:    movl %edx, 4(%ebx)
+; i686-NEXT:    movl %eax, (%ebx)
+; i686-NEXT:    movl %ebx, %eax
+; i686-NEXT:    leal -12(%ebp), %esp
+; i686-NEXT:    popl %esi
+; i686-NEXT:    popl %edi
+; i686-NEXT:    popl %ebx
+; i686-NEXT:    popl %ebp
+; i686-NEXT:    .cfi_def_cfa %esp, 4
+; i686-NEXT:    retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; x86_64:       # %bb.0: # %start
+; x86_64-NEXT:    subl %edx, %ecx
+; x86_64-NEXT:    shldq %cl, %rdi, %rsi
+; x86_64-NEXT:    shlq %cl, %rdi
+; x86_64-NEXT:    xorl %eax, %eax
+; x86_64-NEXT:    testb $64, %cl
+; x86_64-NEXT:    cmovneq %rdi, %rsi
+; x86_64-NEXT:    cmoveq %rdi, %rax
+; x86_64-NEXT:    movq %rsi, %rdx
+; x86_64-NEXT:    retq
+start:
+  %shamt = sub nuw nsw i32 %c, %b
+  %ext = zext nneg i32 %shamt to i128
+  %res = shl i128 %a, %ext
+  ret i128 %res
+}

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with one trivial

@dtcxzyw dtcxzyw merged commit 0d29279 into llvm:main Sep 4, 2025
9 checks passed
@dtcxzyw dtcxzyw deleted the perf/fix-156559 branch September 4, 2025 02:17
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 4, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-aarch64-darwin running on doug-worker-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/26672

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'lld :: MinGW/driver.test' FAILED ********************
Exit Code: 127

Command Output (stdout):
--
# RUN: at line 1
/Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld -### foo.o -m i386pe 2>&1 | /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=X86 /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld '-###' foo.o -m i386pe
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=X86 /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# note: command had no output on stdout or stderr
# RUN: at line 7
echo "-### foo.o -m i386pe" > /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/tools/lld/test/MinGW/Output/driver.test.tmp.rsp
# executed command: echo '-### foo.o -m i386pe'
# note: command had no output on stdout or stderr
# RUN: at line 8
/Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld @/Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/tools/lld/test/MinGW/Output/driver.test.tmp.rsp 2>&1 | /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=X86 /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld @/Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/tools/lld/test/MinGW/Output/driver.test.tmp.rsp
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=X86 /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# note: command had no output on stdout or stderr
# RUN: at line 10
/Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld -### foo.o -m i386pep 2>&1 | /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=X64 /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld '-###' foo.o -m i386pep
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=X64 /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# note: command had no output on stdout or stderr
# RUN: at line 16
/Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld -### foo.o -m thumb2pe 2>&1 | /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=ARM /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld '-###' foo.o -m thumb2pe
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=ARM /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# note: command had no output on stdout or stderr
# RUN: at line 22
/Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld -### foo.o -m arm64pe 2>&1 | /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=ARM64 /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld '-###' foo.o -m arm64pe
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=ARM64 /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# note: command had no output on stdout or stderr
# RUN: at line 28
/Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld -### foo.o -m arm64ecpe 2>&1 | /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=ARM64EC /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld '-###' foo.o -m arm64ecpe
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=ARM64EC /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# note: command had no output on stdout or stderr
# RUN: at line 34
/Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld -### foo.o -m arm64xpe 2>&1 | /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=ARM64X /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/ld.lld '-###' foo.o -m arm64xpe
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=ARM64X /Users/buildbot/buildbot-root/llvm-project/lld/test/MinGW/driver.test
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 llvm:SelectionDAG SelectionDAGISel as well
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Poor i128 shift lowering
4 participants