Skip to content

Conversation

YLChenZ
Copy link
Contributor

@YLChenZ YLChenZ commented Aug 27, 2025

[InstSimplify] Fold (icmp eq a, c1) | (icmp ult f(a), c2) to icmp ult f(a), c2.
@YLChenZ YLChenZ requested a review from nikic as a code owner August 27, 2025 16:33
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels Aug 27, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 27, 2025

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: None (YLChenZ)

Changes

Close #143957.
alive2: https://alive2.llvm.org/ce/z/wc2PXT
godbolt: https://godbolt.org/z/o8nqYz577


Full diff: https://github.com/llvm/llvm-project/pull/155650.diff

2 Files Affected:

  • (modified) llvm/lib/Analysis/InstructionSimplify.cpp (+5-2)
  • (modified) llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll (+104)
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5907e21065331..1642ce07ad685 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1933,13 +1933,16 @@ static Value *simplifyAndOrWithICmpEq(unsigned Opcode, Value *Op0, Value *Op1,
   // In the final case (Res == Absorber with inverted predicate), it is safe to
   // refine poison during simplification, but not undef. For simplicity always
   // disable undef-based folds here.
+  // Allow one extra recursion level for this speculative replace+simplify,
+  // because some folds require more than MaxRecurse replacements to appear.
+  unsigned LocalMaxRecurse = MaxRecurse ? MaxRecurse + 1 : 1;
   if (Value *Res = simplifyWithOpReplaced(Op1, A, B, Q.getWithoutUndef(),
                                           /* AllowRefinement */ true,
-                                          /* DropFlags */ nullptr, MaxRecurse))
+                                          /* DropFlags */ nullptr, LocalMaxRecurse))
     return Simplify(Res);
   if (Value *Res = simplifyWithOpReplaced(Op1, B, A, Q.getWithoutUndef(),
                                           /* AllowRefinement */ true,
-                                          /* DropFlags */ nullptr, MaxRecurse))
+                                          /* DropFlags */ nullptr, LocalMaxRecurse))
     return Simplify(Res);
 
   return nullptr;
diff --git a/llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll b/llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll
index 99e1dd4528697..aeae8a9880929 100644
--- a/llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll
+++ b/llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll
@@ -347,3 +347,107 @@ define i1 @pr98753(i32 noundef %x, i32 %y) {
 }
 
 declare i1 @llvm.is.constant.i1(i1)
+
+
+define i1 @or_icmp_fold(i64 %arg0) {
+; CHECK-LABEL: @or_icmp_fold(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[ARG0:%.*]], 32
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP2]], 55296
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], -1114112
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP4]], -1112064
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %1 = lshr i64 %arg0, 32
+  %2 = trunc nuw i64 %1 to i32
+  %3 = xor i32 %2, 55296
+  %4 = add i32 %3, -1114112
+  %5 = icmp ult i32 %4, -1112064
+  %6 = icmp eq i64 %1, 1114112
+  %7 = or i1 %6, %5
+  ret i1 %7
+}
+
+
+define i1 @or_icmp_fold_negative(i64 %arg0) {
+; CHECK-LABEL: @or_icmp_fold_negative(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[ARG0:%.*]], 32
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP2]], 55296
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], -1114112
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 1000
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP1]], 1114112
+; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    ret i1 [[TMP7]]
+;
+  %1 = lshr i64 %arg0, 32
+  %2 = trunc nuw i64 %1 to i32
+  %3 = xor i32 %2, 55296
+  %4 = add i32 %3, -1114112
+  %5 = icmp ult i32 %4, 1000
+  %6 = icmp eq i64 %1, 1114112
+  %7 = or i1 %6, %5
+  ret i1 %7
+}
+
+declare void @use(i32)
+
+define i1 @or_icmp_fold_multi_use(i64 %arg0) {
+; CHECK-LABEL: @or_icmp_fold_multi_use(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[ARG0:%.*]], 32
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP2]], 55296
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], -1114112
+; CHECK-NEXT:    call void @use(i32 [[TMP4]])
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP4]], -1112064
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %1 = lshr i64 %arg0, 32
+  %2 = trunc nuw i64 %1 to i32
+  %3 = xor i32 %2, 55296
+  %4 = add i32 %3, -1114112
+  call void @use(i32 %4)
+  %5 = icmp ult i32 %4, -1112064
+  %6 = icmp eq i64 %1, 1114112
+  %7 = or i1 %6, %5
+  ret i1 %7
+}
+
+define i1 @or_icmp_fold_commuted(i64 %arg0) {
+; CHECK-LABEL: @or_icmp_fold_commuted(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[ARG0:%.*]], 32
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP2]], 55296
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], -1114112
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP4]], -1112064
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %1 = lshr i64 %arg0, 32
+  %2 = trunc nuw i64 %1 to i32
+  %3 = xor i32 %2, 55296
+  %4 = add i32 %3, -1114112
+  %5 = icmp ult i32 %4, -1112064
+  %6 = icmp eq i64 %1, 1114112
+  %7 = or i1 %5, %6
+  ret i1 %7
+}
+
+
+define <2 x i1> @or_icmp_fold_vec(<2 x i64> %arg0) {
+; CHECK-LABEL: @or_icmp_fold_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[ARG0:%.*]], splat (i64 32)
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], splat (i32 55296)
+; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP3]], splat (i32 -1114112)
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult <2 x i32> [[TMP4]], splat (i32 -1112064)
+; CHECK-NEXT:    ret <2 x i1> [[TMP5]]
+;
+  %1 = lshr <2 x i64> %arg0, <i64 32, i64 32>
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  %3 = xor <2 x i32> %2, <i32 55296, i32 55296>
+  %4 = add <2 x i32> %3, <i32 -1114112, i32 -1114112>
+  %5 = icmp ult <2 x i32> %4, <i32 -1112064, i32 -1112064>
+  %6 = icmp eq <2 x i64> %1, <i64 1114112, i64 1114112>
+  %7 = or <2 x i1> %6, %5
+  ret <2 x i1> %7
+}

Copy link

github-actions bot commented Aug 27, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU llvm:analysis Includes value tracking, cost tables and constant folding llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Missed optimization: fold (icmp eq a, c1) | (icmp ult f(a), c2) to icmp ult f(a), c2
2 participants