Skip to content

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Aug 25, 2025

This patch removes bound checks that are dominated by bounded memory accesses. For example, if we have an array int A[5] and A[idx] is performed successfully, we know that idx u< 5 after the load.

compile-time impact (+0.1%): https://llvm-compile-time-tracker.com/compare.php?from=f0e9bba024d44b55d54b02025623ce4a3ba5a37c&to=5227b08a4a514159ec524d1b1ca18ed8ab5407df&stat=instructions%3Au
llvm-opt-benchmark: dtcxzyw/llvm-opt-benchmark#2709

Proof: https://alive2.llvm.org/ce/z/JEyjA2

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Aug 25, 2025

@zyw-bot csmith-quick-fuzz

@dtcxzyw dtcxzyw requested review from nikic and fhahn August 25, 2025 17:37
@dtcxzyw dtcxzyw marked this pull request as ready for review August 25, 2025 17:37
@llvmbot
Copy link
Member

llvmbot commented Aug 25, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch removes bound checks that are dominated by bounded memory accesses. For example, if we have an array int A[5] and A[idx] is performed successfully, we know that idx u&lt; 5 after the load.

compile-time impact (+0.1%): https://llvm-compile-time-tracker.com/compare.php?from=7129198e85d62b79025dc7f5ab861d9d3d244318&amp;to=d11c4d1c391f269badcf675f301b982ee25cf742&amp;stat=instructions:u
llvm-opt-benchmark: dtcxzyw/llvm-opt-benchmark#2709


Full diff: https://github.com/llvm/llvm-project/pull/155253.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/ConstraintElimination.cpp (+99-7)
  • (added) llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll (+272)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 1ddb8ae9518fc..d4b1cb80b5803 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -19,9 +19,11 @@
 #include "llvm/Analysis/ConstraintSystem.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
@@ -170,10 +172,12 @@ struct State {
   DominatorTree &DT;
   LoopInfo &LI;
   ScalarEvolution &SE;
+  TargetLibraryInfo &TLI;
   SmallVector<FactOrCheck, 64> WorkList;
 
-  State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE)
-      : DT(DT), LI(LI), SE(SE) {}
+  State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE,
+        TargetLibraryInfo &TLI)
+      : DT(DT), LI(LI), SE(SE), TLI(TLI) {}
 
   /// Process block \p BB and add known facts to work-list.
   void addInfoFor(BasicBlock &BB);
@@ -1109,10 +1113,43 @@ void State::addInfoForInductions(BasicBlock &BB) {
   }
 }
 
+static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
+                                          uint64_t AccessSize,
+                                          CmpPredicate &Pred, Value *&A,
+                                          Value *&B, const DataLayout &DL,
+                                          const TargetLibraryInfo &TLI) {
+  auto Offset = collectOffsets(cast<GEPOperator>(GEP), DL);
+  if (!Offset.NW.isInBounds())
+    return false;
+
+  if (Offset.VariableOffsets.size() != 1)
+    return false;
+
+  ObjectSizeOpts Opts;
+  // Workaround for gep inbounds, ptr null, idx.
+  Opts.NullIsUnknownSize = true;
+  ObjectSizeOffsetVisitor Visitor(DL, &TLI, GEP.getContext(), Opts);
+  SizeOffsetAPInt Data = Visitor.compute(Offset.BasePtr);
+  if (!Data.bothKnown() || !Data.Offset.isZero())
+    return false;
+
+  // Index * Scale + ConstOffset + AccessSize <= AllocSize
+  uint64_t BitWidth = Offset.ConstantOffset.getBitWidth();
+  auto &[Index, Scale] = Offset.VariableOffsets.front();
+  APInt MaxIndex =
+      (Data.Size - APInt(BitWidth, AccessSize) - Offset.ConstantOffset)
+          .udiv(Scale);
+  Pred = ICmpInst::ICMP_ULE;
+  A = Index;
+  B = ConstantInt::get(Index->getType(), MaxIndex);
+  return true;
+}
+
 void State::addInfoFor(BasicBlock &BB) {
   addInfoForInductions(BB);
+  auto &DL = BB.getDataLayout();
 
-  // True as long as long as the current instruction is guaranteed to execute.
+  // True as long as the current instruction is guaranteed to execute.
   bool GuaranteedToExecute = true;
   // Queue conditions and assumes.
   for (Instruction &I : BB) {
@@ -1127,6 +1164,38 @@ void State::addInfoFor(BasicBlock &BB) {
       continue;
     }
 
+    auto AddFactFromMemoryAccess = [&](Value *Ptr, Type *AccessType) {
+      auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+      if (!GEP)
+        return;
+      TypeSize AccessSize = DL.getTypeStoreSize(AccessType);
+      if (!AccessSize.isFixed())
+        return;
+      if (GuaranteedToExecute) {
+        CmpPredicate Pred;
+        Value *A, *B;
+        if (getConstraintFromMemoryAccess(*GEP, AccessSize.getFixedValue(),
+                                          Pred, A, B, DL, TLI)) {
+          // The memory access is guaranteed to execute when BB is entered,
+          // hence the constraint holds on entry to BB.
+          WorkList.emplace_back(FactOrCheck::getConditionFact(
+              DT.getNode(I.getParent()), Pred, A, B));
+        }
+      } else {
+        WorkList.emplace_back(
+            FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I));
+      }
+    };
+
+    if (auto *LI = dyn_cast<LoadInst>(&I)) {
+      if (LI->isSimple())
+        AddFactFromMemoryAccess(LI->getPointerOperand(), LI->getAccessType());
+    }
+    if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      if (SI->isSimple())
+        AddFactFromMemoryAccess(SI->getPointerOperand(), SI->getAccessType());
+    }
+
     auto *II = dyn_cast<IntrinsicInst>(&I);
     Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
     switch (ID) {
@@ -1420,7 +1489,7 @@ static std::optional<bool> checkCondition(CmpInst::Predicate Pred, Value *A,
   LLVM_DEBUG(dbgs() << "Checking " << *CheckInst << "\n");
 
   auto R = Info.getConstraintForSolving(Pred, A, B);
-  if (R.empty() || !R.isValid(Info)){
+  if (R.empty() || !R.isValid(Info)) {
     LLVM_DEBUG(dbgs() << "   failed to decompose condition\n");
     return std::nullopt;
   }
@@ -1785,12 +1854,13 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
 
 static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
                                  ScalarEvolution &SE,
-                                 OptimizationRemarkEmitter &ORE) {
+                                 OptimizationRemarkEmitter &ORE,
+                                 TargetLibraryInfo &TLI) {
   bool Changed = false;
   DT.updateDFSNumbers();
   SmallVector<Value *> FunctionArgs(llvm::make_pointer_range(F.args()));
   ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
-  State S(DT, LI, SE);
+  State S(DT, LI, SE, TLI);
   std::unique_ptr<Module> ReproducerModule(
       DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
 
@@ -1960,6 +2030,26 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
         }
         continue;
       }
+
+      auto &DL = F.getDataLayout();
+      auto AddFactsAboutIndices = [&](Value *Ptr, Type *AccessType) {
+        CmpPredicate Pred;
+        Value *A, *B;
+        if (getConstraintFromMemoryAccess(
+                *cast<GetElementPtrInst>(Ptr),
+                DL.getTypeStoreSize(AccessType).getFixedValue(), Pred, A, B, DL,
+                TLI))
+          AddFact(Pred, A, B);
+      };
+
+      if (auto *LI = dyn_cast<LoadInst>(CB.Inst)) {
+        AddFactsAboutIndices(LI->getPointerOperand(), LI->getAccessType());
+        continue;
+      }
+      if (auto *SI = dyn_cast<StoreInst>(CB.Inst)) {
+        AddFactsAboutIndices(SI->getPointerOperand(), SI->getAccessType());
+        continue;
+      }
     }
 
     Value *A = nullptr, *B = nullptr;
@@ -2018,10 +2108,12 @@ PreservedAnalyses ConstraintEliminationPass::run(Function &F,
   auto &LI = AM.getResult<LoopAnalysis>(F);
   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  if (!eliminateConstraints(F, DT, LI, SE, ORE))
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  if (!eliminateConstraints(F, DT, LI, SE, ORE, TLI))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
+  PA.preserve<TargetLibraryAnalysis>();
   PA.preserve<DominatorTreeAnalysis>();
   PA.preserve<LoopAnalysis>();
   PA.preserve<ScalarEvolutionAnalysis>();
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
new file mode 100644
index 0000000000000..e3ac4ee5d1c2a
--- /dev/null
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -0,0 +1,272 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s
+
+@g = private unnamed_addr constant [5 x i8] c"test\00"
+
+declare void @free(ptr allocptr noundef captures(none)) mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare ptr @malloc(i64) mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare void @callee(i1)
+
+define i8 @load_global(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i1 @store_global(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
+; CHECK-NEXT:    ret i1 true
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  store i8 0, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  ret i1 %cmp
+}
+
+define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_byval(
+; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_alloca(i64 %idx) {
+; CHECK-LABEL: define i8 @load_alloca(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[ALLOC:%.*]] = alloca [5 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %alloc = alloca [5 x i8], align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+  %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_malloc(i64 %idx) {
+; CHECK-LABEL: define i8 @load_malloc(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 5)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    call void @free(ptr [[ALLOC]])
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %alloc = call ptr @malloc(i64 5)
+  call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+  %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  call void @free(ptr %alloc)
+  ret i8 %add
+}
+
+define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+  %load = load i32, ptr %gep
+  %cmp = icmp ult i64 %idx, 7
+  %zext = zext i1 %cmp to i32
+  %add = add i32 %load, %zext
+  ret i32 %add
+}
+
+define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_may_noreturn_dom_bb(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    call void @callee(i1 [[CMP1]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    br label %[[NEXT:.*]]
+; CHECK:       [[NEXT]]:
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %cmp1 = icmp ult i64 %idx, 5
+  call void @callee(i1 %cmp1) ; %cmp1 should not be simplified.
+  %load = load i8, ptr %gep
+  br label %next
+
+next:
+  %cmp2 = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp2 to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+; Negative tests.
+
+define i8 @load_from_non_gep(ptr %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_non_gep(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %load = load i8, ptr %p
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_multi_indices(i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: define i8 @load_global_multi_indices(
+; CHECK-SAME: i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]) {
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX1]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX1]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep1 = getelementptr inbounds i8, ptr @g, i64 %idx1
+  %gep2 = getelementptr inbounds i8, ptr %gep1, i64 %idx2
+  %load = load i8, ptr %gep2
+  %cmp = icmp ult i64 %idx1, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_without_inbounds(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_without_inbounds(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr i8, ptr @g, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i32 @load_byval_i32_smaller_range(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32_smaller_range(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+  %load = load i32, ptr %gep
+  %cmp = icmp ult i64 %idx, 6
+  %zext = zext i1 %cmp to i32
+  %add = add i32 %load, %zext
+  ret i32 %add
+}
+
+define i8 @load_global_volatile(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_volatile(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %load = load volatile i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_vscale(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_vscale(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 1 x i8>, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <vscale x 1 x i8> [[LOAD]], i64 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[EXT]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %load = load <vscale x 1 x i8>, ptr %gep
+  %ext = extractelement <vscale x 1 x i8> %load, i64 0
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %ext, %zext
+  ret i8 %add
+}
+
+define i8 @load_from_null(i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_null(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr null, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}

@nikic
Copy link
Contributor

nikic commented Aug 26, 2025

compile-time impact (+0.1%): https://llvm-compile-time-tracker.com/compare.php?from=7129198e85d62b79025dc7f5ab861d9d3d244318&to=d11c4d1c391f269badcf675f301b982ee25cf742&stat=instructions:u

Can you check whether this is only due to the extra facts, or whether the object size analysis contributes to this?

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Aug 26, 2025

compile-time impact (+0.1%): https://llvm-compile-time-tracker.com/compare.php?from=7129198e85d62b79025dc7f5ab861d9d3d244318&to=d11c4d1c391f269badcf675f301b982ee25cf742&stat=instructions:u

Can you check whether this is only due to the extra facts, or whether the object size analysis contributes to this?

Compared to my first draft commit, it is mainly due to the extra facts: https://llvm-compile-time-tracker.com/compare.php?from=de67be436a69fc2e7cb2cd5717e732aa6df0488e&to=a98537f1678fd87fc1b1f7972498144ec79360fd&stat=instructions%3Au

@dtcxzyw dtcxzyw force-pushed the perf/constelim-gep-indices branch from d28609b to 5227b08 Compare September 1, 2025 14:01
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@dtcxzyw dtcxzyw merged commit 89f53af into llvm:main Sep 2, 2025
9 checks passed
@dtcxzyw dtcxzyw deleted the perf/constelim-gep-indices branch September 2, 2025 13:41
Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very nice, thanks!

@mikaelholmen
Copy link
Collaborator

Hi @dtcxzyw

The following starts crashing with this patch:

opt -passes=constraint-elimination bbi-110220.ll -o /dev/null

It crashes with:

opt: ../lib/IR/Constants.cpp:963: static Constant *llvm::ConstantInt::get(Type *, const APInt &): Assertion `C->getType() == Ty->getScalarType() && "ConstantInt type doesn't match the type implied by its value!"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: build-all/bin/opt -passes=constraint-elimination bbi-110220.ll -o /dev/null
1.	Running pass "function(constraint-elimination)" on module "bbi-110220.ll"
2.	Running pass "constraint-elimination" on function "main"
 #0 0x0000558e6828d286 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (build-all/bin/opt+0x47f7286)
 #1 0x0000558e6828a815 llvm::sys::RunSignalHandlers() (build-all/bin/opt+0x47f4815)
 #2 0x0000558e6828e409 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007f52f8c29990 __restore_rt (/lib64/libpthread.so.0+0x12990)
 #4 0x00007f52f65c952f raise (/lib64/libc.so.6+0x4e52f)
 #5 0x00007f52f659ce65 abort (/lib64/libc.so.6+0x21e65)
 #6 0x00007f52f659cd39 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d39)
 #7 0x00007f52f65c1e86 (/lib64/libc.so.6+0x46e86)
 #8 0x0000558e68369aad llvm::ConstantInt::get(llvm::Type*, llvm::APInt const&) (build-all/bin/opt+0x48d3aad)
 #9 0x0000558e69aec61a getConstraintFromMemoryAccess(llvm::GetElementPtrInst&, unsigned long, llvm::CmpPredicate&, llvm::Value*&, llvm::Value*&, llvm::DataLayout const&, llvm::TargetLibraryInfo const&) ConstraintElimination.cpp:0:0
#10 0x0000558e69aebf89 (anonymous namespace)::State::addInfoFor(llvm::BasicBlock&)::$_0::operator()(llvm::Value*, llvm::Type*) const ConstraintElimination.cpp:0:0
#11 0x0000558e69ae4c93 eliminateConstraints(llvm::Function&, llvm::DominatorTree&, llvm::LoopInfo&, llvm::ScalarEvolution&, llvm::OptimizationRemarkEmitter&, llvm::TargetLibraryInfo&) ConstraintElimination.cpp:0:0
#12 0x0000558e69ae3b3d llvm::ConstraintEliminationPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x604db3d)
#13 0x0000558e69748abd llvm::detail::PassModel<llvm::Function, llvm::ConstraintEliminationPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#14 0x0000558e684a5ba5 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x4a0fba5)
#15 0x0000558e6974a3fd llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#16 0x0000558e684aa4ae llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a144ae)
#17 0x0000558e696d90fd llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) NewPMDriver.cpp:0:0
#18 0x0000558e684a4875 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a0e875)
#19 0x0000558e696d1e40 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool, bool) (build-all/bin/opt+0x5c3be40)
#20 0x0000558e6822e578 optMain (build-all/bin/opt+0x4798578)
#21 0x00007f52f65b57e5 __libc_start_main (/lib64/libc.so.6+0x3a7e5)
#22 0x0000558e6822bfae _start (build-all/bin/opt+0x4795fae)
Abort (core dumped)

bbi-110220.ll.gz

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Sep 3, 2025

Hi @dtcxzyw

The following starts crashing with this patch:

opt -passes=constraint-elimination bbi-110220.ll -o /dev/null

It crashes with:

opt: ../lib/IR/Constants.cpp:963: static Constant *llvm::ConstantInt::get(Type *, const APInt &): Assertion `C->getType() == Ty->getScalarType() && "ConstantInt type doesn't match the type implied by its value!"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: build-all/bin/opt -passes=constraint-elimination bbi-110220.ll -o /dev/null
1.	Running pass "function(constraint-elimination)" on module "bbi-110220.ll"
2.	Running pass "constraint-elimination" on function "main"
 #0 0x0000558e6828d286 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (build-all/bin/opt+0x47f7286)
 #1 0x0000558e6828a815 llvm::sys::RunSignalHandlers() (build-all/bin/opt+0x47f4815)
 #2 0x0000558e6828e409 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007f52f8c29990 __restore_rt (/lib64/libpthread.so.0+0x12990)
 #4 0x00007f52f65c952f raise (/lib64/libc.so.6+0x4e52f)
 #5 0x00007f52f659ce65 abort (/lib64/libc.so.6+0x21e65)
 #6 0x00007f52f659cd39 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d39)
 #7 0x00007f52f65c1e86 (/lib64/libc.so.6+0x46e86)
 #8 0x0000558e68369aad llvm::ConstantInt::get(llvm::Type*, llvm::APInt const&) (build-all/bin/opt+0x48d3aad)
 #9 0x0000558e69aec61a getConstraintFromMemoryAccess(llvm::GetElementPtrInst&, unsigned long, llvm::CmpPredicate&, llvm::Value*&, llvm::Value*&, llvm::DataLayout const&, llvm::TargetLibraryInfo const&) ConstraintElimination.cpp:0:0
#10 0x0000558e69aebf89 (anonymous namespace)::State::addInfoFor(llvm::BasicBlock&)::$_0::operator()(llvm::Value*, llvm::Type*) const ConstraintElimination.cpp:0:0
#11 0x0000558e69ae4c93 eliminateConstraints(llvm::Function&, llvm::DominatorTree&, llvm::LoopInfo&, llvm::ScalarEvolution&, llvm::OptimizationRemarkEmitter&, llvm::TargetLibraryInfo&) ConstraintElimination.cpp:0:0
#12 0x0000558e69ae3b3d llvm::ConstraintEliminationPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x604db3d)
#13 0x0000558e69748abd llvm::detail::PassModel<llvm::Function, llvm::ConstraintEliminationPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#14 0x0000558e684a5ba5 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x4a0fba5)
#15 0x0000558e6974a3fd llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#16 0x0000558e684aa4ae llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a144ae)
#17 0x0000558e696d90fd llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) NewPMDriver.cpp:0:0
#18 0x0000558e684a4875 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a0e875)
#19 0x0000558e696d1e40 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool, bool) (build-all/bin/opt+0x5c3be40)
#20 0x0000558e6822e578 optMain (build-all/bin/opt+0x4798578)
#21 0x00007f52f65b57e5 __libc_start_main (/lib64/libc.so.6+0x3a7e5)
#22 0x0000558e6822bfae _start (build-all/bin/opt+0x4795fae)
Abort (core dumped)

bbi-110220.ll.gz

I will post a fix later. Thanks!

dtcxzyw added a commit that referenced this pull request Sep 3, 2025
In most cases, GEPs should be canonicalized by InstCombine. Bail out on
non-canonical forms for simplicity.
Fixes
#155253 (comment).
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Sep 3, 2025
In most cases, GEPs should be canonicalized by InstCombine. Bail out on
non-canonical forms for simplicity.
Fixes
llvm/llvm-project#155253 (comment).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants