[ConstraintElim] Use constraints from bounded memory accesses #155253
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch removes bound checks that are dominated by bounded memory accesses. For example, if we have an array int A[5] and an access A[idx] executes successfully, we know that idx u< 5 after the load.

compile-time impact (+0.1%): https://llvm-compile-time-tracker.com/compare.php?from=7129198e85d62b79025dc7f5ab861d9d3d244318&to=d11c4d1c391f269badcf675f301b982ee25cf742&stat=instructions:u

Full diff: https://github.com/llvm/llvm-project/pull/155253.diff

2 Files Affected:
- llvm/lib/Transforms/Scalar/ConstraintElimination.cpp (modified)
- llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll (added)
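Before the diff, a source-level sketch of the pattern this targets (a hypothetical C++ example of mine, not taken from the patch; the function name is made up):

int A[5];

// After the in-bounds access A[idx] executes, idx must be less than 5
// (anything else would be undefined behavior), so the explicit bound
// check that follows is redundant and can be folded to true.
int read_checked(unsigned idx) {
  int v = A[idx];   // bounded access: establishes idx u< 5
  if (idx < 5)      // dominated by the access above, provably true
    return v;
  return 0;
}

The IR tests added below (load_global and friends) exercise the same pattern directly on the pass.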
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 1ddb8ae9518fc..d4b1cb80b5803 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -19,9 +19,11 @@
#include "llvm/Analysis/ConstraintSystem.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -170,10 +172,12 @@ struct State {
DominatorTree &DT;
LoopInfo &LI;
ScalarEvolution &SE;
+ TargetLibraryInfo &TLI;
SmallVector<FactOrCheck, 64> WorkList;
- State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE)
- : DT(DT), LI(LI), SE(SE) {}
+ State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE,
+ TargetLibraryInfo &TLI)
+ : DT(DT), LI(LI), SE(SE), TLI(TLI) {}
/// Process block \p BB and add known facts to work-list.
void addInfoFor(BasicBlock &BB);
@@ -1109,10 +1113,43 @@ void State::addInfoForInductions(BasicBlock &BB) {
}
}
+static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
+ uint64_t AccessSize,
+ CmpPredicate &Pred, Value *&A,
+ Value *&B, const DataLayout &DL,
+ const TargetLibraryInfo &TLI) {
+ auto Offset = collectOffsets(cast<GEPOperator>(GEP), DL);
+ if (!Offset.NW.isInBounds())
+ return false;
+
+ if (Offset.VariableOffsets.size() != 1)
+ return false;
+
+ ObjectSizeOpts Opts;
+ // Workaround for gep inbounds, ptr null, idx.
+ Opts.NullIsUnknownSize = true;
+ ObjectSizeOffsetVisitor Visitor(DL, &TLI, GEP.getContext(), Opts);
+ SizeOffsetAPInt Data = Visitor.compute(Offset.BasePtr);
+ if (!Data.bothKnown() || !Data.Offset.isZero())
+ return false;
+
+ // Index * Scale + ConstOffset + AccessSize <= AllocSize
+ uint64_t BitWidth = Offset.ConstantOffset.getBitWidth();
+ auto &[Index, Scale] = Offset.VariableOffsets.front();
+ APInt MaxIndex =
+ (Data.Size - APInt(BitWidth, AccessSize) - Offset.ConstantOffset)
+ .udiv(Scale);
+ Pred = ICmpInst::ICMP_ULE;
+ A = Index;
+ B = ConstantInt::get(Index->getType(), MaxIndex);
+ return true;
+}
+
void State::addInfoFor(BasicBlock &BB) {
addInfoForInductions(BB);
+ auto &DL = BB.getDataLayout();
- // True as long as long as the current instruction is guaranteed to execute.
+ // True as long as the current instruction is guaranteed to execute.
bool GuaranteedToExecute = true;
// Queue conditions and assumes.
for (Instruction &I : BB) {
@@ -1127,6 +1164,38 @@ void State::addInfoFor(BasicBlock &BB) {
continue;
}
+ auto AddFactFromMemoryAccess = [&](Value *Ptr, Type *AccessType) {
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return;
+ TypeSize AccessSize = DL.getTypeStoreSize(AccessType);
+ if (!AccessSize.isFixed())
+ return;
+ if (GuaranteedToExecute) {
+ CmpPredicate Pred;
+ Value *A, *B;
+ if (getConstraintFromMemoryAccess(*GEP, AccessSize.getFixedValue(),
+ Pred, A, B, DL, TLI)) {
+ // The memory access is guaranteed to execute when BB is entered,
+ // hence the constraint holds on entry to BB.
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ DT.getNode(I.getParent()), Pred, A, B));
+ }
+ } else {
+ WorkList.emplace_back(
+ FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I));
+ }
+ };
+
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isSimple())
+ AddFactFromMemoryAccess(LI->getPointerOperand(), LI->getAccessType());
+ }
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ if (SI->isSimple())
+ AddFactFromMemoryAccess(SI->getPointerOperand(), SI->getAccessType());
+ }
+
auto *II = dyn_cast<IntrinsicInst>(&I);
Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
switch (ID) {
@@ -1420,7 +1489,7 @@ static std::optional<bool> checkCondition(CmpInst::Predicate Pred, Value *A,
LLVM_DEBUG(dbgs() << "Checking " << *CheckInst << "\n");
auto R = Info.getConstraintForSolving(Pred, A, B);
- if (R.empty() || !R.isValid(Info)){
+ if (R.empty() || !R.isValid(Info)) {
LLVM_DEBUG(dbgs() << " failed to decompose condition\n");
return std::nullopt;
}
@@ -1785,12 +1854,13 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
ScalarEvolution &SE,
- OptimizationRemarkEmitter &ORE) {
+ OptimizationRemarkEmitter &ORE,
+ TargetLibraryInfo &TLI) {
bool Changed = false;
DT.updateDFSNumbers();
SmallVector<Value *> FunctionArgs(llvm::make_pointer_range(F.args()));
ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
- State S(DT, LI, SE);
+ State S(DT, LI, SE, TLI);
std::unique_ptr<Module> ReproducerModule(
DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
@@ -1960,6 +2030,26 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
}
continue;
}
+
+ auto &DL = F.getDataLayout();
+ auto AddFactsAboutIndices = [&](Value *Ptr, Type *AccessType) {
+ CmpPredicate Pred;
+ Value *A, *B;
+ if (getConstraintFromMemoryAccess(
+ *cast<GetElementPtrInst>(Ptr),
+ DL.getTypeStoreSize(AccessType).getFixedValue(), Pred, A, B, DL,
+ TLI))
+ AddFact(Pred, A, B);
+ };
+
+ if (auto *LI = dyn_cast<LoadInst>(CB.Inst)) {
+ AddFactsAboutIndices(LI->getPointerOperand(), LI->getAccessType());
+ continue;
+ }
+ if (auto *SI = dyn_cast<StoreInst>(CB.Inst)) {
+ AddFactsAboutIndices(SI->getPointerOperand(), SI->getAccessType());
+ continue;
+ }
}
Value *A = nullptr, *B = nullptr;
@@ -2018,10 +2108,12 @@ PreservedAnalyses ConstraintEliminationPass::run(Function &F,
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- if (!eliminateConstraints(F, DT, LI, SE, ORE))
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ if (!eliminateConstraints(F, DT, LI, SE, ORE, TLI))
return PreservedAnalyses::all();
PreservedAnalyses PA;
+ PA.preserve<TargetLibraryAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<ScalarEvolutionAnalysis>();
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
new file mode 100644
index 0000000000000..e3ac4ee5d1c2a
--- /dev/null
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -0,0 +1,272 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s
+
+@g = private unnamed_addr constant [5 x i8] c"test\00"
+
+declare void @free(ptr allocptr noundef captures(none)) mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare ptr @malloc(i64) mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare void @callee(i1)
+
+define i8 @load_global(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i1 @store_global(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
+; CHECK-NEXT: ret i1 true
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ store i8 0, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ ret i1 %cmp
+}
+
+define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_byval(
+; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_alloca(i64 %idx) {
+; CHECK-LABEL: define i8 @load_alloca(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ALLOC:%.*]] = alloca [5 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %alloc = alloca [5 x i8], align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+ %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_malloc(i64 %idx) {
+; CHECK-LABEL: define i8 @load_malloc(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 5)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: call void @free(ptr [[ALLOC]])
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %alloc = call ptr @malloc(i64 5)
+ call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+ %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ call void @free(ptr %alloc)
+ ret i8 %add
+}
+
+define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i32
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+ %load = load i32, ptr %gep
+ %cmp = icmp ult i64 %idx, 7
+ %zext = zext i1 %cmp to i32
+ %add = add i32 %load, %zext
+ ret i32 %add
+}
+
+define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_may_noreturn_dom_bb(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: call void @callee(i1 [[CMP1]])
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: br label %[[NEXT:.*]]
+; CHECK: [[NEXT]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %cmp1 = icmp ult i64 %idx, 5
+ call void @callee(i1 %cmp1) ; %cmp1 should not be simplified.
+ %load = load i8, ptr %gep
+ br label %next
+
+next:
+ %cmp2 = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp2 to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+; Negative tests.
+
+define i8 @load_from_non_gep(ptr %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_non_gep(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %load = load i8, ptr %p
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_global_multi_indices(i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: define i8 @load_global_multi_indices(
+; CHECK-SAME: i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]) {
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX1]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep1 = getelementptr inbounds i8, ptr @g, i64 %idx1
+ %gep2 = getelementptr inbounds i8, ptr %gep1, i64 %idx2
+ %load = load i8, ptr %gep2
+ %cmp = icmp ult i64 %idx1, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_global_without_inbounds(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_without_inbounds(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr i8, ptr @g, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i32 @load_byval_i32_smaller_range(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32_smaller_range(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+ %load = load i32, ptr %gep
+ %cmp = icmp ult i64 %idx, 6
+ %zext = zext i1 %cmp to i32
+ %add = add i32 %load, %zext
+ ret i32 %add
+}
+
+define i8 @load_global_volatile(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_volatile(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %load = load volatile i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_global_vscale(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_vscale(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 1 x i8>, ptr [[GEP]], align 1
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <vscale x 1 x i8> [[LOAD]], i64 0
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[EXT]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %load = load <vscale x 1 x i8>, ptr %gep
+ %ext = extractelement <vscale x 1 x i8> %load, i64 0
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %ext, %zext
+ ret i8 %add
+}
+
+define i8 @load_from_null(i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_null(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr null, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
Can you check whether this is only due to the extra facts, or whether the object size analysis contributes to this?
Compared to my first draft commit, it is mainly due to the extra facts: https://llvm-compile-time-tracker.com/compare.php?from=de67be436a69fc2e7cb2cd5717e732aa6df0488e&to=a98537f1678fd87fc1b1f7972498144ec79360fd&stat=instructions%3Au
(force-pushed from d28609b to 5227b08)
LGTM
very nice, thanks!
Hi @dtcxzyw The following starts crashing with this patch:
It crashes with:
I will post a fix later. Thanks!
In most cases, GEPs should be canonicalized by InstCombine. Bail out on non-canonical forms for simplicity. Fixes #155253 (comment).
This patch removes bound checks that are dominated by bounded memory accesses. For example, if we have an array int A[5] and an access A[idx] executes successfully, we know that idx u< 5 after the load.
compile-time impact (+0.1%): https://llvm-compile-time-tracker.com/compare.php?from=f0e9bba024d44b55d54b02025623ce4a3ba5a37c&to=5227b08a4a514159ec524d1b1ca18ed8ab5407df&stat=instructions%3Au
llvm-opt-benchmark: dtcxzyw/llvm-opt-benchmark#2709
Proof: https://alive2.llvm.org/ce/z/JEyjA2
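For reference, the bound that the new getConstraintFromMemoryAccess helper records follows the relation noted in the patch, Index * Scale + ConstOffset + AccessSize <= AllocSize. Worked for the int A[5] / A[idx] example from the description (assuming a 4-byte int, which is an assumption of this sketch):

  MaxIndex = (AllocSize - AccessSize - ConstOffset) udiv Scale
           = (20 - 4 - 0) udiv 4
           = 4

so the recorded fact is idx u<= 4, which is the same as the idx u< 5 stated above.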