diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c32731185afd0..d434d26439c12 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -159,6 +159,8 @@ class IndVarSimplify {
 
   bool sinkUnusedInvariants(Loop *L);
 
+  bool isIVInitValTargetType(Value *InitVal, unsigned TargetTypeSize);
+
 public:
   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                  const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -816,6 +818,37 @@ static bool isLoopCounter(PHINode* Phi, Loop *L,
           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
 }
 
+/// Determine whether the initial value of an IV fits in TargetTypeSize bits.
+/// A value whose type is no wider than TargetTypeSize fits trivially. A wider
+/// constant fits if it is representable as a signed TargetTypeSize-bit
+/// integer. A sext/zext fits if its (narrower) source operand does. Any other
+/// value is conservatively rejected.
+bool IndVarSimplify::isIVInitValTargetType(Value *InitVal,
+                                           unsigned TargetTypeSize) {
+  unsigned InitTypeSize = SE->getTypeSizeInBits(InitVal->getType());
+  if (InitTypeSize <= TargetTypeSize)
+    return true;
+
+  // Wide constant: range-check it. isSignedIntN() compares the number of
+  // significant bits, so it works for any bit width. Building
+  // TargetTypeSize-wide bounds instead would shift a 64-bit literal by up to
+  // TargetTypeSize - 1 bits and compare APInts of different widths, which
+  // asserts.
+  if (auto *CI = dyn_cast<ConstantInt>(InitVal))
+    return CI->getValue().isSignedIntN(TargetTypeSize);
+
+  // Look through extensions and test the value that was extended.
+  // NOTE(review): this only proves the pre-extension type is narrow enough;
+  // it does not distinguish signed from unsigned fit (a sext of a negative
+  // value does not fit as unsigned) -- confirm callers account for this.
+  if (auto *SExt = dyn_cast<SExtInst>(InitVal))
+    return isIVInitValTargetType(SExt->getOperand(0), TargetTypeSize);
+  if (auto *ZExt = dyn_cast<ZExtInst>(InitVal))
+    return isIVInitValTargetType(ZExt->getOperand(0), TargetTypeSize);
+
+  return false;
+}
+
 /// Search the loop header for a loop counter (anadd rec w/step of one)
 /// suitable for use by LFTR. 
If multiple counters are available, select the
 /// "best" one based profitable heuristics.
@@ -1048,9 +1081,27 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
     if (Extended) {
       bool Discard;
       L->makeLoopInvariant(ExitCnt, Discard);
-    } else
-      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
-                                      "lftr.wideiv");
+    } else {
+      // LFTR only runs on a loop counter (step of 1) and rewrites the exit
+      // test to eq/ne against ExitCnt, the final value of the IV. If the
+      // start value of the IV fits in ExitCntSize bits, the IV is assumed to
+      // stay within that width over [start, end), so the trunc can carry a
+      // no-wrap flag and the wide IV does not have to be masked.
+      //
+      // The flag follows the signedness of the original compare. cast<>
+      // asserts when the condition is not an ICmpInst, and nothing here
+      // establishes that it is, so use dyn_cast and fall back to a plain
+      // trunc otherwise.
+      // NOTE(review): confirm the chosen flag is implied by the start-value
+      // check for sign-extended starts compared unsigned (and vice versa).
+      Value *InitVal = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+      auto *OrigCmp = dyn_cast<ICmpInst>(BI->getCondition());
+      bool CanFlag = OrigCmp && isIVInitValTargetType(InitVal, ExitCntSize);
+      CmpIndVar = Builder.CreateTrunc(
+          CmpIndVar, ExitCnt->getType(), "lftr.wideiv",
+          /*IsNUW=*/CanFlag && OrigCmp->isUnsigned(),
+          /*IsNSW=*/CanFlag && OrigCmp->isSigned());
+    }
   }
   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                     << " LHS:" << *CmpIndVar << '\n'
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-masking.ll b/llvm/test/Transforms/IndVarSimplify/lftr-masking.ll
new file mode 100644
index 0000000000000..ef0c0916f2dd5
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-masking.ll
@@ -0,0 +1,45 @@
+; Legally add nsw/nuw flag for the Trunc instruction.
+; RUN: opt < %s -passes='indvars' -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+define dso_local void @func(ptr noundef captures(none) %0, i32 noundef %1) local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @func(ptr noundef captures(none) %0, i32 noundef %1) local_unnamed_addr {
+; CHECK-NEXT: [[CHECK:%.*]] = icmp slt i32 %1, 100
+; CHECK-NEXT: br i1 [[CHECK]], label [[LOOP_PREHEADER:%.*]], label [[LOOP_EXIT:%.*]]
+; CHECK: loop.preheader:
+; CHECK-NEXT: [[SEXT_START:%.*]] = sext i32 %1 to i64
+; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
+; CHECK: loop.exit.loopexit:
+; CHECK-NEXT: br label [[LOOP_EXIT]]
+; CHECK: loop.exit:
+; CHECK-NEXT: ret void
+; CHECK: loop.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[POSTINC:%.*]], [[LOOP_BODY]] ], [ [[SEXT_START]], [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr %0, i64 [[IV]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[DATA:%.*]] = add nsw i32 [[LOAD]], 1
+; CHECK-NEXT: store i32 [[DATA]], ptr [[GEP]], align 4
+; CHECK-NEXT: [[POSTINC]] = add nsw i64 [[IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc nsw i64 [[POSTINC]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_BODY]], label [[LOOP_EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT: }
+  %3 = icmp slt i32 %1, 100
+  br i1 %3, label %loop.preheader, label %loop.exit
+loop.preheader:
+  br label %loop.body
+
+loop.exit:
+  ret void
+loop.body:
+  %8 = phi i32 [ %13, %loop.body ], [ %1, %loop.preheader ]
+  %9 = sext i32 %8 to i64
+  %10 = getelementptr inbounds i32, ptr %0, i64 %9
+  %11 = load i32, ptr %10, align 4
+  %12 = add nsw i32 %11, 1
+  store i32 %12, ptr %10, align 4
+  %13 = add nsw i32 %8, 1
+  %14 = icmp slt i32 %8, 99
+  br i1 %14, label %loop.body, label %loop.exit
+}
diff --git
a/llvm/test/Transforms/PhaseOrdering/AArch64/constraint-elimination-placement.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/constraint-elimination-placement.ll index bbdbd95c6017a..ddd98f21c36a4 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/constraint-elimination-placement.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/constraint-elimination-placement.ll @@ -33,8 +33,7 @@ define i1 @test_order_1(ptr %this, ptr noalias %other, i1 %tobool9.not, i32 %cal ; CHECK-NEXT: br i1 [[CMP44]], label [[FOR_BODY45]], label [[FOR_COND]] ; CHECK: for.inc57: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP1]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND41_PREHEADER_PREHEADER]], label [[FOR_COND41_PREHEADER]] ; CHECK: exit: ; CHECK-NEXT: ret i1 false