Skip to content

Commit 5651567

Browse files
committed
[DependenceAnalysis] Fix incorrect analysis of wrapping AddRec expressions
Fixes GitHub issue #148435 where {false,+,true} patterns reported "da analyze - none!" instead of correct "da analyze - output [*]!". The issue occurs when AddRec expressions in narrow types create cyclic patterns (e.g., {false,+,true} in i1 arithmetic: 0,1,0,1,0,1...) that violate SIV analysis assumptions of linear, non-wrapping recurrences. The fix detects potential wrapping by checking if step × iteration_count exceeds the type's representable range, then classifies such expressions as NonLinear for conservative analysis. Add wrapping detection in checkSubscript() with fallback to exact and max backedge taken count for variable bounds.
1 parent 6c358c6 commit 5651567

File tree

7 files changed

+254
-4
lines changed

7 files changed

+254
-4
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,12 @@ class ScalarEvolution {
13371337
/// sharpen it.
13381338
LLVM_ABI void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags);
13391339

1340+
/// Check if this AddRec expression may wrap, making it non-affine.
1341+
/// Wrapping AddRecs create cyclic patterns that violate linearity
1342+
/// assumptions. Returns true if definitely wraps, false if definitely safe,
1343+
/// nullopt if unknown.
1344+
LLVM_ABI std::optional<bool> mayAddRecWrap(const SCEVAddRecExpr *AddRec);
1345+
13401346
class LoopGuards {
13411347
DenseMap<const SCEV *, const SCEV *> RewriteMap;
13421348
bool PreserveNUW = false;

llvm/lib/Analysis/DependenceAnalysis.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,21 @@ bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
966966
if (!isLoopInvariant(Step, LoopNest))
967967
return false;
968968

969+
// Check if this AddRec expression may wrap, making it non-affine.
970+
std::optional<bool> MayWrap = SE->mayAddRecWrap(AddRec);
971+
if (MayWrap == true) {
972+
// AddRec is known to wrap.
973+
return false;
974+
} else if (!MayWrap.has_value()) {
975+
// Unknown whether it wraps - add runtime predicate that it doesn't wrap.
976+
auto WrapFlags = static_cast<SCEVWrapPredicate::IncrementWrapFlags>(
977+
SCEVWrapPredicate::IncrementNUSW | SCEVWrapPredicate::IncrementNSSW);
978+
const SCEVPredicate *WrapPred = SE->getWrapPredicate(AddRec, WrapFlags);
979+
const_cast<DependenceInfo *>(this)->Assumptions.push_back(WrapPred);
980+
LLVM_DEBUG(dbgs() << "\t Added runtime wrap assumption for: " << *AddRec
981+
<< "\n");
982+
}
983+
969984
// The AddRec must depend on one of the containing loops. Otherwise,
970985
// mapSrcLoop and mapDstLoop return indices outside the intended range. This
971986
// can happen when a subscript in one loop references an IV from a sibling

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 123 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6439,8 +6439,129 @@ void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec,
64396439
}
64406440
}
64416441

6442-
ConstantRange ScalarEvolution::
6443-
getRangeForUnknownRecurrence(const SCEVUnknown *U) {
6442+
std::optional<bool>
6443+
ScalarEvolution::mayAddRecWrap(const SCEVAddRecExpr *AddRec) {
6444+
Type *Ty = AddRec->getType();
6445+
6446+
// Pointer AddRec expressions do not wrap in the arithmetic sense.
6447+
if (Ty->isPointerTy())
6448+
return false;
6449+
6450+
// Step 1: Check existing no-wrap flags from SCEV construction.
6451+
if (AddRec->hasNoSelfWrap() || AddRec->hasNoUnsignedWrap() ||
6452+
AddRec->hasNoSignedWrap()) {
6453+
LLVM_DEBUG(dbgs() << "\t\tAddRec has no-wrap flags: " << *AddRec << "\n");
6454+
return false;
6455+
}
6456+
6457+
// Step 2: Try to prove no-wrap using constant range analysis.
6458+
// Uses the same logic as proveNoWrapViaConstantRanges.
6459+
if (AddRec->isAffine()) {
6460+
const Loop *Loop = AddRec->getLoop();
6461+
const SCEV *BECount = getConstantMaxBackedgeTakenCount(Loop);
6462+
if (const SCEVConstant *BECountMax = dyn_cast<SCEVConstant>(BECount)) {
6463+
ConstantRange StepCR = getSignedRange(AddRec->getStepRecurrence(*this));
6464+
const APInt &BECountAP = BECountMax->getAPInt();
6465+
unsigned NoOverflowBitWidth =
6466+
BECountAP.getActiveBits() + StepCR.getMinSignedBits();
6467+
if (NoOverflowBitWidth <= getTypeSizeInBits(AddRec->getType())) {
6468+
LLVM_DEBUG(dbgs() << "\t\tConstant range analysis proves no-wrap: "
6469+
<< *AddRec << "\n");
6470+
return false;
6471+
}
6472+
}
6473+
}
6474+
6475+
// Step 3: Try to prove using signed/unsigned range containment.
6476+
// Uses the range containment checks from proveNoWrapViaConstantRanges.
6477+
if (AddRec->isAffine()) {
6478+
using OBO = OverflowingBinaryOperator;
6479+
6480+
// Check unsigned wrap.
6481+
ConstantRange AddRecRange = getUnsignedRange(AddRec);
6482+
ConstantRange IncRange = getUnsignedRange(AddRec->getStepRecurrence(*this));
6483+
6484+
auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
6485+
Instruction::Add, IncRange, OBO::NoUnsignedWrap);
6486+
if (NUWRegion.contains(AddRecRange)) {
6487+
LLVM_DEBUG(dbgs() << "\t\tUnsigned range analysis proves no-wrap: "
6488+
<< *AddRec << "\n");
6489+
return false;
6490+
}
6491+
6492+
// Check signed wrap.
6493+
ConstantRange SignedAddRecRange = getSignedRange(AddRec);
6494+
ConstantRange SignedIncRange =
6495+
getSignedRange(AddRec->getStepRecurrence(*this));
6496+
6497+
auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
6498+
Instruction::Add, SignedIncRange, OBO::NoSignedWrap);
6499+
if (NSWRegion.contains(SignedAddRecRange)) {
6500+
LLVM_DEBUG(dbgs() << "\t\tSigned range analysis proves no-wrap: "
6501+
<< *AddRec << "\n");
6502+
return false;
6503+
}
6504+
}
6505+
6506+
// Step 4: Try induction-based proving methods.
6507+
// Call the existing sophisticated analysis methods.
6508+
SCEV::NoWrapFlags ProvenFlags = proveNoWrapViaConstantRanges(AddRec);
6509+
if (hasFlags(ProvenFlags, SCEV::FlagNW) ||
6510+
hasFlags(ProvenFlags, SCEV::FlagNUW) ||
6511+
hasFlags(ProvenFlags, SCEV::FlagNSW)) {
6512+
LLVM_DEBUG(dbgs() << "\t\tAdvanced constant range analysis proves no-wrap: "
6513+
<< *AddRec << "\n");
6514+
return false;
6515+
}
6516+
6517+
ProvenFlags = proveNoSignedWrapViaInduction(AddRec);
6518+
if (hasFlags(ProvenFlags, SCEV::FlagNSW)) {
6519+
LLVM_DEBUG(dbgs() << "\t\tSigned induction analysis proves no-wrap: "
6520+
<< *AddRec << "\n");
6521+
return false;
6522+
}
6523+
6524+
ProvenFlags = proveNoUnsignedWrapViaInduction(AddRec);
6525+
if (hasFlags(ProvenFlags, SCEV::FlagNUW)) {
6526+
LLVM_DEBUG(dbgs() << "\t\tUnsigned induction analysis proves no-wrap: "
6527+
<< *AddRec << "\n");
6528+
return false;
6529+
}
6530+
6531+
// Step 5: Fallback to explicit step * iteration calculation for narrow types.
6532+
const SCEV *Step = AddRec->getStepRecurrence(*this);
6533+
const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
6534+
if (!ConstStep)
6535+
return std::nullopt;
6536+
6537+
const Loop *Loop = AddRec->getLoop();
6538+
if (!hasLoopInvariantBackedgeTakenCount(Loop))
6539+
return std::nullopt;
6540+
6541+
const SCEV *BTC = getBackedgeTakenCount(Loop);
6542+
const SCEVConstant *ConstBTC = dyn_cast<SCEVConstant>(BTC);
6543+
if (!ConstBTC)
6544+
return std::nullopt;
6545+
6546+
// Explicit calculation: will step * iterations exceed type range?
6547+
APInt StepVal = ConstStep->getAPInt();
6548+
APInt BTCVal = ConstBTC->getAPInt();
6549+
6550+
bool Overflow = false;
6551+
APInt Product = StepVal.zext(64).umul_ov(BTCVal.zext(64), Overflow);
6552+
6553+
unsigned BitWidth = Ty->getScalarSizeInBits();
6554+
if (Overflow || Product.getZExtValue() >= (1ULL << BitWidth)) {
6555+
LLVM_DEBUG(dbgs() << "\t\tExplicit calculation proves wrapping: " << *AddRec
6556+
<< "\n");
6557+
return true;
6558+
}
6559+
6560+
return false;
6561+
}
6562+
6563+
ConstantRange
6564+
ScalarEvolution::getRangeForUnknownRecurrence(const SCEVUnknown *U) {
64446565
const DataLayout &DL = getDataLayout();
64456566

64466567
unsigned BitWidth = getTypeSizeInBits(U->getType());

llvm/test/Analysis/DependenceAnalysis/PR148435.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,7 @@ for.end10: ; preds = %for.cond
7575
define void @f1(ptr %a) {
7676
; CHECK-LABEL: 'f1'
7777
; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
78-
; CHECK-NEXT: da analyze - none!
79-
; Note: the second patch for PR148435 modifies the above CHECK to correct "output [*]".
78+
; CHECK-NEXT: da analyze - output [*]!
8079
;
8180
entry:
8281
br label %loop
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
3+
4+
; Test case for bug #148435 - SIV test assertion failure
5+
; This test ensures that testSIV handles the case where neither Src nor Dst
6+
; expressions contain AddRec after propagation, which can happen when
7+
; constraints simplify the expressions to non-AddRec forms.
8+
9+
define void @f(ptr %a) {
10+
; CHECK-LABEL: 'f'
11+
; CHECK-NEXT: Src: store i8 42, ptr %idx, align 1 --> Dst: store i8 42, ptr %idx, align 1
12+
; CHECK-NEXT: da analyze - output [* *]!
13+
;
14+
entry:
15+
br label %loop.i.header
16+
17+
loop.i.header:
18+
%i = phi i64 [ 0, %entry ], [ %i.next, %loop.i.latch ]
19+
%and.i = and i64 %i, 1
20+
br label %loop.j
21+
22+
loop.j:
23+
%j = phi i64 [ 0, %loop.i.header ], [ %j.next, %loop.j ]
24+
%and.j = and i64 %j, 1
25+
%idx = getelementptr [2 x [2 x i8]], ptr %a, i64 0, i64 %and.i, i64 %and.j
26+
store i8 42, ptr %idx
27+
%j.next = add i64 %j, 1
28+
%exitcond.j = icmp eq i64 %j.next, 100
29+
br i1 %exitcond.j, label %loop.i.latch, label %loop.j
30+
31+
loop.i.latch:
32+
%i.next = add i64 %i, 1
33+
%exitcond.i = icmp eq i64 %i.next, 100
34+
br i1 %exitcond.i, label %exit, label %loop.i.header
35+
36+
exit:
37+
ret void
38+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
3+
4+
; Test case for wrapping AddRec detection in DependenceAnalysis.
5+
; This ensures that AddRec expressions that wrap (creating cyclic rather than
6+
; linear patterns) are rejected from SIV analysis and treated conservatively.
7+
8+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
9+
10+
11+
12+
; This test case has a clear dependence pattern that was incorrectly reported as "none!"
13+
; The issue: {false,+,true} in i1 arithmetic creates pattern (0,1,0,1,0,1,...).
14+
; - i=0: a[0][0][0], i=1: a[0][1][1], i=2: a[0][0][0], i=3: a[0][1][1], ...
15+
; - Clear dependencies at distances 2, 4, 6 between iterations accessing same locations.
16+
; - Strong SIV test was missing these due to treating wrapping pattern as linear.
17+
define void @test_wrapping_i1_addrec(ptr %a) {
18+
; CHECK-LABEL: 'test_wrapping_i1_addrec'
19+
; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
20+
; CHECK-NEXT: da analyze - output [*]!
21+
;
22+
entry:
23+
br label %loop
24+
25+
loop:
26+
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
27+
%and = and i64 %i, 1
28+
%idx = getelementptr inbounds [4 x [4 x i8]], ptr %a, i64 0, i64 %and, i64 %and
29+
store i8 0, ptr %idx
30+
%i.next = add i64 %i, 1
31+
%exitcond.not = icmp slt i64 %i.next, 8
32+
br i1 %exitcond.not, label %loop, label %exit
33+
34+
exit:
35+
ret void
36+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
3+
4+
; Test case for wrapping AddRec detection using constant max backedge taken count.
5+
; This ensures that wrapping detection works even when exact BTC is not available
6+
; but we can get a conservative upper bound.
7+
8+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
9+
10+
; Test case where loop has variable bound but SCEV can provide max BTC estimate.
11+
; The i2 type can only represent 0,1,2,3, so if we iterate more than 4 times
12+
; with step=1, we'll get wrapping: 0,1,2,3,0,1,2,3...
13+
define void @test_wrapping_with_maxbtc(ptr %a, i32 %n) {
14+
; CHECK-LABEL: 'test_wrapping_with_maxbtc'
15+
; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
16+
; CHECK-NEXT: da analyze - output [*]!
17+
;
18+
entry:
19+
%bound = and i32 %n, 1023 ; Limit n to at most 1024
20+
%cmp = icmp sgt i32 %bound, 0
21+
br i1 %cmp, label %loop, label %exit
22+
23+
loop:
24+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
25+
%i.narrow = trunc i32 %i to i2 ; Only 2 bits: wraps after 4 iterations
26+
%zext = zext i2 %i.narrow to i64
27+
%idx = getelementptr inbounds [8 x i8], ptr %a, i64 0, i64 %zext
28+
store i8 0, ptr %idx
29+
%i.next = add i32 %i, 1
30+
%exitcond = icmp slt i32 %i.next, %bound ; Variable upper bound
31+
br i1 %exitcond, label %loop, label %exit
32+
33+
exit:
34+
ret void
35+
}

0 commit comments

Comments
 (0)