[DependenceAnalysis] Fix incorrect analysis of wrapping AddRec expressions

sebpop · sebpop · commit 5651567cf3bb · 2025-08-22T18:05:26.000-05:00
Fixes GitHub issue #148435 where {false,+,true} patterns reported "da analyze - none!" instead of correct "da analyze - output [*]!". The issue occurs when AddRec expressions in narrow types create cyclic patterns (e.g., {false,+,true} in i1 arithmetic: 0,1,0,1,0,1...) that violate SIV analysis assumptions of linear, non-wrapping recurrences. The fix detects potential wrapping by checking if step × iteration_count exceeds the type's representable range, then classifies such expressions as NonLinear for conservative analysis. Add wrapping detection in checkSubscript() with fallback to exact and max backedge taken count for variable bounds.
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1337,6 +1337,12 @@ class ScalarEvolution {
   /// sharpen it.
   LLVM_ABI void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags);
 
+  /// Check if this AddRec expression may wrap, making it non-affine.
+  /// Wrapping AddRecs create cyclic patterns that violate linearity
+  /// assumptions. Returns true if definitely wraps, false if definitely safe,
+  /// nullopt if unknown.
+  LLVM_ABI std::optional<bool> mayAddRecWrap(const SCEVAddRecExpr *AddRec);
+
   class LoopGuards {
     DenseMap<const SCEV *, const SCEV *> RewriteMap;
     bool PreserveNUW = false;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -966,6 +966,21 @@ bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
   if (!isLoopInvariant(Step, LoopNest))
     return false;
 
+  // Check if this AddRec expression may wrap, making it non-affine.
+  std::optional<bool> MayWrap = SE->mayAddRecWrap(AddRec);
+  if (MayWrap == true) {
+    // AddRec is known to wrap.
+    return false;
+  } else if (!MayWrap.has_value()) {
+    // Unknown whether it wraps - add runtime predicate that it doesn't wrap.
+    auto WrapFlags = static_cast<SCEVWrapPredicate::IncrementWrapFlags>(
+        SCEVWrapPredicate::IncrementNUSW | SCEVWrapPredicate::IncrementNSSW);
+    const SCEVPredicate *WrapPred = SE->getWrapPredicate(AddRec, WrapFlags);
+    const_cast<DependenceInfo *>(this)->Assumptions.push_back(WrapPred);
+    LLVM_DEBUG(dbgs() << "\t    Added runtime wrap assumption for: " << *AddRec
+                      << "\n");
+  }
+
   // The AddRec must depend on one of the containing loops. Otherwise,
   // mapSrcLoop and mapDstLoop return indices outside the intended range. This
   // can happen when a subscript in one loop references an IV from a sibling
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6439,8 +6439,129 @@ void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec,
   }
 }
 
-ConstantRange ScalarEvolution::
-getRangeForUnknownRecurrence(const SCEVUnknown *U) {
+std::optional<bool>
+ScalarEvolution::mayAddRecWrap(const SCEVAddRecExpr *AddRec) {
+  Type *Ty = AddRec->getType();
+
+  // Pointer AddRec expressions do not wrap in the arithmetic sense.
+  if (Ty->isPointerTy())
+    return false;
+
+  // Step 1: Check existing no-wrap flags from SCEV construction.
+  if (AddRec->hasNoSelfWrap() || AddRec->hasNoUnsignedWrap() ||
+      AddRec->hasNoSignedWrap()) {
+    LLVM_DEBUG(dbgs() << "\t\tAddRec has no-wrap flags: " << *AddRec << "\n");
+    return false;
+  }
+
+  // Step 2: Try to prove no-wrap using constant range analysis.
+  // Uses the same logic as proveNoWrapViaConstantRanges.
+  if (AddRec->isAffine()) {
+    const Loop *Loop = AddRec->getLoop();
+    const SCEV *BECount = getConstantMaxBackedgeTakenCount(Loop);
+    if (const SCEVConstant *BECountMax = dyn_cast<SCEVConstant>(BECount)) {
+      ConstantRange StepCR = getSignedRange(AddRec->getStepRecurrence(*this));
+      const APInt &BECountAP = BECountMax->getAPInt();
+      unsigned NoOverflowBitWidth =
+          BECountAP.getActiveBits() + StepCR.getMinSignedBits();
+      if (NoOverflowBitWidth <= getTypeSizeInBits(AddRec->getType())) {
+        LLVM_DEBUG(dbgs() << "\t\tConstant range analysis proves no-wrap: "
+                          << *AddRec << "\n");
+        return false;
+      }
+    }
+  }
+
+  // Step 3: Try to prove using signed/unsigned range containment.
+  // Uses the range containment checks from proveNoWrapViaConstantRanges.
+  if (AddRec->isAffine()) {
+    using OBO = OverflowingBinaryOperator;
+
+    // Check unsigned wrap.
+    ConstantRange AddRecRange = getUnsignedRange(AddRec);
+    ConstantRange IncRange = getUnsignedRange(AddRec->getStepRecurrence(*this));
+
+    auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
+        Instruction::Add, IncRange, OBO::NoUnsignedWrap);
+    if (NUWRegion.contains(AddRecRange)) {
+      LLVM_DEBUG(dbgs() << "\t\tUnsigned range analysis proves no-wrap: "
+                        << *AddRec << "\n");
+      return false;
+    }
+
+    // Check signed wrap.
+    ConstantRange SignedAddRecRange = getSignedRange(AddRec);
+    ConstantRange SignedIncRange =
+        getSignedRange(AddRec->getStepRecurrence(*this));
+
+    auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
+        Instruction::Add, SignedIncRange, OBO::NoSignedWrap);
+    if (NSWRegion.contains(SignedAddRecRange)) {
+      LLVM_DEBUG(dbgs() << "\t\tSigned range analysis proves no-wrap: "
+                        << *AddRec << "\n");
+      return false;
+    }
+  }
+
+  // Step 4: Try induction-based proving methods.
+  // Call the existing sophisticated analysis methods.
+  SCEV::NoWrapFlags ProvenFlags = proveNoWrapViaConstantRanges(AddRec);
+  if (hasFlags(ProvenFlags, SCEV::FlagNW) ||
+      hasFlags(ProvenFlags, SCEV::FlagNUW) ||
+      hasFlags(ProvenFlags, SCEV::FlagNSW)) {
+    LLVM_DEBUG(dbgs() << "\t\tAdvanced constant range analysis proves no-wrap: "
+                      << *AddRec << "\n");
+    return false;
+  }
+
+  ProvenFlags = proveNoSignedWrapViaInduction(AddRec);
+  if (hasFlags(ProvenFlags, SCEV::FlagNSW)) {
+    LLVM_DEBUG(dbgs() << "\t\tSigned induction analysis proves no-wrap: "
+                      << *AddRec << "\n");
+    return false;
+  }
+
+  ProvenFlags = proveNoUnsignedWrapViaInduction(AddRec);
+  if (hasFlags(ProvenFlags, SCEV::FlagNUW)) {
+    LLVM_DEBUG(dbgs() << "\t\tUnsigned induction analysis proves no-wrap: "
+                      << *AddRec << "\n");
+    return false;
+  }
+
+  // Step 5: Fallback to explicit step * iteration calculation for narrow types.
+  const SCEV *Step = AddRec->getStepRecurrence(*this);
+  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+  if (!ConstStep)
+    return std::nullopt;
+
+  const Loop *Loop = AddRec->getLoop();
+  if (!hasLoopInvariantBackedgeTakenCount(Loop))
+    return std::nullopt;
+
+  const SCEV *BTC = getBackedgeTakenCount(Loop);
+  const SCEVConstant *ConstBTC = dyn_cast<SCEVConstant>(BTC);
+  if (!ConstBTC)
+    return std::nullopt;
+
+  // Explicit calculation: will step * iterations exceed type range?
+  APInt StepVal = ConstStep->getAPInt();
+  APInt BTCVal = ConstBTC->getAPInt();
+
+  bool Overflow = false;
+  APInt Product = StepVal.zext(64).umul_ov(BTCVal.zext(64), Overflow);
+
+  unsigned BitWidth = Ty->getScalarSizeInBits();
+  if (Overflow || Product.getZExtValue() >= (1ULL << BitWidth)) {
+    LLVM_DEBUG(dbgs() << "\t\tExplicit calculation proves wrapping: " << *AddRec
+                      << "\n");
+    return true;
+  }
+
+  return false;
+}
+
+ConstantRange
+ScalarEvolution::getRangeForUnknownRecurrence(const SCEVUnknown *U) {
   const DataLayout &DL = getDataLayout();
 
   unsigned BitWidth = getTypeSizeInBits(U->getType());
diff --git a/llvm/test/Analysis/DependenceAnalysis/PR148435.ll b/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
@@ -75,8 +75,7 @@ for.end10:                                        ; preds = %for.cond
 define void @f1(ptr %a) {
 ; CHECK-LABEL: 'f1'
 ; CHECK-NEXT:  Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
-; CHECK-NEXT:    da analyze - none!
-; Note: the second patch for PR148435 modifies the above CHECK to correct "output [*]".
+; CHECK-NEXT:    da analyze - output [*]!
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for bug #148435 - SIV test assertion failure
+; This test ensures that testSIV handles the case where neither Src nor Dst
+; expressions contain AddRec after propagation, which can happen when
+; constraints simplify the expressions to non-AddRec forms.
+
+define void @f(ptr %a) {
+; CHECK-LABEL: 'f'
+; CHECK-NEXT:  Src: store i8 42, ptr %idx, align 1 --> Dst: store i8 42, ptr %idx, align 1
+; CHECK-NEXT:    da analyze - output [* *]!
+;
+entry:
+  br label %loop.i.header
+
+loop.i.header:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop.i.latch ]
+  %and.i = and i64 %i, 1
+  br label %loop.j
+
+loop.j:
+  %j = phi i64 [ 0, %loop.i.header ], [ %j.next, %loop.j ]
+  %and.j = and i64 %j, 1
+  %idx = getelementptr [2 x [2 x i8]], ptr %a, i64 0, i64 %and.i, i64 %and.j
+  store i8 42, ptr %idx
+  %j.next = add i64 %j, 1
+  %exitcond.j = icmp eq i64 %j.next, 100
+ br i1 %exitcond.j, label %loop.i.latch, label %loop.j
+
+loop.i.latch:
+  %i.next = add i64 %i, 1
+  %exitcond.i = icmp eq i64 %i.next, 100
+  br i1 %exitcond.i, label %exit, label %loop.i.header
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for wrapping AddRec detection in DependenceAnalysis.
+; This ensures that AddRec expressions that wrap (creating cyclic rather than
+; linear patterns) are rejected from SIV analysis and treated conservatively.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+
+
+; This test case has a clear dependence pattern that was incorrectly reported as "none!"
+; The issue: {false,+,true} in i1 arithmetic creates pattern (0,1,0,1,0,1,...).
+; - i=0: a[0][0][0], i=1: a[0][1][1], i=2: a[0][0][0], i=3: a[0][1][1], ...
+; - Clear dependencies at distances 2, 4, 6 between iterations accessing same locations.
+; - Strong SIV test was missing these due to treating wrapping pattern as linear.
+define void @test_wrapping_i1_addrec(ptr %a) {
+; CHECK-LABEL: 'test_wrapping_i1_addrec'
+; CHECK-NEXT:  Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT:    da analyze - output [*]!
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %and = and i64 %i, 1
+  %idx = getelementptr inbounds [4 x [4 x i8]], ptr %a, i64 0, i64 %and, i64 %and
+  store i8 0, ptr %idx
+  %i.next = add i64 %i, 1
+  %exitcond.not = icmp slt i64 %i.next, 8
+  br i1 %exitcond.not, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll b/llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for wrapping AddRec detection using constant max backedge taken count.
+; This ensures that wrapping detection works even when exact BTC is not available
+; but we can get a conservative upper bound.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+; Test case where loop has variable bound but SCEV can provide max BTC estimate.
+; The i2 type can only represent 0,1,2,3, so if we iterate more than 4 times
+; with step=1, we'll get wrapping: 0,1,2,3,0,1,2,3...
+define void @test_wrapping_with_maxbtc(ptr %a, i32 %n) {
+; CHECK-LABEL: 'test_wrapping_with_maxbtc'
+; CHECK-NEXT:  Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT:    da analyze - output [*]!
+;
+entry:
+  %bound = and i32 %n, 1023    ; Limit n to at most 1024
+  %cmp = icmp sgt i32 %bound, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.narrow = trunc i32 %i to i2      ; Only 2 bits: wraps after 4 iterations
+  %zext = zext i2 %i.narrow to i64
+  %idx = getelementptr inbounds [8 x i8], ptr %a, i64 0, i64 %zext
+  store i8 0, ptr %idx
+  %i.next = add i32 %i, 1
+  %exitcond = icmp slt i32 %i.next, %bound  ; Variable upper bound
+  br i1 %exitcond, label %loop, label %exit
+
+exit:
+  ret void
+}