Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ static cl::opt<bool> EnableUnswitchCostMultiplier(
static cl::opt<int> UnswitchSiblingsToplevelDiv(
"unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
cl::desc("Toplevel siblings divisor for cost multiplier."));
static cl::opt<int> UnswitchParentBlocksDiv(
"unswitch-parent-blocks-div", cl::init(8), cl::Hidden,
cl::desc("Outer loop size divisor for cost multiplier."));
static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
"unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
cl::desc("Number of unswitch candidates that are ignored when calculating "
Expand Down Expand Up @@ -2809,9 +2812,9 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
}

/// Cost multiplier is a way to limit potentially exponential behavior
/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
/// candidates available. Also accounting for the number of "sibling" loops with
/// the idea to account for previous unswitches that already happened on this
/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
/// candidates available. Also consider the number of "sibling" loops with
/// the idea of accounting for previous unswitches that already happened on this
/// cluster of loops. There was an attempt to keep this formula simple,
/// just enough to limit the worst case behavior. Even if it is not that simple
/// now it is still not an attempt to provide a detailed heuristic size
Expand Down Expand Up @@ -2842,7 +2845,19 @@ static int CalculateUnswitchCostMultiplier(
return 1;
}

// Each invariant non-trivial condition, after being unswitched, is supposed
// to have its own specialized sibling loop (the invariant condition has been
// hoisted out of the child loop into a newly-cloned loop). When unswitching
// conditions in nested loops, the basic block size of the outer loop should
// not be altered. If such a size significantly increases across unswitching
// invocations, something may be wrong; so adjust the final cost taking this
// into account.
auto *ParentL = L.getParentLoop();
int ParentLoopSizeMultiplier = 1;
if (ParentL)
ParentLoopSizeMultiplier =
std::max<int>(ParentL->getNumBlocks() / UnswitchParentBlocksDiv, 1);

int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
: std::distance(LI.begin(), LI.end()));
// Count amount of clones that all the candidates might cause during
Expand Down Expand Up @@ -2887,14 +2902,16 @@ static int CalculateUnswitchCostMultiplier(
// at an upper bound.
int CostMultiplier;
if (ClonesPower > Log2_32(UnswitchThreshold) ||
SiblingsMultiplier > UnswitchThreshold)
SiblingsMultiplier > UnswitchThreshold ||
ParentLoopSizeMultiplier > UnswitchThreshold)
CostMultiplier = UnswitchThreshold;
else
CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
(int)UnswitchThreshold);

LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
<< " (siblings " << SiblingsMultiplier << " * clones "
<< " (siblings " << SiblingsMultiplier << " * parent size "
<< ParentLoopSizeMultiplier << " * clones "
<< (1 << ClonesPower) << ")"
<< " for unswitch candidate: " << TI << "\n");
return CostMultiplier;
Expand Down
54 changes: 54 additions & 0 deletions llvm/test/Transforms/SimpleLoopUnswitch/pr138509.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -enable-unswitch-cost-multiplier=true -unswitch-parent-blocks-div=1 \
; RUN: -passes="loop-mssa(loop-simplifycfg,licm,loop-rotate,simple-loop-unswitch<nontrivial>),print<loops>" \
; RUN: -disable-output 2>&1 | sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-DIV-1

; RUN: opt < %s -S -enable-unswitch-cost-multiplier=true -unswitch-parent-blocks-div=2 \
; RUN: -passes="loop-mssa(loop-simplifycfg,licm,loop-rotate,simple-loop-unswitch<nontrivial>),print<loops>" \
; RUN: -disable-output 2>&1 | sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-DIV-2

; LOOP-DIV-1-COUNT-6: Loop at depth 1 containing:
; LOOP-DIV-2-COUNT-12: Loop at depth 1 containing:

@a = global i32 0, align 4
@b = global i32 0, align 4
@c = global i32 0, align 4
@d = global i32 0, align 4

define i32 @main() {
entry:
br label %outer.loop.header

outer.loop.header: ; preds = %outer.loop.latch, %entry
br i1 false, label %exit, label %outer.loop.body

outer.loop.body: ; preds = %inner.loop.header, %outer.loop.header
store i32 1, ptr @c, align 4
%cmp = icmp sgt i32 0, -1
br i1 %cmp, label %outer.loop.latch, label %exit

inner.loop.header: ; preds = %outer.loop.latch, %inner.loop.body
%a_val = load i32, ptr @a, align 4
%c_val = load i32, ptr @c, align 4
%mul = mul nsw i32 %c_val, %a_val
store i32 %mul, ptr @b, align 4
%cmp2 = icmp sgt i32 %mul, -1
br i1 %cmp2, label %inner.loop.body, label %outer.loop.body

inner.loop.body: ; preds = %inner.loop.header
%mul2 = mul nsw i32 %c_val, 3
store i32 %mul2, ptr @c, align 4
store i32 %c_val, ptr @d, align 4
%mul3 = mul nsw i32 %c_val, %a_val
%cmp3 = icmp sgt i32 %mul3, -1
br i1 %cmp3, label %inner.loop.header, label %exit

outer.loop.latch: ; preds = %outer.loop.body
%d_val = load i32, ptr @d, align 4
store i32 %d_val, ptr @b, align 4
%cmp4 = icmp eq i32 %d_val, 0
br i1 %cmp4, label %inner.loop.header, label %outer.loop.header

exit: ; preds = %inner.loop.body, %outer.loop.body, %outer.loop.header
ret i32 0
}