Skip to content

Commit 3f3017e

Browse files
[Loop Peeling] Add possibility to enable peeling on loop nests.
Summary: The current peeling implementation bails out on loop nests. This patch introduces a field in the TargetTransformInfo::UnrollingPreferences structure that targets can set to relax this constraint when peeling loop nests is profitable (disabled by default). An additional option, -unroll-allow-loop-nests-peeling, is also added to enable such peeling manually for experimentation and testing purposes.
Reviewers: fhahn, lebedev.ri, xbolva00
Reviewed By: xbolva00
Subscribers: xbolva00, hiraditya, zzheng, llvm-commits
Differential Revision: https://reviews.llvm.org/D70304
1 parent 3180af4 commit 3f3017e

File tree

5 files changed

+194
-93
lines changed

5 files changed

+194
-93
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,8 @@ class TargetTransformInfo {
490490
bool UpperBound;
491491
/// Allow peeling off loop iterations.
492492
bool AllowPeeling;
493+
/// Allow peeling off loop iterations for loop nests.
494+
bool AllowLoopNestsPeeling;
493495
/// Allow unrolling of all the iterations of the runtime loop remainder.
494496
bool UnrollRemainder;
495497
/// Allow unroll and jam. Used to enable unroll and jam for the target.

llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ static cl::opt<bool>
154154
cl::desc("Allows loops to be peeled when the dynamic "
155155
"trip count is known to be low."));
156156

157+
static cl::opt<bool> UnrollAllowLoopNestsPeeling(
158+
"unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden,
159+
cl::desc("Allows loop nests to be peeled."));
160+
157161
static cl::opt<bool> UnrollUnrollRemainder(
158162
"unroll-remainder", cl::Hidden,
159163
cl::desc("Allow the loop remainder to be unrolled."));
@@ -204,6 +208,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
204208
UP.Force = false;
205209
UP.UpperBound = false;
206210
UP.AllowPeeling = true;
211+
UP.AllowLoopNestsPeeling = false;
207212
UP.UnrollAndJam = false;
208213
UP.PeelProfiledIterations = true;
209214
UP.UnrollAndJamInnerLoopThreshold = 60;
@@ -244,6 +249,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
244249
UP.UpperBound = false;
245250
if (UnrollAllowPeeling.getNumOccurrences() > 0)
246251
UP.AllowPeeling = UnrollAllowPeeling;
252+
if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
253+
UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
247254
if (UnrollUnrollRemainder.getNumOccurrences() > 0)
248255
UP.UnrollRemainder = UnrollUnrollRemainder;
249256

llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,10 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
289289
if (!canPeel(L))
290290
return;
291291

292-
// Only try to peel innermost loops.
293-
if (!L->empty())
292+
// Only try to peel innermost loops by default.
293+
// The constraint can be relaxed by the target in TTI.getUnrollingPreferences
294+
// or by the flag -unroll-allow-loop-nests-peeling.
295+
if (!UP.AllowLoopNestsPeeling && !L->empty())
294296
return;
295297

296298
// If the user provided a peel count, use that.

llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll

Lines changed: 26 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -403,76 +403,11 @@ for.end:
403403
ret void
404404
}
405405

406-
; In this case we cannot peel the inner loop, because the condition involves
407-
; the outer induction variable.
408-
define void @test5(i32 %k) {
409-
; CHECK-LABEL: @test5(
410-
; CHECK-NEXT: for.body.lr.ph:
411-
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
412-
; CHECK: outer.header:
413-
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
414-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
415-
; CHECK: for.body:
416-
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
417-
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2
418-
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
419-
; CHECK: if.then:
420-
; CHECK-NEXT: call void @f1()
421-
; CHECK-NEXT: br label [[FOR_INC]]
422-
; CHECK: if.else:
423-
; CHECK-NEXT: call void @f2()
424-
; CHECK-NEXT: br label [[FOR_INC]]
425-
; CHECK: for.inc:
426-
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
427-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
428-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
429-
; CHECK: outer.inc:
430-
; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1
431-
; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
432-
; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]]
433-
; CHECK: for.end:
434-
; CHECK-NEXT: ret void
435-
;
436-
for.body.lr.ph:
437-
br label %outer.header
438-
439-
outer.header:
440-
%j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
441-
br label %for.body
442-
443-
for.body:
444-
%i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
445-
%cmp1 = icmp ult i32 %j, 2
446-
br i1 %cmp1, label %if.then, label %if.else
447-
448-
if.then:
449-
call void @f1()
450-
br label %for.inc
451-
452-
if.else:
453-
call void @f2()
454-
br label %for.inc
455-
456-
for.inc:
457-
%inc = add nsw i32 %i.05, 1
458-
%cmp = icmp slt i32 %inc, %k
459-
br i1 %cmp, label %for.body, label %outer.inc
460-
461-
outer.inc:
462-
%j.inc = add nsw i32 %j, 1
463-
%outer.cmp = icmp slt i32 %j.inc, %k
464-
br i1 %outer.cmp, label %outer.header, label %for.end
465-
466-
467-
for.end:
468-
ret void
469-
}
470-
471406
; In this test, the condition involves 2 AddRecs. Without evaluating both
472407
; AddRecs, we cannot prove that the condition becomes known in the loop body
473408
; after peeling.
474-
define void @test6(i32 %k) {
475-
; CHECK-LABEL: @test6(
409+
define void @test5(i32 %k) {
410+
; CHECK-LABEL: @test5(
476411
; CHECK-NEXT: entry:
477412
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
478413
; CHECK: for.body:
@@ -521,8 +456,8 @@ for.end:
521456
ret void
522457
}
523458

524-
define void @test7(i32 %k) {
525-
; CHECK-LABEL: @test7(
459+
define void @test6(i32 %k) {
460+
; CHECK-LABEL: @test6(
526461
; CHECK-NEXT: for.body.lr.ph:
527462
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
528463
; CHECK: for.body.peel.begin:
@@ -615,8 +550,8 @@ for.end:
615550
ret void
616551
}
617552

618-
define void @test8(i32 %k) {
619-
; CHECK-LABEL: @test8(
553+
define void @test7(i32 %k) {
554+
; CHECK-LABEL: @test7(
620555
; CHECK-NEXT: for.body.lr.ph:
621556
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
622557
; CHECK: for.body.peel.begin:
@@ -711,8 +646,8 @@ for.end:
711646

712647
; Comparison with non-monotonic predicate due to possible wrapping, loop
713648
; body cannot be simplified.
714-
define void @test9(i32 %k) {
715-
; CHECK-LABEL: @test9(
649+
define void @test8(i32 %k) {
650+
; CHECK-LABEL: @test8(
716651
; CHECK-NEXT: for.body.lr.ph:
717652
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
718653
; CHECK: for.body:
@@ -751,8 +686,8 @@ for.end:
751686
}
752687
; CHECK-NOT: llvm.loop.unroll.disable
753688

754-
define void @test_10__peel_first_iter_via_slt_pred(i32 %len) {
755-
; CHECK-LABEL: @test_10__peel_first_iter_via_slt_pred(
689+
define void @test_9__peel_first_iter_via_slt_pred(i32 %len) {
690+
; CHECK-LABEL: @test_9__peel_first_iter_via_slt_pred(
756691
; CHECK-NEXT: entry:
757692
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
758693
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -818,8 +753,8 @@ if.end: ; preds = %if.then, %for.body
818753
br i1 %exitcond, label %for.cond.cleanup, label %for.body
819754
}
820755

821-
define void @test_11__peel_first_iter_via_sgt_pred(i32 %len) {
822-
; CHECK-LABEL: @test_11__peel_first_iter_via_sgt_pred(
756+
define void @test_10__peel_first_iter_via_sgt_pred(i32 %len) {
757+
; CHECK-LABEL: @test_10__peel_first_iter_via_sgt_pred(
823758
; CHECK-NEXT: entry:
824759
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
825760
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -887,8 +822,8 @@ if.end: ; preds = %if.then, %for.body
887822

888823
; NOTE: here we should only peel the first iteration,
889824
; i.e. all calls to sink() must stay in loop.
890-
define void @test12__peel_first_iter_via_eq_pred(i32 %len) {
891-
; CHECK-LABEL: @test12__peel_first_iter_via_eq_pred(
825+
define void @test11__peel_first_iter_via_eq_pred(i32 %len) {
826+
; CHECK-LABEL: @test11__peel_first_iter_via_eq_pred(
892827
; CHECK-NEXT: entry:
893828
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
894829
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -956,8 +891,8 @@ if.end: ; preds = %if.then, %for.body
956891

957892
; NOTE: here we should only peel the first iteration,
958893
; i.e. all calls to sink() must stay in loop.
959-
define void @test13__peel_first_iter_via_ne_pred(i32 %len) {
960-
; CHECK-LABEL: @test13__peel_first_iter_via_ne_pred(
894+
define void @test12__peel_first_iter_via_ne_pred(i32 %len) {
895+
; CHECK-LABEL: @test12__peel_first_iter_via_ne_pred(
961896
; CHECK-NEXT: entry:
962897
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
963898
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -1024,8 +959,8 @@ if.end: ; preds = %if.then, %for.body
1024959
}
1025960

1026961
; No peeling is profitable here.
1027-
define void @test14__ivar_mod2_is_1(i32 %len) {
1028-
; CHECK-LABEL: @test14__ivar_mod2_is_1(
962+
define void @test13__ivar_mod2_is_1(i32 %len) {
963+
; CHECK-LABEL: @test13__ivar_mod2_is_1(
1029964
; CHECK-NEXT: entry:
1030965
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
1031966
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -1074,8 +1009,8 @@ if.end: ; preds = %if.then, %for.body
10741009
}
10751010

10761011
; No peeling is profitable here.
1077-
define void @test15__ivar_mod2_is_0(i32 %len) {
1078-
; CHECK-LABEL: @test15__ivar_mod2_is_0(
1012+
define void @test14__ivar_mod2_is_0(i32 %len) {
1013+
; CHECK-LABEL: @test14__ivar_mod2_is_0(
10791014
; CHECK-NEXT: entry:
10801015
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
10811016
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -1123,10 +1058,10 @@ if.end: ; preds = %if.then, %for.body
11231058
br i1 %exitcond, label %for.cond.cleanup, label %for.body
11241059
}
11251060

1126-
; Similar to @test7, we need to peel one extra iteration, and we can't do that
1061+
; Similar to @test6, we need to peel one extra iteration, and we can't do that
11271062
; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
1128-
define void @test16(i32 %k) {
1129-
; CHECK-LABEL: @test16(
1063+
define void @test15(i32 %k) {
1064+
; CHECK-LABEL: @test15(
11301065
; CHECK-NEXT: for.body.lr.ph:
11311066
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
11321067
; CHECK: for.body:
@@ -1164,10 +1099,10 @@ for.end:
11641099
ret void
11651100
}
11661101

1167-
; Similar to @test8, we need to peel one extra iteration, and we can't do that
1102+
; Similar to @test7, we need to peel one extra iteration, and we can't do that
11681103
; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
1169-
define void @test17(i32 %k) {
1170-
; CHECK-LABEL: @test17(
1104+
define void @test16(i32 %k) {
1105+
; CHECK-LABEL: @test16(
11711106
; CHECK-NEXT: for.body.lr.ph:
11721107
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
11731108
; CHECK: for.body:

0 commit comments

Comments
 (0)