-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[OpenMPIRBuilder] Fix tripcount not a multiple of tile size #154999
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenMPIRBuilder] Fix tripcount not a multiple of tile size #154999
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-flang-openmp Author: Michael Kruse (Meinersbur) ChangesThe emitted code tests whether the current tile should executing the remainder iterations by checking the logical iteration number is the one after the floor iterations that execute the non-remainder iterations. There are two counts of how many iterations there are: Those of non-remainder iterations (simply rounded-down division of tripcount and tile size), and those including an additional floor iteration for the remainder iterations. The code was used the wrong one that caused the condition to never match. Full diff: https://github.com/llvm/llvm-project/pull/154999.diff 6 Files Affected:
diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
index 8780d51de8a36..a9514e1e7d145 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
@@ -79,7 +79,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d)
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
// CHECK: omp_floor0.body:
// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
-// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 13
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
// CHECK: omp_tile0.preheader:
diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
index 728f67ccf2843..8ca000a05792f 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
@@ -85,7 +85,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float *
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
// CHECK: omp_floor0.body:
// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
-// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 4
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
// CHECK: omp_tile0.preheader:
diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
index f41f1fe5ce719..5fbcf8f2d030c 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
@@ -87,7 +87,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
// CHECK: omp_floor0.body:
// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
-// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 4
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
// CHECK: omp_tile0.preheader:
diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
index 3c2407851e5a5..9a28c0c1bf713 100644
--- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
+++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
@@ -75,7 +75,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
// CHECK: omp_floor0.body:
// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
-// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 2
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
// CHECK: omp_tile0.preheader:
diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
index a03bd47ca8b8f..24d42d265d6a6 100644
--- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
+++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
@@ -59,7 +59,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]]
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
// CHECK: omp_floor0.body:
-// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[TMP3]]
// CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP4]], i32 8
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
// CHECK: omp_tile0.preheader:
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 989bcf45e0006..606f2e03821a3 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5582,13 +5582,13 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
// Compute the trip counts of the floor loops.
Builder.SetCurrentDebugLocation(DL);
Builder.restoreIP(OutermostLoop->getPreheaderIP());
- SmallVector<Value *, 4> FloorCount, FloorRems;
+ SmallVector<Value *, 4> FloorCompleteCount, FloorCount, FloorRems;
for (int i = 0; i < NumLoops; ++i) {
Value *TileSize = TileSizes[i];
Value *OrigTripCount = OrigTripCounts[i];
Type *IVType = OrigTripCount->getType();
- Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
+ Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
// 0 if tripcount divides the tilesize, 1 otherwise.
@@ -5602,11 +5602,12 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
- FloorTripCount =
- Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
+ Value *FloorTripCount =
+ Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
"omp_floor" + Twine(i) + ".tripcount", true);
// Remember some values for later use.
+ FloorCompleteCount.push_back(FloorCompleteTripCount);
FloorCount.push_back(FloorTripCount);
FloorRems.push_back(FloorTripRem);
}
@@ -5661,7 +5662,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
Value *TileSize = TileSizes[i];
Value *FloorIsEpilogue =
- Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
+ Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
Value *TileTripCount =
Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for fixing this. LGTM!
The emitted code tests whether the current tile should executing the remainder iterations by checking the logical iteration number is the one after the floor iterations that execute the non-remainder iterations. There are two counts of how many iterations there are: Those of non-remainder iterations (simply rounded-down division of tripcount and tile size), and those including an additional floor iteration for the remainder iterations. The code was used the wrong one that caused the condition to never match.