-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[flang][acc] Fix the indexing of the reduction combiner for multidimensional static arrays #155536
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-openacc Author: None (khaki3) Changes: In the following example of reducing a static 2D array, we have incorrect coordinates for array access in the reduction combiner. This PR reverses the order of the induction variables used for such array indexing. For other cases of static arrays, we reverse the loop order as well so that the innermost loop can handle the innermost dimension. program main
implicit none
integer, parameter :: m = 2
integer, parameter :: n = 10
integer :: r(n,m), i
r = 0
!$acc parallel loop reduction(+:r(:n,:m))
do i = 1, n
r(i, 1) = i
enddo
print *, r
end program main Currently, we have: fir.do_loop %arg2 = %c0 to %c1 step %c1 {
fir.do_loop %arg3 = %c0 to %c9 step %c1 {
%0 = fir.coordinate_of %arg0, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
%1 = fir.coordinate_of %arg1, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32> We'll obtain: fir.do_loop %arg2 = %c0 to %c1 step %c1 {
fir.do_loop %arg3 = %c0 to %c9 step %c1 {
%0 = fir.coordinate_of %arg0, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
%1 = fir.coordinate_of %arg1, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32> Full diff: https://github.com/llvm/llvm-project/pull/155536.diff 3 Files Affected:
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 35edcb0926b69..7a84b21913bae 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1575,7 +1575,7 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
if (bounds.empty()) {
llvm::SmallVector<mlir::Value> extents;
mlir::Type idxTy = builder.getIndexType();
- for (auto extent : seqTy.getShape()) {
+ for (auto extent : llvm::reverse(seqTy.getShape())) {
mlir::Value lb = mlir::arith::ConstantOp::create(
builder, loc, idxTy, builder.getIntegerAttr(idxTy, 0));
mlir::Value ub = mlir::arith::ConstantOp::create(
@@ -1607,12 +1607,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
}
} else {
// Lowerbound, upperbound and step are passed as block arguments.
- [[maybe_unused]] unsigned nbRangeArgs =
+ unsigned nbRangeArgs =
recipe.getCombinerRegion().getArguments().size() - 2;
assert((nbRangeArgs / 3 == seqTy.getDimension()) &&
"Expect 3 block arguments per dimension");
- for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size();
- i += 3) {
+ for (int i = nbRangeArgs - 1; i >= 2; i -= 3) {
mlir::Value lb = recipe.getCombinerRegion().getArgument(i);
mlir::Value ub = recipe.getCombinerRegion().getArgument(i + 1);
mlir::Value step = recipe.getCombinerRegion().getArgument(i + 2);
@@ -1623,8 +1622,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
ivs.push_back(loop.getInductionVar());
}
}
- auto addr1 = fir::CoordinateOp::create(builder, loc, refTy, value1, ivs);
- auto addr2 = fir::CoordinateOp::create(builder, loc, refTy, value2, ivs);
+ llvm::SmallVector<mlir::Value> reversedIvs(ivs.rbegin(), ivs.rend());
+ auto addr1 =
+ fir::CoordinateOp::create(builder, loc, refTy, value1, reversedIvs);
+ auto addr2 =
+ fir::CoordinateOp::create(builder, loc, refTy, value2, reversedIvs);
auto load1 = fir::LoadOp::create(builder, loc, addr1);
auto load2 = fir::LoadOp::create(builder, loc, addr2);
mlir::Value res =
diff --git a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
index 5bb751678ed53..02a152c9d7ae2 100644
--- a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
@@ -381,8 +381,8 @@
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -427,8 +427,8 @@
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -612,8 +612,8 @@
! CHECK: %[[UB2:.*]] = arith.constant 99 : index
! CHECK: %[[STEP2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -641,8 +641,8 @@
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 035b38b8a4da4..2a896c6b8d771 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -423,15 +423,15 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 9 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
-! CHECK: %[[UB1:.*]] = arith.constant 9 : index
+! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -469,15 +469,15 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 9 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
-! CHECK: %[[UB1:.*]] = arith.constant 9 : index
+! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -650,7 +650,7 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10x2xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10x2xi32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 1 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
@@ -658,11 +658,11 @@
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
! CHECK: %[[LB2:.*]] = arith.constant 0 : index
-! CHECK: %[[UB2:.*]] = arith.constant 1 : index
+! CHECK: %[[UB2:.*]] = arith.constant 99 : index
! CHECK: %[[STEP2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -683,15 +683,15 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 9 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
-! CHECK: %[[UB1:.*]] = arith.constant 9 : index
+! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
|
@llvm/pr-subscribers-flang-fir-hlfir Author: None (khaki3) Changes: In the following example of reducing a static 2D array, we have incorrect coordinates for array access in the reduction combiner. This PR reverses the order of the induction variables used for such array indexing. For other cases of static arrays, we reverse the loop order as well so that the innermost loop can handle the innermost dimension. program main
implicit none
integer, parameter :: m = 2
integer, parameter :: n = 10
integer :: r(n,m), i
r = 0
!$acc parallel loop reduction(+:r(:n,:m))
do i = 1, n
r(i, 1) = i
enddo
print *, r
end program main Currently, we have: fir.do_loop %arg2 = %c0 to %c1 step %c1 {
fir.do_loop %arg3 = %c0 to %c9 step %c1 {
%0 = fir.coordinate_of %arg0, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
%1 = fir.coordinate_of %arg1, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32> We'll obtain: fir.do_loop %arg2 = %c0 to %c1 step %c1 {
fir.do_loop %arg3 = %c0 to %c9 step %c1 {
%0 = fir.coordinate_of %arg0, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
%1 = fir.coordinate_of %arg1, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32> Full diff: https://github.com/llvm/llvm-project/pull/155536.diff 3 Files Affected:
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 35edcb0926b69..7a84b21913bae 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1575,7 +1575,7 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
if (bounds.empty()) {
llvm::SmallVector<mlir::Value> extents;
mlir::Type idxTy = builder.getIndexType();
- for (auto extent : seqTy.getShape()) {
+ for (auto extent : llvm::reverse(seqTy.getShape())) {
mlir::Value lb = mlir::arith::ConstantOp::create(
builder, loc, idxTy, builder.getIntegerAttr(idxTy, 0));
mlir::Value ub = mlir::arith::ConstantOp::create(
@@ -1607,12 +1607,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
}
} else {
// Lowerbound, upperbound and step are passed as block arguments.
- [[maybe_unused]] unsigned nbRangeArgs =
+ unsigned nbRangeArgs =
recipe.getCombinerRegion().getArguments().size() - 2;
assert((nbRangeArgs / 3 == seqTy.getDimension()) &&
"Expect 3 block arguments per dimension");
- for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size();
- i += 3) {
+ for (int i = nbRangeArgs - 1; i >= 2; i -= 3) {
mlir::Value lb = recipe.getCombinerRegion().getArgument(i);
mlir::Value ub = recipe.getCombinerRegion().getArgument(i + 1);
mlir::Value step = recipe.getCombinerRegion().getArgument(i + 2);
@@ -1623,8 +1622,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
ivs.push_back(loop.getInductionVar());
}
}
- auto addr1 = fir::CoordinateOp::create(builder, loc, refTy, value1, ivs);
- auto addr2 = fir::CoordinateOp::create(builder, loc, refTy, value2, ivs);
+ llvm::SmallVector<mlir::Value> reversedIvs(ivs.rbegin(), ivs.rend());
+ auto addr1 =
+ fir::CoordinateOp::create(builder, loc, refTy, value1, reversedIvs);
+ auto addr2 =
+ fir::CoordinateOp::create(builder, loc, refTy, value2, reversedIvs);
auto load1 = fir::LoadOp::create(builder, loc, addr1);
auto load2 = fir::LoadOp::create(builder, loc, addr2);
mlir::Value res =
diff --git a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
index 5bb751678ed53..02a152c9d7ae2 100644
--- a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
@@ -381,8 +381,8 @@
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -427,8 +427,8 @@
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -612,8 +612,8 @@
! CHECK: %[[UB2:.*]] = arith.constant 99 : index
! CHECK: %[[STEP2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -641,8 +641,8 @@
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 035b38b8a4da4..2a896c6b8d771 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -423,15 +423,15 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 9 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
-! CHECK: %[[UB1:.*]] = arith.constant 9 : index
+! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -469,15 +469,15 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 9 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
-! CHECK: %[[UB1:.*]] = arith.constant 9 : index
+! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -650,7 +650,7 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10x2xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10x2xi32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 1 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
@@ -658,11 +658,11 @@
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
! CHECK: %[[LB2:.*]] = arith.constant 0 : index
-! CHECK: %[[UB2:.*]] = arith.constant 1 : index
+! CHECK: %[[UB2:.*]] = arith.constant 99 : index
! CHECK: %[[STEP2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -683,15 +683,15 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 9 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
-! CHECK: %[[UB1:.*]] = arith.constant 9 : index
+! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you!
In the following example of reducing a static 2D array, we have incorrect coordinates for array access in the reduction combiner. This PR reverses the order of the induction variables used for such array indexing. For other cases of static arrays, we reverse the loop order as well so that the innermost loop can handle the innermost dimension.
Currently, we have:
We'll obtain: