diff --git a/flang/test/Transforms/DoConcurrent/allocatable.f90 b/flang/test/Transforms/DoConcurrent/allocatable.f90 new file mode 100644 index 0000000000000..03962f150eb95 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/allocatable.f90 @@ -0,0 +1,29 @@ +! Verifies that proper `omp.map.bounds` ops are emitted when an allocatable is +! implicitly mapped by a `do concurrent` loop. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s +program main + implicit none + + integer,parameter :: n = 1000000 + real, allocatable, dimension(:) :: y + integer :: i + + allocate(y(1:n)) + + do concurrent(i=1:n) + y(i) = 42 + end do + + deallocate(y) +end program main + +! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFEy"} +! CHECK: %[[Y_VAL:.*]] = fir.load %[[Y_DECL]]#0 +! CHECK: %[[Y_DIM0:.*]]:3 = fir.box_dims %[[Y_VAL]], %{{c0_.*}} +! CHECK: %[[Y_LB:.*]] = arith.constant 0 : index +! CHECK: %[[Y_UB:.*]] = arith.subi %[[Y_DIM0]]#1, %{{c1_.*}} : index +! CHECK: %[[Y_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[Y_LB]] : index) upper_bound(%[[Y_UB]] : index) extent(%[[Y_DIM0]]#1 : index) +! CHECK: %[[MEM_MAP:.*]] = omp.map.info {{.*}} bounds(%[[Y_BOUNDS]]) +! CHECK: omp.map.info var_ptr(%[[Y_DECL]]#1 : {{.*}}) {{.*}} members(%[[MEM_MAP]] : {{.*}}) diff --git a/flang/test/Transforms/DoConcurrent/host_eval.f90 b/flang/test/Transforms/DoConcurrent/host_eval.f90 new file mode 100644 index 0000000000000..7d16a91ae6941 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/host_eval.f90 @@ -0,0 +1,63 @@ +! Tests `host_eval` clause code-gen and loop nest bounds on host vs. device. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +! RUN: -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s --check-prefix=HOST + +! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp \ +! 
RUN: -fopenmp-is-target-device -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s --check-prefix=DEVICE + +program do_concurrent_host_eval + implicit none + integer :: i, j + + do concurrent (i=1:10, j=1:20) + end do +end program do_concurrent_host_eval + +! HOST: omp.target host_eval( +! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_LB:[^,]+]], +! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_UB:[^,]+]], +! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_ST:[^,]+]], +! HOST-SAME: %{{[^[:space:]]+}} -> %[[J_LB:[^,]+]], +! HOST-SAME: %{{[^[:space:]]+}} -> %[[J_UB:[^,]+]], +! HOST-SAME: %{{[^[:space:]]+}} -> %[[J_ST:[^,]+]] : {{.*}}) map_entries + +! HOST: omp.loop_nest ({{.*}}, {{.*}}) : index = (%[[I_LB]], %[[J_LB]]) to +! HOST-SAME: (%[[I_UB]], %[[J_UB]]) inclusive step +! HOST-SAME: (%[[I_ST]], %[[J_ST]]) + +! DEVICE: omp.target map_entries( +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_LB_MAP:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_UB_MAP:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ST_MAP:[^,]+]], + +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_LB_MAP:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_UB_MAP:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ST_MAP:[^,]+]], + +! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}} : {{.*}}) + +! DEVICE: %[[I_LB_DECL:.*]]:2 = hlfir.declare %[[I_LB_MAP]] +! DEVICE: %[[I_LB:.*]] = fir.load %[[I_LB_DECL]]#1 : !fir.ref + +! DEVICE: %[[I_UB_DECL:.*]]:2 = hlfir.declare %[[I_UB_MAP]] +! DEVICE: %[[I_UB:.*]] = fir.load %[[I_UB_DECL]]#1 : !fir.ref + +! DEVICE: %[[I_ST_DECL:.*]]:2 = hlfir.declare %[[I_ST_MAP]] +! DEVICE: %[[I_ST:.*]] = fir.load %[[I_ST_DECL]]#1 : !fir.ref + +! DEVICE: %[[J_LB_DECL:.*]]:2 = hlfir.declare %[[J_LB_MAP]] +! DEVICE: %[[J_LB:.*]] = fir.load %[[J_LB_DECL]]#1 : !fir.ref + +! DEVICE: %[[J_UB_DECL:.*]]:2 = hlfir.declare %[[J_UB_MAP]] +! DEVICE: %[[J_UB:.*]] = fir.load %[[J_UB_DECL]]#1 : !fir.ref + +! 
DEVICE: %[[J_ST_DECL:.*]]:2 = hlfir.declare %[[J_ST_MAP]] +! DEVICE: %[[J_ST:.*]] = fir.load %[[J_ST_DECL]]#1 : !fir.ref + +! DEVICE: omp.loop_nest ({{.*}}, {{.*}}) : index = (%[[I_LB]], %[[J_LB]]) to +! DEVICE-SAME: (%[[I_UB]], %[[J_UB]]) inclusive step +! DEVICE-SAME: (%[[I_ST]], %[[J_ST]]) diff --git a/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 b/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 index f82696669eca6..28429cebf8587 100644 --- a/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 +++ b/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 @@ -1,9 +1,14 @@ +! Fails until we update the pass to use the `fir.do_concurrent` op. + ! Tests that "loop-local values" are properly handled by localizing them to the ! body of the loop nest. See `collectLoopLocalValues` and `localizeLoopLocalValue` ! for a definition of "loop-local values" and how they are handled. ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \ -! RUN: | FileCheck %s +! RUN: | FileCheck %s --check-prefixes=COMMON + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s --check-prefixes=COMMON,DEVICE module struct_mod type test_struct integer, allocatable :: x_ @@ -46,17 +51,25 @@ program main print *, "total =", total end program main -! CHECK: omp.parallel { -! CHECK: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box>}> {bindc_name = ".result"} -! CHECK: omp.wsloop { -! CHECK: omp.loop_nest {{.*}} { -! CHECK: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components -! CHECK: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]] -! CHECK: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]] -! CHECK: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]] -! CHECK: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]]) -! CHECK: omp.yield -! CHECK: } -! CHECK: } -! CHECK: omp.terminator -! CHECK: } +! DEVICE: omp.target {{.*}} { +! 
DEVICE: omp.teams { +! COMMON: omp.parallel { +! COMMON: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box>}> {bindc_name = ".result"} +! DEVICE: omp.distribute { +! COMMON: omp.wsloop { +! COMMON: omp.loop_nest {{.*}} { +! COMMON: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components +! COMMON: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]] +! COMMON: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]] +! COMMON: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]] +! COMMON: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]]) +! COMMON: omp.yield +! COMMON: } +! COMMON: } +! DEVICE: } +! COMMON: omp.terminator +! COMMON: } +! DEVICE: omp.terminator +! DEVICE: } +! DEVICE: omp.terminator +! DEVICE: } diff --git a/flang/test/Transforms/DoConcurrent/map_shape_info.f90 b/flang/test/Transforms/DoConcurrent/map_shape_info.f90 new file mode 100644 index 0000000000000..3dca1340ae6b9 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/map_shape_info.f90 @@ -0,0 +1,104 @@ +! Tests that array shape info (extents and start indices) is implicitly mapped +! when a `do concurrent` loop is converted to an `omp.target` region. +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s + +program do_concurrent_shape + implicit none + integer :: a(10, 20) + integer :: i, j + + do concurrent (i=1:10, j=1:20) + a(i, j) = i * j + end do +end program do_concurrent_shape + +! CHECK: fir.store %{{c10.*}} to %[[DIM0_EXT:.*]] : !fir.ref +! CHECK: fir.store %{{c20.*}} to %[[DIM1_EXT:.*]] : !fir.ref + +! CHECK: omp.map.info +! CHECK: omp.map.info +! CHECK: omp.map.info + +! CHECK: omp.map.info +! CHECK: omp.map.info +! CHECK: omp.map.info + +! CHECK: omp.map.info +! CHECK: omp.map.info +! CHECK: omp.map.info + +! CHECK: %[[DIM0_EXT_MAP:.*]] = omp.map.info +! CHECK-SAME: var_ptr(%[[DIM0_EXT]] : !fir.ref, index) +! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! 
CHECK-SAME: capture(ByCopy) -> !fir.ref {name = "_QFEa.extent.dim0"} + +! CHECK: %[[DIM1_EXT_MAP:.*]] = omp.map.info +! CHECK-SAME: var_ptr(%[[DIM1_EXT]] : !fir.ref, index) +! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! CHECK-SAME: capture(ByCopy) -> !fir.ref {name = "_QFEa.extent.dim1"} + +! CHECK: omp.target host_eval({{.*}}) map_entries( +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %[[DIM0_EXT_MAP]] -> %[[DIM0_EXT_ARG:[^,]+]], +! CHECK-SAME: %[[DIM1_EXT_MAP]] -> %[[DIM1_EXT_ARG:[^,]+]] : {{.*}}) + +! CHECK-DAG: %[[DIM0_EXT_DEV:.*]] = fir.load %[[DIM0_EXT_ARG]] +! CHECK-DAG: %[[DIM1_EXT_DEV:.*]] = fir.load %[[DIM1_EXT_ARG]] + +! CHECK: %[[SHAPE:.*]] = fir.shape %[[DIM0_EXT_DEV]], %[[DIM1_EXT_DEV]] +! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}}(%[[SHAPE]]) {uniq_name = "_QFEa"} + +subroutine do_concurrent_shape_shift + implicit none + integer :: a(2:10) + integer :: i + + do concurrent (i=1:10) + a(i) = i + end do +end subroutine do_concurrent_shape_shift + +! CHECK: fir.store %{{c2.*}} to %[[DIM0_STRT:.*]] : !fir.ref +! CHECK: fir.store %{{c9.*}} to %[[DIM0_EXT:.*]] : !fir.ref + +! CHECK: omp.map.info +! CHECK: omp.map.info +! CHECK: omp.map.info + +! CHECK: omp.map.info +! CHECK: omp.map.info + +! CHECK: %[[DIM0_STRT_MAP:.*]] = omp.map.info +! CHECK-SAME: var_ptr(%[[DIM0_STRT]] : !fir.ref, index) +! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! CHECK-SAME: capture(ByCopy) -> !fir.ref {name = "_QF{{.*}}Ea.start_idx.dim0"} + +! CHECK: %[[DIM0_EXT_MAP:.*]] = omp.map.info +! 
CHECK-SAME: var_ptr(%[[DIM0_EXT]] : !fir.ref, index) +! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! CHECK-SAME: capture(ByCopy) -> !fir.ref {name = "_QF{{.*}}Ea.extent.dim0"} + +! CHECK: omp.target host_eval({{.*}}) map_entries( +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %[[DIM0_STRT_MAP]] -> %[[DIM0_STRT_ARG:[^,]+]], +! CHECK-SAME: %[[DIM0_EXT_MAP]] -> %[[DIM0_EXT_ARG:[^,]+]] : {{.*}}) + +! CHECK-DAG: %[[DIM0_STRT_DEV:.*]] = fir.load %[[DIM0_STRT_ARG]] +! CHECK-DAG: %[[DIM0_EXT_DEV:.*]] = fir.load %[[DIM0_EXT_ARG]] + +! CHECK: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIM0_STRT_DEV]], %[[DIM0_EXT_DEV]] +! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}}(%[[SHAPE_SHIFT]]) {uniq_name = "_QF{{.*}}Ea"} + diff --git a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 index d0210726de83e..3ea32f9f4cecc 100644 --- a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +++ b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 @@ -1,9 +1,14 @@ +! Fails until we update the pass to use the `fir.do_concurrent` op. + ! Tests mapping of a `do concurrent` loop with multiple iteration ranges. ! RUN: split-file %s %t ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %t/multi_range.f90 -o - \ -! RUN: | FileCheck %s +! RUN: | FileCheck %s --check-prefixes=HOST,COMMON + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %t/multi_range.f90 -o - \ +! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON !--- multi_range.f90 program main @@ -17,56 +22,75 @@ program main end do end -! CHECK: func.func @_QQmain +! COMMON: func.func @_QQmain + +! COMMON: %[[C3:.*]] = arith.constant 3 : i32 +! 
COMMON: %[[LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index +! COMMON: %[[C20:.*]] = arith.constant 20 : i32 +! COMMON: %[[UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index +! COMMON: %[[STEP_I:.*]] = arith.constant 1 : index + +! COMMON: %[[C5:.*]] = arith.constant 5 : i32 +! COMMON: %[[LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index +! COMMON: %[[C40:.*]] = arith.constant 40 : i32 +! COMMON: %[[UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index +! COMMON: %[[STEP_J:.*]] = arith.constant 1 : index + +! COMMON: %[[C7:.*]] = arith.constant 7 : i32 +! COMMON: %[[LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index +! COMMON: %[[C60:.*]] = arith.constant 60 : i32 +! COMMON: %[[UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index +! COMMON: %[[STEP_K:.*]] = arith.constant 1 : index + +! DEVICE: omp.target host_eval( +! DEVICE-SAME: %[[LB_I]] -> %[[LB_I:[[:alnum:]]+]], +! DEVICE-SAME: %[[UB_I]] -> %[[UB_I:[[:alnum:]]+]], +! DEVICE-SAME: %[[STEP_I]] -> %[[STEP_I:[[:alnum:]]+]], +! DEVICE-SAME: %[[LB_J]] -> %[[LB_J:[[:alnum:]]+]], +! DEVICE-SAME: %[[UB_J]] -> %[[UB_J:[[:alnum:]]+]], +! DEVICE-SAME: %[[STEP_J]] -> %[[STEP_J:[[:alnum:]]+]], +! DEVICE-SAME: %[[LB_K]] -> %[[LB_K:[[:alnum:]]+]], +! DEVICE-SAME: %[[UB_K]] -> %[[UB_K:[[:alnum:]]+]], +! DEVICE-SAME: %[[STEP_K]] -> %[[STEP_K:[[:alnum:]]+]] : +! DEVICE-SAME: index, index, index, index, index, index, index, index, index) -! CHECK: %[[C3:.*]] = arith.constant 3 : i32 -! CHECK: %[[LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index -! CHECK: %[[C20:.*]] = arith.constant 20 : i32 -! CHECK: %[[UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index -! CHECK: %[[STEP_I:.*]] = arith.constant 1 : index +! DEVICE: omp.teams -! CHECK: %[[C5:.*]] = arith.constant 5 : i32 -! CHECK: %[[LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index -! CHECK: %[[C40:.*]] = arith.constant 40 : i32 -! CHECK: %[[UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index -! CHECK: %[[STEP_J:.*]] = arith.constant 1 : index +! HOST-NOT: omp.target +! HOST-NOT: omp.teams -! 
CHECK: %[[C7:.*]] = arith.constant 7 : i32 -! CHECK: %[[LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index -! CHECK: %[[C60:.*]] = arith.constant 60 : i32 -! CHECK: %[[UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index -! CHECK: %[[STEP_K:.*]] = arith.constant 1 : index +! COMMON: omp.parallel { -! CHECK: omp.parallel { +! COMMON-NEXT: %[[ITER_VAR_I:.*]] = fir.alloca i32 {bindc_name = "i"} +! COMMON-NEXT: %[[BINDING_I:.*]]:2 = hlfir.declare %[[ITER_VAR_I]] {uniq_name = "_QFEi"} -! CHECK-NEXT: %[[ITER_VAR_I:.*]] = fir.alloca i32 {bindc_name = "i"} -! CHECK-NEXT: %[[BINDING_I:.*]]:2 = hlfir.declare %[[ITER_VAR_I]] {uniq_name = "_QFEi"} +! COMMON-NEXT: %[[ITER_VAR_J:.*]] = fir.alloca i32 {bindc_name = "j"} +! COMMON-NEXT: %[[BINDING_J:.*]]:2 = hlfir.declare %[[ITER_VAR_J]] {uniq_name = "_QFEj"} -! CHECK-NEXT: %[[ITER_VAR_J:.*]] = fir.alloca i32 {bindc_name = "j"} -! CHECK-NEXT: %[[BINDING_J:.*]]:2 = hlfir.declare %[[ITER_VAR_J]] {uniq_name = "_QFEj"} +! COMMON-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"} +! COMMON-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"} -! CHECK-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"} -! CHECK-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"} +! DEVICE: omp.distribute -! CHECK: omp.wsloop { -! CHECK-NEXT: omp.loop_nest -! CHECK-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]]) -! CHECK-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) -! CHECK-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive -! CHECK-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) { +! COMMON: omp.wsloop { +! COMMON-NEXT: omp.loop_nest +! COMMON-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]]) +! COMMON-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) +! COMMON-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive +! COMMON-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) { -! 
CHECK-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]] -! CHECK-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#0 +! COMMON-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]] +! COMMON-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#0 -! CHECK-NEXT: %[[IV_IDX_J:.*]] = fir.convert %[[ARG1]] -! CHECK-NEXT: fir.store %[[IV_IDX_J]] to %[[BINDING_J]]#0 +! COMMON-NEXT: %[[IV_IDX_J:.*]] = fir.convert %[[ARG1]] +! COMMON-NEXT: fir.store %[[IV_IDX_J]] to %[[BINDING_J]]#0 -! CHECK-NEXT: %[[IV_IDX_K:.*]] = fir.convert %[[ARG2]] -! CHECK-NEXT: fir.store %[[IV_IDX_K]] to %[[BINDING_K]]#0 +! COMMON-NEXT: %[[IV_IDX_K:.*]] = fir.convert %[[ARG2]] +! COMMON-NEXT: fir.store %[[IV_IDX_K]] to %[[BINDING_K]]#0 -! CHECK: omp.yield -! CHECK-NEXT: } -! CHECK-NEXT: } +! COMMON: omp.yield +! COMMON-NEXT: } +! COMMON-NEXT: } -! CHECK-NEXT: omp.terminator -! CHECK-NEXT: } +! HOST-NEXT: omp.terminator +! HOST-NEXT: } diff --git a/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 new file mode 100644 index 0000000000000..b6b2136e2d405 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 @@ -0,0 +1,34 @@ +! Tests that we can map "unnamed" and non-reference/non-box values to device; for +! example, values that result from `fir.box_dims` ops. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s + +subroutine test_non_refernece + integer i + real, allocatable :: arr(:) + + associate(a => arr) + do concurrent (i = 1:10) + block + real z(size(a,1)) + end block + end do + end associate +end subroutine test_non_refernece + +! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref, index) +! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref, index) +! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref, index) + +! 
CHECK: %[[DIM_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref, index) +! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! CHECK-SAME: capture(ByCopy) -> !fir.ref {name = ""} + + +! CHECK: omp.target host_eval({{.*}} : index, index, index) +! CHECK-SAME: map_entries({{.*}}, %[[DIM_MAP]] -> %{{.*}} : +! CHECK-SAME: !fir.ref, !fir.ref) + diff --git a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 index 74799359e0476..c87cf392bd5d6 100644 --- a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 +++ b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 @@ -1,8 +1,14 @@ +! Fails until we update the pass to use the `fir.do_concurrent` op. + ! Tests that if `do concurrent` is not perfectly nested in its parent loop, that ! we skip converting the not-perfectly nested `do concurrent` loop. + ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \ -! RUN: | FileCheck %s +! RUN: | FileCheck %s --check-prefixes=COMMON + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON program main integer, parameter :: n = 10 @@ -19,28 +25,46 @@ program main end do end -! CHECK: omp.parallel { -! CHECK: omp.wsloop { -! CHECK: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} { -! CHECK: fir.do_concurrent { -! CHECK: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j"} -! CHECK: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] +! DEVICE: omp.target {{.*}}map_entries( +! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[X_ARG:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! 
DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^:]+}} : +! DEVICE-SAME: {{.*}}) { + +! DEVICE: omp.teams + +! COMMON: omp.parallel { + +! DEVICE: omp.distribute + +! COMMON: omp.wsloop { +! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} { +! COMMON: fir.do_concurrent { + +! COMMON: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j"} +! COMMON: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] -! CHECK: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"} -! CHECK: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]] +! COMMON: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"} +! COMMON: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]] -! CHECK: fir.do_concurrent.loop (%[[J_IV:.*]], %[[K_IV:.*]]) = {{.*}} { -! CHECK: %[[J_IV_CONV:.*]] = fir.convert %[[J_IV]] : (index) -> i32 -! CHECK: fir.store %[[J_IV_CONV]] to %[[ORIG_J_DECL]]#0 +! COMMON: fir.do_concurrent.loop (%[[J_IV:.*]], %[[K_IV:.*]]) = {{.*}} { +! COMMON: %[[J_IV_CONV:.*]] = fir.convert %[[J_IV]] : (index) -> i32 +! COMMON: fir.store %[[J_IV_CONV]] to %[[ORIG_J_DECL]]#0 -! CHECK: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32 -! CHECK: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#0 -! CHECK: } -! CHECK: } -! CHECK: omp.yield -! CHECK: } -! CHECK: } -! CHECK: omp.terminator -! CHECK: } +! COMMON: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32 +! COMMON: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#0 +! COMMON: } +! COMMON: } +! COMMON: omp.yield +! COMMON: } +! COMMON: } +! COMMON: omp.terminator +! COMMON: } diff --git a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 new file mode 100644 index 0000000000000..e38474a68747f --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 @@ -0,0 +1,42 @@ +! Tests `do concurrent` mapping when mapped value(s) depend on values defined +! outside the target region; e.g. 
the size of the array is dynamic. This needs +! to be handled by localizing these region outsiders by either cloning them in +! the region or in case we cannot do that, map them and use the mapped values. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s + +subroutine foo(n) + implicit none + integer :: n + integer :: i + integer, dimension(n) :: a + + do concurrent(i=1:10) + a(i) = i + end do +end subroutine + +! CHECK-DAG: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFfooEi"} +! CHECK-DAG: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFfooEa"} + +! CHECK-DAG: %[[I_MAP:.*]] = omp.map.info var_ptr(%[[I_DECL]]#1 : {{.*}}) {{.*}} {name = "_QFfooEi"} +! CHECK-DAG: %[[A_MAP:.*]] = omp.map.info var_ptr(%[[A_DECL]]#1 : {{.*}}) {{.*}} {name = "_QFfooEa"} +! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "_QFfooEa.extent.dim0"} + +! CHECK: omp.target +! CHECK-SAME: map_entries( +! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, +! CHECK-SAME: %[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], +! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]], +! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}}) +! CHECK-SAME: {{.*}} { + +! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]] +! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]] +! CHECK-DAG: %{{.*}} = fir.load %[[N_ARG]] + +! CHECK: omp.terminator +! CHECK: } diff --git a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 new file mode 100644 index 0000000000000..2dada05396ad6 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 @@ -0,0 +1,68 @@ +! Tests that if `do concurrent` is indirectly nested in its parent loop, that we +! skip converting the indirectly nested `do concurrent` loop. + +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \ +! RUN: | FileCheck %s --check-prefixes=HOST,COMMON + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON + +program main + integer, parameter :: n = 10 + integer, parameter :: m = 20 + integer, parameter :: l = 30 + integer x; + integer :: a(n, m, l) + + do concurrent(i=1:n) + do j=1,m + do concurrent(k=1:l) + a(i,j,k) = i * j + k + end do + end do + end do +end + +! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j", {{.*}}} +! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] + +! DEVICE: omp.target {{.*}}map_entries( +! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^,]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^:]+}} : +! DEVICE-SAME: {{.*}}) { + +! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} + +! DEVICE: omp.teams + +! COMMON: omp.parallel { + +! DEVICE: omp.distribute + +! COMMON: omp.wsloop { +! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} { +! COMMON: fir.do_loop {{.*}} iter_args(%[[J_IV:.*]] = {{.*}}) -> {{.*}} { +! HOST: fir.store %[[J_IV]] to %[[ORIG_J_DECL]]#0 +! DEVICE: fir.store %[[J_IV]] to %[[TARGET_J_DECL]]#0 + +! COMMON: fir.do_concurrent { +! COMMON: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"} +! COMMON: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]] +! COMMON: fir.do_concurrent.loop (%[[K_IV:.*]]) = {{.*}} { +! COMMON: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32 +! COMMON: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#0 +! 
COMMON: } +! COMMON: } +! COMMON: } +! COMMON: omp.yield +! COMMON: } +! COMMON: } +! COMMON: omp.terminator +! COMMON: }