Adds support for lowering `do concurrent` nests from the PFT (pre-FIR tree)
to the new `fir.do_concurrent` MLIR op and to its special terminator,
`fir.do_concurrent.loop`, which models the actual loop nest.
To that end, this PR emits the allocations for the iteration variables
within the block of the `fir.do_concurrent` op and creates a region for
the `fir.do_concurrent.loop` op that takes one argument per iteration
range of the input `do concurrent` construct.
For example, given the following input:
```fortran
do concurrent(i=1:10, j=11:20)
end do
```
the changes in this PR emit the following MLIR:
```mlir
fir.do_concurrent {
  %22 = fir.alloca i32 {bindc_name = "i"}
  %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
  %24 = fir.alloca i32 {bindc_name = "j"}
  %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
  fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) {
    %26 = fir.convert %arg1 : (index) -> i32
    fir.store %26 to %23#0 : !fir.ref<i32>
    %27 = fir.convert %arg2 : (index) -> i32
    fir.store %27 to %25#0 : !fir.ref<i32>
  }
}
```
53 lines
2.1 KiB
Fortran
53 lines
2.1 KiB
Fortran
! Fails until we update the pass to use the `fir.do_concurrent` op.
! XFAIL: *

! Tests mapping of a basic `do concurrent` loop to `!$omp parallel do`.
!
! The source is compiled by two different drivers below; the output of each
! is matched against the single set of FileCheck patterns in this file.

! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
! RUN: | FileCheck %s
! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
! RUN: | FileCheck %s

! CHECK-LABEL: do_concurrent_basic
program do_concurrent_basic
  ! Capture the declaration of the 10-element array `a` so that the element
  ! access matched inside the loop body can be tied back to it.
  ! CHECK: %[[ARR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)

  implicit none
  integer :: a(10)
  integer :: i

  ! No `fir.do_loop` may appear before the loop bounds are materialized.
  ! CHECK-NOT: fir.do_loop

  ! Bounds 1 and 10 are converted from i32 to `index`; the step is an
  ! `index` constant 1. All three are captured for the loop nest below.
  ! CHECK: %[[C1:.*]] = arith.constant 1 : i32
  ! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index
  ! CHECK: %[[C10:.*]] = arith.constant 10 : i32
  ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index
  ! CHECK: %[[STEP:.*]] = arith.constant 1 : index

  ! CHECK: omp.parallel {

  ! The iteration variable `i` is allocated and declared as the first two
  ! operations inside the parallel region.
  ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"}
  ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)

  ! Worksharing loop over the inclusive range [LB, UB]. The body stores the
  ! induction value (converted back to i32) into `i`, then performs the
  ! assignment `a(i) = i` through a designate/assign pair.
  ! CHECK: omp.wsloop {
  ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
  ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32
  ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#0 : !fir.ref<i32>
  ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32>
  ! CHECK-NEXT: %[[IV_VAL2:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32>
  ! CHECK-NEXT: %[[IV_VAL_I64:.*]] = fir.convert %[[IV_VAL2]] : (i32) -> i64
  ! CHECK-NEXT: %[[ARR_ACCESS:.*]] = hlfir.designate %[[ARR]]#0 (%[[IV_VAL_I64]]) : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
  ! CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref<i32>
  ! CHECK-NEXT: omp.yield
  ! CHECK-NEXT: }
  ! CHECK-NEXT: }

  ! CHECK-NEXT: omp.terminator
  ! CHECK-NEXT: }
  do concurrent (i=1:10)
    a(i) = i
  end do

  ! No `fir.do_loop` may appear after the parallel region either.
  ! CHECK-NOT: fir.do_loop
end program do_concurrent_basic