Files
clang-p2996/flang/test/Transforms/loop-versioning.fir
Mats Petersson b812932b35 [FLANG] Change loop versioning to use shift instead of divide
Despite me being convinced that the use of divide didn't produce any
divide instructions, it does in fact add more instructions than using
a plain shift operation.

This patch simply changes the divide to a shift right, with an
assert to check that the "divisor" is a power of two.

Reviewed By: kiranchandramohan, tblah

Differential Revision: https://reviews.llvm.org/D151880
2023-06-01 19:29:57 +01:00

522 lines
23 KiB
Plaintext

// RUN: fir-opt --loop-versioning %s | FileCheck %s
// subroutine sum1d(a, n)
// real*8 :: a(:)
// integer :: n
// real*8 :: sum
// integer :: i
// sum = 0
// do i=1,n
// sum = sum + a(i)
// end do
// end subroutine sum1d
module {
// Test input for the 1-D case: accumulates the elements of the boxed rank-1 f64
// array %arg0 for i = 1..n, with n loaded from %arg1 (see the Fortran source above).
func.func @sum1d(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}) {
// Stack slots for the loop variable "i" and the accumulator "sum".
%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmoduleFsum1dEi"}
%1 = fir.alloca f64 {bindc_name = "sum", uniq_name = "_QMmoduleFsum1dEsum"}
// sum = 0
%cst = arith.constant 0.000000e+00 : f64
fir.store %cst to %1 : !fir.ref<f64>
// Loop bounds as index values: lower bound 1, upper bound n, step 1.
%c1_i32 = arith.constant 1 : i32
%2 = fir.convert %c1_i32 : (i32) -> index
%3 = fir.load %arg1 : !fir.ref<i32>
%4 = fir.convert %3 : (i32) -> index
%c1 = arith.constant 1 : index
// Initial i32 value of the loop variable, carried through the loop as an iter_arg.
%5 = fir.convert %2 : (index) -> i32
// The loop yields two results: the next index value and the next i32 loop variable.
%6:2 = fir.do_loop %arg2 = %2 to %4 step %c1 iter_args(%arg3 = %5) -> (index, i32) {
fir.store %arg3 to %0 : !fir.ref<i32>
%7 = fir.load %1 : !fir.ref<f64>
%8 = fir.load %0 : !fir.ref<i32>
%9 = fir.convert %8 : (i32) -> i64
// Subtract 1 to turn the 1-based loop variable into a 0-based coordinate.
%c1_i64 = arith.constant 1 : i64
%10 = arith.subi %9, %c1_i64 : i64
// Element address taken through the box; the checks below expect the versioned
// loop to replace this with a coordinate_of on the fir.box_addr result.
%11 = fir.coordinate_of %arg0, %10 : (!fir.box<!fir.array<?xf64>>, i64) -> !fir.ref<f64>
%12 = fir.load %11 : !fir.ref<f64>
// sum = sum + a(i)
%13 = arith.addf %7, %12 fastmath<contract> : f64
fir.store %13 to %1 : !fir.ref<f64>
// Advance both the index induction value and the i32 loop variable by the step.
%14 = arith.addi %arg2, %c1 : index
%15 = fir.convert %c1 : (index) -> i32
%16 = fir.load %0 : !fir.ref<i32>
%17 = arith.addi %16, %15 : i32
fir.result %14, %17 : index, i32
}
// Write the final i32 loop-variable value back to "i".
fir.store %6#1 to %0 : !fir.ref<i32>
return
}
// Note this only checks the expected transformation, not the entire generated code:
// CHECK-LABEL: func.func @sum1d(
// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf64>> {{.*}})
// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARG0]], %[[ZERO]] : {{.*}}
// CHECK: %[[SIZE:.*]] = arith.constant 8 : index
// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[DIMS]]#2, %[[SIZE]]
// CHECK: %[[IF_RES:.*]]:2 = fir.if %[[CMP]] -> {{.*}}
// CHECK: %[[NEWARR:.*]] = fir.convert %[[ARG0]]
// CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[NEWARR]] : {{.*}} -> !fir.ref<!fir.array<?xf64>>
// CHECK: %[[LOOP_RES:.*]]:2 = fir.do_loop {{.*}}
// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %{{.*}} : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
// CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>
// CHECK: fir.result %{{.*}}, %{{.*}}
// CHECK: }
// CHECK: fir.result %[[LOOP_RES]]#0, %[[LOOP_RES]]#1
// CHECK: } else {
// CHECK: %[[LOOP_RES2:.*]]:2 = fir.do_loop {{.*}}
// CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.box<!fir.array<?xf64>>, i64) -> !fir.ref<f64>
// CHECK: %{{.*}}= fir.load %[[COORD2]] : !fir.ref<f64>
// CHECK: fir.result %{{.*}}, %{{.*}}
// CHECK: }
// CHECK: fir.result %[[LOOP_RES2]]#0, %[[LOOP_RES2]]#1
// CHECK: }
// CHECK: fir.store %[[IF_RES]]#1 to %{{.*}}
// CHECK: return
// -----
// Test that the loop-versioning pass leaves loops alone when the array is passed
// as a plain reference (!fir.ref) rather than as a box (!fir.box).
// Test input with the same loop shape as @sum1d, but %arg0 is a !fir.ref to the
// array instead of a !fir.box. The checks after this function expect the
// coordinate_of to keep using %arg0 directly.
func.func @sum1dfixed(%arg0: !fir.ref<!fir.array<?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}) {
// Stack slots for the loop variable "i" and the accumulator "sum".
%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsum1dfixedEi"}
%1 = fir.alloca f64 {bindc_name = "sum", uniq_name = "_QFsum1dfixedEsum"}
// sum = 0
%cst = arith.constant 0.000000e+00 : f64
fir.store %cst to %1 : !fir.ref<f64>
// Loop bounds as index values: lower bound 1, upper bound loaded from %arg1, step 1.
%c1_i32 = arith.constant 1 : i32
%2 = fir.convert %c1_i32 : (i32) -> index
%3 = fir.load %arg1 : !fir.ref<i32>
%4 = fir.convert %3 : (i32) -> index
%c1 = arith.constant 1 : index
// Initial i32 value of the loop variable, carried through the loop as an iter_arg.
%5 = fir.convert %2 : (index) -> i32
%6:2 = fir.do_loop %arg2 = %2 to %4 step %c1 iter_args(%arg3 = %5) -> (index, i32) {
fir.store %arg3 to %0 : !fir.ref<i32>
%7 = fir.load %1 : !fir.ref<f64>
%8 = fir.load %0 : !fir.ref<i32>
%9 = fir.convert %8 : (i32) -> i64
// Subtract 1 to turn the 1-based loop variable into a 0-based coordinate.
%c1_i64 = arith.constant 1 : i64
%10 = arith.subi %9, %c1_i64 : i64
// Element address computed directly from the array reference (no box involved).
%11 = fir.coordinate_of %arg0, %10 : (!fir.ref<!fir.array<?xf64>>, i64) -> !fir.ref<f64>
%12 = fir.load %11 : !fir.ref<f64>
// sum = sum + a(i)
%13 = arith.addf %7, %12 fastmath<contract> : f64
fir.store %13 to %1 : !fir.ref<f64>
// Advance both the index induction value and the i32 loop variable by the step.
%14 = arith.addi %arg2, %c1 : index
%15 = fir.convert %c1 : (index) -> i32
%16 = fir.load %0 : !fir.ref<i32>
%17 = arith.addi %16, %15 : i32
fir.result %14, %17 : index, i32
}
// Write the final i32 loop-variable value back to "i".
fir.store %6#1 to %0 : !fir.ref<i32>
return
}
// CHECK-LABEL: func.func @sum1dfixed(
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf64>> {{.*}})
// CHECK: fir.do_loop {{.*}}
// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[ARG0]], {{.*}}
// CHECK: %{{.*}} = fir.load %[[COORD]]
// -----
// RUN: fir-opt --loop-versioning %s | FileCheck %s
// Check that "no result" from a versioned loop works correctly
// This code was the basis for this, but `read` is replaced with a function called Func
// subroutine test3(x, y)
// integer :: y(:)
// integer :: x(:)
// read(*,*) x(y)
// end subroutine
// Test input for a loop that produces no results (no iter_args): each iteration
// loads an element of the boxed array %arg1 ("y"), uses it as an index into the
// boxed array %arg0 ("x") and passes the resulting address to @Func.
func.func @test3(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "x"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
// Query dimension 0 of "y"; result #1 feeds both the slice and the loop upper bound.
%c0 = arith.constant 0 : index
%3:3 = fir.box_dims %arg1, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
%c1 = arith.constant 1 : index
// Slice triple (1, %3#1, 1) applied to "x" in the fir.array_coor below.
%4 = fir.slice %c1, %3#1, %c1 : (index, index, index) -> !fir.slice<1>
%c1_0 = arith.constant 1 : index
%c0_1 = arith.constant 0 : index
// Loop runs from 0 to %3#1 - 1 with step 1.
%5 = arith.subi %3#1, %c1_0 : index
fir.do_loop %arg2 = %c0_1 to %5 step %c1_0 {
// Access to "y" through its box, indexed by the loop induction variable.
%7 = fir.coordinate_of %arg1, %arg2 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
%8 = fir.load %7 : !fir.ref<i32>
%9 = fir.convert %8 : (i32) -> index
// Access to "x" is indexed by the value loaded from "y", not by the induction variable.
%10 = fir.array_coor %arg0 [%4] %9 : (!fir.box<!fir.array<?xi32>>, !fir.slice<1>, index) -> !fir.ref<i32>
%12 = fir.call @Func(%10) fastmath<contract> : (!fir.ref<i32>) -> i1
}
return
}
// External callee standing in for the Fortran `read`; it takes a single
// !fir.ref<i32> operand and returns i1, matching the fir.call in @test3.
func.func private @Func(!fir.ref<i32>) -> i1
// CHECK-LABEL: func.func @test3(
// CHECK-SAME: %[[X:.*]]: !fir.box<!fir.array<?xi32>> {{.*}},
// CHECK-SAME: %[[Y:.*]]: !fir.box<!fir.array<?xi32>> {{.*}}) {
// Look for arith.subi to locate the correct part of code.
// CHECK: {{.*}} arith.subi {{.*}}
// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[Y]], %[[ZERO]]
// CHECK: %[[FOUR:.*]] = arith.constant 4 : index
// CHECK: %[[COMP:.*]] = arith.cmpi eq, %[[DIMS]]#2, %[[FOUR]] : index
// CHECK: fir.if %[[COMP]] {
// CHECK: %[[CONV:.*]] = fir.convert %[[Y]] : {{.*}}
// CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[CONV]] : {{.*}}
// CHECK: fir.do_loop %[[INDEX:.*]] = {{.*}}
// CHECK: %[[YADDR:.*]] = fir.coordinate_of %[[BOX_ADDR]], %[[INDEX]]
// CHECK: %[[YINT:.*]] = fir.load %[[YADDR]] : {{.*}}
// CHECK: %[[YINDEX:.*]] = fir.convert %[[YINT]]
// CHECK: %[[XADDR:.*]] = fir.array_coor %[[X]] [%{{.*}}] %[[YINDEX]]
// CHECK: fir.call @Func(%[[XADDR]])
// CHECK-NEXT: }
// CHECK-NEXT: } else {
// CHECK: fir.do_loop %[[INDEX2:.*]] = {{.*}}
// CHECK: %[[YADDR2:.*]] = fir.coordinate_of %[[Y]], %[[INDEX2]]
// CHECK: %[[YINT2:.*]] = fir.load %[[YADDR2]] : {{.*}}
// CHECK: %[[YINDEX2:.*]] = fir.convert %[[YINT2]]
// CHECK: %[[XADDR2:.*]] = fir.array_coor %[[X]] [%{{.*}}] %[[YINDEX2]]
// CHECK: fir.call @Func(%[[XADDR2]])
// CHECK-NEXT: }
// -----
// Test array initialization.
//
// This code has been modified to simplify it - removing the realloc generated to grow
// the constructed array.
//subroutine test4(a, b, n1, m1)
// real :: a(:)
// real :: b(:,:)
//
// a = [ ((b(i,j), j=1,n1,m1), i=1,n1,m1) ]
//end subroutine test4
// Test input for the array-constructor case: two nested loops read elements of
// the boxed rank-2 f32 array %arg1 ("b") while threading a heap buffer through
// iter_args; a final unordered loop copies the buffer into %arg0 ("a").
func.func @test4(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "b"}, %arg2: !fir.ref<i32> {fir.bindc_name = "n1"}, %arg3: !fir.ref<i32> {fir.bindc_name = "m1"}) {
// Stack slots tracking the current position in, and the size of, the temporary buffer.
%0 = fir.alloca index {bindc_name = ".buff.pos"}
%1 = fir.alloca index {bindc_name = ".buff.size"}
%c0 = arith.constant 0 : index
%2:3 = fir.box_dims %arg0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
%3 = fir.array_load %arg0 : (!fir.box<!fir.array<?xf32>>) -> !fir.array<?xf32>
// Buffer position starts at 0; the buffer is allocated with 32 f32 elements.
%c0_0 = arith.constant 0 : index
fir.store %c0_0 to %0 : !fir.ref<index>
%c32 = arith.constant 32 : index
%4 = fir.allocmem f32, %c32
fir.store %c32 to %1 : !fir.ref<index>
// Outer loop bounds: from 1 to n1 with step m1.
%c1_i64 = arith.constant 1 : i64
%5 = fir.convert %c1_i64 : (i64) -> index
%6 = fir.load %arg2 : !fir.ref<i32>
%7 = fir.convert %6 : (i32) -> i64
%8 = fir.convert %7 : (i64) -> index
%9 = fir.load %arg3 : !fir.ref<i32>
%10 = fir.convert %9 : (i32) -> i64
%11 = fir.convert %10 : (i64) -> index
// The heap buffer is the single iter_arg / result of both nested loops.
%12 = fir.do_loop %arg4 = %5 to %8 step %11 iter_args(%arg5 = %4) -> (!fir.heap<f32>) {
// Inner loop bounds are recomputed here: again from 1 to n1 with step m1.
%c1_i64_2 = arith.constant 1 : i64
%19 = fir.convert %c1_i64_2 : (i64) -> index
%20 = fir.load %arg2 : !fir.ref<i32>
%21 = fir.convert %20 : (i32) -> i64
%22 = fir.convert %21 : (i64) -> index
%23 = fir.load %arg3 : !fir.ref<i32>
%24 = fir.convert %23 : (i32) -> i64
%25 = fir.convert %24 : (i64) -> index
%26 = fir.do_loop %arg6 = %19 to %22 step %25 iter_args(%arg7 = %arg5) -> (!fir.heap<f32>) {
// 0-based first coordinate derived from the outer induction variable.
%27 = fir.convert %arg4 : (index) -> i32
%28 = fir.convert %27 : (i32) -> i64
%c1_i64_3 = arith.constant 1 : i64
%29 = arith.subi %28, %c1_i64_3 : i64
// 0-based second coordinate derived from the inner induction variable.
%30 = fir.convert %arg6 : (index) -> i32
%31 = fir.convert %30 : (i32) -> i64
%c1_i64_4 = arith.constant 1 : i64
%32 = arith.subi %31, %c1_i64_4 : i64
// Two-coordinate access to "b" through its box; the checks below expect the
// versioned loop to use a coordinate_of on the fir.box_addr result instead.
%33 = fir.coordinate_of %arg1, %29, %32 : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
%34 = fir.load %33 : !fir.ref<f32>
// Address of element 1 of a null array reference, converted to an index value.
%c1_5 = arith.constant 1 : index
%35 = fir.zero_bits !fir.ref<!fir.array<?xf32>>
%36 = fir.coordinate_of %35, %c1_5 : (!fir.ref<!fir.array<?xf32>>, index) -> !fir.ref<f32>
%37 = fir.convert %36 : (!fir.ref<f32>) -> index
// Increment the stored buffer position by one.
%38 = fir.load %0 : !fir.ref<index>
%39 = fir.load %1 : !fir.ref<index>
%c1_6 = arith.constant 1 : index
%40 = arith.addi %38, %c1_6 : index
fir.store %40 to %0 : !fir.ref<index>
// The buffer pointer is passed through unchanged.
fir.result %arg7 : !fir.heap<f32>
}
fir.result %26 : !fir.heap<f32>
}
// View the buffer as an array whose extent is the final buffer position.
%13 = fir.convert %12 : (!fir.heap<f32>) -> !fir.heap<!fir.array<?xf32>>
%14 = fir.load %0 : !fir.ref<index>
%15 = fir.shape %14 : (index) -> !fir.shape<1>
%16 = fir.array_load %13(%15) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.array<?xf32>
%c1 = arith.constant 1 : index
%c0_1 = arith.constant 0 : index
// Copy loop runs from 0 to %2#1 - 1, where %2 is the box_dims result for "a".
%17 = arith.subi %2#1, %c1 : index
%18 = fir.do_loop %arg4 = %c0_1 to %17 step %c1 unordered iter_args(%arg5 = %3) -> (!fir.array<?xf32>) {
%19 = fir.array_fetch %16, %arg4 : (!fir.array<?xf32>, index) -> f32
%20 = fir.array_update %arg5, %19, %arg4 : (!fir.array<?xf32>, f32, index) -> !fir.array<?xf32>
fir.result %20 : !fir.array<?xf32>
}
// Commit the updated array value to "a" and release the temporary buffer.
fir.array_merge_store %3, %18 to %arg0 : !fir.array<?xf32>, !fir.array<?xf32>, !fir.box<!fir.array<?xf32>>
fir.freemem %13 : !fir.heap<!fir.array<?xf32>>
return
}
// CHECK: func.func @test4(
// CHECK-SAME: %[[A:.*]]: !fir.box<!fir.array<?xf32>>
// CHECK-SAME: %[[B:.*]]: !fir.box<!fir.array<?x?xf32>>
// CHECK-SAME: %[[N1:.*]]: !fir.ref<i32> {{.*}},
// CHECK-SAME: %[[M1:.*]]: !fir.ref<i32> {{.*}}) {
// CHECK: fir.do_loop
// CHECK: %[[FOUR:.*]] = arith.constant 4 : index
// CHECK: %[[COMP:.*]] = arith.cmpi {{.*}}, %[[FOUR]]
// CHECK: fir.if %[[COMP]] -> {{.*}} {
// CHECK: %[[CONV:.*]] = fir.convert %[[B]] :
// CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[CONV]]
// CHECK: %[[RES:.*]] = fir.do_loop {{.*}} {
// CHECK: %[[ADDR:.*]] = fir.coordinate_of %[[BOX_ADDR]], %{{.*}}
// CHECK: %{{.*}} = fir.load %[[ADDR]] : !fir.ref<f32>
// CHECK: }
// CHECK: fir.result %[[RES]] : {{.*}}
// CHECK: } else {
// CHECK: %[[RES2:.*]] = fir.do_loop
// CHECK: %{{.*}} = fir.coordinate_of %[[B]], %{{.*}}
// CHECK: }
// CHECK: fir.result %[[RES2]]
// CHECK: }
// -----
// Check that 2D arrays are identified and converted.
// Source code:
// subroutine sum2d(a, nx, ny)
// real*8 :: a(:,:)
// integer :: nx, ny
// real*8 :: sum
// integer :: i, j
// sum = 0
// do i=1,nx
// do j=1,ny
// sum = sum + a(j,i)
// end do
// end do
// end subroutine sum2d
// Test input for the 2-D case: a doubly nested loop that accumulates the
// elements of the boxed rank-2 f64 array %arg0, with the outer bound loaded from
// %arg1 ("nx") and the inner bound from %arg2 ("ny").
func.func @sum2d(%arg0: !fir.box<!fir.array<?x?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "nx"}, %arg2: !fir.ref<i32> {fir.bindc_name = "ny"}) {
// Stack slots for the loop variables "i" and "j" and the accumulator "sum".
%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmoduleFsum2dEi"}
%1 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmoduleFsum2dEj"}
%2 = fir.alloca f64 {bindc_name = "sum", uniq_name = "_QMmoduleFsum2dEsum"}
// sum = 0
%cst = arith.constant 0.000000e+00 : f64
fir.store %cst to %2 : !fir.ref<f64>
// Outer loop over "i": from 1 to nx with step 1.
%c1_i32 = arith.constant 1 : i32
%3 = fir.convert %c1_i32 : (i32) -> index
%4 = fir.load %arg1 : !fir.ref<i32>
%5 = fir.convert %4 : (i32) -> index
%c1 = arith.constant 1 : index
%6 = fir.convert %3 : (index) -> i32
%7:2 = fir.do_loop %arg3 = %3 to %5 step %c1 iter_args(%arg4 = %6) -> (index, i32) {
fir.store %arg4 to %0 : !fir.ref<i32>
// Inner loop over "j": from 1 to ny with step 1.
%c1_i32_0 = arith.constant 1 : i32
%8 = fir.convert %c1_i32_0 : (i32) -> index
%9 = fir.load %arg2 : !fir.ref<i32>
%10 = fir.convert %9 : (i32) -> index
%c1_1 = arith.constant 1 : index
%11 = fir.convert %8 : (index) -> i32
%12:2 = fir.do_loop %arg5 = %8 to %10 step %c1_1 iter_args(%arg6 = %11) -> (index, i32) {
fir.store %arg6 to %1 : !fir.ref<i32>
%17 = fir.load %2 : !fir.ref<f64>
// First coordinate: j - 1.
%18 = fir.load %1 : !fir.ref<i32>
%19 = fir.convert %18 : (i32) -> i64
%c1_i64 = arith.constant 1 : i64
%20 = arith.subi %19, %c1_i64 : i64
// Second coordinate: i - 1.
%21 = fir.load %0 : !fir.ref<i32>
%22 = fir.convert %21 : (i32) -> i64
%c1_i64_2 = arith.constant 1 : i64
%23 = arith.subi %22, %c1_i64_2 : i64
// Two-coordinate access through the box; the checks below expect the versioned
// loop to use a single linearized index on the fir.box_addr result instead.
%24 = fir.coordinate_of %arg0, %20, %23 : (!fir.box<!fir.array<?x?xf64>>, i64, i64) -> !fir.ref<f64>
%25 = fir.load %24 : !fir.ref<f64>
// sum = sum + a(j,i)
%26 = arith.addf %17, %25 fastmath<contract> : f64
fir.store %26 to %2 : !fir.ref<f64>
// Advance the inner index induction value and the i32 copy of "j".
%27 = arith.addi %arg5, %c1_1 : index
%28 = fir.convert %c1_1 : (index) -> i32
%29 = fir.load %1 : !fir.ref<i32>
%30 = arith.addi %29, %28 : i32
fir.result %27, %30 : index, i32
}
// Write the final inner loop-variable value back to "j".
fir.store %12#1 to %1 : !fir.ref<i32>
// Advance the outer index induction value and the i32 copy of "i".
%13 = arith.addi %arg3, %c1 : index
%14 = fir.convert %c1 : (index) -> i32
%15 = fir.load %0 : !fir.ref<i32>
%16 = arith.addi %15, %14 : i32
fir.result %13, %16 : index, i32
}
// Write the final outer loop-variable value back to "i".
fir.store %7#1 to %0 : !fir.ref<i32>
return
}
// Note this only checks the expected transformation, not the entire generated code:
// CHECK-LABEL: func.func @sum2d(
// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf64>> {{.*}})
// Only the inner loop should be versioned.
// CHECK: fir.do_loop
// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
// CHECK: %[[DIMS0:.*]]:3 = fir.box_dims %[[ARG0]], %[[ZERO]] : {{.*}}
// CHECK: %[[ONE:.*]] = arith.constant 1 : index
// CHECK: %[[DIMS1:.*]]:3 = fir.box_dims %[[ARG0]], %[[ONE]] : {{.*}}
// CHECK: %[[SIZE:.*]] = arith.constant 8 : index
// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[DIMS0]]#2, %[[SIZE]]
// CHECK: %[[IF_RES:.*]]:2 = fir.if %[[CMP]] -> {{.*}}
// CHECK: %[[NEWARR:.*]] = fir.convert %[[ARG0]]
// CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[NEWARR]] : {{.*}} -> !fir.ref<!fir.array<?xf64>>
// CHECK: %[[LOOP_RES:.*]]:2 = fir.do_loop {{.*}}
// Check the 2D -> 1D coordinate conversion: it should have a multiply, a shift
// right by log2 of the element size, and a final add.
// Some other operations are checked to sync the different parts.
// CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
// CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index
// CHECK: %[[OUTER_DIV:.*]] = arith.shrsi %[[OUTER_IDX]], %[[ITEMSHIFT]]
// CHECK: %[[C2D:.*]] = arith.addi %[[OUTER_DIV]], %[[INNER_IDX]]
// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C2D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
// CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>
// CHECK: fir.result %{{.*}}, %{{.*}}
// CHECK: }
// CHECK: fir.result %[[LOOP_RES]]#0, %[[LOOP_RES]]#1
// CHECK: } else {
// CHECK: %[[LOOP_RES2:.*]]:2 = fir.do_loop {{.*}}
// CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.box<!fir.array<?x?xf64>>, i64, i64) -> !fir.ref<f64>
// CHECK: %{{.*}}= fir.load %[[COORD2]] : !fir.ref<f64>
// CHECK: fir.result %{{.*}}, %{{.*}}
// CHECK: }
// CHECK: fir.result %[[LOOP_RES2]]#0, %[[LOOP_RES2]]#1
// CHECK: }
// CHECK: fir.store %[[IF_RES]]#1 to %{{.*}}
// CHECK: return
// -----
// subroutine sum3d(a, nx, ny, nz)
// real*8 :: a(:, :, :)
// integer :: nx, ny, nz
// real*8 :: sum
// integer :: i, j, k
// sum = 0
// do k=1,nz
// do j=1,ny
// do i=0,nx
// sum = sum + a(i, j, k)
// end do
// end do
// end do
// end subroutine sum3d
// Test input for the 3-D case: a triply nested loop that accumulates the
// elements of the boxed rank-3 f64 array %arg0. Loop bounds are loaded from
// %arg3 ("nz", outermost), %arg2 ("ny") and %arg1 ("nx", innermost).
func.func @sum3d(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "nx"}, %arg2: !fir.ref<i32> {fir.bindc_name = "ny"}, %arg3: !fir.ref<i32> {fir.bindc_name = "nz"}) {
// Stack slots for the loop variables "i", "j", "k" and the accumulator "sum".
%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmoduleFsum3dEi"}
%1 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmoduleFsum3dEj"}
%2 = fir.alloca i32 {bindc_name = "k", uniq_name = "_QMmoduleFsum3dEk"}
%3 = fir.alloca f64 {bindc_name = "sum", uniq_name = "_QMmoduleFsum3dEsum"}
// sum = 0
%cst = arith.constant 0.000000e+00 : f64
fir.store %cst to %3 : !fir.ref<f64>
// Outermost loop over "k": from 1 to nz with step 1.
%c1_i32 = arith.constant 1 : i32
%4 = fir.convert %c1_i32 : (i32) -> index
%5 = fir.load %arg3 : !fir.ref<i32>
%6 = fir.convert %5 : (i32) -> index
%c1 = arith.constant 1 : index
%7 = fir.convert %4 : (index) -> i32
%8:2 = fir.do_loop %arg4 = %4 to %6 step %c1 iter_args(%arg5 = %7) -> (index, i32) {
fir.store %arg5 to %2 : !fir.ref<i32>
// Middle loop over "j": from 1 to ny with step 1.
%c1_i32_0 = arith.constant 1 : i32
%9 = fir.convert %c1_i32_0 : (i32) -> index
%10 = fir.load %arg2 : !fir.ref<i32>
%11 = fir.convert %10 : (i32) -> index
%c1_1 = arith.constant 1 : index
%12 = fir.convert %9 : (index) -> i32
%13:2 = fir.do_loop %arg6 = %9 to %11 step %c1_1 iter_args(%arg7 = %12) -> (index, i32) {
fir.store %arg7 to %1 : !fir.ref<i32>
// Innermost loop over "i": starts at 0 (matching `do i=0,nx` in the Fortran
// source above) and runs to nx with step 1.
%c0_i32 = arith.constant 0 : i32
%18 = fir.convert %c0_i32 : (i32) -> index
%19 = fir.load %arg1 : !fir.ref<i32>
%20 = fir.convert %19 : (i32) -> index
%c1_2 = arith.constant 1 : index
%21 = fir.convert %18 : (index) -> i32
%22:2 = fir.do_loop %arg8 = %18 to %20 step %c1_2 iter_args(%arg9 = %21) -> (index, i32) {
fir.store %arg9 to %0 : !fir.ref<i32>
%27 = fir.load %3 : !fir.ref<f64>
// First coordinate: i - 1.
%28 = fir.load %0 : !fir.ref<i32>
%29 = fir.convert %28 : (i32) -> i64
%c1_i64 = arith.constant 1 : i64
%30 = arith.subi %29, %c1_i64 : i64
// Second coordinate: j - 1.
%31 = fir.load %1 : !fir.ref<i32>
%32 = fir.convert %31 : (i32) -> i64
%c1_i64_3 = arith.constant 1 : i64
%33 = arith.subi %32, %c1_i64_3 : i64
// Third coordinate: k - 1.
%34 = fir.load %2 : !fir.ref<i32>
%35 = fir.convert %34 : (i32) -> i64
%c1_i64_4 = arith.constant 1 : i64
%36 = arith.subi %35, %c1_i64_4 : i64
// Three-coordinate access through the box; the checks below expect the versioned
// loop to use a single linearized index on the fir.box_addr result instead.
%37 = fir.coordinate_of %arg0, %30, %33, %36 : (!fir.box<!fir.array<?x?x?xf64>>, i64, i64, i64) -> !fir.ref<f64>
%38 = fir.load %37 : !fir.ref<f64>
// sum = sum + a(i, j, k)
%39 = arith.addf %27, %38 fastmath<contract> : f64
fir.store %39 to %3 : !fir.ref<f64>
// Advance the innermost index induction value and the i32 copy of "i".
%40 = arith.addi %arg8, %c1_2 : index
%41 = fir.convert %c1_2 : (index) -> i32
%42 = fir.load %0 : !fir.ref<i32>
%43 = arith.addi %42, %41 : i32
fir.result %40, %43 : index, i32
}
// Write the final innermost loop-variable value back to "i".
fir.store %22#1 to %0 : !fir.ref<i32>
// Advance the middle index induction value and the i32 copy of "j".
%23 = arith.addi %arg6, %c1_1 : index
%24 = fir.convert %c1_1 : (index) -> i32
%25 = fir.load %1 : !fir.ref<i32>
%26 = arith.addi %25, %24 : i32
fir.result %23, %26 : index, i32
}
// Write the final middle loop-variable value back to "j".
fir.store %13#1 to %1 : !fir.ref<i32>
// Advance the outermost index induction value and the i32 copy of "k".
%14 = arith.addi %arg4, %c1 : index
%15 = fir.convert %c1 : (index) -> i32
%16 = fir.load %2 : !fir.ref<i32>
%17 = arith.addi %16, %15 : i32
fir.result %14, %17 : index, i32
}
// Write the final outermost loop-variable value back to "k".
fir.store %8#1 to %2 : !fir.ref<i32>
return
}
// Note this only checks the expected transformation, not the entire generated code:
// CHECK-LABEL: func.func @sum3d(
// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {{.*}})
// Only the inner loop should be versioned.
// CHECK: fir.do_loop
// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
// CHECK: %[[DIMS0:.*]]:3 = fir.box_dims %[[ARG0]], %[[ZERO]] : {{.*}}
// CHECK: %[[ONE:.*]] = arith.constant 1 : index
// CHECK: %[[DIMS1:.*]]:3 = fir.box_dims %[[ARG0]], %[[ONE]] : {{.*}}
// CHECK: %[[TWO:.*]] = arith.constant 2 : index
// CHECK: %[[DIMS2:.*]]:3 = fir.box_dims %[[ARG0]], %[[TWO]] : {{.*}}
// CHECK: %[[SIZE:.*]] = arith.constant 8 : index
// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[DIMS0]]#2, %[[SIZE]]
// CHECK: %[[IF_RES:.*]]:2 = fir.if %[[CMP]] -> {{.*}}
// CHECK: %[[NEWARR:.*]] = fir.convert %[[ARG0]]
// CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[NEWARR]] : {{.*}} -> !fir.ref<!fir.array<?xf64>>
// CHECK: %[[LOOP_RES:.*]]:2 = fir.do_loop {{.*}}
// Check the 3D -> 1D coordinate conversion: it should have two multiplies, an add
// of the two products, a shift right by log2 of the element size, and a final add.
// Some other operations are checked to sync the different parts.
// CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS2]]#2, {{.*}}
// CHECK: %[[MIDDLE_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
// CHECK: %[[MIDDLE_SUM:.*]] = arith.addi %[[MIDDLE_IDX]], %[[OUTER_IDX]]
// CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index
// CHECK: %[[MIDDLE_DIV:.*]] = arith.shrsi %[[MIDDLE_SUM]], %[[ITEMSHIFT]]
// CHECK: %[[C3D:.*]] = arith.addi %[[MIDDLE_DIV]], %[[INNER_IDX]]
// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C3D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
// CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>
// CHECK: fir.result %{{.*}}, %{{.*}}
// CHECK: }
// CHECK: fir.result %[[LOOP_RES]]#0, %[[LOOP_RES]]#1
// CHECK: } else {
// CHECK: %[[LOOP_RES2:.*]]:2 = fir.do_loop {{.*}}
// CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.box<!fir.array<?x?x?xf64>>, i64, i64, i64) -> !fir.ref<f64>
// CHECK: %{{.*}}= fir.load %[[COORD2]] : !fir.ref<f64>
// CHECK: fir.result %{{.*}}, %{{.*}}
// CHECK: }
// CHECK: fir.result %[[LOOP_RES2]]#0, %[[LOOP_RES2]]#1
// CHECK: }
// CHECK: fir.store %[[IF_RES]]#1 to %{{.*}}
// CHECK: return
} // End module