Extend MLIR-to-LLVM-IR lowering by adding delayed privatization support for `omp.wsloop`. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization into a shared utility that can be used across the constructs that need it. The same is done for `dealloc` regions. Parent PR: https://github.com/llvm/llvm-project/pull/118447. Only the latest commit is relevant for this PR.
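As a minimal sketch of the refactoring direction (the helper name `copyFirstPrivateVars`, the `inlineOmpPrivatizerRegion` stand-in, and the exact signatures below are assumptions, not the code in this PR; the real logic lives in `mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp`), the shared `firstprivate` utility could look roughly like this:

```cpp
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IRBuilder.h"

using namespace mlir;

// Hypothetical forward declaration; stands in for the region-inlining
// machinery already present in OpenMPToLLVMIRTranslation.cpp, which converts
// an MLIR region and splices it in at the builder's insertion point.
static LogicalResult inlineOmpPrivatizerRegion(Region &region,
                                               StringRef blockName,
                                               llvm::IRBuilderBase &builder,
                                               LLVM::ModuleTranslation &mt);

// Inlines the `copy` region of every `firstprivate` recipe to initialize the
// private copy from the original variable. Sketch only; names and signature
// are assumed.
static LogicalResult
copyFirstPrivateVars(llvm::IRBuilderBase &builder,
                     LLVM::ModuleTranslation &moduleTranslation,
                     ArrayRef<omp::PrivateClauseOp> privateDecls,
                     ArrayRef<llvm::Value *> moldVars,
                     ArrayRef<llvm::Value *> privateVars) {
  for (auto [decl, mold, priv] :
       llvm::zip_equal(privateDecls, moldVars, privateVars)) {
    // Plain `private` recipes carry no `copy` region; nothing to initialize.
    Region &copyRegion = decl.getCopyRegion();
    if (copyRegion.empty())
      continue;

    // The entry block of the `copy` region receives the original value first
    // and the allocated private copy second.
    moduleTranslation.mapValue(copyRegion.getArgument(0), mold);
    moduleTranslation.mapValue(copyRegion.getArgument(1), priv);

    if (failed(inlineOmpPrivatizerRegion(copyRegion, "omp.private.copy",
                                         builder, moduleTranslation)))
      return failure();
  }
  return success();
}
```

A symmetric helper would inline each recipe's `dealloc` region at construct exit; the test below exercises that path through the `foo_free` call.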
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// Tests a wsloop with private + firstprivate + reduction clauses to make sure
// the block structure is handled properly.

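// A `private` recipe for the loop variable `i`: the `alloc` region allocates
// the thread-local copy and yields it; no copy or dealloc region is needed.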
omp.private {type = private} @_QFwsloop_privateEi_private_ref_i32 : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
  %0 = llvm.mlir.constant(1 : i64) : i64
  %1 = llvm.alloca %0 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr
  omp.yield(%1 : !llvm.ptr)
}

llvm.func @foo_free(!llvm.ptr)

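// A `firstprivate` recipe for the character variable `c`: `alloc` creates the
// private copy, `copy` initializes it from the original variable, and
// `dealloc` releases it via `foo_free`.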
omp.private {type = firstprivate} @_QFwsloop_privateEc_firstprivate_ref_c8 : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
  %0 = llvm.mlir.constant(1 : i64) : i64
  %1 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c", pinned} : (i64) -> !llvm.ptr
  omp.yield(%1 : !llvm.ptr)
} copy {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
  %0 = llvm.load %arg0 : !llvm.ptr -> !llvm.array<1 x i8>
  llvm.store %0, %arg1 : !llvm.array<1 x i8>, !llvm.ptr
  omp.yield(%arg1 : !llvm.ptr)
} dealloc {
^bb0(%arg0: !llvm.ptr):
  llvm.call @foo_free(%arg0) : (!llvm.ptr) -> ()
  omp.yield
}

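// A max reduction over f32: `init` seeds each private accumulator with the
// smallest finite f32 value, and `combiner` merges values via `llvm.intr.maxnum`.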
omp.declare_reduction @max_f32 : f32 init {
^bb0(%arg0: f32):
  %0 = llvm.mlir.constant(-3.40282347E+38 : f32) : f32
  omp.yield(%0 : f32)
} combiner {
^bb0(%arg0: f32, %arg1: f32):
  %0 = llvm.intr.maxnum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath<contract>} : (f32, f32) -> f32
  omp.yield(%0 : f32)
}

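// The host function: `x`, `i`, and `c` are allocated in the parent frame and
// passed to the worksharing loop as reduction, private, and firstprivate
// arguments, respectively.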
llvm.func @wsloop_private_(%arg0: !llvm.ptr {fir.bindc_name = "y"}) attributes {fir.internal_name = "_QPwsloop_private", frame_pointer = #llvm.framePointerKind<all>, target_cpu = "x86-64"} {
  %0 = llvm.mlir.constant(1 : i64) : i64
  %1 = llvm.alloca %0 x f32 {bindc_name = "x"} : (i64) -> !llvm.ptr
  %3 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
  %5 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c"} : (i64) -> !llvm.ptr
  %6 = llvm.mlir.constant(1 : i32) : i32
  %7 = llvm.mlir.constant(10 : i32) : i32
  %8 = llvm.mlir.constant(0 : i32) : i32
  omp.parallel {
    omp.wsloop private(@_QFwsloop_privateEc_firstprivate_ref_c8 %5 -> %arg1, @_QFwsloop_privateEi_private_ref_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) reduction(@max_f32 %1 -> %arg3 : !llvm.ptr) {
      omp.loop_nest (%arg4) : i32 = (%8) to (%7) inclusive step (%6) {
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

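// Check that the parallel region was outlined and invoked through the OpenMP
// runtime, then inspect the outlined body.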
// CHECK: call void {{.*}} @__kmpc_fork_call(ptr @1, i32 1, ptr @[[OUTLINED:.*]], ptr %{{.*}})

// CHECK: define internal void @[[OUTLINED]]{{.*}} {

// First, check that all memory for privates and reductions is allocated.
// CHECK: omp.par.entry:
// CHECK: %[[CHR:.*]] = alloca [1 x i8], i64 1, align 1
// CHECK: %[[INT:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[FLT:.*]] = alloca float, align 4
// CHECK: %[[RED_ARR:.*]] = alloca [1 x ptr], align 8
// CHECK: br label %[[LATE_ALLOC_BB:.*]]

// CHECK: [[LATE_ALLOC_BB]]:
// CHECK: br label %[[PRIVATE_CPY_BB:.*]]

// Second, check that the firstprivate value was properly copied.
// CHECK: [[PRIVATE_CPY_BB]]:
// CHECK: %[[CHR_VAL:.*]] = load [1 x i8], ptr %{{.*}}, align 1
// CHECK: store [1 x i8] %[[CHR_VAL]], ptr %[[CHR]], align 1
// CHECK: br label %[[RED_INIT_BB:.*]]

// Third, check that the reduction init took place.
// CHECK: [[RED_INIT_BB]]:
// CHECK: store float 0x{{.*}}, ptr %[[FLT]], align 4

// Finally, check for the private dealloc region.
// CHECK: call void @foo_free(ptr %[[CHR]])

// CHECK: }