Extend MLIR-to-LLVM-IR lowering by adding delayed privatization support for `omp.wsloop`. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization into a shared utility that can be used across the constructs that need it. The same is done for `dealloc` regions. Parent PR: https://github.com/llvm/llvm-project/pull/118447. Only the latest commit is relevant for this PR.
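As a minimal sketch of the refactoring direction (the helper name `copyFirstPrivateVars`, the `inlineOmpPrivatizerRegion` stand-in, and the exact signatures below are assumptions, not the code in this PR; the real logic lives in `mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp`), the shared `firstprivate` utility could look roughly like this:

```cpp
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IRBuilder.h"

using namespace mlir;

// Hypothetical forward declaration; stands in for the region-inlining
// machinery already present in OpenMPToLLVMIRTranslation.cpp, which converts
// an MLIR region and splices it in at the builder's insertion point.
static LogicalResult inlineOmpPrivatizerRegion(Region &region,
                                               StringRef blockName,
                                               llvm::IRBuilderBase &builder,
                                               LLVM::ModuleTranslation &mt);

// Inlines the `copy` region of every `firstprivate` recipe to initialize the
// private copy from the original variable. Sketch only; names and signature
// are assumed.
static LogicalResult
copyFirstPrivateVars(llvm::IRBuilderBase &builder,
                     LLVM::ModuleTranslation &moduleTranslation,
                     ArrayRef<omp::PrivateClauseOp> privateDecls,
                     ArrayRef<llvm::Value *> moldVars,
                     ArrayRef<llvm::Value *> privateVars) {
  for (auto [decl, mold, priv] :
       llvm::zip_equal(privateDecls, moldVars, privateVars)) {
    // Plain `private` recipes carry no `copy` region; nothing to initialize.
    Region &copyRegion = decl.getCopyRegion();
    if (copyRegion.empty())
      continue;

    // The entry block of the `copy` region receives the original value first
    // and the allocated private copy second.
    moduleTranslation.mapValue(copyRegion.getArgument(0), mold);
    moduleTranslation.mapValue(copyRegion.getArgument(1), priv);

    if (failed(inlineOmpPrivatizerRegion(copyRegion, "omp.private.copy",
                                         builder, moduleTranslation)))
      return failure();
  }
  return success();
}
```

A symmetric helper would inline each recipe's `dealloc` region at construct exit; the test below exercises that path through the `foo_free` call.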
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// Tests a wsloop with private + firstprivate + reduction clauses to make sure
// the block structure is handled properly.

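// A `private` recipe for the loop variable `i`: the `alloc` region allocates
// the thread-local copy and yields it; no copy or dealloc region is needed.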
omp.private {type = private} @_QFwsloop_privateEi_private_ref_i32 : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
  %0 = llvm.mlir.constant(1 : i64) : i64
  %1 = llvm.alloca %0 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr
  omp.yield(%1 : !llvm.ptr)
}

llvm.func @foo_free(!llvm.ptr)

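// A `firstprivate` recipe for the character variable `c`: `alloc` creates the
// private copy, `copy` initializes it from the original variable, and
// `dealloc` releases it via `foo_free`.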
omp.private {type = firstprivate} @_QFwsloop_privateEc_firstprivate_ref_c8 : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
  %0 = llvm.mlir.constant(1 : i64) : i64
  %1 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c", pinned} : (i64) -> !llvm.ptr
  omp.yield(%1 : !llvm.ptr)
} copy {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
  %0 = llvm.load %arg0 : !llvm.ptr -> !llvm.array<1 x i8>
  llvm.store %0, %arg1 : !llvm.array<1 x i8>, !llvm.ptr
  omp.yield(%arg1 : !llvm.ptr)
} dealloc {
^bb0(%arg0: !llvm.ptr):
  llvm.call @foo_free(%arg0) : (!llvm.ptr) -> ()
  omp.yield
}

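// A max reduction over f32: `init` seeds each private accumulator with the
// smallest finite f32 value, and `combiner` merges values via `llvm.intr.maxnum`.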
omp.declare_reduction @max_f32 : f32 init {
^bb0(%arg0: f32):
  %0 = llvm.mlir.constant(-3.40282347E+38 : f32) : f32
  omp.yield(%0 : f32)
} combiner {
^bb0(%arg0: f32, %arg1: f32):
  %0 = llvm.intr.maxnum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath<contract>} : (f32, f32) -> f32
  omp.yield(%0 : f32)
}

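// The host function: `x`, `i`, and `c` are allocated in the parent frame and
// passed to the worksharing loop as reduction, private, and firstprivate
// arguments, respectively.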
llvm.func @wsloop_private_(%arg0: !llvm.ptr {fir.bindc_name = "y"}) attributes {fir.internal_name = "_QPwsloop_private", frame_pointer = #llvm.framePointerKind<all>, target_cpu = "x86-64"} {
  %0 = llvm.mlir.constant(1 : i64) : i64
  %1 = llvm.alloca %0 x f32 {bindc_name = "x"} : (i64) -> !llvm.ptr
  %3 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
  %5 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c"} : (i64) -> !llvm.ptr
  %6 = llvm.mlir.constant(1 : i32) : i32
  %7 = llvm.mlir.constant(10 : i32) : i32
  %8 = llvm.mlir.constant(0 : i32) : i32
  omp.parallel {
    omp.wsloop private(@_QFwsloop_privateEc_firstprivate_ref_c8 %5 -> %arg1, @_QFwsloop_privateEi_private_ref_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) reduction(@max_f32 %1 -> %arg3 : !llvm.ptr) {
      omp.loop_nest (%arg4) : i32 = (%8) to (%7) inclusive step (%6) {
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

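// Check that the parallel region was outlined and invoked through the OpenMP
// runtime, then inspect the outlined body.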
// CHECK: call void {{.*}} @__kmpc_fork_call(ptr @1, i32 1, ptr @[[OUTLINED:.*]], ptr %{{.*}})

// CHECK: define internal void @[[OUTLINED]]{{.*}} {

// First, check that all memory for privates and reductions is allocated.
// CHECK: omp.par.entry:
// CHECK: %[[CHR:.*]] = alloca [1 x i8], i64 1, align 1
// CHECK: %[[INT:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[FLT:.*]] = alloca float, align 4
// CHECK: %[[RED_ARR:.*]] = alloca [1 x ptr], align 8
// CHECK: br label %[[LATE_ALLOC_BB:.*]]

// CHECK: [[LATE_ALLOC_BB]]:
// CHECK: br label %[[PRIVATE_CPY_BB:.*]]

// Second, check that the firstprivate value was properly copied.
// CHECK: [[PRIVATE_CPY_BB]]:
// CHECK: %[[CHR_VAL:.*]] = load [1 x i8], ptr %{{.*}}, align 1
// CHECK: store [1 x i8] %[[CHR_VAL]], ptr %[[CHR]], align 1
// CHECK: br label %[[RED_INIT_BB:.*]]

// Third, check that the reduction init took place.
// CHECK: [[RED_INIT_BB]]:
// CHECK: store float 0x{{.*}}, ptr %[[FLT]], align 4

// Finally, check for the private dealloc region.
// CHECK: call void @foo_free(ptr %[[CHR]])

// CHECK: }