This patch updates printing and parsing of operations including clauses
that define entry block arguments to the operation's region. This
impacts `in_reduction`, `map`, `private`, `reduction` and
`task_reduction`.
The proposed representation to be used by all such clauses is the
following:
```
<clause_name>([byref] [@<sym>] %value -> %block_arg [, ...] : <type>[, ...]) {
...
}
```
The `byref` tag is only allowed for reduction-like clauses and the
`@<sym>` is required and only allowed for the `private` and
reduction-like clauses. The `map` clause does not accept either of
these two.
This change fixes some currently broken op representations, like
`omp.teams` or `omp.sections` reduction:
```
omp.teams reduction([byref] @<sym> -> %value : <type>) {
^bb0(%block_arg : <type>):
...
}
```
Additionally, it addresses some redundancy in the representation of the
previously mentioned cases, as well as e.g. `map` in `omp.target`. The
problem is that the block argument name after the arrow is not checked
in any way, which makes some misleading representations legal:
```mlir
omp.target map_entries(%x -> %arg1, %y -> %arg0, %z -> %doesnt_exist : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
^bb0(%arg0 : !llvm.ptr, %arg1 : !llvm.ptr, %arg2 : !llvm.ptr):
...
}
```
In that case, `%x` maps to `%arg0`, contrary to what the representation
states, and `%z` maps to `%arg2`. `%doesnt_exist` is not resolved, so it
would likely cause issues if used anywhere inside of the operation's
region.
The solution implemented in this patch makes it so that values
introduced after the arrow on the representation of these clauses
implicitly define the corresponding entry block arguments, removing the
potential for these problematic representations. This is what is already
implemented for the `private` and `reduction` clauses of `omp.parallel`.
There are a couple of consequences of this change:
- Entry block argument-defining clauses must come at the end of the
operation's representation and in alphabetical order. This is because
they are printed/parsed as part of the region and a standardized
ordering is needed to reliably match op arguments with their
corresponding entry block arguments via the `BlockArgOpenMPOpInterface`.
- We can no longer define per-clause assembly formats to be reused by
all operations that take these clauses, since they must be passed to a
custom printer including the region and arguments of all other entry
block argument-defining clauses. Code duplication and potential for
introducing issues is minimized by providing the generic
`{print,parse}BlockArgRegion` helpers and associated structures.
MLIR and Flang lowering unit tests are updated due to changes in the
order and formatting of impacted operations.
(Test file: 218 lines, 12 KiB, MLIR)
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// Nonsense minimised code simulating the control flow graph generated by flang
// for array reductions. The important thing here is that we are testing a byref
// reduction with a cleanup region, and the various regions contain multiple
// blocks
// Byref reduction declaration for a boxed (descriptor-carrying) Uxf32 array.
// Exercises multi-block `combiner` and `cleanup` regions, which is the point
// of this test.
omp.declare_reduction @add_reduction_byref_box_Uxf32 : !llvm.ptr alloc {
  %0 = llvm.mlir.constant(1 : i64) : i64
  // Storage for the private copy: one boxed-array descriptor struct.
  %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr
  omp.yield(%1 : !llvm.ptr)
} init {
^bb0(%arg0: !llvm.ptr, %alloc: !llvm.ptr):
  // Initialize the private copy allocated by the `alloc` region.
  %0 = llvm.mlir.constant(1 : i64) : i64
  llvm.store %0, %alloc : i64, !llvm.ptr
  omp.yield(%alloc : !llvm.ptr)
} combiner {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
  // Multi-block combiner: a countdown loop before yielding the LHS pointer.
  %0 = llvm.mlir.constant(0 : i64) : i64
  %1 = llvm.mlir.constant(0 : index) : i64
  %2 = llvm.mlir.constant(1 : index) : i64
  llvm.br ^bb1(%0 : i64)
^bb1(%3: i64):  // 2 preds: ^bb0, ^bb2
  %4 = llvm.icmp "sgt" %3, %1 : i64
  llvm.cond_br %4, ^bb2, ^bb3
^bb2:  // pred: ^bb1
  %5 = llvm.sub %3, %2 : i64
  llvm.br ^bb1(%5 : i64)
^bb3:  // pred: ^bb1
  omp.yield(%arg0 : !llvm.ptr)
} cleanup {
^bb0(%arg0: !llvm.ptr):
  // Multi-block cleanup: branch on whether the reduction pointer is non-null.
  %0 = llvm.mlir.constant(0 : i64) : i64
  %1 = llvm.ptrtoint %arg0 : !llvm.ptr to i64
  %2 = llvm.icmp "ne" %1, %0 : i64
  llvm.cond_br %2, ^bb1, ^bb2
^bb1:  // pred: ^bb0
  llvm.br ^bb2
^bb2:  // 2 preds: ^bb0, ^bb1
  omp.yield
}
// Parallel region containing an `omp.sections` construct with a byref
// reduction over the boxed array descriptor. Each `omp.section` body has
// multiple blocks (a countdown loop), mirroring flang-generated CFGs.
llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.internal_name = "_QPsectionsreduction"} {
  %0 = llvm.mlir.constant(1 : i64) : i64
  %1 = llvm.mlir.constant(0 : index) : i64
  %2 = llvm.mlir.constant(1 : index) : i64
  omp.parallel {
    // Descriptor allocated inside the parallel region and reduced byref.
    %3 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr
    omp.sections reduction(byref @add_reduction_byref_box_Uxf32 %3 -> %arg1 : !llvm.ptr) {
      omp.section {
      ^bb0(%arg2: !llvm.ptr):
        llvm.br ^bb1(%0 : i64)
      ^bb1(%4: i64):  // 2 preds: ^bb0, ^bb2
        %5 = llvm.icmp "sgt" %4, %1 : i64
        llvm.cond_br %5, ^bb2, ^bb3
      ^bb2:  // pred: ^bb1
        %6 = llvm.sub %4, %2 : i64
        llvm.br ^bb1(%6 : i64)
      ^bb3:  // pred: ^bb1
        omp.terminator
      }
      omp.section {
      ^bb0(%arg2: !llvm.ptr):
        llvm.br ^bb1(%0 : i64)
      ^bb1(%4: i64):  // 2 preds: ^bb0, ^bb2
        %5 = llvm.icmp "sgt" %4, %1 : i64
        llvm.cond_br %5, ^bb2, ^bb3
      ^bb2:  // pred: ^bb1
        %6 = llvm.sub %4, %2 : i64
        llvm.br ^bb1(%6 : i64)
      ^bb3:  // pred: ^bb1
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }
  llvm.return
}

// CHECK-LABEL: define internal void @sectionsreduction_..omp_par
// CHECK: omp.par.entry:
// CHECK: %[[VAL_6:.*]] = alloca i32, align 4
// CHECK: %[[VAL_7:.*]] = alloca i32, align 4
// CHECK: %[[VAL_8:.*]] = alloca i32, align 4
// CHECK: %[[VAL_9:.*]] = alloca i32, align 4
// CHECK: %[[VAL_10:.*]] = alloca i32, align 4
// CHECK: %[[VAL_11:.*]] = load i32, ptr %[[VAL_12:.*]], align 4
// CHECK: store i32 %[[VAL_11]], ptr %[[VAL_10]], align 4
// CHECK: %[[VAL_13:.*]] = load i32, ptr %[[VAL_10]], align 4
// CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8
// CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8
// CHECK: br label %[[VAL_15:.*]]
// CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]]
// CHECK: br label %[[VAL_17:.*]]
// CHECK: omp.par.region: ; preds = %[[VAL_15]]
// CHECK: br label %[[VAL_18:.*]]
// CHECK: omp.par.region1: ; preds = %[[VAL_17]]
// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
// CHECK: br label %[[VAL_22:.*]]
// CHECK: omp_section_loop.preheader: ; preds = %[[VAL_18]]
// CHECK: store i32 0, ptr %[[VAL_7]], align 4
// CHECK: store i32 1, ptr %[[VAL_8]], align 4
// CHECK: store i32 1, ptr %[[VAL_9]], align 4
// CHECK: %[[VAL_23:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[VAL_23]], i32 34, ptr %[[VAL_6]], ptr %[[VAL_7]], ptr %[[VAL_8]], ptr %[[VAL_9]], i32 1, i32 0)
// CHECK: %[[VAL_24:.*]] = load i32, ptr %[[VAL_7]], align 4
// CHECK: %[[VAL_25:.*]] = load i32, ptr %[[VAL_8]], align 4
// CHECK: %[[VAL_26:.*]] = sub i32 %[[VAL_25]], %[[VAL_24]]
// CHECK: %[[VAL_27:.*]] = add i32 %[[VAL_26]], 1
// CHECK: br label %[[VAL_28:.*]]
// CHECK: omp_section_loop.header: ; preds = %[[VAL_29:.*]], %[[VAL_22]]
// CHECK: %[[VAL_30:.*]] = phi i32 [ 0, %[[VAL_22]] ], [ %[[VAL_31:.*]], %[[VAL_29]] ]
// CHECK: br label %[[VAL_32:.*]]
// CHECK: omp_section_loop.cond: ; preds = %[[VAL_28]]
// CHECK: %[[VAL_33:.*]] = icmp ult i32 %[[VAL_30]], %[[VAL_27]]
// CHECK: br i1 %[[VAL_33]], label %[[VAL_34:.*]], label %[[VAL_35:.*]]
// CHECK: omp_section_loop.exit: ; preds = %[[VAL_32]]
// CHECK: call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_23]])
// CHECK: %[[VAL_36:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
// CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_36]])
// CHECK: br label %[[VAL_37:.*]]
// CHECK: omp_section_loop.after: ; preds = %[[VAL_35]]
// CHECK: br label %[[VAL_38:.*]]
// CHECK: omp_section_loop.aftersections.fini: ; preds = %[[VAL_37]]
// CHECK: %[[VAL_39:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_14]], i64 0, i64 0
// CHECK: store ptr %[[VAL_21]], ptr %[[VAL_39]], align 8
// CHECK: %[[VAL_40:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
// CHECK: %[[VAL_41:.*]] = call i32 @__kmpc_reduce(ptr @1, i32 %[[VAL_40]], i32 1, i64 8, ptr %[[VAL_14]], ptr @.omp.reduction.func, ptr @.gomp_critical_user_.reduction.var)
// CHECK: switch i32 %[[VAL_41]], label %[[VAL_42:.*]] [
// CHECK: i32 1, label %[[VAL_43:.*]]
// CHECK: i32 2, label %[[VAL_44:.*]]
// CHECK: ]
// CHECK: reduce.switch.atomic: ; preds = %[[VAL_38]]
// CHECK: unreachable
// CHECK: reduce.switch.nonatomic: ; preds = %[[VAL_38]]
// CHECK: %[[VAL_45:.*]] = load ptr, ptr %[[VAL_21]], align 8
// CHECK: br label %[[VAL_46:.*]]
// CHECK: omp.reduction.nonatomic.body: ; preds = %[[VAL_43]]
// CHECK: br label %[[VAL_47:.*]]
// CHECK: omp.reduction.nonatomic.body16: ; preds = %[[VAL_48:.*]], %[[VAL_46]]
// CHECK: %[[VAL_49:.*]] = phi i64 [ %[[VAL_50:.*]], %[[VAL_48]] ], [ 0, %[[VAL_46]] ]
// CHECK: %[[VAL_51:.*]] = icmp sgt i64 %[[VAL_49]], 0
// CHECK: br i1 %[[VAL_51]], label %[[VAL_48]], label %[[VAL_52:.*]]
// CHECK: omp.reduction.nonatomic.body18: ; preds = %[[VAL_47]]
// CHECK: br label %[[VAL_53:.*]]
// CHECK: omp.region.cont15: ; preds = %[[VAL_52]]
// CHECK: %[[VAL_54:.*]] = phi ptr [ %[[VAL_19]], %[[VAL_52]] ]
// CHECK: call void @__kmpc_end_reduce(ptr @1, i32 %[[VAL_40]], ptr @.gomp_critical_user_.reduction.var)
// CHECK: br label %[[VAL_42]]
// CHECK: omp.reduction.nonatomic.body17: ; preds = %[[VAL_47]]
// CHECK: %[[VAL_50]] = sub i64 %[[VAL_49]], 1
// CHECK: br label %[[VAL_47]]
// CHECK: reduce.finalize: ; preds = %[[VAL_53]], %[[VAL_38]]
// CHECK: %[[VAL_55:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
// CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_55]])
// CHECK: %[[VAL_56:.*]] = load ptr, ptr %[[VAL_21]], align 8
// CHECK: br label %[[VAL_57:.*]]
// CHECK: omp.reduction.cleanup: ; preds = %[[VAL_42]]
// CHECK: %[[VAL_58:.*]] = ptrtoint ptr %[[VAL_56]] to i64
// CHECK: %[[VAL_59:.*]] = icmp ne i64 %[[VAL_58]], 0
// CHECK: br i1 %[[VAL_59]], label %[[VAL_60:.*]], label %[[VAL_61:.*]]
// CHECK: omp.reduction.cleanup22: ; preds = %[[VAL_60]], %[[VAL_57]]
// CHECK: br label %[[VAL_62:.*]]
// CHECK: omp.region.cont20: ; preds = %[[VAL_61]]
// CHECK: br label %[[VAL_63:.*]]
// CHECK: omp.region.cont: ; preds = %[[VAL_62]]
// CHECK: br label %[[VAL_64:.*]]
// CHECK: omp.par.pre_finalize: ; preds = %[[VAL_63]]
// CHECK: br label %[[VAL_65:.*]]
// CHECK: omp.reduction.cleanup21: ; preds = %[[VAL_57]]
// CHECK: br label %[[VAL_61]]
// CHECK: omp_section_loop.body: ; preds = %[[VAL_32]]
// CHECK: %[[VAL_66:.*]] = add i32 %[[VAL_30]], %[[VAL_24]]
// CHECK: %[[VAL_67:.*]] = mul i32 %[[VAL_66]], 1
// CHECK: %[[VAL_68:.*]] = add i32 %[[VAL_67]], 0
// CHECK: switch i32 %[[VAL_68]], label %[[VAL_69:.*]] [
// CHECK: i32 0, label %[[VAL_70:.*]]
// CHECK: i32 1, label %[[VAL_71:.*]]
// CHECK: ]
// CHECK: omp_section_loop.body.case6: ; preds = %[[VAL_34]]
// CHECK: br label %[[VAL_72:.*]]
// CHECK: omp.section.region8: ; preds = %[[VAL_71]]
// CHECK: br label %[[VAL_73:.*]]
// CHECK: omp.section.region9: ; preds = %[[VAL_74:.*]], %[[VAL_72]]
// CHECK: %[[VAL_75:.*]] = phi i64 [ %[[VAL_76:.*]], %[[VAL_74]] ], [ 1, %[[VAL_72]] ]
// CHECK: %[[VAL_77:.*]] = icmp sgt i64 %[[VAL_75]], 0
// CHECK: br i1 %[[VAL_77]], label %[[VAL_74]], label %[[VAL_78:.*]]
// CHECK: omp.section.region11: ; preds = %[[VAL_73]]
// CHECK: br label %[[VAL_79:.*]]
// CHECK: omp.region.cont7: ; preds = %[[VAL_78]]
// CHECK: br label %[[VAL_69]]
// CHECK: omp.section.region10: ; preds = %[[VAL_73]]
// CHECK: %[[VAL_76]] = sub i64 %[[VAL_75]], 1
// CHECK: br label %[[VAL_73]]
// CHECK: omp_section_loop.body.case: ; preds = %[[VAL_34]]
// CHECK: br label %[[VAL_80:.*]]
// CHECK: omp.section.region: ; preds = %[[VAL_70]]
// CHECK: br label %[[VAL_81:.*]]
// CHECK: omp.section.region3: ; preds = %[[VAL_82:.*]], %[[VAL_80]]
// CHECK: %[[VAL_83:.*]] = phi i64 [ %[[VAL_84:.*]], %[[VAL_82]] ], [ 1, %[[VAL_80]] ]
// CHECK: %[[VAL_85:.*]] = icmp sgt i64 %[[VAL_83]], 0
// CHECK: br i1 %[[VAL_85]], label %[[VAL_82]], label %[[VAL_86:.*]]
// CHECK: omp.section.region5: ; preds = %[[VAL_81]]
// CHECK: br label %[[VAL_87:.*]]
// CHECK: omp.region.cont2: ; preds = %[[VAL_86]]
// CHECK: br label %[[VAL_69]]
// CHECK: omp.section.region4: ; preds = %[[VAL_81]]
// CHECK: %[[VAL_84]] = sub i64 %[[VAL_83]], 1
// CHECK: br label %[[VAL_81]]
// CHECK: omp_section_loop.body.sections.after: ; preds = %[[VAL_79]], %[[VAL_87]], %[[VAL_34]]
// CHECK: br label %[[VAL_29]]
// CHECK: omp_section_loop.inc: ; preds = %[[VAL_69]]
// CHECK: %[[VAL_31]] = add nuw i32 %[[VAL_30]], 1
// CHECK: br label %[[VAL_28]]
// CHECK: omp.par.outlined.exit.exitStub: ; preds = %[[VAL_64]]
// CHECK: ret void