[OpenACC][CIR] Add parallelism determ. to all acc.loops (#143751)
PR #143720 adds a requirement to the ACC dialect that every acc.loop must have a seq, independent, or auto attribute for the 'default' device_type. The standard has rules for how this can be intuited: orphan/parallel/parallel loop: independent kernels/kernels loop: auto serial/serial loop: seq, unless there is a gang/worker/vector, at which point it should be 'auto'. This patch implements all of this rule as a 'cleanup' step on the IR generation for combined/loop operations. Note that the test impact is much less since I inadvertently have my 'operation' terminating curley matching the end curley from 'attribute' instead of the front of the line, so I've added sufficient tests to ensure I captured the above.
This commit is contained in:
@@ -34,6 +34,12 @@ namespace {
|
||||
class ScalarExprEmitter;
|
||||
} // namespace
|
||||
|
||||
namespace mlir {
|
||||
namespace acc {
|
||||
class LoopOp;
|
||||
} // namespace acc
|
||||
} // namespace mlir
|
||||
|
||||
namespace clang::CIRGen {
|
||||
|
||||
class CIRGenFunction : public CIRGenTypeCache {
|
||||
@@ -1082,6 +1088,12 @@ private:
|
||||
OpenACCDirectiveKind dirKind, SourceLocation dirLoc,
|
||||
ArrayRef<const OpenACCClause *> clauses);
|
||||
|
||||
// The OpenACC LoopOp requires that we have auto, seq, or independent on all
|
||||
// LoopOp operations for the 'none' device type case. This function checks if
|
||||
// the LoopOp has one, else it updates it to have one.
|
||||
void updateLoopOpParallelism(mlir::acc::LoopOp &op, bool isOrphan,
|
||||
OpenACCDirectiveKind dk);
|
||||
|
||||
public:
|
||||
mlir::LogicalResult
|
||||
emitOpenACCComputeConstruct(const OpenACCComputeConstruct &s);
|
||||
|
||||
@@ -102,6 +102,8 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct(
|
||||
|
||||
emitOpenACCClauses(computeOp, loopOp, dirKind, dirLoc, clauses);
|
||||
|
||||
updateLoopOpParallelism(loopOp, /*isOrphan=*/false, dirKind);
|
||||
|
||||
builder.create<TermOp>(end);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,36 @@ using namespace clang::CIRGen;
|
||||
using namespace cir;
|
||||
using namespace mlir::acc;
|
||||
|
||||
void CIRGenFunction::updateLoopOpParallelism(mlir::acc::LoopOp &op,
|
||||
bool isOrphan,
|
||||
OpenACCDirectiveKind dk) {
|
||||
// Check that at least one of auto, independent, or seq is present
|
||||
// for the device-independent default clauses.
|
||||
if (op.hasParallelismFlag(mlir::acc::DeviceType::None))
|
||||
return;
|
||||
|
||||
switch (dk) {
|
||||
default:
|
||||
llvm_unreachable("Invalid parent directive kind");
|
||||
case OpenACCDirectiveKind::Invalid:
|
||||
case OpenACCDirectiveKind::Parallel:
|
||||
case OpenACCDirectiveKind::ParallelLoop:
|
||||
op.addIndependent(builder.getContext(), {});
|
||||
return;
|
||||
case OpenACCDirectiveKind::Kernels:
|
||||
case OpenACCDirectiveKind::KernelsLoop:
|
||||
op.addAuto(builder.getContext(), {});
|
||||
return;
|
||||
case OpenACCDirectiveKind::Serial:
|
||||
case OpenACCDirectiveKind::SerialLoop:
|
||||
if (op.hasDefaultGangWorkerVector())
|
||||
op.addAuto(builder.getContext(), {});
|
||||
else
|
||||
op.addSeq(builder.getContext(), {});
|
||||
return;
|
||||
};
|
||||
}
|
||||
|
||||
mlir::LogicalResult
|
||||
CIRGenFunction::emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s) {
|
||||
mlir::Location start = getLoc(s.getSourceRange().getBegin());
|
||||
@@ -90,6 +120,9 @@ CIRGenFunction::emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s) {
|
||||
emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(),
|
||||
s.clauses());
|
||||
|
||||
updateLoopOpParallelism(op, s.isOrphanedLoopConstruct(),
|
||||
s.getParentComputeConstructKind());
|
||||
|
||||
mlir::LogicalResult stmtRes = mlir::success();
|
||||
// Emit body.
|
||||
{
|
||||
|
||||
@@ -74,7 +74,7 @@ extern "C" void acc_combined(int N, int cond) {
|
||||
// CHECK: acc.serial combined(loop) {
|
||||
// CHECK: acc.loop combined(serial) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
|
||||
// CHECK-NEXT: } attributes {seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>, #acc.device_type<none>]} loc
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
#pragma acc kernels loop seq device_type(nvidia, radeon)
|
||||
@@ -99,7 +99,7 @@ extern "C" void acc_combined(int N, int cond) {
|
||||
// CHECK: acc.serial combined(loop) {
|
||||
// CHECK: acc.loop combined(serial) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>], seq = [#acc.device_type<none>]} loc
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
#pragma acc kernels loop auto device_type(nvidia, radeon)
|
||||
@@ -124,7 +124,7 @@ extern "C" void acc_combined(int N, int cond) {
|
||||
// CHECK: acc.serial combined(loop) {
|
||||
// CHECK: acc.loop combined(serial) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>], seq = [#acc.device_type<none>]} loc
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
#pragma acc kernels loop independent device_type(nvidia, radeon)
|
||||
@@ -143,7 +143,7 @@ extern "C" void acc_combined(int N, int cond) {
|
||||
// CHECK: acc.parallel combined(loop) {
|
||||
// CHECK: acc.loop combined(parallel) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
|
||||
// CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>]}
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
@@ -154,7 +154,7 @@ extern "C" void acc_combined(int N, int cond) {
|
||||
// CHECK: acc.serial combined(loop) {
|
||||
// CHECK: acc.loop combined(serial) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>], seq = [#acc.device_type<none>]}
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
@@ -165,7 +165,7 @@ extern "C" void acc_combined(int N, int cond) {
|
||||
// CHECK: acc.kernels combined(loop) {
|
||||
// CHECK: acc.loop combined(kernels) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
|
||||
// CHECK: acc.terminator
|
||||
// CHECK-NEXT: } loc
|
||||
#pragma acc parallel loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
|
||||
@@ -175,7 +175,7 @@ extern "C" void acc_combined(int N, int cond) {
|
||||
// CHECK: acc.parallel combined(loop) {
|
||||
// CHECK: acc.loop combined(parallel) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>], independent = [#acc.device_type<none>]}
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
@@ -1184,4 +1184,59 @@ extern "C" void acc_combined_data_clauses(int *arg1, int *arg2) {
|
||||
// CHECK-NEXT: } loc
|
||||
// CHECK-NEXT: acc.detach accPtr(%[[ATTACH2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_attach>, name = "arg2"}
|
||||
// CHECK-NEXT: acc.detach accPtr(%[[ATTACH1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_attach>, name = "arg1"}
|
||||
|
||||
// Checking the automatic-addition of parallelism clauses.
|
||||
#pragma acc parallel loop
|
||||
for(unsigned I = 0; I < 5; ++I);
|
||||
// CHECK-NEXT: acc.parallel combined(loop) {
|
||||
// CHECK-NEXT: acc.loop combined(parallel) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc kernels loop
|
||||
for(unsigned I = 0; I < 5; ++I);
|
||||
// CHECK-NEXT: acc.kernels combined(loop) {
|
||||
// CHECK-NEXT: acc.loop combined(kernels) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
|
||||
// CHECK-NEXT: acc.terminator
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc serial loop
|
||||
for(unsigned I = 0; I < 5; ++I);
|
||||
// CHECK-NEXT: acc.serial combined(loop) {
|
||||
// CHECK-NEXT: acc.loop combined(serial) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc serial loop worker
|
||||
for(unsigned I = 0; I < 5; ++I);
|
||||
// CHECK-NEXT: acc.serial combined(loop) {
|
||||
// CHECK-NEXT: acc.loop combined(serial) worker {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc serial loop vector
|
||||
for(unsigned I = 0; I < 5; ++I);
|
||||
// CHECK-NEXT: acc.serial combined(loop) {
|
||||
// CHECK-NEXT: acc.loop combined(serial) vector {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc serial loop gang
|
||||
for(unsigned I = 0; I < 5; ++I);
|
||||
// CHECK-NEXT: acc.serial combined(loop) {
|
||||
// CHECK-NEXT: acc.loop combined(serial) gang {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
}
|
||||
|
||||
@@ -41,12 +41,12 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>], seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
|
||||
#pragma acc loop device_type(radeon) seq
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {seq = [#acc.device_type<radeon>]} loc
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>], seq = [#acc.device_type<radeon>]} loc
|
||||
#pragma acc loop seq device_type(nvidia, radeon)
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK: acc.loop {
|
||||
@@ -67,12 +67,12 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>, #acc.device_type<none>]} loc
|
||||
#pragma acc loop device_type(radeon) independent
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<radeon>]} loc
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<radeon>, #acc.device_type<none>]} loc
|
||||
#pragma acc loop independent device_type(nvidia, radeon)
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK: acc.loop {
|
||||
@@ -93,12 +93,12 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>], independent = [#acc.device_type<none>]} loc
|
||||
#pragma acc loop device_type(radeon) auto
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<radeon>]} loc
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<radeon>], independent = [#acc.device_type<none>]} loc
|
||||
#pragma acc loop auto device_type(nvidia, radeon)
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK: acc.loop {
|
||||
@@ -116,7 +116,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
|
||||
for(unsigned K = 0; K < N; ++K);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
|
||||
// CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>]}
|
||||
|
||||
#pragma acc loop collapse(1) device_type(radeon) collapse (2)
|
||||
for(unsigned I = 0; I < N; ++I)
|
||||
@@ -124,7 +124,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
|
||||
for(unsigned K = 0; K < N; ++K);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>], independent = [#acc.device_type<none>]}
|
||||
|
||||
#pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
|
||||
for(unsigned I = 0; I < N; ++I)
|
||||
@@ -132,14 +132,14 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
|
||||
for(unsigned K = 0; K < N; ++K);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>], independent = [#acc.device_type<none>]}
|
||||
#pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
|
||||
for(unsigned I = 0; I < N; ++I)
|
||||
for(unsigned J = 0; J < N; ++J)
|
||||
for(unsigned K = 0; K < N; ++K);
|
||||
// CHECK: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
|
||||
// CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>], independent = [#acc.device_type<none>]}
|
||||
|
||||
#pragma acc loop tile(1, 2, 3)
|
||||
for(unsigned I = 0; I < N; ++I)
|
||||
@@ -392,4 +392,85 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
}
|
||||
// CHECK-NEXT: acc.terminator
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
// Checking the automatic-addition of parallelism clauses.
|
||||
#pragma acc loop
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK-NEXT: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
|
||||
|
||||
#pragma acc parallel
|
||||
{
|
||||
// CHECK-NEXT: acc.parallel {
|
||||
#pragma acc loop
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK-NEXT: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
|
||||
}
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
// CHECK-NEXT: acc.kernels {
|
||||
#pragma acc loop
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK-NEXT: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
|
||||
}
|
||||
// CHECK-NEXT: acc.terminator
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc serial
|
||||
{
|
||||
// CHECK-NEXT: acc.serial {
|
||||
#pragma acc loop
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK-NEXT: acc.loop {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
|
||||
}
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc serial
|
||||
{
|
||||
// CHECK-NEXT: acc.serial {
|
||||
#pragma acc loop worker
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK-NEXT: acc.loop worker {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
|
||||
}
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc serial
|
||||
{
|
||||
// CHECK-NEXT: acc.serial {
|
||||
#pragma acc loop vector
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK-NEXT: acc.loop vector {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
|
||||
}
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
|
||||
#pragma acc serial
|
||||
{
|
||||
// CHECK-NEXT: acc.serial {
|
||||
#pragma acc loop gang
|
||||
for(unsigned I = 0; I < N; ++I);
|
||||
// CHECK-NEXT: acc.loop gang {
|
||||
// CHECK: acc.yield
|
||||
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
|
||||
}
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: } loc
|
||||
}
|
||||
|
||||
@@ -2246,6 +2246,14 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
|
||||
// device_types. This is for the case where there is no expression specified
|
||||
// in a 'gang'.
|
||||
void addEmptyGang(MLIRContext *, llvm::ArrayRef<DeviceType>);
|
||||
|
||||
// Return whether this LoopOp has an auto, seq, or independent for the
|
||||
// specified device-type.
|
||||
bool hasParallelismFlag(DeviceType);
|
||||
|
||||
// Return whether this LoopOp has a gang, worker, or vector applying to the
|
||||
// 'default'/None device-type.
|
||||
bool hasDefaultGangWorkerVector();
|
||||
}];
|
||||
|
||||
let hasCustomAssemblyFormat = 1;
|
||||
|
||||
@@ -2839,6 +2839,30 @@ void acc::LoopOp::addEmptyGang(
|
||||
effectiveDeviceTypes));
|
||||
}
|
||||
|
||||
bool acc::LoopOp::hasParallelismFlag(DeviceType dt) {
|
||||
auto hasDevice = [=](DeviceTypeAttr attr) -> bool {
|
||||
return attr.getValue() == dt;
|
||||
};
|
||||
auto testFromArr = [=](ArrayAttr arr) -> bool {
|
||||
return llvm::any_of(arr.getAsRange<DeviceTypeAttr>(), hasDevice);
|
||||
};
|
||||
|
||||
if (ArrayAttr arr = getSeqAttr(); arr && testFromArr(arr))
|
||||
return true;
|
||||
if (ArrayAttr arr = getIndependentAttr(); arr && testFromArr(arr))
|
||||
return true;
|
||||
if (ArrayAttr arr = getAuto_Attr(); arr && testFromArr(arr))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool acc::LoopOp::hasDefaultGangWorkerVector() {
|
||||
return hasVector() || getVectorValue() || hasWorker() || getWorkerValue() ||
|
||||
hasGang() || getGangValue(GangArgType::Num) ||
|
||||
getGangValue(GangArgType::Dim) || getGangValue(GangArgType::Static);
|
||||
}
|
||||
|
||||
void acc::LoopOp::addGangOperands(
|
||||
MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
|
||||
llvm::ArrayRef<GangArgType> argTypes, mlir::ValueRange values) {
|
||||
|
||||
Reference in New Issue
Block a user