This is part of an effort to migrate from llvm::Optional to std::optional. This patch changes the way mlir-tblgen generates .inc files, and modifies tests and documentation appropriately. It is a "no compromises" patch, and doesn't leave the user with an unpleasant mix of llvm::Optional and std::optional. A non-trivial change has been made to ControlFlowInterfaces to split one constructor into two, relating to a build failure on Windows. See also: https://discourse.llvm.org/t/deprecating-llvm-optional-x-hasvalue-getvalue-getvalueor/63716 Signed-off-by: Ramkumar Ramachandra <r@artagnon.com> Differential Revision: https://reviews.llvm.org/D138934
1473 lines
64 KiB
C++
1473 lines
64 KiB
C++
//===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements a translation between the MLIR OpenMP dialect and LLVM
|
|
// IR.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
|
|
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
|
|
#include "mlir/IR/BlockAndValueMapping.h"
|
|
#include "mlir/IR/Operation.h"
|
|
#include "mlir/Support/LLVM.h"
|
|
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
|
|
|
|
#include "llvm/ADT/SetVector.h"
|
|
#include "llvm/ADT/TypeSwitch.h"
|
|
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
|
|
#include "llvm/IR/DebugInfoMetadata.h"
|
|
#include "llvm/IR/IRBuilder.h"
|
|
|
|
using namespace mlir;
|
|
|
|
namespace {
|
|
/// Maps the optional MLIR schedule clause kind onto the matching
/// OpenMPIRBuilder schedule kind. An absent clause yields
/// `OMP_SCHEDULE_Default`.
static llvm::omp::ScheduleKind
convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
  // No schedule clause was given: fall back to the default kind.
  if (!schedKind)
    return llvm::omp::OMP_SCHEDULE_Default;

  using Kind = omp::ClauseScheduleKind;
  switch (*schedKind) {
  case Kind::Static:
    return llvm::omp::OMP_SCHEDULE_Static;
  case Kind::Dynamic:
    return llvm::omp::OMP_SCHEDULE_Dynamic;
  case Kind::Guided:
    return llvm::omp::OMP_SCHEDULE_Guided;
  case Kind::Auto:
    return llvm::omp::OMP_SCHEDULE_Auto;
  case Kind::Runtime:
    return llvm::omp::OMP_SCHEDULE_Runtime;
  }
  llvm_unreachable("unhandled schedule clause argument");
}
|
|
|
|
/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
|
|
/// insertion points for allocas.
|
|
class OpenMPAllocaStackFrame
|
|
: public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
|
|
public:
|
|
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
|
|
|
|
explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
|
|
: allocaInsertPoint(allocaIP) {}
|
|
llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
|
|
};
|
|
|
|
/// ModuleTranslation stack frame containing the partial mapping between MLIR
|
|
/// values and their LLVM IR equivalents.
|
|
class OpenMPVarMappingStackFrame
|
|
: public LLVM::ModuleTranslation::StackFrameBase<
|
|
OpenMPVarMappingStackFrame> {
|
|
public:
|
|
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
|
|
|
|
explicit OpenMPVarMappingStackFrame(
|
|
const DenseMap<Value, llvm::Value *> &mapping)
|
|
: mapping(mapping) {}
|
|
|
|
DenseMap<Value, llvm::Value *> mapping;
|
|
};
|
|
} // namespace
|
|
|
|
/// Find the insertion point for allocas given the current insertion point for
|
|
/// normal operations in the builder.
|
|
static llvm::OpenMPIRBuilder::InsertPointTy
|
|
findAllocaInsertPoint(llvm::IRBuilderBase &builder,
|
|
const LLVM::ModuleTranslation &moduleTranslation) {
|
|
// If there is an alloca insertion point on stack, i.e. we are in a nested
|
|
// operation and a specific point was provided by some surrounding operation,
|
|
// use it.
|
|
llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
|
|
WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
|
|
[&](const OpenMPAllocaStackFrame &frame) {
|
|
allocaInsertPoint = frame.allocaInsertPoint;
|
|
return WalkResult::interrupt();
|
|
});
|
|
if (walkResult.wasInterrupted())
|
|
return allocaInsertPoint;
|
|
|
|
// Otherwise, insert to the entry block of the surrounding function.
|
|
// If the current IRBuilder InsertPoint is the function's entry, it cannot
|
|
// also be used for alloca insertion which would result in insertion order
|
|
// confusion. Create a new BasicBlock for the Builder and use the entry block
|
|
// for the allocs.
|
|
// TODO: Create a dedicated alloca BasicBlock at function creation such that
|
|
// we do not need to move the current InertPoint here.
|
|
if (builder.GetInsertBlock() ==
|
|
&builder.GetInsertBlock()->getParent()->getEntryBlock()) {
|
|
assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
|
|
"Assuming end of basic block");
|
|
llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
|
|
builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
|
|
builder.GetInsertBlock()->getNextNode());
|
|
builder.CreateBr(entryBB);
|
|
builder.SetInsertPoint(entryBB);
|
|
}
|
|
|
|
llvm::BasicBlock &funcEntryBlock =
|
|
builder.GetInsertBlock()->getParent()->getEntryBlock();
|
|
return llvm::OpenMPIRBuilder::InsertPointTy(
|
|
&funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
|
|
}
|
|
|
|
/// Converts the given region that appears within an OpenMP dialect operation to
|
|
/// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
|
|
/// region, and a branch from any block with an successor-less OpenMP terminator
|
|
/// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
|
|
/// of the continuation block if provided.
|
|
static llvm::BasicBlock *convertOmpOpRegions(
|
|
Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
|
|
SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
|
|
llvm::BasicBlock *continuationBlock =
|
|
splitBB(builder, true, "omp.region.cont");
|
|
llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
|
|
|
|
llvm::LLVMContext &llvmContext = builder.getContext();
|
|
for (Block &bb : region) {
|
|
llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
|
|
llvmContext, blockName, builder.GetInsertBlock()->getParent(),
|
|
builder.GetInsertBlock()->getNextNode());
|
|
moduleTranslation.mapBlock(&bb, llvmBB);
|
|
}
|
|
|
|
llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
|
|
|
|
// Terminators (namely YieldOp) may be forwarding values to the region that
|
|
// need to be available in the continuation block. Collect the types of these
|
|
// operands in preparation of creating PHI nodes.
|
|
SmallVector<llvm::Type *> continuationBlockPHITypes;
|
|
bool operandsProcessed = false;
|
|
unsigned numYields = 0;
|
|
for (Block &bb : region.getBlocks()) {
|
|
if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
|
|
if (!operandsProcessed) {
|
|
for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
|
|
continuationBlockPHITypes.push_back(
|
|
moduleTranslation.convertType(yield->getOperand(i).getType()));
|
|
}
|
|
operandsProcessed = true;
|
|
} else {
|
|
assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
|
|
"mismatching number of values yielded from the region");
|
|
for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
|
|
llvm::Type *operandType =
|
|
moduleTranslation.convertType(yield->getOperand(i).getType());
|
|
(void)operandType;
|
|
assert(continuationBlockPHITypes[i] == operandType &&
|
|
"values of mismatching types yielded from the region");
|
|
}
|
|
}
|
|
numYields++;
|
|
}
|
|
}
|
|
|
|
// Insert PHI nodes in the continuation block for any values forwarded by the
|
|
// terminators in this region.
|
|
if (!continuationBlockPHITypes.empty())
|
|
assert(
|
|
continuationBlockPHIs &&
|
|
"expected continuation block PHIs if converted regions yield values");
|
|
if (continuationBlockPHIs) {
|
|
llvm::IRBuilderBase::InsertPointGuard guard(builder);
|
|
continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
|
|
builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
|
|
for (llvm::Type *ty : continuationBlockPHITypes)
|
|
continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
|
|
}
|
|
|
|
// Convert blocks one by one in topological order to ensure
|
|
// defs are converted before uses.
|
|
SetVector<Block *> blocks =
|
|
LLVM::detail::getTopologicallySortedBlocks(region);
|
|
for (Block *bb : blocks) {
|
|
llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
|
|
// Retarget the branch of the entry block to the entry block of the
|
|
// converted region (regions are single-entry).
|
|
if (bb->isEntryBlock()) {
|
|
assert(sourceTerminator->getNumSuccessors() == 1 &&
|
|
"provided entry block has multiple successors");
|
|
assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
|
|
"ContinuationBlock is not the successor of the entry block");
|
|
sourceTerminator->setSuccessor(0, llvmBB);
|
|
}
|
|
|
|
llvm::IRBuilderBase::InsertPointGuard guard(builder);
|
|
if (failed(
|
|
moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
|
|
bodyGenStatus = failure();
|
|
return continuationBlock;
|
|
}
|
|
|
|
// Special handling for `omp.yield` and `omp.terminator` (we may have more
|
|
// than one): they return the control to the parent OpenMP dialect operation
|
|
// so replace them with the branch to the continuation block. We handle this
|
|
// here to avoid relying inter-function communication through the
|
|
// ModuleTranslation class to set up the correct insertion point. This is
|
|
// also consistent with MLIR's idiom of handling special region terminators
|
|
// in the same code that handles the region-owning operation.
|
|
Operation *terminator = bb->getTerminator();
|
|
if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
|
|
builder.CreateBr(continuationBlock);
|
|
|
|
for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
|
|
(*continuationBlockPHIs)[i]->addIncoming(
|
|
moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
|
|
}
|
|
}
|
|
// After all blocks have been traversed and values mapped, connect the PHI
|
|
// nodes to the results of preceding blocks.
|
|
LLVM::detail::connectPHINodes(region, moduleTranslation);
|
|
|
|
// Remove the blocks and values defined in this region from the mapping since
|
|
// they are not visible outside of this region. This allows the same region to
|
|
// be converted several times, that is cloned, without clashes, and slightly
|
|
// speeds up the lookups.
|
|
moduleTranslation.forgetMapping(region);
|
|
|
|
return continuationBlock;
|
|
}
|
|
|
|
/// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
|
|
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
|
|
switch (kind) {
|
|
case omp::ClauseProcBindKind::Close:
|
|
return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
|
|
case omp::ClauseProcBindKind::Master:
|
|
return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
|
|
case omp::ClauseProcBindKind::Primary:
|
|
return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
|
|
case omp::ClauseProcBindKind::Spread:
|
|
return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
|
|
}
|
|
llvm_unreachable("Unknown ClauseProcBindKind kind");
|
|
}
|
|
|
|
/// Converts the OpenMP parallel operation to LLVM IR.
|
|
static LogicalResult
|
|
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
|
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
|
|
// relying on captured variables.
|
|
LogicalResult bodyGenStatus = success();
|
|
|
|
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
|
|
// Save the alloca insertion point on ModuleTranslation stack for use in
|
|
// nested regions.
|
|
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
|
|
moduleTranslation, allocaIP);
|
|
|
|
// ParallelOp has only one region associated with it.
|
|
builder.restoreIP(codeGenIP);
|
|
convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
|
|
moduleTranslation, bodyGenStatus);
|
|
};
|
|
|
|
// TODO: Perform appropriate actions according to the data-sharing
|
|
// attribute (shared, private, firstprivate, ...) of variables.
|
|
// Currently defaults to shared.
|
|
auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
|
|
llvm::Value &, llvm::Value &vPtr,
|
|
llvm::Value *&replacementValue) -> InsertPointTy {
|
|
replacementValue = &vPtr;
|
|
|
|
return codeGenIP;
|
|
};
|
|
|
|
// TODO: Perform finalization actions for variables. This has to be
|
|
// called for variables which have destructors/finalizers.
|
|
auto finiCB = [&](InsertPointTy codeGenIP) {};
|
|
|
|
llvm::Value *ifCond = nullptr;
|
|
if (auto ifExprVar = opInst.getIfExprVar())
|
|
ifCond = moduleTranslation.lookupValue(ifExprVar);
|
|
llvm::Value *numThreads = nullptr;
|
|
if (auto numThreadsVar = opInst.getNumThreadsVar())
|
|
numThreads = moduleTranslation.lookupValue(numThreadsVar);
|
|
auto pbKind = llvm::omp::OMP_PROC_BIND_default;
|
|
if (auto bind = opInst.getProcBindVal())
|
|
pbKind = getProcBindKind(*bind);
|
|
// TODO: Is the Parallel construct cancellable?
|
|
bool isCancellable = false;
|
|
|
|
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
|
findAllocaInsertPoint(builder, moduleTranslation);
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(
|
|
ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind,
|
|
isCancellable));
|
|
|
|
return bodyGenStatus;
|
|
}
|
|
|
|
/// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
|
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
|
|
// relying on captured variables.
|
|
LogicalResult bodyGenStatus = success();
|
|
|
|
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
|
|
// MasterOp has only one region associated with it.
|
|
auto ®ion = cast<omp::MasterOp>(opInst).getRegion();
|
|
builder.restoreIP(codeGenIP);
|
|
convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation,
|
|
bodyGenStatus);
|
|
};
|
|
|
|
// TODO: Perform finalization actions for variables. This has to be
|
|
// called for variables which have destructors/finalizers.
|
|
auto finiCB = [&](InsertPointTy codeGenIP) {};
|
|
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
|
|
ompLoc, bodyGenCB, finiCB));
|
|
return success();
|
|
}
|
|
|
|
/// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
|
auto criticalOp = cast<omp::CriticalOp>(opInst);
|
|
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
|
|
// relying on captured variables.
|
|
LogicalResult bodyGenStatus = success();
|
|
|
|
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
|
|
// CriticalOp has only one region associated with it.
|
|
auto ®ion = cast<omp::CriticalOp>(opInst).getRegion();
|
|
builder.restoreIP(codeGenIP);
|
|
convertOmpOpRegions(region, "omp.critical.region", builder,
|
|
moduleTranslation, bodyGenStatus);
|
|
};
|
|
|
|
// TODO: Perform finalization actions for variables. This has to be
|
|
// called for variables which have destructors/finalizers.
|
|
auto finiCB = [&](InsertPointTy codeGenIP) {};
|
|
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
|
|
llvm::Constant *hint = nullptr;
|
|
|
|
// If it has a name, it probably has a hint too.
|
|
if (criticalOp.getNameAttr()) {
|
|
// The verifiers in OpenMP Dialect guarentee that all the pointers are
|
|
// non-null
|
|
auto symbolRef = criticalOp.getNameAttr().cast<SymbolRefAttr>();
|
|
auto criticalDeclareOp =
|
|
SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
|
|
symbolRef);
|
|
hint = llvm::ConstantInt::get(
|
|
llvm::Type::getInt32Ty(llvmContext),
|
|
static_cast<int>(criticalDeclareOp.getHintVal()));
|
|
}
|
|
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
|
|
ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint));
|
|
return success();
|
|
}
|
|
|
|
/// Returns a reduction declaration that corresponds to the given reduction
|
|
/// operation in the given container. Currently only supports reductions inside
|
|
/// WsLoopOp but can be easily extended.
|
|
static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
|
|
omp::ReductionOp reduction) {
|
|
SymbolRefAttr reductionSymbol;
|
|
for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
|
|
if (container.getReductionVars()[i] != reduction.getAccumulator())
|
|
continue;
|
|
reductionSymbol = (*container.getReductions())[i].cast<SymbolRefAttr>();
|
|
break;
|
|
}
|
|
assert(reductionSymbol &&
|
|
"reduction operation must be associated with a declaration");
|
|
|
|
return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
|
|
container, reductionSymbol);
|
|
}
|
|
|
|
/// Populates `reductions` with reduction declarations used in the given loop.
|
|
static void
|
|
collectReductionDecls(omp::WsLoopOp loop,
|
|
SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
|
|
std::optional<ArrayAttr> attr = loop.getReductions();
|
|
if (!attr)
|
|
return;
|
|
|
|
reductions.reserve(reductions.size() + loop.getNumReductionVars());
|
|
for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
|
|
reductions.push_back(
|
|
SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
|
|
loop, symbolRef));
|
|
}
|
|
}
|
|
|
|
/// Translates the blocks contained in the given region and appends them to at
|
|
/// the current insertion point of `builder`. The operations of the entry block
|
|
/// are appended to the current insertion block, which is not expected to have a
|
|
/// terminator. If set, `continuationBlockArgs` is populated with translated
|
|
/// values that correspond to the values omp.yield'ed from the region.
|
|
static LogicalResult inlineConvertOmpRegions(
|
|
Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation,
|
|
SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
|
|
if (region.empty())
|
|
return success();
|
|
|
|
// Special case for single-block regions that don't create additional blocks:
|
|
// insert operations without creating additional blocks.
|
|
if (llvm::hasSingleElement(region)) {
|
|
moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock());
|
|
if (failed(moduleTranslation.convertBlock(
|
|
region.front(), /*ignoreArguments=*/true, builder)))
|
|
return failure();
|
|
|
|
// The continuation arguments are simply the translated terminator operands.
|
|
if (continuationBlockArgs)
|
|
llvm::append_range(
|
|
*continuationBlockArgs,
|
|
moduleTranslation.lookupValues(region.front().back().getOperands()));
|
|
|
|
// Drop the mapping that is no longer necessary so that the same region can
|
|
// be processed multiple times.
|
|
moduleTranslation.forgetMapping(region);
|
|
return success();
|
|
}
|
|
|
|
LogicalResult bodyGenStatus = success();
|
|
SmallVector<llvm::PHINode *> phis;
|
|
llvm::BasicBlock *continuationBlock = convertOmpOpRegions(
|
|
region, blockName, builder, moduleTranslation, bodyGenStatus, &phis);
|
|
if (failed(bodyGenStatus))
|
|
return failure();
|
|
if (continuationBlockArgs)
|
|
llvm::append_range(*continuationBlockArgs, phis);
|
|
builder.SetInsertPoint(continuationBlock,
|
|
continuationBlock->getFirstInsertionPt());
|
|
return success();
|
|
}
|
|
|
|
namespace {
|
|
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
|
|
/// store lambdas with capture.
|
|
using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
|
|
llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
|
|
llvm::Value *&)>;
|
|
using OwningAtomicReductionGen =
|
|
std::function<llvm::OpenMPIRBuilder::InsertPointTy(
|
|
llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
|
|
llvm::Value *)>;
|
|
} // namespace
|
|
|
|
/// Create an OpenMPIRBuilder-compatible reduction generator for the given
|
|
/// reduction declaration. The generator uses `builder` but ignores its
|
|
/// insertion point.
|
|
static OwningReductionGen
|
|
makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
// The lambda is mutable because we need access to non-const methods of decl
|
|
// (which aren't actually mutating it), and we must capture decl by-value to
|
|
// avoid the dangling reference after the parent function returns.
|
|
OwningReductionGen gen =
|
|
[&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
|
|
llvm::Value *lhs, llvm::Value *rhs,
|
|
llvm::Value *&result) mutable {
|
|
Region &reductionRegion = decl.getReductionRegion();
|
|
moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
|
|
moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
|
|
builder.restoreIP(insertPoint);
|
|
SmallVector<llvm::Value *> phis;
|
|
if (failed(inlineConvertOmpRegions(reductionRegion,
|
|
"omp.reduction.nonatomic.body",
|
|
builder, moduleTranslation, &phis)))
|
|
return llvm::OpenMPIRBuilder::InsertPointTy();
|
|
assert(phis.size() == 1);
|
|
result = phis[0];
|
|
return builder.saveIP();
|
|
};
|
|
return gen;
|
|
}
|
|
|
|
/// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
|
|
/// given reduction declaration. The generator uses `builder` but ignores its
|
|
/// insertion point. Returns null if there is no atomic region available in the
|
|
/// reduction declaration.
|
|
static OwningAtomicReductionGen
|
|
makeAtomicReductionGen(omp::ReductionDeclareOp decl,
|
|
llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
if (decl.getAtomicReductionRegion().empty())
|
|
return OwningAtomicReductionGen();
|
|
|
|
// The lambda is mutable because we need access to non-const methods of decl
|
|
// (which aren't actually mutating it), and we must capture decl by-value to
|
|
// avoid the dangling reference after the parent function returns.
|
|
OwningAtomicReductionGen atomicGen =
|
|
[&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
|
|
llvm::Value *lhs, llvm::Value *rhs) mutable {
|
|
Region &atomicRegion = decl.getAtomicReductionRegion();
|
|
moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
|
|
moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
|
|
builder.restoreIP(insertPoint);
|
|
SmallVector<llvm::Value *> phis;
|
|
if (failed(inlineConvertOmpRegions(atomicRegion,
|
|
"omp.reduction.atomic.body", builder,
|
|
moduleTranslation, &phis)))
|
|
return llvm::OpenMPIRBuilder::InsertPointTy();
|
|
assert(phis.empty());
|
|
return builder.saveIP();
|
|
};
|
|
return atomicGen;
|
|
}
|
|
|
|
/// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
auto orderedOp = cast<omp::OrderedOp>(opInst);
|
|
|
|
omp::ClauseDepend dependType = *orderedOp.getDependTypeVal();
|
|
bool isDependSource = dependType == omp::ClauseDepend::dependsource;
|
|
unsigned numLoops = *orderedOp.getNumLoopsVal();
|
|
SmallVector<llvm::Value *> vecValues =
|
|
moduleTranslation.lookupValues(orderedOp.getDependVecVars());
|
|
|
|
size_t indexVecValues = 0;
|
|
while (indexVecValues < vecValues.size()) {
|
|
SmallVector<llvm::Value *> storeValues;
|
|
storeValues.reserve(numLoops);
|
|
for (unsigned i = 0; i < numLoops; i++) {
|
|
storeValues.push_back(vecValues[indexVecValues]);
|
|
indexVecValues++;
|
|
}
|
|
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
|
findAllocaInsertPoint(builder, moduleTranslation);
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
|
|
ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
|
|
}
|
|
return success();
|
|
}
|
|
|
|
/// Converts an OpenMP 'ordered_region' operation into LLVM IR using
|
|
/// OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
|
auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
|
|
|
|
// TODO: The code generation for ordered simd directive is not supported yet.
|
|
if (orderedRegionOp.getSimd())
|
|
return failure();
|
|
|
|
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
|
|
// relying on captured variables.
|
|
LogicalResult bodyGenStatus = success();
|
|
|
|
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
|
|
// OrderedOp has only one region associated with it.
|
|
auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion();
|
|
builder.restoreIP(codeGenIP);
|
|
convertOmpOpRegions(region, "omp.ordered.region", builder,
|
|
moduleTranslation, bodyGenStatus);
|
|
};
|
|
|
|
// TODO: Perform finalization actions for variables. This has to be
|
|
// called for variables which have destructors/finalizers.
|
|
auto finiCB = [&](InsertPointTy codeGenIP) {};
|
|
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
builder.restoreIP(
|
|
moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
|
|
ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getSimd()));
|
|
return bodyGenStatus;
|
|
}
|
|
|
|
/// Converts an OpenMP 'sections' operation into LLVM IR using OpenMPIRBuilder.
/// Each nested `omp.section` is translated by its own body callback. Emits an
/// error when reduction or allocate clauses are present, and returns failure
/// if the translation of any section region failed.
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  using StorableBodyGenCallbackTy =
      llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

  auto sectionsOp = cast<omp::SectionsOp>(opInst);

  // TODO: Support the following clauses: private, firstprivate, lastprivate,
  // reduction, allocate
  if (!sectionsOp.getReductionVars().empty() || sectionsOp.getReductions() ||
      !sectionsOp.getAllocateVars().empty() ||
      !sectionsOp.getAllocatorsVars().empty())
    return emitError(sectionsOp.getLoc())
           << "reduction and allocate clauses are not supported for sections "
              "construct";

  LogicalResult bodyGenStatus = success();
  SmallVector<StorableBodyGenCallbackTy> sectionCBs;

  // Build one body callback per `omp.section` found in the first block.
  for (Operation &op : *sectionsOp.getRegion().begin()) {
    auto sectionOp = dyn_cast<omp::SectionOp>(op);
    if (!sectionOp) // omp.terminator
      continue;

    Region &region = sectionOp.getRegion();
    auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
                         InsertPointTy allocaIP, InsertPointTy codeGenIP) {
      builder.restoreIP(codeGenIP);
      convertOmpOpRegions(region, "omp.section.region", builder,
                          moduleTranslation, bodyGenStatus);
    };
    sectionCBs.push_back(sectionCB);
  }

  // No sections within omp.sections operation - skip generation. This situation
  // is only possible if there is only a terminator operation inside the
  // sections operation
  if (sectionCBs.empty())
    return success();

  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));

  // TODO: Perform appropriate actions according to the data-sharing
  // attribute (shared, private, firstprivate, ...) of variables.
  // Currently defaults to shared.
  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
                    llvm::Value &vPtr,
                    llvm::Value *&replacementValue) -> InsertPointTy {
    replacementValue = &vPtr;
    return codeGenIP;
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) {};

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
      ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
      sectionsOp.getNowait()));
  return bodyGenStatus;
}
|
|
|
|
/// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
LogicalResult bodyGenStatus = success();
|
|
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
|
|
builder.restoreIP(codegenIP);
|
|
convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder,
|
|
moduleTranslation, bodyGenStatus);
|
|
};
|
|
auto finiCB = [&](InsertPointTy codeGenIP) {};
|
|
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
|
|
ompLoc, bodyCB, finiCB, singleOp.getNowait(), /*DidIt=*/nullptr));
|
|
return bodyGenStatus;
|
|
}
|
|
|
|
/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
|
LogicalResult bodyGenStatus = success();
|
|
if (taskOp.getIfExpr() || taskOp.getFinalExpr() || taskOp.getUntiedAttr() ||
|
|
taskOp.getMergeableAttr() || taskOp.getInReductions() ||
|
|
taskOp.getPriority() || !taskOp.getAllocateVars().empty()) {
|
|
return taskOp.emitError("unhandled clauses for translation to LLVM IR");
|
|
}
|
|
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
|
|
builder.restoreIP(codegenIP);
|
|
convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder,
|
|
moduleTranslation, bodyGenStatus);
|
|
};
|
|
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
|
findAllocaInsertPoint(builder, moduleTranslation);
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
|
|
ompLoc, allocaIP, bodyCB, !taskOp.getUntied()));
|
|
return bodyGenStatus;
|
|
}
|
|
|
|
/// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
|
LogicalResult bodyGenStatus = success();
|
|
if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) {
|
|
return tgOp.emitError("unhandled clauses for translation to LLVM IR");
|
|
}
|
|
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
|
|
builder.restoreIP(codegenIP);
|
|
convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder,
|
|
moduleTranslation, bodyGenStatus);
|
|
};
|
|
InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(
|
|
ompLoc, allocaIP, bodyCB));
|
|
return bodyGenStatus;
|
|
}
|
|
|
|
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
auto loop = cast<omp::WsLoopOp>(opInst);
|
|
// TODO: this should be in the op verifier instead.
|
|
if (loop.getLowerBound().empty())
|
|
return failure();
|
|
|
|
// Static is the default.
|
|
auto schedule =
|
|
loop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static);
|
|
|
|
// Find the loop configuration.
|
|
llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[0]);
|
|
llvm::Type *ivType = step->getType();
|
|
llvm::Value *chunk = nullptr;
|
|
if (loop.getScheduleChunkVar()) {
|
|
llvm::Value *chunkVar =
|
|
moduleTranslation.lookupValue(loop.getScheduleChunkVar());
|
|
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
|
|
}
|
|
|
|
SmallVector<omp::ReductionDeclareOp> reductionDecls;
|
|
collectReductionDecls(loop, reductionDecls);
|
|
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
|
findAllocaInsertPoint(builder, moduleTranslation);
|
|
|
|
// Allocate space for privatized reduction variables.
|
|
SmallVector<llvm::Value *> privateReductionVariables;
|
|
DenseMap<Value, llvm::Value *> reductionVariableMap;
|
|
unsigned numReductions = loop.getNumReductionVars();
|
|
privateReductionVariables.reserve(numReductions);
|
|
if (numReductions != 0) {
|
|
llvm::IRBuilderBase::InsertPointGuard guard(builder);
|
|
builder.restoreIP(allocaIP);
|
|
for (unsigned i = 0; i < numReductions; ++i) {
|
|
auto reductionType =
|
|
loop.getReductionVars()[i].getType().cast<LLVM::LLVMPointerType>();
|
|
llvm::Value *var = builder.CreateAlloca(
|
|
moduleTranslation.convertType(reductionType.getElementType()));
|
|
privateReductionVariables.push_back(var);
|
|
reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
|
|
}
|
|
}
|
|
|
|
// Store the mapping between reduction variables and their private copies on
|
|
// ModuleTranslation stack. It can be then recovered when translating
|
|
// omp.reduce operations in a separate call.
|
|
LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
|
|
moduleTranslation, reductionVariableMap);
|
|
|
|
// Before the loop, store the initial values of reductions into reduction
|
|
// variables. Although this could be done after allocas, we don't want to mess
|
|
// up with the alloca insertion point.
|
|
for (unsigned i = 0; i < numReductions; ++i) {
|
|
SmallVector<llvm::Value *> phis;
|
|
if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
|
|
"omp.reduction.neutral", builder,
|
|
moduleTranslation, &phis)))
|
|
return failure();
|
|
assert(phis.size() == 1 && "expected one value to be yielded from the "
|
|
"reduction neutral element declaration region");
|
|
builder.CreateStore(phis[0], privateReductionVariables[i]);
|
|
}
|
|
|
|
// Set up the source location value for OpenMP runtime.
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
|
|
// Generator of the canonical loop body.
|
|
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
|
|
// relying on captured variables.
|
|
SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
|
|
SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
|
|
LogicalResult bodyGenStatus = success();
|
|
auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
|
|
// Make sure further conversions know about the induction variable.
|
|
moduleTranslation.mapValue(
|
|
loop.getRegion().front().getArgument(loopInfos.size()), iv);
|
|
|
|
// Capture the body insertion point for use in nested loops. BodyIP of the
|
|
// CanonicalLoopInfo always points to the beginning of the entry block of
|
|
// the body.
|
|
bodyInsertPoints.push_back(ip);
|
|
|
|
if (loopInfos.size() != loop.getNumLoops() - 1)
|
|
return;
|
|
|
|
// Convert the body of the loop.
|
|
builder.restoreIP(ip);
|
|
convertOmpOpRegions(loop.getRegion(), "omp.wsloop.region", builder,
|
|
moduleTranslation, bodyGenStatus);
|
|
};
|
|
|
|
// Delegate actual loop construction to the OpenMP IRBuilder.
|
|
// TODO: this currently assumes WsLoop is semantically similar to SCF loop,
|
|
// i.e. it has a positive step, uses signed integer semantics. Reconsider
|
|
// this code when WsLoop clearly supports more cases.
|
|
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
|
for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
|
|
llvm::Value *lowerBound =
|
|
moduleTranslation.lookupValue(loop.getLowerBound()[i]);
|
|
llvm::Value *upperBound =
|
|
moduleTranslation.lookupValue(loop.getUpperBound()[i]);
|
|
llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[i]);
|
|
|
|
// Make sure loop trip count are emitted in the preheader of the outermost
|
|
// loop at the latest so that they are all available for the new collapsed
|
|
// loop will be created below.
|
|
llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
|
|
llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
|
|
if (i != 0) {
|
|
loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
|
|
computeIP = loopInfos.front()->getPreheaderIP();
|
|
}
|
|
loopInfos.push_back(ompBuilder->createCanonicalLoop(
|
|
loc, bodyGen, lowerBound, upperBound, step,
|
|
/*IsSigned=*/true, loop.getInclusive(), computeIP));
|
|
|
|
if (failed(bodyGenStatus))
|
|
return failure();
|
|
}
|
|
|
|
// Collapse loops. Store the insertion point because LoopInfos may get
|
|
// invalidated.
|
|
llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
|
|
llvm::CanonicalLoopInfo *loopInfo =
|
|
ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
|
|
|
|
allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
|
|
|
|
// TODO: Handle doacross loops when the ordered clause has a parameter.
|
|
bool isOrdered = loop.getOrderedVal().has_value();
|
|
std::optional<omp::ScheduleModifier> scheduleModifier =
|
|
loop.getScheduleModifier();
|
|
bool isSimd = loop.getSimdModifier();
|
|
|
|
ompBuilder->applyWorkshareLoop(
|
|
ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(),
|
|
convertToScheduleKind(schedule), chunk, isSimd,
|
|
scheduleModifier == omp::ScheduleModifier::monotonic,
|
|
scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);
|
|
|
|
// Continue building IR after the loop. Note that the LoopInfo returned by
|
|
// `collapseLoops` points inside the outermost loop and is intended for
|
|
// potential further loop transformations. Use the insertion point stored
|
|
// before collapsing loops instead.
|
|
builder.restoreIP(afterIP);
|
|
|
|
// Process the reductions if required.
|
|
if (numReductions == 0)
|
|
return success();
|
|
|
|
// Create the reduction generators. We need to own them here because
|
|
// ReductionInfo only accepts references to the generators.
|
|
SmallVector<OwningReductionGen> owningReductionGens;
|
|
SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
|
|
for (unsigned i = 0; i < numReductions; ++i) {
|
|
owningReductionGens.push_back(
|
|
makeReductionGen(reductionDecls[i], builder, moduleTranslation));
|
|
owningAtomicReductionGens.push_back(
|
|
makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
|
|
}
|
|
|
|
// Collect the reduction information.
|
|
SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
|
|
reductionInfos.reserve(numReductions);
|
|
for (unsigned i = 0; i < numReductions; ++i) {
|
|
llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
|
|
if (owningAtomicReductionGens[i])
|
|
atomicGen = owningAtomicReductionGens[i];
|
|
auto reductionType =
|
|
loop.getReductionVars()[i].getType().cast<LLVM::LLVMPointerType>();
|
|
llvm::Value *variable =
|
|
moduleTranslation.lookupValue(loop.getReductionVars()[i]);
|
|
reductionInfos.push_back(
|
|
{moduleTranslation.convertType(reductionType.getElementType()),
|
|
variable, privateReductionVariables[i], owningReductionGens[i],
|
|
atomicGen});
|
|
}
|
|
|
|
// The call to createReductions below expects the block to have a
|
|
// terminator. Create an unreachable instruction to serve as terminator
|
|
// and remove it later.
|
|
llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
|
|
builder.SetInsertPoint(tempTerminator);
|
|
llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
|
|
ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
|
|
loop.getNowait());
|
|
if (!contInsertPoint.getBlock())
|
|
return loop->emitOpError() << "failed to convert reductions";
|
|
auto nextInsertionPoint =
|
|
ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
|
|
tempTerminator->eraseFromParent();
|
|
builder.restoreIP(nextInsertionPoint);
|
|
|
|
return success();
|
|
}
|
|
|
|
/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
auto loop = cast<omp::SimdLoopOp>(opInst);
|
|
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
|
|
// Generator of the canonical loop body.
|
|
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
|
|
// relying on captured variables.
|
|
SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
|
|
SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
|
|
LogicalResult bodyGenStatus = success();
|
|
auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
|
|
// Make sure further conversions know about the induction variable.
|
|
moduleTranslation.mapValue(
|
|
loop.getRegion().front().getArgument(loopInfos.size()), iv);
|
|
|
|
// Capture the body insertion point for use in nested loops. BodyIP of the
|
|
// CanonicalLoopInfo always points to the beginning of the entry block of
|
|
// the body.
|
|
bodyInsertPoints.push_back(ip);
|
|
|
|
if (loopInfos.size() != loop.getNumLoops() - 1)
|
|
return;
|
|
|
|
// Convert the body of the loop.
|
|
builder.restoreIP(ip);
|
|
convertOmpOpRegions(loop.getRegion(), "omp.simdloop.region", builder,
|
|
moduleTranslation, bodyGenStatus);
|
|
};
|
|
|
|
// Delegate actual loop construction to the OpenMP IRBuilder.
|
|
// TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
|
|
// i.e. it has a positive step, uses signed integer semantics. Reconsider
|
|
// this code when SimdLoop clearly supports more cases.
|
|
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
|
for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
|
|
llvm::Value *lowerBound =
|
|
moduleTranslation.lookupValue(loop.getLowerBound()[i]);
|
|
llvm::Value *upperBound =
|
|
moduleTranslation.lookupValue(loop.getUpperBound()[i]);
|
|
llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[i]);
|
|
|
|
// Make sure loop trip count are emitted in the preheader of the outermost
|
|
// loop at the latest so that they are all available for the new collapsed
|
|
// loop will be created below.
|
|
llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
|
|
llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
|
|
if (i != 0) {
|
|
loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
|
|
ompLoc.DL);
|
|
computeIP = loopInfos.front()->getPreheaderIP();
|
|
}
|
|
loopInfos.push_back(ompBuilder->createCanonicalLoop(
|
|
loc, bodyGen, lowerBound, upperBound, step,
|
|
/*IsSigned=*/true, /*Inclusive=*/true, computeIP));
|
|
|
|
if (failed(bodyGenStatus))
|
|
return failure();
|
|
}
|
|
|
|
// Collapse loops.
|
|
llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
|
|
llvm::CanonicalLoopInfo *loopInfo =
|
|
ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
|
|
|
|
llvm::ConstantInt *simdlen = nullptr;
|
|
if (std::optional<uint64_t> simdlenVar = loop.getSimdlen())
|
|
simdlen = builder.getInt64(simdlenVar.value());
|
|
|
|
llvm::ConstantInt *safelen = nullptr;
|
|
if (std::optional<uint64_t> safelenVar = loop.getSafelen())
|
|
safelen = builder.getInt64(safelenVar.value());
|
|
|
|
llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
|
|
ompBuilder->applySimd(
|
|
loopInfo, alignedVars,
|
|
loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr())
|
|
: nullptr,
|
|
llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen);
|
|
|
|
builder.restoreIP(afterIP);
|
|
return success();
|
|
}
|
|
|
|
/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
|
|
llvm::AtomicOrdering
|
|
convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
|
|
if (!ao)
|
|
return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
|
|
|
|
switch (*ao) {
|
|
case omp::ClauseMemoryOrderKind::Seq_cst:
|
|
return llvm::AtomicOrdering::SequentiallyConsistent;
|
|
case omp::ClauseMemoryOrderKind::Acq_rel:
|
|
return llvm::AtomicOrdering::AcquireRelease;
|
|
case omp::ClauseMemoryOrderKind::Acquire:
|
|
return llvm::AtomicOrdering::Acquire;
|
|
case omp::ClauseMemoryOrderKind::Release:
|
|
return llvm::AtomicOrdering::Release;
|
|
case omp::ClauseMemoryOrderKind::Relaxed:
|
|
return llvm::AtomicOrdering::Monotonic;
|
|
}
|
|
llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
|
|
}
|
|
|
|
/// Convert omp.atomic.read operation to LLVM IR.
|
|
static LogicalResult
|
|
convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
|
|
auto readOp = cast<omp::AtomicReadOp>(opInst);
|
|
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
|
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
|
|
llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrderVal());
|
|
llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
|
|
Type xTy =
|
|
readOp.getX().getType().cast<omp::PointerLikeType>().getElementType();
|
|
llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
|
|
Type vTy =
|
|
readOp.getV().getType().cast<omp::PointerLikeType>().getElementType();
|
|
llvm::OpenMPIRBuilder::AtomicOpValue V = {
|
|
v, moduleTranslation.convertType(vTy), false, false};
|
|
llvm::OpenMPIRBuilder::AtomicOpValue X = {
|
|
x, moduleTranslation.convertType(xTy), false, false};
|
|
builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
|
|
return success();
|
|
}
|
|
|
|
/// Converts an omp.atomic.write operation to LLVM IR.
|
|
static LogicalResult
|
|
convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
auto writeOp = cast<omp::AtomicWriteOp>(opInst);
|
|
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
|
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrderVal());
|
|
llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getValue());
|
|
llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getAddress());
|
|
llvm::Type *ty = moduleTranslation.convertType(writeOp.getValue().getType());
|
|
llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
|
|
/*isVolatile=*/false};
|
|
builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
|
|
return success();
|
|
}
|
|
|
|
/// Converts an LLVM dialect binary operation to the corresponding enum value
|
|
/// for `atomicrmw` supported binary operation.
|
|
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
|
|
return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
|
|
.Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
|
|
.Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
|
|
.Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
|
|
.Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
|
|
.Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
|
|
.Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
|
|
.Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
|
|
.Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
|
|
.Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
|
|
.Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
|
|
}
|
|
|
|
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
|
|
llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
|
|
|
// Convert values and types.
|
|
auto &innerOpList = opInst.getRegion().front().getOperations();
|
|
if (innerOpList.size() != 2)
|
|
return opInst.emitError("exactly two operations are allowed inside an "
|
|
"atomic update region while lowering to LLVM IR");
|
|
|
|
Operation &innerUpdateOp = innerOpList.front();
|
|
|
|
if (innerUpdateOp.getNumOperands() != 2 ||
|
|
!llvm::is_contained(innerUpdateOp.getOperands(),
|
|
opInst.getRegion().getArgument(0)))
|
|
return opInst.emitError(
|
|
"the update operation inside the region must be a binary operation and "
|
|
"that update operation must have the region argument as an operand");
|
|
|
|
llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp);
|
|
|
|
bool isXBinopExpr =
|
|
innerUpdateOp.getNumOperands() > 0 &&
|
|
innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0);
|
|
|
|
mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1)
|
|
: innerUpdateOp.getOperand(0));
|
|
llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
|
|
llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.getX());
|
|
LLVM::LLVMPointerType mlirXType =
|
|
opInst.getX().getType().cast<LLVM::LLVMPointerType>();
|
|
llvm::Type *llvmXElementType =
|
|
moduleTranslation.convertType(mlirXType.getElementType());
|
|
llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
|
|
/*isSigned=*/false,
|
|
/*isVolatile=*/false};
|
|
|
|
llvm::AtomicOrdering atomicOrdering =
|
|
convertAtomicOrdering(opInst.getMemoryOrderVal());
|
|
|
|
// Generate update code.
|
|
LogicalResult updateGenStatus = success();
|
|
auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
|
|
llvm::Value *atomicx,
|
|
llvm::IRBuilder<> &builder) -> llvm::Value * {
|
|
Block &bb = *opInst.getRegion().begin();
|
|
moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
|
|
moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
|
|
if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
|
|
updateGenStatus = (opInst.emitError()
|
|
<< "unable to convert update operation to llvm IR");
|
|
return nullptr;
|
|
}
|
|
omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
|
|
assert(yieldop && yieldop.getResults().size() == 1 &&
|
|
"terminator must be omp.yield op and it must have exactly one "
|
|
"argument");
|
|
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
|
|
};
|
|
|
|
// Handle ambiguous alloca, if any.
|
|
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
builder.restoreIP(ompBuilder->createAtomicUpdate(
|
|
ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
|
|
isXBinopExpr));
|
|
return updateGenStatus;
|
|
}
|
|
|
|
/// Lowers an `omp.atomic.capture` operation to LLVM IR through
/// OpenMPIRBuilder::createAtomicCapture.
///
/// The capture contains an atomic read together with either an atomic write
/// or an atomic update. For the update form, the update region must contain
/// exactly two operations, the first of which must be a binary operation
/// taking the region argument as one of its operands; otherwise an error is
/// emitted.
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  mlir::Value mlirExpr;
  bool isXBinopExpr = false;
  bool isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  if (!atomicWriteOp) {
    // Update form: it is a postfix update when the update is the second
    // operation of the capture.
    isPostfixUpdate =
        atomicCaptureOp.getSecondOp() == atomicUpdateOp.getOperation();

    Region &updateRegion = atomicUpdateOp.getRegion();
    auto &regionOps = updateRegion.front().getOperations();
    if (regionOps.size() != 2)
      return atomicUpdateOp.emitError(
          "exactly two operations are allowed inside an "
          "atomic update region while lowering to LLVM IR");

    Operation *updateOp = atomicUpdateOp.getFirstOp();
    const bool isBinaryOp = updateOp->getNumOperands() == 2;
    if (!isBinaryOp || !llvm::is_contained(updateOp->getOperands(),
                                           updateRegion.getArgument(0)))
      return atomicUpdateOp.emitError(
          "the update operation inside the region must be a binary operation "
          "and that update operation must have the region argument as an "
          "operand");

    binop = convertBinOpToAtomic(*updateOp);

    // Record whether `x` is the first operand of the binary operation; the
    // other operand is the expression.
    isXBinopExpr = updateOp->getOperand(0) == updateRegion.getArgument(0);
    mlirExpr = updateOp->getOperand(isXBinopExpr ? 1 : 0);
  } else {
    // Write form: always a postfix update, the expression is the value
    // written.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getValue();
  }

  // Convert values and types. Both `x` and `v` are described with the element
  // type of `x`.
  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  auto xPtrType = atomicCaptureOp.getAtomicReadOp()
                      .getX()
                      .getType()
                      .cast<LLVM::LLVMPointerType>();
  llvm::Type *xElemType =
      moduleTranslation.convertType(xPtrType.getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue atomicX = {llvmX, xElemType,
                                                  /*isSigned=*/false,
                                                  /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue atomicV = {llvmV, xElemType,
                                                  /*isSigned=*/false,
                                                  /*isVolatile=*/false};

  llvm::AtomicOrdering ordering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrderVal());

  // The callback cannot return an error, so failures are reported through the
  // captured status.
  LogicalResult updateStatus = success();
  auto emitUpdate = [&](llvm::Value *currentX,
                        llvm::IRBuilder<> &updateBuilder) -> llvm::Value * {
    // For the write form the new value is simply the value written.
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getValue());

    Block &body = atomicUpdateOp.getRegion().front();
    moduleTranslation.mapValue(atomicUpdateOp.getRegion().getArgument(0),
                               currentX);
    moduleTranslation.mapBlock(&body, updateBuilder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(body, true, updateBuilder))) {
      updateStatus = (atomicUpdateOp.emitError()
                      << "unable to convert update operation to llvm IR");
      return nullptr;
    }
    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(body.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  builder.restoreIP(ompBuilder->createAtomicCapture(
      ompLoc, allocaIP, atomicX, atomicV, llvmExpr, ordering, binop,
      emitUpdate, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
  return updateStatus;
}
|
|
|
|
/// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
|
|
/// mapping between reduction variables and their private equivalents to have
|
|
/// been stored on the ModuleTranslation stack. Currently only supports
|
|
/// reduction within WsLoopOp, but can be easily extended.
|
|
static LogicalResult
|
|
convertOmpReductionOp(omp::ReductionOp reductionOp,
|
|
llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
// Find the declaration that corresponds to the reduction op.
|
|
auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
|
|
omp::ReductionDeclareOp declaration =
|
|
findReductionDecl(reductionContainer, reductionOp);
|
|
assert(declaration && "could not find reduction declaration");
|
|
|
|
// Retrieve the mapping between reduction variables and their private
|
|
// equivalents.
|
|
const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
|
|
moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
|
|
[&](const OpenMPVarMappingStackFrame &frame) {
|
|
reductionVariableMap = &frame.mapping;
|
|
return WalkResult::interrupt();
|
|
});
|
|
assert(reductionVariableMap && "couldn't find private reduction variables");
|
|
|
|
// Translate the reduction operation by emitting the body of the corresponding
|
|
// reduction declaration.
|
|
Region &reductionRegion = declaration.getReductionRegion();
|
|
llvm::Value *privateReductionVar =
|
|
reductionVariableMap->lookup(reductionOp.getAccumulator());
|
|
llvm::Value *reductionVal = builder.CreateLoad(
|
|
moduleTranslation.convertType(reductionOp.getOperand().getType()),
|
|
privateReductionVar);
|
|
|
|
moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
|
|
reductionVal);
|
|
moduleTranslation.mapValue(
|
|
reductionRegion.front().getArgument(1),
|
|
moduleTranslation.lookupValue(reductionOp.getOperand()));
|
|
|
|
SmallVector<llvm::Value *> phis;
|
|
if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
|
|
builder, moduleTranslation, &phis)))
|
|
return failure();
|
|
assert(phis.size() == 1 && "expected one value to be yielded from "
|
|
"the reduction body declaration region");
|
|
builder.CreateStore(phis[0], privateReductionVar);
|
|
return success();
|
|
}
|
|
|
|
/// Converts an OpenMP Threadprivate operation into LLVM IR using
|
|
/// OpenMPIRBuilder.
|
|
static LogicalResult
|
|
convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) {
|
|
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
|
auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
|
|
|
|
Value symAddr = threadprivateOp.getSymAddr();
|
|
auto *symOp = symAddr.getDefiningOp();
|
|
if (!isa<LLVM::AddressOfOp>(symOp))
|
|
return opInst.emitError("Addressing symbol not found");
|
|
LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
|
|
|
|
LLVM::GlobalOp global =
|
|
addressOfOp.getGlobal(moduleTranslation.symbolTable());
|
|
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
|
|
llvm::Value *data =
|
|
builder.CreateBitCast(globalValue, builder.getInt8PtrTy());
|
|
llvm::Type *type = globalValue->getValueType();
|
|
llvm::TypeSize typeSize =
|
|
builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
|
|
type);
|
|
llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedSize());
|
|
llvm::StringRef suffix = llvm::StringRef(".cache", 6);
|
|
std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
|
|
// Emit runtime function and bitcast its type (i8*) to real data type.
|
|
llvm::Value *callInst =
|
|
moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
|
|
ompLoc, data, size, cacheName);
|
|
llvm::Value *result = builder.CreateBitCast(callInst, globalValue->getType());
|
|
moduleTranslation.mapValue(opInst.getResult(0), result);
|
|
return success();
|
|
}
|
|
|
|
namespace {
|
|
|
|
/// Implementation of the dialect interface that converts operations belonging
|
|
/// to the OpenMP dialect to LLVM IR.
|
|
class OpenMPDialectLLVMIRTranslationInterface
|
|
: public LLVMTranslationDialectInterface {
|
|
public:
|
|
using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
|
|
|
|
/// Translates the given operation to LLVM IR using the provided IR builder
|
|
/// and saving the state in `moduleTranslation`.
|
|
LogicalResult
|
|
convertOperation(Operation *op, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) const final;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
/// Given an OpenMP MLIR operation, create the corresponding LLVM IR
|
|
/// (including OpenMP runtime calls).
|
|
LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
|
|
Operation *op, llvm::IRBuilderBase &builder,
|
|
LLVM::ModuleTranslation &moduleTranslation) const {
|
|
|
|
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
|
|
|
return llvm::TypeSwitch<Operation *, LogicalResult>(op)
|
|
.Case([&](omp::BarrierOp) {
|
|
ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
|
|
return success();
|
|
})
|
|
.Case([&](omp::TaskwaitOp) {
|
|
ompBuilder->createTaskwait(builder.saveIP());
|
|
return success();
|
|
})
|
|
.Case([&](omp::TaskyieldOp) {
|
|
ompBuilder->createTaskyield(builder.saveIP());
|
|
return success();
|
|
})
|
|
.Case([&](omp::FlushOp) {
|
|
// No support in Openmp runtime function (__kmpc_flush) to accept
|
|
// the argument list.
|
|
// OpenMP standard states the following:
|
|
// "An implementation may implement a flush with a list by ignoring
|
|
// the list, and treating it the same as a flush without a list."
|
|
//
|
|
// The argument list is discarded so that, flush with a list is treated
|
|
// same as a flush without a list.
|
|
ompBuilder->createFlush(builder.saveIP());
|
|
return success();
|
|
})
|
|
.Case([&](omp::ParallelOp op) {
|
|
return convertOmpParallel(op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::ReductionOp reductionOp) {
|
|
return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::MasterOp) {
|
|
return convertOmpMaster(*op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::CriticalOp) {
|
|
return convertOmpCritical(*op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::OrderedRegionOp) {
|
|
return convertOmpOrderedRegion(*op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::OrderedOp) {
|
|
return convertOmpOrdered(*op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::WsLoopOp) {
|
|
return convertOmpWsLoop(*op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::SimdLoopOp) {
|
|
return convertOmpSimdLoop(*op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::AtomicReadOp) {
|
|
return convertOmpAtomicRead(*op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::AtomicWriteOp) {
|
|
return convertOmpAtomicWrite(*op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::AtomicUpdateOp op) {
|
|
return convertOmpAtomicUpdate(op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::AtomicCaptureOp op) {
|
|
return convertOmpAtomicCapture(op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::SectionsOp) {
|
|
return convertOmpSections(*op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::SingleOp op) {
|
|
return convertOmpSingle(op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::TaskOp op) {
|
|
return convertOmpTaskOp(op, builder, moduleTranslation);
|
|
})
|
|
.Case([&](omp::TaskGroupOp op) {
|
|
return convertOmpTaskgroupOp(op, builder, moduleTranslation);
|
|
})
|
|
.Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
|
|
omp::CriticalDeclareOp>([](auto op) {
|
|
// `yield` and `terminator` can be just omitted. The block structure
|
|
// was created in the region that handles their parent operation.
|
|
// `reduction.declare` will be used by reductions and is not
|
|
// converted directly, skip it.
|
|
// `critical.declare` is only used to declare names of critical
|
|
// sections which will be used by `critical` ops and hence can be
|
|
// ignored for lowering. The OpenMP IRBuilder will create unique
|
|
// name for critical section names.
|
|
return success();
|
|
})
|
|
.Case([&](omp::ThreadprivateOp) {
|
|
return convertOmpThreadprivate(*op, builder, moduleTranslation);
|
|
})
|
|
.Default([&](Operation *inst) {
|
|
return inst->emitError("unsupported OpenMP operation: ")
|
|
<< inst->getName();
|
|
});
|
|
}
|
|
|
|
void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) {
|
|
registry.insert<omp::OpenMPDialect>();
|
|
registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
|
|
dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
|
|
});
|
|
}
|
|
|
|
/// Convenience overload: performs the registration in a temporary registry
/// and appends that registry to the given context.
void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
  DialectRegistry openmpRegistry;
  registerOpenMPDialectTranslation(openmpRegistry);
  context.appendDialectRegistry(openmpRegistry);
}
|