//===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a translation between the MLIR OpenMP dialect and LLVM // IR. // //===----------------------------------------------------------------------===// #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Operation.h" #include "mlir/Support/LLVM.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/IRBuilder.h" using namespace mlir; namespace { /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the /// insertion points for allocas. class OpenMPAllocaStackFrame : public LLVM::ModuleTranslation::StackFrameBase { public: explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) : allocaInsertPoint(allocaIP) {} llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; }; /// ModuleTranslation stack frame containing the partial mapping between MLIR /// values and their LLVM IR equivalents. class OpenMPVarMappingStackFrame : public LLVM::ModuleTranslation::StackFrameBase< OpenMPVarMappingStackFrame> { public: explicit OpenMPVarMappingStackFrame( const DenseMap &mapping) : mapping(mapping) {} DenseMap mapping; }; } // namespace /// Find the insertion point for allocas given the current insertion point for /// normal operations in the builder. 
static llvm::OpenMPIRBuilder::InsertPointTy
findAllocaInsertPoint(llvm::IRBuilderBase &builder,
                      const LLVM::ModuleTranslation &moduleTranslation) {
  // If there is an alloca insertion point on stack, i.e. we are in a nested
  // operation and a specific point was provided by some surrounding operation,
  // use it.
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
      [&](const OpenMPAllocaStackFrame &frame) {
        allocaInsertPoint = frame.allocaInsertPoint;
        // Stop at the first frame visited by the walk.
        return WalkResult::interrupt();
      });
  if (walkResult.wasInterrupted())
    return allocaInsertPoint;

  // Otherwise, insert to the entry block of the surrounding function.
  llvm::BasicBlock &funcEntryBlock =
      builder.GetInsertBlock()->getParent()->getEntryBlock();
  return llvm::OpenMPIRBuilder::InsertPointTy(
      &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
}

/// Converts the given region that appears within an OpenMP dialect operation to
/// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
/// region, and a branch from any block with a successor-less OpenMP terminator
/// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
/// of the continuation block if provided. Sets `bodyGenStatus` to failure and
/// returns early if any block of the region fails to convert.
static void convertOmpOpRegions( Region ®ion, StringRef blockName, llvm::BasicBlock &sourceBlock, llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, SmallVectorImpl *continuationBlockPHIs = nullptr) { llvm::LLVMContext &llvmContext = builder.getContext(); for (Block &bb : region) { llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( llvmContext, blockName, builder.GetInsertBlock()->getParent(), builder.GetInsertBlock()->getNextNode()); moduleTranslation.mapBlock(&bb, llvmBB); } llvm::Instruction *sourceTerminator = sourceBlock.getTerminator(); // Terminators (namely YieldOp) may be forwarding values to the region that // need to be available in the continuation block. Collect the types of these // operands in preparation of creating PHI nodes. SmallVector continuationBlockPHITypes; bool operandsProcessed = false; unsigned numYields = 0; for (Block &bb : region.getBlocks()) { if (omp::YieldOp yield = dyn_cast(bb.getTerminator())) { if (!operandsProcessed) { for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { continuationBlockPHITypes.push_back( moduleTranslation.convertType(yield->getOperand(i).getType())); } operandsProcessed = true; } else { assert(continuationBlockPHITypes.size() == yield->getNumOperands() && "mismatching number of values yielded from the region"); for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { llvm::Type *operandType = moduleTranslation.convertType(yield->getOperand(i).getType()); (void)operandType; assert(continuationBlockPHITypes[i] == operandType && "values of mismatching types yielded from the region"); } } numYields++; } } // Insert PHI nodes in the continuation block for any values forwarded by the // terminators in this region. 
if (!continuationBlockPHITypes.empty()) assert( continuationBlockPHIs && "expected continuation block PHIs if converted regions yield values"); if (continuationBlockPHIs) { llvm::IRBuilderBase::InsertPointGuard guard(builder); continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); builder.SetInsertPoint(&continuationBlock, continuationBlock.begin()); for (llvm::Type *ty : continuationBlockPHITypes) continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); } // Convert blocks one by one in topological order to ensure // defs are converted before uses. SetVector blocks = LLVM::detail::getTopologicallySortedBlocks(region); for (Block *bb : blocks) { llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); // Retarget the branch of the entry block to the entry block of the // converted region (regions are single-entry). if (bb->isEntryBlock()) { assert(sourceTerminator->getNumSuccessors() == 1 && "provided entry block has multiple successors"); assert(sourceTerminator->getSuccessor(0) == &continuationBlock && "ContinuationBlock is not the successor of the entry block"); sourceTerminator->setSuccessor(0, llvmBB); } llvm::IRBuilderBase::InsertPointGuard guard(builder); if (failed( moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { bodyGenStatus = failure(); return; } // Special handling for `omp.yield` and `omp.terminator` (we may have more // than one): they return the control to the parent OpenMP dialect operation // so replace them with the branch to the continuation block. We handle this // here to avoid relying inter-function communication through the // ModuleTranslation class to set up the correct insertion point. This is // also consistent with MLIR's idiom of handling special region terminators // in the same code that handles the region-owning operation. 
Operation *terminator = bb->getTerminator(); if (isa(terminator)) { builder.CreateBr(&continuationBlock); for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) (*continuationBlockPHIs)[i]->addIncoming( moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); } } // After all blocks have been traversed and values mapped, connect the PHI // nodes to the results of preceding blocks. LLVM::detail::connectPHINodes(region, moduleTranslation); // Remove the blocks and values defined in this region from the mapping since // they are not visible outside of this region. This allows the same region to // be converted several times, that is cloned, without clashes, and slightly // speeds up the lookups. moduleTranslation.forgetMapping(region); } /// Converts the OpenMP parallel operation to LLVM IR. static LogicalResult convertOmpParallel(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; // TODO: support error propagation in OpenMPIRBuilder and use it instead of // relying on captured variables. LogicalResult bodyGenStatus = success(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, llvm::BasicBlock &continuationBlock) { // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. LLVM::ModuleTranslation::SaveStack frame( moduleTranslation, allocaIP); // ParallelOp has only one region associated with it. auto ®ion = cast(opInst).getRegion(); convertOmpOpRegions(region, "omp.par.region", *codeGenIP.getBlock(), continuationBlock, builder, moduleTranslation, bodyGenStatus); }; // TODO: Perform appropriate actions according to the data-sharing // attribute (shared, private, firstprivate, ...) of variables. // Currently defaults to shared. 
auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, llvm::Value &, llvm::Value &vPtr, llvm::Value *&replacementValue) -> InsertPointTy { replacementValue = &vPtr; return codeGenIP; }; // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::Value *ifCond = nullptr; if (auto ifExprVar = cast(opInst).if_expr_var()) ifCond = moduleTranslation.lookupValue(ifExprVar); llvm::Value *numThreads = nullptr; if (auto numThreadsVar = cast(opInst).num_threads_var()) numThreads = moduleTranslation.lookupValue(numThreadsVar); llvm::omp::ProcBindKind pbKind = llvm::omp::OMP_PROC_BIND_default; if (auto bind = cast(opInst).proc_bind_val()) pbKind = llvm::omp::getProcBindKind(bind.getValue()); // TODO: Is the Parallel construct cancellable? bool isCancellable = false; llvm::OpenMPIRBuilder::LocationDescription ompLoc( builder.saveIP(), builder.getCurrentDebugLocation()); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( ompLoc, findAllocaInsertPoint(builder, moduleTranslation), bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind, isCancellable)); return bodyGenStatus; } /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; // TODO: support error propagation in OpenMPIRBuilder and use it instead of // relying on captured variables. LogicalResult bodyGenStatus = success(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, llvm::BasicBlock &continuationBlock) { // MasterOp has only one region associated with it. 
auto ®ion = cast(opInst).getRegion(); convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(), continuationBlock, builder, moduleTranslation, bodyGenStatus); }; // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::OpenMPIRBuilder::LocationDescription ompLoc( builder.saveIP(), builder.getCurrentDebugLocation()); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( ompLoc, bodyGenCB, finiCB)); return success(); } /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; auto criticalOp = cast(opInst); // TODO: support error propagation in OpenMPIRBuilder and use it instead of // relying on captured variables. LogicalResult bodyGenStatus = success(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, llvm::BasicBlock &continuationBlock) { // CriticalOp has only one region associated with it. auto ®ion = cast(opInst).getRegion(); convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(), continuationBlock, builder, moduleTranslation, bodyGenStatus); }; // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::OpenMPIRBuilder::LocationDescription ompLoc( builder.saveIP(), builder.getCurrentDebugLocation()); llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); llvm::Constant *hint = nullptr; if (criticalOp.hint().hasValue()) { hint = llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), static_cast(criticalOp.hint().getValue())); } else { hint = llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 0); } builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint)); return success(); } /// Returns a reduction declaration that corresponds to the given reduction /// operation in the given container. Currently only supports reductions inside /// WsLoopOp but can be easily extended. static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container, omp::ReductionOp reduction) { SymbolRefAttr reductionSymbol; for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) { if (container.reduction_vars()[i] != reduction.accumulator()) continue; reductionSymbol = (*container.reductions())[i].cast(); break; } assert(reductionSymbol && "reduction operation must be associated with a declaration"); return SymbolTable::lookupNearestSymbolFrom( container, reductionSymbol); } /// Populates `reductions` with reduction declarations used in the given loop. static void collectReductionDecls(omp::WsLoopOp loop, SmallVectorImpl &reductions) { Optional attr = loop.reductions(); if (!attr) return; reductions.reserve(reductions.size() + loop.getNumReductionVars()); for (auto symbolRef : attr->getAsRange()) { reductions.push_back( SymbolTable::lookupNearestSymbolFrom( loop, symbolRef)); } } /// Translates the blocks contained in the given region and appends them to at /// the current insertion point of `builder`. 
The operations of the entry block /// are appended to the current insertion block, which is not expected to have a /// terminator. If set, `continuationBlockArgs` is populated with translated /// values that correspond to the values omp.yield'ed from the region. static LogicalResult inlineConvertOmpRegions( Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl *continuationBlockArgs = nullptr) { if (region.empty()) return success(); // Special case for single-block regions that don't create additional blocks: // insert operations without creating additional blocks. if (llvm::hasSingleElement(region)) { moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); if (failed(moduleTranslation.convertBlock( region.front(), /*ignoreArguments=*/true, builder))) return failure(); // The continuation arguments are simply the translated terminator operands. if (continuationBlockArgs) llvm::append_range( *continuationBlockArgs, moduleTranslation.lookupValues(region.front().back().getOperands())); // Drop the mapping that is no longer necessary so that the same region can // be processed multiple times. moduleTranslation.forgetMapping(region); return success(); } // Create the continuation block manually instead of calling splitBlock // because the current insertion block may not have a terminator. 
llvm::BasicBlock *continuationBlock = llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont", builder.GetInsertBlock()->getParent(), builder.GetInsertBlock()->getNextNode()); builder.CreateBr(continuationBlock); LogicalResult bodyGenStatus = success(); SmallVector phis; convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(), *continuationBlock, builder, moduleTranslation, bodyGenStatus, &phis); if (failed(bodyGenStatus)) return failure(); if (continuationBlockArgs) llvm::append_range(*continuationBlockArgs, phis); builder.SetInsertPoint(continuationBlock, continuationBlock->getFirstInsertionPt()); return success(); } namespace { /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to /// store lambdas with capture. using OwningReductionGen = std::function; using OwningAtomicReductionGen = std::function; } // namespace /// Create an OpenMPIRBuilder-compatible reduction generator for the given /// reduction declaration. The generator uses `builder` but ignores its /// insertion point. static OwningReductionGen makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { // The lambda is mutable because we need access to non-const methods of decl // (which aren't actually mutating it), and we must capture decl by-value to // avoid the dangling reference after the parent function returns. 
OwningReductionGen gen = [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Value *lhs, llvm::Value *rhs, llvm::Value *&result) mutable { Region &reductionRegion = decl.reductionRegion(); moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); builder.restoreIP(insertPoint); SmallVector phis; if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.nonatomic.body", builder, moduleTranslation, &phis))) return llvm::OpenMPIRBuilder::InsertPointTy(); assert(phis.size() == 1); result = phis[0]; return builder.saveIP(); }; return gen; } /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the /// given reduction declaration. The generator uses `builder` but ignores its /// insertion point. Returns null if there is no atomic region available in the /// reduction declaration. static OwningAtomicReductionGen makeAtomicReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { if (decl.atomicReductionRegion().empty()) return OwningAtomicReductionGen(); // The lambda is mutable because we need access to non-const methods of decl // (which aren't actually mutating it), and we must capture decl by-value to // avoid the dangling reference after the parent function returns. 
OwningAtomicReductionGen atomicGen = [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Value *lhs, llvm::Value *rhs) mutable { Region &atomicRegion = decl.atomicReductionRegion(); moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); builder.restoreIP(insertPoint); SmallVector phis; if (failed(inlineConvertOmpRegions(atomicRegion, "omp.reduction.atomic.body", builder, moduleTranslation, &phis))) return llvm::OpenMPIRBuilder::InsertPointTy(); assert(phis.empty()); return builder.saveIP(); }; return atomicGen; } /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { auto loop = cast(opInst); // TODO: this should be in the op verifier instead. if (loop.lowerBound().empty()) return failure(); // Static is the default. omp::ClauseScheduleKind schedule = omp::ClauseScheduleKind::Static; if (loop.schedule_val().hasValue()) schedule = *omp::symbolizeClauseScheduleKind(loop.schedule_val().getValue()); // Find the loop configuration. llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); llvm::Type *ivType = step->getType(); llvm::Value *chunk = loop.schedule_chunk_var() ? moduleTranslation.lookupValue(loop.schedule_chunk_var()) : llvm::ConstantInt::get(ivType, 1); SmallVector reductionDecls; collectReductionDecls(loop, reductionDecls); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); // Allocate space for privatized reduction variables. 
SmallVector privateReductionVariables; DenseMap reductionVariableMap; unsigned numReductions = loop.getNumReductionVars(); privateReductionVariables.reserve(numReductions); if (numReductions != 0) { llvm::IRBuilderBase::InsertPointGuard guard(builder); builder.restoreIP(allocaIP); for (unsigned i = 0; i < numReductions; ++i) { auto reductionType = loop.reduction_vars()[i].getType().cast(); llvm::Value *var = builder.CreateAlloca( moduleTranslation.convertType(reductionType.getElementType())); privateReductionVariables.push_back(var); reductionVariableMap.try_emplace(loop.reduction_vars()[i], var); } } // Store the mapping between reduction variables and their private copies on // ModuleTranslation stack. It can be then recovered when translating // omp.reduce operations in a separate call. LLVM::ModuleTranslation::SaveStack mappingGuard( moduleTranslation, reductionVariableMap); // Before the loop, store the initial values of reductions into reduction // variables. Although this could be done after allocas, we don't want to mess // up with the alloca insertion point. for (unsigned i = 0; i < numReductions; ++i) { SmallVector phis; if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(), "omp.reduction.neutral", builder, moduleTranslation, &phis))) return failure(); assert(phis.size() == 1 && "expected one value to be yielded from the " "reduction neutral element declaration region"); builder.CreateStore(phis[0], privateReductionVariables[i]); } // Set up the source location value for OpenMP runtime. llvm::DISubprogram *subprogram = builder.GetInsertBlock()->getParent()->getSubprogram(); const llvm::DILocation *diLoc = moduleTranslation.translateLoc(opInst.getLoc(), subprogram); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), llvm::DebugLoc(diLoc)); // Generator of the canonical loop body. // TODO: support error propagation in OpenMPIRBuilder and use it instead of // relying on captured variables. 
SmallVector loopInfos; SmallVector bodyInsertPoints; LogicalResult bodyGenStatus = success(); auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { // Make sure further conversions know about the induction variable. moduleTranslation.mapValue( loop.getRegion().front().getArgument(loopInfos.size()), iv); // Capture the body insertion point for use in nested loops. BodyIP of the // CanonicalLoopInfo always points to the beginning of the entry block of // the body. bodyInsertPoints.push_back(ip); if (loopInfos.size() != loop.getNumLoops() - 1) return; // Convert the body of the loop. llvm::BasicBlock *entryBlock = ip.getBlock(); llvm::BasicBlock *exitBlock = entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit"); convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock, *exitBlock, builder, moduleTranslation, bodyGenStatus); }; // Delegate actual loop construction to the OpenMP IRBuilder. // TODO: this currently assumes WsLoop is semantically similar to SCF loop, // i.e. it has a positive step, uses signed integer semantics. Reconsider // this code when WsLoop clearly supports more cases. llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { llvm::Value *lowerBound = moduleTranslation.lookupValue(loop.lowerBound()[i]); llvm::Value *upperBound = moduleTranslation.lookupValue(loop.upperBound()[i]); llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); // Make sure loop trip count are emitted in the preheader of the outermost // loop at the latest so that they are all available for the new collapsed // loop will be created below. 
llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; if (i != 0) { loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), llvm::DebugLoc(diLoc)); computeIP = loopInfos.front()->getPreheaderIP(); } loopInfos.push_back(ompBuilder->createCanonicalLoop( loc, bodyGen, lowerBound, upperBound, step, /*IsSigned=*/true, loop.inclusive(), computeIP)); if (failed(bodyGenStatus)) return failure(); } // Collapse loops. Store the insertion point because LoopInfos may get // invalidated. llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); llvm::CanonicalLoopInfo *loopInfo = ompBuilder->collapseLoops(diLoc, loopInfos, {}); allocaIP = findAllocaInsertPoint(builder, moduleTranslation); if (schedule == omp::ClauseScheduleKind::Static) { ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, !loop.nowait(), chunk); } else { llvm::omp::OMPScheduleType schedType; switch (schedule) { case omp::ClauseScheduleKind::Dynamic: schedType = llvm::omp::OMPScheduleType::DynamicChunked; break; case omp::ClauseScheduleKind::Guided: schedType = llvm::omp::OMPScheduleType::GuidedChunked; break; case omp::ClauseScheduleKind::Auto: schedType = llvm::omp::OMPScheduleType::Auto; break; case omp::ClauseScheduleKind::Runtime: schedType = llvm::omp::OMPScheduleType::Runtime; break; default: llvm_unreachable("Unknown schedule value"); break; } ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk); } // Continue building IR after the loop. Note that the LoopInfo returned by // `collapseLoops` points inside the outermost loop and is intended for // potential further loop transformations. Use the insertion point stored // before collapsing loops instead. builder.restoreIP(afterIP); // Process the reductions if required. if (numReductions == 0) return success(); // Create the reduction generators. 
We need to own them here because // ReductionInfo only accepts references to the generators. SmallVector owningReductionGens; SmallVector owningAtomicReductionGens; for (unsigned i = 0; i < numReductions; ++i) { owningReductionGens.push_back( makeReductionGen(reductionDecls[i], builder, moduleTranslation)); owningAtomicReductionGens.push_back( makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); } // Collect the reduction information. SmallVector reductionInfos; reductionInfos.reserve(numReductions); for (unsigned i = 0; i < numReductions; ++i) { llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; if (owningAtomicReductionGens[i]) atomicGen = owningAtomicReductionGens[i]; reductionInfos.push_back( {moduleTranslation.lookupValue(loop.reduction_vars()[i]), privateReductionVariables[i], owningReductionGens[i], atomicGen}); } // The call to createReductions below expects the block to have a // terminator. Create an unreachable instruction to serve as terminator // and remove it later. llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); builder.SetInsertPoint(tempTerminator); llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, loop.nowait()); if (!contInsertPoint.getBlock()) return loop->emitOpError() << "failed to convert reductions"; auto nextInsertionPoint = ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); tempTerminator->eraseFromParent(); builder.restoreIP(nextInsertionPoint); return success(); } /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the /// mapping between reduction variables and their private equivalents to have /// been stored on the ModuleTranslation stack. Currently only supports /// reduction within WsLoopOp, but can be easily extended. 
static LogicalResult convertOmpReductionOp(omp::ReductionOp reductionOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { // Find the declaration that corresponds to the reduction op. auto reductionContainer = reductionOp->getParentOfType(); omp::ReductionDeclareOp declaration = findReductionDecl(reductionContainer, reductionOp); assert(declaration && "could not find reduction declaration"); // Retrieve the mapping between reduction variables and their private // equivalents. const DenseMap *reductionVariableMap = nullptr; moduleTranslation.stackWalk( [&](const OpenMPVarMappingStackFrame &frame) { reductionVariableMap = &frame.mapping; return WalkResult::interrupt(); }); assert(reductionVariableMap && "couldn't find private reduction variables"); // Translate the reduction operation by emitting the body of the corresponding // reduction declaration. Region &reductionRegion = declaration.reductionRegion(); llvm::Value *privateReductionVar = reductionVariableMap->lookup(reductionOp.accumulator()); llvm::Value *reductionVal = builder.CreateLoad( moduleTranslation.convertType(reductionOp.operand().getType()), privateReductionVar); moduleTranslation.mapValue(reductionRegion.front().getArgument(0), reductionVal); moduleTranslation.mapValue( reductionRegion.front().getArgument(1), moduleTranslation.lookupValue(reductionOp.operand())); SmallVector phis; if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body", builder, moduleTranslation, &phis))) return failure(); assert(phis.size() == 1 && "expected one value to be yielded from " "the reduction body declaration region"); builder.CreateStore(phis[0], privateReductionVar); return success(); } namespace { /// Implementation of the dialect interface that converts operations belonging /// to the OpenMP dialect to LLVM IR. 
class OpenMPDialectLLVMIRTranslationInterface : public LLVMTranslationDialectInterface { public: using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; /// Translates the given operation to LLVM IR using the provided IR builder /// and saving the state in `moduleTranslation`. LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const final; }; } // end namespace /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); return llvm::TypeSwitch(op) .Case([&](omp::BarrierOp) { ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); return success(); }) .Case([&](omp::TaskwaitOp) { ompBuilder->createTaskwait(builder.saveIP()); return success(); }) .Case([&](omp::TaskyieldOp) { ompBuilder->createTaskyield(builder.saveIP()); return success(); }) .Case([&](omp::FlushOp) { // No support in Openmp runtime function (__kmpc_flush) to accept // the argument list. // OpenMP standard states the following: // "An implementation may implement a flush with a list by ignoring // the list, and treating it the same as a flush without a list." // // The argument list is discarded so that, flush with a list is treated // same as a flush without a list. 
ompBuilder->createFlush(builder.saveIP()); return success(); }) .Case([&](omp::ParallelOp) { return convertOmpParallel(*op, builder, moduleTranslation); }) .Case([&](omp::ReductionOp reductionOp) { return convertOmpReductionOp(reductionOp, builder, moduleTranslation); }) .Case([&](omp::MasterOp) { return convertOmpMaster(*op, builder, moduleTranslation); }) .Case([&](omp::CriticalOp) { return convertOmpCritical(*op, builder, moduleTranslation); }) .Case([&](omp::WsLoopOp) { return convertOmpWsLoop(*op, builder, moduleTranslation); }) .Case([](auto op) { // `yield` and `terminator` can be just omitted. The block structure // was created in the region that handles their parent operation. // `reduction.declare` will be used by reductions and is not // converted directly, skip it. // `critical.declare` is only used to declare names of critical // sections which will be used by `critical` ops and hence can be // ignored for lowering. The OpenMP IRBuilder will create unique // name for critical section names. return success(); }) .Default([&](Operation *inst) { return inst->emitError("unsupported OpenMP operation: ") << inst->getName(); }); } void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { registry.insert(); registry.addDialectInterface(); } void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { DialectRegistry registry; registerOpenMPDialectTranslation(registry); context.appendDialectRegistry(registry); }