Files
clang-p2996/polly/lib/CodeGen/LoopGeneratorsGOMP.cpp
Michael Kruse fe0e5b3e43 [Polly] Insert !dbg metadata for emitted CallInsts.
The IR Verifier requires that every call instruction to an inlineable
function (among other things, its implementation must be visible in the
translation unit) must also have !dbg metadata attached to it. When
parallelizing, Polly emits calls to OpenMP runtime functions out of thin
air, or at least not directly derived from a bounded list of previous
instructions. While we could search for instructions in the SCoP that have
some debug info attached to them, there is no guarantee that we find any.
Our solution is to generate a new DILocation that points to line 0 to
represent optimized code.

The OpenMP function implementation is usually not available in the
user's translation unit, but can become visible in an LTO build. For
the bug to appear, libomp must also be built with debug symbols.

IMHO, the IR verifier rule is too strict. Runtime functions can
also be inserted by other optimization passes, such as
LoopIdiomRecognize. When inserting a call to e.g. memset, it uses the
DebugLoc from a StoreInst from the unoptimized code. It is not
required to have !dbg metadata attached either.

Fixes #56692
2022-07-26 19:43:53 -05:00

229 lines
8.2 KiB
C++

//===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains functions to create parallel loops as LLVM-IR.
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/LoopGeneratorsGOMP.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
using namespace llvm;
using namespace polly;
// Emit a call to GOMP_parallel_loop_runtime_start at the builder's current
// insert point. The function is looked up in the module M and declared with
// external linkage if it is not present yet.
//
// The emitted call receives, in order: SubFn, SubFnParam, the constant
// PollyNumThreads (as i32), LB, UB and Stride. The call is tagged with the
// DLGenerated debug location.
void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn,
                                                       Value *SubFnParam,
                                                       Value *LB, Value *UB,
                                                       Value *Stride) {
  const char *const RuntimeFnName = "GOMP_parallel_loop_runtime_start";

  Function *RuntimeFn = M->getFunction(RuntimeFnName);
  if (RuntimeFn == nullptr) {
    // No declaration in this module yet; create one. The first parameter is a
    // pointer to a `void(i8 *)` function, followed by an i8 pointer, an i32
    // and three LongType values.
    Type *VoidTy = Builder.getVoidTy();
    Type *BytePtrTy = Builder.getInt8PtrTy();
    FunctionType *SubFnTy = FunctionType::get(VoidTy, BytePtrTy, false);

    Type *ParamTys[] = {PointerType::getUnqual(SubFnTy),
                        BytePtrTy,
                        Builder.getInt32Ty(),
                        LongType,
                        LongType,
                        LongType};
    FunctionType *RuntimeFnTy = FunctionType::get(VoidTy, ParamTys, false);
    RuntimeFn = Function::Create(RuntimeFnTy, Function::ExternalLinkage,
                                 RuntimeFnName, M);
  }

  Value *CallArgs[] = {
      SubFn, SubFnParam, Builder.getInt32(PollyNumThreads), LB, UB, Stride};

  // Every emitted call carries the generated debug location.
  Builder.CreateCall(RuntimeFn, CallArgs)->setDebugLoc(DLGenerated);
}
// Emit the three-step sequence that runs SubFn in parallel:
//   1. the runtime call that starts the parallel loop,
//   2. a direct call to SubFn(SubFnParam) from the emitting position,
//   3. the runtime call that ends the parallel region.
// All arguments are forwarded unchanged to createCallSpawnThreads.
void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn,
                                                        Value *SubFnParam,
                                                        Value *LB, Value *UB,
                                                        Value *Stride) {
  // Announce the parallel loop to the runtime.
  createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);

  // Call the subfunction directly as well, tagged with the generated debug
  // location like every other emitted call.
  Builder.CreateCall(SubFn, SubFnParam)->setDebugLoc(DLGenerated);

  createCallJoinThreads();
}
// Create the (still empty) subfunction in module M. It has the signature
// `void(i8 *)`, internal linkage, and is named after F with the suffix
// "_polly_subfn". Its single argument is named "polly.par.userContext".
Function *ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function *F) const {
  Type *ContextPtrTy = Builder.getInt8PtrTy();
  FunctionType *SubFnTy =
      FunctionType::get(Builder.getVoidTy(), {ContextPtrTy}, false);

  Function *Outlined = Function::Create(SubFnTy, Function::InternalLinkage,
                                        F->getName() + "_polly_subfn", M);

  // Give the only argument a readable name.
  Outlined->arg_begin()->setName("polly.par.userContext");

  return Outlined;
}
// Create a subfunction of the following (preliminary) structure:
//
//    PrevBB
//       |
//       v
//    HeaderBB
//       |   _____
//       v  v    |
//   CheckNextBB  PreHeaderBB
//       |\       |
//       | \______/
//       |
//       v
//     ExitBB
//
// HeaderBB will hold allocations and loading of variables.
// CheckNextBB will check for more work.
// If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
// PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
// ExitBB marks the end of the parallel execution.
//
// Parameters:
//   Stride     - forwarded to createLoop as the loop stride.
//   StructData - alloca of the context struct; its type is the target of the
//                cast applied to the subfunction's argument, and its allocated
//                type is handed to extractValuesFromStruct.
//   Data, Map  - forwarded to extractValuesFromStruct (defined elsewhere;
//                presumably fills Map from the struct's fields - confirm
//                against its definition).
//
// Returns a tuple of the value returned by createLoop and the newly created
// subfunction. On return the builder's insert point is the position that
// createLoop left it at, not the position it had on entry.
std::tuple<Value *, Function *>
ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData,
SetVector<Value *> Data,
ValueMapT &Map) {
// Neither option is honoured here; only a warning is printed to errs() and
// code generation continues.
if (PollyScheduling != OMPGeneralSchedulingType::Runtime) {
// User tried to influence the scheduling type (currently not supported)
errs() << "warning: Polly's GNU OpenMP backend solely "
"supports the scheduling type 'runtime'.\n";
}
if (PollyChunkSize != 0) {
// User tried to influence the chunk size (currently not supported)
errs() << "warning: Polly's GNU OpenMP backend solely "
"supports the default chunk size.\n";
}
Function *SubFn = createSubFnDefinition();
LLVMContext &Context = SubFn->getContext();
// Store the previous basic block.
BasicBlock *PrevBB = Builder.GetInsertBlock();
// Create basic blocks.
BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
BasicBlock *CheckNextBB =
BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
BasicBlock *PreHeaderBB =
BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
// Register the new blocks in the dominator tree: HeaderBB below PrevBB, and
// the remaining three blocks directly below HeaderBB.
DT.addNewBlock(HeaderBB, PrevBB);
DT.addNewBlock(ExitBB, HeaderBB);
DT.addNewBlock(CheckNextBB, HeaderBB);
DT.addNewBlock(PreHeaderBB, HeaderBB);
// Fill up basic block HeaderBB.
Builder.SetInsertPoint(HeaderBB);
// Stack slots that receive the bounds of the next work item.
Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
// Cast the subfunction's single argument to the type of StructData.
Value *UserContext = Builder.CreateBitCast(
&*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext");
extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
Map);
Builder.CreateBr(CheckNextBB);
// Add code to check if another set of iterations will be executed.
Builder.SetInsertPoint(CheckNextBB);
Value *Next = createCallGetWorkItem(LBPtr, UBPtr);
// Convert the result to i1 so it can drive the conditional branch.
Value *HasNextSchedule = Builder.CreateTrunc(
Next, Builder.getInt1Ty(), "polly.par.hasNextScheduleBlock");
Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
// Add code to load the iv bounds for this set of iterations.
Builder.SetInsertPoint(PreHeaderBB);
Value *LB = Builder.CreateLoad(LongType, LBPtr, "polly.par.LB");
Value *UB = Builder.CreateLoad(LongType, UBPtr, "polly.par.UB");
// Subtract one as the upper bound provided by OpenMP is a < comparison
// whereas the codegenForSequential function creates a <= comparison.
UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
"polly.par.UBAdjusted");
Builder.CreateBr(CheckNextBB);
// Step back onto the branch that was just created, so that everything emitted
// from here on is inserted before the back edge to CheckNextBB.
Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
// Passed to createLoop by name and not read again in this function
// (presumably an out-parameter for the loop's exit block - confirm).
BasicBlock *AfterBB;
// NOTE(review): the `nullptr, true` arguments are presumably createLoop's
// annotator and "parallel" flag - confirm against its declaration.
Value *IV =
createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE,
nullptr, true, /* UseGuard */ false);
// Remember where createLoop left the builder; this position is restored as
// the insert point before returning.
BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
// Add code to terminate this subfunction.
Builder.SetInsertPoint(ExitBB);
createCallCleanupThread();
Builder.CreateRetVoid();
// Leave the builder at the saved position for the caller.
Builder.SetInsertPoint(&*LoopBody);
return std::make_tuple(IV, SubFn);
}
// Emit a call to GOMP_loop_runtime_next(LBPtr, UBPtr), declaring the function
// in module M (external linkage, i8 return type, two pointer-to-LongType
// parameters) if it does not exist yet.
//
// Returns an i1 value that is true iff the call's result is non-zero.
Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr,
                                                        Value *UBPtr) {
  const char *const RuntimeFnName = "GOMP_loop_runtime_next";

  Function *RuntimeFn = M->getFunction(RuntimeFnName);
  if (RuntimeFn == nullptr) {
    // No declaration in this module yet; create one.
    Type *LongPtrTy = LongType->getPointerTo();
    Type *ParamTys[] = {LongPtrTy, LongPtrTy};
    FunctionType *RuntimeFnTy =
        FunctionType::get(Builder.getInt8Ty(), ParamTys, false);
    RuntimeFn = Function::Create(RuntimeFnTy, Function::ExternalLinkage,
                                 RuntimeFnName, M);
  }

  Value *BoundPtrs[] = {LBPtr, UBPtr};
  CallInst *NextCall = Builder.CreateCall(RuntimeFn, BoundPtrs);
  NextCall->setDebugLoc(DLGenerated);

  // Compare the result against `false` widened to the call's return type.
  Value *WidenedFalse =
      Builder.CreateZExt(Builder.getFalse(), NextCall->getType());
  return Builder.CreateICmpNE(NextCall, WidenedFalse);
}
void ParallelLoopGeneratorGOMP::createCallJoinThreads() {
const std::string Name = "GOMP_parallel_end";
Function *F = M->getFunction(Name);
// If F is not available, declare it.
if (!F) {
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
F = Function::Create(Ty, Linkage, Name, M);
}
CallInst *Call = Builder.CreateCall(F, {});
Call->setDebugLoc(DLGenerated);
}
void ParallelLoopGeneratorGOMP::createCallCleanupThread() {
const std::string Name = "GOMP_loop_end_nowait";
Function *F = M->getFunction(Name);
// If F is not available, declare it.
if (!F) {
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
F = Function::Create(Ty, Linkage, Name, M);
}
CallInst *Call = Builder.CreateCall(F, {});
Call->setDebugLoc(DLGenerated);
}