Files
clang-p2996/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
Matt Arsenault 69e75ae695 CodeGen: Don't lazily construct MachineFunctionInfo
This fixes what I consider to be an API flaw I've tripped over
multiple times. The point this is constructed isn't well defined, so
depending on where this is first called, you can conclude different
information based on the MachineFunction. For example, the AMDGPU
implementation inspected the MachineFrameInfo on construction for the
stack objects and if the frame has calls. This kind of worked in
SelectionDAG which visited all allocas up front, but broke in
GlobalISel which hasn't visited any of the IR when arguments are
lowered.

I've run into similar problems before with the MIR parser and trying
to make use of other MachineFunction fields, so I think it's best to
just categorically disallow dependency on the MachineFunction state in
the constructor and to always construct this at the same time as the
MachineFunction itself.

A missing feature I could still use is a way to access a custom
analysis pass on the IR here.
2022-12-21 10:49:32 -05:00

229 lines
7.8 KiB
C++

//===-- AMDGPUMachineFunction.cpp -------------------------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
/// Capture per-function state for codegen from the IR function \p F and
/// subtarget \p ST.
///
/// Deliberately consults only the Function and the subtarget: this is
/// constructed together with the MachineFunction itself, before any IR has
/// been visited, so no MachineFunction state may be inspected here.
AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      NoSignedZerosFPMath(false) {
  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  // Performance hints recorded as function attributes by earlier analyses.
  MemoryBound = F.getFnAttribute("amdgpu-memory-bound").getValueAsBool();
  WaveLimiter = F.getFnAttribute("amdgpu-wave-limiter").getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef GDSSizeStr = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!GDSSizeStr.empty())
    GDSSizeStr.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  // Only compute kernel argument sizes for kernel calling conventions.
  const CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
}
/// Assign a static offset in LDS (or region/GDS) memory for \p GV and return
/// it. Results are memoized, so asking again for the same global returns the
/// original offset.
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  // If this global was already placed, reuse its recorded offset.
  auto [It, Inserted] = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Inserted)
    return It->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  const uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
    /// TODO: We should sort these to minimize wasted space due to alignment
    /// padding. Currently the padding is decided by the first encountered use
    /// during lowering.
    StaticLDSSize = alignTo(StaticLDSSize, Alignment);
    Offset = StaticLDSSize;
    StaticLDSSize += AllocSize;

    // Round the running LDS size up to the trailing alignment, e.g. so that
    // dynamic shared memory placed after the static allocations is aligned.
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");
    StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    Offset = StaticGDSSize;
    StaticGDSSize += AllocSize;

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  It->second = Offset;
  return Offset;
}
// Name of the module-level LDS struct produced by AMDGPULowerModuleLDSPass;
// when present it is always placed first, at LDS offset zero.
static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
auto name = GV.getName();
return (name == ModuleLDSName) ||
(name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
}
/// Map a per-kernel LDS struct named "llvm.amdgcn.kernel.<name>.lds" back to
/// the kernel function <name>. Returns null if \p GV is not such a global,
/// or no function with that name exists in the module.
const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
    const GlobalVariable &GV) {
  StringRef Name = GV.getName();
  // Strip the wrapping prefix/suffix; what remains is the kernel name.
  if (!Name.consume_front("llvm.amdgcn.kernel.") || !Name.consume_back(".lds"))
    return nullptr;
  return GV.getParent()->getFunction(Name);
}
/// Find the per-kernel LDS struct "llvm.amdgcn.kernel.<name>.lds" for kernel
/// \p F, or null if the module does not contain one.
const GlobalVariable *
AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
  // Reconstruct the name given to the per-kernel struct (inverse of
  // getKernelLDSFunctionFromGlobal).
  std::string Name = "llvm.amdgcn.kernel.";
  Name += F.getName().str();
  Name += ".lds";
  return F.getParent()->getNamedGlobal(Name);
}
// This kernel calls no functions that require the module lds struct, so the
// module-scope allocation can be skipped for it. The attribute is presumably
// set by the LDS lowering pass — confirm against AMDGPULowerModuleLDSPass.
static bool canElideModuleLDS(const Function &F) {
  return F.hasFnAttribute("amdgpu-elide-module-lds");
}
unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
const GlobalVariable &GV) {
// module.lds, then alignment padding, then kernel.lds, then other variables
// if any
assert(isKnownAddressLDSGlobal(GV));
unsigned Offset = 0;
if (GV.getName() == ModuleLDSName) {
return 0;
}
const Module *M = GV.getParent();
const DataLayout &DL = M->getDataLayout();
const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
const Function *f = getKernelLDSFunctionFromGlobal(GV);
// Account for module.lds if allocated for this function
if (GVM && f && !canElideModuleLDS(*f)) {
// allocator aligns this to var align, but it's zero to begin with
Offset += DL.getTypeAllocSize(GVM->getValueType());
}
// No dynamic LDS alignment done by allocateModuleLDSGlobal
Offset = alignTo(
Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
return Offset;
}
/// Pre-allocate the specially named LDS globals produced for kernel \p F so
/// they land at statically computable offsets: the module-scope struct at
/// zero, then (after alignment padding) the per-kernel struct. Must run
/// before any other LDS allocation for this function.
void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
  const Module *M = F.getParent();

  // This function is called before allocating any other LDS so that it can
  // reliably put values at known addresses. Consequently, dynamic LDS, if
  // present, will not yet have been allocated
  assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");

  if (isModuleEntryFunction()) {
    // Pointer values start from zero, memory allocated per-kernel-launch
    // Variables can be grouped into a module level struct and a struct per
    // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
    // are allocated at statically computable addresses here.
    //
    // Address 0
    // {
    //   llvm.amdgcn.module.lds
    // }
    // alignment padding
    // {
    //   llvm.amdgcn.kernel.some-name.lds
    // }
    // other variables, e.g. dynamic lds, allocated after this call

    const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
    const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);

    if (GV && !canElideModuleLDS(F)) {
      assert(isKnownAddressLDSGlobal(*GV));
      // Allocated while StaticLDSSize is still zero, so it gets offset zero;
      // the assert cross-checks against the non-allocating address computation.
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
      (void)Offset;
      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
             "Module LDS expected to be allocated before other LDS");
    }

    if (KV) {
      // The per-kernel offset is deterministic because it is allocated
      // before any other non-module LDS variables.
      assert(isKnownAddressLDSGlobal(*KV));
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
      (void)Offset;
      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
             "Kernel LDS expected to be immediately after module LDS");
    }
  }
}
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
if (MD && MD->getNumOperands() == 1) {
ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
if (KnownSize) {
uint64_t V = KnownSize->getZExtValue();
if (V <= UINT32_MAX) {
return V;
}
}
}
return {};
}
void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
const GlobalVariable &GV) {
assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
Align Alignment =
DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
if (Alignment <= DynLDSAlign)
return;
LDSSize = alignTo(StaticLDSSize, Alignment);
DynLDSAlign = Alignment;
}