This fixes what I consider to be an API flaw I've tripped over multiple times. The point at which this is constructed isn't well defined, so depending on where this is first called, you can conclude different information based on the MachineFunction. For example, the AMDGPU implementation inspected the MachineFrameInfo on construction for the stack objects and whether the frame has calls. This kind of worked in SelectionDAG, which visited all allocas up front, but broke in GlobalISel, which hasn't visited any of the IR when arguments are lowered. I've run into similar problems before with the MIR parser when trying to make use of other MachineFunction fields, so I think it's best to categorically disallow any dependency on MachineFunction state in the constructor and to always construct this at the same time as the MachineFunction itself. A missing feature I could still use is a way to access a custom analysis pass on the IR here.
229 lines
7.8 KiB
C++
229 lines
7.8 KiB
C++
//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPUMachineFunction.h"
|
|
#include "AMDGPU.h"
|
|
#include "AMDGPUPerfHintAnalysis.h"
|
|
#include "AMDGPUSubtarget.h"
|
|
#include "llvm/CodeGen/MachineModuleInfo.h"
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
|
|
using namespace llvm;
|
|
|
|
// Initialize per-function target state purely from the IR Function and
// subtarget. Deliberately avoids inspecting the MachineFunction, whose
// contents are not yet meaningful at construction time.
AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      NoSignedZerosFPMath(false) {

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  // Boolean hints produced by AMDGPUPerfHintAnalysis.
  MemoryBound = F.getFnAttribute("amdgpu-memory-bound").getValueAsBool();
  WaveLimiter = F.getFnAttribute("amdgpu-wave-limiter").getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef GDSSizeStr =
      F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!GDSSizeStr.empty())
    GDSSizeStr.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  // Only real kernels carry an explicit kernarg segment.
  const CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZ = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZ.isStringAttribute() && NSZ.getValueAsString() == "true";
}
|
|
|
|
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
|
|
const GlobalVariable &GV,
|
|
Align Trailing) {
|
|
auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
|
|
if (!Entry.second)
|
|
return Entry.first->second;
|
|
|
|
Align Alignment =
|
|
DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
|
|
|
|
unsigned Offset;
|
|
if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
|
|
/// TODO: We should sort these to minimize wasted space due to alignment
|
|
/// padding. Currently the padding is decided by the first encountered use
|
|
/// during lowering.
|
|
Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
|
|
|
|
StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
|
|
|
|
// Align LDS size to trailing, e.g. for aligning dynamic shared memory
|
|
LDSSize = alignTo(StaticLDSSize, Trailing);
|
|
} else {
|
|
assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
|
|
"expected region address space");
|
|
|
|
Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
|
|
StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
|
|
|
|
// FIXME: Apply alignment of dynamic GDS
|
|
GDSSize = StaticGDSSize;
|
|
}
|
|
|
|
Entry.first->second = Offset;
|
|
return Offset;
|
|
}
|
|
|
|
// Name of the module-scope LDS struct created by AMDGPULowerModuleLDSPass;
// when present it is always allocated at LDS address zero.
static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
|
|
|
|
bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
|
|
auto name = GV.getName();
|
|
return (name == ModuleLDSName) ||
|
|
(name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
|
|
}
|
|
|
|
// Map a per-kernel LDS struct "llvm.amdgcn.kernel.<name>.lds" back to the
// kernel function <name>. Returns null when GV does not follow that naming
// scheme or no such function exists in the module.
const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
    const GlobalVariable &GV) {
  StringRef Name = GV.getName();
  if (!Name.consume_front("llvm.amdgcn.kernel."))
    return nullptr;
  if (!Name.consume_back(".lds"))
    return nullptr;
  return GV.getParent()->getFunction(Name);
}
|
|
|
|
// Inverse of getKernelLDSFunctionFromGlobal: find the per-kernel LDS struct
// named after F, or null if the lowering pass did not create one.
const GlobalVariable *
AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
  std::string VarName = "llvm.amdgcn.kernel.";
  VarName += F.getName();
  VarName += ".lds";
  return F.getParent()->getNamedGlobal(VarName);
}
|
|
|
|
// This kernel calls no functions that require the module lds struct
|
|
static bool canElideModuleLDS(const Function &F) {
|
|
return F.hasFnAttribute("amdgpu-elide-module-lds");
|
|
}
|
|
|
|
unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
|
|
const GlobalVariable &GV) {
|
|
// module.lds, then alignment padding, then kernel.lds, then other variables
|
|
// if any
|
|
|
|
assert(isKnownAddressLDSGlobal(GV));
|
|
unsigned Offset = 0;
|
|
|
|
if (GV.getName() == ModuleLDSName) {
|
|
return 0;
|
|
}
|
|
|
|
const Module *M = GV.getParent();
|
|
const DataLayout &DL = M->getDataLayout();
|
|
|
|
const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
|
|
const Function *f = getKernelLDSFunctionFromGlobal(GV);
|
|
|
|
// Account for module.lds if allocated for this function
|
|
if (GVM && f && !canElideModuleLDS(*f)) {
|
|
// allocator aligns this to var align, but it's zero to begin with
|
|
Offset += DL.getTypeAllocSize(GVM->getValueType());
|
|
}
|
|
|
|
// No dynamic LDS alignment done by allocateModuleLDSGlobal
|
|
Offset = alignTo(
|
|
Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
|
|
|
|
return Offset;
|
|
}
|
|
|
|
// Pre-allocate the module-scope and per-kernel LDS structs at their fixed
// offsets (module.lds at 0, kernel.lds immediately after). The asserts
// cross-check each assigned offset against calculateKnownAddressOfLDSGlobal,
// so allocation order here must match the layout that function assumes.
void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
  const Module *M = F.getParent();

  // This function is called before allocating any other LDS so that it can
  // reliably put values at known addresses. Consequently, dynamic LDS, if
  // present, will not yet have been allocated

  assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");

  if (isModuleEntryFunction()) {

    // Pointer values start from zero, memory allocated per-kernel-launch
    // Variables can be grouped into a module level struct and a struct per
    // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
    // are allocated at statically computable addresses here.
    //
    // Address 0
    // {
    //   llvm.amdgcn.module.lds
    // }
    // alignment padding
    // {
    //   llvm.amdgcn.kernel.some-name.lds
    // }
    // other variables, e.g. dynamic lds, allocated after this call

    const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
    const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);

    // Module-scope struct first, unless this kernel was marked to elide it.
    if (GV && !canElideModuleLDS(F)) {
      assert(isKnownAddressLDSGlobal(*GV));
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
      (void)Offset;
      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
             "Module LDS expected to be allocated before other LDS");
    }

    if (KV) {
      // The per-kernel offset is deterministic because it is allocated
      // before any other non-module LDS variables.
      assert(isKnownAddressLDSGlobal(*KV));
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
      (void)Offset;
      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
             "Kernel LDS expected to be immediately after module LDS");
    }
  }
}
|
|
|
|
std::optional<uint32_t>
|
|
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
|
|
auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
|
|
if (MD && MD->getNumOperands() == 1) {
|
|
ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
|
|
if (KnownSize) {
|
|
uint64_t V = KnownSize->getZExtValue();
|
|
if (V <= UINT32_MAX) {
|
|
return V;
|
|
}
|
|
}
|
|
}
|
|
return {};
|
|
}
|
|
|
|
void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
|
|
const GlobalVariable &GV) {
|
|
assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
|
|
|
|
Align Alignment =
|
|
DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
|
|
if (Alignment <= DynLDSAlign)
|
|
return;
|
|
|
|
LDSSize = alignTo(StaticLDSSize, Alignment);
|
|
DynLDSAlign = Alignment;
|
|
}
|