Summary: The previous offloading entry type did not fit the current use-cases very well. This widens it and adds a version to prevent further annoyances. It also includes the kind to better sort who's using it. The first 64-bytes are reserved as zero so the OpenMP runtime can detect the old format for binary compatibilitry.
376 lines
14 KiB
C++
376 lines
14 KiB
C++
//===- Utility.cpp ------ Collection of generic offloading utilities ------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Frontend/Offloading/Utility.h"
|
|
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
|
|
#include "llvm/BinaryFormat/ELF.h"
|
|
#include "llvm/BinaryFormat/MsgPackDocument.h"
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/IR/GlobalValue.h"
|
|
#include "llvm/IR/GlobalVariable.h"
|
|
#include "llvm/IR/Value.h"
|
|
#include "llvm/Object/ELFObjectFile.h"
|
|
#include "llvm/Support/MemoryBufferRef.h"
|
|
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::offloading;
|
|
|
|
StructType *offloading::getEntryTy(Module &M) {
|
|
LLVMContext &C = M.getContext();
|
|
StructType *EntryTy =
|
|
StructType::getTypeByName(C, "struct.__tgt_offload_entry");
|
|
if (!EntryTy)
|
|
EntryTy = StructType::create(
|
|
"struct.__tgt_offload_entry", Type::getInt64Ty(C), Type::getInt16Ty(C),
|
|
Type::getInt16Ty(C), Type::getInt32Ty(C), PointerType::getUnqual(C),
|
|
PointerType::getUnqual(C), Type::getInt64Ty(C), Type::getInt64Ty(C),
|
|
PointerType::getUnqual(C));
|
|
return EntryTy;
|
|
}
|
|
|
|
std::pair<Constant *, GlobalVariable *>
|
|
offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
|
|
Constant *Addr, StringRef Name,
|
|
uint64_t Size, uint32_t Flags,
|
|
uint64_t Data, Constant *AuxAddr) {
|
|
llvm::Triple Triple(M.getTargetTriple());
|
|
Type *PtrTy = PointerType::getUnqual(M.getContext());
|
|
Type *Int64Ty = Type::getInt64Ty(M.getContext());
|
|
Type *Int32Ty = Type::getInt32Ty(M.getContext());
|
|
Type *Int16Ty = Type::getInt16Ty(M.getContext());
|
|
|
|
Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);
|
|
|
|
StringRef Prefix =
|
|
Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name";
|
|
|
|
// Create the constant string used to look up the symbol in the device.
|
|
auto *Str =
|
|
new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
|
|
GlobalValue::InternalLinkage, AddrName, Prefix);
|
|
StringRef SectionName = ".llvm.rodata.offloading";
|
|
Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
|
Str->setSection(SectionName);
|
|
Str->setAlignment(Align(1));
|
|
|
|
// Make a metadata node for these constants so it can be queried from IR.
|
|
NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols");
|
|
Metadata *MDVals[] = {ConstantAsMetadata::get(Str)};
|
|
MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals));
|
|
|
|
// Construct the offloading entry.
|
|
Constant *EntryData[] = {
|
|
ConstantExpr::getNullValue(Int64Ty),
|
|
ConstantInt::get(Int16Ty, 1),
|
|
ConstantInt::get(Int16Ty, Kind),
|
|
ConstantInt::get(Int32Ty, Flags),
|
|
ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, PtrTy),
|
|
ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, PtrTy),
|
|
ConstantInt::get(Int64Ty, Size),
|
|
ConstantInt::get(Int64Ty, Data),
|
|
AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(AuxAddr, PtrTy)
|
|
: ConstantExpr::getNullValue(PtrTy)};
|
|
Constant *EntryInitializer = ConstantStruct::get(getEntryTy(M), EntryData);
|
|
return {EntryInitializer, Str};
|
|
}
|
|
|
|
void offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
|
|
Constant *Addr, StringRef Name,
|
|
uint64_t Size, uint32_t Flags,
|
|
uint64_t Data, StringRef SectionName,
|
|
Constant *AuxAddr) {
|
|
llvm::Triple Triple(M.getTargetTriple());
|
|
|
|
auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
|
|
M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
|
|
|
|
StringRef Prefix =
|
|
Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry.";
|
|
auto *Entry = new GlobalVariable(
|
|
M, getEntryTy(M),
|
|
/*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer,
|
|
Prefix + Name, nullptr, GlobalValue::NotThreadLocal,
|
|
M.getDataLayout().getDefaultGlobalsAddressSpace());
|
|
|
|
// The entry has to be created in the section the linker expects it to be.
|
|
if (Triple.isOSBinFormatCOFF())
|
|
Entry->setSection((SectionName + "$OE").str());
|
|
else
|
|
Entry->setSection(SectionName);
|
|
Entry->setAlignment(Align(1));
|
|
}
|
|
|
|
std::pair<GlobalVariable *, GlobalVariable *>
|
|
offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
|
|
llvm::Triple Triple(M.getTargetTriple());
|
|
|
|
auto *ZeroInitilaizer =
|
|
ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
|
|
auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr;
|
|
auto *EntryType = ArrayType::get(getEntryTy(M), 0);
|
|
auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
|
|
: GlobalValue::ExternalLinkage;
|
|
|
|
auto *EntriesB =
|
|
new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
|
|
"__start_" + SectionName);
|
|
EntriesB->setVisibility(GlobalValue::HiddenVisibility);
|
|
auto *EntriesE =
|
|
new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
|
|
"__stop_" + SectionName);
|
|
EntriesE->setVisibility(GlobalValue::HiddenVisibility);
|
|
|
|
if (Triple.isOSBinFormatELF()) {
|
|
// We assume that external begin/end symbols that we have created above will
|
|
// be defined by the linker. This is done whenever a section name with a
|
|
// valid C-identifier is present. We define a dummy variable here to force
|
|
// the linker to always provide these symbols.
|
|
auto *DummyEntry = new GlobalVariable(
|
|
M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
|
|
ZeroInitilaizer, "__dummy." + SectionName);
|
|
DummyEntry->setSection(SectionName);
|
|
appendToCompilerUsed(M, DummyEntry);
|
|
} else {
|
|
// The COFF linker will merge sections containing a '$' together into a
|
|
// single section. The order of entries in this section will be sorted
|
|
// alphabetically by the characters following the '$' in the name. Set the
|
|
// sections here to ensure that the beginning and end symbols are sorted.
|
|
EntriesB->setSection((SectionName + "$OA").str());
|
|
EntriesE->setSection((SectionName + "$OZ").str());
|
|
}
|
|
|
|
return std::make_pair(EntriesB, EntriesE);
|
|
}
|
|
|
|
bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
|
|
uint32_t ImageFlags,
|
|
StringRef EnvTargetID) {
|
|
using namespace llvm::ELF;
|
|
StringRef EnvArch = EnvTargetID.split(":").first;
|
|
|
|
// Trivial check if the base processors match.
|
|
if (EnvArch != ImageArch)
|
|
return false;
|
|
|
|
// Check if the image is requesting xnack on or off.
|
|
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
|
|
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
|
|
// The image is 'xnack-' so the environment must be 'xnack-'.
|
|
if (!EnvTargetID.contains("xnack-"))
|
|
return false;
|
|
break;
|
|
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
|
|
// The image is 'xnack+' so the environment must be 'xnack+'.
|
|
if (!EnvTargetID.contains("xnack+"))
|
|
return false;
|
|
break;
|
|
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
|
|
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
|
|
default:
|
|
break;
|
|
}
|
|
|
|
// Check if the image is requesting sramecc on or off.
|
|
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
|
|
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
|
|
// The image is 'sramecc-' so the environment must be 'sramecc-'.
|
|
if (!EnvTargetID.contains("sramecc-"))
|
|
return false;
|
|
break;
|
|
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
|
|
// The image is 'sramecc+' so the environment must be 'sramecc+'.
|
|
if (!EnvTargetID.contains("sramecc+"))
|
|
return false;
|
|
break;
|
|
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
|
|
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
namespace {
|
|
/// Reads the AMDGPU specific per-kernel-metadata from an image.
|
|
class KernelInfoReader {
|
|
public:
|
|
KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
|
|
: KernelInfoMap(KIM) {}
|
|
|
|
/// Process ELF note to read AMDGPU metadata from respective information
|
|
/// fields.
|
|
Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
|
|
if (Note.getName() != "AMDGPU")
|
|
return Error::success(); // We are not interested in other things
|
|
|
|
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
|
|
"Parse AMDGPU MetaData");
|
|
auto Desc = Note.getDesc(Align);
|
|
StringRef MsgPackString =
|
|
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
|
|
msgpack::Document MsgPackDoc;
|
|
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
|
|
return Error::success();
|
|
|
|
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
|
|
if (!Verifier.verify(MsgPackDoc.getRoot()))
|
|
return Error::success();
|
|
|
|
auto RootMap = MsgPackDoc.getRoot().getMap(true);
|
|
|
|
if (auto Err = iterateAMDKernels(RootMap))
|
|
return Err;
|
|
|
|
return Error::success();
|
|
}
|
|
|
|
private:
|
|
/// Extracts the relevant information via simple string look-up in the msgpack
|
|
/// document elements.
|
|
Error
|
|
extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
|
|
std::string &KernelName,
|
|
offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
|
|
if (!V.first.isString())
|
|
return Error::success();
|
|
|
|
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
|
|
return DK.getString() == SK;
|
|
};
|
|
|
|
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
|
|
uint32_t *Vals) {
|
|
assert(DN.isArray() && "MsgPack DocNode is an array node");
|
|
auto DNA = DN.getArray();
|
|
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
|
|
|
|
int I = 0;
|
|
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
|
|
++DNABegin) {
|
|
Vals[I++] = DNABegin->getUInt();
|
|
}
|
|
};
|
|
|
|
if (IsKey(V.first, ".name")) {
|
|
KernelName = V.second.toString();
|
|
} else if (IsKey(V.first, ".sgpr_count")) {
|
|
KernelData.SGPRCount = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".sgpr_spill_count")) {
|
|
KernelData.SGPRSpillCount = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".vgpr_count")) {
|
|
KernelData.VGPRCount = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".vgpr_spill_count")) {
|
|
KernelData.VGPRSpillCount = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".agpr_count")) {
|
|
KernelData.AGPRCount = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
|
|
KernelData.PrivateSegmentSize = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
|
|
KernelData.GroupSegmentList = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
|
|
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
|
|
} else if (IsKey(V.first, ".workgroup_size_hint")) {
|
|
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
|
|
} else if (IsKey(V.first, ".wavefront_size")) {
|
|
KernelData.WavefrontSize = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
|
|
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
|
|
}
|
|
|
|
return Error::success();
|
|
}
|
|
|
|
/// Get the "amdhsa.kernels" element from the msgpack Document
|
|
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
|
|
auto Res = MDN.find("amdhsa.kernels");
|
|
if (Res == MDN.end())
|
|
return createStringError(inconvertibleErrorCode(),
|
|
"Could not find amdhsa.kernels key");
|
|
|
|
auto Pair = *Res;
|
|
assert(Pair.second.isArray() &&
|
|
"AMDGPU kernel entries are arrays of entries");
|
|
|
|
return Pair.second.getArray();
|
|
}
|
|
|
|
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
|
|
/// MapDocNode that either maps a string to a single value (most of them) or
|
|
/// to another array of things. Currently, we only handle the case that maps
|
|
/// to scalar value.
|
|
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
|
|
offloading::amdgpu::AMDGPUKernelMetaData KernelData;
|
|
std::string KernelName;
|
|
auto Entry = (*It).getMap();
|
|
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
|
|
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
|
|
return Err;
|
|
|
|
KernelInfoMap.insert({KernelName, KernelData});
|
|
return Error::success();
|
|
}
|
|
|
|
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
|
|
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
|
|
auto KernelsOrErr = getAMDKernelsArray(MDN);
|
|
if (auto Err = KernelsOrErr.takeError())
|
|
return Err;
|
|
|
|
auto KernelsArr = *KernelsOrErr;
|
|
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
|
|
if (!It->isMap())
|
|
continue; // we expect <key,value> pairs
|
|
|
|
// Obtain the value for the different entries. Each array entry is a
|
|
// MapDocNode
|
|
if (auto Err = generateKernelInfo(It))
|
|
return Err;
|
|
}
|
|
return Error::success();
|
|
}
|
|
|
|
// Kernel names are the keys
|
|
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
|
|
};
|
|
} // namespace
|
|
|
|
Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
|
|
MemoryBufferRef MemBuffer,
|
|
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
|
|
uint16_t &ELFABIVersion) {
|
|
Error Err = Error::success(); // Used later as out-parameter
|
|
|
|
auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
|
|
if (auto Err = ELFOrError.takeError())
|
|
return Err;
|
|
|
|
const object::ELF64LEFile ELFObj = ELFOrError.get();
|
|
Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
|
|
if (!Sections)
|
|
return Sections.takeError();
|
|
KernelInfoReader Reader(KernelInfoMap);
|
|
|
|
// Read the code object version from ELF image header
|
|
auto Header = ELFObj.getHeader();
|
|
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
|
|
for (const auto &S : *Sections) {
|
|
if (S.sh_type != ELF::SHT_NOTE)
|
|
continue;
|
|
|
|
for (const auto N : ELFObj.notes(S, Err)) {
|
|
if (Err)
|
|
return Err;
|
|
// Fills the KernelInfoTabel entries in the reader
|
|
if ((Err = Reader.processNote(N, S.sh_addralign)))
|
|
return Err;
|
|
}
|
|
}
|
|
return Error::success();
|
|
}
|