[llvm][offload] Move AMDGPU offload utilities to LLVM (#102487)

This patch moves utilities from
`offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h` to
`llvm/Frontend/Offloading/Utility.h` to be reused by
other projects.

Concretely the following changes were made:
- Rename `KernelMetaDataTy` to `AMDGPUKernelMetaData`.
- Remove unused fields `KernelObject`, `KernelSegmentSize`,
`ExplicitArgumentCount` and `ImplicitArgumentCount` from
`AMDGPUKernelMetaData`.
- Return the produced error if `ELFObj.sections()` failed instead of
using `cantFail`.
- Added `AGPRCount` field to `AMDGPUKernelMetaData`.
- Added a default invalid value to all the fields in
`AMDGPUKernelMetaData`.
This commit is contained in:
Fabian Mora
2024-08-20 09:03:06 -04:00
committed by GitHub
parent 5f3c0b2375
commit cfc76b6498
6 changed files with 312 additions and 262 deletions

View File

@@ -9,8 +9,14 @@
#ifndef LLVM_FRONTEND_OFFLOADING_UTILITY_H
#define LLVM_FRONTEND_OFFLOADING_UTILITY_H
#include <cstdint>
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBufferRef.h"
namespace llvm {
namespace offloading {
@@ -73,6 +79,60 @@ getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name,
std::pair<GlobalVariable *, GlobalVariable *>
getOffloadEntryArray(Module &M, StringRef SectionName);
namespace amdgpu {
/// Check if an image is compatible with current system's environment. The
/// system environment is given as a 'target-id' which has the form:
///
/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
///
/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
/// and is compatible with either '+' or '-'. The HSA runtime returns this
/// information using the target-id, while we use the ELF header to determine
/// these features.
bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
StringRef EnvTargetID);
/// Struct for holding metadata related to AMDGPU kernels, for more information
/// about the metadata and its meaning see:
/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
struct AMDGPUKernelMetaData {
/// Constant indicating that a value is invalid.
static constexpr uint32_t KInvalidValue =
std::numeric_limits<uint32_t>::max();
/// The amount of group segment memory required by a work-group in bytes.
uint32_t GroupSegmentList = KInvalidValue;
/// The amount of fixed private address space memory required for a work-item
/// in bytes.
uint32_t PrivateSegmentSize = KInvalidValue;
/// Number of scalar registers required by a wavefront.
uint32_t SGPRCount = KInvalidValue;
/// Number of vector registers required by each work-item.
uint32_t VGPRCount = KInvalidValue;
/// Number of stores from a scalar register to a register allocator created
/// spill location.
uint32_t SGPRSpillCount = KInvalidValue;
/// Number of stores from a vector register to a register allocator created
/// spill location.
uint32_t VGPRSpillCount = KInvalidValue;
/// Number of accumulator registers required by each work-item.
uint32_t AGPRCount = KInvalidValue;
/// Corresponds to the OpenCL reqd_work_group_size attribute.
uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
KInvalidValue};
/// Corresponds to the OpenCL work_group_size_hint attribute.
uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
/// Wavefront size.
uint32_t WavefrontSize = KInvalidValue;
/// Maximum flat work-group size supported by the kernel in work-items.
uint32_t MaxFlatWorkgroupSize = KInvalidValue;
};
/// Reads AMDGPU specific metadata from the ELF file and propagates the
/// KernelInfoMap.
Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
uint16_t &ELFABIVersion);
} // namespace amdgpu
} // namespace offloading
} // namespace llvm

View File

@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMFrontendOffloading
LINK_COMPONENTS
Core
BinaryFormat
Object
Support
TransformUtils
TargetParser

View File

@@ -7,10 +7,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Value.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
return std::make_pair(EntriesB, EntriesE);
}
bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
uint32_t ImageFlags,
StringRef EnvTargetID) {
using namespace llvm::ELF;
StringRef EnvArch = EnvTargetID.split(":").first;
// Trivial check if the base processors match.
if (EnvArch != ImageArch)
return false;
// Check if the image is requesting xnack on or off.
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
// The image is 'xnack-' so the environment must be 'xnack-'.
if (!EnvTargetID.contains("xnack-"))
return false;
break;
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
// The image is 'xnack+' so the environment must be 'xnack+'.
if (!EnvTargetID.contains("xnack+"))
return false;
break;
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
default:
break;
}
// Check if the image is requesting sramecc on or off.
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
// The image is 'sramecc-' so the environment must be 'sramecc-'.
if (!EnvTargetID.contains("sramecc-"))
return false;
break;
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
// The image is 'sramecc+' so the environment must be 'sramecc+'.
if (!EnvTargetID.contains("sramecc+"))
return false;
break;
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
break;
}
return true;
}
namespace {
/// Reads the AMDGPU specific per-kernel-metadata from an image.
class KernelInfoReader {
public:
KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
: KernelInfoMap(KIM) {}
/// Process ELF note to read AMDGPU metadata from respective information
/// fields.
Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
if (Note.getName() != "AMDGPU")
return Error::success(); // We are not interested in other things
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
"Parse AMDGPU MetaData");
auto Desc = Note.getDesc(Align);
StringRef MsgPackString =
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
msgpack::Document MsgPackDoc;
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
return Error::success();
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
if (!Verifier.verify(MsgPackDoc.getRoot()))
return Error::success();
auto RootMap = MsgPackDoc.getRoot().getMap(true);
if (auto Err = iterateAMDKernels(RootMap))
return Err;
return Error::success();
}
private:
/// Extracts the relevant information via simple string look-up in the msgpack
/// document elements.
Error
extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
std::string &KernelName,
offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
if (!V.first.isString())
return Error::success();
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
return DK.getString() == SK;
};
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
uint32_t *Vals) {
assert(DN.isArray() && "MsgPack DocNode is an array node");
auto DNA = DN.getArray();
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
int I = 0;
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
++DNABegin) {
Vals[I++] = DNABegin->getUInt();
}
};
if (IsKey(V.first, ".name")) {
KernelName = V.second.toString();
} else if (IsKey(V.first, ".sgpr_count")) {
KernelData.SGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".sgpr_spill_count")) {
KernelData.SGPRSpillCount = V.second.getUInt();
} else if (IsKey(V.first, ".vgpr_count")) {
KernelData.VGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".vgpr_spill_count")) {
KernelData.VGPRSpillCount = V.second.getUInt();
} else if (IsKey(V.first, ".agpr_count")) {
KernelData.AGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
KernelData.PrivateSegmentSize = V.second.getUInt();
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
KernelData.GroupSegmentList = V.second.getUInt();
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
} else if (IsKey(V.first, ".workgroup_size_hint")) {
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
} else if (IsKey(V.first, ".wavefront_size")) {
KernelData.WavefrontSize = V.second.getUInt();
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
}
return Error::success();
}
/// Get the "amdhsa.kernels" element from the msgpack Document
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
auto Res = MDN.find("amdhsa.kernels");
if (Res == MDN.end())
return createStringError(inconvertibleErrorCode(),
"Could not find amdhsa.kernels key");
auto Pair = *Res;
assert(Pair.second.isArray() &&
"AMDGPU kernel entries are arrays of entries");
return Pair.second.getArray();
}
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
/// MapDocNode that either maps a string to a single value (most of them) or
/// to another array of things. Currently, we only handle the case that maps
/// to scalar value.
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
offloading::amdgpu::AMDGPUKernelMetaData KernelData;
std::string KernelName;
auto Entry = (*It).getMap();
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
return Err;
KernelInfoMap.insert({KernelName, KernelData});
return Error::success();
}
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
auto KernelsOrErr = getAMDKernelsArray(MDN);
if (auto Err = KernelsOrErr.takeError())
return Err;
auto KernelsArr = *KernelsOrErr;
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
if (!It->isMap())
continue; // we expect <key,value> pairs
// Obtain the value for the different entries. Each array entry is a
// MapDocNode
if (auto Err = generateKernelInfo(It))
return Err;
}
return Error::success();
}
// Kernel names are the keys
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
};
} // namespace
Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
MemoryBufferRef MemBuffer,
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
uint16_t &ELFABIVersion) {
Error Err = Error::success(); // Used later as out-parameter
auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
if (auto Err = ELFOrError.takeError())
return Err;
const object::ELF64LEFile ELFObj = ELFOrError.get();
Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
if (!Sections)
return Sections.takeError();
KernelInfoReader Reader(KernelInfoMap);
// Read the code object version from ELF image header
auto Header = ELFObj.getHeader();
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
for (const auto &S : *Sections) {
if (S.sh_type != ELF::SHT_NOTE)
continue;
for (const auto N : ELFObj.notes(S, Err)) {
if (Err)
return Err;
// Fills the KernelInfoTabel entries in the reader
if ((Err = Reader.processNote(N, S.sh_addralign)))
return Err;
}
}
return Error::success();
}

View File

@@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
message(STATUS "Building AMDGPU plugin linked against libhsa")
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64)
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloading)
else()
message(STATUS "Building AMDGPU plugin for dlopened libhsa")
target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloading)
endif()
# Configure testing for the AMDGPU plugin. We will build tests if we could a

View File

@@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
/// Get additional info for kernel, e.g., register spill counts
std::optional<utils::KernelMetaDataTy>
std::optional<offloading::amdgpu::AMDGPUKernelMetaData>
getKernelInfo(StringRef Identifier) const {
auto It = KernelInfoMap.find(Identifier);
@@ -499,7 +499,7 @@ private:
/// The exectuable loaded on the agent.
hsa_executable_t Executable;
hsa_code_object_t CodeObject;
StringMap<utils::KernelMetaDataTy> KernelInfoMap;
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
uint16_t ELFABIVersion;
};
@@ -600,7 +600,7 @@ private:
uint32_t ImplicitArgsSize;
/// Additional Info for the AMD GPU Kernel
std::optional<utils::KernelMetaDataTy> KernelInfo;
std::optional<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfo;
};
/// Class representing an HSA signal. Signals are used to define dependencies
@@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
if (!TargeTripleAndFeaturesOrError)
return TargeTripleAndFeaturesOrError.takeError();
return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
ElfOrErr->getPlatformFlags(),
*TargeTripleAndFeaturesOrError);
return offloading::amdgpu::isImageCompatibleWithEnv(
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
*TargeTripleAndFeaturesOrError);
}
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {

View File

@@ -17,24 +17,13 @@
#include "omptarget.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/YAMLTraits.h"
using namespace llvm::ELF;
#include "llvm/Frontend/Offloading/Utility.h"
namespace llvm {
namespace omp {
namespace target {
namespace plugin {
namespace utils {
// The implicit arguments of COV5 AMDGPU kernels.
struct AMDGPUImplicitArgsTy {
uint32_t BlockCountX;
@@ -55,259 +44,26 @@ struct AMDGPUImplicitArgsTyCOV4 {
uint8_t Unused[56];
};
/// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
/// `Version` is the ELF ABI version, e.g. COV5.
inline uint32_t getImplicitArgsSize(uint16_t Version) {
return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
? sizeof(AMDGPUImplicitArgsTyCOV4)
: sizeof(AMDGPUImplicitArgsTy);
}
/// Check if an image is compatible with current system's environment. The
/// system environment is given as a 'target-id' which has the form:
///
/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
///
/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
/// and is compatible with either '+' or '-'. The HSA runtime returns this
/// information using the target-id, while we use the ELF header to determine
/// these features.
inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
StringRef EnvTargetID) {
StringRef EnvArch = EnvTargetID.split(":").first;
// Trivial check if the base processors match.
if (EnvArch != ImageArch)
return false;
// Check if the image is requesting xnack on or off.
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
// The image is 'xnack-' so the environment must be 'xnack-'.
if (!EnvTargetID.contains("xnack-"))
return false;
break;
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
// The image is 'xnack+' so the environment must be 'xnack+'.
if (!EnvTargetID.contains("xnack+"))
return false;
break;
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
default:
break;
}
// Check if the image is requesting sramecc on or off.
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
// The image is 'sramecc-' so the environment must be 'sramecc-'.
if (!EnvTargetID.contains("sramecc-"))
return false;
break;
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
// The image is 'sramecc+' so the environment must be 'sramecc+'.
if (!EnvTargetID.contains("sramecc+"))
return false;
break;
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
break;
}
return true;
}
struct KernelMetaDataTy {
uint64_t KernelObject;
uint32_t GroupSegmentList;
uint32_t PrivateSegmentSize;
uint32_t SGPRCount;
uint32_t VGPRCount;
uint32_t SGPRSpillCount;
uint32_t VGPRSpillCount;
uint32_t KernelSegmentSize;
uint32_t ExplicitArgumentCount;
uint32_t ImplicitArgumentCount;
uint32_t RequestedWorkgroupSize[3];
uint32_t WorkgroupSizeHint[3];
uint32_t WavefronSize;
uint32_t MaxFlatWorkgroupSize;
};
namespace {
/// Reads the AMDGPU specific per-kernel-metadata from an image.
class KernelInfoReader {
public:
KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
/// Process ELF note to read AMDGPU metadata from respective information
/// fields.
Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
if (Note.getName() != "AMDGPU")
return Error::success(); // We are not interested in other things
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
"Parse AMDGPU MetaData");
auto Desc = Note.getDesc(Align);
StringRef MsgPackString =
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
msgpack::Document MsgPackDoc;
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
return Error::success();
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
if (!Verifier.verify(MsgPackDoc.getRoot()))
return Error::success();
auto RootMap = MsgPackDoc.getRoot().getMap(true);
if (auto Err = iterateAMDKernels(RootMap))
return Err;
return Error::success();
}
private:
/// Extracts the relevant information via simple string look-up in the msgpack
/// document elements.
Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
std::string &KernelName,
KernelMetaDataTy &KernelData) {
if (!V.first.isString())
return Error::success();
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
return DK.getString() == SK;
};
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
uint32_t *Vals) {
assert(DN.isArray() && "MsgPack DocNode is an array node");
auto DNA = DN.getArray();
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
int I = 0;
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
++DNABegin) {
Vals[I++] = DNABegin->getUInt();
}
};
if (IsKey(V.first, ".name")) {
KernelName = V.second.toString();
} else if (IsKey(V.first, ".sgpr_count")) {
KernelData.SGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".sgpr_spill_count")) {
KernelData.SGPRSpillCount = V.second.getUInt();
} else if (IsKey(V.first, ".vgpr_count")) {
KernelData.VGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".vgpr_spill_count")) {
KernelData.VGPRSpillCount = V.second.getUInt();
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
KernelData.PrivateSegmentSize = V.second.getUInt();
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
KernelData.GroupSegmentList = V.second.getUInt();
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
} else if (IsKey(V.first, ".workgroup_size_hint")) {
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
} else if (IsKey(V.first, ".wavefront_size")) {
KernelData.WavefronSize = V.second.getUInt();
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
}
return Error::success();
}
/// Get the "amdhsa.kernels" element from the msgpack Document
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
auto Res = MDN.find("amdhsa.kernels");
if (Res == MDN.end())
return createStringError(inconvertibleErrorCode(),
"Could not find amdhsa.kernels key");
auto Pair = *Res;
assert(Pair.second.isArray() &&
"AMDGPU kernel entries are arrays of entries");
return Pair.second.getArray();
}
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
/// MapDocNode that either maps a string to a single value (most of them) or
/// to another array of things. Currently, we only handle the case that maps
/// to scalar value.
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
KernelMetaDataTy KernelData;
std::string KernelName;
auto Entry = (*It).getMap();
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
return Err;
KernelInfoMap.insert({KernelName, KernelData});
return Error::success();
}
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
auto KernelsOrErr = getAMDKernelsArray(MDN);
if (auto Err = KernelsOrErr.takeError())
return Err;
auto KernelsArr = *KernelsOrErr;
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
if (!It->isMap())
continue; // we expect <key,value> pairs
// Obtain the value for the different entries. Each array entry is a
// MapDocNode
if (auto Err = generateKernelInfo(It))
return Err;
}
return Error::success();
}
// Kernel names are the keys
StringMap<KernelMetaDataTy> &KernelInfoMap;
};
} // namespace
/// Reads the AMDGPU specific metadata from the ELF file and propagates the
/// KernelInfoMap
inline Error
readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
StringMap<KernelMetaDataTy> &KernelInfoMap,
uint16_t &ELFABIVersion) {
Error Err = Error::success(); // Used later as out-parameter
auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
if (auto Err = ELFOrError.takeError())
inline Error readAMDGPUMetaDataFromImage(
MemoryBufferRef MemBuffer,
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
uint16_t &ELFABIVersion) {
Error Err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
MemBuffer, KernelInfoMap, ELFABIVersion);
if (!Err)
return Err;
const object::ELF64LEFile ELFObj = ELFOrError.get();
ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
KernelInfoReader Reader(KernelInfoMap);
// Read the code object version from ELF image header
auto Header = ELFObj.getHeader();
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
for (const auto &S : Sections) {
if (S.sh_type != ELF::SHT_NOTE)
continue;
for (const auto N : ELFObj.notes(S, Err)) {
if (Err)
return Err;
// Fills the KernelInfoTabel entries in the reader
if ((Err = Reader.processNote(N, S.sh_addralign)))
return Err;
}
}
return Error::success();
return Err;
}
} // namespace utils