[llvm][offload] Move AMDGPU offload utilities to LLVM (#102487)
This patch moves utilities from `offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h` to `llvm/Frontend/Offloading/Utility.h` to be reused by other projects. Concretely the following changes were made: - Rename `KernelMetaDataTy` to `AMDGPUKernelMetaData`. - Remove unused fields `KernelObject`, `KernelSegmentSize`, `ExplicitArgumentCount` and `ImplicitArgumentCount` from `AMDGPUKernelMetaData`. - Return the produced error if `ELFObj.sections()` failed instead of using `cantFail`. - Added `AGPRCount` field to `AMDGPUKernelMetaData`. - Added a default invalid value to all the fields in `AMDGPUKernelMetaData`.
This commit is contained in:
@@ -9,8 +9,14 @@
|
||||
#ifndef LLVM_FRONTEND_OFFLOADING_UTILITY_H
|
||||
#define LLVM_FRONTEND_OFFLOADING_UTILITY_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Object/OffloadBinary.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/MemoryBufferRef.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace offloading {
|
||||
@@ -73,6 +79,60 @@ getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name,
|
||||
std::pair<GlobalVariable *, GlobalVariable *>
|
||||
getOffloadEntryArray(Module &M, StringRef SectionName);
|
||||
|
||||
namespace amdgpu {
|
||||
/// Check if an image is compatible with current system's environment. The
|
||||
/// system environment is given as a 'target-id' which has the form:
|
||||
///
|
||||
/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
|
||||
///
|
||||
/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
|
||||
/// and is compatible with either '+' or '-'. The HSA runtime returns this
|
||||
/// information using the target-id, while we use the ELF header to determine
|
||||
/// these features.
|
||||
bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
|
||||
StringRef EnvTargetID);
|
||||
|
||||
/// Struct for holding metadata related to AMDGPU kernels, for more information
|
||||
/// about the metadata and its meaning see:
|
||||
/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
|
||||
struct AMDGPUKernelMetaData {
|
||||
/// Constant indicating that a value is invalid.
|
||||
static constexpr uint32_t KInvalidValue =
|
||||
std::numeric_limits<uint32_t>::max();
|
||||
/// The amount of group segment memory required by a work-group in bytes.
|
||||
uint32_t GroupSegmentList = KInvalidValue;
|
||||
/// The amount of fixed private address space memory required for a work-item
|
||||
/// in bytes.
|
||||
uint32_t PrivateSegmentSize = KInvalidValue;
|
||||
/// Number of scalar registers required by a wavefront.
|
||||
uint32_t SGPRCount = KInvalidValue;
|
||||
/// Number of vector registers required by each work-item.
|
||||
uint32_t VGPRCount = KInvalidValue;
|
||||
/// Number of stores from a scalar register to a register allocator created
|
||||
/// spill location.
|
||||
uint32_t SGPRSpillCount = KInvalidValue;
|
||||
/// Number of stores from a vector register to a register allocator created
|
||||
/// spill location.
|
||||
uint32_t VGPRSpillCount = KInvalidValue;
|
||||
/// Number of accumulator registers required by each work-item.
|
||||
uint32_t AGPRCount = KInvalidValue;
|
||||
/// Corresponds to the OpenCL reqd_work_group_size attribute.
|
||||
uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
|
||||
KInvalidValue};
|
||||
/// Corresponds to the OpenCL work_group_size_hint attribute.
|
||||
uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
|
||||
/// Wavefront size.
|
||||
uint32_t WavefrontSize = KInvalidValue;
|
||||
/// Maximum flat work-group size supported by the kernel in work-items.
|
||||
uint32_t MaxFlatWorkgroupSize = KInvalidValue;
|
||||
};
|
||||
|
||||
/// Reads AMDGPU specific metadata from the ELF file and propagates the
|
||||
/// KernelInfoMap.
|
||||
Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
|
||||
StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
|
||||
uint16_t &ELFABIVersion);
|
||||
} // namespace amdgpu
|
||||
} // namespace offloading
|
||||
} // namespace llvm
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMFrontendOffloading
|
||||
LINK_COMPONENTS
|
||||
Core
|
||||
BinaryFormat
|
||||
Object
|
||||
Support
|
||||
TransformUtils
|
||||
TargetParser
|
||||
|
||||
@@ -7,10 +7,16 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Frontend/Offloading/Utility.h"
|
||||
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
#include "llvm/BinaryFormat/MsgPackDocument.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/Object/ELFObjectFile.h"
|
||||
#include "llvm/Support/MemoryBufferRef.h"
|
||||
#include "llvm/Support/YAMLTraits.h"
|
||||
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
||||
|
||||
using namespace llvm;
|
||||
@@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
|
||||
|
||||
return std::make_pair(EntriesB, EntriesE);
|
||||
}
|
||||
|
||||
bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
|
||||
uint32_t ImageFlags,
|
||||
StringRef EnvTargetID) {
|
||||
using namespace llvm::ELF;
|
||||
StringRef EnvArch = EnvTargetID.split(":").first;
|
||||
|
||||
// Trivial check if the base processors match.
|
||||
if (EnvArch != ImageArch)
|
||||
return false;
|
||||
|
||||
// Check if the image is requesting xnack on or off.
|
||||
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
|
||||
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
|
||||
// The image is 'xnack-' so the environment must be 'xnack-'.
|
||||
if (!EnvTargetID.contains("xnack-"))
|
||||
return false;
|
||||
break;
|
||||
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
|
||||
// The image is 'xnack+' so the environment must be 'xnack+'.
|
||||
if (!EnvTargetID.contains("xnack+"))
|
||||
return false;
|
||||
break;
|
||||
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
|
||||
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// Check if the image is requesting sramecc on or off.
|
||||
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
|
||||
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
|
||||
// The image is 'sramecc-' so the environment must be 'sramecc-'.
|
||||
if (!EnvTargetID.contains("sramecc-"))
|
||||
return false;
|
||||
break;
|
||||
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
|
||||
// The image is 'sramecc+' so the environment must be 'sramecc+'.
|
||||
if (!EnvTargetID.contains("sramecc+"))
|
||||
return false;
|
||||
break;
|
||||
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
|
||||
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// Reads the AMDGPU specific per-kernel-metadata from an image.
|
||||
class KernelInfoReader {
|
||||
public:
|
||||
KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
|
||||
: KernelInfoMap(KIM) {}
|
||||
|
||||
/// Process ELF note to read AMDGPU metadata from respective information
|
||||
/// fields.
|
||||
Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
|
||||
if (Note.getName() != "AMDGPU")
|
||||
return Error::success(); // We are not interested in other things
|
||||
|
||||
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
|
||||
"Parse AMDGPU MetaData");
|
||||
auto Desc = Note.getDesc(Align);
|
||||
StringRef MsgPackString =
|
||||
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
|
||||
msgpack::Document MsgPackDoc;
|
||||
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
|
||||
return Error::success();
|
||||
|
||||
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
|
||||
if (!Verifier.verify(MsgPackDoc.getRoot()))
|
||||
return Error::success();
|
||||
|
||||
auto RootMap = MsgPackDoc.getRoot().getMap(true);
|
||||
|
||||
if (auto Err = iterateAMDKernels(RootMap))
|
||||
return Err;
|
||||
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
private:
|
||||
/// Extracts the relevant information via simple string look-up in the msgpack
|
||||
/// document elements.
|
||||
Error
|
||||
extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
|
||||
std::string &KernelName,
|
||||
offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
|
||||
if (!V.first.isString())
|
||||
return Error::success();
|
||||
|
||||
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
|
||||
return DK.getString() == SK;
|
||||
};
|
||||
|
||||
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
|
||||
uint32_t *Vals) {
|
||||
assert(DN.isArray() && "MsgPack DocNode is an array node");
|
||||
auto DNA = DN.getArray();
|
||||
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
|
||||
|
||||
int I = 0;
|
||||
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
|
||||
++DNABegin) {
|
||||
Vals[I++] = DNABegin->getUInt();
|
||||
}
|
||||
};
|
||||
|
||||
if (IsKey(V.first, ".name")) {
|
||||
KernelName = V.second.toString();
|
||||
} else if (IsKey(V.first, ".sgpr_count")) {
|
||||
KernelData.SGPRCount = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".sgpr_spill_count")) {
|
||||
KernelData.SGPRSpillCount = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".vgpr_count")) {
|
||||
KernelData.VGPRCount = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".vgpr_spill_count")) {
|
||||
KernelData.VGPRSpillCount = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".agpr_count")) {
|
||||
KernelData.AGPRCount = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
|
||||
KernelData.PrivateSegmentSize = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
|
||||
KernelData.GroupSegmentList = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
|
||||
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
|
||||
} else if (IsKey(V.first, ".workgroup_size_hint")) {
|
||||
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
|
||||
} else if (IsKey(V.first, ".wavefront_size")) {
|
||||
KernelData.WavefrontSize = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
|
||||
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
|
||||
}
|
||||
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
/// Get the "amdhsa.kernels" element from the msgpack Document
|
||||
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
|
||||
auto Res = MDN.find("amdhsa.kernels");
|
||||
if (Res == MDN.end())
|
||||
return createStringError(inconvertibleErrorCode(),
|
||||
"Could not find amdhsa.kernels key");
|
||||
|
||||
auto Pair = *Res;
|
||||
assert(Pair.second.isArray() &&
|
||||
"AMDGPU kernel entries are arrays of entries");
|
||||
|
||||
return Pair.second.getArray();
|
||||
}
|
||||
|
||||
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
|
||||
/// MapDocNode that either maps a string to a single value (most of them) or
|
||||
/// to another array of things. Currently, we only handle the case that maps
|
||||
/// to scalar value.
|
||||
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
|
||||
offloading::amdgpu::AMDGPUKernelMetaData KernelData;
|
||||
std::string KernelName;
|
||||
auto Entry = (*It).getMap();
|
||||
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
|
||||
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
|
||||
return Err;
|
||||
|
||||
KernelInfoMap.insert({KernelName, KernelData});
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
|
||||
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
|
||||
auto KernelsOrErr = getAMDKernelsArray(MDN);
|
||||
if (auto Err = KernelsOrErr.takeError())
|
||||
return Err;
|
||||
|
||||
auto KernelsArr = *KernelsOrErr;
|
||||
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
|
||||
if (!It->isMap())
|
||||
continue; // we expect <key,value> pairs
|
||||
|
||||
// Obtain the value for the different entries. Each array entry is a
|
||||
// MapDocNode
|
||||
if (auto Err = generateKernelInfo(It))
|
||||
return Err;
|
||||
}
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
// Kernel names are the keys
|
||||
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
|
||||
MemoryBufferRef MemBuffer,
|
||||
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
|
||||
uint16_t &ELFABIVersion) {
|
||||
Error Err = Error::success(); // Used later as out-parameter
|
||||
|
||||
auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
|
||||
if (auto Err = ELFOrError.takeError())
|
||||
return Err;
|
||||
|
||||
const object::ELF64LEFile ELFObj = ELFOrError.get();
|
||||
Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
|
||||
if (!Sections)
|
||||
return Sections.takeError();
|
||||
KernelInfoReader Reader(KernelInfoMap);
|
||||
|
||||
// Read the code object version from ELF image header
|
||||
auto Header = ELFObj.getHeader();
|
||||
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
|
||||
for (const auto &S : *Sections) {
|
||||
if (S.sh_type != ELF::SHT_NOTE)
|
||||
continue;
|
||||
|
||||
for (const auto N : ELFObj.notes(S, Err)) {
|
||||
if (Err)
|
||||
return Err;
|
||||
// Fills the KernelInfoTabel entries in the reader
|
||||
if ((Err = Reader.processNote(N, S.sh_addralign)))
|
||||
return Err;
|
||||
}
|
||||
}
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
@@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
|
||||
|
||||
if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
|
||||
message(STATUS "Building AMDGPU plugin linked against libhsa")
|
||||
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64)
|
||||
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloading)
|
||||
else()
|
||||
message(STATUS "Building AMDGPU plugin for dlopened libhsa")
|
||||
target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
|
||||
target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
|
||||
target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloading)
|
||||
endif()
|
||||
|
||||
# Configure testing for the AMDGPU plugin. We will build tests if we could a
|
||||
|
||||
@@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
|
||||
findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
|
||||
|
||||
/// Get additional info for kernel, e.g., register spill counts
|
||||
std::optional<utils::KernelMetaDataTy>
|
||||
std::optional<offloading::amdgpu::AMDGPUKernelMetaData>
|
||||
getKernelInfo(StringRef Identifier) const {
|
||||
auto It = KernelInfoMap.find(Identifier);
|
||||
|
||||
@@ -499,7 +499,7 @@ private:
|
||||
/// The exectuable loaded on the agent.
|
||||
hsa_executable_t Executable;
|
||||
hsa_code_object_t CodeObject;
|
||||
StringMap<utils::KernelMetaDataTy> KernelInfoMap;
|
||||
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
|
||||
uint16_t ELFABIVersion;
|
||||
};
|
||||
|
||||
@@ -600,7 +600,7 @@ private:
|
||||
uint32_t ImplicitArgsSize;
|
||||
|
||||
/// Additional Info for the AMD GPU Kernel
|
||||
std::optional<utils::KernelMetaDataTy> KernelInfo;
|
||||
std::optional<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfo;
|
||||
};
|
||||
|
||||
/// Class representing an HSA signal. Signals are used to define dependencies
|
||||
@@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
|
||||
utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
|
||||
if (!TargeTripleAndFeaturesOrError)
|
||||
return TargeTripleAndFeaturesOrError.takeError();
|
||||
return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
|
||||
ElfOrErr->getPlatformFlags(),
|
||||
*TargeTripleAndFeaturesOrError);
|
||||
return offloading::amdgpu::isImageCompatibleWithEnv(
|
||||
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
|
||||
*TargeTripleAndFeaturesOrError);
|
||||
}
|
||||
|
||||
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
|
||||
|
||||
@@ -17,24 +17,13 @@
|
||||
|
||||
#include "omptarget.h"
|
||||
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
|
||||
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
#include "llvm/BinaryFormat/MsgPackDocument.h"
|
||||
#include "llvm/Support/MemoryBufferRef.h"
|
||||
#include "llvm/Support/YAMLTraits.h"
|
||||
|
||||
using namespace llvm::ELF;
|
||||
#include "llvm/Frontend/Offloading/Utility.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace omp {
|
||||
namespace target {
|
||||
namespace plugin {
|
||||
namespace utils {
|
||||
|
||||
// The implicit arguments of COV5 AMDGPU kernels.
|
||||
struct AMDGPUImplicitArgsTy {
|
||||
uint32_t BlockCountX;
|
||||
@@ -55,259 +44,26 @@ struct AMDGPUImplicitArgsTyCOV4 {
|
||||
uint8_t Unused[56];
|
||||
};
|
||||
|
||||
/// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
|
||||
/// `Version` is the ELF ABI version, e.g. COV5.
|
||||
inline uint32_t getImplicitArgsSize(uint16_t Version) {
|
||||
return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
|
||||
? sizeof(AMDGPUImplicitArgsTyCOV4)
|
||||
: sizeof(AMDGPUImplicitArgsTy);
|
||||
}
|
||||
|
||||
/// Check if an image is compatible with current system's environment. The
|
||||
/// system environment is given as a 'target-id' which has the form:
|
||||
///
|
||||
/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
|
||||
///
|
||||
/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
|
||||
/// and is compatible with either '+' or '-'. The HSA runtime returns this
|
||||
/// information using the target-id, while we use the ELF header to determine
|
||||
/// these features.
|
||||
inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
|
||||
StringRef EnvTargetID) {
|
||||
StringRef EnvArch = EnvTargetID.split(":").first;
|
||||
|
||||
// Trivial check if the base processors match.
|
||||
if (EnvArch != ImageArch)
|
||||
return false;
|
||||
|
||||
// Check if the image is requesting xnack on or off.
|
||||
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
|
||||
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
|
||||
// The image is 'xnack-' so the environment must be 'xnack-'.
|
||||
if (!EnvTargetID.contains("xnack-"))
|
||||
return false;
|
||||
break;
|
||||
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
|
||||
// The image is 'xnack+' so the environment must be 'xnack+'.
|
||||
if (!EnvTargetID.contains("xnack+"))
|
||||
return false;
|
||||
break;
|
||||
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
|
||||
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// Check if the image is requesting sramecc on or off.
|
||||
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
|
||||
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
|
||||
// The image is 'sramecc-' so the environment must be 'sramecc-'.
|
||||
if (!EnvTargetID.contains("sramecc-"))
|
||||
return false;
|
||||
break;
|
||||
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
|
||||
// The image is 'sramecc+' so the environment must be 'sramecc+'.
|
||||
if (!EnvTargetID.contains("sramecc+"))
|
||||
return false;
|
||||
break;
|
||||
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
|
||||
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct KernelMetaDataTy {
|
||||
uint64_t KernelObject;
|
||||
uint32_t GroupSegmentList;
|
||||
uint32_t PrivateSegmentSize;
|
||||
uint32_t SGPRCount;
|
||||
uint32_t VGPRCount;
|
||||
uint32_t SGPRSpillCount;
|
||||
uint32_t VGPRSpillCount;
|
||||
uint32_t KernelSegmentSize;
|
||||
uint32_t ExplicitArgumentCount;
|
||||
uint32_t ImplicitArgumentCount;
|
||||
uint32_t RequestedWorkgroupSize[3];
|
||||
uint32_t WorkgroupSizeHint[3];
|
||||
uint32_t WavefronSize;
|
||||
uint32_t MaxFlatWorkgroupSize;
|
||||
};
|
||||
namespace {
|
||||
|
||||
/// Reads the AMDGPU specific per-kernel-metadata from an image.
|
||||
class KernelInfoReader {
|
||||
public:
|
||||
KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
|
||||
|
||||
/// Process ELF note to read AMDGPU metadata from respective information
|
||||
/// fields.
|
||||
Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
|
||||
if (Note.getName() != "AMDGPU")
|
||||
return Error::success(); // We are not interested in other things
|
||||
|
||||
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
|
||||
"Parse AMDGPU MetaData");
|
||||
auto Desc = Note.getDesc(Align);
|
||||
StringRef MsgPackString =
|
||||
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
|
||||
msgpack::Document MsgPackDoc;
|
||||
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
|
||||
return Error::success();
|
||||
|
||||
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
|
||||
if (!Verifier.verify(MsgPackDoc.getRoot()))
|
||||
return Error::success();
|
||||
|
||||
auto RootMap = MsgPackDoc.getRoot().getMap(true);
|
||||
|
||||
if (auto Err = iterateAMDKernels(RootMap))
|
||||
return Err;
|
||||
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
private:
|
||||
/// Extracts the relevant information via simple string look-up in the msgpack
|
||||
/// document elements.
|
||||
Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
|
||||
std::string &KernelName,
|
||||
KernelMetaDataTy &KernelData) {
|
||||
if (!V.first.isString())
|
||||
return Error::success();
|
||||
|
||||
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
|
||||
return DK.getString() == SK;
|
||||
};
|
||||
|
||||
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
|
||||
uint32_t *Vals) {
|
||||
assert(DN.isArray() && "MsgPack DocNode is an array node");
|
||||
auto DNA = DN.getArray();
|
||||
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
|
||||
|
||||
int I = 0;
|
||||
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
|
||||
++DNABegin) {
|
||||
Vals[I++] = DNABegin->getUInt();
|
||||
}
|
||||
};
|
||||
|
||||
if (IsKey(V.first, ".name")) {
|
||||
KernelName = V.second.toString();
|
||||
} else if (IsKey(V.first, ".sgpr_count")) {
|
||||
KernelData.SGPRCount = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".sgpr_spill_count")) {
|
||||
KernelData.SGPRSpillCount = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".vgpr_count")) {
|
||||
KernelData.VGPRCount = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".vgpr_spill_count")) {
|
||||
KernelData.VGPRSpillCount = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
|
||||
KernelData.PrivateSegmentSize = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
|
||||
KernelData.GroupSegmentList = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
|
||||
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
|
||||
} else if (IsKey(V.first, ".workgroup_size_hint")) {
|
||||
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
|
||||
} else if (IsKey(V.first, ".wavefront_size")) {
|
||||
KernelData.WavefronSize = V.second.getUInt();
|
||||
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
|
||||
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
|
||||
}
|
||||
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
/// Get the "amdhsa.kernels" element from the msgpack Document
|
||||
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
|
||||
auto Res = MDN.find("amdhsa.kernels");
|
||||
if (Res == MDN.end())
|
||||
return createStringError(inconvertibleErrorCode(),
|
||||
"Could not find amdhsa.kernels key");
|
||||
|
||||
auto Pair = *Res;
|
||||
assert(Pair.second.isArray() &&
|
||||
"AMDGPU kernel entries are arrays of entries");
|
||||
|
||||
return Pair.second.getArray();
|
||||
}
|
||||
|
||||
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
|
||||
/// MapDocNode that either maps a string to a single value (most of them) or
|
||||
/// to another array of things. Currently, we only handle the case that maps
|
||||
/// to scalar value.
|
||||
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
|
||||
KernelMetaDataTy KernelData;
|
||||
std::string KernelName;
|
||||
auto Entry = (*It).getMap();
|
||||
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
|
||||
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
|
||||
return Err;
|
||||
|
||||
KernelInfoMap.insert({KernelName, KernelData});
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
|
||||
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
|
||||
auto KernelsOrErr = getAMDKernelsArray(MDN);
|
||||
if (auto Err = KernelsOrErr.takeError())
|
||||
return Err;
|
||||
|
||||
auto KernelsArr = *KernelsOrErr;
|
||||
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
|
||||
if (!It->isMap())
|
||||
continue; // we expect <key,value> pairs
|
||||
|
||||
// Obtain the value for the different entries. Each array entry is a
|
||||
// MapDocNode
|
||||
if (auto Err = generateKernelInfo(It))
|
||||
return Err;
|
||||
}
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
// Kernel names are the keys
|
||||
StringMap<KernelMetaDataTy> &KernelInfoMap;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/// Reads the AMDGPU specific metadata from the ELF file and propagates the
|
||||
/// KernelInfoMap
|
||||
inline Error
|
||||
readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
|
||||
StringMap<KernelMetaDataTy> &KernelInfoMap,
|
||||
uint16_t &ELFABIVersion) {
|
||||
Error Err = Error::success(); // Used later as out-parameter
|
||||
|
||||
auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
|
||||
if (auto Err = ELFOrError.takeError())
|
||||
inline Error readAMDGPUMetaDataFromImage(
|
||||
MemoryBufferRef MemBuffer,
|
||||
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
|
||||
uint16_t &ELFABIVersion) {
|
||||
Error Err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
|
||||
MemBuffer, KernelInfoMap, ELFABIVersion);
|
||||
if (!Err)
|
||||
return Err;
|
||||
|
||||
const object::ELF64LEFile ELFObj = ELFOrError.get();
|
||||
ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
|
||||
KernelInfoReader Reader(KernelInfoMap);
|
||||
|
||||
// Read the code object version from ELF image header
|
||||
auto Header = ELFObj.getHeader();
|
||||
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
|
||||
DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
|
||||
|
||||
for (const auto &S : Sections) {
|
||||
if (S.sh_type != ELF::SHT_NOTE)
|
||||
continue;
|
||||
|
||||
for (const auto N : ELFObj.notes(S, Err)) {
|
||||
if (Err)
|
||||
return Err;
|
||||
// Fills the KernelInfoTabel entries in the reader
|
||||
if ((Err = Reader.processNote(N, S.sh_addralign)))
|
||||
return Err;
|
||||
}
|
||||
}
|
||||
|
||||
return Error::success();
|
||||
return Err;
|
||||
}
|
||||
|
||||
} // namespace utils
|
||||
|
||||
Reference in New Issue
Block a user