[AMDGPU] Add an asm directive to track code_object_version (#76267)
The new directive is named '.amdhsa_code_object_version'. It sets e_ident[EI_ABIVERSION] in the ELF header, and should be treated as the assumed code object version (COV) for the rest of the asm file. This commit also weakens the --amdhsa-code-object-version command-line (CL) flag: previously, the CL flag took precedence over the IR flag; now the IR flag and the asm directive take precedence over the CL flag. This is implemented by merging a few COV-checking functions in AMDGPUBaseInfo.h.
This commit is contained in:
@@ -15428,6 +15428,14 @@ command-line options such as ``-triple``, ``-mcpu``, and
|
||||
The target ID syntax used for code object V2 to V3 for this directive differs
|
||||
from that used elsewhere. See :ref:`amdgpu-target-id-v2-v3`.
|
||||
|
||||
.. _amdgpu-assembler-directive-amdhsa-code-object-version:
|
||||
|
||||
.amdhsa_code_object_version <version>
|
||||
+++++++++++++++++++++++++++++++++++++
|
||||
|
||||
Optional directive which declares the code object version to be generated by the
|
||||
assembler. If not present, the default code object version is used.
|
||||
|
||||
.amdhsa_kernel <name>
|
||||
+++++++++++++++++++++
|
||||
|
||||
|
||||
@@ -92,6 +92,9 @@ public:
|
||||
/// ELF only. Mark that we have seen GNU ABI usage (e.g. SHF_GNU_RETAIN).
|
||||
virtual void markGnuAbi() {}
|
||||
|
||||
/// ELF only, override the default ABIVersion in the ELF header.
|
||||
virtual void setOverrideABIVersion(uint8_t ABIVersion) {}
|
||||
|
||||
/// Tell the object writer to emit an address-significance table during
|
||||
/// writeObject(). If this function is not called, all symbols are treated as
|
||||
/// address-significant.
|
||||
|
||||
@@ -29,11 +29,6 @@ namespace AMDGPU {
|
||||
//===----------------------------------------------------------------------===//
|
||||
namespace HSAMD {
|
||||
|
||||
/// HSA metadata major version for code object V2.
|
||||
constexpr uint32_t VersionMajorV2 = 1;
|
||||
/// HSA metadata minor version for code object V2.
|
||||
constexpr uint32_t VersionMinorV2 = 0;
|
||||
|
||||
/// HSA metadata major version for code object V3.
|
||||
constexpr uint32_t VersionMajorV3 = 1;
|
||||
/// HSA metadata minor version for code object V3.
|
||||
@@ -49,10 +44,9 @@ constexpr uint32_t VersionMajorV5 = 1;
|
||||
/// HSA metadata minor version for code object V5.
|
||||
constexpr uint32_t VersionMinorV5 = 2;
|
||||
|
||||
/// HSA metadata beginning assembler directive.
|
||||
/// Old HSA metadata beginning assembler directive for V2. This is only used for
|
||||
/// diagnostics now.
|
||||
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
|
||||
/// HSA metadata ending assembler directive.
|
||||
constexpr char AssemblerDirectiveEnd[] = ".end_amd_amdgpu_hsa_metadata";
|
||||
|
||||
/// Access qualifiers.
|
||||
enum class AccessQualifier : uint8_t {
|
||||
|
||||
@@ -226,6 +226,8 @@ class ELFObjectWriter : public MCObjectWriter {
|
||||
|
||||
bool SeenGnuAbi = false;
|
||||
|
||||
std::optional<uint8_t> OverrideABIVersion;
|
||||
|
||||
bool hasRelocationAddend() const;
|
||||
|
||||
bool shouldRelocateWithSymbol(const MCAssembler &Asm, const MCValue &Val,
|
||||
@@ -238,6 +240,7 @@ public:
|
||||
|
||||
void reset() override {
|
||||
SeenGnuAbi = false;
|
||||
OverrideABIVersion.reset();
|
||||
Relocations.clear();
|
||||
Renames.clear();
|
||||
MCObjectWriter::reset();
|
||||
@@ -264,6 +267,10 @@ public:
|
||||
void markGnuAbi() override { SeenGnuAbi = true; }
|
||||
bool seenGnuAbi() const { return SeenGnuAbi; }
|
||||
|
||||
bool seenOverrideABIVersion() const { return OverrideABIVersion.has_value(); }
|
||||
uint8_t getOverrideABIVersion() const { return OverrideABIVersion.value(); }
|
||||
void setOverrideABIVersion(uint8_t V) override { OverrideABIVersion = V; }
|
||||
|
||||
friend struct ELFWriter;
|
||||
};
|
||||
|
||||
@@ -417,7 +424,9 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) {
|
||||
? int(ELF::ELFOSABI_GNU)
|
||||
: OSABI);
|
||||
// e_ident[EI_ABIVERSION]
|
||||
W.OS << char(OWriter.TargetObjectWriter->getABIVersion());
|
||||
W.OS << char(OWriter.seenOverrideABIVersion()
|
||||
? OWriter.getOverrideABIVersion()
|
||||
: OWriter.TargetObjectWriter->getABIVersion());
|
||||
|
||||
W.OS.write_zeros(ELF::EI_NIDENT - ELF::EI_PAD);
|
||||
|
||||
|
||||
@@ -123,8 +123,11 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
|
||||
|
||||
getTargetStreamer()->EmitDirectiveAMDGCNTarget();
|
||||
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
|
||||
getTargetStreamer()->EmitDirectiveAMDHSACodeObjectVersion(
|
||||
CodeObjectVersion);
|
||||
HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
|
||||
}
|
||||
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
|
||||
getTargetStreamer()->getPALMetadata()->readFromIR(M);
|
||||
@@ -230,8 +233,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
|
||||
IsaInfo::getNumExtraSGPRs(
|
||||
&STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
|
||||
getTargetStreamer()->getTargetID()->isXnackOnOrAny()),
|
||||
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
|
||||
CodeObjectVersion);
|
||||
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
|
||||
|
||||
Streamer.popSection();
|
||||
}
|
||||
@@ -323,7 +325,7 @@ void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
|
||||
}
|
||||
|
||||
bool AMDGPUAsmPrinter::doInitialization(Module &M) {
|
||||
CodeObjectVersion = AMDGPU::getCodeObjectVersion(M);
|
||||
CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(M);
|
||||
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
|
||||
switch (CodeObjectVersion) {
|
||||
@@ -631,8 +633,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
|
||||
// In the beginning all features are either 'Any' or 'NotSupported',
|
||||
// depending on global target features. This will cover empty modules.
|
||||
getTargetStreamer()->initializeTargetID(
|
||||
*getGlobalSTI(), getGlobalSTI()->getFeatureString(), CodeObjectVersion);
|
||||
getTargetStreamer()->initializeTargetID(*getGlobalSTI(),
|
||||
getGlobalSTI()->getFeatureString());
|
||||
|
||||
// If module is empty, we are done.
|
||||
if (M.empty())
|
||||
|
||||
@@ -144,7 +144,7 @@ public:
|
||||
BumpPtrAllocator &Allocator,
|
||||
SetVector<Function *> *CGSCC, TargetMachine &TM)
|
||||
: InformationCache(M, AG, Allocator, CGSCC), TM(TM),
|
||||
CodeObjectVersion(AMDGPU::getCodeObjectVersion(M)) {}
|
||||
CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
|
||||
|
||||
TargetMachine &TM;
|
||||
|
||||
|
||||
@@ -474,7 +474,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
|
||||
|
||||
const Module *M = MF.getFunction().getParent();
|
||||
if (UserSGPRInfo.hasQueuePtr() &&
|
||||
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
|
||||
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
|
||||
Register QueuePtrReg = Info.addQueuePtr(TRI);
|
||||
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
|
||||
CCInfo.AllocateReg(QueuePtrReg);
|
||||
|
||||
@@ -532,7 +532,8 @@ void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
|
||||
Func.getCallingConv() != CallingConv::SPIR_KERNEL)
|
||||
return;
|
||||
|
||||
auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent());
|
||||
auto CodeObjectVersion =
|
||||
AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent());
|
||||
auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
|
||||
|
||||
auto Kernels =
|
||||
|
||||
@@ -2139,7 +2139,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
|
||||
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
||||
// For code object version 5, private_base and shared_base are passed through
|
||||
// implicit kernargs.
|
||||
if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >=
|
||||
if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
|
||||
AMDGPU::AMDHSA_COV5) {
|
||||
AMDGPUTargetLowering::ImplicitParameter Param =
|
||||
AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE
|
||||
@@ -6582,7 +6582,7 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
|
||||
|
||||
Register SGPR01(AMDGPU::SGPR0_SGPR1);
|
||||
// For code object version 5, queue_ptr is passed through implicit kernarg.
|
||||
if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >=
|
||||
if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
|
||||
AMDGPU::AMDHSA_COV5) {
|
||||
AMDGPUTargetLowering::ImplicitParameter Param =
|
||||
AMDGPUTargetLowering::QUEUE_PTR;
|
||||
|
||||
@@ -323,7 +323,8 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
|
||||
// TargetPassConfig for subtarget.
|
||||
bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
|
||||
bool MadeChange = false;
|
||||
bool IsV5OrAbove = AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5;
|
||||
bool IsV5OrAbove =
|
||||
AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5;
|
||||
Function *BasePtr = getBasePtrIntrinsic(M, IsV5OrAbove);
|
||||
|
||||
if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used.
|
||||
@@ -356,7 +357,7 @@ ModulePass *llvm::createAMDGPULowerKernelAttributesPass() {
|
||||
PreservedAnalyses
|
||||
AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) {
|
||||
bool IsV5OrAbove =
|
||||
AMDGPU::getCodeObjectVersion(*F.getParent()) >= AMDGPU::AMDHSA_COV5;
|
||||
AMDGPU::getAMDHSACodeObjectVersion(*F.getParent()) >= AMDGPU::AMDHSA_COV5;
|
||||
Function *BasePtr = getBasePtrIntrinsic(*F.getParent(), IsV5OrAbove);
|
||||
|
||||
if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used.
|
||||
|
||||
@@ -112,7 +112,7 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
|
||||
|
||||
// By default, for code object v5 and later, track only the minimum scratch
|
||||
// size
|
||||
if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
|
||||
if (AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
|
||||
STI.getTargetTriple().getOS() == Triple::AMDPAL) {
|
||||
if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
|
||||
AssumedStackSizeForDynamicSizeObjects = 0;
|
||||
|
||||
@@ -571,7 +571,7 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
|
||||
// Assume all implicit inputs are used by default
|
||||
const Module *M = F.getParent();
|
||||
unsigned NBytes =
|
||||
AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5 ? 256 : 56;
|
||||
AMDGPU::getAMDHSACodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5 ? 256 : 56;
|
||||
return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
|
||||
NBytes);
|
||||
}
|
||||
|
||||
@@ -1303,10 +1303,8 @@ private:
|
||||
unsigned NextFreeSGPR, SMRange SGPRRange,
|
||||
unsigned &VGPRBlocks, unsigned &SGPRBlocks);
|
||||
bool ParseDirectiveAMDGCNTarget();
|
||||
bool ParseDirectiveAMDHSACodeObjectVersion();
|
||||
bool ParseDirectiveAMDHSAKernel();
|
||||
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
|
||||
bool ParseDirectiveHSACodeObjectVersion();
|
||||
bool ParseDirectiveHSACodeObjectISA();
|
||||
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
|
||||
bool ParseDirectiveAMDKernelCodeT();
|
||||
// TODO: Possibly make subtargetHasRegister const.
|
||||
@@ -5133,20 +5131,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
|
||||
uint32_t &Minor) {
|
||||
if (ParseAsAbsoluteExpression(Major))
|
||||
return TokError("invalid major version");
|
||||
|
||||
if (!trySkipToken(AsmToken::Comma))
|
||||
return TokError("minor version number required, comma expected");
|
||||
|
||||
if (ParseAsAbsoluteExpression(Minor))
|
||||
return TokError("invalid minor version");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
|
||||
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
|
||||
return TokError("directive only supported for amdgcn architecture");
|
||||
@@ -5612,63 +5596,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
||||
}
|
||||
}
|
||||
|
||||
getTargetStreamer().EmitAmdhsaKernelDescriptor(
|
||||
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
|
||||
ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
|
||||
getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
|
||||
NextFreeVGPR, NextFreeSGPR,
|
||||
ReserveVCC, ReserveFlatScr);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
|
||||
uint32_t Major;
|
||||
uint32_t Minor;
|
||||
|
||||
if (ParseDirectiveMajorMinor(Major, Minor))
|
||||
bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
|
||||
uint32_t Version;
|
||||
if (ParseAsAbsoluteExpression(Version))
|
||||
return true;
|
||||
|
||||
getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
|
||||
uint32_t Major;
|
||||
uint32_t Minor;
|
||||
uint32_t Stepping;
|
||||
StringRef VendorName;
|
||||
StringRef ArchName;
|
||||
|
||||
// If this directive has no arguments, then use the ISA version for the
|
||||
// targeted GPU.
|
||||
if (isToken(AsmToken::EndOfStatement)) {
|
||||
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
|
||||
getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
|
||||
ISA.Stepping,
|
||||
"AMD", "AMDGPU");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ParseDirectiveMajorMinor(Major, Minor))
|
||||
return true;
|
||||
|
||||
if (!trySkipToken(AsmToken::Comma))
|
||||
return TokError("stepping version number required, comma expected");
|
||||
|
||||
if (ParseAsAbsoluteExpression(Stepping))
|
||||
return TokError("invalid stepping version");
|
||||
|
||||
if (!trySkipToken(AsmToken::Comma))
|
||||
return TokError("vendor name required, comma expected");
|
||||
|
||||
if (!parseString(VendorName, "invalid vendor name"))
|
||||
return true;
|
||||
|
||||
if (!trySkipToken(AsmToken::Comma))
|
||||
return TokError("arch name required, comma expected");
|
||||
|
||||
if (!parseString(ArchName, "invalid arch name"))
|
||||
return true;
|
||||
|
||||
getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
|
||||
VendorName, ArchName);
|
||||
getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -5955,16 +5894,13 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
|
||||
if (IDVal == ".amdhsa_kernel")
|
||||
return ParseDirectiveAMDHSAKernel();
|
||||
|
||||
if (IDVal == ".amdhsa_code_object_version")
|
||||
return ParseDirectiveAMDHSACodeObjectVersion();
|
||||
|
||||
// TODO: Restructure/combine with PAL metadata directive.
|
||||
if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
|
||||
return ParseDirectiveHSAMetadata();
|
||||
} else {
|
||||
if (IDVal == ".hsa_code_object_version")
|
||||
return ParseDirectiveHSACodeObjectVersion();
|
||||
|
||||
if (IDVal == ".hsa_code_object_isa")
|
||||
return ParseDirectiveHSACodeObjectISA();
|
||||
|
||||
if (IDVal == ".amd_kernel_code_t")
|
||||
return ParseDirectiveAMDKernelCodeT();
|
||||
|
||||
@@ -8137,9 +8073,8 @@ void AMDGPUAsmParser::onBeginOfFile() {
|
||||
return;
|
||||
|
||||
if (!getTargetStreamer().getTargetID())
|
||||
getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
|
||||
// TODO: Should try to check code object version from directive???
|
||||
AMDGPU::getAmdhsaCodeObjectVersion());
|
||||
getTargetStreamer().initializeTargetID(getSTI(),
|
||||
getSTI().getFeatureString());
|
||||
|
||||
if (isHsaAbi(getSTI()))
|
||||
getTargetStreamer().EmitDirectiveAMDGCNTarget();
|
||||
|
||||
@@ -2184,7 +2184,8 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
|
||||
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
|
||||
}
|
||||
|
||||
if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
|
||||
// FIXME: We should be looking at the ELF header ABI version for this.
|
||||
if (AMDGPU::getDefaultAMDHSACodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
|
||||
PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
|
||||
KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
|
||||
|
||||
|
||||
@@ -232,13 +232,11 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
|
||||
bool Is64Bit;
|
||||
bool HasRelocationAddend;
|
||||
uint8_t OSABI = ELF::ELFOSABI_NONE;
|
||||
uint8_t ABIVersion = 0;
|
||||
|
||||
public:
|
||||
ELFAMDGPUAsmBackend(const Target &T, const Triple &TT, uint8_t ABIVersion) :
|
||||
AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn),
|
||||
HasRelocationAddend(TT.getOS() == Triple::AMDHSA),
|
||||
ABIVersion(ABIVersion) {
|
||||
ELFAMDGPUAsmBackend(const Target &T, const Triple &TT)
|
||||
: AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn),
|
||||
HasRelocationAddend(TT.getOS() == Triple::AMDHSA) {
|
||||
switch (TT.getOS()) {
|
||||
case Triple::AMDHSA:
|
||||
OSABI = ELF::ELFOSABI_AMDGPU_HSA;
|
||||
@@ -256,8 +254,7 @@ public:
|
||||
|
||||
std::unique_ptr<MCObjectTargetWriter>
|
||||
createObjectTargetWriter() const override {
|
||||
return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend,
|
||||
ABIVersion);
|
||||
return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -267,6 +264,5 @@ MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCTargetOptions &Options) {
|
||||
return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple(),
|
||||
getHsaAbiVersion(&STI).value_or(0));
|
||||
return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple());
|
||||
}
|
||||
|
||||
@@ -18,8 +18,7 @@ namespace {
|
||||
|
||||
class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
|
||||
public:
|
||||
AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend,
|
||||
uint8_t ABIVersion);
|
||||
AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend);
|
||||
|
||||
protected:
|
||||
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
|
||||
@@ -29,12 +28,10 @@ protected:
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit,
|
||||
uint8_t OSABI,
|
||||
bool HasRelocationAddend,
|
||||
uint8_t ABIVersion)
|
||||
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_AMDGPU,
|
||||
HasRelocationAddend, ABIVersion) {}
|
||||
AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
|
||||
bool HasRelocationAddend)
|
||||
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_AMDGPU,
|
||||
HasRelocationAddend) {}
|
||||
|
||||
unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
|
||||
const MCValue &Target,
|
||||
@@ -100,9 +97,7 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
|
||||
|
||||
std::unique_ptr<MCObjectTargetWriter>
|
||||
llvm::createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
|
||||
bool HasRelocationAddend,
|
||||
uint8_t ABIVersion) {
|
||||
bool HasRelocationAddend) {
|
||||
return std::make_unique<AMDGPUELFObjectWriter>(Is64Bit, OSABI,
|
||||
HasRelocationAddend,
|
||||
ABIVersion);
|
||||
HasRelocationAddend);
|
||||
}
|
||||
|
||||
@@ -42,8 +42,8 @@ MCAsmBackend *createAMDGPUAsmBackend(const Target &T,
|
||||
|
||||
std::unique_ptr<MCObjectTargetWriter>
|
||||
createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
|
||||
bool HasRelocationAddend, uint8_t ABIVersion);
|
||||
} // End llvm namespace
|
||||
bool HasRelocationAddend);
|
||||
} // namespace llvm
|
||||
|
||||
#define GET_REGINFO_ENUM
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "llvm/MC/MCAssembler.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCELFStreamer.h"
|
||||
#include "llvm/MC/MCObjectWriter.h"
|
||||
#include "llvm/MC/MCSectionELF.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
@@ -35,27 +36,6 @@ using namespace llvm::AMDGPU;
|
||||
// AMDGPUTargetStreamer
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor,
|
||||
uint32_t &Stepping, bool Sramecc, bool Xnack) {
|
||||
if (Major == 9 && Minor == 0) {
|
||||
switch (Stepping) {
|
||||
case 0:
|
||||
case 2:
|
||||
case 4:
|
||||
case 6:
|
||||
if (Xnack)
|
||||
Stepping++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
|
||||
HSAMD::Metadata HSAMetadata;
|
||||
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
|
||||
return false;
|
||||
return EmitHSAMetadata(HSAMetadata);
|
||||
}
|
||||
|
||||
bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
|
||||
msgpack::Document HSAMetadataDoc;
|
||||
if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
|
||||
@@ -238,21 +218,10 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
|
||||
OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
|
||||
}
|
||||
|
||||
void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
|
||||
uint32_t Major, uint32_t Minor) {
|
||||
OS << "\t.hsa_code_object_version " <<
|
||||
Twine(Major) << "," << Twine(Minor) << '\n';
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
|
||||
uint32_t Minor,
|
||||
uint32_t Stepping,
|
||||
StringRef VendorName,
|
||||
StringRef ArchName) {
|
||||
convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
|
||||
OS << "\t.hsa_code_object_isa " << Twine(Major) << "," << Twine(Minor) << ","
|
||||
<< Twine(Stepping) << ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
|
||||
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
|
||||
unsigned COV) {
|
||||
AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
|
||||
OS << "\t.amdhsa_code_object_version " << COV << '\n';
|
||||
}
|
||||
|
||||
void
|
||||
@@ -283,18 +252,6 @@ bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
|
||||
const AMDGPU::HSAMD::Metadata &HSAMetadata) {
|
||||
std::string HSAMetadataString;
|
||||
if (HSAMD::toString(HSAMetadata, HSAMetadataString))
|
||||
return false;
|
||||
|
||||
OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
|
||||
OS << HSAMetadataString << '\n';
|
||||
OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
|
||||
msgpack::Document &HSAMetadataDoc, bool Strict) {
|
||||
HSAMD::V3::MetadataVerifier Verifier(Strict);
|
||||
@@ -336,7 +293,7 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
|
||||
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
|
||||
bool ReserveVCC, bool ReserveFlatScr, unsigned CodeObjectVersion) {
|
||||
bool ReserveVCC, bool ReserveFlatScr) {
|
||||
IsaVersion IVersion = getIsaVersion(STI.getCPU());
|
||||
|
||||
OS << "\t.amdhsa_kernel " << KernelName << '\n';
|
||||
@@ -529,6 +486,8 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
|
||||
void AMDGPUTargetELFStreamer::finish() {
|
||||
MCAssembler &MCA = getStreamer().getAssembler();
|
||||
MCA.setELFHeaderEFlags(getEFlags());
|
||||
MCA.getWriter().setOverrideABIVersion(
|
||||
getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion));
|
||||
|
||||
std::string Blob;
|
||||
const char *Vendor = getPALMetadata()->getVendor();
|
||||
@@ -616,17 +575,7 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
|
||||
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
|
||||
assert(isHsaAbi(STI));
|
||||
|
||||
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
|
||||
switch (*HsaAbiVer) {
|
||||
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
|
||||
return getEFlagsV3();
|
||||
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
|
||||
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
|
||||
return getEFlagsV4();
|
||||
}
|
||||
}
|
||||
|
||||
llvm_unreachable("HSA OS ABI Version identification must be defined");
|
||||
return getEFlagsV4();
|
||||
}
|
||||
|
||||
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
|
||||
@@ -699,44 +648,6 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
|
||||
|
||||
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
|
||||
|
||||
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
|
||||
uint32_t Major, uint32_t Minor) {
|
||||
|
||||
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
|
||||
ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
|
||||
OS.emitInt32(Major);
|
||||
OS.emitInt32(Minor);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
|
||||
uint32_t Minor,
|
||||
uint32_t Stepping,
|
||||
StringRef VendorName,
|
||||
StringRef ArchName) {
|
||||
uint16_t VendorNameSize = VendorName.size() + 1;
|
||||
uint16_t ArchNameSize = ArchName.size() + 1;
|
||||
|
||||
unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
|
||||
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
|
||||
VendorNameSize + ArchNameSize;
|
||||
|
||||
convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
|
||||
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
|
||||
ELF::NT_AMD_HSA_ISA_VERSION, [&](MCELFStreamer &OS) {
|
||||
OS.emitInt16(VendorNameSize);
|
||||
OS.emitInt16(ArchNameSize);
|
||||
OS.emitInt32(Major);
|
||||
OS.emitInt32(Minor);
|
||||
OS.emitInt32(Stepping);
|
||||
OS.emitBytes(VendorName);
|
||||
OS.emitInt8(0); // NULL terminate VendorName
|
||||
OS.emitBytes(ArchName);
|
||||
OS.emitInt8(0); // NULL terminate ArchName
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
|
||||
|
||||
@@ -818,30 +729,6 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
|
||||
const AMDGPU::HSAMD::Metadata &HSAMetadata) {
|
||||
std::string HSAMetadataString;
|
||||
if (HSAMD::toString(HSAMetadata, HSAMetadataString))
|
||||
return false;
|
||||
|
||||
// Create two labels to mark the beginning and end of the desc field
|
||||
// and a MCExpr to calculate the size of the desc field.
|
||||
auto &Context = getContext();
|
||||
auto *DescBegin = Context.createTempSymbol();
|
||||
auto *DescEnd = Context.createTempSymbol();
|
||||
auto *DescSZ = MCBinaryExpr::createSub(
|
||||
MCSymbolRefExpr::create(DescEnd, Context),
|
||||
MCSymbolRefExpr::create(DescBegin, Context), Context);
|
||||
|
||||
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_METADATA,
|
||||
[&](MCELFStreamer &OS) {
|
||||
OS.emitLabel(DescBegin);
|
||||
OS.emitBytes(HSAMetadataString);
|
||||
OS.emitLabel(DescEnd);
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
|
||||
const MCSubtargetInfo &STI) {
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
@@ -889,8 +776,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
|
||||
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
|
||||
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
|
||||
unsigned CodeObjectVersion) {
|
||||
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
|
||||
auto &Streamer = getStreamer();
|
||||
auto &Context = Streamer.getContext();
|
||||
|
||||
|
||||
@@ -37,23 +37,24 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
|
||||
protected:
|
||||
// TODO: Move HSAMetadataStream to AMDGPUTargetStreamer.
|
||||
std::optional<AMDGPU::IsaInfo::AMDGPUTargetID> TargetID;
|
||||
unsigned CodeObjectVersion;
|
||||
|
||||
MCContext &getContext() const { return Streamer.getContext(); }
|
||||
|
||||
public:
|
||||
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
|
||||
AMDGPUTargetStreamer(MCStreamer &S)
|
||||
: MCTargetStreamer(S),
|
||||
// Assume the default COV for now, EmitDirectiveAMDHSACodeObjectVersion
|
||||
// will update this if it is encountered.
|
||||
CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {}
|
||||
|
||||
AMDGPUPALMetadata *getPALMetadata() { return &PALMetadata; }
|
||||
|
||||
virtual void EmitDirectiveAMDGCNTarget(){};
|
||||
|
||||
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
|
||||
uint32_t Minor){};
|
||||
|
||||
virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor,
|
||||
uint32_t Stepping,
|
||||
StringRef VendorName,
|
||||
StringRef ArchName){};
|
||||
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) {
|
||||
CodeObjectVersion = COV;
|
||||
}
|
||||
|
||||
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header){};
|
||||
|
||||
@@ -65,9 +66,6 @@ public:
|
||||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitISAVersion() { return true; }
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString);
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString);
|
||||
|
||||
@@ -98,8 +96,7 @@ public:
|
||||
virtual void EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
|
||||
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
|
||||
unsigned CodeObjectVersion){};
|
||||
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {}
|
||||
|
||||
static StringRef getArchNameFromElfMach(unsigned ElfMach);
|
||||
static unsigned getElfMach(StringRef GPU);
|
||||
@@ -110,15 +107,12 @@ public:
|
||||
std::optional<AMDGPU::IsaInfo::AMDGPUTargetID> &getTargetID() {
|
||||
return TargetID;
|
||||
}
|
||||
void initializeTargetID(const MCSubtargetInfo &STI,
|
||||
unsigned CodeObjectVersion) {
|
||||
void initializeTargetID(const MCSubtargetInfo &STI) {
|
||||
assert(TargetID == std::nullopt && "TargetID can only be initialized once");
|
||||
TargetID.emplace(STI);
|
||||
getTargetID()->setCodeObjectVersion(CodeObjectVersion);
|
||||
}
|
||||
void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString,
|
||||
unsigned CodeObjectVersion) {
|
||||
initializeTargetID(STI, CodeObjectVersion);
|
||||
void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) {
|
||||
initializeTargetID(STI);
|
||||
|
||||
assert(getTargetID() != std::nullopt && "TargetID is None");
|
||||
getTargetID()->setTargetIDFromFeaturesString(FeatureString);
|
||||
@@ -134,12 +128,7 @@ public:
|
||||
|
||||
void EmitDirectiveAMDGCNTarget() override;
|
||||
|
||||
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
|
||||
uint32_t Minor) override;
|
||||
|
||||
void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor,
|
||||
uint32_t Stepping, StringRef VendorName,
|
||||
StringRef ArchName) override;
|
||||
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;
|
||||
|
||||
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
|
||||
|
||||
@@ -153,9 +142,6 @@ public:
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitCodeEnd(const MCSubtargetInfo &STI) override;
|
||||
|
||||
@@ -165,8 +151,7 @@ public:
|
||||
void EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
|
||||
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
|
||||
unsigned CodeObjectVersion) override;
|
||||
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
|
||||
};
|
||||
|
||||
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
|
||||
@@ -198,13 +183,6 @@ public:
|
||||
|
||||
void EmitDirectiveAMDGCNTarget() override;
|
||||
|
||||
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
|
||||
uint32_t Minor) override;
|
||||
|
||||
void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor,
|
||||
uint32_t Stepping, StringRef VendorName,
|
||||
StringRef ArchName) override;
|
||||
|
||||
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
|
||||
|
||||
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
|
||||
@@ -217,9 +195,6 @@ public:
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitCodeEnd(const MCSubtargetInfo &STI) override;
|
||||
|
||||
@@ -229,9 +204,7 @@ public:
|
||||
void EmitAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo &STI, StringRef KernelName,
|
||||
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
|
||||
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
|
||||
unsigned CodeObjectVersion) override;
|
||||
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2302,7 +2302,7 @@ void SITargetLowering::allocateSpecialInputSGPRs(
|
||||
|
||||
const Module *M = MF.getFunction().getParent();
|
||||
if (UserSGPRInfo.hasQueuePtr() &&
|
||||
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5)
|
||||
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5)
|
||||
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
|
||||
|
||||
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
|
||||
@@ -2355,7 +2355,7 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
|
||||
|
||||
const Module *M = MF.getFunction().getParent();
|
||||
if (UserSGPRInfo.hasQueuePtr() &&
|
||||
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
|
||||
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
|
||||
Register QueuePtrReg = Info.addQueuePtr(TRI);
|
||||
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
|
||||
CCInfo.AllocateReg(QueuePtrReg);
|
||||
@@ -6438,7 +6438,7 @@ SDValue SITargetLowering::lowerTrapHsaQueuePtr(
|
||||
SDValue QueuePtr;
|
||||
// For code object version 5, QueuePtr is passed through implicit kernarg.
|
||||
const Module *M = DAG.getMachineFunction().getFunction().getParent();
|
||||
if (AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) {
|
||||
if (AMDGPU::getAMDHSACodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) {
|
||||
QueuePtr =
|
||||
loadImplicitKernelArgument(DAG, MVT::i64, SL, Align(8), QUEUE_PTR);
|
||||
} else {
|
||||
@@ -6542,7 +6542,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
|
||||
// For code object version 5, private_base and shared_base are passed through
|
||||
// implicit kernargs.
|
||||
const Module *M = DAG.getMachineFunction().getFunction().getParent();
|
||||
if (AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) {
|
||||
if (AMDGPU::getAMDHSACodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) {
|
||||
ImplicitParameter Param =
|
||||
(AS == AMDGPUAS::LOCAL_ADDRESS) ? SHARED_BASE : PRIVATE_BASE;
|
||||
return loadImplicitKernelArgument(DAG, MVT::i32, DL, Align(4), Param);
|
||||
|
||||
@@ -31,10 +31,11 @@
|
||||
#define GET_INSTRMAP_INFO
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
static llvm::cl::opt<unsigned>
|
||||
AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
|
||||
llvm::cl::desc("AMDHSA Code Object Version"),
|
||||
llvm::cl::init(4));
|
||||
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
|
||||
"amdhsa-code-object-version", llvm::cl::Hidden,
|
||||
llvm::cl::init(llvm::AMDGPU::AMDHSA_COV4),
|
||||
llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
|
||||
"or asm directive still take priority if present)"));
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -161,47 +162,34 @@ bool isHsaAbi(const MCSubtargetInfo &STI) {
|
||||
return STI.getTargetTriple().getOS() == Triple::AMDHSA;
|
||||
}
|
||||
|
||||
std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
|
||||
if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
|
||||
return std::nullopt;
|
||||
unsigned getAMDHSACodeObjectVersion(const Module &M) {
|
||||
if (auto Ver = mdconst::extract_or_null<ConstantInt>(
|
||||
M.getModuleFlag("amdgpu_code_object_version"))) {
|
||||
return (unsigned)Ver->getZExtValue() / 100;
|
||||
}
|
||||
|
||||
switch (AmdhsaCodeObjectVersion) {
|
||||
return getDefaultAMDHSACodeObjectVersion();
|
||||
}
|
||||
|
||||
unsigned getDefaultAMDHSACodeObjectVersion() {
|
||||
return DefaultAMDHSACodeObjectVersion;
|
||||
}
|
||||
|
||||
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
|
||||
if (T.getOS() != Triple::AMDHSA)
|
||||
return 0;
|
||||
|
||||
switch (CodeObjectVersion) {
|
||||
case 4:
|
||||
return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
|
||||
case 5:
|
||||
return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
|
||||
default:
|
||||
report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
|
||||
Twine(AmdhsaCodeObjectVersion));
|
||||
report_fatal_error("Unsupported AMDHSA Code Object Version " +
|
||||
Twine(CodeObjectVersion));
|
||||
}
|
||||
}
|
||||
|
||||
bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
|
||||
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
|
||||
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
|
||||
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
|
||||
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned getAmdhsaCodeObjectVersion() {
|
||||
return AmdhsaCodeObjectVersion;
|
||||
}
|
||||
|
||||
unsigned getCodeObjectVersion(const Module &M) {
|
||||
if (auto Ver = mdconst::extract_or_null<ConstantInt>(
|
||||
M.getModuleFlag("amdgpu_code_object_version"))) {
|
||||
return (unsigned)Ver->getZExtValue() / 100;
|
||||
}
|
||||
|
||||
// Default code object version.
|
||||
return AMDHSA_COV4;
|
||||
}
|
||||
|
||||
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
|
||||
switch (CodeObjectVersion) {
|
||||
case AMDHSA_COV4:
|
||||
@@ -705,7 +693,7 @@ namespace IsaInfo {
|
||||
|
||||
AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
|
||||
: STI(STI), XnackSetting(TargetIDSetting::Any),
|
||||
SramEccSetting(TargetIDSetting::Any), CodeObjectVersion(0) {
|
||||
SramEccSetting(TargetIDSetting::Any) {
|
||||
if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
|
||||
XnackSetting = TargetIDSetting::Unsupported;
|
||||
if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
|
||||
@@ -817,23 +805,16 @@ std::string AMDGPUTargetID::toString() const {
|
||||
|
||||
std::string Features;
|
||||
if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
|
||||
switch (CodeObjectVersion) {
|
||||
case AMDGPU::AMDHSA_COV4:
|
||||
case AMDGPU::AMDHSA_COV5:
|
||||
// sramecc.
|
||||
if (getSramEccSetting() == TargetIDSetting::Off)
|
||||
Features += ":sramecc-";
|
||||
else if (getSramEccSetting() == TargetIDSetting::On)
|
||||
Features += ":sramecc+";
|
||||
// xnack.
|
||||
if (getXnackSetting() == TargetIDSetting::Off)
|
||||
Features += ":xnack-";
|
||||
else if (getXnackSetting() == TargetIDSetting::On)
|
||||
Features += ":xnack+";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
// sramecc.
|
||||
if (getSramEccSetting() == TargetIDSetting::Off)
|
||||
Features += ":sramecc-";
|
||||
else if (getSramEccSetting() == TargetIDSetting::On)
|
||||
Features += ":sramecc+";
|
||||
// xnack.
|
||||
if (getXnackSetting() == TargetIDSetting::Off)
|
||||
Features += ":xnack-";
|
||||
else if (getXnackSetting() == TargetIDSetting::On)
|
||||
Features += ":xnack+";
|
||||
}
|
||||
|
||||
StreamRep << Processor << Features;
|
||||
|
||||
@@ -46,14 +46,18 @@ enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 };
|
||||
|
||||
/// \returns True if \p STI is AMDHSA.
|
||||
bool isHsaAbi(const MCSubtargetInfo &STI);
|
||||
/// \returns HSA OS ABI Version identification.
|
||||
std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
|
||||
/// \returns True if HSA OS ABI Version identification is 4,
|
||||
/// false otherwise.
|
||||
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
|
||||
/// \returns True if HSA OS ABI Version identification is 5,
|
||||
/// false otherwise.
|
||||
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
|
||||
|
||||
/// \returns Code object version from the IR module flag.
|
||||
unsigned getAMDHSACodeObjectVersion(const Module &M);
|
||||
|
||||
/// \returns The default HSA code object version. This should only be used when
|
||||
/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
|
||||
/// flag or a .amdhsa_code_object_version directive)
|
||||
unsigned getDefaultAMDHSACodeObjectVersion();
|
||||
|
||||
/// \returns ABIVersion suitable for use in ELF's e_ident[ABIVERSION]. \param
|
||||
/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
|
||||
uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
|
||||
|
||||
/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
|
||||
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
|
||||
@@ -64,12 +68,6 @@ unsigned getHostcallImplicitArgPosition(unsigned COV);
|
||||
unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
|
||||
unsigned getCompletionActionImplicitArgPosition(unsigned COV);
|
||||
|
||||
/// \returns Code object version.
|
||||
unsigned getAmdhsaCodeObjectVersion();
|
||||
|
||||
/// \returns Code object version.
|
||||
unsigned getCodeObjectVersion(const Module &M);
|
||||
|
||||
struct GcnBufferFormatInfo {
|
||||
unsigned Format;
|
||||
unsigned BitsPerComp;
|
||||
@@ -114,7 +112,6 @@ private:
|
||||
const MCSubtargetInfo &STI;
|
||||
TargetIDSetting XnackSetting;
|
||||
TargetIDSetting SramEccSetting;
|
||||
unsigned CodeObjectVersion;
|
||||
|
||||
public:
|
||||
explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
|
||||
@@ -144,10 +141,6 @@ public:
|
||||
return XnackSetting;
|
||||
}
|
||||
|
||||
void setCodeObjectVersion(unsigned COV) {
|
||||
CodeObjectVersion = COV;
|
||||
}
|
||||
|
||||
/// Sets xnack setting to \p NewXnackSetting.
|
||||
void setXnackSetting(TargetIDSetting NewXnackSetting) {
|
||||
XnackSetting = NewXnackSetting;
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
; OPT: .text
|
||||
; OPT-NEXT: .section ".note.GNU-stack"
|
||||
; OPT-NEXT: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
|
||||
; COV4-NEXT: .amdhsa_code_object_version 4
|
||||
; COV5-NEXT: .amdhsa_code_object_version 5
|
||||
; OPT-NEXT: .amdgpu_metadata
|
||||
; OPT-NEXT: ---
|
||||
; OPT-NEXT: amdhsa.kernels: []
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
|
||||
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
|
||||
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
|
||||
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
|
||||
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
|
||||
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
|
||||
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
|
||||
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
|
||||
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
|
||||
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
|
||||
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
|
||||
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
|
||||
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
|
||||
|
||||
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
|
||||
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
|
||||
|
||||
24
llvm/test/MC/AMDGPU/elf-header-cov.s
Normal file
24
llvm/test/MC/AMDGPU/elf-header-cov.s
Normal file
@@ -0,0 +1,24 @@
|
||||
// RUN: sed 's/COV/4/g' %s | llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj | \
|
||||
// RUN: llvm-readobj --file-headers - | FileCheck %s --check-prefixes=HS4
|
||||
|
||||
// RUN: sed 's/COV/5/g' %s | llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj | \
|
||||
// RUN: llvm-readobj --file-headers - | FileCheck %s --check-prefixes=HS5
|
||||
|
||||
// RUN: sed 's/COV/4/g' %s | not llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 2>&1 | \
|
||||
// RUN: FileCheck %s --check-prefix=ERR
|
||||
|
||||
// RUN: sed 's/COV/4/g' %s | not llvm-mc -triple amdgcn-amd-mesa3d -mcpu=gfx802 2>&1 | \
|
||||
// RUN: FileCheck %s --check-prefix=ERR
|
||||
|
||||
// RUN: sed 's/COV/4/g' %s | not llvm-mc -triple amdgcn-amd- -mcpu=gfx802 2>&1 | \
|
||||
// RUN: FileCheck %s --check-prefix=ERR
|
||||
|
||||
.amdhsa_code_object_version COV
|
||||
|
||||
// ERR: error: unknown directive
|
||||
|
||||
// HS4: OS/ABI: AMDGPU_HSA (0x40)
|
||||
// HS4-NEXT: ABIVersion: 2
|
||||
|
||||
// HS5: OS/ABI: AMDGPU_HSA (0x40)
|
||||
// HS5-NEXT: ABIVersion: 3
|
||||
@@ -1,5 +1,5 @@
|
||||
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 -show-encoding %s | FileCheck %s --check-prefix=ASM
|
||||
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF
|
||||
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM
|
||||
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF
|
||||
|
||||
// ELF: Section {
|
||||
// ELF: Name: .text
|
||||
@@ -19,6 +19,9 @@
|
||||
.amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
|
||||
// ASM: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
|
||||
|
||||
.amdhsa_code_object_version 4
|
||||
// ASM: .amdhsa_code_object_version 4
|
||||
|
||||
.set my_is_ptr64, 1
|
||||
|
||||
.if my_is_ptr64 == 0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=ASM %s
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 --amdhsa-code-object-version=4 -filetype=obj < %s > %t
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefix=ASM %s
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=obj < %s > %t
|
||||
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
|
||||
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
|
||||
|
||||
@@ -52,6 +52,9 @@
|
||||
.amdgcn_target "amdgcn-amd-amdhsa--gfx1200"
|
||||
// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200"
|
||||
|
||||
.amdhsa_code_object_version 4
|
||||
// ASM: .amdhsa_code_object_version 4
|
||||
|
||||
.p2align 8
|
||||
.type minimal,@function
|
||||
minimal:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack -filetype=obj < %s > %t
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t
|
||||
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
|
||||
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
|
||||
|
||||
@@ -52,6 +52,9 @@
|
||||
.amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
|
||||
// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
|
||||
|
||||
.amdhsa_code_object_version 4
|
||||
// ASM: .amdhsa_code_object_version 4
|
||||
|
||||
.p2align 8
|
||||
.type minimal,@function
|
||||
minimal:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=5 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=5 -mattr=+xnack -filetype=obj < %s > %t
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
|
||||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t
|
||||
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
|
||||
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
|
||||
|
||||
@@ -52,6 +52,9 @@
|
||||
.amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
|
||||
// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
|
||||
|
||||
.amdhsa_code_object_version 5
|
||||
// ASM: .amdhsa_code_object_version 5
|
||||
|
||||
.p2align 8
|
||||
.type minimal,@function
|
||||
minimal:
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx801 -mattr=-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts,-xnack -show-encoding %s | FileCheck --check-prefix=GFX9 %s
|
||||
|
||||
.hsa_code_object_isa
|
||||
// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
|
||||
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
|
||||
Reference in New Issue
Block a user