diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 768358c345f0..d2d622ee5201 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -15428,6 +15428,14 @@ command-line options such as ``-triple``, ``-mcpu``, and The target ID syntax used for code object V2 to V3 for this directive differs from that used elsewhere. See :ref:`amdgpu-target-id-v2-v3`. +.. _amdgpu-assembler-directive-amdhsa-code-object-version: + +.amdhsa_code_object_version ++++++++++++++++++++++++++++++++++++++ + +Optional directive which declares the code object version to be generated by the +assembler. If not present, a default value will be used. + .amdhsa_kernel +++++++++++++++++++++ diff --git a/llvm/include/llvm/MC/MCObjectWriter.h b/llvm/include/llvm/MC/MCObjectWriter.h index 8c1045237393..e66bb2c932dd 100644 --- a/llvm/include/llvm/MC/MCObjectWriter.h +++ b/llvm/include/llvm/MC/MCObjectWriter.h @@ -92,6 +92,9 @@ public: /// ELF only. Mark that we have seen GNU ABI usage (e.g. SHF_GNU_RETAIN). virtual void markGnuAbi() {} + /// ELF only, override the default ABIVersion in the ELF header. + virtual void setOverrideABIVersion(uint8_t ABIVersion) {} + /// Tell the object writer to emit an address-significance table during /// writeObject(). If this function is not called, all symbols are treated as /// address-significant. diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h index e0838a1f425e..2dae6feac088 100644 --- a/llvm/include/llvm/Support/AMDGPUMetadata.h +++ b/llvm/include/llvm/Support/AMDGPUMetadata.h @@ -29,11 +29,6 @@ namespace AMDGPU { //===----------------------------------------------------------------------===// namespace HSAMD { -/// HSA metadata major version for code object V2. -constexpr uint32_t VersionMajorV2 = 1; -/// HSA metadata minor version for code object V2. -constexpr uint32_t VersionMinorV2 = 0; - /// HSA metadata major version for code object V3. 
constexpr uint32_t VersionMajorV3 = 1; /// HSA metadata minor version for code object V3. @@ -49,10 +44,9 @@ constexpr uint32_t VersionMajorV5 = 1; /// HSA metadata minor version for code object V5. constexpr uint32_t VersionMinorV5 = 2; -/// HSA metadata beginning assembler directive. +/// Old HSA metadata beginning assembler directive for V2. This is only used for +/// diagnostics now. constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata"; -/// HSA metadata ending assembler directive. -constexpr char AssemblerDirectiveEnd[] = ".end_amd_amdgpu_hsa_metadata"; /// Access qualifiers. enum class AccessQualifier : uint8_t { diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index cb8af1aa9955..531d29954c38 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -226,6 +226,8 @@ class ELFObjectWriter : public MCObjectWriter { bool SeenGnuAbi = false; + std::optional<uint8_t> OverrideABIVersion; + bool hasRelocationAddend() const; bool shouldRelocateWithSymbol(const MCAssembler &Asm, const MCValue &Val, @@ -238,6 +240,7 @@ public: void reset() override { SeenGnuAbi = false; + OverrideABIVersion.reset(); Relocations.clear(); Renames.clear(); MCObjectWriter::reset(); @@ -264,6 +267,10 @@ public: void markGnuAbi() override { SeenGnuAbi = true; } bool seenGnuAbi() const { return SeenGnuAbi; } + bool seenOverrideABIVersion() const { return OverrideABIVersion.has_value(); } + uint8_t getOverrideABIVersion() const { return OverrideABIVersion.value(); } + void setOverrideABIVersion(uint8_t V) override { OverrideABIVersion = V; } + friend struct ELFWriter; }; @@ -417,7 +424,9 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) { ? int(ELF::ELFOSABI_GNU) : OSABI); // e_ident[EI_ABIVERSION] - W.OS << char(OWriter.TargetObjectWriter->getABIVersion()); + W.OS << char(OWriter.seenOverrideABIVersion() + ? 
OWriter.getOverrideABIVersion() + : OWriter.TargetObjectWriter->getABIVersion()); W.OS.write_zeros(ELF::EI_NIDENT - ELF::EI_PAD); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 10f7e7a26edb..279ef8ca2751 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -123,8 +123,11 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) { getTargetStreamer()->EmitDirectiveAMDGCNTarget(); - if (TM.getTargetTriple().getOS() == Triple::AMDHSA) + if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { + getTargetStreamer()->EmitDirectiveAMDHSACodeObjectVersion( + CodeObjectVersion); HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID()); + } if (TM.getTargetTriple().getOS() == Triple::AMDPAL) getTargetStreamer()->getPALMetadata()->readFromIR(M); @@ -230,8 +233,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() { IsaInfo::getNumExtraSGPRs( &STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, getTargetStreamer()->getTargetID()->isXnackOnOrAny()), - CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, - CodeObjectVersion); + CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed); Streamer.popSection(); } @@ -323,7 +325,7 @@ void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { } bool AMDGPUAsmPrinter::doInitialization(Module &M) { - CodeObjectVersion = AMDGPU::getCodeObjectVersion(M); + CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(M); if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { switch (CodeObjectVersion) { @@ -631,8 +633,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { void AMDGPUAsmPrinter::initializeTargetID(const Module &M) { // In the beginning all features are either 'Any' or 'NotSupported', // depending on global target features. This will cover empty modules. 
- getTargetStreamer()->initializeTargetID( - *getGlobalSTI(), getGlobalSTI()->getFeatureString(), CodeObjectVersion); + getTargetStreamer()->initializeTargetID(*getGlobalSTI(), + getGlobalSTI()->getFeatureString()); // If module is empty, we are done. if (M.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 5fd9e571282d..d7f5110427ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -144,7 +144,7 @@ public: BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC, TargetMachine &TM) : InformationCache(M, AG, Allocator, CGSCC), TM(TM), - CodeObjectVersion(AMDGPU::getCodeObjectVersion(M)) {} + CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {} TargetMachine &TM; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index d3b2cb1936b5..6d05c3678bf0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -474,7 +474,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, const Module *M = MF.getFunction().getParent(); if (UserSGPRInfo.hasQueuePtr() && - AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) { + AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 74e9cd7d0965..186fa58524b9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -532,7 +532,8 @@ void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, Func.getCallingConv() != CallingConv::SPIR_KERNEL) return; - auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent()); + auto CodeObjectVersion = + 
AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent()); auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); auto Kernels = diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 0040cd7c3f1a..8e74d4c0e945 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2139,7 +2139,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture( LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); // For code object version 5, private_base and shared_base are passed through // implicit kernargs. - if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >= + if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >= AMDGPU::AMDHSA_COV5) { AMDGPUTargetLowering::ImplicitParameter Param = AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE @@ -6582,7 +6582,7 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr( Register SGPR01(AMDGPU::SGPR0_SGPR1); // For code object version 5, queue_ptr is passed through implicit kernarg. - if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >= + if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >= AMDGPU::AMDHSA_COV5) { AMDGPUTargetLowering::ImplicitParameter Param = AMDGPUTargetLowering::QUEUE_PTR; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp index 097722157d41..bf7f67c086f2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -323,7 +323,8 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) { // TargetPassConfig for subtarget. 
bool AMDGPULowerKernelAttributes::runOnModule(Module &M) { bool MadeChange = false; - bool IsV5OrAbove = AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5; + bool IsV5OrAbove = + AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5; Function *BasePtr = getBasePtrIntrinsic(M, IsV5OrAbove); if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used. @@ -356,7 +357,7 @@ ModulePass *llvm::createAMDGPULowerKernelAttributesPass() { PreservedAnalyses AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) { bool IsV5OrAbove = - AMDGPU::getCodeObjectVersion(*F.getParent()) >= AMDGPU::AMDHSA_COV5; + AMDGPU::getAMDHSACodeObjectVersion(*F.getParent()) >= AMDGPU::AMDHSA_COV5; Function *BasePtr = getBasePtrIntrinsic(*F.getParent(), IsV5OrAbove); if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index fc47b02c98e0..0c759e7f3b09 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -112,7 +112,7 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) { // By default, for code object v5 and later, track only the minimum scratch // size - if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 || + if (AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 || STI.getTargetTriple().getOS() == Triple::AMDPAL) { if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences()) AssumedStackSizeForDynamicSizeObjects = 0; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index f19c57668564..bcc7dedf3229 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -571,7 +571,7 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const { // Assume all implicit inputs are used by default const Module *M = F.getParent(); unsigned 
NBytes = - AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5 ? 256 : 56; + AMDGPU::getAMDHSACodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5 ? 256 : 56; return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes", NBytes); } diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index bd68054589b1..66267c9255f4 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1303,10 +1303,8 @@ private: unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks); bool ParseDirectiveAMDGCNTarget(); + bool ParseDirectiveAMDHSACodeObjectVersion(); bool ParseDirectiveAMDHSAKernel(); - bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); - bool ParseDirectiveHSACodeObjectVersion(); - bool ParseDirectiveHSACodeObjectISA(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); // TODO: Possibly make subtargetHasRegister const. 
@@ -5133,20 +5131,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { return false; } -bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, - uint32_t &Minor) { - if (ParseAsAbsoluteExpression(Major)) - return TokError("invalid major version"); - - if (!trySkipToken(AsmToken::Comma)) - return TokError("minor version number required, comma expected"); - - if (ParseAsAbsoluteExpression(Minor)) - return TokError("invalid minor version"); - - return false; -} - bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) return TokError("directive only supported for amdgcn architecture"); @@ -5612,63 +5596,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { } } - getTargetStreamer().EmitAmdhsaKernelDescriptor( - getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, - ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion()); + getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD, + NextFreeVGPR, NextFreeSGPR, + ReserveVCC, ReserveFlatScr); return false; } -bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { - uint32_t Major; - uint32_t Minor; - - if (ParseDirectiveMajorMinor(Major, Minor)) +bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { + uint32_t Version; + if (ParseAsAbsoluteExpression(Version)) return true; - getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); - return false; -} - -bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { - uint32_t Major; - uint32_t Minor; - uint32_t Stepping; - StringRef VendorName; - StringRef ArchName; - - // If this directive has no arguments, then use the ISA version for the - // targeted GPU. 
- if (isToken(AsmToken::EndOfStatement)) { - AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); - getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, - ISA.Stepping, - "AMD", "AMDGPU"); - return false; - } - - if (ParseDirectiveMajorMinor(Major, Minor)) - return true; - - if (!trySkipToken(AsmToken::Comma)) - return TokError("stepping version number required, comma expected"); - - if (ParseAsAbsoluteExpression(Stepping)) - return TokError("invalid stepping version"); - - if (!trySkipToken(AsmToken::Comma)) - return TokError("vendor name required, comma expected"); - - if (!parseString(VendorName, "invalid vendor name")) - return true; - - if (!trySkipToken(AsmToken::Comma)) - return TokError("arch name required, comma expected"); - - if (!parseString(ArchName, "invalid arch name")) - return true; - - getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, - VendorName, ArchName); + getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version); return false; } @@ -5955,16 +5894,13 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amdhsa_kernel") return ParseDirectiveAMDHSAKernel(); + if (IDVal == ".amdhsa_code_object_version") + return ParseDirectiveAMDHSACodeObjectVersion(); + // TODO: Restructure/combine with PAL metadata directive. if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) return ParseDirectiveHSAMetadata(); } else { - if (IDVal == ".hsa_code_object_version") - return ParseDirectiveHSACodeObjectVersion(); - - if (IDVal == ".hsa_code_object_isa") - return ParseDirectiveHSACodeObjectISA(); - if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); @@ -8137,9 +8073,8 @@ void AMDGPUAsmParser::onBeginOfFile() { return; if (!getTargetStreamer().getTargetID()) - getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(), - // TODO: Should try to check code object version from directive??? 
- AMDGPU::getAmdhsaCodeObjectVersion()); + getTargetStreamer().initializeTargetID(getSTI(), + getSTI().getFeatureString()); if (isHsaAbi(getSTI())) getTargetStreamer().EmitDirectiveAMDGCNTarget(); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 441032a37dfd..86096b0d80b4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2184,7 +2184,8 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); } - if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5) + // FIXME: We should be looking at the ELF header ABI version for this. + if (AMDGPU::getDefaultAMDHSACodeObjectVersion() >= AMDGPU::AMDHSA_COV5) PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack", KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index f91f36ed851b..8eb246ef57c9 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -232,13 +232,11 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { bool Is64Bit; bool HasRelocationAddend; uint8_t OSABI = ELF::ELFOSABI_NONE; - uint8_t ABIVersion = 0; public: - ELFAMDGPUAsmBackend(const Target &T, const Triple &TT, uint8_t ABIVersion) : - AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn), - HasRelocationAddend(TT.getOS() == Triple::AMDHSA), - ABIVersion(ABIVersion) { + ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) + : AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn), + HasRelocationAddend(TT.getOS() == Triple::AMDHSA) { switch (TT.getOS()) { case Triple::AMDHSA: OSABI = ELF::ELFOSABI_AMDGPU_HSA; @@ -256,8 +254,7 @@ public: std::unique_ptr<MCObjectTargetWriter> createObjectTargetWriter() const override { - return 
createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend, - ABIVersion); + return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend); } }; @@ -267,6 +264,5 @@ MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &Options) { - return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple(), - getHsaAbiVersion(&STI).value_or(0)); + return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple()); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 58eed81e0755..2d960a32339f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -18,8 +18,7 @@ namespace { class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter { public: - AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend, - uint8_t ABIVersion); + AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend); protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, @@ -29,12 +28,10 @@ protected: } // end anonymous namespace -AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit, - uint8_t OSABI, - bool HasRelocationAddend, - uint8_t ABIVersion) - : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_AMDGPU, - HasRelocationAddend, ABIVersion) {} +AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, + bool HasRelocationAddend) + : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_AMDGPU, + HasRelocationAddend) {} unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, @@ -100,9 +97,7 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr<MCObjectTargetWriter> llvm::createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, - bool HasRelocationAddend, - uint8_t ABIVersion) { + bool HasRelocationAddend) { return std::make_unique<AMDGPUELFObjectWriter>(Is64Bit, OSABI, - 
HasRelocationAddend, - ABIVersion); + HasRelocationAddend); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index 006115ba14fc..3ef00f75735b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -42,8 +42,8 @@ MCAsmBackend *createAMDGPUAsmBackend(const Target &T, std::unique_ptr<MCObjectTargetWriter> createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, - bool HasRelocationAddend, uint8_t ABIVersion); -} // End llvm namespace + bool HasRelocationAddend); +} // namespace llvm #define GET_REGINFO_ENUM #include "AMDGPUGenRegisterInfo.inc" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index e135a4e25dd1..d7e8ab76d5ff 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -20,6 +20,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/AMDGPUMetadata.h" @@ -35,27 +36,6 @@ using namespace llvm::AMDGPU; // AMDGPUTargetStreamer //===----------------------------------------------------------------------===// -static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor, - uint32_t &Stepping, bool Sramecc, bool Xnack) { - if (Major == 9 && Minor == 0) { - switch (Stepping) { - case 0: - case 2: - case 4: - case 6: - if (Xnack) - Stepping++; - } - } -} - -bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) { - HSAMD::Metadata HSAMetadata; - if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) - return false; - return EmitHSAMetadata(HSAMetadata); -} - bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { msgpack::Document 
HSAMetadataDoc; if (!HSAMetadataDoc.fromYAML(HSAMetadataString)) @@ -238,21 +218,10 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() { OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n"; } -void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion( - uint32_t Major, uint32_t Minor) { - OS << "\t.hsa_code_object_version " << - Twine(Major) << "," << Twine(Minor) << '\n'; -} - -void -AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major, - uint32_t Minor, - uint32_t Stepping, - StringRef VendorName, - StringRef ArchName) { - convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny()); - OS << "\t.hsa_code_object_isa " << Twine(Major) << "," << Twine(Minor) << "," - << Twine(Stepping) << ",\"" << VendorName << "\",\"" << ArchName << "\"\n"; +void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion( + unsigned COV) { + AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV); + OS << "\t.amdhsa_code_object_version " << COV << '\n'; } void @@ -283,18 +252,6 @@ bool AMDGPUTargetAsmStreamer::EmitISAVersion() { return true; } -bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( - const AMDGPU::HSAMD::Metadata &HSAMetadata) { - std::string HSAMetadataString; - if (HSAMD::toString(HSAMetadata, HSAMetadataString)) - return false; - - OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n'; - OS << HSAMetadataString << '\n'; - OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n'; - return true; -} - bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( msgpack::Document &HSAMetadataDoc, bool Strict) { HSAMD::V3::MetadataVerifier Verifier(Strict); @@ -336,7 +293,7 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, - bool ReserveVCC, bool ReserveFlatScr, unsigned CodeObjectVersion) 
{ + bool ReserveVCC, bool ReserveFlatScr) { IsaVersion IVersion = getIsaVersion(STI.getCPU()); OS << "\t.amdhsa_kernel " << KernelName << '\n'; @@ -529,6 +486,8 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { void AMDGPUTargetELFStreamer::finish() { MCAssembler &MCA = getStreamer().getAssembler(); MCA.setELFHeaderEFlags(getEFlags()); + MCA.getWriter().setOverrideABIVersion( + getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion)); std::string Blob; const char *Vendor = getPALMetadata()->getVendor(); @@ -616,17 +575,7 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() { unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() { assert(isHsaAbi(STI)); - if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) { - switch (*HsaAbiVer) { - case ELF::ELFABIVERSION_AMDGPU_HSA_V3: - return getEFlagsV3(); - case ELF::ELFABIVERSION_AMDGPU_HSA_V4: - case ELF::ELFABIVERSION_AMDGPU_HSA_V5: - return getEFlagsV4(); - } - } - - llvm_unreachable("HSA OS ABI Version identification must be defined"); + return getEFlagsV4(); } unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() { @@ -699,44 +648,6 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV4() { void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {} -void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion( - uint32_t Major, uint32_t Minor) { - - EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()), - ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) { - OS.emitInt32(Major); - OS.emitInt32(Minor); - }); -} - -void -AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major, - uint32_t Minor, - uint32_t Stepping, - StringRef VendorName, - StringRef ArchName) { - uint16_t VendorNameSize = VendorName.size() + 1; - uint16_t ArchNameSize = ArchName.size() + 1; - - unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + - sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + - VendorNameSize + ArchNameSize; - - convertIsaVersionV2(Major, Minor, Stepping, 
TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny()); - EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()), - ELF::NT_AMD_HSA_ISA_VERSION, [&](MCELFStreamer &OS) { - OS.emitInt16(VendorNameSize); - OS.emitInt16(ArchNameSize); - OS.emitInt32(Major); - OS.emitInt32(Minor); - OS.emitInt32(Stepping); - OS.emitBytes(VendorName); - OS.emitInt8(0); // NULL terminate VendorName - OS.emitBytes(ArchName); - OS.emitInt8(0); // NULL terminate ArchName - }); -} - void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { @@ -818,30 +729,6 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc, return true; } -bool AMDGPUTargetELFStreamer::EmitHSAMetadata( - const AMDGPU::HSAMD::Metadata &HSAMetadata) { - std::string HSAMetadataString; - if (HSAMD::toString(HSAMetadata, HSAMetadataString)) - return false; - - // Create two labels to mark the beginning and end of the desc field - // and a MCExpr to calculate the size of the desc field. 
- auto &Context = getContext(); - auto *DescBegin = Context.createTempSymbol(); - auto *DescEnd = Context.createTempSymbol(); - auto *DescSZ = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(DescEnd, Context), - MCSymbolRefExpr::create(DescBegin, Context), Context); - - EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_METADATA, - [&](MCELFStreamer &OS) { - OS.emitLabel(DescBegin); - OS.emitBytes(HSAMetadataString); - OS.emitLabel(DescEnd); - }); - return true; -} - bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader( const MCSubtargetInfo &STI) { for (int i = 0; i < 64; ++i) { @@ -889,8 +776,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, - unsigned CodeObjectVersion) { + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) { auto &Streamer = getStreamer(); auto &Context = Streamer.getContext(); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 55b5246c9210..7f8ddc42b2ee 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -37,23 +37,24 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { protected: // TODO: Move HSAMetadataStream to AMDGPUTargetStreamer. std::optional<AMDGPU::IsaInfo::AMDGPUTargetID> TargetID; + unsigned CodeObjectVersion; MCContext &getContext() const { return Streamer.getContext(); } public: - AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + AMDGPUTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S), + // Assume the default COV for now, EmitDirectiveAMDHSACodeObjectVersion + // will update this if it is encountered. 
+ CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {} AMDGPUPALMetadata *getPALMetadata() { return &PALMetadata; } virtual void EmitDirectiveAMDGCNTarget(){}; - virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, - uint32_t Minor){}; - - virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, - uint32_t Stepping, - StringRef VendorName, - StringRef ArchName){}; + virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) { + CodeObjectVersion = COV; + } virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header){}; @@ -65,9 +66,6 @@ public: /// \returns True on success, false on failure. virtual bool EmitISAVersion() { return true; } - /// \returns True on success, false on failure. - virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString); - /// \returns True on success, false on failure. virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString); @@ -98,8 +96,7 @@ public: virtual void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, - unsigned CodeObjectVersion){}; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {} static StringRef getArchNameFromElfMach(unsigned ElfMach); static unsigned getElfMach(StringRef GPU); @@ -110,15 +107,12 @@ public: std::optional<AMDGPU::IsaInfo::AMDGPUTargetID> &getTargetID() { return TargetID; } - void initializeTargetID(const MCSubtargetInfo &STI, - unsigned CodeObjectVersion) { + void initializeTargetID(const MCSubtargetInfo &STI) { assert(TargetID == std::nullopt && "TargetID can only be initialized once"); TargetID.emplace(STI); - getTargetID()->setCodeObjectVersion(CodeObjectVersion); } - void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString, - unsigned CodeObjectVersion) { - initializeTargetID(STI, CodeObjectVersion); + void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) { + 
initializeTargetID(STI); assert(getTargetID() != std::nullopt && "TargetID is None"); getTargetID()->setTargetIDFromFeaturesString(FeatureString); @@ -134,12 +128,7 @@ public: void EmitDirectiveAMDGCNTarget() override; - void EmitDirectiveHSACodeObjectVersion(uint32_t Major, - uint32_t Minor) override; - - void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, - uint32_t Stepping, StringRef VendorName, - StringRef ArchName) override; + void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override; void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override; @@ -153,9 +142,6 @@ public: /// \returns True on success, false on failure. bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override; - /// \returns True on success, false on failure. - bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; - /// \returns True on success, false on failure. bool EmitCodeEnd(const MCSubtargetInfo &STI) override; @@ -165,8 +151,7 @@ public: void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, - unsigned CodeObjectVersion) override; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { @@ -198,13 +183,6 @@ public: void EmitDirectiveAMDGCNTarget() override; - void EmitDirectiveHSACodeObjectVersion(uint32_t Major, - uint32_t Minor) override; - - void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, - uint32_t Stepping, StringRef VendorName, - StringRef ArchName) override; - void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override; void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; @@ -217,9 +195,6 @@ public: /// \returns True on success, false on failure. 
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override; - /// \returns True on success, false on failure. - bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; - /// \returns True on success, false on failure. bool EmitCodeEnd(const MCSubtargetInfo &STI) override; @@ -229,9 +204,7 @@ public: void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, - unsigned CodeObjectVersion) override; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; }; - } #endif diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index cc0c4d4e36ea..073c8cc72117 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2302,7 +2302,7 @@ void SITargetLowering::allocateSpecialInputSGPRs( const Module *M = MF.getFunction().getParent(); if (UserSGPRInfo.hasQueuePtr() && - AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) + AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr); // Implicit arg ptr takes the place of the kernarg segment pointer. This is a @@ -2355,7 +2355,7 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo, const Module *M = MF.getFunction().getParent(); if (UserSGPRInfo.hasQueuePtr() && - AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) { + AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); @@ -6438,7 +6438,7 @@ SDValue SITargetLowering::lowerTrapHsaQueuePtr( SDValue QueuePtr; // For code object version 5, QueuePtr is passed through implicit kernarg. 
const Module *M = DAG.getMachineFunction().getFunction().getParent(); - if (AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) { + if (AMDGPU::getAMDHSACodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) { QueuePtr = loadImplicitKernelArgument(DAG, MVT::i64, SL, Align(8), QUEUE_PTR); } else { @@ -6542,7 +6542,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, // For code object version 5, private_base and shared_base are passed through // implicit kernargs. const Module *M = DAG.getMachineFunction().getFunction().getParent(); - if (AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) { + if (AMDGPU::getAMDHSACodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) { ImplicitParameter Param = (AS == AMDGPUAS::LOCAL_ADDRESS) ? SHARED_BASE : PRIVATE_BASE; return loadImplicitKernelArgument(DAG, MVT::i32, DL, Align(4), Param); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index b4f7fc456f0b..f1c05446bf60 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -31,10 +31,11 @@ #define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" -static llvm::cl::opt - AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, - llvm::cl::desc("AMDHSA Code Object Version"), - llvm::cl::init(4)); +static llvm::cl::opt DefaultAMDHSACodeObjectVersion( + "amdhsa-code-object-version", llvm::cl::Hidden, + llvm::cl::init(llvm::AMDGPU::AMDHSA_COV4), + llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " + "or asm directive still take priority if present)")); namespace { @@ -161,47 +162,34 @@ bool isHsaAbi(const MCSubtargetInfo &STI) { return STI.getTargetTriple().getOS() == Triple::AMDHSA; } -std::optional getHsaAbiVersion(const MCSubtargetInfo *STI) { - if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA) - return std::nullopt; +unsigned getAMDHSACodeObjectVersion(const Module &M) { + if (auto Ver = 
mdconst::extract_or_null( + M.getModuleFlag("amdgpu_code_object_version"))) { + return (unsigned)Ver->getZExtValue() / 100; + } - switch (AmdhsaCodeObjectVersion) { + return getDefaultAMDHSACodeObjectVersion(); +} + +unsigned getDefaultAMDHSACodeObjectVersion() { + return DefaultAMDHSACodeObjectVersion; +} + +uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) { + if (T.getOS() != Triple::AMDHSA) + return 0; + + switch (CodeObjectVersion) { case 4: return ELF::ELFABIVERSION_AMDGPU_HSA_V4; case 5: return ELF::ELFABIVERSION_AMDGPU_HSA_V5; default: - report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") + - Twine(AmdhsaCodeObjectVersion)); + report_fatal_error("Unsupported AMDHSA Code Object Version " + + Twine(CodeObjectVersion)); } } -bool isHsaAbiVersion4(const MCSubtargetInfo *STI) { - if (std::optional HsaAbiVer = getHsaAbiVersion(STI)) - return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4; - return false; -} - -bool isHsaAbiVersion5(const MCSubtargetInfo *STI) { - if (std::optional HsaAbiVer = getHsaAbiVersion(STI)) - return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5; - return false; -} - -unsigned getAmdhsaCodeObjectVersion() { - return AmdhsaCodeObjectVersion; -} - -unsigned getCodeObjectVersion(const Module &M) { - if (auto Ver = mdconst::extract_or_null( - M.getModuleFlag("amdgpu_code_object_version"))) { - return (unsigned)Ver->getZExtValue() / 100; - } - - // Default code object version. 
- return AMDHSA_COV4; -} - unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { case AMDHSA_COV4: @@ -705,7 +693,7 @@ namespace IsaInfo { AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) : STI(STI), XnackSetting(TargetIDSetting::Any), - SramEccSetting(TargetIDSetting::Any), CodeObjectVersion(0) { + SramEccSetting(TargetIDSetting::Any) { if (!STI.getFeatureBits().test(FeatureSupportsXNACK)) XnackSetting = TargetIDSetting::Unsupported; if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC)) @@ -817,23 +805,16 @@ std::string AMDGPUTargetID::toString() const { std::string Features; if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { - switch (CodeObjectVersion) { - case AMDGPU::AMDHSA_COV4: - case AMDGPU::AMDHSA_COV5: - // sramecc. - if (getSramEccSetting() == TargetIDSetting::Off) - Features += ":sramecc-"; - else if (getSramEccSetting() == TargetIDSetting::On) - Features += ":sramecc+"; - // xnack. - if (getXnackSetting() == TargetIDSetting::Off) - Features += ":xnack-"; - else if (getXnackSetting() == TargetIDSetting::On) - Features += ":xnack+"; - break; - default: - break; - } + // sramecc. + if (getSramEccSetting() == TargetIDSetting::Off) + Features += ":sramecc-"; + else if (getSramEccSetting() == TargetIDSetting::On) + Features += ":sramecc+"; + // xnack. + if (getXnackSetting() == TargetIDSetting::Off) + Features += ":xnack-"; + else if (getXnackSetting() == TargetIDSetting::On) + Features += ":xnack+"; } StreamRep << Processor << Features; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 351563e957f1..d3f55c792017 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -46,14 +46,18 @@ enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 }; /// \returns True if \p STI is AMDHSA. bool isHsaAbi(const MCSubtargetInfo &STI); -/// \returns HSA OS ABI Version identification. 
-std::optional getHsaAbiVersion(const MCSubtargetInfo *STI); -/// \returns True if HSA OS ABI Version identification is 4, -/// false otherwise. -bool isHsaAbiVersion4(const MCSubtargetInfo *STI); -/// \returns True if HSA OS ABI Version identification is 5, -/// false otherwise. -bool isHsaAbiVersion5(const MCSubtargetInfo *STI); + +/// \returns Code object version from the IR module flag. +unsigned getAMDHSACodeObjectVersion(const Module &M); + +/// \returns The default HSA code object version. This should only be used when +/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module +/// flag or a .amdhsa_code_object_version directive) +unsigned getDefaultAMDHSACodeObjectVersion(); + +/// \returns ABIVersion suitable for use in ELF's e_ident[ABIVERSION]. \param +/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion(). +uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion); /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV); @@ -64,12 +68,6 @@ unsigned getHostcallImplicitArgPosition(unsigned COV); unsigned getDefaultQueueImplicitArgPosition(unsigned COV); unsigned getCompletionActionImplicitArgPosition(unsigned COV); -/// \returns Code object version. -unsigned getAmdhsaCodeObjectVersion(); - -/// \returns Code object version. -unsigned getCodeObjectVersion(const Module &M); - struct GcnBufferFormatInfo { unsigned Format; unsigned BitsPerComp; @@ -114,7 +112,6 @@ private: const MCSubtargetInfo &STI; TargetIDSetting XnackSetting; TargetIDSetting SramEccSetting; - unsigned CodeObjectVersion; public: explicit AMDGPUTargetID(const MCSubtargetInfo &STI); @@ -144,10 +141,6 @@ public: return XnackSetting; } - void setCodeObjectVersion(unsigned COV) { - CodeObjectVersion = COV; - } - /// Sets xnack setting to \p NewXnackSetting. 
void setXnackSetting(TargetIDSetting NewXnackSetting) { XnackSetting = NewXnackSetting; diff --git a/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll index 680fae186960..07b230d8f974 100644 --- a/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll +++ b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll @@ -9,6 +9,8 @@ ; OPT: .text ; OPT-NEXT: .section ".note.GNU-stack" ; OPT-NEXT: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" +; COV4-NEXT: .amdhsa_code_object_version 4 +; COV5-NEXT: .amdhsa_code_object_version 5 ; OPT-NEXT: .amdgpu_metadata ; OPT-NEXT: --- ; OPT-NEXT: amdhsa.kernels: [] diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll index b78f6412cd67..41311abb6983 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck 
--check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll index a3c75e099753..3f380a97240e 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" ; ASM: 
amdhsa.target: amdgcn-amd-amdhsa--gfx700 diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll index d4d8af8c1a99..da3f5640e618 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll index 9ca8b055a279..d458f3489129 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll 
@@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll index fd3f5878469e..5c23e1ef5b42 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 
--amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll index 34673dd5b891..e3635ba5c2ac 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck 
--check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll index c283ece7e8bd..1b7c65a9151d 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa 
-mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll index 869254cae525..bd7457474603 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck 
--check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll index b1bcb34c8aee..321c20bc91de 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll index cc04eb0e661d..18b118fb5739 100644 --- 
a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll index e84778b526f1..db6e8923165b 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; 
RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll index a1ab6ed5f082..0725c779cc66 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll @@ -3,8 +3,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj 
| llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' diff --git a/llvm/test/MC/AMDGPU/elf-header-cov.s b/llvm/test/MC/AMDGPU/elf-header-cov.s new file mode 100644 index 000000000000..e8baad1ba533 --- /dev/null +++ b/llvm/test/MC/AMDGPU/elf-header-cov.s @@ -0,0 +1,24 @@ +// RUN: sed 's/COV/4/g' %s | llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj | \ +// RUN: llvm-readobj --file-headers - | FileCheck %s --check-prefixes=HS4 + +// RUN: sed 's/COV/5/g' %s | llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj | \ +// RUN: llvm-readobj --file-headers - | FileCheck %s --check-prefixes=HS5 + +// RUN: sed 's/COV/4/g' %s | not llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERR + +// RUN: sed 's/COV/4/g' %s | not llvm-mc -triple amdgcn-amd-mesa3d -mcpu=gfx802 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERR + +// RUN: sed 's/COV/4/g' %s | not llvm-mc -triple amdgcn-amd- -mcpu=gfx802 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERR + +.amdhsa_code_object_version COV + +// ERR: error: unknown directive + +// HS4: OS/ABI: AMDGPU_HSA (0x40) +// HS4-NEXT: ABIVersion: 2 + +// HS5: OS/ABI: AMDGPU_HSA (0x40) +// HS5-NEXT: ABIVersion: 3 diff --git 
a/llvm/test/MC/AMDGPU/hsa-exp.s b/llvm/test/MC/AMDGPU/hsa-exp.s index 23b2b8f31a4c..2c8dd6f8eeb4 100644 --- a/llvm/test/MC/AMDGPU/hsa-exp.s +++ b/llvm/test/MC/AMDGPU/hsa-exp.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 -show-encoding %s | FileCheck %s --check-prefix=ASM -// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF +// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF // ELF: Section { // ELF: Name: .text @@ -19,6 +19,9 @@ .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" // ASM: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + .set my_is_ptr64, 1 .if my_is_ptr64 == 0 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s index 186d98f78b98..8b90e20bb87d 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 --amdhsa-code-object-version=4 -filetype=obj < %s > %t +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=obj < %s > %t // RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s // RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s @@ -52,6 +52,9 @@ .amdgcn_target "amdgcn-amd-amdhsa--gfx1200" // ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200" +.amdhsa_code_object_version 4 +// ASM: 
.amdhsa_code_object_version 4 + .p2align 8 .type minimal,@function minimal: diff --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s index 6a824b8bcc7b..e19dba0f5fd0 100644 --- a/llvm/test/MC/AMDGPU/hsa-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-v4.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack -filetype=obj < %s > %t +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t // RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s // RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s @@ -52,6 +52,9 @@ .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" // ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + .p2align 8 .type minimal,@function minimal: diff --git a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s index 6edac771faa0..248890391a6b 100644 --- a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s +++ b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=5 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=5 -mattr=+xnack -filetype=obj < %s > %t +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t // RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s // RUN: llvm-objdump -s -j .rodata %t | FileCheck 
--check-prefix=OBJDUMP %s @@ -52,6 +52,9 @@ .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" // ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" +.amdhsa_code_object_version 5 +// ASM: .amdhsa_code_object_version 5 + .p2align 8 .type minimal,@function minimal: diff --git a/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s b/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s deleted file mode 100644 index aafad9bbaf4c..000000000000 --- a/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s +++ /dev/null @@ -1,6 +0,0 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx801 -mattr=-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts,-xnack -show-encoding %s | FileCheck --check-prefix=GFX9 %s - -.hsa_code_object_isa -// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" -// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"