[AMDGPU] Extend ComputePGMRSrc3 to gfx10+. NFCI. (#129289)

ComputePGMRSrc3 has existed since gfx90a and also exists on gfx10+, but the
current code only expects it on gfx90a. This is NFCI (no functional change
intended) since we do not fill it in on gfx10+ yet.
This commit is contained in:
Stanislav Mekhanoshin
2025-03-03 08:22:15 -08:00
committed by GitHub
parent 3dafa486a6
commit 2479479285
3 changed files with 15 additions and 14 deletions

View File

@@ -622,12 +622,13 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
int64_t PGRM_Rsrc3 = 1;
bool EvaluatableRsrc3 =
CurrentProgramInfo.ComputePGMRSrc3GFX90A->evaluateAsAbsolute(PGRM_Rsrc3);
CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGRM_Rsrc3);
(void)PGRM_Rsrc3;
(void)EvaluatableRsrc3;
assert(STM.hasGFX90AInsts() || !EvaluatableRsrc3 ||
assert(STM.getGeneration() >= AMDGPUSubtarget::GFX10 ||
STM.hasGFX90AInsts() || !EvaluatableRsrc3 ||
static_cast<uint64_t>(PGRM_Rsrc3) == 0);
KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A;
KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3;
KernelDescriptor.kernarg_preload = MCConstantExpr::create(
AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
@@ -822,22 +823,22 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
false);
[[maybe_unused]] int64_t PGMRSrc3;
assert(STM.hasGFX90AInsts() ||
(CurrentProgramInfo.ComputePGMRSrc3GFX90A->evaluateAsAbsolute(
PGMRSrc3) &&
assert(STM.getGeneration() >= AMDGPUSubtarget::GFX10 ||
STM.hasGFX90AInsts() ||
(CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGMRSrc3) &&
static_cast<uint64_t>(PGMRSrc3) == 0));
if (STM.hasGFX90AInsts()) {
OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
getMCExprStr(MCKernelDescriptor::bits_get(
CurrentProgramInfo.ComputePGMRSrc3GFX90A,
CurrentProgramInfo.ComputePGMRSrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
false);
OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
getMCExprStr(MCKernelDescriptor::bits_get(
CurrentProgramInfo.ComputePGMRSrc3GFX90A,
CurrentProgramInfo.ComputePGMRSrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
false);
@@ -1241,12 +1242,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
return Dst;
};
ProgInfo.ComputePGMRSrc3GFX90A =
SetBits(ProgInfo.ComputePGMRSrc3GFX90A, ProgInfo.AccumOffset,
ProgInfo.ComputePGMRSrc3 =
SetBits(ProgInfo.ComputePGMRSrc3, ProgInfo.AccumOffset,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
ProgInfo.ComputePGMRSrc3GFX90A =
SetBits(ProgInfo.ComputePGMRSrc3GFX90A, CreateExpr(ProgInfo.TgSplit),
ProgInfo.ComputePGMRSrc3 =
SetBits(ProgInfo.ComputePGMRSrc3, CreateExpr(ProgInfo.TgSplit),
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
}

View File

@@ -57,7 +57,7 @@ void SIProgramInfo::reset(const MachineFunction &MF) {
LdsSize = 0;
EXCPEnable = 0;
ComputePGMRSrc3GFX90A = ZeroExpr;
ComputePGMRSrc3 = ZeroExpr;
NumVGPR = ZeroExpr;
NumArchVGPR = ZeroExpr;

View File

@@ -63,7 +63,7 @@ struct LLVM_EXTERNAL_VISIBILITY SIProgramInfo {
uint32_t LdsSize = 0;
uint32_t EXCPEnable = 0;
const MCExpr *ComputePGMRSrc3GFX90A = nullptr;
const MCExpr *ComputePGMRSrc3 = nullptr;
const MCExpr *NumVGPR = nullptr;
const MCExpr *NumArchVGPR = nullptr;