[AMDGPU] Remove CC exception for Promote Alloca Limits

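The exception capped MaxVGPRs at 32 for functions with a non-entry calling convention. It was apparently a workaround for an issue that has since been fixed.
Removing it reduces the high scratch usage observed in some cases, which was caused by failed alloca promotion.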

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D145586
This commit is contained in:
pvanhout
2023-04-12 13:04:55 +02:00
parent d20a1b87f6
commit fd1d60873f
2 changed files with 12 additions and 23 deletions

View File

@@ -124,6 +124,14 @@ public:
}
};
unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
if (!TM.getTargetTriple().isAMDGCN())
return 128;
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
return ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
}
} // end anonymous namespace
char AMDGPUPromoteAlloca::ID = 0;
@@ -176,16 +184,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
if (!ST.isPromoteAllocaEnabled())
return false;
if (IsAMDGCN) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
// A non-entry function has only 32 caller preserved registers.
// Do not promote alloca which will force spilling.
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
MaxVGPRs = std::min(MaxVGPRs, 32u);
} else {
MaxVGPRs = 128;
}
MaxVGPRs = getMaxVGPRs(TM, F);
bool SufficientLDS = hasSufficientLocalMem(F);
bool Changed = false;
@@ -1200,17 +1199,7 @@ bool promoteAllocasToVector(Function &F, TargetMachine &TM) {
if (!ST.isPromoteAllocaEnabled())
return false;
unsigned MaxVGPRs;
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
// A non-entry function has only 32 caller preserved registers.
// Do not promote alloca which will force spilling.
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
MaxVGPRs = std::min(MaxVGPRs, 32u);
} else {
MaxVGPRs = 128;
}
const unsigned MaxVGPRs = getMaxVGPRs(TM, F);
bool Changed = false;
BasicBlock &EntryBB = *F.begin();

View File

@@ -139,8 +139,8 @@ entry:
}
; OPT-LABEL: @func_alloca_9xi64_max256(
; OPT: alloca
; OPT-NOT: <9 x i64>
; OPT-NOT: alloca
; OPT: <9 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {