[AMDGPU] Remove CC exception for Promote Alloca Limits

Apparently it was used to work around some issue that has been fixed. Removing it helps with high scratch usage observed in some cases due to failed alloca promotion. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D145586
2023-04-12 13:04:55 +02:00
parent d20a1b87f6
commit fd1d60873f
2 changed files with 12 additions and 23 deletions
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -124,6 +124,14 @@ public:
  }
 };

+unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
+  if (!TM.getTargetTriple().isAMDGCN())
+    return 128;
+
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+  return ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+}
+
 } // end anonymous namespace

 char AMDGPUPromoteAlloca::ID = 0;
@@ -176,16 +184,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
  if (!ST.isPromoteAllocaEnabled())
    return false;

-  if (IsAMDGCN) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-    MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
-    // A non-entry function has only 32 caller preserved registers.
-    // Do not promote alloca which will force spilling.
-    if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
-      MaxVGPRs = std::min(MaxVGPRs, 32u);
-  } else {
-    MaxVGPRs = 128;
-  }
+  MaxVGPRs = getMaxVGPRs(TM, F);

  bool SufficientLDS = hasSufficientLocalMem(F);
  bool Changed = false;
@@ -1200,17 +1199,7 @@ bool promoteAllocasToVector(Function &F, TargetMachine &TM) {
  if (!ST.isPromoteAllocaEnabled())
    return false;

-  unsigned MaxVGPRs;
-  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-    MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
-    // A non-entry function has only 32 caller preserved registers.
-    // Do not promote alloca which will force spilling.
-    if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
-      MaxVGPRs = std::min(MaxVGPRs, 32u);
-  } else {
-    MaxVGPRs = 128;
-  }
+  const unsigned MaxVGPRs = getMaxVGPRs(TM, F);

  bool Changed = false;
  BasicBlock &EntryBB = *F.begin();
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
@@ -139,8 +139,8 @@ entry:
 }

 ; OPT-LABEL: @func_alloca_9xi64_max256(
-; OPT: alloca
-; OPT-NOT: <9 x i64>
+; OPT-NOT: alloca
+; OPT: <9 x i64>
 ; LIMIT32: alloca
 ; LIMIT32-NOT: <9 x i64>
 define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {