[NVPTX] Fixup NVPTXPrologEpilogPass for opt-bisect-limit (#144136)

Currently, the NVPTXPrologEpilogPass will crash if LIFETIME_START or
LIFETIME_END instructions are encountered. Usually this isn't a problem
since a couple earlier passes will always remove them. However, when
using opt-bisect-limit crashes can occur. This can hinder debugging and
reveals a potential future problem if these optimization passes change
their behavior. https://cuda.godbolt.org/z/E81xxKGdb

This change updates NVPTXPrologEpilogPass and
NVPTXRegisterInfo::eliminateFrameIndex to gracefully handle these
instructions by simply removing them. While I'm here I also did some
general fixup in NVPTXPrologEpilogPass to make it look more like
PrologEpilogInserter (from which it was copied).
This commit is contained in:
Alex MacLean
2025-06-27 08:31:08 -07:00
committed by GitHub
parent 37b0b0f7d2
commit e933cfcfb2
5 changed files with 117 additions and 35 deletions

View File

@@ -76,6 +76,7 @@ void initializeNVPTXAAWrapperPassPass(PassRegistry &);
void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
void initializeNVPTXPeepholePass(PassRegistry &);
void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
void initializeNVPTXPrologEpilogPassPass(PassRegistry &);
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

View File

@@ -41,7 +41,7 @@ public:
private:
void calculateFrameObjectOffsets(MachineFunction &Fn);
};
}
} // end anonymous namespace
MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
return new NVPTXPrologEpilogPass();
@@ -49,6 +49,44 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
char NVPTXPrologEpilogPass::ID = 0;
INITIALIZE_PASS(NVPTXPrologEpilogPass, DEBUG_TYPE,
"NVPTX Prologue/Epilogue Insertion", false, false)
static bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
unsigned OpIdx) {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
if (MI.isDebugValue()) {
MachineOperand &Op = MI.getOperand(OpIdx);
assert(MI.isDebugOperand(&Op) &&
"Frame indices can only appear as a debug operand in a DBG_VALUE*"
" machine instruction");
Register Reg;
unsigned FrameIdx = Op.getIndex();
StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
Op.ChangeToRegister(Reg, false /*isDef*/);
const DIExpression *DIExpr = MI.getDebugExpression();
if (MI.isNonListDebugValue()) {
DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(),
DIExpression::ApplyOffset, Offset);
} else {
// The debug operand at DebugOpIndex was a frame index at offset
// `Offset`; now the operand has been replaced with the frame
// register, we must add Offset with `register x, plus Offset`.
unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
SmallVector<uint64_t, 3> Ops;
TRI.getOffsetOpcodes(Offset, Ops);
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
}
MI.getDebugExpressionOp().setMetadata(DIExpr);
return true;
}
return false;
}
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetFrameLowering &TFI = *STI.getFrameLowering();
@@ -57,41 +95,27 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
calculateFrameObjectOffsets(MF);
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isFI())
for (MachineBasicBlock &BB : MF) {
for (MachineBasicBlock::iterator I = BB.end(); I != BB.begin();) {
MachineInstr &MI = *std::prev(I);
bool RemovedMI = false;
for (const auto &[Idx, Op] : enumerate(MI.operands())) {
if (!Op.isFI())
continue;
// Frame indices in debug values are encoded in a target independent
// way with simply the frame index and offset rather than any
// target-specific addressing mode.
if (MI.isDebugValue()) {
MachineOperand &Op = MI.getOperand(i);
assert(
MI.isDebugOperand(&Op) &&
"Frame indices can only appear as a debug operand in a DBG_VALUE*"
" machine instruction");
Register Reg;
auto Offset =
TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
Op.ChangeToRegister(Reg, /*isDef=*/false);
const DIExpression *DIExpr = MI.getDebugExpression();
if (MI.isNonListDebugValue()) {
DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
} else {
SmallVector<uint64_t, 3> Ops;
TRI.getOffsetOpcodes(Offset, Ops);
unsigned OpIdx = MI.getDebugOperandIndex(&Op);
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, OpIdx);
}
MI.getDebugExpressionOp().setMetadata(DIExpr);
if (replaceFrameIndexDebugInstr(MF, MI, Idx))
continue;
}
TRI.eliminateFrameIndex(MI, 0, i, nullptr);
// Eliminate this FrameIndex operand.
RemovedMI = TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
Modified = true;
if (RemovedMI)
break;
}
if (!RemovedMI)
--I;
}
}

View File

@@ -103,15 +103,20 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
RegScavenger *) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
if (MI.isLifetimeMarker()) {
MI.eraseFromParent();
return true;
}
MachineFunction &MF = *MI.getParent()->getParent();
int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
MI.getOperand(FIOperandNum + 1).getImm();
const int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
const MachineFunction &MF = *MI.getParent()->getParent();
const int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);

View File

@@ -115,6 +115,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
initializeNVPTXExternalAAWrapperPass(PR);
initializeNVPTXPeepholePass(PR);
initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
initializeNVPTXPrologEpilogPassPass(PR);
}
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {

View File

@@ -0,0 +1,51 @@
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
;; This test is intended to verify that we don't crash when -opt-bisect-limit
;; is used in conjunction with lifetime markers. Previously, later passes
;; would not handle these intructions correctly and relied on earlier passes
;; to remove them.
declare void @bar(ptr)
define void @foo() {
%p = alloca i32
call void @llvm.lifetime.start(i64 4, ptr %p)
call void @bar(ptr %p)
call void @llvm.lifetime.end(i64 4, ptr %p)
ret void
}