[NVPTX] Fixup NVPTXPrologEpilogPass for opt-bisect-limit (#144136)
Currently, the NVPTXPrologEpilogPass will crash if LIFETIME_START or LIFETIME_END instructions are encountered. Usually this isn't a problem since a couple of earlier passes will always remove them. However, when using opt-bisect-limit, crashes can occur. This can hinder debugging and reveals a potential future problem if these optimization passes change their behavior. https://cuda.godbolt.org/z/E81xxKGdb This change updates NVPTXPrologEpilogPass and NVPTXRegisterInfo::eliminateFrameIndex to gracefully handle these instructions by simply removing them. While I'm here I also did some general fixup in NVPTXPrologEpilogPass to make it look more like PrologEpilogInserter (from which it was copied).
This commit is contained in:
@@ -76,6 +76,7 @@ void initializeNVPTXAAWrapperPassPass(PassRegistry &);
|
||||
void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
|
||||
void initializeNVPTXPeepholePass(PassRegistry &);
|
||||
void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
|
||||
void initializeNVPTXPrologEpilogPassPass(PassRegistry &);
|
||||
|
||||
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
|
||||
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||
|
||||
@@ -41,7 +41,7 @@ public:
|
||||
private:
|
||||
void calculateFrameObjectOffsets(MachineFunction &Fn);
|
||||
};
|
||||
}
|
||||
} // end anonymous namespace
|
||||
|
||||
MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
|
||||
return new NVPTXPrologEpilogPass();
|
||||
@@ -49,6 +49,44 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
|
||||
|
||||
char NVPTXPrologEpilogPass::ID = 0;
|
||||
|
||||
INITIALIZE_PASS(NVPTXPrologEpilogPass, DEBUG_TYPE,
|
||||
"NVPTX Prologue/Epilogue Insertion", false, false)
|
||||
|
||||
/// Rewrite the frame-index operand at position \p OpIdx of \p MI when \p MI is
/// a debug value: the operand becomes the register reported by the target's
/// frame lowering, and the accompanying offset is folded into the
/// instruction's debug expression.
///
/// \returns true if \p MI was a debug value and has been rewritten, false if
/// \p MI is not a debug value (in which case nothing is modified).
static bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
|
||||
unsigned OpIdx) {
|
||||
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
|
||||
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
|
||||
// Only debug values are handled here; any other instruction falls through to
// the `return false` at the bottom.
if (MI.isDebugValue()) {
|
||||
|
||||
MachineOperand &Op = MI.getOperand(OpIdx);
|
||||
assert(MI.isDebugOperand(&Op) &&
|
||||
"Frame indices can only appear as a debug operand in a DBG_VALUE*"
|
||||
" machine instruction");
|
||||
Register Reg;
|
||||
unsigned FrameIdx = Op.getIndex();
|
||||
|
||||
// Resolve the frame index to a register (written to Reg) plus an offset.
StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
|
||||
// Replace the frame-index operand with that register, as a use.
Op.ChangeToRegister(Reg, false /*isDef*/);
|
||||
|
||||
const DIExpression *DIExpr = MI.getDebugExpression();
|
||||
if (MI.isNonListDebugValue()) {
|
||||
// Non-list debug value: prepend the offset to the existing expression.
DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(),
|
||||
DIExpression::ApplyOffset, Offset);
|
||||
} else {
|
||||
// The debug operand at DebugOpIndex was a frame index at offset
|
||||
// `Offset`; now that the operand has been replaced with the frame
|
||||
// register, we must apply Offset to it: `register x, plus Offset`.
|
||||
unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
|
||||
SmallVector<uint64_t, 3> Ops;
|
||||
TRI.getOffsetOpcodes(Offset, Ops);
|
||||
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
|
||||
}
|
||||
// Store the updated expression back on the instruction.
MI.getDebugExpressionOp().setMetadata(DIExpr);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
const TargetSubtargetInfo &STI = MF.getSubtarget();
|
||||
const TargetFrameLowering &TFI = *STI.getFrameLowering();
|
||||
@@ -57,41 +95,27 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
calculateFrameObjectOffsets(MF);
|
||||
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
for (MachineInstr &MI : MBB) {
|
||||
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
|
||||
if (!MI.getOperand(i).isFI())
|
||||
for (MachineBasicBlock &BB : MF) {
|
||||
for (MachineBasicBlock::iterator I = BB.end(); I != BB.begin();) {
|
||||
MachineInstr &MI = *std::prev(I);
|
||||
|
||||
bool RemovedMI = false;
|
||||
for (const auto &[Idx, Op] : enumerate(MI.operands())) {
|
||||
if (!Op.isFI())
|
||||
continue;
|
||||
|
||||
// Frame indices in debug values are encoded in a target independent
|
||||
// way with simply the frame index and offset rather than any
|
||||
// target-specific addressing mode.
|
||||
if (MI.isDebugValue()) {
|
||||
MachineOperand &Op = MI.getOperand(i);
|
||||
assert(
|
||||
MI.isDebugOperand(&Op) &&
|
||||
"Frame indices can only appear as a debug operand in a DBG_VALUE*"
|
||||
" machine instruction");
|
||||
Register Reg;
|
||||
auto Offset =
|
||||
TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
|
||||
Op.ChangeToRegister(Reg, /*isDef=*/false);
|
||||
const DIExpression *DIExpr = MI.getDebugExpression();
|
||||
if (MI.isNonListDebugValue()) {
|
||||
DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
|
||||
} else {
|
||||
SmallVector<uint64_t, 3> Ops;
|
||||
TRI.getOffsetOpcodes(Offset, Ops);
|
||||
unsigned OpIdx = MI.getDebugOperandIndex(&Op);
|
||||
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, OpIdx);
|
||||
}
|
||||
MI.getDebugExpressionOp().setMetadata(DIExpr);
|
||||
if (replaceFrameIndexDebugInstr(MF, MI, Idx))
|
||||
continue;
|
||||
}
|
||||
|
||||
TRI.eliminateFrameIndex(MI, 0, i, nullptr);
|
||||
// Eliminate this FrameIndex operand.
|
||||
RemovedMI = TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
|
||||
Modified = true;
|
||||
if (RemovedMI)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!RemovedMI)
|
||||
--I;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -103,15 +103,20 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
|
||||
bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj, unsigned FIOperandNum,
|
||||
RegScavenger *RS) const {
|
||||
RegScavenger *) const {
|
||||
assert(SPAdj == 0 && "Unexpected");
|
||||
|
||||
MachineInstr &MI = *II;
|
||||
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
|
||||
if (MI.isLifetimeMarker()) {
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
MachineFunction &MF = *MI.getParent()->getParent();
|
||||
int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
|
||||
MI.getOperand(FIOperandNum + 1).getImm();
|
||||
const int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
|
||||
|
||||
const MachineFunction &MF = *MI.getParent()->getParent();
|
||||
const int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
|
||||
MI.getOperand(FIOperandNum + 1).getImm();
|
||||
|
||||
// Using I0 as the frame pointer
|
||||
MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
|
||||
|
||||
@@ -115,6 +115,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
|
||||
initializeNVPTXExternalAAWrapperPass(PR);
|
||||
initializeNVPTXPeepholePass(PR);
|
||||
initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
|
||||
initializeNVPTXPrologEpilogPassPass(PR);
|
||||
}
|
||||
|
||||
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
|
||||
|
||||
51
llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
Normal file
51
llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
Normal file
@@ -0,0 +1,51 @@
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
|
||||
|
||||
;; This test is intended to verify that we don't crash when -opt-bisect-limit
|
||||
;; is used in conjunction with lifetime markers. Previously, later passes
|
||||
;; would not handle these instructions correctly and relied on earlier passes
|
||||
;; to remove them.
|
||||
|
||||
declare void @bar(ptr)
|
||||
|
||||
; Allocates a single i32 stack slot, brackets its use with
; llvm.lifetime.start / llvm.lifetime.end markers, and passes the slot's
; address to the external function @bar in between.
define void @foo() {
|
||||
%p = alloca i32
|
||||
; Lifetime of the 4-byte slot begins here.
call void @llvm.lifetime.start(i64 4, ptr %p)
|
||||
call void @bar(ptr %p)
|
||||
; Lifetime of the 4-byte slot ends here.
call void @llvm.lifetime.end(i64 4, ptr %p)
|
||||
ret void
|
||||
}
|
||||
Reference in New Issue
Block a user