From f96eeec0057c8535be809d324be22ccb690345bb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 29 Nov 2016 19:20:48 +0000 Subject: [PATCH] AMDGPU: Materialize frame index before add It isn't generally safe to fold the frame index directly into the operand since it will possibly not be an inline immediate after it is expanded. This surprisingly seems to produce better code, since the FI doesn't prevent folding other immediate operands. llvm-svn: 288185 --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 7 ++++++- llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll | 7 ++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 1d933da47c21..936bb8b656fe 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -245,12 +245,17 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) .addImm(Offset); + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg) + .addFrameIndex(FrameIdx); + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead) .addReg(OffsetReg, RegState::Kill) - .addFrameIndex(FrameIdx); + .addReg(FIReg); } void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll index dc01f76108ee..92c592b5e269 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll @@ -7,15 +7,16 @@ ; ; CHECK-LABEL: {{^}}main: +; CHECK: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}} + ; FIXME: add 0? -; CHECK-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x140 -; CHECK-DAG: v_add_i32_e64 [[ADD_K0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K0]], 0 +; CHECK-DAG: v_add_i32_e32 [[ADD_K0:v[0-9]+]], vcc, 0x140, [[BASE_FI]] ; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 ; CHECK-DAG: buffer_store_dword {{v[0-9]+}}, [[ADD_K0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} ; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]] -; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]] +; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BASE_FI]], [[BYTES]] ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen