[AMDGPU] Fold more AGPR copies/PHIs in SIFoldOperands
Generalize `tryFoldLCSSAPhi` into `tryFoldPhiAGPR`, which works on any kind of PHI node (not just LCSSA ones) and attempts to create AGPR PHIs more aggressively.

Also add a GFX908-only "cleanup" function, `tryOptimizeAGPRPhis`, which tries to minimize AGPR-to-AGPR copies on GFX908. That target has no ACCVGPR MOV instruction, so an AGPR-AGPR copy expands into 2 or 3 instructions because it needs a VGPR temporary. This is needed because D143731 plus the new `tryFoldPhiAGPR` may create many more PHIs (one 32 x float PHI becomes 32 float PHIs), and if every PHI reads the same AGPR (as in `test_mfma_loop_agpr_init`) they are lowered to 32 copies from that AGPR, each of which becomes 2-3 instructions. Caching the value in a VGPR prevents all of those copies from being generated; we get trivial AGPR-VGPR copies instead.

This is a preparation patch intended to prevent regressions in D143731 when AGPRs are involved.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D144099
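For illustration, here is a rough MIR-style sketch of the GFX908 "VGPR cache" idea; the register and block names are made up for the example (they follow the shorthand used in the comment on tryOptimizeAGPRPhis below, not a real test), and the new fold-agpr-phis.mir test is the authoritative reference:

  ; Before: several AGPR PHIs all read the same %in.sub0, so on GFX908 each
  ; AGPR-AGPR copy would later need its own VGPR temporary.
  %0:agpr_32 = PHI %in.sub0, %a, %x, %c
  %1:agpr_32 = PHI %in.sub0, %a, %y, %c
  %2:agpr_32 = PHI %in.sub0, %a, %z, %c

  ; After: the shared value is read into a VGPR once and copied back to a
  ; single AGPR, which every PHI then uses instead.
  %tmp:vgpr_32 = V_ACCVGPR_READ_B32_e64 %in.sub0, implicit $exec
  %tmp_agpr:agpr_32 = COPY %tmp
  %0:agpr_32 = PHI %tmp_agpr, %a, %x, %c
  %1:agpr_32 = PHI %tmp_agpr, %a, %y, %c
  %2:agpr_32 = PHI %tmp_agpr, %a, %z, %c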
@@ -111,9 +111,11 @@ public:
  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
  bool tryFoldOMod(MachineInstr &MI);
  bool tryFoldRegSequence(MachineInstr &MI);
  bool tryFoldLCSSAPhi(MachineInstr &MI);
  bool tryFoldPhiAGPR(MachineInstr &MI);
  bool tryFoldLoad(MachineInstr &MI);

  bool tryOptimizeAGPRPhis(MachineBasicBlock &MBB);

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
@@ -138,6 +140,16 @@ char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

static const TargetRegisterClass *getRegOpRC(const MachineRegisterInfo &MRI,
                                             const TargetRegisterInfo &TRI,
                                             const MachineOperand &MO) {
  const TargetRegisterClass *RC = MRI.getRegClass(MO.getReg());
  if (const TargetRegisterClass *SubRC =
          TRI.getSubRegisterClass(RC, MO.getSubReg()))
    RC = SubRC;
  return RC;
}

// Map multiply-accumulate opcode to corresponding multiply-add opcode if any.
static unsigned macToMad(unsigned Opc) {
  switch (Opc) {
@@ -1631,52 +1643,133 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
  return true;
}

// Try to hoist an AGPR to VGPR copy out of the loop across a LCSSA PHI.
// Try to hoist an AGPR to VGPR copy across a PHI.
// This should allow folding of an AGPR into a consumer which may support it.
// I.e.:
//
// loop:                                        // loop:
//   %1:vreg = COPY %0:areg                     // exit:
// exit:                                    =>  //   %1:areg = PHI %0:areg, %loop
//   %2:vreg = PHI %1:vreg, %loop               //   %2:vreg = COPY %1:areg
bool SIFoldOperands::tryFoldLCSSAPhi(MachineInstr &PHI) {
// Example 1: LCSSA PHI
// loop:
//   %1:vreg = COPY %0:areg
// exit:
//   %2:vreg = PHI %1:vreg, %loop
// =>
// loop:
// exit:
//   %1:areg = PHI %0:areg, %loop
//   %2:vreg = COPY %1:areg
//
// Example 2: PHI with multiple incoming values:
// entry:
//   %1:vreg = GLOBAL_LOAD(..)
// loop:
//   %2:vreg = PHI %1:vreg, %entry, %5:vreg, %loop
//   %3:areg = COPY %2:vreg
//   %4:areg = (instr using %3:areg)
//   %5:vreg = COPY %4:areg
// =>
// entry:
//   %1:vreg = GLOBAL_LOAD(..)
//   %2:areg = COPY %1:vreg
// loop:
//   %3:areg = PHI %2:areg, %entry, %X:areg,
//   %4:areg = (instr using %3:areg)
bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
  assert(PHI.isPHI());

  if (PHI.getNumExplicitOperands() != 3) // Single input LCSSA PHI
    return false;

  Register PhiIn = PHI.getOperand(1).getReg();
  Register PhiOut = PHI.getOperand(0).getReg();
  if (PHI.getOperand(1).getSubReg() ||
      !TRI->isVGPR(*MRI, PhiIn) || !TRI->isVGPR(*MRI, PhiOut))
  if (!TRI->isVGPR(*MRI, PhiOut))
    return false;

  // A single use should not matter for correctness, but if it has another use
  // inside the loop we may perform copy twice in a worst case.
  if (!MRI->hasOneNonDBGUse(PhiIn))
  // Iterate once over all incoming values of the PHI to check if this PHI is
  // eligible, and determine the exact AGPR RC we'll target.
  const TargetRegisterClass *ARC = nullptr;
  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    MachineOperand &MO = PHI.getOperand(K);

    Register PhiIn = MO.getReg();
    if (MO.getSubReg() || !TRI->isVGPR(*MRI, PhiIn))
      return false;

    MachineInstr *Copy = MRI->getVRegDef(PhiIn);
    if (!Copy || !Copy->isCopy())
      continue;

    Register CopyIn = Copy->getOperand(1).getReg();
    if (CopyIn.isVirtual() && TRI->isAGPR(*MRI, CopyIn)) {
      const TargetRegisterClass *CopyInRC =
          getRegOpRC(*MRI, *TRI, Copy->getOperand(1));
      if (ARC && !ARC->hasSubClassEq(CopyInRC))
        return false;
      ARC = CopyInRC;
    }
  }

  if (!ARC)
    return false;

  MachineInstr *Copy = MRI->getVRegDef(PhiIn);
  if (!Copy || !Copy->isCopy())
    return false;
  // Rewrite the PHI's incoming values to ARC.
  LLVM_DEBUG(dbgs() << "Folding AGPR copies into: " << PHI);
  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    MachineOperand &MO = PHI.getOperand(K);
    Register Reg = MO.getReg();

  Register CopyIn = Copy->getOperand(1).getReg();
  if (!TRI->isAGPR(*MRI, CopyIn) || Copy->getOperand(1).getSubReg())
    return false;
    MachineBasicBlock::iterator InsertPt;
    MachineBasicBlock *InsertMBB = nullptr;

  const TargetRegisterClass *ARC = MRI->getRegClass(CopyIn);
    // Look at the def of Reg, ignoring all copies.
    bool UseAccVGPRWrite = false;
    if (MachineInstr *Def = MRI->getVRegDef(Reg)) {

      // Look at pre-existing COPY instructions from ARC: Steal the operand. If
      // the copy was single-use, it will be removed by DCE later.
      if (Def->isCopy()) {
        MachineOperand &CopyIn = Def->getOperand(1);
        if (CopyIn.getReg().isVirtual() &&
            getRegOpRC(*MRI, *TRI, CopyIn)->hasSubClassEq(ARC)) {
          MO.setReg(CopyIn.getReg());
          MO.setSubReg(CopyIn.getSubReg());
          continue;
        }

        // If this is a multi-use SGPR -> VGPR copy, use V_ACCVGPR_WRITE on
        // GFX908 directly instead of a COPY. Otherwise, SIFoldOperand may try
        // to fold the sgpr -> vgpr -> agpr copy into a sgpr -> agpr copy which
        // is unlikely to be profitable.
        if (!ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
            TRI->isSGPRReg(*MRI, CopyIn.getReg()))
          UseAccVGPRWrite = true;
      }

      InsertPt = ++Def->getIterator();
      InsertMBB = Def->getParent();
    } else {
      InsertMBB = PHI.getOperand(MO.getOperandNo() + 1).getMBB();
      InsertPt = InsertMBB->getFirstTerminator();
    }

    const unsigned CopyOpc =
        UseAccVGPRWrite ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
    Register NewReg = MRI->createVirtualRegister(ARC);
    MachineInstr *MI = BuildMI(*InsertMBB, InsertPt, PHI.getDebugLoc(),
                               TII->get(CopyOpc), NewReg)
                           .addReg(Reg);
    MO.setReg(NewReg);

    (void)MI;
    LLVM_DEBUG(dbgs() << " Created COPY: " << *MI);
  }

  // Replace the PHI's result with a new register.
  Register NewReg = MRI->createVirtualRegister(ARC);
  PHI.getOperand(1).setReg(CopyIn);
  PHI.getOperand(0).setReg(NewReg);

  // COPY that new register back to the original PhiOut register. This COPY will
  // usually be folded out later.
  MachineBasicBlock *MBB = PHI.getParent();
  BuildMI(*MBB, MBB->getFirstNonPHI(), Copy->getDebugLoc(),
  BuildMI(*MBB, MBB->getFirstNonPHI(), PHI.getDebugLoc(),
          TII->get(AMDGPU::COPY), PhiOut)
    .addReg(NewReg, RegState::Kill);
  Copy->eraseFromParent(); // We know this copy had a single use.

  LLVM_DEBUG(dbgs() << "Folded " << PHI);
      .addReg(NewReg);

  LLVM_DEBUG(dbgs() << " Done: Folded " << PHI);
  return true;
}

@@ -1736,6 +1829,101 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
  return true;
}

// tryFoldPhiAGPR will aggressively try to create AGPR PHIs.
// For GFX90A and later, this is pretty much always a good thing, but for GFX908
// there's cases where it can create a lot more AGPR-AGPR copies, which are
// expensive on this architecture due to the lack of V_ACCVGPR_MOV.
//
// This function looks at all AGPR PHIs in a basic block and collects their
// operands. Then, it checks for register that are used more than once across
// all PHIs and caches them in a VGPR. This prevents ExpandPostRAPseudo from
// having to create one VGPR temporary per use, which can get very messy if
// these PHIs come from a broken-up large PHI (e.g. 32 AGPR phis, one per vector
// element).
//
// Example
// a:
//  %in:agpr_256 = COPY %foo:vgpr_256
// c:
//  %x:agpr_32 = ..
// b:
//  %0:areg = PHI %in.sub0:agpr_32, %a, %x, %c
//  %1:areg = PHI %in.sub0:agpr_32, %a, %y, %c
//  %2:areg = PHI %in.sub0:agpr_32, %a, %z, %c
// =>
// a:
//  %in:agpr_256 = COPY %foo:vgpr_256
//  %tmp:vgpr_32 = V_ACCVGPR_READ_B32_e64 %in.sub0:agpr_32
//  %tmp_agpr:agpr_32 = COPY %tmp
// c:
//  %x:agpr_32 = ..
// b:
//  %0:areg = PHI %tmp_agpr, %a, %x, %c
//  %1:areg = PHI %tmp_agpr, %a, %y, %c
//  %2:areg = PHI %tmp_agpr, %a, %z, %c
bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
  // This is only really needed on GFX908 where AGPR-AGPR copies are
  // unreasonably difficult.
  if (ST->hasGFX90AInsts())
    return false;

  // Look at all AGPR Phis and collect the register + subregister used.
  DenseMap<std::pair<Register, unsigned>, std::vector<MachineOperand *>>
      RegToMO;

  for (auto &MI : MBB) {
    if (!MI.isPHI())
      break;

    if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg()))
      continue;

    for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {
      MachineOperand &PhiMO = MI.getOperand(K);
      RegToMO[{PhiMO.getReg(), PhiMO.getSubReg()}].push_back(&PhiMO);
    }
  }

  // For all (Reg, SubReg) pair that are used more than once, cache the value in
  // a VGPR.
  bool Changed = false;
  for (const auto &[Entry, MOs] : RegToMO) {
    if (MOs.size() == 1)
      continue;

    const auto [Reg, SubReg] = Entry;
    MachineInstr *Def = MRI->getVRegDef(Reg);
    MachineBasicBlock *DefMBB = Def->getParent();

    // Create a copy in a VGPR using V_ACCVGPR_READ_B32_e64 so it's not folded
    // out.
    const TargetRegisterClass *ARC = getRegOpRC(*MRI, *TRI, *MOs.front());
    Register TempVGPR =
        MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC));
    MachineInstr *VGPRCopy =
        BuildMI(*DefMBB, ++Def->getIterator(), Def->getDebugLoc(),
                TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)
            .addReg(Reg, /* flags */ 0, SubReg);

    // Copy back to an AGPR and use that instead of the AGPR subreg in all MOs.
    Register TempAGPR = MRI->createVirtualRegister(ARC);
    BuildMI(*DefMBB, ++VGPRCopy->getIterator(), Def->getDebugLoc(),
            TII->get(AMDGPU::COPY), TempAGPR)
        .addReg(TempVGPR);

    LLVM_DEBUG(dbgs() << "Caching AGPR into VGPR: " << *VGPRCopy);
    for (MachineOperand *MO : MOs) {
      MO->setReg(TempAGPR);
      MO->setSubReg(AMDGPU::NoSubRegister);
      LLVM_DEBUG(dbgs() << " Changed PHI Operand: " << *MO << "\n");
    }

    Changed = true;
  }

  return Changed;
}

bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
@@ -1769,7 +1957,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
        continue;
      }

      if (MI.isPHI() && tryFoldLCSSAPhi(MI)) {
      if (MI.isPHI() && tryFoldPhiAGPR(MI)) {
        Changed = true;
        continue;
      }
@@ -1794,6 +1982,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
          !tryFoldOMod(MI))
        Changed |= tryFoldClamp(MI);
    }

    Changed |= tryOptimizeAGPRPhis(*MBB);
  }

  return Changed;

llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir (new file, 410 lines)
@@ -0,0 +1,410 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX908
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A
|
||||
|
||||
---
|
||||
name: test_sgpr_init_multiuse
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
; GFX908-LABEL: name: test_sgpr_init_multiuse
|
||||
; GFX908: bb.0:
|
||||
; GFX908-NEXT: successors: %bb.1(0x80000000)
|
||||
; GFX908-NEXT: liveins: $sgpr0, $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY1]], implicit $exec
|
||||
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY1]], implicit $exec
|
||||
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY1]], implicit $exec
|
||||
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY1]], implicit $exec
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: bb.1:
|
||||
; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GFX908-NEXT: liveins: $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_3]], %bb.0, %13.sub0, %bb.1
|
||||
; GFX908-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_2]], %bb.0, %13.sub1, %bb.1
|
||||
; GFX908-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_1]], %bb.0, %13.sub2, %bb.1
|
||||
; GFX908-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_]], %bb.0, %13.sub3, %bb.1
|
||||
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
|
||||
; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
|
||||
; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[PHI1]]
|
||||
; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
|
||||
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
|
||||
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: bb.2:
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
; GFX90A-LABEL: name: test_sgpr_init_multiuse
|
||||
; GFX90A: bb.0:
|
||||
; GFX90A-NEXT: successors: %bb.1(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr0, $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY1]]
|
||||
; GFX90A-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY1]]
|
||||
; GFX90A-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY1]]
|
||||
; GFX90A-NEXT: [[COPY5:%[0-9]+]]:agpr_32 = COPY [[COPY1]]
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.1:
|
||||
; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY5]], %bb.0, %13.sub0, %bb.1
|
||||
; GFX90A-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %13.sub1, %bb.1
|
||||
; GFX90A-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.0, %13.sub2, %bb.1
|
||||
; GFX90A-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %13.sub3, %bb.1
|
||||
; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
|
||||
; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
|
||||
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI1]]
|
||||
; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
|
||||
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3
|
||||
; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
; GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
; GFX90A-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.2:
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $scc
|
||||
successors: %bb.1
|
||||
|
||||
%0:sgpr_32 = COPY $sgpr0
|
||||
%1:vgpr_32 = COPY %0
|
||||
|
||||
bb.1:
|
||||
liveins: $scc
|
||||
successors: %bb.1, %bb.2
|
||||
|
||||
%8:vgpr_32 = PHI %1, %bb.0, %16, %bb.1
|
||||
%9:vgpr_32 = PHI %1, %bb.0, %17, %bb.1
|
||||
%10:vgpr_32 = PHI %1, %bb.0, %18, %bb.1
|
||||
%11:vgpr_32 = PHI %1, %bb.0, %19, %bb.1
|
||||
%12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3
|
||||
%13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
%15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
|
||||
%16:vgpr_32 = COPY %15.sub0
|
||||
%17:vgpr_32 = COPY %15.sub1
|
||||
%18:vgpr_32 = COPY %15.sub2
|
||||
%19:vgpr_32 = COPY %15.sub3
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_sgpr_init_singleuse
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
; GFX908-LABEL: name: test_sgpr_init_singleuse
|
||||
; GFX908: bb.0:
|
||||
; GFX908-NEXT: successors: %bb.1(0x80000000)
|
||||
; GFX908-NEXT: liveins: $sgpr0, $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX908-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY1]]
|
||||
; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX908-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY3]]
|
||||
; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX908-NEXT: [[COPY6:%[0-9]+]]:agpr_32 = COPY [[COPY5]]
|
||||
; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX908-NEXT: [[COPY8:%[0-9]+]]:agpr_32 = COPY [[COPY7]]
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: bb.1:
|
||||
; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GFX908-NEXT: liveins: $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %16.sub0, %bb.1
|
||||
; GFX908-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %16.sub1, %bb.1
|
||||
; GFX908-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY6]], %bb.0, %16.sub2, %bb.1
|
||||
; GFX908-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY8]], %bb.0, %16.sub3, %bb.1
|
||||
; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
|
||||
; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
|
||||
; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]]
|
||||
; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
|
||||
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3
|
||||
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: bb.2:
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
; GFX90A-LABEL: name: test_sgpr_init_singleuse
|
||||
; GFX90A: bb.0:
|
||||
; GFX90A-NEXT: successors: %bb.1(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr0, $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY1]]
|
||||
; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX90A-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY3]]
|
||||
; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX90A-NEXT: [[COPY6:%[0-9]+]]:agpr_32 = COPY [[COPY5]]
|
||||
; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
||||
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:agpr_32 = COPY [[COPY7]]
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.1:
|
||||
; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %16.sub0, %bb.1
|
||||
; GFX90A-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %16.sub1, %bb.1
|
||||
; GFX90A-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY6]], %bb.0, %16.sub2, %bb.1
|
||||
; GFX90A-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY8]], %bb.0, %16.sub3, %bb.1
|
||||
; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
|
||||
; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
|
||||
; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]]
|
||||
; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
|
||||
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3
|
||||
; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
; GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
; GFX90A-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.2:
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $scc
|
||||
successors: %bb.1
|
||||
|
||||
%0:sgpr_32 = COPY $sgpr0
|
||||
%1:vgpr_32 = COPY %0
|
||||
%2:vgpr_32 = COPY %0
|
||||
%3:vgpr_32 = COPY %0
|
||||
%4:vgpr_32 = COPY %0
|
||||
|
||||
bb.1:
|
||||
liveins: $scc
|
||||
successors: %bb.1, %bb.2
|
||||
|
||||
%8:vgpr_32 = PHI %1, %bb.0, %16, %bb.1
|
||||
%9:vgpr_32 = PHI %2, %bb.0, %17, %bb.1
|
||||
%10:vgpr_32 = PHI %3, %bb.0, %18, %bb.1
|
||||
%11:vgpr_32 = PHI %4, %bb.0, %19, %bb.1
|
||||
%12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3
|
||||
%13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
%15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
|
||||
%16:vgpr_32 = COPY %15.sub0
|
||||
%17:vgpr_32 = COPY %15.sub1
|
||||
%18:vgpr_32 = COPY %15.sub2
|
||||
%19:vgpr_32 = COPY %15.sub3
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_vgpr_init
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
; GFX908-LABEL: name: test_vgpr_init
|
||||
; GFX908: bb.0:
|
||||
; GFX908-NEXT: successors: %bb.1(0x80000000)
|
||||
; GFX908-NEXT: liveins: $vgpr0, $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX908-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]]
|
||||
; GFX908-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]]
|
||||
; GFX908-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]]
|
||||
; GFX908-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]]
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: bb.1:
|
||||
; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GFX908-NEXT: liveins: $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %12.sub0, %bb.1
|
||||
; GFX908-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.0, %12.sub1, %bb.1
|
||||
; GFX908-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %12.sub2, %bb.1
|
||||
; GFX908-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %12.sub3, %bb.1
|
||||
; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
|
||||
; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
|
||||
; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI1]]
|
||||
; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
|
||||
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY5]], %subreg.sub3
|
||||
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: bb.2:
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
; GFX90A-LABEL: name: test_vgpr_init
|
||||
; GFX90A: bb.0:
|
||||
; GFX90A-NEXT: successors: %bb.1(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $vgpr0, $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]]
|
||||
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]]
|
||||
; GFX90A-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]]
|
||||
; GFX90A-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]]
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.1:
|
||||
; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %12.sub0, %bb.1
|
||||
; GFX90A-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.0, %12.sub1, %bb.1
|
||||
; GFX90A-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %12.sub2, %bb.1
|
||||
; GFX90A-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %12.sub3, %bb.1
|
||||
; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
|
||||
; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
|
||||
; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI1]]
|
||||
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
|
||||
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY5]], %subreg.sub3
|
||||
; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
; GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
; GFX90A-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.2:
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0, $scc
|
||||
successors: %bb.1
|
||||
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
|
||||
bb.1:
|
||||
liveins: $scc
|
||||
successors: %bb.1, %bb.2
|
||||
|
||||
%8:vgpr_32 = PHI %0, %bb.0, %16, %bb.1
|
||||
%9:vgpr_32 = PHI %0, %bb.0, %17, %bb.1
|
||||
%10:vgpr_32 = PHI %0, %bb.0, %18, %bb.1
|
||||
%11:vgpr_32 = PHI %0, %bb.0, %19, %bb.1
|
||||
%12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3
|
||||
%13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
%15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
|
||||
%16:vgpr_32 = COPY %15.sub0
|
||||
%17:vgpr_32 = COPY %15.sub1
|
||||
%18:vgpr_32 = COPY %15.sub2
|
||||
%19:vgpr_32 = COPY %15.sub3
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_use_vgpr_temp
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
; GFX908-LABEL: name: test_use_vgpr_temp
|
||||
; GFX908: bb.0:
|
||||
; GFX908-NEXT: successors: %bb.1(0x80000000)
|
||||
; GFX908-NEXT: liveins: $sgpr0, $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
|
||||
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
|
||||
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
|
||||
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
|
||||
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
|
||||
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
|
||||
; GFX908-NEXT: [[V_ACCVGPR_READ_B32_e64_:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 [[REG_SEQUENCE]].sub0, implicit $exec
|
||||
; GFX908-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[V_ACCVGPR_READ_B32_e64_]]
|
||||
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: bb.1:
|
||||
; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GFX908-NEXT: liveins: $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %18.sub0, %bb.1
|
||||
; GFX908-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %18.sub1, %bb.1
|
||||
; GFX908-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %18.sub2, %bb.1
|
||||
; GFX908-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %18.sub3, %bb.1
|
||||
; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
|
||||
; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
|
||||
; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI1]]
|
||||
; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
|
||||
; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: bb.2:
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
; GFX90A-LABEL: name: test_use_vgpr_temp
|
||||
; GFX90A: bb.0:
|
||||
; GFX90A-NEXT: successors: %bb.1(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr0, $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
|
||||
; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
|
||||
; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
|
||||
; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
|
||||
; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
|
||||
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
|
||||
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.1:
|
||||
; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[REG_SEQUENCE]].sub0, %bb.0, %18.sub0, %bb.1
|
||||
; GFX90A-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[REG_SEQUENCE]].sub0, %bb.0, %18.sub1, %bb.1
|
||||
; GFX90A-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[REG_SEQUENCE]].sub0, %bb.0, %18.sub2, %bb.1
|
||||
; GFX90A-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[REG_SEQUENCE]].sub0, %bb.0, %18.sub3, %bb.1
|
||||
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
|
||||
; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
|
||||
; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[PHI1]]
|
||||
; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
|
||||
; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
|
||||
; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
; GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
; GFX90A-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.2:
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
; Tests that tryOptimizeAGPRPhis kicks in for GFX908.
|
||||
liveins: $sgpr0, $scc
|
||||
successors: %bb.1
|
||||
|
||||
%1:vgpr_32 = COPY $sgpr0
|
||||
%2:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %1, implicit $exec
|
||||
%3:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %1, implicit $exec
|
||||
%4:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %1, implicit $exec
|
||||
%5:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %1, implicit $exec
|
||||
%6:areg_128_align2 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1, %4, %subreg.sub2, %5, %subreg.sub3
|
||||
%7:vgpr_32 = COPY %6.sub0
|
||||
bb.1:
|
||||
liveins: $scc
|
||||
successors: %bb.1, %bb.2
|
||||
|
||||
%8:vgpr_32 = PHI %7, %bb.0, %16, %bb.1
|
||||
%9:vgpr_32 = PHI %7, %bb.0, %17, %bb.1
|
||||
%10:vgpr_32 = PHI %7, %bb.0, %18, %bb.1
|
||||
%11:vgpr_32 = PHI %7, %bb.0, %19, %bb.1
|
||||
%12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3
|
||||
%13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
%15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
|
||||
%16:vgpr_32 = COPY %15.sub0
|
||||
%17:vgpr_32 = COPY %15.sub1
|
||||
%18:vgpr_32 = COPY %15.sub2
|
||||
%19:vgpr_32 = COPY %15.sub3
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...