Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" (#123632)
This PR relands work by @arsenm that was previously reverted
because of some tangentially related scheduler issues, as discussed
in #76416.
This PR cherry-picks the original commit (0e46b49de4), and adds
another patch on top with the following changes:
* The code in `updateRegDefsUses` now updates subranges when
subreg-liveness-tracking is enabled.
* When adding an implicit-def operand for the super-register,
the code in `reMaterializeTrivialDef` that removes undefined
subranges now takes into account that the lanes from the
super-reg are no longer undefined.
Co-authored-by: Matt Arsenault <Matthew.Arsenault@amd.com>
This commit is contained in:
@@ -306,7 +306,11 @@ namespace {
|
||||
/// number if it is not zero. If DstReg is a physical register and the
|
||||
/// existing subregister number of the def / use being updated is not zero,
|
||||
/// make sure to set it to the correct physical subregister.
|
||||
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
|
||||
///
|
||||
/// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG
|
||||
/// SrcReg. This introduces an implicit-def of DstReg on coalesced users.
|
||||
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
|
||||
bool IsSubregToReg);
|
||||
|
||||
/// If the given machine operand reads only undefined lanes add an undef
|
||||
/// flag.
|
||||
@@ -1430,6 +1434,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
|
||||
|
||||
// CopyMI may have implicit operands, save them so that we can transfer them
|
||||
// over to the newly materialized instruction after CopyMI is removed.
|
||||
LaneBitmask NewMIImplicitOpsMask;
|
||||
SmallVector<MachineOperand, 4> ImplicitOps;
|
||||
ImplicitOps.reserve(CopyMI->getNumOperands() -
|
||||
CopyMI->getDesc().getNumOperands());
|
||||
@@ -1443,6 +1448,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
|
||||
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
|
||||
"unexpected implicit virtual register def");
|
||||
ImplicitOps.push_back(MO);
|
||||
if (MO.isDef() && MO.getReg().isVirtual() &&
|
||||
MRI->shouldTrackSubRegLiveness(DstReg))
|
||||
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1485,14 +1493,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
|
||||
} else {
|
||||
assert(MO.getReg() == NewMI.getOperand(0).getReg());
|
||||
|
||||
// We're only expecting another def of the main output, so the range
|
||||
// should get updated with the regular output range.
|
||||
//
|
||||
// FIXME: The range updating below probably needs updating to look at
|
||||
// the super register if subranges are tracked.
|
||||
assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
|
||||
"subrange update for implicit-def of super register may not be "
|
||||
"properly handled");
|
||||
// If lanemasks need to be tracked, compile the lanemask of the NewMI
|
||||
// implicit def operands to avoid subranges for the super-regs from
|
||||
// being removed by code later on in this function.
|
||||
if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
|
||||
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1516,7 +1521,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
|
||||
MRI->setRegClass(DstReg, NewRC);
|
||||
|
||||
// Update machine operands and add flags.
|
||||
updateRegDefsUses(DstReg, DstReg, DstIdx);
|
||||
updateRegDefsUses(DstReg, DstReg, DstIdx, false);
|
||||
NewMI.getOperand(0).setSubReg(NewIdx);
|
||||
// updateRegDefsUses can add an "undef" flag to the definition, since
|
||||
// it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
|
||||
@@ -1592,7 +1597,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
|
||||
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
|
||||
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
|
||||
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
|
||||
if ((SR.LaneMask & DstMask).none()) {
|
||||
if ((SR.LaneMask & DstMask).none() &&
|
||||
(SR.LaneMask & NewMIImplicitOpsMask).none()) {
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "Removing undefined SubRange "
|
||||
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
|
||||
@@ -1857,7 +1863,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
|
||||
}
|
||||
|
||||
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
|
||||
unsigned SubIdx) {
|
||||
unsigned SubIdx, bool IsSubregToReg) {
|
||||
bool DstIsPhys = DstReg.isPhysical();
|
||||
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
|
||||
|
||||
@@ -1877,6 +1883,14 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
|
||||
}
|
||||
}
|
||||
|
||||
// If DstInt already has a subrange for the unused lanes, then we shouldn't
|
||||
// create duplicate subranges when we update the interval for unused lanes.
|
||||
LaneBitmask DefinedLanes;
|
||||
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
|
||||
for (LiveInterval::SubRange &SR : DstInt->subranges())
|
||||
DefinedLanes |= SR.LaneMask;
|
||||
}
|
||||
|
||||
SmallPtrSet<MachineInstr*, 8> Visited;
|
||||
for (MachineRegisterInfo::reg_instr_iterator
|
||||
I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
|
||||
@@ -1900,6 +1914,8 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
|
||||
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
|
||||
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
|
||||
|
||||
bool FullDef = true;
|
||||
|
||||
// Replace SrcReg with DstReg in all UseMI operands.
|
||||
for (unsigned Op : Ops) {
|
||||
MachineOperand &MO = UseMI->getOperand(Op);
|
||||
@@ -1907,8 +1923,10 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
|
||||
// Adjust <undef> flags in case of sub-register joins. We don't want to
|
||||
// turn a full def into a read-modify-write sub-register def and vice
|
||||
// versa.
|
||||
if (SubIdx && MO.isDef())
|
||||
if (SubIdx && MO.isDef()) {
|
||||
MO.setIsUndef(!Reads);
|
||||
FullDef = false;
|
||||
}
|
||||
|
||||
// A subreg use of a partially undef (super) register may be a complete
|
||||
// undef use now and then has to be marked that way.
|
||||
@@ -1941,6 +1959,32 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
|
||||
MO.substVirtReg(DstReg, SubIdx, *TRI);
|
||||
}
|
||||
|
||||
if (IsSubregToReg && !FullDef) {
|
||||
// If the coalesced instruction doesn't fully define the register, we need
|
||||
// to preserve the original super register liveness for SUBREG_TO_REG.
|
||||
//
|
||||
// We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
|
||||
// but it introduces liveness for other subregisters. Downstream users may
|
||||
// have been relying on those bits, so we need to ensure their liveness is
|
||||
// captured with a def of other lanes.
|
||||
|
||||
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
|
||||
assert(DstInt->hasSubRanges() &&
|
||||
"SUBREG_TO_REG should have resulted in subrange");
|
||||
LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
|
||||
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
|
||||
LaneBitmask UnusedLanes = DstMask & ~UsedLanes & ~DefinedLanes;
|
||||
if ((UnusedLanes).any()) {
|
||||
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
|
||||
DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
|
||||
DefinedLanes |= UnusedLanes;
|
||||
}
|
||||
}
|
||||
|
||||
MachineInstrBuilder MIB(*MF, UseMI);
|
||||
MIB.addReg(DstReg, RegState::ImplicitDefine);
|
||||
}
|
||||
|
||||
LLVM_DEBUG({
|
||||
dbgs() << "\t\tupdated: ";
|
||||
if (!UseMI->isDebugInstr())
|
||||
@@ -2142,6 +2186,8 @@ bool RegisterCoalescer::joinCopy(
|
||||
});
|
||||
}
|
||||
|
||||
const bool IsSubregToReg = CopyMI->isSubregToReg();
|
||||
|
||||
ShrinkMask = LaneBitmask::getNone();
|
||||
ShrinkMainRange = false;
|
||||
|
||||
@@ -2211,9 +2257,12 @@ bool RegisterCoalescer::joinCopy(
|
||||
|
||||
// Rewrite all SrcReg operands to DstReg.
|
||||
// Also update DstReg operands to include DstIdx if it is set.
|
||||
if (CP.getDstIdx())
|
||||
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
|
||||
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
|
||||
if (CP.getDstIdx()) {
|
||||
assert(!IsSubregToReg && "can this happen?");
|
||||
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false);
|
||||
}
|
||||
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
|
||||
IsSubregToReg);
|
||||
|
||||
// Shrink subregister ranges if necessary.
|
||||
if (ShrinkMask.any()) {
|
||||
|
||||
@@ -12,7 +12,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
|
||||
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $w2, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -46,13 +46,13 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0, $x2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew)
|
||||
; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1.cmpxchg.start:
|
||||
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0, $x9
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -91,7 +91,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
|
||||
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $w2, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -243,7 +243,7 @@ define i32 @fetch_and_nand(ptr %p) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w8, 2, pcsections !0
|
||||
; CHECK-NEXT: $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
|
||||
@@ -295,7 +295,7 @@ define i32 @fetch_and_or(ptr %p) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w9, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
|
||||
; CHECK-NEXT: $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
|
||||
@@ -726,8 +726,8 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -752,7 +752,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $x0, $x1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -775,8 +775,8 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -799,8 +799,8 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -823,8 +823,8 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -847,8 +847,8 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -871,10 +871,10 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
|
||||
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -897,10 +897,10 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
|
||||
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -925,10 +925,10 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w9, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
|
||||
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
|
||||
; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -953,10 +953,10 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w9, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
|
||||
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
|
||||
; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -979,8 +979,8 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1005,7 +1005,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $x0, $x1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1028,8 +1028,8 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1052,8 +1052,8 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1076,8 +1076,8 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1100,8 +1100,8 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1124,10 +1124,10 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
|
||||
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1150,10 +1150,10 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
|
||||
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
|
||||
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1178,10 +1178,10 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w9, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
|
||||
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
|
||||
; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1206,10 +1206,10 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
|
||||
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w9, $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
|
||||
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
|
||||
; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
|
||||
; CHECK-NEXT: early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
|
||||
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@@ -1235,7 +1235,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
|
||||
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x2, $x8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
|
||||
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
|
||||
@@ -1278,7 +1278,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
|
||||
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
|
||||
; CHECK-NEXT: liveins: $w1, $x2, $x8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
|
||||
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
|
||||
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0
|
||||
; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
|
||||
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s
|
||||
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s
|
||||
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true < %s | FileCheck %s
|
||||
|
||||
; Check there's no assert in spilling from implicit-def operands on an
|
||||
; IMPLICIT_DEF.
|
||||
@@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
|
||||
; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Folded Reload
|
||||
; CHECK-NEXT: mov x0, xzr
|
||||
; CHECK-NEXT: mov x1, xzr
|
||||
; CHECK-NEXT: ; kill: def $w8 killed $w8 killed $x8 def $x8
|
||||
; CHECK-NEXT: str x8, [sp]
|
||||
; CHECK-NEXT: bl _fprintf
|
||||
; CHECK-NEXT: brk #0x1
|
||||
|
||||
@@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp {
|
||||
; CHECK-NEXT: sub sp, sp, #208
|
||||
; CHECK-NEXT: mov w8, #10 ; =0xa
|
||||
; CHECK-NEXT: mov w9, #9 ; =0x9
|
||||
; CHECK-NEXT: mov w10, #8 ; =0x8
|
||||
; CHECK-NEXT: stp x9, x8, [sp, #24]
|
||||
; CHECK-NEXT: mov w8, #7 ; =0x7
|
||||
; CHECK-NEXT: mov w9, #6 ; =0x6
|
||||
; CHECK-NEXT: mov w0, #1 ; =0x1
|
||||
; CHECK-NEXT: stp x9, x8, [sp, #24]
|
||||
; CHECK-NEXT: mov w8, #8 ; =0x8
|
||||
; CHECK-NEXT: mov w9, #6 ; =0x6
|
||||
; CHECK-NEXT: str x8, [sp, #16]
|
||||
; CHECK-NEXT: mov w8, #7 ; =0x7
|
||||
; CHECK-NEXT: mov w1, #2 ; =0x2
|
||||
; CHECK-NEXT: mov w2, #3 ; =0x3
|
||||
; CHECK-NEXT: mov w3, #4 ; =0x4
|
||||
@@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp {
|
||||
; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill
|
||||
; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill
|
||||
; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill
|
||||
; CHECK-NEXT: stp x8, x10, [sp, #8]
|
||||
; CHECK-NEXT: str x9, [sp]
|
||||
; CHECK-NEXT: stp x9, x8, [sp]
|
||||
; CHECK-NEXT: bl _callee
|
||||
; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
|
||||
|
||||
51
llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll
Normal file
51
llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll
Normal file
@@ -0,0 +1,51 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -enable-subreg-liveness=false < %s | FileCheck %s
|
||||
; RUN: llc -enable-subreg-liveness=true < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
define void @_ZN4llvm5APInt6divideEPKmjS2_jPmS3_(i32 %lhsWords, i32 %rhsWords) {
|
||||
; CHECK-LABEL: _ZN4llvm5APInt6divideEPKmjS2_jPmS3_:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: lsl w9, w0, #1
|
||||
; CHECK-NEXT: mov w10, #1 // =0x1
|
||||
; CHECK-NEXT: mov w8, w0
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: sub w9, w9, w1, lsl #1
|
||||
; CHECK-NEXT: bfi w0, w8, #1, #31
|
||||
; CHECK-NEXT: lsr w9, w9, #1
|
||||
; CHECK-NEXT: bfi w10, w9, #2, #30
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: b.hs .LBB0_2
|
||||
; CHECK-NEXT: // %bb.1: // %if.then15
|
||||
; CHECK-NEXT: lsl x8, x0, #2
|
||||
; CHECK-NEXT: ldr xzr, [x8]
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB0_2:
|
||||
; CHECK-NEXT: b _Znam
|
||||
%mul = shl i32 %rhsWords, 1
|
||||
%mul1 = shl i32 %lhsWords, 1
|
||||
%sub = sub i32 %mul1, %mul
|
||||
%add7 = or i32 %mul1, 1
|
||||
%idxprom = zext i32 %add7 to i64
|
||||
%mul3 = shl i32 %sub, 1
|
||||
%add4 = or i32 %mul3, 1
|
||||
%1 = icmp ult i32 %add4, 0
|
||||
br i1 %1, label %if.then15, label %3
|
||||
|
||||
common.ret: ; preds = %3, %if.then15
|
||||
ret void
|
||||
|
||||
if.then15: ; preds = %0
|
||||
%idxprom12 = zext i32 %add7 to i64
|
||||
%arrayidx13 = getelementptr [128 x i32], ptr null, i64 0, i64 %idxprom12
|
||||
%2 = load volatile ptr, ptr %arrayidx13, align 8
|
||||
br label %common.ret
|
||||
|
||||
3: ; preds = %0
|
||||
%call = tail call ptr @_Znam(i64 %idxprom)
|
||||
br label %common.ret
|
||||
}
|
||||
|
||||
declare ptr @_Znam(i64)
|
||||
@@ -0,0 +1,23 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s
|
||||
---
|
||||
name: test
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x1
|
||||
; CHECK-LABEL: name: test
|
||||
; CHECK: liveins: $x1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $x0 = COPY $x1
|
||||
; CHECK-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $x1, implicit $x0
|
||||
%190:gpr64 = COPY killed $x1
|
||||
%191:gpr32 = COPY %190.sub_32:gpr64
|
||||
%192:gpr32 = ORRWrr $wzr, killed %191:gpr32
|
||||
%193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32
|
||||
$x0 = COPY killed %190:gpr64
|
||||
$x1 = COPY killed %193:gpr64all
|
||||
RET_ReallyLR implicit $x1, implicit $x0
|
||||
...
|
||||
@@ -7,8 +7,8 @@
|
||||
# CHECK-DBG: ********** JOINING INTERVALS ***********
|
||||
# CHECK-DBG: ********** INTERVALS **********
|
||||
# CHECK-DBG: %0 [16r,32r:0) 0@16r weight:0.000000e+00
|
||||
# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00
|
||||
# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [112e,112d:0) 0@112e L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00
|
||||
# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000080 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00
|
||||
# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00
|
||||
# CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0@112r 1@32r weight:0.000000e+00
|
||||
---
|
||||
name: test
|
||||
@@ -43,7 +43,7 @@ body: |
|
||||
# CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0@48B-phi 1@320r 2@32r
|
||||
# CHECK-DBG-SAME: weight:0.000000e+00
|
||||
# CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi
|
||||
# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0@288r 1@x 2@x 3@304B-phi
|
||||
# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@x 3@304B-phi
|
||||
# CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi
|
||||
# CHECK-DBG-SAME: weight:0.000000e+00
|
||||
---
|
||||
@@ -127,3 +127,55 @@ body: |
|
||||
B %bb.1
|
||||
|
||||
...
|
||||
# Test that the interval `L0000000000000080 [112r,112d:1)` is not removed,
|
||||
# when removing undefined subranges.
|
||||
#
|
||||
# CHECK-DBG: ********** REGISTER COALESCER **********
|
||||
# CHECK-DBG: ********** Function: reproducer3
|
||||
# CHECK-DBG: ********** JOINING INTERVALS ***********
|
||||
# CHECK-DBG: ********** INTERVALS **********
|
||||
# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0@0B-phi 1@320r
|
||||
# CHECK-DBG: W1 [0B,16r:0) 0@0B-phi
|
||||
# CHECK-DBG: %0 [16r,64r:0) 0@16r weight:0.000000e+00
|
||||
# CHECK-DBG: %1 [32r,128r:0) 0@32r weight:0.000000e+00
|
||||
# CHECK-DBG: %2 [48r,64r:0) 0@48r weight:0.000000e+00
|
||||
# CHECK-DBG: %3 [64r,80r:0) 0@64r weight:0.000000e+00
|
||||
# CHECK-DBG: %4 [80r,176r:0) 0@80r weight:0.000000e+00
|
||||
# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
|
||||
# CHECK-DBG-SAME: L0000000000000080 [112r,112d:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
|
||||
# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
|
||||
# CHECK-DBG-SAME: weight:0.000000e+00
|
||||
# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0@176r 1@96r weight:0.000000e+00
|
||||
# CHECK-DBG: %9 [256r,272r:0) 0@256r weight:0.000000e+00
|
||||
---
|
||||
name: reproducer3
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1
|
||||
|
||||
%0:gpr32 = COPY killed $w1
|
||||
%1:gpr32 = COPY killed $w0
|
||||
%3:gpr32 = UBFMWri %1, 31, 30
|
||||
%4:gpr32 = SUBWrs killed %3, killed %0, 1
|
||||
%5:gpr32 = UBFMWri killed %4, 1, 31
|
||||
%6:gpr32 = MOVi32imm 1
|
||||
%7:gpr32 = COPY %6
|
||||
%7:gpr32 = BFMWri %7, killed %1, 31, 30
|
||||
%8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32
|
||||
%9:gpr32common = COPY killed %6
|
||||
%9:gpr32common = BFMWri %9, killed %5, 30, 29
|
||||
dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv
|
||||
Bcc 2, %bb.2, implicit killed $nzcv
|
||||
B %bb.1
|
||||
|
||||
bb.1:
|
||||
%10:gpr64common = UBFMXri killed %8, 62, 61
|
||||
dead $xzr = LDRXui killed %10, 0
|
||||
RET_ReallyLR
|
||||
|
||||
bb.2:
|
||||
$x0 = COPY killed %8
|
||||
RET_ReallyLR implicit killed $x0
|
||||
|
||||
...
|
||||
|
||||
@@ -750,21 +750,25 @@ entry:
|
||||
define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
|
||||
; CHECK-64-LABEL: testDoubleImm1:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-64-NEXT: xxpermdi 34, 1, 34, 1
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: testDoubleImm1:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDoubleImm1:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoubleImm1:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
|
||||
@@ -1757,7 +1757,11 @@ entry:
|
||||
define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
|
||||
; P9BE-LABEL: fromRegsConvdtoi:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P9BE-NEXT: xxmrghd vs0, vs2, vs4
|
||||
; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3
|
||||
; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9BE-NEXT: xvcvdpsxws v2, vs0
|
||||
; P9BE-NEXT: xxmrghd vs0, vs1, vs3
|
||||
; P9BE-NEXT: xvcvdpsxws v3, vs0
|
||||
@@ -1766,7 +1770,11 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
|
||||
;
|
||||
; P9LE-LABEL: fromRegsConvdtoi:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3
|
||||
; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9LE-NEXT: xxmrghd vs0, vs3, vs1
|
||||
; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P9LE-NEXT: xvcvdpsxws v2, vs0
|
||||
; P9LE-NEXT: xxmrghd vs0, vs4, vs2
|
||||
; P9LE-NEXT: xvcvdpsxws v3, vs0
|
||||
@@ -1775,6 +1783,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
|
||||
;
|
||||
; P8BE-LABEL: fromRegsConvdtoi:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3
|
||||
; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8BE-NEXT: xxmrghd vs0, vs2, vs4
|
||||
; P8BE-NEXT: xxmrghd vs1, vs1, vs3
|
||||
; P8BE-NEXT: xvcvdpsxws v2, vs0
|
||||
@@ -1784,6 +1796,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
|
||||
;
|
||||
; P8LE-LABEL: fromRegsConvdtoi:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3
|
||||
; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8LE-NEXT: xxmrghd vs0, vs3, vs1
|
||||
; P8LE-NEXT: xxmrghd vs1, vs4, vs2
|
||||
; P8LE-NEXT: xvcvdpsxws v2, vs0
|
||||
@@ -3246,7 +3262,11 @@ entry:
|
||||
define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) {
|
||||
; P9BE-LABEL: fromRegsConvdtoui:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P9BE-NEXT: xxmrghd vs0, vs2, vs4
|
||||
; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3
|
||||
; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9BE-NEXT: xvcvdpuxws v2, vs0
|
||||
; P9BE-NEXT: xxmrghd vs0, vs1, vs3
|
||||
; P9BE-NEXT: xvcvdpuxws v3, vs0
|
||||
@@ -3255,7 +3275,11 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
|
||||
;
|
||||
; P9LE-LABEL: fromRegsConvdtoui:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3
|
||||
; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9LE-NEXT: xxmrghd vs0, vs3, vs1
|
||||
; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P9LE-NEXT: xvcvdpuxws v2, vs0
|
||||
; P9LE-NEXT: xxmrghd vs0, vs4, vs2
|
||||
; P9LE-NEXT: xvcvdpuxws v3, vs0
|
||||
@@ -3264,6 +3288,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
|
||||
;
|
||||
; P8BE-LABEL: fromRegsConvdtoui:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3
|
||||
; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8BE-NEXT: xxmrghd vs0, vs2, vs4
|
||||
; P8BE-NEXT: xxmrghd vs1, vs1, vs3
|
||||
; P8BE-NEXT: xvcvdpuxws v2, vs0
|
||||
@@ -3273,6 +3301,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
|
||||
;
|
||||
; P8LE-LABEL: fromRegsConvdtoui:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3
|
||||
; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8LE-NEXT: xxmrghd vs0, vs3, vs1
|
||||
; P8LE-NEXT: xxmrghd vs1, vs4, vs2
|
||||
; P8LE-NEXT: xvcvdpuxws v2, vs0
|
||||
@@ -4558,24 +4590,32 @@ entry:
|
||||
define <2 x i64> @fromRegsConvdtoll(double %a, double %b) {
|
||||
; P9BE-LABEL: fromRegsConvdtoll:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9BE-NEXT: xxmrghd vs0, vs1, vs2
|
||||
; P9BE-NEXT: xvcvdpsxds v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fromRegsConvdtoll:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9LE-NEXT: xxmrghd vs0, vs2, vs1
|
||||
; P9LE-NEXT: xvcvdpsxds v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fromRegsConvdtoll:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8BE-NEXT: xxmrghd vs0, vs1, vs2
|
||||
; P8BE-NEXT: xvcvdpsxds v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fromRegsConvdtoll:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8LE-NEXT: xxmrghd vs0, vs2, vs1
|
||||
; P8LE-NEXT: xvcvdpsxds v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
@@ -5724,24 +5764,32 @@ entry:
|
||||
define <2 x i64> @fromRegsConvdtoull(double %a, double %b) {
|
||||
; P9BE-LABEL: fromRegsConvdtoull:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9BE-NEXT: xxmrghd vs0, vs1, vs2
|
||||
; P9BE-NEXT: xvcvdpuxds v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fromRegsConvdtoull:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9LE-NEXT: xxmrghd vs0, vs2, vs1
|
||||
; P9LE-NEXT: xvcvdpuxds v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fromRegsConvdtoull:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8BE-NEXT: xxmrghd vs0, vs1, vs2
|
||||
; P8BE-NEXT: xvcvdpuxds v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fromRegsConvdtoull:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8LE-NEXT: xxmrghd vs0, vs2, vs1
|
||||
; P8LE-NEXT: xvcvdpuxds v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
|
||||
@@ -562,6 +562,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
|
||||
; CHECK-P8-NEXT: bl dummy
|
||||
; CHECK-P8-NEXT: nop
|
||||
; CHECK-P8-NEXT: xxlxor f0, f0, f0
|
||||
; CHECK-P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
|
||||
; CHECK-P8-NEXT: xxswapd vs0, vs0
|
||||
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
|
||||
@@ -576,6 +577,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
|
||||
; CHECK-P9-NEXT: bl dummy
|
||||
; CHECK-P9-NEXT: nop
|
||||
; CHECK-P9-NEXT: xxlxor f0, f0, f0
|
||||
; CHECK-P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
|
||||
; CHECK-P9-NEXT: stxv vs0, 0(r30)
|
||||
;
|
||||
@@ -589,6 +591,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
|
||||
; CHECK-P9-BE-NEXT: bl dummy
|
||||
; CHECK-P9-BE-NEXT: nop
|
||||
; CHECK-P9-BE-NEXT: xxlxor f0, f0, f0
|
||||
; CHECK-P9-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-P9-BE-NEXT: xxmrghd vs0, vs0, vs1
|
||||
; CHECK-P9-BE-NEXT: stxv vs0, 0(r30)
|
||||
;
|
||||
@@ -615,6 +618,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
|
||||
; CHECK-P7-NEXT: bl dummy
|
||||
; CHECK-P7-NEXT: nop
|
||||
; CHECK-P7-NEXT: xxlxor f0, f0, f0
|
||||
; CHECK-P7-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-P7-NEXT: xxmrghd vs0, vs1, vs0
|
||||
; CHECK-P7-NEXT: xxswapd vs0, vs0
|
||||
; CHECK-P7-NEXT: stxvd2x vs0, 0, r30
|
||||
@@ -629,6 +633,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
|
||||
; P8-AIX-64-NEXT: bl .dummy[PR]
|
||||
; P8-AIX-64-NEXT: nop
|
||||
; P8-AIX-64-NEXT: xxlxor f0, f0, f0
|
||||
; P8-AIX-64-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8-AIX-64-NEXT: xxmrghd vs0, vs0, vs1
|
||||
; P8-AIX-64-NEXT: stxvd2x vs0, 0, r31
|
||||
;
|
||||
@@ -642,6 +647,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
|
||||
; P8-AIX-32-NEXT: bl .dummy[PR]
|
||||
; P8-AIX-32-NEXT: nop
|
||||
; P8-AIX-32-NEXT: xxlxor f0, f0, f0
|
||||
; P8-AIX-32-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8-AIX-32-NEXT: xxmrghd vs0, vs0, vs1
|
||||
; P8-AIX-32-NEXT: stxvd2x vs0, 0, r31
|
||||
test_entry:
|
||||
|
||||
@@ -6,6 +6,7 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
|
||||
; CHECK-LABEL: fneg_fdiv_splat:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-NEXT: xxspltd 0, 1, 0
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; CHECK-NEXT: xvredp 1, 0
|
||||
|
||||
@@ -229,6 +229,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
|
||||
; P8-NEXT: xscvspdpn f1, vs0
|
||||
; P8-NEXT: bl nearbyintf
|
||||
; P8-NEXT: nop
|
||||
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8-NEXT: xxmrghd vs0, vs1, v30
|
||||
; P8-NEXT: xscvspdpn f1, v31
|
||||
; P8-NEXT: xvcvdpsp v29, vs0
|
||||
@@ -239,6 +240,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
|
||||
; P8-NEXT: xscvspdpn f1, vs0
|
||||
; P8-NEXT: bl nearbyintf
|
||||
; P8-NEXT: nop
|
||||
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8-NEXT: xxmrghd vs0, v30, vs1
|
||||
; P8-NEXT: li r3, 160
|
||||
; P8-NEXT: xvcvdpsp v2, vs0
|
||||
@@ -276,6 +278,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
|
||||
; P9-NEXT: xscvspdpn f1, vs0
|
||||
; P9-NEXT: bl nearbyintf
|
||||
; P9-NEXT: nop
|
||||
; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9-NEXT: xxmrghd vs0, vs1, v30
|
||||
; P9-NEXT: xscvspdpn f1, v31
|
||||
; P9-NEXT: xvcvdpsp v29, vs0
|
||||
@@ -286,6 +289,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
|
||||
; P9-NEXT: xscvspdpn f1, vs0
|
||||
; P9-NEXT: bl nearbyintf
|
||||
; P9-NEXT: nop
|
||||
; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9-NEXT: xxmrghd vs0, v30, vs1
|
||||
; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
|
||||
; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
|
||||
@@ -326,6 +330,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric
|
||||
; P8-NEXT: bl nearbyint
|
||||
; P8-NEXT: nop
|
||||
; P8-NEXT: li r3, 144
|
||||
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8-NEXT: xxmrghd v2, v30, vs1
|
||||
; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
|
||||
; P8-NEXT: li r3, 128
|
||||
@@ -354,6 +359,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric
|
||||
; P9-NEXT: xxswapd vs1, v31
|
||||
; P9-NEXT: bl nearbyint
|
||||
; P9-NEXT: nop
|
||||
; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P9-NEXT: xxmrghd v2, v30, vs1
|
||||
; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload
|
||||
; P9-NEXT: lxv v30, 32(r1) # 16-byte Folded Reload
|
||||
|
||||
@@ -70,6 +70,7 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-NEXT: xscvspdpn 2, 0
|
||||
; CHECK-NEXT: bl fmodf
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-NEXT: xxmrghd 0, 1, 61
|
||||
; CHECK-NEXT: xscvspdpn 1, 62
|
||||
; CHECK-NEXT: xscvspdpn 2, 63
|
||||
@@ -83,6 +84,7 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-NEXT: xscvspdpn 2, 0
|
||||
; CHECK-NEXT: bl fmodf
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-NEXT: xxmrghd 0, 61, 1
|
||||
; CHECK-NEXT: lxv 63, 80(1) # 16-byte Folded Reload
|
||||
; CHECK-NEXT: lxv 62, 64(1) # 16-byte Folded Reload
|
||||
@@ -124,6 +126,7 @@ define <2 x double> @frem2x64(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK-NEXT: xxswapd 2, 63
|
||||
; CHECK-NEXT: bl fmod
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-NEXT: xxmrghd 34, 61, 1
|
||||
; CHECK-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
; CHECK-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
|
||||
|
||||
@@ -666,6 +666,7 @@ define <4 x float> @test_extend32_vec4(ptr %p) #0 {
|
||||
; P8-NEXT: bl __gnu_h2f_ieee
|
||||
; P8-NEXT: nop
|
||||
; P8-NEXT: li r3, 80
|
||||
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; P8-NEXT: xxmrghd vs0, vs61, vs1
|
||||
; P8-NEXT: xxmrghd vs1, vs63, vs62
|
||||
; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
|
||||
|
||||
@@ -122,6 +122,7 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
|
||||
; CHECK-NEXT: extsw r4, r3
|
||||
; CHECK-NEXT: bl ldexpf
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-NEXT: xxmrghd vs0, vs1, v29
|
||||
; CHECK-NEXT: li r3, 0
|
||||
; CHECK-NEXT: vextuwrx r3, r3, v31
|
||||
@@ -138,6 +139,7 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
|
||||
; CHECK-NEXT: xscvspdpn f1, vs0
|
||||
; CHECK-NEXT: bl ldexpf
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-NEXT: xxmrghd vs0, vs1, v29
|
||||
; CHECK-NEXT: lxv v31, 80(r1) # 16-byte Folded Reload
|
||||
; CHECK-NEXT: lxv v30, 64(r1) # 16-byte Folded Reload
|
||||
|
||||
@@ -940,21 +940,25 @@ entry:
|
||||
define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
|
||||
; CHECK-LABEL: testDoubleImm1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-NEXT: xxmrghd v2, v2, vs1
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: testDoubleImm1:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-BE-NEXT: xxpermdi v2, vs1, v2, 1
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LABEL: testDoubleImm1:
|
||||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-P9-NEXT: xxpermdi v2, vs1, v2, 1
|
||||
; CHECK-P9-NEXT: blr
|
||||
;
|
||||
; AIX-P8-LABEL: testDoubleImm1:
|
||||
; AIX-P8: # %bb.0: # %entry
|
||||
; AIX-P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; AIX-P8-NEXT: xxpermdi v2, vs1, v2, 1
|
||||
; AIX-P8-NEXT: blr
|
||||
entry:
|
||||
|
||||
@@ -107,6 +107,10 @@ entry:
|
||||
define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double> %y) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_fdiv_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE-NEXT: xsdivdp 3, 3, 6
|
||||
@@ -116,6 +120,10 @@ define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double>
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_fdiv_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE9-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE9-NEXT: xsdivdp 3, 3, 6
|
||||
@@ -209,6 +217,7 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
|
||||
; PC64LE-NEXT: bl fmod
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -239,6 +248,7 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
|
||||
; PC64LE9-NEXT: xxswapd 2, 63
|
||||
; PC64LE9-NEXT: bl fmod
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 61, 1
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
|
||||
@@ -390,6 +400,7 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
|
||||
; PC64LE-NEXT: fmr 2, 30
|
||||
; PC64LE-NEXT: bl fmod
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 29
|
||||
; PC64LE-NEXT: fmr 2, 31
|
||||
@@ -431,6 +442,7 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
|
||||
; PC64LE9-NEXT: fmr 2, 30
|
||||
; PC64LE9-NEXT: bl fmod
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 29
|
||||
; PC64LE9-NEXT: fmr 2, 31
|
||||
@@ -486,6 +498,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
|
||||
; PC64LE-NEXT: xxswapd 2, 62
|
||||
; PC64LE-NEXT: bl fmod
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 59, 1
|
||||
; PC64LE-NEXT: xxlor 1, 61, 61
|
||||
; PC64LE-NEXT: xxlor 2, 63, 63
|
||||
@@ -498,6 +511,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 112
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 60, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 96
|
||||
@@ -536,6 +550,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
|
||||
; PC64LE9-NEXT: xxswapd 2, 62
|
||||
; PC64LE9-NEXT: bl fmod
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 59, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 61, 61
|
||||
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
|
||||
@@ -546,6 +561,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
|
||||
; PC64LE9-NEXT: xxswapd 2, 63
|
||||
; PC64LE9-NEXT: bl fmod
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 60, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload
|
||||
@@ -670,6 +686,10 @@ entry:
|
||||
define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double> %y) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_fmul_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE-NEXT: xsmuldp 3, 3, 6
|
||||
@@ -679,6 +699,10 @@ define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double>
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_fmul_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE9-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE9-NEXT: xsmuldp 3, 3, 6
|
||||
@@ -820,6 +844,10 @@ entry:
|
||||
define <3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double> %y) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_fadd_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE-NEXT: xsadddp 3, 3, 6
|
||||
@@ -829,6 +857,10 @@ define <3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double>
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_fadd_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE9-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE9-NEXT: xsadddp 3, 3, 6
|
||||
@@ -970,6 +1002,10 @@ entry:
|
||||
define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double> %y) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_fsub_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE-NEXT: xssubdp 3, 3, 6
|
||||
@@ -979,6 +1015,10 @@ define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double>
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_fsub_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE9-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE9-NEXT: xssubdp 3, 3, 6
|
||||
@@ -1105,6 +1145,8 @@ entry:
|
||||
define <3 x double> @constrained_vector_sqrt_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_sqrt_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE-NEXT: xssqrtdp 3, 3
|
||||
; PC64LE-NEXT: xvsqrtdp 2, 0
|
||||
@@ -1113,6 +1155,8 @@ define <3 x double> @constrained_vector_sqrt_v3f64(<3 x double> %x) #0 {
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_sqrt_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE9-NEXT: xssqrtdp 3, 3
|
||||
; PC64LE9-NEXT: xvsqrtdp 2, 0
|
||||
@@ -1203,6 +1247,7 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
|
||||
; PC64LE-NEXT: bl pow
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -1233,6 +1278,7 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
|
||||
; PC64LE9-NEXT: xxswapd 2, 63
|
||||
; PC64LE9-NEXT: bl pow
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 61, 1
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
|
||||
@@ -1384,6 +1430,7 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
|
||||
; PC64LE-NEXT: fmr 2, 30
|
||||
; PC64LE-NEXT: bl pow
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 29
|
||||
; PC64LE-NEXT: fmr 2, 31
|
||||
@@ -1425,6 +1472,7 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
|
||||
; PC64LE9-NEXT: fmr 2, 30
|
||||
; PC64LE9-NEXT: bl pow
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 29
|
||||
; PC64LE9-NEXT: fmr 2, 31
|
||||
@@ -1480,6 +1528,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
|
||||
; PC64LE-NEXT: xxswapd 2, 62
|
||||
; PC64LE-NEXT: bl pow
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 59, 1
|
||||
; PC64LE-NEXT: xxlor 1, 61, 61
|
||||
; PC64LE-NEXT: xxlor 2, 63, 63
|
||||
@@ -1492,6 +1541,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 112
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 60, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 96
|
||||
@@ -1530,6 +1580,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
|
||||
; PC64LE9-NEXT: xxswapd 2, 62
|
||||
; PC64LE9-NEXT: bl pow
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 59, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 61, 61
|
||||
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
|
||||
@@ -1540,6 +1591,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
|
||||
; PC64LE9-NEXT: xxswapd 2, 63
|
||||
; PC64LE9-NEXT: bl pow
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 60, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload
|
||||
@@ -1618,6 +1670,7 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
|
||||
; PC64LE-NEXT: bl __powidf2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: ld 30, 80(1) # 8-byte Folded Reload
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
@@ -1647,6 +1700,7 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
|
||||
; PC64LE9-NEXT: mr 4, 30
|
||||
; PC64LE9-NEXT: bl __powidf2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -1790,6 +1844,7 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
|
||||
; PC64LE-NEXT: mr 4, 30
|
||||
; PC64LE-NEXT: bl __powidf2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: mr 4, 30
|
||||
@@ -1828,6 +1883,7 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
|
||||
; PC64LE9-NEXT: mr 4, 30
|
||||
; PC64LE9-NEXT: bl __powidf2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: mr 4, 30
|
||||
@@ -1878,6 +1934,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
|
||||
; PC64LE-NEXT: mr 4, 30
|
||||
; PC64LE-NEXT: bl __powidf2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: mr 4, 30
|
||||
@@ -1890,6 +1947,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: ld 30, 96(1) # 8-byte Folded Reload
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
@@ -1923,6 +1981,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
|
||||
; PC64LE9-NEXT: mr 4, 30
|
||||
; PC64LE9-NEXT: bl __powidf2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: mr 4, 30
|
||||
@@ -1933,6 +1992,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
|
||||
; PC64LE9-NEXT: mr 4, 30
|
||||
; PC64LE9-NEXT: bl __powidf2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -2003,6 +2063,7 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE-NEXT: bl sin
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
@@ -2027,6 +2088,7 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl sin
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -2149,6 +2211,7 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-NEXT: fmr 1, 30
|
||||
; PC64LE-NEXT: bl sin
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: bl sin
|
||||
@@ -2181,6 +2244,7 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: fmr 1, 30
|
||||
; PC64LE9-NEXT: bl sin
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: bl sin
|
||||
@@ -2224,6 +2288,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: xxswapd 1, 62
|
||||
; PC64LE-NEXT: bl sin
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: bl sin
|
||||
@@ -2234,6 +2299,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -2262,6 +2328,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 62
|
||||
; PC64LE9-NEXT: bl sin
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: bl sin
|
||||
@@ -2270,6 +2337,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl sin
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -2338,6 +2406,7 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE-NEXT: bl cos
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
@@ -2362,6 +2431,7 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl cos
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -2484,6 +2554,7 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-NEXT: fmr 1, 30
|
||||
; PC64LE-NEXT: bl cos
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: bl cos
|
||||
@@ -2516,6 +2587,7 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: fmr 1, 30
|
||||
; PC64LE9-NEXT: bl cos
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: bl cos
|
||||
@@ -2559,6 +2631,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: xxswapd 1, 62
|
||||
; PC64LE-NEXT: bl cos
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: bl cos
|
||||
@@ -2569,6 +2642,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -2597,6 +2671,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 62
|
||||
; PC64LE9-NEXT: bl cos
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: bl cos
|
||||
@@ -2605,6 +2680,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl cos
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -2673,6 +2749,7 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE-NEXT: bl exp
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
@@ -2697,6 +2774,7 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl exp
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -2819,6 +2897,7 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-NEXT: fmr 1, 30
|
||||
; PC64LE-NEXT: bl exp
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: bl exp
|
||||
@@ -2851,6 +2930,7 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: fmr 1, 30
|
||||
; PC64LE9-NEXT: bl exp
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: bl exp
|
||||
@@ -2894,6 +2974,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: xxswapd 1, 62
|
||||
; PC64LE-NEXT: bl exp
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: bl exp
|
||||
@@ -2904,6 +2985,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -2932,6 +3014,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 62
|
||||
; PC64LE9-NEXT: bl exp
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: bl exp
|
||||
@@ -2940,6 +3023,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl exp
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -3008,6 +3092,7 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE-NEXT: bl exp2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
@@ -3032,6 +3117,7 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl exp2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -3154,6 +3240,7 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-NEXT: fmr 1, 30
|
||||
; PC64LE-NEXT: bl exp2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: bl exp2
|
||||
@@ -3186,6 +3273,7 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: fmr 1, 30
|
||||
; PC64LE9-NEXT: bl exp2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: bl exp2
|
||||
@@ -3229,6 +3317,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: xxswapd 1, 62
|
||||
; PC64LE-NEXT: bl exp2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: bl exp2
|
||||
@@ -3239,6 +3328,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -3267,6 +3357,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 62
|
||||
; PC64LE9-NEXT: bl exp2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: bl exp2
|
||||
@@ -3275,6 +3366,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl exp2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -3343,6 +3435,7 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE-NEXT: bl log
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
@@ -3367,6 +3460,7 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl log
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -3489,6 +3583,7 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-NEXT: fmr 1, 30
|
||||
; PC64LE-NEXT: bl log
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: bl log
|
||||
@@ -3521,6 +3616,7 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: fmr 1, 30
|
||||
; PC64LE9-NEXT: bl log
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: bl log
|
||||
@@ -3564,6 +3660,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: xxswapd 1, 62
|
||||
; PC64LE-NEXT: bl log
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: bl log
|
||||
@@ -3574,6 +3671,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -3602,6 +3700,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 62
|
||||
; PC64LE9-NEXT: bl log
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: bl log
|
||||
@@ -3610,6 +3709,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl log
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -3678,6 +3778,7 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE-NEXT: bl log10
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
@@ -3702,6 +3803,7 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl log10
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -3824,6 +3926,7 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-NEXT: fmr 1, 30
|
||||
; PC64LE-NEXT: bl log10
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: bl log10
|
||||
@@ -3856,6 +3959,7 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: fmr 1, 30
|
||||
; PC64LE9-NEXT: bl log10
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: bl log10
|
||||
@@ -3899,6 +4003,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: xxswapd 1, 62
|
||||
; PC64LE-NEXT: bl log10
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: bl log10
|
||||
@@ -3909,6 +4014,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -3937,6 +4043,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 62
|
||||
; PC64LE9-NEXT: bl log10
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: bl log10
|
||||
@@ -3945,6 +4052,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl log10
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -4013,6 +4121,7 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE-NEXT: bl log2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
@@ -4037,6 +4146,7 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl log2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -4159,6 +4269,7 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-NEXT: fmr 1, 30
|
||||
; PC64LE-NEXT: bl log2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: bl log2
|
||||
@@ -4191,6 +4302,7 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: fmr 1, 30
|
||||
; PC64LE9-NEXT: bl log2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: bl log2
|
||||
@@ -4234,6 +4346,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: xxswapd 1, 62
|
||||
; PC64LE-NEXT: bl log2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: bl log2
|
||||
@@ -4244,6 +4357,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -4272,6 +4386,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 62
|
||||
; PC64LE9-NEXT: bl log2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: bl log2
|
||||
@@ -4280,6 +4395,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl log2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -4387,6 +4503,8 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
|
||||
define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_rint_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE-NEXT: xsrdpic 3, 3
|
||||
; PC64LE-NEXT: xvrdpic 2, 0
|
||||
@@ -4395,6 +4513,8 @@ define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 {
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_rint_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE9-NEXT: xsrdpic 3, 3
|
||||
; PC64LE9-NEXT: xvrdpic 2, 0
|
||||
@@ -4479,6 +4599,7 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE-NEXT: bl nearbyint
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
@@ -4503,6 +4624,7 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl nearbyint
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -4625,6 +4747,7 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-NEXT: fmr 1, 30
|
||||
; PC64LE-NEXT: bl nearbyint
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: bl nearbyint
|
||||
@@ -4657,6 +4780,7 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: fmr 1, 30
|
||||
; PC64LE9-NEXT: bl nearbyint
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: bl nearbyint
|
||||
@@ -4700,6 +4824,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: xxswapd 1, 62
|
||||
; PC64LE-NEXT: bl nearbyint
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: bl nearbyint
|
||||
@@ -4710,6 +4835,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -4738,6 +4864,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 62
|
||||
; PC64LE9-NEXT: bl nearbyint
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: bl nearbyint
|
||||
@@ -4746,6 +4873,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl nearbyint
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -4927,6 +5055,10 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
|
||||
; PC64LE-NEXT: mflr 0
|
||||
; PC64LE-NEXT: stdu 1, -64(1)
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE-NEXT: std 0, 80(1)
|
||||
@@ -4950,6 +5082,10 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: mflr 0
|
||||
; PC64LE9-NEXT: stdu 1, -48(1)
|
||||
; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE9-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE9-NEXT: std 0, 64(1)
|
||||
@@ -5159,6 +5295,10 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
|
||||
; PC64LE-NEXT: mflr 0
|
||||
; PC64LE-NEXT: stdu 1, -64(1)
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE-NEXT: std 0, 80(1)
|
||||
@@ -5182,6 +5322,10 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: mflr 0
|
||||
; PC64LE9-NEXT: stdu 1, -48(1)
|
||||
; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
|
||||
; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 5, 4
|
||||
; PC64LE9-NEXT: xxmrghd 1, 2, 1
|
||||
; PC64LE9-NEXT: std 0, 64(1)
|
||||
@@ -6520,6 +6664,8 @@ entry:
|
||||
define <3 x double> @constrained_vector_ceil_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_ceil_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE-NEXT: xsrdpip 3, 3
|
||||
; PC64LE-NEXT: xvrdpip 2, 0
|
||||
@@ -6528,6 +6674,8 @@ define <3 x double> @constrained_vector_ceil_v3f64(<3 x double> %x) #0 {
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE9-NEXT: xsrdpip 3, 3
|
||||
; PC64LE9-NEXT: xvrdpip 2, 0
|
||||
@@ -6628,6 +6776,8 @@ entry:
|
||||
define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_floor_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE-NEXT: xsrdpim 3, 3
|
||||
; PC64LE-NEXT: xvrdpim 2, 0
|
||||
@@ -6636,6 +6786,8 @@ define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 {
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_floor_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE9-NEXT: xsrdpim 3, 3
|
||||
; PC64LE9-NEXT: xvrdpim 2, 0
|
||||
@@ -6736,6 +6888,8 @@ entry:
|
||||
define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_round_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE-NEXT: xsrdpi 3, 3
|
||||
; PC64LE-NEXT: xvrdpi 2, 0
|
||||
@@ -6744,6 +6898,8 @@ define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 {
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_round_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE9-NEXT: xsrdpi 3, 3
|
||||
; PC64LE9-NEXT: xvrdpi 2, 0
|
||||
@@ -6843,6 +6999,8 @@ entry:
|
||||
define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_trunc_v3f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE-NEXT: xsrdpiz 3, 3
|
||||
; PC64LE-NEXT: xvrdpiz 2, 0
|
||||
@@ -6851,6 +7009,8 @@ define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 {
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 0, 2, 1
|
||||
; PC64LE9-NEXT: xsrdpiz 3, 3
|
||||
; PC64LE9-NEXT: xvrdpiz 2, 0
|
||||
@@ -8049,6 +8209,7 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE-NEXT: bl tan
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 48
|
||||
@@ -8073,6 +8234,7 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl tan
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 62, 1
|
||||
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
|
||||
@@ -8195,6 +8357,7 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE-NEXT: fmr 1, 30
|
||||
; PC64LE-NEXT: bl tan
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 31
|
||||
; PC64LE-NEXT: bl tan
|
||||
@@ -8227,6 +8390,7 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: fmr 1, 30
|
||||
; PC64LE9-NEXT: bl tan
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 31
|
||||
; PC64LE9-NEXT: bl tan
|
||||
@@ -8270,6 +8434,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: xxswapd 1, 62
|
||||
; PC64LE-NEXT: bl tan
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE-NEXT: xxlor 1, 63, 63
|
||||
; PC64LE-NEXT: bl tan
|
||||
@@ -8280,6 +8445,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -8308,6 +8474,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 62
|
||||
; PC64LE9-NEXT: bl tan
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 61, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
|
||||
; PC64LE9-NEXT: bl tan
|
||||
@@ -8316,6 +8483,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE9-NEXT: xxswapd 1, 63
|
||||
; PC64LE9-NEXT: bl tan
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 61, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
@@ -8390,6 +8558,7 @@ define <2 x double> @constrained_vector_atan2_v2f64(<2 x double> %x, <2 x double
|
||||
; PC64LE-NEXT: bl atan2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 80
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 34, 61, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 64
|
||||
@@ -8420,6 +8589,7 @@ define <2 x double> @constrained_vector_atan2_v2f64(<2 x double> %x, <2 x double
|
||||
; PC64LE9-NEXT: xxswapd 2, 63
|
||||
; PC64LE9-NEXT: bl atan2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 34, 61, 1
|
||||
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
|
||||
; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
|
||||
@@ -8571,6 +8741,7 @@ define <3 x double> @constrained_vector_atan2_v3f64(<3 x double> %x, <3 x double
|
||||
; PC64LE-NEXT: fmr 2, 30
|
||||
; PC64LE-NEXT: bl atan2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE-NEXT: fmr 1, 29
|
||||
; PC64LE-NEXT: fmr 2, 31
|
||||
@@ -8612,6 +8783,7 @@ define <3 x double> @constrained_vector_atan2_v3f64(<3 x double> %x, <3 x double
|
||||
; PC64LE9-NEXT: fmr 2, 30
|
||||
; PC64LE9-NEXT: bl atan2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 63, 1, 63
|
||||
; PC64LE9-NEXT: fmr 1, 29
|
||||
; PC64LE9-NEXT: fmr 2, 31
|
||||
@@ -8667,6 +8839,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
|
||||
; PC64LE-NEXT: xxswapd 2, 62
|
||||
; PC64LE-NEXT: bl atan2
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 62, 59, 1
|
||||
; PC64LE-NEXT: xxlor 1, 61, 61
|
||||
; PC64LE-NEXT: xxlor 2, 63, 63
|
||||
@@ -8679,6 +8852,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
|
||||
; PC64LE-NEXT: nop
|
||||
; PC64LE-NEXT: li 3, 112
|
||||
; PC64LE-NEXT: vmr 2, 30
|
||||
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE-NEXT: xxmrghd 35, 60, 1
|
||||
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
|
||||
; PC64LE-NEXT: li 3, 96
|
||||
@@ -8717,6 +8891,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
|
||||
; PC64LE9-NEXT: xxswapd 2, 62
|
||||
; PC64LE9-NEXT: bl atan2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 62, 59, 1
|
||||
; PC64LE9-NEXT: xscpsgndp 1, 61, 61
|
||||
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
|
||||
@@ -8727,6 +8902,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
|
||||
; PC64LE9-NEXT: xxswapd 2, 63
|
||||
; PC64LE9-NEXT: bl atan2
|
||||
; PC64LE9-NEXT: nop
|
||||
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; PC64LE9-NEXT: xxmrghd 35, 60, 1
|
||||
; PC64LE9-NEXT: vmr 2, 30
|
||||
; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload
|
||||
|
||||
185
llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
Normal file
185
llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
Normal file
@@ -0,0 +1,185 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
||||
; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s
|
||||
|
||||
%struct.wibble = type { %struct.wombat }
|
||||
%struct.wombat = type { %struct.ham, [3 x i8] }
|
||||
%struct.ham = type { %struct.zot }
|
||||
%struct.zot = type { %struct.blam }
|
||||
%struct.blam = type { %struct.ham.0 }
|
||||
%struct.ham.0 = type { %struct.bar }
|
||||
%struct.bar = type { %struct.bar.1 }
|
||||
%struct.bar.1 = type { %struct.baz, i8 }
|
||||
%struct.baz = type { %struct.snork }
|
||||
%struct.snork = type <{ %struct.spam, i8, [3 x i8] }>
|
||||
%struct.spam = type { %struct.snork.2, %struct.snork.2 }
|
||||
%struct.snork.2 = type { i32 }
|
||||
%struct.snork.3 = type { %struct.baz, i8, [3 x i8] }
|
||||
|
||||
define void @foo(ptr %arg, ptr %arg1, i40 %arg2, ptr %arg3, i32 %arg4) #0 {
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: pushq %rbp
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
; CHECK-NEXT: movq %rsp, %rbp
|
||||
; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
; CHECK-NEXT: pushq %r15
|
||||
; CHECK-NEXT: pushq %r14
|
||||
; CHECK-NEXT: pushq %r13
|
||||
; CHECK-NEXT: pushq %r12
|
||||
; CHECK-NEXT: pushq %rbx
|
||||
; CHECK-NEXT: subq $24, %rsp
|
||||
; CHECK-NEXT: .cfi_offset %rbx, -56
|
||||
; CHECK-NEXT: .cfi_offset %r12, -48
|
||||
; CHECK-NEXT: .cfi_offset %r13, -40
|
||||
; CHECK-NEXT: .cfi_offset %r14, -32
|
||||
; CHECK-NEXT: .cfi_offset %r15, -24
|
||||
; CHECK-NEXT: movl %r8d, %r14d
|
||||
; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: movq %rsi, %r13
|
||||
; CHECK-NEXT: movq %rdi, %r15
|
||||
; CHECK-NEXT: incl %r14d
|
||||
; CHECK-NEXT: xorl %ebx, %ebx
|
||||
; CHECK-NEXT: # implicit-def: $r12
|
||||
; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: jmp .LBB0_3
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: .LBB0_1: # %bb17
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: movq %r15, %r13
|
||||
; CHECK-NEXT: xorl %r15d, %r15d
|
||||
; CHECK-NEXT: testq %rbx, %rbx
|
||||
; CHECK-NEXT: sete %r15b
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: callq _Znwm@PLT
|
||||
; CHECK-NEXT: shll $4, %r15d
|
||||
; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: movq %r12, %rcx
|
||||
; CHECK-NEXT: shrq $32, %rcx
|
||||
; CHECK-NEXT: movb %cl, 12(%rax)
|
||||
; CHECK-NEXT: movl %r12d, 8(%rax)
|
||||
; CHECK-NEXT: movq %r15, %rbx
|
||||
; CHECK-NEXT: movq %r13, %r15
|
||||
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
|
||||
; CHECK-NEXT: decl %r14d
|
||||
; CHECK-NEXT: je .LBB0_8
|
||||
; CHECK-NEXT: .LBB0_3: # %bb7
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: callq widget@PLT
|
||||
; CHECK-NEXT: cmpb $-5, (%r13)
|
||||
; CHECK-NEXT: jae .LBB0_5
|
||||
; CHECK-NEXT: # %bb.4: # in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: movl %r12d, %r12d
|
||||
; CHECK-NEXT: cmpq %r15, %rbx
|
||||
; CHECK-NEXT: jbe .LBB0_1
|
||||
; CHECK-NEXT: jmp .LBB0_7
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: .LBB0_5: # %bb12
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: movq 0, %rax
|
||||
; CHECK-NEXT: movq 8, %rax
|
||||
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
|
||||
; CHECK-NEXT: cmpq %r15, %rbx
|
||||
; CHECK-NEXT: jbe .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: xorl %ebx, %ebx
|
||||
; CHECK-NEXT: decl %r14d
|
||||
; CHECK-NEXT: jne .LBB0_3
|
||||
; CHECK-NEXT: .LBB0_8: # %bb21
|
||||
; CHECK-NEXT: cmpb $0, 12(%rax)
|
||||
; CHECK-NEXT: jne .LBB0_10
|
||||
; CHECK-NEXT: # %bb.9: # %bb26
|
||||
; CHECK-NEXT: addq $24, %rsp
|
||||
; CHECK-NEXT: popq %rbx
|
||||
; CHECK-NEXT: popq %r12
|
||||
; CHECK-NEXT: popq %r13
|
||||
; CHECK-NEXT: popq %r14
|
||||
; CHECK-NEXT: popq %r15
|
||||
; CHECK-NEXT: popq %rbp
|
||||
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB0_10: # %bb25
|
||||
; CHECK-NEXT: .cfi_def_cfa %rbp, 16
|
||||
; CHECK-NEXT: movq %r15, %rdi
|
||||
; CHECK-NEXT: callq pluto@PLT
|
||||
bb:
|
||||
br label %bb7
|
||||
|
||||
bb5: ; preds = %bb17, %bb14
|
||||
%phi = phi ptr [ %call19, %bb17 ], [ null, %bb14 ]
|
||||
%phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb14 ]
|
||||
%add = add i32 %phi9, 1
|
||||
%icmp = icmp eq i32 %phi9, %arg4
|
||||
br i1 %icmp, label %bb21, label %bb7
|
||||
|
||||
bb7: ; preds = %bb5, %bb
|
||||
%phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ]
|
||||
%phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
|
||||
%phi10 = phi i40 [ poison, %bb ], [ %phi15, %bb5 ]
|
||||
%call = call ptr @widget()
|
||||
%load = load i8, ptr %arg1, align 8
|
||||
%icmp11 = icmp ult i8 %load, -5
|
||||
%and = and i40 %phi10, 4294967295
|
||||
br i1 %icmp11, label %bb14, label %bb12
|
||||
|
||||
bb12: ; preds = %bb7
|
||||
%load13 = load volatile { i64, i64 }, ptr null, align 4294967296
|
||||
br label %bb14
|
||||
|
||||
bb14: ; preds = %bb12, %bb7
|
||||
%phi15 = phi i40 [ %and, %bb7 ], [ %arg2, %bb12 ]
|
||||
%icmp16 = icmp ugt ptr %phi8, %arg
|
||||
br i1 %icmp16, label %bb5, label %bb17
|
||||
|
||||
bb17: ; preds = %bb14
|
||||
%icmp18 = icmp eq ptr %phi8, null
|
||||
%zext = zext i1 %icmp18 to i64
|
||||
%call19 = call ptr @_Znwm(i64 0)
|
||||
%getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext
|
||||
%getelementptr20 = getelementptr i8, ptr %call19, i64 8
|
||||
store i40 %phi15, ptr %getelementptr20, align 4
|
||||
br label %bb5
|
||||
|
||||
bb21: ; preds = %bb5
|
||||
%getelementptr22 = getelementptr %struct.snork.3, ptr %phi, i64 0, i32 1
|
||||
%load23 = load i8, ptr %getelementptr22, align 4
|
||||
%icmp24 = icmp eq i8 %load23, 0
|
||||
br i1 %icmp24, label %bb26, label %bb25
|
||||
|
||||
bb25: ; preds = %bb21
|
||||
call void @pluto(ptr %arg)
|
||||
unreachable
|
||||
|
||||
bb26: ; preds = %bb21
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @eggs(ptr %arg, ptr %arg1) {
|
||||
; CHECK-LABEL: eggs:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: movq %rsi, %rdi
|
||||
; CHECK-NEXT: movq %rax, %rsi
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: xorl %r8d, %r8d
|
||||
; CHECK-NEXT: callq foo@PLT
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
bb:
|
||||
call void @foo(ptr %arg1, ptr %arg, i40 0, ptr null, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare ptr @widget()
|
||||
|
||||
declare void @pluto(ptr)
|
||||
|
||||
declare ptr @_Znwm(i64)
|
||||
|
||||
attributes #0 = { noinline "frame-pointer"="all" }
|
||||
@@ -9,7 +9,7 @@ body: |
|
||||
; CHECK-NEXT: successors: %bb.1(0x2aaaaaab), %bb.2(0x55555555)
|
||||
; CHECK-NEXT: liveins: $edi
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
|
||||
; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
|
||||
; CHECK-NEXT: JCC_1 %bb.2, 5, implicit killed undef $eflags
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
@@ -28,7 +28,7 @@ body: |
|
||||
; CHECK-NEXT: JCC_1 %bb.5, 5, implicit killed undef $eflags
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.4:
|
||||
; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al
|
||||
; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al, implicit-def $al
|
||||
; CHECK-NEXT: RET 0, killed undef $al
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.5:
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
|
||||
# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -enable-subreg-liveness -verify-coalescing -o - %s | FileCheck %s
|
||||
|
||||
|
||||
# FIXME: Need to handle subrange updates when coalescing with subreg_to_reg
|
||||
# This will fail if x86 enables subregister liveness.
|
||||
---
|
||||
name: requires_new_subrange_coalesce_subreg_to_reg
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: requires_new_subrange_coalesce_subreg_to_reg
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK-NEXT: liveins: $eax
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = COPY $eax
|
||||
; CHECK-NEXT: %b:gr32 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
|
||||
; CHECK-NEXT: JCC_1 %bb.2, 4, implicit undef $eflags
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.2(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
|
||||
; CHECK-NEXT: %c.sub_32bit:gr64 = COPY %a
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: %c.sub_32bit:gr64 = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
|
||||
; CHECK-NEXT: RET 0, implicit %c
|
||||
bb.0:
|
||||
liveins: $eax
|
||||
%init_eax:gr32 = COPY $eax
|
||||
%a:gr64 = SUBREG_TO_REG 0, %init_eax, %subreg.sub_32bit
|
||||
%b:gr32 = IMPLICIT_DEF
|
||||
%c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
|
||||
JCC_1 %bb.2, 4, implicit undef $eflags
|
||||
|
||||
bb.1:
|
||||
%imm0:gr32 = MOV32r0 implicit-def dead $eflags
|
||||
%a = SUBREG_TO_REG 0, %imm0, %subreg.sub_32bit
|
||||
%c.sub_32bit = COPY %a
|
||||
|
||||
bb.2:
|
||||
%c.sub_32bit = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
|
||||
RET 0, implicit %c
|
||||
|
||||
...
|
||||
79
llvm/test/CodeGen/X86/pr76416.ll
Normal file
79
llvm/test/CodeGen/X86/pr76416.ll
Normal file
@@ -0,0 +1,79 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
;
|
||||
; Reproducer from https://github.com/llvm/llvm-project/issues/76416
|
||||
;
|
||||
|
||||
@load_p = external global ptr, align 8
|
||||
@load_data = external global i8, align 1
|
||||
|
||||
define dso_local void @pr76416() {
|
||||
; CHECK-LABEL: pr76416:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: jg .LBB0_3
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: .LBB0_2: # %for.body
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: incl -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: jle .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_3: # %for.end
|
||||
; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq load_p@GOTPCREL(%rip), %rax
|
||||
; CHECK-NEXT: movq load_data@GOTPCREL(%rip), %rcx
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: .LBB0_4: # %for.cond1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: movq (%rax), %rdx
|
||||
; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rsi
|
||||
; CHECK-NEXT: movzbl (%rdx,%rsi), %edx
|
||||
; CHECK-NEXT: movb %dl, (%rcx)
|
||||
; CHECK-NEXT: leal 1(%rsi), %edx
|
||||
; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: jmp .LBB0_4
|
||||
entry:
|
||||
%alloca = alloca i32, align 4
|
||||
store i32 0, ptr %alloca, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.body, %entry
|
||||
%load.from.alloca.0 = load i32, ptr %alloca, align 4
|
||||
%cmp = icmp slt i32 %load.from.alloca.0, 4
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) nounwind
|
||||
%load.from.alloca.1 = load i32, ptr %alloca, align 4
|
||||
%inc = add nsw i32 %load.from.alloca.1, 1
|
||||
store i32 %inc, ptr %alloca, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
store i32 0, ptr %alloca, align 4
|
||||
br label %for.cond1
|
||||
|
||||
for.cond1: ; preds = %for.cond1, %for.end
|
||||
call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) nounwind
|
||||
%load.from.load_p = load ptr, ptr @load_p, align 8
|
||||
%regs = getelementptr inbounds { [4 x i8] }, ptr %load.from.load_p, i32 0, i32 0
|
||||
%load.from.alloca.2 = load i32, ptr %alloca, align 4
|
||||
%idxprom = sext i32 %load.from.alloca.2 to i64
|
||||
%arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom
|
||||
%load.with.gep.ptr = load i8, ptr %arrayidx, align 1
|
||||
store i8 %load.with.gep.ptr, ptr @load_data, align 1
|
||||
%load.from.alloca.3 = load i32, ptr %alloca, align 4
|
||||
%inc2 = add nsw i32 %load.from.alloca.3, 1
|
||||
store i32 %inc2, ptr %alloca, align 4
|
||||
br label %for.cond1
|
||||
}
|
||||
@@ -14,8 +14,8 @@ tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: test1
|
||||
; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
|
||||
; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
|
||||
; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm]] :: (volatile load (s32) from `ptr undef`)
|
||||
; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm1]] :: (volatile load (s32) from `ptr undef`)
|
||||
; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags
|
||||
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg
|
||||
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags
|
||||
|
||||
372
llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
Normal file
372
llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
Normal file
@@ -0,0 +1,372 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
|
||||
# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -o - %s | FileCheck %s
|
||||
|
||||
# We cannot lose the liveness of the high subregister of %1 when
|
||||
# coalesced with %0, so introduce an implicit-def of the super
|
||||
# register on the MOV.
|
||||
|
||||
# Basic case: a gr32 MOV32r0 is the only def feeding a gr64 SUBREG_TO_REG.
# The CHECK lines expect the MOV32r0 to be rewritten to define
# `undef %1.sub_32bit` and to carry an added `implicit-def %1` operand.
---
|
||||
name: coalesce_mov32r0_into_subreg_to_reg64
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64
|
||||
; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1
|
||||
; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
|
||||
; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: RET 0
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
%0:gr32 = MOV32r0 implicit-def dead $eflags
|
||||
%1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
|
||||
$rdi = COPY %1
|
||||
CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
RET 0
|
||||
|
||||
...
|
||||
|
||||
# A COPY chain from $rax feeds a MOV32rr whose result goes through
# SUBREG_TO_REG and a final COPY into %4. The CHECK lines expect a single
# MOV32rr writing `undef %4.sub_32bit` with an `implicit-def %4` operand.
---
|
||||
name: subreg_to_reg_folds_to_undef
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $rax
|
||||
|
||||
; CHECK-LABEL: name: subreg_to_reg_folds_to_undef
|
||||
; CHECK: liveins: $rax
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_with_sub_8bit = COPY $rax
|
||||
; CHECK-NEXT: undef %4.sub_32bit:gr64_with_sub_8bit = MOV32rr [[COPY]].sub_32bit, implicit-def %4
|
||||
; CHECK-NEXT: RET 0, implicit %4
|
||||
%0:gr64 = COPY killed $rax
|
||||
%1:gr32 = COPY killed %0.sub_32bit
|
||||
%2:gr32 = MOV32rr killed %1
|
||||
%3:gr64 = SUBREG_TO_REG 0, killed %2, %subreg.sub_32bit
|
||||
%4:gr64 = COPY killed %3
|
||||
RET 0, implicit %4
|
||||
|
||||
...
|
||||
|
||||
# The SUBREG_TO_REG source is already a sub_32bit subregister def of a
# 64-bit virtual register (`undef %0.sub_32bit`). As written, the CHECK line
# for the coalesced MOV32r0 lists no `implicit-def %1` operand.
---
|
||||
name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
|
||||
; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
|
||||
; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
|
||||
; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: RET 0
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
|
||||
%1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
|
||||
$rdi = COPY %1
|
||||
CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
RET 0
|
||||
|
||||
...
|
||||
|
||||
# The MOV32r0 defines `undef %0.sub_32bit` and already carries an
# `implicit-def %0` of the full register before coalescing. The CHECK lines
# expect that operand to appear as `implicit-def %1` on the coalesced MOV32r0.
---
|
||||
name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
|
||||
; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1
|
||||
; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
|
||||
; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: RET 0
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
|
||||
%1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
|
||||
$rdi = COPY %1
|
||||
CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
RET 0
|
||||
|
||||
...
|
||||
|
||||
# The MOV32r0 already has an `implicit-def undef %0.sub_8bit` operand for a
# different subregister. The CHECK lines expect that operand to be kept
# (renamed to %1.sub_8bit) alongside the `implicit-def %1` of the full register.
---
|
||||
name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
|
||||
; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef %1.sub_8bit, implicit-def %1
|
||||
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit %1
|
||||
; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: RET 0
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
%0:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef %0.sub_8bit
|
||||
%1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
|
||||
INLINEASM &"", 0, implicit %1
|
||||
CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
RET 0
|
||||
|
||||
...
|
||||
|
||||
|
||||
# Reduced real-world case that hit an assertion failure once the new implicit-defs were introduced
|
||||
# Two-block input with a self-looping bb.1. The MOV32r0 result %1 defined in
# bb.0 feeds two SUBREG_TO_REGs (%2 in bb.0, %9 inside the loop), and a second
# MOV32r0 in bb.1, partially overwritten through sub_8bit by SETCCr, feeds a
# third (%8). As written, the CHECK lines expect `implicit-def` operands on
# both MOV32r0s in bb.0 and none on the MOV32r0 in bb.1.
---
|
||||
name: coalesce_needs_implicit_defs
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: coalesce_needs_implicit_defs
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: liveins: $rdi
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK-NEXT: undef %2.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %2
|
||||
; CHECK-NEXT: undef %3.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef %10.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
|
||||
; CHECK-NEXT: TEST64rr %3, %3, implicit-def $eflags
|
||||
; CHECK-NEXT: %10.sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags
|
||||
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
|
||||
; CHECK-NEXT: CALL64r %2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: [[SHL64ri:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[SHL64ri]], 4, implicit-def dead $eflags
|
||||
; CHECK-NEXT: [[ADD64rr:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[ADD64rr]], [[COPY]], implicit-def dead $eflags
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_with_sub_8bit = COPY [[ADD64rr]]
|
||||
; CHECK-NEXT: JMP_1 %bb.1
|
||||
bb.0:
|
||||
liveins: $rdi
|
||||
|
||||
%0:gr64 = COPY killed $rdi
|
||||
%1:gr32 = MOV32r0 implicit-def dead $eflags
|
||||
%2:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
|
||||
%3:gr64 = COPY killed %2
|
||||
|
||||
bb.1:
|
||||
%4:gr64 = COPY killed %3
|
||||
%5:gr32 = MOV32r0 implicit-def dead $eflags
|
||||
TEST64rr killed %4, %4, implicit-def $eflags
|
||||
%6:gr8 = SETCCr 4, implicit killed $eflags
|
||||
%7:gr32 = COPY killed %5
|
||||
%7.sub_8bit:gr32 = COPY killed %6
|
||||
%8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
%9:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
|
||||
$rdi = COPY %9
|
||||
CALL64r killed %9, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
%10:gr64 = COPY killed %8
|
||||
%10:gr64 = SHL64ri %10, 4, implicit-def dead $eflags
|
||||
%11:gr64 = COPY killed %10
|
||||
%11:gr64 = ADD64rr %11, %0, implicit-def dead $eflags
|
||||
%3:gr64 = COPY killed %11
|
||||
JMP_1 %bb.1
|
||||
|
||||
...
|
||||
|
||||
# The SUBREG_TO_REG destination is the physical register $rdi. The CHECK
# lines expect the SUBREG_TO_REG to disappear and the MOV32r0 to become
# `dead $edi = MOV32r0 ..., implicit-def $rdi`.
---
|
||||
name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
|
||||
; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
|
||||
; CHECK-NEXT: CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: RET 0
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
%0:gr32 = MOV32r0 implicit-def dead $eflags
|
||||
$rdi = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
|
||||
CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
RET 0
|
||||
|
||||
...
|
||||
|
||||
# The SUBREG_TO_REG source is the physical register $eax. The CHECK lines
# expect the SUBREG_TO_REG to remain in the output, with the MOV32r0 into
# $eax left without any added implicit-def.
---
|
||||
name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $eax
|
||||
; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
|
||||
; CHECK: liveins: $eax
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags
|
||||
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, $eax, %subreg.sub_32bit
|
||||
; CHECK-NEXT: $rdi = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK-NEXT: CALL64r [[SUBREG_TO_REG]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: RET 0
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
$eax = MOV32r0 implicit-def dead $eflags
|
||||
%1:gr64 = SUBREG_TO_REG 0, killed $eax, %subreg.sub_32bit
|
||||
$rdi = COPY %1
|
||||
CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
RET 0
|
||||
|
||||
...
|
||||
|
||||
# Coalesced instruction is a copy with other implicit operands
|
||||
# The def feeding SUBREG_TO_REG is `COPY $eax` with an extra
# `implicit-def dead $eflags` operand. The CHECK lines expect the COPY to
# write `undef %1.sub_32bit`, keep the $eflags operand, and gain
# `implicit-def %1`.
---
|
||||
name: coalesce_copy_into_subreg_to_reg64
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $eax
|
||||
; CHECK-LABEL: name: coalesce_copy_into_subreg_to_reg64
|
||||
; CHECK: liveins: $eax
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = COPY $eax, implicit-def dead $eflags, implicit-def %1
|
||||
; CHECK-NEXT: $rdi = COPY %1
|
||||
; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: RET 0
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
%0:gr32 = COPY $eax, implicit-def dead $eflags
|
||||
%1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
|
||||
$rdi = COPY %1
|
||||
CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
RET 0
|
||||
|
||||
...
|
||||
|
||||
# %0 is written twice before the SUBREG_TO_REG: once by MOV32r0 and once by
# an INLINEASM with `implicit-def %0, implicit %0`. The CHECK lines expect
# `implicit-def %1` on the MOV32r0, while the INLINEASM operands are rewritten
# to %1.sub_32bit.
---
|
||||
name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
|
||||
; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1
|
||||
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def %1.sub_32bit, implicit %1.sub_32bit
|
||||
; CHECK-NEXT: $rdi = COPY %1
|
||||
; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: RET 0
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
%0:gr32 = MOV32r0 implicit-def dead $eflags
|
||||
INLINEASM &"", 0, implicit-def %0, implicit %0
|
||||
%1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
|
||||
$rdi = COPY %1
|
||||
CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
RET 0
|
||||
|
||||
...
|
||||
|
||||
# The gr32 value is defined by an INLINEASM in bb.0 and consumed by the
# SUBREG_TO_REG in a different block (bb.1). The CHECK lines expect the
# INLINEASM to carry `implicit-def undef %1.sub_32bit, implicit-def %1`.
---
|
||||
name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
|
||||
; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags
|
||||
; CHECK-NEXT: JMP_1 %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: $rdi = COPY %1
|
||||
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: RET 0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
bb.0:
|
||||
INLINEASM &"", 0, implicit-def %0:gr32
|
||||
JCC_1 %bb.1, 4, implicit undef $eflags
|
||||
JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
%1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
|
||||
$rdi = COPY %1
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
RET 0
|
||||
|
||||
bb.2:
|
||||
|
||||
...
|
||||
|
||||
# The gr32 value is defined by an INLINEASM in bb.0 and read (not killed) by
# a SUBREG_TO_REG in bb.1, which jumps back to itself. The CHECK lines expect
# the INLINEASM to carry `implicit-def undef %1.sub_32bit, implicit-def %1`.
---
|
||||
name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
|
||||
frameInfo:
|
||||
adjustsStack: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
|
||||
; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags
|
||||
; CHECK-NEXT: JMP_1 %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $rdi = COPY %1
|
||||
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: JMP_1 %bb.1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
bb.0:
|
||||
|
||||
INLINEASM &"", 0, implicit-def %0:gr32
|
||||
JCC_1 %bb.1, 4, implicit undef $eflags
|
||||
JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
%1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32bit
|
||||
$rdi = COPY %1
|
||||
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
|
||||
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
JMP_1 %bb.1
|
||||
|
||||
bb.2:
|
||||
|
||||
...
|
||||
@@ -90,7 +90,7 @@ define <4 x float> @test_compress_v4f32(<4 x float> %vec, <4 x i1> %mask, <4 x f
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $3, %ecx
|
||||
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; AVX2-NEXT: vmovss %xmm0, -24(%rsp,%rcx,4)
|
||||
@@ -380,26 +380,26 @@ define <8 x float> @test_compress_v8f32(<8 x float> %vec, <8 x i1> %mask, <8 x f
|
||||
; AVX2-NEXT: vmovd %xmm1, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $7, %ecx
|
||||
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX2-NEXT: vmovss %xmm0, (%rsp,%rcx,4)
|
||||
; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: vextractps $1, %xmm0, (%rsp,%rax,4)
|
||||
; AVX2-NEXT: vpextrd $2, %xmm1, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $7, %ecx
|
||||
; AVX2-NEXT: vextractps $2, %xmm0, (%rsp,%rcx,4)
|
||||
; AVX2-NEXT: vpextrd $3, %xmm1, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rdx, %rax
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $7, %edx
|
||||
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; AVX2-NEXT: vmovss %xmm0, (%rsp,%rdx,4)
|
||||
@@ -790,68 +790,68 @@ define <16 x float> @test_compress_v16f32(<16 x float> %vec, <16 x i1> %mask, <1
|
||||
; AVX2-NEXT: vpextrb $5, %xmm2, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vextractps $1, %xmm0, (%rsp,%rax,4)
|
||||
; AVX2-NEXT: vpextrb $6, %xmm2, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vextractps $2, %xmm0, (%rsp,%rcx,4)
|
||||
; AVX2-NEXT: vpextrb $7, %xmm2, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vextractps $3, %xmm0, (%rsp,%rax,4)
|
||||
; AVX2-NEXT: vpextrb $8, %xmm2, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vmovss %xmm1, (%rsp,%rcx,4)
|
||||
; AVX2-NEXT: vpextrb $9, %xmm2, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vextractps $1, %xmm1, (%rsp,%rax,4)
|
||||
; AVX2-NEXT: vpextrb $10, %xmm2, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vextractps $2, %xmm1, (%rsp,%rcx,4)
|
||||
; AVX2-NEXT: vpextrb $11, %xmm2, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vextractps $3, %xmm1, (%rsp,%rax,4)
|
||||
; AVX2-NEXT: vpextrb $12, %xmm2, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; AVX2-NEXT: vmovss %xmm0, (%rsp,%rcx,4)
|
||||
; AVX2-NEXT: vpextrb $13, %xmm2, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vextractps $1, %xmm0, (%rsp,%rax,4)
|
||||
; AVX2-NEXT: vpextrb $14, %xmm2, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vextractps $2, %xmm0, (%rsp,%rcx,4)
|
||||
; AVX2-NEXT: vpextrb $15, %xmm2, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rdx, %rax
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $15, %edx
|
||||
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; AVX2-NEXT: vmovss %xmm0, (%rsp,%rdx,4)
|
||||
@@ -1024,26 +1024,26 @@ define <8 x double> @test_compress_v8f64(<8 x double> %vec, <8 x i1> %mask, <8 x
|
||||
; AVX2-NEXT: vpextrw $4, %xmm2, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $7, %ecx
|
||||
; AVX2-NEXT: vmovlpd %xmm1, (%rsp,%rcx,8)
|
||||
; AVX2-NEXT: vpextrw $5, %xmm2, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: vmovhpd %xmm1, (%rsp,%rax,8)
|
||||
; AVX2-NEXT: vpextrw $6, %xmm2, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $7, %ecx
|
||||
; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; AVX2-NEXT: vmovlpd %xmm0, (%rsp,%rcx,8)
|
||||
; AVX2-NEXT: vpextrw $7, %xmm2, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rdx, %rax
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $7, %edx
|
||||
; AVX2-NEXT: vmovhpd %xmm0, (%rsp,%rdx,8)
|
||||
; AVX2-NEXT: cmpq $8, %rax
|
||||
@@ -1158,67 +1158,67 @@ define <16 x i8> @test_compress_v16i8(<16 x i8> %vec, <16 x i1> %mask, <16 x i8>
|
||||
; AVX2-NEXT: movzbl %r15b, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $5, %xmm0, -40(%rsp,%rcx)
|
||||
; AVX2-NEXT: movzbl %r14b, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $6, %xmm0, -40(%rsp,%rax)
|
||||
; AVX2-NEXT: movzbl %bpl, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $7, %xmm0, -40(%rsp,%rcx)
|
||||
; AVX2-NEXT: movzbl %bl, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $8, %xmm0, -40(%rsp,%rax)
|
||||
; AVX2-NEXT: movzbl %r10b, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $9, %xmm0, -40(%rsp,%rcx)
|
||||
; AVX2-NEXT: movzbl %r9b, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $10, %xmm0, -40(%rsp,%rax)
|
||||
; AVX2-NEXT: movzbl %r8b, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $11, %xmm0, -40(%rsp,%rcx)
|
||||
; AVX2-NEXT: movzbl %dil, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $12, %xmm0, -40(%rsp,%rax)
|
||||
; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $13, %xmm0, -40(%rsp,%rcx)
|
||||
; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $14, %xmm0, -40(%rsp,%rax)
|
||||
; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $15, %xmm0, -40(%rsp,%rcx)
|
||||
; AVX2-NEXT: cmpq $15, %rax
|
||||
@@ -1421,158 +1421,158 @@ define <32 x i8> @test_compress_v32i8(<32 x i8> %vec, <32 x i1> %mask, <32 x i8>
|
||||
; AVX2-NEXT: vpextrb $6, %xmm3, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $6, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $7, %xmm3, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $7, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $8, %xmm3, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $8, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $9, %xmm3, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $9, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $10, %xmm3, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $10, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $11, %xmm3, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $11, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $12, %xmm3, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $12, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $13, %xmm3, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $13, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $14, %xmm3, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $14, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $15, %xmm3, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $15, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vmovd %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $1, %xmm1, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $1, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $2, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $3, %xmm1, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $3, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $4, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $5, %xmm1, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $5, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $6, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $7, %xmm1, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $7, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $8, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $9, %xmm1, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $9, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $10, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $11, %xmm1, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $11, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $12, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $13, %xmm1, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $13, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rdx, %rcx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx killed $rdx def $rdx
|
||||
; AVX2-NEXT: # kill: def $edx killed $edx def $rdx killed $rdx
|
||||
; AVX2-NEXT: andl $31, %edx
|
||||
; AVX2-NEXT: vpextrb $14, %xmm0, (%rsp,%rdx)
|
||||
; AVX2-NEXT: vpextrb $15, %xmm1, %edx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: addq %rcx, %rdx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $31, %ecx
|
||||
; AVX2-NEXT: vpextrb $15, %xmm0, (%rsp,%rcx)
|
||||
; AVX2-NEXT: cmpq $31, %rdx
|
||||
@@ -2007,54 +2007,54 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: andl $1, %esi
|
||||
; AVX2-NEXT: addq %rax, %rsi
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $63, %eax
|
||||
; AVX2-NEXT: vpextrb $6, %xmm0, (%rsp,%rax)
|
||||
; AVX2-NEXT: andl $1, %edi
|
||||
; AVX2-NEXT: addq %rsi, %rdi
|
||||
; AVX2-NEXT: # kill: def $esi killed $esi killed $rsi def $rsi
|
||||
; AVX2-NEXT: # kill: def $esi killed $esi def $rsi killed $rsi
|
||||
; AVX2-NEXT: andl $63, %esi
|
||||
; AVX2-NEXT: vpextrb $7, %xmm0, (%rsp,%rsi)
|
||||
; AVX2-NEXT: andl $1, %r8d
|
||||
; AVX2-NEXT: addq %rdi, %r8
|
||||
; AVX2-NEXT: # kill: def $edi killed $edi killed $rdi def $rdi
|
||||
; AVX2-NEXT: # kill: def $edi killed $edi def $rdi killed $rdi
|
||||
; AVX2-NEXT: andl $63, %edi
|
||||
; AVX2-NEXT: vpextrb $8, %xmm0, (%rsp,%rdi)
|
||||
; AVX2-NEXT: andl $1, %r9d
|
||||
; AVX2-NEXT: addq %r8, %r9
|
||||
; AVX2-NEXT: # kill: def $r8d killed $r8d killed $r8 def $r8
|
||||
; AVX2-NEXT: # kill: def $r8d killed $r8d def $r8 killed $r8
|
||||
; AVX2-NEXT: andl $63, %r8d
|
||||
; AVX2-NEXT: vpextrb $9, %xmm0, (%rsp,%r8)
|
||||
; AVX2-NEXT: andl $1, %r10d
|
||||
; AVX2-NEXT: addq %r9, %r10
|
||||
; AVX2-NEXT: # kill: def $r9d killed $r9d killed $r9 def $r9
|
||||
; AVX2-NEXT: # kill: def $r9d killed $r9d def $r9 killed $r9
|
||||
; AVX2-NEXT: andl $63, %r9d
|
||||
; AVX2-NEXT: vpextrb $10, %xmm0, (%rsp,%r9)
|
||||
; AVX2-NEXT: andl $1, %r11d
|
||||
; AVX2-NEXT: addq %r10, %r11
|
||||
; AVX2-NEXT: # kill: def $r10d killed $r10d killed $r10 def $r10
|
||||
; AVX2-NEXT: # kill: def $r10d killed $r10d def $r10 killed $r10
|
||||
; AVX2-NEXT: andl $63, %r10d
|
||||
; AVX2-NEXT: vpextrb $11, %xmm0, (%rsp,%r10)
|
||||
; AVX2-NEXT: andl $1, %r14d
|
||||
; AVX2-NEXT: addq %r11, %r14
|
||||
; AVX2-NEXT: # kill: def $r11d killed $r11d killed $r11 def $r11
|
||||
; AVX2-NEXT: # kill: def $r11d killed $r11d def $r11 killed $r11
|
||||
; AVX2-NEXT: andl $63, %r11d
|
||||
; AVX2-NEXT: vpextrb $12, %xmm0, (%rsp,%r11)
|
||||
; AVX2-NEXT: andl $1, %r12d
|
||||
; AVX2-NEXT: addq %r14, %r12
|
||||
; AVX2-NEXT: # kill: def $r14d killed $r14d killed $r14 def $r14
|
||||
; AVX2-NEXT: # kill: def $r14d killed $r14d def $r14 killed $r14
|
||||
; AVX2-NEXT: andl $63, %r14d
|
||||
; AVX2-NEXT: vpextrb $13, %xmm0, (%rsp,%r14)
|
||||
; AVX2-NEXT: movl 80(%rbp), %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %r12, %rax
|
||||
; AVX2-NEXT: # kill: def $r12d killed $r12d killed $r12 def $r12
|
||||
; AVX2-NEXT: # kill: def $r12d killed $r12d def $r12 killed $r12
|
||||
; AVX2-NEXT: andl $63, %r12d
|
||||
; AVX2-NEXT: vpextrb $14, %xmm0, (%rsp,%r12)
|
||||
; AVX2-NEXT: movl 88(%rbp), %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $63, %eax
|
||||
; AVX2-NEXT: vpextrb $15, %xmm0, (%rsp,%rax)
|
||||
; AVX2-NEXT: movl 96(%rbp), %edx
|
||||
@@ -4507,61 +4507,61 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) nounwind {
|
||||
; AVX2-NEXT: vpextrb $5, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $5, %xmm0, -24(%rsp,%rax)
|
||||
; AVX2-NEXT: vpextrb $6, %xmm1, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $6, %xmm0, -24(%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $7, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $7, %xmm0, -24(%rsp,%rax)
|
||||
; AVX2-NEXT: vpextrb $8, %xmm1, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $8, %xmm0, -24(%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $9, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $9, %xmm0, -24(%rsp,%rax)
|
||||
; AVX2-NEXT: vpextrb $10, %xmm1, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $10, %xmm0, -24(%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $11, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $11, %xmm0, -24(%rsp,%rax)
|
||||
; AVX2-NEXT: vpextrb $12, %xmm1, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addq %rcx, %rax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $12, %xmm0, -24(%rsp,%rcx)
|
||||
; AVX2-NEXT: vpextrb $13, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: vpextrb $13, %xmm0, -24(%rsp,%rax)
|
||||
; AVX2-NEXT: vpextrb $14, %xmm1, %eax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: addl %ecx, %eax
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
|
||||
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx killed $rcx
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpextrb $14, %xmm0, -24(%rsp,%rcx)
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
@@ -4817,7 +4817,7 @@ define <4 x i32> @test_compress_v4i32_zero_passthru(<4 x i32> %vec, <4 x i1> %ma
|
||||
; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: addq %rax, %rcx
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
|
||||
; AVX2-NEXT: # kill: def $eax killed $eax def $rax killed $rax
|
||||
; AVX2-NEXT: andl $3, %eax
|
||||
; AVX2-NEXT: vextractps $3, %xmm0, -24(%rsp,%rax,4)
|
||||
; AVX2-NEXT: xorl %eax, %eax
|
||||
|
||||
Reference in New Issue
Block a user