Reapply "[CodeGen] Add new pass for late cleanup of redundant definitions."

Init captures were added in processBlock() to avoid capturing structured
bindings, which caused build problems with clang.

RISCV has this disabled for now until problems relating to post RA pseudo
expansions are resolved.
This commit is contained in:
Jonas Paulsson
2022-12-01 19:33:11 +01:00
parent 9a41739565
commit 17db0de330
58 changed files with 895 additions and 1258 deletions

View File

@@ -1130,6 +1130,9 @@ void CodeGenPassBuilder<Derived>::addMachineLateOptimization(
if (!TM.requiresStructuredCFG())
addPass(TailDuplicatePass());
// Cleanup of redundant (identical) address/immediate loads.
addPass(MachineLateInstrsCleanupPass());
// Copy propagation.
addPass(MachineCopyPropagationPass());
}

View File

@@ -151,6 +151,7 @@ DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ())
DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass, ())
DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ())
DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ())

View File

@@ -334,6 +334,10 @@ namespace llvm {
MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr);
/// MachineLateInstrsCleanup - This pass removes redundant identical
/// instructions after register allocation and rematerialization.
extern char &MachineLateInstrsCleanupID;
/// PeepholeOptimizer - This pass performs peephole optimizations -
/// like extension and comparison eliminations.
extern char &PeepholeOptimizerID;

View File

@@ -277,6 +277,7 @@ void initializeMachineDominanceFrontierPass(PassRegistry&);
void initializeMachineDominatorTreePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeMachineFunctionSplitterPass(PassRegistry &);
void initializeMachineLateInstrsCleanupPass(PassRegistry&);
void initializeMachineLICMPass(PassRegistry&);
void initializeMachineLoopInfoPass(PassRegistry&);
void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);

View File

@@ -119,6 +119,7 @@ add_llvm_component_library(LLVMCodeGen
MachineFunctionSplitter.cpp
MachineInstrBundle.cpp
MachineInstr.cpp
MachineLateInstrsCleanup.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
MachineLoopUtils.cpp

View File

@@ -78,6 +78,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCycleInfoWrapperPassPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLateInstrsCleanupPass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoWrapperPassPass(Registry);

View File

@@ -0,0 +1,240 @@
//==--- MachineLateInstrsCleanup.cpp - Late Instructions Cleanup Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This simple pass removes any identical and redundant immediate or address
// loads to the same register. The immediate loads removed can originally be
// the result of rematerialization, while the addresses are redundant frame
// addressing anchor points created during Frame Indices elimination.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "machine-latecleanup"
STATISTIC(NumRemoved, "Number of redundant instructions removed.");
namespace {
// Late machine-level cleanup pass: removes an instruction when an identical,
// still-valid definition of the same register already exists (i.e. no
// intervening clobber of the defined register or of the frame register).
// Targets redundant immediate loads (typically produced by rematerialization)
// and redundant frame-addressing anchor points (produced during Frame Index
// elimination).
class MachineLateInstrsCleanup : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
// Data structures to map regs to their definitions per MBB.
// One map per basic block, indexed by MBB number: register -> instruction
// currently providing a reusable definition of that register.
using Reg2DefMap = std::map<Register, MachineInstr*>;
std::vector<Reg2DefMap> RegDefs;
// Walk through the instructions in MBB and remove any redundant
// instructions.
bool processBlock(MachineBasicBlock *MBB);
public:
static char ID; // Pass identification, replacement for typeid
MachineLateInstrsCleanup() : MachineFunctionPass(ID) {
initializeMachineLateInstrsCleanupPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
// The pass only erases instructions; the CFG is never changed.
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override;
// Runs after register allocation, so all virtual registers must be gone.
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
};
} // end anonymous namespace
// Pass ID plus the external handle (MachineLateInstrsCleanupID) that
// TargetPassConfig uses to schedule or disable the pass per target.
char MachineLateInstrsCleanup::ID = 0;
char &llvm::MachineLateInstrsCleanupID = MachineLateInstrsCleanup::ID;
INITIALIZE_PASS(MachineLateInstrsCleanup, DEBUG_TYPE,
"Machine Late Instructions Cleanup Pass", false, false)
// Entry point: reset the per-block reg->def maps and visit every block,
// removing redundant definitions. Returns true if anything was erased.
bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const TargetSubtargetInfo &ST = MF.getSubtarget();
  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();

  // One reg->def map per basic block, indexed by block number.
  RegDefs.clear();
  RegDefs.resize(MF.getNumBlockIDs());

  // Visit all MBBs in an order that maximises the reuse from predecessors:
  // reverse post-order guarantees predecessors are (mostly) seen first.
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  bool Changed = false;
  for (MachineBasicBlock *MBB : RPOT)
    Changed |= processBlock(MBB);

  return Changed;
}
// Clear any previous kill flag on Reg found before I in MBB. Walk backwards
// in MBB and if needed continue in predecessors until a use/def of Reg is
// encountered. This seems to be faster in practice than tracking kill flags
// in a map.
static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
BitVector &VisitedPreds,
const TargetRegisterInfo *TRI) {
// Mark MBB visited so the predecessor recursion below terminates on cycles.
VisitedPreds.set(MBB->getNumber());
// Scan backwards from I towards the start of the block.
while (I != MBB->begin()) {
I--;
bool Found = false;
for (auto &MO : I->operands())
if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) {
if (MO.isDef())
// An overlapping def ends the search: no earlier operand can carry
// a kill flag for the value now being kept live.
return;
if (MO.readsReg()) {
MO.setIsKill(false);
Found = true; // Keep going for an implicit kill of the super-reg.
}
}
if (Found)
return;
}
// If an earlier def is not in MBB, continue in predecessors.
// Reg is now live through all of MBB, so record it as a block live-in.
if (!MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
assert(!MBB->pred_empty() && "Predecessor def not found!");
for (MachineBasicBlock *Pred : MBB->predecessors())
if (!VisitedPreds.test(Pred->getNumber()))
clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI);
}
// Erase MI, whose defined value is provided by an earlier identical
// instruction. Before erasing, clear any kill flag on that value between the
// two definitions (possibly across blocks), since it now stays live.
static void removeRedundantDef(MachineInstr *MI,
                               const TargetRegisterInfo *TRI) {
  Register DefReg = MI->getOperand(0).getReg();
  MachineBasicBlock *Parent = MI->getParent();
  BitVector Visited(Parent->getParent()->getNumBlockIDs());
  clearKillsForDef(DefReg, Parent, MI->getIterator(), Visited, TRI);
  MI->eraseFromParent();
  ++NumRemoved;
}
// Return true if MI is a potential candidate for reuse/removal and if so
// also the register it defines in DefedReg. A candidate is a simple
// instruction that does not touch memory, has only one register definition
// and the only reg it may use is FrameReg. Typically this is an immediate
// load or a load-address instruction.
static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
                        Register FrameReg) {
  DefedReg = MCRegister::NoRegister;
  bool SawStore = true;
  if (!MI->isSafeToMove(nullptr, SawStore) || MI->isImplicitDef() ||
      MI->isInlineAsm())
    return false;
  for (unsigned OpIdx = 0, NumOps = MI->getNumOperands(); OpIdx != NumOps;
       ++OpIdx) {
    const MachineOperand &MO = MI->getOperand(OpIdx);
    if (MO.isReg()) {
      if (MO.isDef()) {
        // Accept only a single, explicit, live def in operand position 0.
        if (OpIdx != 0 || MO.isImplicit() || MO.isDead())
          return false;
        DefedReg = MO.getReg();
        continue;
      }
      // The only register use allowed is the frame register.
      if (MO.getReg() && MO.getReg() != FrameReg)
        return false;
      continue;
    }
    // Non-register operands must be constant-like (immediates, addresses).
    if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
          MO.isGlobal() || MO.isSymbol()))
      return false;
  }
  return DefedReg.isValid();
}
// Process one block: seed the reusable-definition map from predecessors,
// then walk the block, erasing instructions identical to a still-valid
// earlier definition and keeping the map consistent with clobbers.
bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
bool Changed = false;
Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()];
// Find reusable definitions in the predecessor(s).
// A def is only inherited if every predecessor ends with an identical
// definition of the same register.
if (!MBB->pred_empty()) {
MachineBasicBlock *FirstPred = *MBB->pred_begin();
for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()])
if (llvm::all_of(
drop_begin(MBB->predecessors()),
// Init-captures (&Reg = Reg, &DefMI = DefMI) are used because
// capturing structured bindings directly is ill-formed in C++17
// and rejected by clang.
[&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
auto PredDefI = RegDefs[Pred->getNumber()].find(Reg);
return PredDefI != RegDefs[Pred->getNumber()].end() &&
DefMI->isIdenticalTo(*PredDefI->second);
})) {
MBBDefs[Reg] = DefMI;
LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
<< printMBBReference(*MBB) << ": " << *DefMI;);
}
}
// Process MBB.
MachineFunction *MF = MBB->getParent();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
Register FrameReg = TRI->getFrameRegister(*MF);
// early_inc_range allows erasing MI while iterating over the block.
for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
// If FrameReg is modified, no previous load-address instructions are valid.
if (MI.modifiesRegister(FrameReg, TRI)) {
MBBDefs.clear();
continue;
}
Register DefedReg;
bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
// Check for an earlier identical and reusable instruction.
if (IsCandidate) {
auto DefI = MBBDefs.find(DefedReg);
if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) {
LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
removeRedundantDef(&MI, TRI);
Changed = true;
continue;
}
}
// Clear any entries in map that MI clobbers.
// This runs even for removal candidates, since MI may redefine a register
// that an earlier mapped instruction defined.
for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) {
Register Reg = DefI->first;
if (MI.modifiesRegister(Reg, TRI))
DefI = MBBDefs.erase(DefI);
else
++DefI;
}
// Record this MI for potential later reuse.
if (IsCandidate) {
LLVM_DEBUG(dbgs() << "Found interesting instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
MBBDefs[DefedReg] = &MI;
}
}
return Changed;
}

View File

@@ -1522,6 +1522,9 @@ void TargetPassConfig::addOptimizedRegAlloc() {
/// Add passes that optimize machine instructions after register allocation.
void TargetPassConfig::addMachineLateOptimization() {
// Cleanup of redundant immediate/address loads.
addPass(&MachineLateInstrsCleanupID);
// Branch folding must be run after regalloc and prolog/epilog insertion.
addPass(&BranchFolderPassID);

View File

@@ -291,6 +291,7 @@ void NVPTXPassConfig::addIRPasses() {
// of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineLateInstrsCleanupID);
disablePass(&MachineCopyPropagationID);
disablePass(&TailDuplicateID);
disablePass(&StackMapLivenessID);

View File

@@ -286,6 +286,10 @@ void RISCVPassConfig::addPreRegAlloc() {
void RISCVPassConfig::addPostRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
addPass(createRISCVRedundantCopyEliminationPass());
// Temporarily disabled until post-RA pseudo expansion problem is fixed,
// see D123394 and D139169.
disablePass(&MachineLateInstrsCleanupID);
}
yaml::MachineFunctionInfo *

View File

@@ -501,6 +501,7 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
// them.
// These functions all require the NoVRegs property.
disablePass(&MachineLateInstrsCleanupID);
disablePass(&MachineCopyPropagationID);
disablePass(&PostRAMachineSinkingID);
disablePass(&PostRASchedulerID);

View File

@@ -188,6 +188,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

View File

@@ -29,14 +29,8 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr x9, [x9]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: Lloh6:
; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
; CHECK-NEXT: stur x9, [x29, #-8]
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
; CHECK-NEXT: ldur x9, [x29, #-8]
; CHECK-NEXT: Lloh8:
; CHECK-NEXT: ldr x8, [x8]
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: b.ne LBB0_2
; CHECK-NEXT: ; %bb.1: ; %entry
@@ -46,7 +40,6 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_2: ; %entry
; CHECK-NEXT: bl ___stack_chk_fail
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh6, Lloh7, Lloh8
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh1, Lloh3, Lloh5
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh2, Lloh4
entry:

View File

@@ -59,26 +59,23 @@ define float @foo2(double* %x0, double* %x1) nounwind {
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0]
; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: mov w1, #1
; CHECK-NEXT: mov w2, #2
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: mov w3, #3
; CHECK-NEXT: mov w4, #4
; CHECK-NEXT: mov w5, #5
; CHECK-NEXT: mov w6, #6
; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: mov w7, #7
; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl callee2

View File

@@ -157,8 +157,6 @@ define amdgpu_kernel void @kernel_caller_byval() {
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0
; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16
; FLATSCR-NEXT: s_mov_b32 s11, 0
; FLATSCR-NEXT: s_mov_b32 s10, 0
; FLATSCR-NEXT: s_mov_b32 s9, 0
@@ -171,9 +169,8 @@ define amdgpu_kernel void @kernel_caller_byval() {
; FLATSCR-NEXT: s_mov_b32 s4, 0
; FLATSCR-NEXT: s_mov_b32 s3, 0
; FLATSCR-NEXT: s_mov_b32 s2, 0
; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0
; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
; FLATSCR-NEXT: s_mov_b32 s40, 0
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:24
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:32
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:40
@@ -188,6 +185,7 @@ define amdgpu_kernel void @kernel_caller_byval() {
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s2 offset:112
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:120
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:128
; FLATSCR-NEXT: s_mov_b32 s40, 0
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s40 offset:8
; FLATSCR-NEXT: s_mov_b32 s39, 0
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s39 offset:16

View File

@@ -1354,7 +1354,6 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX7-NEXT: s_cbranch_execz .LBB13_2

View File

@@ -537,7 +537,6 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX803-NEXT: ;;#ASMSTART
; GFX803-NEXT: ;;#ASMEND
; GFX803-NEXT: s_mov_b32 s4, 0x40000
; GFX803-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
@@ -554,7 +553,6 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x40000
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
@@ -569,8 +567,6 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX1010-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc dlc
; GFX1010-NEXT: s_waitcnt vmcnt(0)
; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-NEXT: s_mov_b32 s4, 0x20000
; GFX1010-NEXT: ;;#ASMSTART
; GFX1010-NEXT: ;;#ASMEND
; GFX1010-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
@@ -585,7 +581,6 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX1100-NEXT: s_waitcnt vmcnt(0)
; GFX1100-NEXT: s_movk_i32 s0, 0x1000
; GFX1100-NEXT: scratch_store_b32 off, v0, s0 ; 4-byte Folded Spill
; GFX1100-NEXT: s_movk_i32 s0, 0x1000
; GFX1100-NEXT: ;;#ASMSTART
; GFX1100-NEXT: ;;#ASMEND
; GFX1100-NEXT: scratch_load_b32 v0, off, s0 ; 4-byte Folded Reload

View File

@@ -76,12 +76,10 @@ define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1
; CHECK-NEXT: ; %bb.10: ; %bb16
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[16:17], 0
; CHECK-NEXT: s_mov_b64 s[20:21], -1
; CHECK-NEXT: s_mov_b64 s[22:23], s[10:11]
; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17]
; CHECK-NEXT: s_branch .LBB0_2
; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[22:23], -1
; CHECK-NEXT: s_mov_b64 s[20:21], 0
; CHECK-NEXT: ; implicit-def: $sgpr16_sgpr17
; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17]

File diff suppressed because it is too large Load Diff

View File

@@ -374,6 +374,7 @@
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: Shrink Wrapping analysis
; GCN-O1-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O1-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O1-NEXT: Control Flow Optimizer
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-NEXT: Tail Duplication
@@ -670,6 +671,7 @@
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: Shrink Wrapping analysis
; GCN-O1-OPTS-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O1-OPTS-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O1-OPTS-NEXT: Control Flow Optimizer
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: Tail Duplication
@@ -968,6 +970,7 @@
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: Shrink Wrapping analysis
; GCN-O2-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O2-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O2-NEXT: Control Flow Optimizer
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O2-NEXT: Tail Duplication
@@ -1279,6 +1282,7 @@
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: Shrink Wrapping analysis
; GCN-O3-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O3-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O3-NEXT: Control Flow Optimizer
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O3-NEXT: Tail Duplication

View File

@@ -188,7 +188,6 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; GCN-NEXT: ; %bb.3: ; %LeafBlock1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_cmp_eq_u32 s8, 1
; GCN-NEXT: s_mov_b64 s[4:5], -1
; GCN-NEXT: s_cbranch_scc0 .LBB1_5
; GCN-NEXT: ; %bb.4: ; %case1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1

View File

@@ -187,8 +187,6 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; SI-NEXT: s_branch .LBB3_3
; SI-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 s[14:15], -1
; SI-NEXT: .LBB3_2: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[14:15]
@@ -206,7 +204,6 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; SI-NEXT: s_cbranch_vccz .LBB3_1
; SI-NEXT: ; %bb.5: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 s[14:15], -1
; SI-NEXT: s_mov_b64 vcc, s[6:7]
; SI-NEXT: s_cbranch_vccz .LBB3_7
; SI-NEXT: ; %bb.6: ; %if.else
@@ -263,8 +260,6 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; FLAT-NEXT: s_branch .LBB3_3
; FLAT-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 s[8:9], 0
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 s[14:15], -1
; FLAT-NEXT: .LBB3_2: ; %Flow
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[14:15]
@@ -282,7 +277,6 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; FLAT-NEXT: s_cbranch_vccz .LBB3_1
; FLAT-NEXT: ; %bb.5: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 s[14:15], -1
; FLAT-NEXT: s_mov_b64 vcc, s[6:7]
; FLAT-NEXT: s_cbranch_vccz .LBB3_7
; FLAT-NEXT: ; %bb.6: ; %if.else

View File

@@ -60,7 +60,6 @@ define amdgpu_kernel void @kernel(i32 %a, i32 addrspace(1)* %x, i32 noundef %n)
; CHECK-NEXT: s_cmp_lg_u32 s10, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_14
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: s_mov_b64 s[2:3], 0
; CHECK-NEXT: s_mov_b64 s[0:1], -1
; CHECK-NEXT: .LBB0_4: ; %Flow3
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec
@@ -103,7 +102,6 @@ define amdgpu_kernel void @kernel(i32 %a, i32 addrspace(1)* %x, i32 noundef %n)
; CHECK-NEXT: s_branch .LBB0_10
; CHECK-NEXT: .LBB0_14: ; %cond.false.i8
; CHECK-NEXT: s_mov_b64 s[2:3], -1
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_trap 2
; CHECK-NEXT: s_branch .LBB0_4
entry:

View File

@@ -140,7 +140,6 @@ define void @my_func(i32 %0) {
; GCN-NEXT: s_cbranch_scc1 .LBB0_10
; GCN-NEXT: ; %bb.9:
; GCN-NEXT: s_mov_b64 s[6:7], -1
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
; GCN-NEXT: s_cbranch_execnz .LBB0_3
; GCN-NEXT: s_branch .LBB0_4
@@ -173,7 +172,6 @@ define void @my_func(i32 %0) {
; GCN-NEXT: ; %bb.15: ; %LeafBlock9
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b64 s[8:9], -1
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc
; GCN-NEXT: ; %bb.16: ; %do.body.i.i.i.i
; GCN-NEXT: s_mov_b64 s[4:5], exec

View File

@@ -34,7 +34,6 @@ define amdgpu_kernel void @test_inst_offset_kernel() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_movk_i32 s0, 0xffc
; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8
@@ -71,7 +70,6 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() {
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: s_mov_b32 s4, 0x40000
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
@@ -90,7 +88,6 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_movk_i32 s0, 0x1000
; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8
@@ -237,7 +234,6 @@ define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() #3 {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_movk_i32 s8, 0x1004
; FLATSCR-NEXT: scratch_load_dword v0, off, s8 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
@@ -320,7 +316,6 @@ define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_movk_i32 s0, 0xff8
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
@@ -367,7 +362,6 @@ define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_mov_b32 s4, 0x3ff00
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_nop 0
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload
@@ -391,7 +385,6 @@ define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_movk_i32 s0, 0xffc
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART

View File

@@ -10551,7 +10551,6 @@ define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: ;;#ASMSTART
; GFX6-NEXT: ;;#ASMEND
; GFX6-NEXT: s_mov_b32 s2, 0x84800
; GFX6-NEXT: buffer_load_dword v17, off, s[40:43], s2 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
@@ -10796,7 +10795,7 @@ define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s0 ; 16-byte Folded Spill
; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 ; 16-byte Folded Spill
; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100
; GFX9-FLATSCR-NEXT: s_nop 0
; GFX9-FLATSCR-NEXT: ;;#ASMSTART
; GFX9-FLATSCR-NEXT: ;;#ASMEND
; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[8:11], off, s0 ; 16-byte Folded Reload
@@ -11032,7 +11031,6 @@ define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v60
; GFX10-FLATSCR-NEXT: ;;#ASMSTART
; GFX10-FLATSCR-NEXT: ;;#ASMEND
; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x2010
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v65
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v66
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v67

View File

@@ -149,6 +149,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

View File

@@ -1652,7 +1652,6 @@ define void @infiniteloop3() "frame-pointer"="all" {
; THUMB-ENABLE-NEXT: movs r0, #0
; THUMB-ENABLE-NEXT: cbnz r0, LBB11_5
; THUMB-ENABLE-NEXT: @ %bb.1: @ %loop2a.preheader
; THUMB-ENABLE-NEXT: movs r0, #0
; THUMB-ENABLE-NEXT: movs r1, #0
; THUMB-ENABLE-NEXT: mov r2, r0
; THUMB-ENABLE-NEXT: b LBB11_3
@@ -1679,7 +1678,6 @@ define void @infiniteloop3() "frame-pointer"="all" {
; THUMB-DISABLE-NEXT: movs r0, #0
; THUMB-DISABLE-NEXT: cbnz r0, LBB11_5
; THUMB-DISABLE-NEXT: @ %bb.1: @ %loop2a.preheader
; THUMB-DISABLE-NEXT: movs r0, #0
; THUMB-DISABLE-NEXT: movs r1, #0
; THUMB-DISABLE-NEXT: mov r2, r0
; THUMB-DISABLE-NEXT: b LBB11_3

View File

@@ -3764,7 +3764,6 @@ define i64 @stest_f32i64_mm(float %x) {
; SOFT-NEXT: @ %bb.18: @ %entry
; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: .LBB48_19: @ %entry
; SOFT-NEXT: ldr r0, .LCPI48_0
; SOFT-NEXT: cmp r4, r0
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: beq .LBB48_21
@@ -4347,7 +4346,6 @@ define i64 @stest_f16i64_mm(half %x) {
; SOFT-NEXT: @ %bb.18: @ %entry
; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: .LBB51_19: @ %entry
; SOFT-NEXT: ldr r0, .LCPI51_0
; SOFT-NEXT: cmp r4, r0
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: beq .LBB51_21

View File

@@ -22,7 +22,7 @@ entry:
; for.body -> for.cond.backedge (100%)
; -> cond.false.i (0%)
; CHECK: bb.1.for.body:
; CHECK: successors: %bb.2(0x80000000), %bb.4(0x00000000)
; CHECK: successors: %bb.2(0x80000000), %bb.5(0x00000000)
for.body:
br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1

View File

@@ -1,6 +1,6 @@
; RUN: llc -mtriple=armv7-apple-ios8.0 -o - %s | FileCheck %s
%BigInt = type i5500
%BigInt = type i8500
define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {
; CHECK-LABEL: test_moved_jumptable:

View File

@@ -283,7 +283,6 @@ define arm_aapcs_vfpcc i32 @t10(float %x) nounwind {
; CHECK-NEXT: vst1.32 {d17[1]}, [r0:32]
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movne r0, #0
; CHECK-NEXT: bxne lr
; CHECK-NEXT: LBB9_1:
; CHECK-NEXT: trap

View File

@@ -14,9 +14,8 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {
; <label>:4: ; preds = %2
br label %5
; CHECK: if r4 s>= r3 goto +11 <LBB0_3>
; CHECK: r0 = 0
; CHECK-LABEL: <LBB0_2>:
; CHECK: if r4 s>= r3 goto +10 <LBB0_2>
; CHECK-LABEL: <LBB0_1>:
; <label>:5: ; preds = %4, %5
%6 = phi i32 [ %9, %5 ], [ 0, %4 ]
@@ -28,12 +27,12 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {
%12 = icmp slt i32 %10, %11
br i1 %12, label %5, label %13
; CHECK: r1 = r3
; CHECK: if r2 s> r3 goto -10 <LBB0_2>
; CHECK: if r2 s> r3 goto -10 <LBB0_1>
; <label>:13: ; preds = %5, %2
%14 = phi i32 [ 0, %2 ], [ %9, %5 ]
ret i32 %14
; CHECK-LABEL: <LBB0_3>:
; CHECK-LABEL: <LBB0_2>:
; CHECK: exit
}
attributes #0 = { norecurse nounwind readnone }

View File

@@ -841,7 +841,6 @@ define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) {
; MMR3-NEXT: or16 $6, $4
; MMR3-NEXT: lw $4, 8($sp) # 4-byte Folded Reload
; MMR3-NEXT: movn $1, $7, $4
; MMR3-NEXT: li16 $7, 0
; MMR3-NEXT: movn $1, $6, $10
; MMR3-NEXT: lw $4, 24($sp) # 4-byte Folded Reload
; MMR3-NEXT: movz $1, $4, $16

View File

@@ -915,7 +915,6 @@ define signext i128 @shl_i128(i128 signext %a, i128 signext %b) {
; MMR3-NEXT: or16 $5, $3
; MMR3-NEXT: lw $3, 12($sp) # 4-byte Folded Reload
; MMR3-NEXT: movn $8, $7, $3
; MMR3-NEXT: li16 $7, 0
; MMR3-NEXT: movn $8, $5, $10
; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload
; MMR3-NEXT: movz $8, $3, $16

View File

@@ -182,6 +182,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

View File

@@ -11,7 +11,6 @@ define dso_local void @wibble(ptr nocapture readonly %arg, i32 signext %arg1, pt
; CHECK-NEXT: blt 0, .LBB0_5
; CHECK-NEXT: # %bb.1: # %bb6
; CHECK-NEXT: clrldi 4, 4, 32
; CHECK-NEXT: li 7, 7
; CHECK-NEXT: addi 4, 4, -1
; CHECK-NEXT: mtctr 4
; CHECK-NEXT: li 4, 8

View File

@@ -53,7 +53,7 @@ define signext i32 @bar() #0 {
; AIX64-NEXT: L..BB0_1: # %for.cond
; AIX64-NEXT: #
; AIX64-NEXT: lwz 3, 120(1)
; AIX64-NEXT: ld 4, L..C0(2) # @x
; AIX64-NEXT: ld 4, L..C0(2)
; AIX64-NEXT: lwz 4, 0(4)
; AIX64-NEXT: cmpw 3, 4
; AIX64-NEXT: bge 0, L..BB0_4

View File

@@ -618,7 +618,6 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
; P8-NEXT: lfs f0, .LCPI13_0@toc@l(r3)
; P8-NEXT: lis r3, -32768
; P8-NEXT: fcmpo cr0, f2, f3
; P8-NEXT: xxlxor f3, f3, f3
; P8-NEXT: fcmpo cr1, f1, f0
; P8-NEXT: crand 4*cr5+lt, 4*cr1+eq, lt
; P8-NEXT: crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq
@@ -660,7 +659,6 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
; P9-NEXT: lfs f0, .LCPI13_0@toc@l(r3)
; P9-NEXT: fcmpo cr1, f2, f3
; P9-NEXT: lis r3, -32768
; P9-NEXT: xxlxor f3, f3, f3
; P9-NEXT: fcmpo cr0, f1, f0
; P9-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
; P9-NEXT: crandc 4*cr5+gt, lt, eq

View File

@@ -1295,7 +1295,6 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
; PC64LE-NEXT: lfs 0, .LCPI31_0@toc@l(3)
; PC64LE-NEXT: lis 3, -32768
; PC64LE-NEXT: fcmpo 0, 2, 3
; PC64LE-NEXT: xxlxor 3, 3, 3
; PC64LE-NEXT: fcmpo 1, 1, 0
; PC64LE-NEXT: crand 20, 6, 0
; PC64LE-NEXT: crandc 21, 4, 6
@@ -1333,7 +1332,6 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
; PC64LE9-NEXT: lfs 0, .LCPI31_0@toc@l(3)
; PC64LE9-NEXT: fcmpo 1, 2, 3
; PC64LE9-NEXT: lis 3, -32768
; PC64LE9-NEXT: xxlxor 3, 3, 3
; PC64LE9-NEXT: fcmpo 0, 1, 0
; PC64LE9-NEXT: crand 20, 2, 4
; PC64LE9-NEXT: crandc 21, 0, 2

View File

@@ -0,0 +1,327 @@
# RUN: llc -mtriple=s390x-linux-gnu -start-before=prologepilog %s -o - -mcpu=z14 \
# RUN: -verify-machineinstrs 2>&1 | FileCheck %s
# REQUIRES: asserts
#
# Test that redundant frame addressing anchor points are removed by
# MachineLateInstrsCleanup.
--- |
define void @fun1() { ret void }
define void @fun2() { ret void }
define void @fun3() { ret void }
define void @fun4() { ret void }
define void @fun5() { ret void }
define void @fun6() { ret void }
define void @fun7() { ret void }
define void @fun8() { ret void }
declare i32 @foo()
@ptr = external dso_local local_unnamed_addr global ptr
---
# Test elimination of redundant LAYs in successor blocks.
# CHECK-LABEL: fun1:
# CHECK: lay %r1, 4096(%r15)
# CHECK: # %bb.1:
# CHECK-NOT: lay
# CHECK: .LBB0_2:
# CHECK-NOT: lay
---
name: fun1
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 2500 }
- { id: 2, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
successors: %bb.2(0x00000001), %bb.1(0x7fffffff)
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
CHIMux undef $r0l, 3, implicit-def $cc
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
liveins: $f16d
VST64 renamable $f16d, %stack.2, 0, $noreg
J %bb.2
bb.2:
liveins: $f16d
VST64 renamable $f16d, %stack.1, 0, $noreg
Return
...
# In this function, the LAY in bb.1 uses a different offset, so the first
# LAY in bb.2 must remain.
# CHECK-LABEL: fun2:
# CHECK: lay %r1, 4096(%r15)
# CHECK: # %bb.1:
# CHECK: lay %r1, 8192(%r15)
# CHECK: .LBB1_2:
# CHECK: lay %r1, 4096(%r15)
# CHECK-NOT: lay
---
name: fun2
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 5000 }
- { id: 2, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
successors: %bb.2(0x00000001), %bb.1(0x7fffffff)
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
CHIMux undef $r0l, 3, implicit-def $cc
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
liveins: $f16d
VST64 renamable $f16d, %stack.2, 0, $noreg
J %bb.2
bb.2:
liveins: $f16d
VST64 renamable $f16d, %stack.1, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
Return
...
# Test case with a loop (there is room for improvement here: since %r1 is not
# clobbered inside the loop, only the first LAY is needed).
# CHECK-LABEL: fun3:
# CHECK: lay %r1, 4096(%r15)
# CHECK: .LBB2_1:
# CHECK: lay %r1, 4096(%r15)
# CHECK: .LBB2_2:
# CHECK-NOT: lay %r1, 4096(%r15)
---
name: fun3
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 2500 }
- { id: 2, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
successors: %bb.2(0x00000001), %bb.1(0x7fffffff)
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
CHIMux undef $r0l, 3, implicit-def $cc
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
liveins: $f16d
successors: %bb.2(0x00000001), %bb.1(0x7fffffff)
VST64 renamable $f16d, %stack.2, 0, $noreg
CHIMux undef $r0l, 3, implicit-def $cc
BRC 14, 8, %bb.1, implicit killed $cc
J %bb.2
bb.2:
liveins: $f16d
VST64 renamable $f16d, %stack.1, 0, $noreg
Return
...
# Test case with a call that clobbers r1: the second LAY, after the call, is needed.
# CHECK-LABEL: fun4:
# CHECK: lay %r1, 4096(%r15)
# CHECK: brasl
# CHECK: lay %r1, 4096(%r15)
---
name: fun4
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
ADJCALLSTACKDOWN 0, 0
CallBRASL @foo, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
ADJCALLSTACKUP 0, 0
$f17d = IMPLICIT_DEF
VST64 renamable $f17d, %stack.1, 0, $noreg
Return
...
# Test case where the index register is loaded instead of using an LAY. Only one LGHI is needed.
# CHECK-LABEL: fun5:
# CHECK: lghi %r1, 4096
# CHECK-NOT: lghi
---
name: fun5
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
$f0q = nofpexcept LXEB %stack.1, 0, $noreg, implicit $fpc
$f1q = nofpexcept LXEB %stack.1, 0, $noreg, implicit $fpc
Return
...
# Test where the constant is a Global. Only one LARL is needed.
# CHECK-LABEL: fun6:
# CHECK: larl %r1, ptr
# CHECK-NOT: larl
---
name: fun6
alignment: 16
tracksRegLiveness: true
tracksDebugUserValues: true
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
fixedStack:
- { id: 0, offset: -160, size: 8, alignment: 8 }
machineFunctionInfo: {}
body: |
bb.0:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
renamable $r1d = LARL @ptr
CGHSI killed renamable $r1d, 0, 0, implicit-def $cc :: (volatile dereferenceable load (s64) from @ptr)
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
renamable $r1d = LARL @ptr
MVGHI killed renamable $r1d, 0, 0
bb.2:
Return
...
# Load of an invariant location (GOT). Only one LGRL is needed.
# CHECK-LABEL: fun7:
# CHECK: lgrl %r1, ptr
# CHECK-NOT: lgrl
---
name: fun7
alignment: 16
tracksRegLiveness: true
tracksDebugUserValues: true
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
fixedStack:
- { id: 0, offset: -160, size: 8, alignment: 8 }
machineFunctionInfo: {}
body: |
bb.0:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
renamable $r1d = LGRL @ptr :: (load (s64) from got)
CGHSI killed renamable $r1d, 0, 0, implicit-def $cc :: (volatile dereferenceable load (s64) from @ptr)
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
renamable $r1d = LGRL @ptr :: (load (s64) from got)
MVGHI killed renamable $r1d, 0, 0
bb.2:
Return
...
# Load from constant pool. Only one LARL is needed.
# CHECK-LABEL: fun8:
# CHECK: larl %r1, .LCPI7_0
# CHECK-NOT: larl
---
name: fun8
alignment: 16
tracksRegLiveness: true
tracksDebugUserValues: true
liveins:
- { reg: '$f0s' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
fixedStack:
- { id: 0, offset: -160, size: 8, alignment: 8 }
constants:
- id: 0
value: float 0x43E0000000000000
alignment: 4
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
successors: %bb.1, %bb.2
liveins: $f0s
renamable $r1d = LARL %const.0
renamable $f1s = LE killed renamable $r1d, 0, $noreg :: (load (s32) from constant-pool)
nofpexcept CEBR renamable $f0s, renamable $f1s, implicit-def $cc, implicit $fpc
BRC 15, 11, %bb.2, implicit killed $cc
bb.1:
liveins: $f0s
J %bb.3
bb.2 (%ir-block.0):
liveins: $f0s, $f1s
renamable $r1d = LARL %const.0
renamable $f1s = LE killed renamable $r1d, 0, $noreg :: (load (s32) from constant-pool)
bb.3 (%ir-block.0):
liveins: $r2d
Return
...

View File

@@ -77,12 +77,9 @@ entry:
; CHECK-FP-ATPCS: adds r0, #8
; CHECK-FP-ATPCS: stm r0!, {r1, r2, r3}
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS: str r1, [r0, #8]
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS: str r2, [r0, #12]
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS: str r3, [r0, #16]
; CHECK-FP-AAPCS: mov r7, r0
; CHECK-FP-AAPCS: adds r7, #8
; CHECK-FP-AAPCS: stm r7!, {r1, r2, r3}
; Re-aligned stack, access via FP
; int test_args_realign(int a, int b, int c, int d, int e) {
; __attribute__((aligned(16))) int v[4];
@@ -148,11 +145,9 @@ entry:
; CHECK-ATPCS-NEXT: adds r0, #8
; CHECK-ATPCS-NEXT: stm r0!, {r1, r2, r3}
; CHECK-AAPCS: mov r0, r11
; CHECK-AAPCS: str r1, [r0, #8]
; CHECK-AAPCS: mov r0, r11
; CHECK-AAPCS: str r2, [r0, #12]
; CHECK-AAPCS: mov r0, r11
; CHECK-AAPCS: str r3, [r0, #16]
; CHECK-AAPCS: mov r7, r0
; CHECK-AAPCS: adds r7, #8
; CHECK-AAPCS: stm r7!, {r1, r2, r3}
; VLAs present, access via FP
; int test_args_vla(int a, int b, int c, int d, int e) {
; int v[a];
@@ -308,11 +303,9 @@ entry:
; CHECK-FP-ATPCS-NEXT: adds r0, #8
; CHECK-FP-ATPCS-NEXT: stm r0!, {r1, r2, r3}
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS-NEXT: str r1, [r0, #8]
; CHECK-FP-AAPCS-NEXT: mov r0, r11
; CHECK-FP-AAPCS-NEXT: str r2, [r0, #12]
; CHECK-FP-AAPCS-NEXT: mov r0, r11
; CHECK-FP-AAPCS-NEXT: str r3, [r0, #16]
; CHECK-FP-AAPCS-NEXT: mov r5, r0
; CHECK-FP-AAPCS-NEXT: adds r5, #8
; CHECK-FP-AAPCS-NEXT: stm r5!, {r1, r2, r3}
; struct S { int x[128]; } s;
; int test(S a, int b) {

View File

@@ -1890,7 +1890,6 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: csel r5, r6, r5, eq
@@ -2152,7 +2151,6 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: csel r5, r6, r5, eq
@@ -2410,7 +2408,6 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: csel r5, r6, r5, eq

View File

@@ -18,7 +18,6 @@ define fastcc ptr @pushdecl(ptr %x) nounwind {
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl
; CHECK-NEXT: .LBB0_1: # %bb160
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl

View File

@@ -27,7 +27,6 @@ define i16 @SQLDriversW(ptr %henv, i16 zeroext %fDir, ptr %szDrvDesc, i16 signe
; CHECK-NEXT: jne LBB0_6
; CHECK-NEXT: ## %bb.4: ## %bb37
; CHECK-NEXT: movw $0, 40(%edi)
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: leal (,%ecx,4), %ecx
; CHECK-NEXT: leal (,%ebx,4), %edx

View File

@@ -58,7 +58,6 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: tileloadd (%rax,%r14), %tmm0
; CHECK-NEXT: movabsq $64, %rcx
; CHECK-NEXT: tileloadd 1088(%rsp,%rcx), %tmm1 # 1024-byte Folded Reload
; CHECK-NEXT: movabsq $64, %rcx
; CHECK-NEXT: tileloadd 64(%rsp,%rcx), %tmm2 # 1024-byte Folded Reload
; CHECK-NEXT: tdpbssd %tmm2, %tmm1, %tmm0
; CHECK-NEXT: tilestored %tmm0, (%rax,%r14)

View File

@@ -46,7 +46,6 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm5
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vzeroupper
@@ -64,7 +63,6 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tdpbssd %tmm3, %tmm2, %tmm5
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vzeroupper

View File

@@ -8,7 +8,6 @@ target triple = "x86_64-apple-macosx"
; CHECK-LABEL: foo:
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
define void @foo() #0 {
entry:
%_tags = alloca [3 x i32], align 4

View File

@@ -338,26 +338,24 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: movl %edx, %ebx
; X86-SLOW-NEXT: movl %esi, %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: .LBB6_3:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: je .LBB6_5
; X86-SLOW-NEXT: .LBB6_4:
; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl %ebp, %esi
; X86-SLOW-NEXT: movl %edx, %ebp
; X86-SLOW-NEXT: movl %ecx, %edx
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: .LBB6_5:
; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: .LBB6_6:
; X86-SLOW-NEXT: movl %edx, %edi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %edi
; X86-SLOW-NEXT: shrl %ebx

View File

@@ -247,7 +247,6 @@ define <4 x double> @load_v4f64_v4i32_zero(<4 x i32> %trigger, ptr %addr) {
; SSE-NEXT: retq
; SSE-NEXT: LBB3_1: ## %cond.load
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: testb $2, %al
; SSE-NEXT: je LBB3_4
; SSE-NEXT: LBB3_3: ## %cond.load1
@@ -1129,7 +1128,6 @@ define <8 x float> @load_v8f32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE2-NEXT: retq
; SSE2-NEXT: LBB10_1: ## %cond.load
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: je LBB10_4
; SSE2-NEXT: LBB10_3: ## %cond.load1
@@ -1209,7 +1207,6 @@ define <8 x float> @load_v8f32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE42-NEXT: retq
; SSE42-NEXT: LBB10_1: ## %cond.load
; SSE42-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: je LBB10_4
; SSE42-NEXT: LBB10_3: ## %cond.load1
@@ -2650,7 +2647,6 @@ define <8 x i32> @load_v8i32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE2-NEXT: retq
; SSE2-NEXT: LBB20_1: ## %cond.load
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: je LBB20_4
; SSE2-NEXT: LBB20_3: ## %cond.load1
@@ -2730,7 +2726,6 @@ define <8 x i32> @load_v8i32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE42-NEXT: retq
; SSE42-NEXT: LBB20_1: ## %cond.load
; SSE42-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: je LBB20_4
; SSE42-NEXT: LBB20_3: ## %cond.load1

View File

@@ -2231,7 +2231,6 @@ define <16 x i32> @splat_v3i32(ptr %ptr) {
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,0,1]
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: xorps %xmm3, %xmm3
; SSE42-NEXT: retq
;

View File

@@ -170,6 +170,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

View File

@@ -1240,7 +1240,6 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: cmovsl %esi, %eax
; X86-NEXT: movl $0, %esi
; X86-NEXT: movl $-1, %ebx
; X86-NEXT: cmovsl %ebx, %edi
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload

View File

@@ -533,7 +533,6 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
; i686-NEXT: .LBB6_9: # %entry
; i686-NEXT: movl %edi, %esi
; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; i686-NEXT: shrl %cl, %ebp
; i686-NEXT: testb $32, %cl
@@ -846,7 +845,6 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
; i686-NEXT: movl {{[0-9]+}}(%esp), %esi
; i686-NEXT: movb $64, %cl
; i686-NEXT: subb %dl, %cl
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx
; i686-NEXT: shldl %cl, %ebx, %ebp
; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill

View File

@@ -354,7 +354,6 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: cmovnel %eax, %ebx
; X86-NEXT: movl $65535, %eax # imm = 0xFFFF
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shll %cl, %esi

View File

@@ -111,14 +111,12 @@ define <4 x i32> @ossfuzz15662(ptr %in) {
; X32: # %bb.0:
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: movaps %xmm0, (%eax)
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: ossfuzz15662:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rax)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
%C10 = icmp ule i1 false, false
%C3 = icmp ule i1 true, undef

View File

@@ -178,14 +178,12 @@ define <4 x i32> @test17(<4 x i32> %a0, ptr %dummy) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, (%eax)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test17:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
%a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
store <4 x i32> %a, ptr %dummy
@@ -199,14 +197,12 @@ define <4 x i32> @test18(<4 x i32> %a0, ptr %dummy) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, (%eax)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test18:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
%a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 3)
store <4 x i32> %a, ptr %dummy

View File

@@ -87,13 +87,10 @@ declare void @g(i32*, i32*)
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r5, cp[[[INDEX1]]]
; CHECK: stw r1, r0[r5]
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r1, cp[[[INDEX2]]]
; CHECK: stw r2, r0[r1]
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r1, cp[[[INDEX3]]]
; CHECK: stw r3, r0[r1]
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r1, cp[[[INDEX4]]]
; CHECK: stw r11, r0[r1]
; CHECK: ldaw sp, sp[65535]