[AArch64][GlobalISel] Lower G_BUILD_VECTOR -> G_DUP
If we have

```
%vec = G_BUILD_VECTOR %reg, %reg, ..., %reg
```

Then lower it to

```
%vec = G_DUP %reg
```

Also update the selector to handle constant splats on G_DUP.

This will not combine when the splat is all zeros or ones. Tablegen-imported patterns rely on these being G_BUILD_VECTOR.

Minor code size improvements on CTMark at -Os.

Also adds some utility functions to make it a bit easier to recognize splats, and an AArch64-specific splat helper.

Differential Revision: https://reviews.llvm.org/D97731
This commit is contained in:
@@ -260,6 +260,31 @@ LLT getLCMType(LLT OrigTy, LLT TargetTy);
|
||||
LLVM_READNONE
|
||||
LLT getGCDType(LLT OrigTy, LLT TargetTy);
|
||||
|
||||
/// Represents a value which can be a Register or a constant.
|
||||
///
|
||||
/// This is useful in situations where an instruction may have an interesting
|
||||
/// register operand or interesting constant operand. For a concrete example,
|
||||
/// \see getVectorSplat.
|
||||
class RegOrConstant {
|
||||
int64_t Cst;
|
||||
Register Reg;
|
||||
bool IsReg;
|
||||
|
||||
public:
|
||||
explicit RegOrConstant(Register Reg) : Reg(Reg), IsReg(true) {}
|
||||
explicit RegOrConstant(int64_t Cst) : Cst(Cst), IsReg(false) {}
|
||||
bool isReg() const { return IsReg; }
|
||||
bool isCst() const { return !IsReg; }
|
||||
Register getReg() const {
|
||||
assert(isReg() && "Expected a register!");
|
||||
return Reg;
|
||||
}
|
||||
int64_t getCst() const {
|
||||
assert(isCst() && "Expected a constant!");
|
||||
return Cst;
|
||||
}
|
||||
};
|
||||
|
||||
/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat.
|
||||
/// If \p MI is not a splat, returns None.
|
||||
Optional<int> getSplatIndex(MachineInstr &MI);
|
||||
@@ -278,6 +303,28 @@ bool isBuildVectorAllZeros(const MachineInstr &MI,
|
||||
bool isBuildVectorAllOnes(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
/// \returns a value when \p MI is a vector splat. The splat can be either a
|
||||
/// Register or a constant.
|
||||
///
|
||||
/// Examples:
|
||||
///
|
||||
/// \code
|
||||
/// %reg = COPY $physreg
|
||||
/// %reg_splat = G_BUILD_VECTOR %reg, %reg, ..., %reg
|
||||
/// \endcode
|
||||
///
|
||||
/// If called on the G_BUILD_VECTOR above, this will return a RegOrConstant
|
||||
/// containing %reg.
|
||||
///
|
||||
/// \code
|
||||
/// %cst = G_CONSTANT iN 4
|
||||
/// %constant_splat = G_BUILD_VECTOR %cst, %cst, ..., %cst
|
||||
/// \endcode
|
||||
///
|
||||
/// In the above case, this will return a RegOrConstant containing 4.
|
||||
Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
/// Returns true if given the TargetLowering's boolean contents information,
|
||||
/// the value \p Val contains a true value.
|
||||
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
|
||||
|
||||
@@ -828,6 +828,20 @@ bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
|
||||
return isBuildVectorConstantSplat(MI, MRI, -1);
|
||||
}
|
||||
|
||||
/// Checks whether \p MI is a build-vector style instruction whose sources are
/// all the same value.
///
/// \returns None if \p MI is rejected by isBuildVectorOp or if its source
/// registers differ. Otherwise returns the splatted value: the constant when
/// getBuildVectorConstantSplat finds one, and the shared source register
/// otherwise.
Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
                                             const MachineRegisterInfo &MRI) {
  if (!isBuildVectorOp(MI.getOpcode()))
    return None;

  // A constant splat is reported as the constant itself rather than as the
  // register that carries it.
  auto CstSplat = getBuildVectorConstantSplat(MI, MRI);
  if (CstSplat)
    return RegOrConstant(*CstSplat);

  // Operand 1 is treated as the first source; every operand after it has to
  // name the same register for this to be a splat.
  const Register FirstSrc = MI.getOperand(1).getReg();
  for (unsigned Idx = 2, NumOps = MI.getNumOperands(); Idx < NumOps; ++Idx)
    if (MI.getOperand(Idx).getReg() != FirstSrc)
      return None;

  return RegOrConstant(FirstSrc);
}
|
||||
|
||||
bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
|
||||
bool IsFP) {
|
||||
switch (TLI.getBooleanContents(IsVector, IsFP)) {
|
||||
|
||||
@@ -135,13 +135,22 @@ def mul_const : GICombineRule<
|
||||
(apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def build_vector_to_dup : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_BUILD_VECTOR):$root,
|
||||
[{ return matchBuildVectorToDup(*${root}, MRI); }]),
|
||||
(apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }])
|
||||
>;
|
||||
|
||||
def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
|
||||
|
||||
// Post-legalization combines which should happen at all optimization levels.
|
||||
// (E.g. ones that facilitate matching for the selector) For example, matching
|
||||
// pseudos.
|
||||
def AArch64PostLegalizerLoweringHelper
|
||||
: GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
|
||||
[shuffle_vector_lowering, vashr_vlshr_imm,
|
||||
icmp_lowering]> {
|
||||
icmp_lowering, build_vector_lowering]> {
|
||||
let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ add_public_tablegen_target(AArch64CommonTableGen)
|
||||
|
||||
add_llvm_target(AArch64CodeGen
|
||||
GISel/AArch64CallLowering.cpp
|
||||
GISel/AArch64GlobalISelUtils.cpp
|
||||
GISel/AArch64InstructionSelector.cpp
|
||||
GISel/AArch64LegalizerInfo.cpp
|
||||
GISel/AArch64PreLegalizerCombiner.cpp
|
||||
|
||||
38
llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
Normal file
38
llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
//===- AArch64GlobalISelUtils.cpp --------------------------------*- C++ -*-==//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file Implementations of AArch64-specific helper functions used in the
|
||||
/// GlobalISel pipeline.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "AArch64GlobalISelUtils.h"
|
||||
#include "AArch64InstrInfo.h"
|
||||
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// Splat recognition that also understands the AArch64-specific G_DUP opcode.
///
/// The generic getVectorSplat check is tried first. If it finds nothing and
/// \p MI is a G_DUP, the result is the constant its source resolves to (via
/// getConstantVRegValWithLookThrough, sign-extended), or the source register
/// itself when no constant is found. \returns None for anything else.
Optional<RegOrConstant>
AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI) {
  // Whatever the generic helper recognises is returned unchanged.
  if (auto GenericSplat = getVectorSplat(MI, MRI))
    return GenericSplat;

  if (MI.getOpcode() == AArch64::G_DUP) {
    const Register Src = MI.getOperand(1).getReg();
    auto CstSrc = getConstantVRegValWithLookThrough(Src, MRI);
    if (CstSrc)
      return RegOrConstant(CstSrc->Value.getSExtValue());
    return RegOrConstant(Src);
  }

  return None;
}
|
||||
|
||||
/// \returns the splatted constant when getAArch64VectorSplat reports that
/// \p MI is a splat of a constant. Returns None when \p MI is not a splat or
/// when it is a splat of a plain register.
Optional<int64_t> AArch64GISelUtils::getAArch64VectorSplatScalar(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) {
  auto MaybeSplat = getAArch64VectorSplat(MI, MRI);
  if (MaybeSplat && MaybeSplat->isCst())
    return MaybeSplat->getCst();
  return None;
}
|
||||
@@ -12,6 +12,9 @@
|
||||
#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
|
||||
#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
|
||||
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||
#include "llvm/CodeGen/Register.h"
|
||||
#include <cstdint>
|
||||
|
||||
namespace llvm {
|
||||
@@ -23,6 +26,16 @@ constexpr bool isLegalArithImmed(const uint64_t C) {
|
||||
return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
|
||||
}
|
||||
|
||||
/// \returns A value when \p MI is a vector splat of a Register or constant.
|
||||
/// Checks for generic opcodes and AArch64-specific generic opcodes.
|
||||
Optional<RegOrConstant> getAArch64VectorSplat(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
/// \returns A value when \p MI is a constant vector splat.
|
||||
/// Checks for generic opcodes and AArch64-specific generic opcodes.
|
||||
Optional<int64_t> getAArch64VectorSplatScalar(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
} // namespace AArch64GISelUtils
|
||||
} // namespace llvm
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
/// \todo This should be generated by TableGen.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64GlobalISelUtils.h"
|
||||
#include "AArch64InstrInfo.h"
|
||||
#include "AArch64MachineFunctionInfo.h"
|
||||
#include "AArch64RegisterBankInfo.h"
|
||||
@@ -24,7 +25,6 @@
|
||||
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
|
||||
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineConstantPool.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetOpcodes.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
@@ -46,6 +47,7 @@
|
||||
|
||||
using namespace llvm;
|
||||
using namespace MIPatternMatch;
|
||||
using namespace AArch64GISelUtils;
|
||||
|
||||
namespace llvm {
|
||||
class BlockFrequencyInfo;
|
||||
@@ -145,6 +147,16 @@ private:
|
||||
Register EltReg, unsigned LaneIdx,
|
||||
const RegisterBank &RB,
|
||||
MachineIRBuilder &MIRBuilder) const;
|
||||
|
||||
/// Emit a sequence of instructions representing a constant \p CV for a
|
||||
/// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
|
||||
///
|
||||
/// \returns the last instruction in the sequence on success, and nullptr
|
||||
/// otherwise.
|
||||
MachineInstr *emitConstantVector(Register Dst, Constant *CV,
|
||||
MachineIRBuilder &MIRBuilder,
|
||||
MachineRegisterInfo &MRI) const;
|
||||
|
||||
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
|
||||
MachineRegisterInfo &MRI) const;
|
||||
@@ -1659,23 +1671,7 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
|
||||
assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
|
||||
MachineInstr *OpMI = MRI.getVRegDef(Reg);
|
||||
assert(OpMI && "Expected to find a vreg def for vector shift operand");
|
||||
if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
|
||||
return None;
|
||||
|
||||
// Check all operands are identical immediates.
|
||||
int64_t ImmVal = 0;
|
||||
for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
|
||||
auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
|
||||
if (!VRegAndVal)
|
||||
return None;
|
||||
|
||||
if (Idx == 1)
|
||||
ImmVal = VRegAndVal->Value.getSExtValue();
|
||||
if (ImmVal != VRegAndVal->Value.getSExtValue())
|
||||
return None;
|
||||
}
|
||||
|
||||
return ImmVal;
|
||||
return getAArch64VectorSplatScalar(*OpMI, MRI);
|
||||
}
|
||||
|
||||
/// Matches and returns the shift immediate value for a SHL instruction given
|
||||
@@ -1963,7 +1959,7 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
|
||||
auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
|
||||
MRI.setType(I.getOperand(0).getReg(),
|
||||
DstTy.changeElementType(LLT::scalar(64)));
|
||||
MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
|
||||
MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
|
||||
I.getOperand(1).setReg(NewSrc.getReg(0));
|
||||
return true;
|
||||
}
|
||||
@@ -2125,6 +2121,25 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
switch (I.getOpcode()) {
|
||||
case AArch64::G_DUP: {
|
||||
// Before selecting a DUP instruction, check if it is better selected as a
|
||||
// MOV or load from a constant pool.
|
||||
Register Src = I.getOperand(1).getReg();
|
||||
auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
|
||||
if (!ValAndVReg)
|
||||
return false;
|
||||
LLVMContext &Ctx = MF.getFunction().getContext();
|
||||
Register Dst = I.getOperand(0).getReg();
|
||||
auto *CV = ConstantDataVector::getSplat(
|
||||
MRI.getType(Dst).getNumElements(),
|
||||
ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
|
||||
ValAndVReg->Value));
|
||||
MachineIRBuilder MIRBuilder(I);
|
||||
if (!emitConstantVector(Dst, CV, MIRBuilder, MRI))
|
||||
return false;
|
||||
I.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
case TargetOpcode::G_BR: {
|
||||
// If the branch jumps to the fallthrough block, don't bother emitting it.
|
||||
// Only do this for -O0 for a good code size improvement, because when
|
||||
@@ -4811,6 +4826,44 @@ bool AArch64InstructionSelector::selectInsertElt(
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Materializes the constant \p CV into the vector register \p Dst.
///
/// An all-zero constant of 128 or 64 bits is produced with a MOVIv2d_ns of
/// immediate 0 (copied out through the dsub subregister in the 64-bit case).
/// Everything else is loaded from the constant pool and copied into \p Dst.
///
/// \returns the last instruction emitted, or nullptr if the constant pool
/// load could not be created.
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
                                               MachineIRBuilder &MIRBuilder,
                                               MachineRegisterInfo &MRI) const {
  const unsigned DstSize = MRI.getType(Dst).getSizeInBits();

  if (CV->isNullValue()) {
    switch (DstSize) {
    case 128: {
      // The zeroing move can define Dst directly.
      auto ZeroMov =
          MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
      constrainSelectedInstRegOperands(*ZeroMov, TII, TRI, RBI);
      return &*ZeroMov;
    }
    case 64: {
      // Zero a fresh FPR128 register, then take its dsub half as Dst.
      auto ZeroMov =
          MIRBuilder
              .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
              .addImm(0);
      auto LowHalf = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
                         .addReg(ZeroMov.getReg(0), 0, AArch64::dsub);
      RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
      return &*LowHalf;
    }
    default:
      // Other sizes of zero fall through to the constant pool path below.
      break;
    }
  }

  auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
  if (!CPLoad) {
    LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
    return nullptr;
  }

  // Copy the loaded value into Dst and give Dst the register class of the
  // load's result.
  auto CopyToDst = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
  const Register LoadedReg = CPLoad->getOperand(0).getReg();
  RBI.constrainGenericRegister(Dst, *MRI.getRegClass(LoadedReg), MRI);
  return &*CopyToDst;
}
|
||||
|
||||
bool AArch64InstructionSelector::tryOptConstantBuildVec(
|
||||
MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
|
||||
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
|
||||
@@ -4837,33 +4890,8 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
|
||||
}
|
||||
Constant *CV = ConstantVector::get(Csts);
|
||||
MachineIRBuilder MIB(I);
|
||||
if (CV->isNullValue()) {
|
||||
// Until the importer can support immAllZerosV in pattern leaf nodes,
|
||||
// select a zero move manually here.
|
||||
Register DstReg = I.getOperand(0).getReg();
|
||||
if (DstSize == 128) {
|
||||
auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
|
||||
I.eraseFromParent();
|
||||
return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
|
||||
} else if (DstSize == 64) {
|
||||
auto Mov =
|
||||
MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
|
||||
.addImm(0);
|
||||
MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
|
||||
.addReg(Mov.getReg(0), 0, AArch64::dsub);
|
||||
I.eraseFromParent();
|
||||
return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
|
||||
}
|
||||
}
|
||||
auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
|
||||
if (!CPLoad) {
|
||||
LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
|
||||
if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
|
||||
return false;
|
||||
}
|
||||
MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
|
||||
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
|
||||
*MRI.getRegClass(CPLoad->getOperand(0).getReg()),
|
||||
MRI);
|
||||
I.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -471,7 +471,7 @@ static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
|
||||
int64_t &Cnt) {
|
||||
assert(Ty.isVector() && "vector shift count is not a vector type");
|
||||
MachineInstr *MI = MRI.getVRegDef(Reg);
|
||||
auto Cst = getBuildVectorConstantSplat(*MI, MRI);
|
||||
auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
|
||||
if (!Cst)
|
||||
return false;
|
||||
Cnt = *Cst;
|
||||
@@ -696,6 +696,29 @@ bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Match step of the G_BUILD_VECTOR -> G_DUP lowering.
///
/// \returns true when \p MI is a splat that should be rewritten as a G_DUP:
/// any splat of a register, or a splat of a constant other than 0 and -1.
static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  const auto SplatVal = getAArch64VectorSplat(MI, MRI);
  if (!SplatVal)
    return false;

  if (SplatVal->isCst()) {
    // Imported selection patterns using immAllOnesV and immAllZerosV need to
    // see a G_BUILD_VECTOR later on, so all-zeros and all-ones splats are
    // deliberately left alone.
    const int64_t Imm = SplatVal->getCst();
    return !(Imm == 0 || Imm == -1);
  }

  // A splat of an ordinary register can always be lowered.
  return true;
}
|
||||
|
||||
/// Apply step of the G_BUILD_VECTOR -> G_DUP lowering.
///
/// Builds a G_DUP at the position of \p MI that defines \p MI's result from
/// its first source operand, then erases \p MI. Always \returns true.
static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  MachineIRBuilder &B) {
  // Read the operands up front; MI is erased at the end.
  const Register Dst = MI.getOperand(0).getReg();
  const Register SplatSrc = MI.getOperand(1).getReg();

  B.setInstrAndDebugLoc(MI);
  B.buildInstr(AArch64::G_DUP, {Dst}, {SplatSrc});
  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
|
||||
#include "AArch64GenPostLegalizeGILowering.inc"
|
||||
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
|
||||
|
||||
@@ -0,0 +1,181 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=LOWER
|
||||
# RUN: llc -mtriple aarch64 -O2 -start-before=aarch64-postlegalizer-lowering -stop-after=instruction-select -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=SELECT
|
||||
...
|
||||
---
|
||||
name: same_reg
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $d0
|
||||
; LOWER-LABEL: name: same_reg
|
||||
; LOWER: liveins: $d0
|
||||
; LOWER: %r:_(s8) = G_IMPLICIT_DEF
|
||||
; LOWER: %build_vector:_(<8 x s8>) = G_DUP %r(s8)
|
||||
; LOWER: $d0 = COPY %build_vector(<8 x s8>)
|
||||
; LOWER: RET_ReallyLR implicit $d0
|
||||
; SELECT-LABEL: name: same_reg
|
||||
; SELECT: liveins: $d0
|
||||
; SELECT: %r:gpr32 = IMPLICIT_DEF
|
||||
; SELECT: %build_vector:fpr64 = DUPv8i8gpr %r
|
||||
; SELECT: $d0 = COPY %build_vector
|
||||
; SELECT: RET_ReallyLR implicit $d0
|
||||
%r:_(s8) = G_IMPLICIT_DEF
|
||||
%build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
|
||||
$d0 = COPY %build_vector(<8 x s8>)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: dont_combine_different_reg
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $d0, $w0, $w1
|
||||
; LOWER-LABEL: name: dont_combine_different_reg
|
||||
; LOWER: liveins: $d0, $w0, $w1
|
||||
; LOWER: %r:_(s32) = COPY $w0
|
||||
; LOWER: %q:_(s32) = COPY $w1
|
||||
; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32)
|
||||
; LOWER: $d0 = COPY %build_vector(<2 x s32>)
|
||||
; LOWER: RET_ReallyLR implicit $d0
|
||||
; SELECT-LABEL: name: dont_combine_different_reg
|
||||
; SELECT: liveins: $d0, $w0, $w1
|
||||
; SELECT: %r:gpr32all = COPY $w0
|
||||
; SELECT: %q:gpr32 = COPY $w1
|
||||
; SELECT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
|
||||
; SELECT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub
|
||||
; SELECT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %q
|
||||
; SELECT: %build_vector:fpr64 = COPY [[INSvi32gpr]].dsub
|
||||
; SELECT: $d0 = COPY %build_vector
|
||||
; SELECT: RET_ReallyLR implicit $d0
|
||||
%r:_(s32) = COPY $w0
|
||||
%q:_(s32) = COPY $w1
|
||||
%build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r, %q
|
||||
$d0 = COPY %build_vector(<2 x s32>)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: dont_combine_zero
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $d0
|
||||
; Don't combine with 0. We want to avoid blocking immAllZerosV selection
|
||||
; patterns.
|
||||
|
||||
; LOWER-LABEL: name: dont_combine_zero
|
||||
; LOWER: liveins: $d0
|
||||
; LOWER: %r:_(s8) = G_CONSTANT i8 0
|
||||
; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
|
||||
; LOWER: $d0 = COPY %build_vector(<8 x s8>)
|
||||
; LOWER: RET_ReallyLR implicit $d0
|
||||
; SELECT-LABEL: name: dont_combine_zero
|
||||
; SELECT: liveins: $d0
|
||||
; SELECT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
|
||||
; SELECT: %build_vector:fpr64 = COPY [[MOVIv2d_ns]].dsub
|
||||
; SELECT: $d0 = COPY %build_vector
|
||||
; SELECT: RET_ReallyLR implicit $d0
|
||||
%r:_(s8) = G_CONSTANT i8 0
|
||||
%build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
|
||||
$d0 = COPY %build_vector(<8 x s8>)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: dont_combine_all_ones
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $d0
|
||||
; Don't combine with -1. We want to avoid blocking immAllOnesV selection
|
||||
; patterns.
|
||||
|
||||
; LOWER-LABEL: name: dont_combine_all_ones
|
||||
; LOWER: liveins: $d0
|
||||
; LOWER: %r:_(s8) = G_CONSTANT i8 -1
|
||||
; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
|
||||
; LOWER: $d0 = COPY %build_vector(<8 x s8>)
|
||||
; LOWER: RET_ReallyLR implicit $d0
|
||||
; SELECT-LABEL: name: dont_combine_all_ones
|
||||
; SELECT: liveins: $d0
|
||||
; SELECT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
|
||||
; SELECT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
|
||||
; SELECT: $d0 = COPY [[LDRDui]]
|
||||
; SELECT: RET_ReallyLR implicit $d0
|
||||
%r:_(s8) = G_CONSTANT i8 -1
|
||||
%build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
|
||||
$d0 = COPY %build_vector(<8 x s8>)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: all_zeros_pat_example
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $d0
|
||||
; We should get a NEGv2i32 here.
|
||||
|
||||
; LOWER-LABEL: name: all_zeros_pat_example
|
||||
; LOWER: liveins: $d0
|
||||
; LOWER: %v:_(<2 x s32>) = COPY $d0
|
||||
; LOWER: %cst:_(s32) = G_CONSTANT i32 0
|
||||
; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
|
||||
; LOWER: %sub:_(<2 x s32>) = G_SUB %build_vector, %v
|
||||
; LOWER: $d0 = COPY %sub(<2 x s32>)
|
||||
; LOWER: RET_ReallyLR implicit $d0
|
||||
; SELECT-LABEL: name: all_zeros_pat_example
|
||||
; SELECT: liveins: $d0
|
||||
; SELECT: %v:fpr64 = COPY $d0
|
||||
; SELECT: %sub:fpr64 = NEGv2i32 %v
|
||||
; SELECT: $d0 = COPY %sub
|
||||
; SELECT: RET_ReallyLR implicit $d0
|
||||
%v:_(<2 x s32>) = COPY $d0
|
||||
%cst:_(s32) = G_CONSTANT i32 0
|
||||
%build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst
|
||||
%sub:_(<2 x s32>) = G_SUB %build_vector, %v
|
||||
$d0 = COPY %sub(<2 x s32>)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: all_ones_pat_example
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $d0, $d1
|
||||
; We should get a BICv8i8 here.
|
||||
|
||||
; LOWER-LABEL: name: all_ones_pat_example
|
||||
; LOWER: liveins: $d0, $d1
|
||||
; LOWER: %v0:_(<2 x s32>) = COPY $d0
|
||||
; LOWER: %v1:_(<2 x s32>) = COPY $d1
|
||||
; LOWER: %cst:_(s32) = G_CONSTANT i32 -1
|
||||
; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
|
||||
; LOWER: %xor:_(<2 x s32>) = G_XOR %v0, %build_vector
|
||||
; LOWER: %and:_(<2 x s32>) = G_AND %v1, %xor
|
||||
; LOWER: $d0 = COPY %and(<2 x s32>)
|
||||
; LOWER: RET_ReallyLR implicit $d0
|
||||
; SELECT-LABEL: name: all_ones_pat_example
|
||||
; SELECT: liveins: $d0, $d1
|
||||
; SELECT: %v0:fpr64 = COPY $d0
|
||||
; SELECT: %v1:fpr64 = COPY $d1
|
||||
; SELECT: %and:fpr64 = BICv8i8 %v1, %v0
|
||||
; SELECT: $d0 = COPY %and
|
||||
; SELECT: RET_ReallyLR implicit $d0
|
||||
%v0:_(<2 x s32>) = COPY $d0
|
||||
%v1:_(<2 x s32>) = COPY $d1
|
||||
%cst:_(s32) = G_CONSTANT i32 -1
|
||||
%build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst
|
||||
%xor:_(<2 x s32>) = G_XOR %v0, %build_vector
|
||||
%and:_(<2 x s32>) = G_AND %v1, %xor
|
||||
$d0 = COPY %and(<2 x s32>)
|
||||
RET_ReallyLR implicit $d0
|
||||
@@ -84,8 +84,8 @@ body: |
|
||||
; CHECK: liveins: $d0, $d1
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[C]](s32)
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[DUP]](<4 x s32>)
|
||||
; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%0:_(<4 x s32>) = COPY $q0
|
||||
|
||||
@@ -379,3 +379,61 @@ body: |
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: cst_v4s32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $w0
|
||||
; CHECK-LABEL: name: cst_v4s32
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
|
||||
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
|
||||
; CHECK: $q0 = COPY [[LDRQui]]
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%cst:gpr(s32) = G_CONSTANT i32 3
|
||||
%dup:fpr(<4 x s32>) = G_DUP %cst(s32)
|
||||
$q0 = COPY %dup(<4 x s32>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: cst_v8s8
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $w0
|
||||
; CHECK-LABEL: name: cst_v8s8
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
|
||||
; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
|
||||
; CHECK: $d0 = COPY [[LDRDui]]
|
||||
; CHECK: RET_ReallyLR implicit $d0
|
||||
%cst:gpr(s8) = G_CONSTANT i8 3
|
||||
%dup:fpr(<8 x s8>) = G_DUP %cst(s8)
|
||||
$d0 = COPY %dup(<8 x s8>)
|
||||
RET_ReallyLR implicit $d0
|
||||
...
|
||||
---
|
||||
name: cst_v2p0
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $w0
|
||||
; CHECK-LABEL: name: cst_v2p0
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: %cst:gpr64 = MOVi64imm 3
|
||||
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
|
||||
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
|
||||
; CHECK: $q0 = COPY [[LDRQui]]
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%cst:gpr(p0) = G_CONSTANT i64 3
|
||||
%dup:fpr(<2 x p0>) = G_DUP %cst(p0)
|
||||
$q0 = COPY %dup(<2 x p0>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
@@ -572,3 +572,38 @@ body: |
|
||||
$q0 = COPY %2(<16 x s8>)
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
---
|
||||
name: shl_v2i32_imm_dup
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: fpr }
|
||||
- { id: 1, class: fpr }
|
||||
- { id: 2, class: gpr }
|
||||
- { id: 3, class: fpr }
|
||||
liveins:
|
||||
- { reg: '$d0' }
|
||||
frameInfo:
|
||||
maxAlignment: 1
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $d0
|
||||
|
||||
; Should still be able to select immediate forms using a G_DUP from a
|
||||
; constant.
|
||||
|
||||
; CHECK-LABEL: name: shl_v2i32_imm_dup
|
||||
; CHECK: liveins: $d0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
|
||||
; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 24
|
||||
; CHECK: $d0 = COPY [[SHLv2i32_shift]]
|
||||
; CHECK: RET_ReallyLR implicit $d0
|
||||
%0:fpr(<2 x s32>) = COPY $d0
|
||||
%2:gpr(s32) = G_CONSTANT i32 24
|
||||
%1:fpr(<2 x s32>) = G_DUP %2(s32)
|
||||
%3:fpr(<2 x s32>) = G_SHL %0, %1(<2 x s32>)
|
||||
$d0 = COPY %3(<2 x s32>)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
Reference in New Issue
Block a user