[AArch64][GlobalISel] Lower G_BUILD_VECTOR -> G_DUP

If we have

```
%vec = G_BUILD_VECTOR %reg, %reg, ..., %reg
```

Then lower it to

```
%vec = G_DUP %reg
```

Also update the selector to handle constant splats on G_DUP.
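For example (mirroring the cst_v4s32 selection test added below), a G_DUP of a constant such as

```
%cst:gpr(s32) = G_CONSTANT i32 3
%dup:fpr(<4 x s32>) = G_DUP %cst(s32)
```

is now checked in earlySelect and, when it is better than a DUP, emitted as a MOV/MOVI or a constant-pool load (ADRP + LDRQui) instead of materializing the constant in a GPR and duplicating it.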

This will not combine when the splat is all zeros or all ones, since TableGen-imported
patterns (immAllZerosV/immAllOnesV) rely on matching a G_BUILD_VECTOR.
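For example, an all-zeros splat like

```
%v:_(<2 x s32>) = COPY $d0
%zero:_(s32) = G_CONSTANT i32 0
%build_vector:_(<2 x s32>) = G_BUILD_VECTOR %zero, %zero
%sub:_(<2 x s32>) = G_SUB %build_vector, %v
```

is deliberately left as a G_BUILD_VECTOR so the imported immAllZerosV pattern can still select a NEGv2i32; the all_zeros_pat_example and all_ones_pat_example MIR tests below cover this.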

Minor code size improvements on CTMark at -Os.

Also adds some utility functions to make it a bit easier to recognize splats,
and an AArch64-specific splat helper.
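Roughly (a sketch based on the doc comments below), getVectorSplat returns a RegOrConstant for splat G_BUILD_VECTORs, and the AArch64-specific helpers additionally look through G_DUP:

```
%reg_splat:_(<2 x s32>) = G_BUILD_VECTOR %reg, %reg   ; getVectorSplat -> RegOrConstant(%reg)
%cst:_(s32) = G_CONSTANT i32 4
%cst_splat:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst   ; getVectorSplat -> RegOrConstant(4)
%dup:_(<2 x s32>) = G_DUP %cst(s32)                   ; getAArch64VectorSplatScalar -> 4
```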

Differential Revision: https://reviews.llvm.org/D97731
Jessica Paquette
2021-03-01 11:58:07 -08:00
parent 29482426b5
commit 5c26be214d
12 changed files with 496 additions and 49 deletions

View File

@@ -260,6 +260,31 @@ LLT getLCMType(LLT OrigTy, LLT TargetTy);
LLVM_READNONE
LLT getGCDType(LLT OrigTy, LLT TargetTy);
/// Represents a value which can be a Register or a constant.
///
/// This is useful in situations where an instruction may have an interesting
/// register operand or interesting constant operand. For a concrete example,
/// \see getVectorSplat.
class RegOrConstant {
int64_t Cst;
Register Reg;
bool IsReg;
public:
explicit RegOrConstant(Register Reg) : Reg(Reg), IsReg(true) {}
explicit RegOrConstant(int64_t Cst) : Cst(Cst), IsReg(false) {}
bool isReg() const { return IsReg; }
bool isCst() const { return !IsReg; }
Register getReg() const {
assert(isReg() && "Expected a register!");
return Reg;
}
int64_t getCst() const {
assert(isCst() && "Expected a constant!");
return Cst;
}
};
/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat.
/// If \p MI is not a splat, returns None.
Optional<int> getSplatIndex(MachineInstr &MI);
@@ -278,6 +303,28 @@ bool isBuildVectorAllZeros(const MachineInstr &MI,
bool isBuildVectorAllOnes(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
/// \returns a value when \p MI is a vector splat. The splat can be either a
/// Register or a constant.
///
/// Examples:
///
/// \code
/// %reg = COPY $physreg
/// %reg_splat = G_BUILD_VECTOR %reg, %reg, ..., %reg
/// \endcode
///
/// If called on the G_BUILD_VECTOR above, this will return a RegOrConstant
/// containing %reg.
///
/// \code
/// %cst = G_CONSTANT iN 4
/// %constant_splat = G_BUILD_VECTOR %cst, %cst, ..., %cst
/// \endcode
///
/// In the above case, this will return a RegOrConstant containing 4.
Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
/// Returns true if given the TargetLowering's boolean contents information,
/// the value \p Val contains a true value.
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,

View File

@@ -828,6 +828,20 @@ bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
return isBuildVectorConstantSplat(MI, MRI, -1);
}
Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
unsigned Opc = MI.getOpcode();
if (!isBuildVectorOp(Opc))
return None;
if (auto Splat = getBuildVectorConstantSplat(MI, MRI))
return RegOrConstant(*Splat);
auto Reg = MI.getOperand(1).getReg();
if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
[&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; }))
return None;
return RegOrConstant(Reg);
}
bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
bool IsFP) {
switch (TLI.getBooleanContents(IsVector, IsFP)) {

View File

@@ -135,13 +135,22 @@ def mul_const : GICombineRule<
(apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
>;
def build_vector_to_dup : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_BUILD_VECTOR):$root,
[{ return matchBuildVectorToDup(*${root}, MRI); }]),
(apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }])
>;
def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
def AArch64PostLegalizerLoweringHelper
: GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
[shuffle_vector_lowering, vashr_vlshr_imm,
- icmp_lowering]> {
+ icmp_lowering, build_vector_lowering]> {
let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
}

View File

@@ -29,6 +29,7 @@ add_public_tablegen_target(AArch64CommonTableGen)
add_llvm_target(AArch64CodeGen
GISel/AArch64CallLowering.cpp
GISel/AArch64GlobalISelUtils.cpp
GISel/AArch64InstructionSelector.cpp
GISel/AArch64LegalizerInfo.cpp
GISel/AArch64PreLegalizerCombiner.cpp

View File

@@ -0,0 +1,38 @@
//===- AArch64GlobalISelUtils.cpp --------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file Implementations of AArch64-specific helper functions used in the
/// GlobalISel pipeline.
//===----------------------------------------------------------------------===//
#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
Optional<RegOrConstant>
AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
if (auto Splat = getVectorSplat(MI, MRI))
return Splat;
if (MI.getOpcode() != AArch64::G_DUP)
return None;
Register Src = MI.getOperand(1).getReg();
if (auto ValAndVReg =
getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI))
return RegOrConstant(ValAndVReg->Value.getSExtValue());
return RegOrConstant(Src);
}
Optional<int64_t> AArch64GISelUtils::getAArch64VectorSplatScalar(
const MachineInstr &MI, const MachineRegisterInfo &MRI) {
auto Splat = getAArch64VectorSplat(MI, MRI);
if (!Splat || Splat->isReg())
return None;
return Splat->getCst();
}

View File

@@ -12,6 +12,9 @@
#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/Register.h"
#include <cstdint>
namespace llvm {
@@ -23,6 +26,16 @@ constexpr bool isLegalArithImmed(const uint64_t C) {
return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}
/// \returns A value when \p MI is a vector splat of a Register or constant.
/// Checks for generic opcodes and AArch64-specific generic opcodes.
Optional<RegOrConstant> getAArch64VectorSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
/// \returns A value when \p MI is a constant vector splat.
/// Checks for generic opcodes and AArch64-specific generic opcodes.
Optional<int64_t> getAArch64VectorSplatScalar(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
} // namespace AArch64GISelUtils
} // namespace llvm

View File

@@ -11,6 +11,7 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
@@ -24,7 +25,6 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -46,6 +47,7 @@
using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;
namespace llvm {
class BlockFrequencyInfo;
@@ -145,6 +147,16 @@ private:
Register EltReg, unsigned LaneIdx,
const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const;
/// Emit a sequence of instructions representing a constant \p CV for a
/// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
///
/// \returns the last instruction in the sequence on success, and nullptr
/// otherwise.
MachineInstr *emitConstantVector(Register Dst, Constant *CV,
MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI) const;
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI) const;
@@ -1659,23 +1671,7 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
MachineInstr *OpMI = MRI.getVRegDef(Reg);
assert(OpMI && "Expected to find a vreg def for vector shift operand");
- if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
- return None;
- // Check all operands are identical immediates.
- int64_t ImmVal = 0;
- for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
- auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
- if (!VRegAndVal)
- return None;
- if (Idx == 1)
- ImmVal = VRegAndVal->Value.getSExtValue();
- if (ImmVal != VRegAndVal->Value.getSExtValue())
- return None;
- }
- return ImmVal;
+ return getAArch64VectorSplatScalar(*OpMI, MRI);
}
/// Matches and returns the shift immediate value for a SHL instruction given
@@ -1963,7 +1959,7 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
MRI.setType(I.getOperand(0).getReg(),
DstTy.changeElementType(LLT::scalar(64)));
MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
I.getOperand(1).setReg(NewSrc.getReg(0));
return true;
}
@@ -2125,6 +2121,25 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
case AArch64::G_DUP: {
// Before selecting a DUP instruction, check if it is better selected as a
// MOV or load from a constant pool.
Register Src = I.getOperand(1).getReg();
auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
if (!ValAndVReg)
return false;
LLVMContext &Ctx = MF.getFunction().getContext();
Register Dst = I.getOperand(0).getReg();
auto *CV = ConstantDataVector::getSplat(
MRI.getType(Dst).getNumElements(),
ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
ValAndVReg->Value));
MachineIRBuilder MIRBuilder(I);
if (!emitConstantVector(Dst, CV, MIRBuilder, MRI))
return false;
I.eraseFromParent();
return true;
}
case TargetOpcode::G_BR: {
// If the branch jumps to the fallthrough block, don't bother emitting it.
// Only do this for -O0 for a good code size improvement, because when
@@ -4811,6 +4826,44 @@ bool AArch64InstructionSelector::selectInsertElt(
return true;
}
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI) const {
LLT DstTy = MRI.getType(Dst);
unsigned DstSize = DstTy.getSizeInBits();
if (CV->isNullValue()) {
if (DstSize == 128) {
auto Mov =
MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
return &*Mov;
}
if (DstSize == 64) {
auto Mov =
MIRBuilder
.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
.addImm(0);
auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
.addReg(Mov.getReg(0), 0, AArch64::dsub);
RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
return &*Copy;
}
}
auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
if (!CPLoad) {
LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
return nullptr;
}
auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
RBI.constrainGenericRegister(
Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
return &*Copy;
}
bool AArch64InstructionSelector::tryOptConstantBuildVec(
MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
@@ -4837,33 +4890,8 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
}
Constant *CV = ConstantVector::get(Csts);
MachineIRBuilder MIB(I);
- if (CV->isNullValue()) {
- // Until the importer can support immAllZerosV in pattern leaf nodes,
- // select a zero move manually here.
- Register DstReg = I.getOperand(0).getReg();
- if (DstSize == 128) {
- auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- } else if (DstSize == 64) {
- auto Mov =
- MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
- .addImm(0);
- MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addReg(Mov.getReg(0), 0, AArch64::dsub);
- I.eraseFromParent();
- return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
- }
- }
- auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
- if (!CPLoad) {
- LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
+ if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
return false;
- }
- MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
- RBI.constrainGenericRegister(I.getOperand(0).getReg(),
- *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
- MRI);
I.eraseFromParent();
return true;
}

View File

@@ -471,7 +471,7 @@ static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
int64_t &Cnt) {
assert(Ty.isVector() && "vector shift count is not a vector type");
MachineInstr *MI = MRI.getVRegDef(Reg);
- auto Cst = getBuildVectorConstantSplat(*MI, MRI);
+ auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
if (!Cst)
return false;
Cnt = *Cst;
@@ -696,6 +696,29 @@ bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
auto Splat = getAArch64VectorSplat(MI, MRI);
if (!Splat)
return false;
if (Splat->isReg())
return true;
// Later, during selection, we'll try to match imported patterns using
// immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
// G_BUILD_VECTORs which could match those patterns.
int64_t Cst = Splat->getCst();
return (Cst != 0 && Cst != -1);
}
static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) {
B.setInstrAndDebugLoc(MI);
B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
{MI.getOperand(1).getReg()});
MI.eraseFromParent();
return true;
}
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS

View File

@@ -0,0 +1,181 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=LOWER
# RUN: llc -mtriple aarch64 -O2 -start-before=aarch64-postlegalizer-lowering -stop-after=instruction-select -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=SELECT
...
---
name: same_reg
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0
; LOWER-LABEL: name: same_reg
; LOWER: liveins: $d0
; LOWER: %r:_(s8) = G_IMPLICIT_DEF
; LOWER: %build_vector:_(<8 x s8>) = G_DUP %r(s8)
; LOWER: $d0 = COPY %build_vector(<8 x s8>)
; LOWER: RET_ReallyLR implicit $d0
; SELECT-LABEL: name: same_reg
; SELECT: liveins: $d0
; SELECT: %r:gpr32 = IMPLICIT_DEF
; SELECT: %build_vector:fpr64 = DUPv8i8gpr %r
; SELECT: $d0 = COPY %build_vector
; SELECT: RET_ReallyLR implicit $d0
%r:_(s8) = G_IMPLICIT_DEF
%build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
$d0 = COPY %build_vector(<8 x s8>)
RET_ReallyLR implicit $d0
...
---
name: dont_combine_different_reg
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $w0, $w1
; LOWER-LABEL: name: dont_combine_different_reg
; LOWER: liveins: $d0, $w0, $w1
; LOWER: %r:_(s32) = COPY $w0
; LOWER: %q:_(s32) = COPY $w1
; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32)
; LOWER: $d0 = COPY %build_vector(<2 x s32>)
; LOWER: RET_ReallyLR implicit $d0
; SELECT-LABEL: name: dont_combine_different_reg
; SELECT: liveins: $d0, $w0, $w1
; SELECT: %r:gpr32all = COPY $w0
; SELECT: %q:gpr32 = COPY $w1
; SELECT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
; SELECT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub
; SELECT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %q
; SELECT: %build_vector:fpr64 = COPY [[INSvi32gpr]].dsub
; SELECT: $d0 = COPY %build_vector
; SELECT: RET_ReallyLR implicit $d0
%r:_(s32) = COPY $w0
%q:_(s32) = COPY $w1
%build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r, %q
$d0 = COPY %build_vector(<2 x s32>)
RET_ReallyLR implicit $d0
...
---
name: dont_combine_zero
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0
; Don't combine with 0. We want to avoid blocking immAllZerosV selection
; patterns.
; LOWER-LABEL: name: dont_combine_zero
; LOWER: liveins: $d0
; LOWER: %r:_(s8) = G_CONSTANT i8 0
; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
; LOWER: $d0 = COPY %build_vector(<8 x s8>)
; LOWER: RET_ReallyLR implicit $d0
; SELECT-LABEL: name: dont_combine_zero
; SELECT: liveins: $d0
; SELECT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
; SELECT: %build_vector:fpr64 = COPY [[MOVIv2d_ns]].dsub
; SELECT: $d0 = COPY %build_vector
; SELECT: RET_ReallyLR implicit $d0
%r:_(s8) = G_CONSTANT i8 0
%build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
$d0 = COPY %build_vector(<8 x s8>)
RET_ReallyLR implicit $d0
...
---
name: dont_combine_all_ones
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0
; Don't combine with -1. We want to avoid blocking immAllOnesV selection
; patterns.
; LOWER-LABEL: name: dont_combine_all_ones
; LOWER: liveins: $d0
; LOWER: %r:_(s8) = G_CONSTANT i8 -1
; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
; LOWER: $d0 = COPY %build_vector(<8 x s8>)
; LOWER: RET_ReallyLR implicit $d0
; SELECT-LABEL: name: dont_combine_all_ones
; SELECT: liveins: $d0
; SELECT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
; SELECT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
; SELECT: $d0 = COPY [[LDRDui]]
; SELECT: RET_ReallyLR implicit $d0
%r:_(s8) = G_CONSTANT i8 -1
%build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
$d0 = COPY %build_vector(<8 x s8>)
RET_ReallyLR implicit $d0
...
---
name: all_zeros_pat_example
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0
; We should get a NEGv2i32 here.
; LOWER-LABEL: name: all_zeros_pat_example
; LOWER: liveins: $d0
; LOWER: %v:_(<2 x s32>) = COPY $d0
; LOWER: %cst:_(s32) = G_CONSTANT i32 0
; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
; LOWER: %sub:_(<2 x s32>) = G_SUB %build_vector, %v
; LOWER: $d0 = COPY %sub(<2 x s32>)
; LOWER: RET_ReallyLR implicit $d0
; SELECT-LABEL: name: all_zeros_pat_example
; SELECT: liveins: $d0
; SELECT: %v:fpr64 = COPY $d0
; SELECT: %sub:fpr64 = NEGv2i32 %v
; SELECT: $d0 = COPY %sub
; SELECT: RET_ReallyLR implicit $d0
%v:_(<2 x s32>) = COPY $d0
%cst:_(s32) = G_CONSTANT i32 0
%build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst
%sub:_(<2 x s32>) = G_SUB %build_vector, %v
$d0 = COPY %sub(<2 x s32>)
RET_ReallyLR implicit $d0
...
---
name: all_ones_pat_example
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $d1
; We should get a BICv8i8 here.
; LOWER-LABEL: name: all_ones_pat_example
; LOWER: liveins: $d0, $d1
; LOWER: %v0:_(<2 x s32>) = COPY $d0
; LOWER: %v1:_(<2 x s32>) = COPY $d1
; LOWER: %cst:_(s32) = G_CONSTANT i32 -1
; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
; LOWER: %xor:_(<2 x s32>) = G_XOR %v0, %build_vector
; LOWER: %and:_(<2 x s32>) = G_AND %v1, %xor
; LOWER: $d0 = COPY %and(<2 x s32>)
; LOWER: RET_ReallyLR implicit $d0
; SELECT-LABEL: name: all_ones_pat_example
; SELECT: liveins: $d0, $d1
; SELECT: %v0:fpr64 = COPY $d0
; SELECT: %v1:fpr64 = COPY $d1
; SELECT: %and:fpr64 = BICv8i8 %v1, %v0
; SELECT: $d0 = COPY %and
; SELECT: RET_ReallyLR implicit $d0
%v0:_(<2 x s32>) = COPY $d0
%v1:_(<2 x s32>) = COPY $d1
%cst:_(s32) = G_CONSTANT i32 -1
%build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst
%xor:_(<2 x s32>) = G_XOR %v0, %build_vector
%and:_(<2 x s32>) = G_AND %v1, %xor
$d0 = COPY %and(<2 x s32>)
RET_ReallyLR implicit $d0

View File

@@ -84,8 +84,8 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[C]](s32)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[DUP]](<4 x s32>)
; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0

View File

@@ -379,3 +379,61 @@ body: |
RET_ReallyLR implicit $q0
...
---
name: cst_v4s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: cst_v4s32
; CHECK: liveins: $w0
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
; CHECK: $q0 = COPY [[LDRQui]]
; CHECK: RET_ReallyLR implicit $q0
%cst:gpr(s32) = G_CONSTANT i32 3
%dup:fpr(<4 x s32>) = G_DUP %cst(s32)
$q0 = COPY %dup(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
name: cst_v8s8
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: cst_v8s8
; CHECK: liveins: $w0
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
; CHECK: $d0 = COPY [[LDRDui]]
; CHECK: RET_ReallyLR implicit $d0
%cst:gpr(s8) = G_CONSTANT i8 3
%dup:fpr(<8 x s8>) = G_DUP %cst(s8)
$d0 = COPY %dup(<8 x s8>)
RET_ReallyLR implicit $d0
...
---
name: cst_v2p0
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: cst_v2p0
; CHECK: liveins: $w0
; CHECK: %cst:gpr64 = MOVi64imm 3
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
; CHECK: $q0 = COPY [[LDRQui]]
; CHECK: RET_ReallyLR implicit $q0
%cst:gpr(p0) = G_CONSTANT i64 3
%dup:fpr(<2 x p0>) = G_DUP %cst(p0)
$q0 = COPY %dup(<2 x p0>)
RET_ReallyLR implicit $q0

View File

@@ -572,3 +572,38 @@ body: |
$q0 = COPY %2(<16 x s8>)
RET_ReallyLR implicit $q0
...
---
name: shl_v2i32_imm_dup
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
- { id: 2, class: gpr }
- { id: 3, class: fpr }
liveins:
- { reg: '$d0' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.1:
liveins: $d0
; Should still be able to select immediate forms using a G_DUP from a
; constant.
; CHECK-LABEL: name: shl_v2i32_imm_dup
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 24
; CHECK: $d0 = COPY [[SHLv2i32_shift]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(<2 x s32>) = COPY $d0
%2:gpr(s32) = G_CONSTANT i32 24
%1:fpr(<2 x s32>) = G_DUP %2(s32)
%3:fpr(<2 x s32>) = G_SHL %0, %1(<2 x s32>)
$d0 = COPY %3(<2 x s32>)
RET_ReallyLR implicit $d0