Files
clang-p2996/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
Amara Emerson 2e25d75aaa [AArch64][GlobalISel] Fix llvm.returnaddress(0) selection when LR is clobbered.
The code was originally ported from SelectionDAG, which does CSE behind the scenes
automatically. When copying the return address from LR live into the function, we
need to make sure to use the single copy on function entry. Any later copy from LR
could be using clobbered junk.

Implement this by caching the copy in the per-MF state in the selector.

Should hopefully fix the AArch64 sanitiser buildbot failure.
2020-01-21 22:53:32 -08:00

4973 lines
176 KiB
C++

//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "aarch64-isel"
using namespace llvm;
namespace {
#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
class AArch64InstructionSelector : public InstructionSelector {
public:
AArch64InstructionSelector(const AArch64TargetMachine &TM,
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
bool select(MachineInstr &I) override;
static const char *getName() { return DEBUG_TYPE; }
void setupMF(MachineFunction &MF, GISelKnownBits &KB,
CodeGenCoverage &CoverageInfo) override {
InstructionSelector::setupMF(MF, KB, CoverageInfo);
// hasFnAttribute() is expensive to call on every BRCOND selection, so
// cache it here for each run of the selector.
ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
MFReturnAddr = Register();
}
private:
/// tblgen-erated 'select' implementation, used as the initial selector for
/// the patterns that don't require complex C++.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
// A lowering phase that runs before any selection attempts.
void preISelLower(MachineInstr &I) const;
// An early selection function that runs before the selectImpl() call.
bool earlySelect(MachineInstr &I) const;
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
void contractCrossBankCopyIntoStore(MachineInstr &I,
MachineRegisterInfo &MRI) const;
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
// Helper to generate an equivalent of scalar_to_vector into a new register,
// returned via 'Dst'.
MachineInstr *emitScalarToVector(unsigned EltSize,
const TargetRegisterClass *DstRC,
Register Scalar,
MachineIRBuilder &MIRBuilder) const;
/// Emit a lane insert into \p DstReg, or a new vector register if None is
/// provided.
///
/// The lane inserted into is defined by \p LaneIdx. The vector source
/// register is given by \p SrcReg. The register containing the element is
/// given by \p EltReg.
MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
Register EltReg, unsigned LaneIdx,
const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const;
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectSplitVectorUnmerge(MachineInstr &I,
MachineRegisterInfo &MRI) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI) const;
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
MachineIRBuilder &MIRBuilder) const;
// Emit a vector concat operation.
MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
Register Op2,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitTST(const Register &LHS, const Register &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
/// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
/// materialized using a FMOV instruction, then update MI and return it.
/// Otherwise, do nothing and return a nullptr.
MachineInstr *emitFMovForFConstant(MachineInstr &MI,
MachineRegisterInfo &MRI) const;
/// Emit a CSet for a compare.
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder) const;
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
unsigned Size) const;
ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 1);
}
ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 2);
}
ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 4);
}
ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 8);
}
ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 16);
}
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
unsigned Size) const;
template <int Width>
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
return selectAddrModeIndexed(Root, Width / 8);
}
bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
const MachineRegisterInfo &MRI) const;
ComplexRendererFns
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
unsigned SizeInBytes) const;
/// Returns a \p ComplexRendererFns which contains a base, offset, and whether
/// or not a shift + extend should be folded into an addressing mode. Returns
/// None when this is not profitable or possible.
ComplexRendererFns
selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
MachineOperand &Offset, unsigned SizeInBytes,
bool WantsExt) const;
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const;
template <int Width>
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
return selectAddrModeXRO(Root, Width / 8);
}
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
unsigned SizeInBytes) const;
template <int Width>
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
return selectAddrModeWRO(Root, Width / 8);
}
ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
return selectShiftedRegister(Root);
}
ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
// TODO: selectShiftedRegister should allow for rotates on logical shifts.
// For now, make them the same. The only difference between the two is that
// logical shifts are allowed to fold in rotates. Otherwise, these are
// functionally the same.
return selectShiftedRegister(Root);
}
/// Given an extend instruction, determine the correct shift-extend type for
/// that instruction.
///
/// If the instruction is going to be used in a load or store, pass
/// \p IsLoadStore = true.
AArch64_AM::ShiftExtendType
getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
bool IsLoadStore = false) const;
/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
/// subregister copy if necessary. Return either ExtReg, or the result of the
/// new copy.
Register narrowExtendRegIfNeeded(Register ExtReg,
MachineIRBuilder &MIB) const;
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
int OpIdx = -1) const;
void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
int OpIdx = -1) const;
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
void materializeLargeCMVal(MachineInstr &I, const Value *V,
unsigned OpFlags) const;
// Optimization methods.
bool tryOptVectorShuffle(MachineInstr &I) const;
bool tryOptVectorDup(MachineInstr &MI) const;
bool tryOptSelect(MachineInstr &MI) const;
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
/// Return true if \p MI is a load or store of \p NumBytes bytes.
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
/// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
/// register zeroed out. In other words, the result of MI has been explicitly
/// zero extended.
bool isDef32(const MachineInstr &MI) const;
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
const AArch64RegisterInfo &TRI;
const AArch64RegisterBankInfo &RBI;
bool ProduceNonFlagSettingCondBr = false;
// Some cached values used during selection.
// We use LR as a live-in register, and we keep track of it here as it can be
// clobbered by calls.
Register MFReturnAddr;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};
} // end anonymous namespace
#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI)
: InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
const RegisterBankInfo &RBI,
bool GetAllRegSet = false) {
if (RB.getID() == AArch64::GPRRegBankID) {
if (Ty.getSizeInBits() <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
if (Ty.getSizeInBits() == 64)
return GetAllRegSet ? &AArch64::GPR64allRegClass
: &AArch64::GPR64RegClass;
return nullptr;
}
if (RB.getID() == AArch64::FPRRegBankID) {
if (Ty.getSizeInBits() <= 16)
return &AArch64::FPR16RegClass;
if (Ty.getSizeInBits() == 32)
return &AArch64::FPR32RegClass;
if (Ty.getSizeInBits() == 64)
return &AArch64::FPR64RegClass;
if (Ty.getSizeInBits() == 128)
return &AArch64::FPR128RegClass;
return nullptr;
}
return nullptr;
}
/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
bool GetAllRegSet = false) {
unsigned RegBankID = RB.getID();
if (RegBankID == AArch64::GPRRegBankID) {
if (SizeInBits <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
if (SizeInBits == 64)
return GetAllRegSet ? &AArch64::GPR64allRegClass
: &AArch64::GPR64RegClass;
}
if (RegBankID == AArch64::FPRRegBankID) {
switch (SizeInBits) {
default:
return nullptr;
case 8:
return &AArch64::FPR8RegClass;
case 16:
return &AArch64::FPR16RegClass;
case 32:
return &AArch64::FPR32RegClass;
case 64:
return &AArch64::FPR64RegClass;
case 128:
return &AArch64::FPR128RegClass;
}
}
return nullptr;
}
/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
const TargetRegisterInfo &TRI, unsigned &SubReg) {
switch (TRI.getRegSizeInBits(*RC)) {
case 8:
SubReg = AArch64::bsub;
break;
case 16:
SubReg = AArch64::hsub;
break;
case 32:
if (RC != &AArch64::FPR32RegClass)
SubReg = AArch64::sub_32;
else
SubReg = AArch64::ssub;
break;
case 64:
SubReg = AArch64::dsub;
break;
default:
LLVM_DEBUG(
dbgs() << "Couldn't find appropriate subregister for register class.");
return false;
}
return true;
}
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - all operands are not in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
const AArch64RegisterBankInfo &RBI,
const MachineRegisterInfo &MRI,
const AArch64RegisterInfo &TRI) {
LLT Ty = MRI.getType(I.getOperand(0).getReg());
if (!Ty.isValid()) {
LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
return true;
}
const RegisterBank *PrevOpBank = nullptr;
for (auto &MO : I.operands()) {
// FIXME: Support non-register operands.
if (!MO.isReg()) {
LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
return true;
}
// FIXME: Can generic operations have physical registers operands? If
// so, this will need to be taught about that, and we'll need to get the
// bank out of the minimal class for the register.
// Either way, this needs to be documented (and possibly verified).
if (!Register::isVirtualRegister(MO.getReg())) {
LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
return true;
}
const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
if (!OpBank) {
LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
return true;
}
if (PrevOpBank && OpBank != PrevOpBank) {
LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
return true;
}
PrevOpBank = OpBank;
}
return false;
}
/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
unsigned OpSize) {
switch (RegBankID) {
case AArch64::GPRRegBankID:
if (OpSize == 32) {
switch (GenericOpc) {
case TargetOpcode::G_SHL:
return AArch64::LSLVWr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVWr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVWr;
default:
return GenericOpc;
}
} else if (OpSize == 64) {
switch (GenericOpc) {
case TargetOpcode::G_PTR_ADD:
return AArch64::ADDXrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVXr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVXr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVXr;
default:
return GenericOpc;
}
}
break;
case AArch64::FPRRegBankID:
switch (OpSize) {
case 32:
switch (GenericOpc) {
case TargetOpcode::G_FADD:
return AArch64::FADDSrr;
case TargetOpcode::G_FSUB:
return AArch64::FSUBSrr;
case TargetOpcode::G_FMUL:
return AArch64::FMULSrr;
case TargetOpcode::G_FDIV:
return AArch64::FDIVSrr;
default:
return GenericOpc;
}
case 64:
switch (GenericOpc) {
case TargetOpcode::G_FADD:
return AArch64::FADDDrr;
case TargetOpcode::G_FSUB:
return AArch64::FSUBDrr;
case TargetOpcode::G_FMUL:
return AArch64::FMULDrr;
case TargetOpcode::G_FDIV:
return AArch64::FDIVDrr;
case TargetOpcode::G_OR:
return AArch64::ORRv8i8;
default:
return GenericOpc;
}
}
break;
}
return GenericOpc;
}
/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
unsigned OpSize) {
const bool isStore = GenericOpc == TargetOpcode::G_STORE;
switch (RegBankID) {
case AArch64::GPRRegBankID:
switch (OpSize) {
case 8:
return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
case 16:
return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
case 32:
return isStore ? AArch64::STRWui : AArch64::LDRWui;
case 64:
return isStore ? AArch64::STRXui : AArch64::LDRXui;
}
break;
case AArch64::FPRRegBankID:
switch (OpSize) {
case 8:
return isStore ? AArch64::STRBui : AArch64::LDRBui;
case 16:
return isStore ? AArch64::STRHui : AArch64::LDRHui;
case 32:
return isStore ? AArch64::STRSui : AArch64::LDRSui;
case 64:
return isStore ? AArch64::STRDui : AArch64::LDRDui;
}
break;
}
return GenericOpc;
}
#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
// Make sure the size of the source and dest line up.
assert(
(DstSize == SrcSize ||
// Copies are a mean to setup initial types, the number of
// bits may not exactly match.
(Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
// Copies are a mean to copy bits around, as long as we are
// on the same register class, that's fine. Otherwise, that
// means we need some SUBREG_TO_REG or AND & co.
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!");
// Check the size of the destination.
assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
"GPRs cannot get more than 64-bit width values");
return true;
}
#endif
/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
const RegisterBankInfo &RBI, Register SrcReg,
const TargetRegisterClass *From,
const TargetRegisterClass *To,
unsigned SubReg) {
MachineIRBuilder MIB(I);
auto Copy = MIB.buildCopy({From}, {SrcReg});
auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
.addReg(Copy.getReg(0), 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(SubRegCopy.getReg(0));
// It's possible that the destination register won't be constrained. Make
// sure that happens.
if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
return true;
}
/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
// Special casing for cross-bank copies of s1s. We can technically represent
// a 1-bit value with any size of register. The minimum size for a GPR is 32
// bits. So, we need to put the FPR on 32 bits as well.
//
// FIXME: I'm not sure if this case holds true outside of copies. If it does,
// then we can pull it into the helpers that get the appropriate class for a
// register bank. Or make a new helper that carries along some constraint
// information.
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
SrcSize = DstSize = 32;
return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
getMinClassForRegBank(DstRegBank, DstSize, true)};
}
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
// Find the correct register classes for the source and destination registers.
const TargetRegisterClass *SrcRC;
const TargetRegisterClass *DstRC;
std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
if (!DstRC) {
LLVM_DEBUG(dbgs() << "Unexpected dest size "
<< RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
return false;
}
// A couple helpers below, for making sure that the copy we produce is valid.
// Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
// to verify that the src and dst are the same size, since that's handled by
// the SUBREG_TO_REG.
bool KnownValid = false;
// Returns true, or asserts if something we don't expect happens. Instead of
// returning true, we return isValidCopy() to ensure that we verify the
// result.
auto CheckCopy = [&]() {
// If we have a bitcast or something, we can't have physical registers.
assert((I.isCopy() ||
(!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
!Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
"No phys reg on generic operator!");
assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
(void)KnownValid;
return true;
};
// Is this a copy? If so, then we may need to insert a subregister copy, or
// a SUBREG_TO_REG.
if (I.isCopy()) {
// Yes. Check if there's anything to fix up.
if (!SrcRC) {
LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
return false;
}
unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
// If we're doing a cross-bank copy on different-sized registers, we need
// to do a bit more work.
if (SrcSize > DstSize) {
// We're doing a cross-bank copy into a smaller register. We need a
// subregister copy. First, get a register class that's on the same bank
// as the destination, but the same size as the source.
const TargetRegisterClass *SubregRC =
getMinClassForRegBank(DstRegBank, SrcSize, true);
assert(SubregRC && "Didn't get a register class for subreg?");
// Get the appropriate subregister for the destination.
unsigned SubReg = 0;
if (!getSubRegForClass(DstRC, TRI, SubReg)) {
LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
return false;
}
// Now, insert a subregister copy using the new register class.
selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
return CheckCopy();
}
// Is this a cross-bank copy?
if (DstRegBank.getID() != SrcRegBank.getID()) {
if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
SrcSize == 16) {
// Special case for FPR16 to GPR32.
// FIXME: This can probably be generalized like the above case.
Register PromoteReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
.addImm(0)
.addUse(SrcReg)
.addImm(AArch64::hsub);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(PromoteReg);
// Promise that the copy is implicitly validated by the SUBREG_TO_REG.
KnownValid = true;
}
}
// If the destination is a physical register, then there's nothing to
// change, so we're done.
if (Register::isPhysicalRegister(DstReg))
return CheckCopy();
}
// No need to constrain SrcReg. It will get constrained when we hit another
// of its use or its defs. Copies do not have constraints.
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
<< " operand\n");
return false;
}
I.setDesc(TII.get(AArch64::COPY));
return CheckCopy();
}
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
if (!DstTy.isScalar() || !SrcTy.isScalar())
return GenericOpc;
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
switch (DstSize) {
case 32:
switch (SrcSize) {
case 32:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUWSri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUWSri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUWSr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUWSr;
default:
return GenericOpc;
}
case 64:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUXSri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUXSri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUWDr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUWDr;
default:
return GenericOpc;
}
default:
return GenericOpc;
}
case 64:
switch (SrcSize) {
case 32:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUWDri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUWDri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUXSr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUXSr;
default:
return GenericOpc;
}
case 64:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUXDri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUXDri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUXDr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUXDr;
default:
return GenericOpc;
}
default:
return GenericOpc;
}
default:
return GenericOpc;
};
return GenericOpc;
}
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
const RegisterBankInfo &RBI) {
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
AArch64::GPRRegBankID);
LLT Ty = MRI.getType(I.getOperand(0).getReg());
if (Ty == LLT::scalar(32))
return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
return 0;
}
/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
// If this is a compare against +0.0, then we don't have to explicitly
// materialize a constant.
const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
if (OpSize != 32 && OpSize != 64)
return 0;
unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
{AArch64::FCMPSri, AArch64::FCMPDri}};
return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}
/// Returns true if \p P is an unsigned integer comparison predicate.
static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
switch (P) {
default:
return false;
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_UGE:
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_ULE:
return true;
}
}
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
switch (P) {
default:
llvm_unreachable("Unknown condition code!");
case CmpInst::ICMP_NE:
return AArch64CC::NE;
case CmpInst::ICMP_EQ:
return AArch64CC::EQ;
case CmpInst::ICMP_SGT:
return AArch64CC::GT;
case CmpInst::ICMP_SGE:
return AArch64CC::GE;
case CmpInst::ICMP_SLT:
return AArch64CC::LT;
case CmpInst::ICMP_SLE:
return AArch64CC::LE;
case CmpInst::ICMP_UGT:
return AArch64CC::HI;
case CmpInst::ICMP_UGE:
return AArch64CC::HS;
case CmpInst::ICMP_ULT:
return AArch64CC::LO;
case CmpInst::ICMP_ULE:
return AArch64CC::LS;
}
}
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (P) {
default:
llvm_unreachable("Unknown FP condition!");
case CmpInst::FCMP_OEQ:
CondCode = AArch64CC::EQ;
break;
case CmpInst::FCMP_OGT:
CondCode = AArch64CC::GT;
break;
case CmpInst::FCMP_OGE:
CondCode = AArch64CC::GE;
break;
case CmpInst::FCMP_OLT:
CondCode = AArch64CC::MI;
break;
case CmpInst::FCMP_OLE:
CondCode = AArch64CC::LS;
break;
case CmpInst::FCMP_ONE:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GT;
break;
case CmpInst::FCMP_ORD:
CondCode = AArch64CC::VC;
break;
case CmpInst::FCMP_UNO:
CondCode = AArch64CC::VS;
break;
case CmpInst::FCMP_UEQ:
CondCode = AArch64CC::EQ;
CondCode2 = AArch64CC::VS;
break;
case CmpInst::FCMP_UGT:
CondCode = AArch64CC::HI;
break;
case CmpInst::FCMP_UGE:
CondCode = AArch64CC::PL;
break;
case CmpInst::FCMP_ULT:
CondCode = AArch64CC::LT;
break;
case CmpInst::FCMP_ULE:
CondCode = AArch64CC::LE;
break;
case CmpInst::FCMP_UNE:
CondCode = AArch64CC::NE;
break;
}
}
bool AArch64InstructionSelector::selectCompareBranch(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
const Register CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
MachineInstr *CCMI = MRI.getVRegDef(CondReg);
if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
return false;
Register LHS = CCMI->getOperand(2).getReg();
Register RHS = CCMI->getOperand(3).getReg();
auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
if (!VRegAndVal)
std::swap(RHS, LHS);
VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
if (!VRegAndVal || VRegAndVal->Value != 0) {
MachineIRBuilder MIB(I);
// If we can't select a CBZ then emit a cmp + Bcc.
if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
CCMI->getOperand(1), MIB))
return false;
const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
(CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
I.eraseFromParent();
return true;
}
const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID)
return false;
const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
return false;
const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
unsigned CBOpc = 0;
if (CmpWidth <= 32)
CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
else if (CmpWidth == 64)
CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
else
return false;
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
.addUse(LHS)
.addMBB(DestMBB)
.constrainAllUses(TII, TRI, RBI);
I.eraseFromParent();
return true;
}
/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
static Optional<int64_t> getVectorShiftImm(Register Reg,
MachineRegisterInfo &MRI) {
assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
MachineInstr *OpMI = MRI.getVRegDef(Reg);
assert(OpMI && "Expected to find a vreg def for vector shift operand");
if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
return None;
// Check all operands are identical immediates.
int64_t ImmVal = 0;
for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
if (!VRegAndVal)
return None;
if (Idx == 1)
ImmVal = VRegAndVal->Value;
if (ImmVal != VRegAndVal->Value)
return None;
}
return ImmVal;
}
/// Matches and returns the shift immediate value for a SHL instruction given
/// a shift operand.
static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
if (!ShiftImm)
return None;
// Check the immediate is in range for a SHL.
int64_t Imm = *ShiftImm;
if (Imm < 0)
return None;
switch (SrcTy.getElementType().getSizeInBits()) {
default:
LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
return None;
case 8:
if (Imm > 7)
return None;
break;
case 16:
if (Imm > 15)
return None;
break;
case 32:
if (Imm > 31)
return None;
break;
case 64:
if (Imm > 63)
return None;
break;
}
return Imm;
}
bool AArch64InstructionSelector::selectVectorSHL(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_SHL);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
Register Src1Reg = I.getOperand(1).getReg();
Register Src2Reg = I.getOperand(2).getReg();
if (!Ty.isVector())
return false;
// Check if we have a vector of constants on RHS that we can select as the
// immediate form.
Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
unsigned Opc = 0;
if (Ty == LLT::vector(2, 64)) {
Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
} else if (Ty == LLT::vector(4, 32)) {
Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
} else if (Ty == LLT::vector(2, 32)) {
Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
return false;
}
MachineIRBuilder MIB(I);
auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
if (ImmVal)
Shl.addImm(*ImmVal);
else
Shl.addUse(Src2Reg);
constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectVectorASHR(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_ASHR);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
Register Src1Reg = I.getOperand(1).getReg();
Register Src2Reg = I.getOperand(2).getReg();
if (!Ty.isVector())
return false;
// There is not a shift right register instruction, but the shift left
// register instruction takes a signed value, where negative numbers specify a
// right shift.
unsigned Opc = 0;
unsigned NegOpc = 0;
const TargetRegisterClass *RC = nullptr;
if (Ty == LLT::vector(2, 64)) {
Opc = AArch64::SSHLv2i64;
NegOpc = AArch64::NEGv2i64;
RC = &AArch64::FPR128RegClass;
} else if (Ty == LLT::vector(4, 32)) {
Opc = AArch64::SSHLv4i32;
NegOpc = AArch64::NEGv4i32;
RC = &AArch64::FPR128RegClass;
} else if (Ty == LLT::vector(2, 32)) {
Opc = AArch64::SSHLv2i32;
NegOpc = AArch64::NEGv2i32;
RC = &AArch64::FPR64RegClass;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
return false;
}
MachineIRBuilder MIB(I);
auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectVaStartAAPCS(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
return false;
}
bool AArch64InstructionSelector::selectVaStartDarwin(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
Register ListReg = I.getOperand(0).getReg();
Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
auto MIB =
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
.addDef(ArgsAddrReg)
.addFrameIndex(FuncInfo->getVarArgsStackIndex())
.addImm(0)
.addImm(0);
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
.addUse(ArgsAddrReg)
.addUse(ListReg)
.addImm(0)
.addMemOperand(*I.memoperands_begin());
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
void AArch64InstructionSelector::materializeLargeCMVal(
MachineInstr &I, const Value *V, unsigned OpFlags) const {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineIRBuilder MIB(I);
auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
MovZ->addOperand(MF, I.getOperand(1));
MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
AArch64II::MO_NC);
MovZ->addOperand(MF, MachineOperand::CreateImm(0));
constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
Register ForceDstReg) {
Register DstReg = ForceDstReg
? ForceDstReg
: MRI.createVirtualRegister(&AArch64::GPR64RegClass);
auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
if (auto *GV = dyn_cast<GlobalValue>(V)) {
MovI->addOperand(MF, MachineOperand::CreateGA(
GV, MovZ->getOperand(1).getOffset(), Flags));
} else {
MovI->addOperand(
MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
MovZ->getOperand(1).getOffset(), Flags));
}
MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
return DstReg;
};
Register DstReg = BuildMovK(MovZ.getReg(0),
AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
return;
}
void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
case TargetOpcode::G_SHL:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR: {
// These shifts are legalized to have 64 bit shift amounts because we want
// to take advantage of the existing imported selection patterns that assume
// the immediates are s64s. However, if the shifted type is 32 bits and for
// some reason we receive input GMIR that has an s64 shift amount that's not
// a G_CONSTANT, insert a truncate so that we can still select the s32
// register-register variant.
Register SrcReg = I.getOperand(1).getReg();
Register ShiftReg = I.getOperand(2).getReg();
const LLT ShiftTy = MRI.getType(ShiftReg);
const LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
return;
assert(!ShiftTy.isVector() && "unexpected vector shift ty");
if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
return;
auto *AmtMI = MRI.getVRegDef(ShiftReg);
assert(AmtMI && "could not find a vreg definition for shift amount");
if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
// Insert a subregister copy to implement a 64->32 trunc
MachineIRBuilder MIB(I);
auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
.addReg(ShiftReg, 0, AArch64::sub_32);
MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
I.getOperand(2).setReg(Trunc.getReg(0));
}
return;
}
case TargetOpcode::G_STORE:
contractCrossBankCopyIntoStore(I, MRI);
return;
default:
return;
}
}
bool AArch64InstructionSelector::earlySelectSHL(
MachineInstr &I, MachineRegisterInfo &MRI) const {
// We try to match the immediate variant of LSL, which is actually an alias
// for a special case of UBFM. Otherwise, we fall back to the imported
// selector which will match the register variant.
assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
const auto &MO = I.getOperand(2);
auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
if (!VRegAndVal)
return false;
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
if (DstTy.isVector())
return false;
bool Is64Bit = DstTy.getSizeInBits() == 64;
auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
MachineIRBuilder MIB(I);
if (!Imm1Fn || !Imm2Fn)
return false;
auto NewI =
MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
{I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
for (auto &RenderFn : *Imm1Fn)
RenderFn(NewI);
for (auto &RenderFn : *Imm2Fn)
RenderFn(NewI);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}
void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
// If we're storing a scalar, it doesn't matter what register bank that
// scalar is on. All that matters is the size.
//
// So, if we see something like this (with a 32-bit scalar as an example):
//
// %x:gpr(s32) = ... something ...
// %y:fpr(s32) = COPY %x:gpr(s32)
// G_STORE %y:fpr(s32)
//
// We can fix this up into something like this:
//
// G_STORE %x:gpr(s32)
//
// And then continue the selection process normally.
MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
if (!Def)
return;
Register DefDstReg = Def->getOperand(0).getReg();
LLT DefDstTy = MRI.getType(DefDstReg);
Register StoreSrcReg = I.getOperand(0).getReg();
LLT StoreSrcTy = MRI.getType(StoreSrcReg);
// If we get something strange like a physical register, then we shouldn't
// go any further.
if (!DefDstTy.isValid())
return;
// Are the source and dst types the same size?
if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
return;
if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
RBI.getRegBank(DefDstReg, MRI, TRI))
return;
// We have a cross-bank copy, which is entering a store. Let's fold it.
I.getOperand(0).setReg(DefDstReg);
}
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
case TargetOpcode::G_CONSTANT: {
bool IsZero = false;
if (I.getOperand(1).isCImm())
IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
else if (I.getOperand(1).isImm())
IsZero = I.getOperand(1).getImm() == 0;
if (!IsZero)
return false;
Register DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
return false;
if (Ty == LLT::scalar(64)) {
I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
} else {
I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
}
I.setDesc(TII.get(TargetOpcode::COPY));
return true;
}
default:
return false;
}
}
bool AArch64InstructionSelector::select(MachineInstr &I) {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned Opcode = I.getOpcode();
// G_PHI requires same handling as PHI
if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
// Certain non-generic instructions also need some special handling.
if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const RegClassOrRegBank &RegClassOrBank =
MRI.getRegClassOrRegBank(DefReg);
const TargetRegisterClass *DefRC
= RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
if (!DefRC) {
if (!DefTy.isValid()) {
LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
return false;
}
const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
if (!DefRC) {
LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
return false;
}
}
I.setDesc(TII.get(TargetOpcode::PHI));
return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}
if (I.isCopy())
return selectCopy(I, TII, MRI, TRI, RBI);
return true;
}
if (I.getNumOperands() != I.getNumExplicitOperands()) {
LLVM_DEBUG(
dbgs() << "Generic instruction has unexpected implicit operands\n");
return false;
}
// Try to do some lowering before we start instruction selecting. These
// lowerings are purely transformations on the input G_MIR and so selection
// must continue after any modification of the instruction.
preISelLower(I);
// There may be patterns where the importer can't deal with them optimally,
// but does select it to a suboptimal sequence so our custom C++ selection
// code later never has a chance to work on it. Therefore, we have an early
// selection attempt here to give priority to certain selection routines
// over the imported ones.
if (earlySelect(I))
return true;
if (selectImpl(I, *CoverageInfo))
return true;
LLT Ty =
I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
MachineIRBuilder MIB(I);
switch (Opcode) {
case TargetOpcode::G_BRCOND: {
if (Ty.getSizeInBits() > 32) {
// We shouldn't need this on AArch64, but it would be implemented as an
// EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
// bit being tested is < 32.
LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
<< ", expected at most 32-bits");
return false;
}
const Register CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
// instructions will not be produced, as they are conditional branch
// instructions that do not set flags.
bool ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
return true;
if (ProduceNonFlagSettingCondBr) {
auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
.addUse(CondReg)
.addImm(/*bit offset=*/0)
.addMBB(DestMBB);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
} else {
auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
.addDef(AArch64::WZR)
.addUse(CondReg)
.addImm(1);
constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
auto Bcc =
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
.addImm(AArch64CC::EQ)
.addMBB(DestMBB);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
}
}
case TargetOpcode::G_BRINDIRECT: {
I.setDesc(TII.get(AArch64::BR));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_BRJT:
return selectBrJT(I, MRI);
case TargetOpcode::G_BSWAP: {
// Handle vector types for G_BSWAP directly.
Register DstReg = I.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
// We should only get vector types here; everything else is handled by the
// importer right now.
if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
return false;
}
// Only handle 4 and 2 element vectors for now.
// TODO: 16-bit elements.
unsigned NumElts = DstTy.getNumElements();
if (NumElts != 4 && NumElts != 2) {
LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
return false;
}
// Choose the correct opcode for the supported types. Right now, that's
// v2s32, v4s32, and v2s64.
unsigned Opc = 0;
unsigned EltSize = DstTy.getElementType().getSizeInBits();
if (EltSize == 32)
Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
: AArch64::REV32v16i8;
else if (EltSize == 64)
Opc = AArch64::REV64v16i8;
// We should always get something by the time we get here...
assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
I.setDesc(TII.get(Opc));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
const LLT p0 = LLT::pointer(0, 64);
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const unsigned DefSize = DefTy.getSizeInBits();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
// FIXME: Redundant check, but even less readable when factored out.
if (isFP) {
if (Ty != s32 && Ty != s64) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant, expected: " << s32 << " or " << s64
<< '\n');
return false;
}
if (RB.getID() != AArch64::FPRRegBankID) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant on bank: " << RB
<< ", expected: FPR\n");
return false;
}
// The case when we have 0.0 is covered by tablegen. Reject it here so we
// can be sure tablegen works correctly and isn't rescued by this code.
if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
return false;
} else {
// s32 and s64 are covered by tablegen.
if (Ty != p0 && Ty != s8 && Ty != s16) {
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
<< " constant, expected: " << s32 << ", " << s64
<< ", or " << p0 << '\n');
return false;
}
if (RB.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
<< " constant on bank: " << RB
<< ", expected: GPR\n");
return false;
}
}
// We allow G_CONSTANT of types < 32b.
const unsigned MovOpc =
DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
if (isFP) {
// Either emit a FMOV, or emit a copy to emit a normal mov.
const TargetRegisterClass &GPRRC =
DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
const TargetRegisterClass &FPRRC =
DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
// Can we use a FMOV instruction to represent the immediate?
if (emitFMovForFConstant(I, MRI))
return true;
// Nope. Emit a copy and use a normal mov instead.
const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
MachineOperand &RegOp = I.getOperand(0);
RegOp.setReg(DefGPRReg);
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
MIB.buildCopy({DefReg}, {DefGPRReg});
if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
return false;
}
MachineOperand &ImmOp = I.getOperand(1);
// FIXME: Is going through int64_t always correct?
ImmOp.ChangeToImmediate(
ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
} else if (I.getOperand(1).isCImm()) {
uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
I.getOperand(1).ChangeToImmediate(Val);
} else if (I.getOperand(1).isImm()) {
uint64_t Val = I.getOperand(1).getImm();
I.getOperand(1).ChangeToImmediate(Val);
}
I.setDesc(TII.get(MovOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
case TargetOpcode::G_EXTRACT: {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
LLT DstTy = MRI.getType(DstReg);
(void)DstTy;
unsigned SrcSize = SrcTy.getSizeInBits();
if (SrcTy.getSizeInBits() > 64) {
// This should be an extract of an s128, which is like a vector extract.
if (SrcTy.getSizeInBits() != 128)
return false;
// Only support extracting 64 bits from an s128 at the moment.
if (DstTy.getSizeInBits() != 64)
return false;
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
// Check we have the right regbank always.
assert(SrcRB.getID() == AArch64::FPRRegBankID &&
DstRB.getID() == AArch64::FPRRegBankID &&
"Wrong extract regbank!");
(void)SrcRB;
// Emit the same code as a vector extract.
// Offset must be a multiple of 64.
unsigned Offset = I.getOperand(2).getImm();
if (Offset % 64 != 0)
return false;
unsigned LaneIdx = Offset / 64;
MachineIRBuilder MIB(I);
MachineInstr *Extract = emitExtractVectorElt(
DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
if (!Extract)
return false;
I.eraseFromParent();
return true;
}
I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
Ty.getSizeInBits() - 1);
if (SrcSize < 64) {
assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
"unexpected G_EXTRACT types");
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
.addReg(DstReg, 0, AArch64::sub_32);
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
AArch64::GPR32RegClass, MRI);
I.getOperand(0).setReg(DstReg);
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_INSERT: {
LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
unsigned DstSize = DstTy.getSizeInBits();
// Larger inserts are vectors, same-size ones should be something else by
// now (split up or turned into COPYs).
if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
return false;
I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
unsigned LSB = I.getOperand(3).getImm();
unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
I.getOperand(3).setImm((DstSize - LSB) % DstSize);
MachineInstrBuilder(MF, I).addImm(Width - 1);
if (DstSize < 64) {
assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
"unexpected G_INSERT types");
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
TII.get(AArch64::SUBREG_TO_REG))
.addDef(SrcReg)
.addImm(0)
.addUse(I.getOperand(2).getReg())
.addImm(AArch64::sub_32);
RBI.constrainGenericRegister(I.getOperand(2).getReg(),
AArch64::GPR32RegClass, MRI);
I.getOperand(2).setReg(SrcReg);
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_FRAME_INDEX: {
// allocas and G_FRAME_INDEX are only supported in addrspace(0).
if (Ty != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}
I.setDesc(TII.get(AArch64::ADDXri));
// MOs for a #0 shifted immediate.
I.addOperand(MachineOperand::CreateImm(0));
I.addOperand(MachineOperand::CreateImm(0));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_GLOBAL_VALUE: {
auto GV = I.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return selectTLSGlobalValue(I, MRI);
unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
} else if (TM.getCodeModel() == CodeModel::Large) {
// Materialize the global using movz/movk instructions.
materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
return true;
} else if (TM.getCodeModel() == CodeModel::Tiny) {
I.setDesc(TII.get(AArch64::ADR));
I.getOperand(1).setTargetFlags(OpFlags);
} else {
I.setDesc(TII.get(AArch64::MOVaddr));
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
MachineInstrBuilder MIB(MF, I);
MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
}
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
MachineIRBuilder MIB(I);
LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
if (PtrTy != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}
auto &MemOp = **I.memoperands_begin();
if (MemOp.isAtomic()) {
// For now we just support s8 acquire loads to be able to compile stack
// protector code.
if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
MemOp.getSize() == 1) {
I.setDesc(TII.get(AArch64::LDARB));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
return false;
}
unsigned MemSizeInBits = MemOp.getSize() * 8;
const Register PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
// Sanity-check the pointer register.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
"Load/Store pointer operand isn't a GPR");
assert(MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer");
#endif
const Register ValReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
const unsigned NewOpc =
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
if (NewOpc == I.getOpcode())
return false;
I.setDesc(TII.get(NewOpc));
uint64_t Offset = 0;
auto *PtrMI = MRI.getVRegDef(PtrReg);
// Try to fold a GEP into our unsigned immediate addressing mode.
if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
int64_t Imm = *COff;
const unsigned Size = MemSizeInBits / 8;
const unsigned Scale = Log2_32(Size);
if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
Register Ptr2Reg = PtrMI->getOperand(1).getReg();
I.getOperand(1).setReg(Ptr2Reg);
PtrMI = MRI.getVRegDef(Ptr2Reg);
Offset = Imm / Size;
}
}
}
// If we haven't folded anything into our addressing mode yet, try to fold
// a frame index into the base+offset.
if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
I.addOperand(MachineOperand::CreateImm(Offset));
// If we're storing a 0, use WZR/XZR.
if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
if (I.getOpcode() == AArch64::STRWui)
I.getOperand(0).setReg(AArch64::WZR);
else if (I.getOpcode() == AArch64::STRXui)
I.getOperand(0).setReg(AArch64::XZR);
}
}
if (IsZExtLoad) {
// The zextload from a smaller type to i32 should be handled by the importer.
if (MRI.getType(ValReg).getSizeInBits() != 64)
return false;
// If we have a ZEXTLOAD then change the load's type to be a narrower reg
//and zero_extend with SUBREG_TO_REG.
Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
Register DstReg = I.getOperand(0).getReg();
I.getOperand(0).setReg(LdReg);
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
.addImm(0)
.addUse(LdReg)
.addImm(AArch64::sub_32);
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
MRI);
}
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_SMULH:
case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
const Register DefReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
return false;
}
if (Ty != LLT::scalar(64)) {
LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
<< ", expected: " << LLT::scalar(64) << '\n');
return false;
}
unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
: AArch64::UMULHrr;
I.setDesc(TII.get(NewOpc));
// Now that we selected an opcode, we need to constrain the register
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_ASHR:
if (MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorASHR(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_OR:
case TargetOpcode::G_LSHR: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
const unsigned OpSize = Ty.getSizeInBits();
const Register DefReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
if (NewOpc == I.getOpcode())
return false;
I.setDesc(TII.get(NewOpc));
// FIXME: Should the type be always reset in setDesc?
// Now that we selected an opcode, we need to constrain the register
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_PTR_ADD: {
MachineIRBuilder MIRBuilder(I);
emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
MIRBuilder);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_UADDO: {
// TODO: Support other types.
unsigned OpSize = Ty.getSizeInBits();
if (OpSize != 32 && OpSize != 64) {
LLVM_DEBUG(
dbgs()
<< "G_UADDO currently only supported for 32 and 64 b types.\n");
return false;
}
// TODO: Support vectors.
if (Ty.isVector()) {
LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
return false;
}
// Add and set the set condition flag.
unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
MachineIRBuilder MIRBuilder(I);
auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
{I.getOperand(2), I.getOperand(3)});
constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
// Now, put the overflow result in the register given by the first operand
// to the G_UADDO. CSINC increments the result when the predicate is false,
// so to get the increment when it's true, we need to use the inverse. In
// this case, we want to increment when carry is set.
auto CsetMI = MIRBuilder
.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
{Register(AArch64::WZR), Register(AArch64::WZR)})
.addImm(getInvertedCondCode(AArch64CC::HS));
constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_PTR_MASK: {
uint64_t Align = I.getOperand(2).getImm();
if (Align >= 64 || Align == 0)
return false;
uint64_t Mask = ~((1ULL << Align) - 1);
I.setDesc(TII.get(AArch64::ANDXri));
I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_PTRTOINT:
case TargetOpcode::G_TRUNC: {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
if (DstRB.getID() != SrcRB.getID()) {
LLVM_DEBUG(
dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
return false;
}
if (DstRB.getID() == AArch64::GPRRegBankID) {
const TargetRegisterClass *DstRC =
getRegClassForTypeOnBank(DstTy, DstRB, RBI);
if (!DstRC)
return false;
const TargetRegisterClass *SrcRC =
getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
if (!SrcRC)
return false;
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
return false;
}
if (DstRC == SrcRC) {
// Nothing to be done
} else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
SrcTy == LLT::scalar(64)) {
llvm_unreachable("TableGen can import this case");
return false;
} else if (DstRC == &AArch64::GPR32RegClass &&
SrcRC == &AArch64::GPR64RegClass) {
I.getOperand(1).setSubReg(AArch64::sub_32);
} else {
LLVM_DEBUG(
dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
return false;
}
I.setDesc(TII.get(TargetOpcode::COPY));
return true;
} else if (DstRB.getID() == AArch64::FPRRegBankID) {
if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
I.setDesc(TII.get(AArch64::XTNv4i16));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
MachineIRBuilder MIB(I);
MachineInstr *Extract = emitExtractVectorElt(
DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
if (!Extract)
return false;
I.eraseFromParent();
return true;
}
}
return false;
}
case TargetOpcode::G_ANYEXT: {
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
if (RBDst.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
<< ", expected: GPR\n");
return false;
}
const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
if (RBSrc.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
<< ", expected: GPR\n");
return false;
}
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
if (DstSize == 0) {
LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
return false;
}
if (DstSize != 64 && DstSize > 32) {
LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
<< ", expected: 32 or 64\n");
return false;
}
// At this point G_ANYEXT is just like a plain COPY, but we need
// to explicitly form the 64-bit value if any.
if (DstSize > 32) {
Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
.addDef(ExtSrc)
.addImm(0)
.addUse(SrcReg)
.addImm(AArch64::sub_32);
I.getOperand(1).setReg(ExtSrc);
}
return selectCopy(I, TII, MRI, TRI, RBI);
}
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT: {
unsigned Opcode = I.getOpcode();
const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
const Register DefReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const LLT DstTy = MRI.getType(DefReg);
const LLT SrcTy = MRI.getType(SrcReg);
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = SrcTy.getSizeInBits();
if (DstTy.isVector())
return false; // Should be handled by imported patterns.
assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
AArch64::GPRRegBankID &&
"Unexpected ext regbank");
MachineIRBuilder MIB(I);
MachineInstr *ExtI;
// First check if we're extending the result of a load which has a dest type
// smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
// GPR register on AArch64 and all loads which are smaller automatically
// zero-extend the upper bits. E.g.
// %v(s8) = G_LOAD %p, :: (load 1)
// %v2(s32) = G_ZEXT %v(s8)
if (!IsSigned) {
auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
if (LoadMI &&
RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
unsigned BytesLoaded = MemOp->getSize();
if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
return selectCopy(I, TII, MRI, TRI, RBI);
}
}
if (DstSize == 64) {
// FIXME: Can we avoid manually doing this?
if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
<< " operand\n");
return false;
}
auto SubregToReg =
MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
.addImm(0)
.addUse(SrcReg)
.addImm(AArch64::sub_32);
ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
{DefReg}, {SubregToReg})
.addImm(0)
.addImm(SrcSize - 1);
} else if (DstSize <= 32) {
ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
{DefReg}, {SrcReg})
.addImm(0)
.addImm(SrcSize - 1);
} else {
return false;
}
constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI: {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
SrcTy = MRI.getType(I.getOperand(1).getReg());
const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
if (NewOpc == Opcode)
return false;
I.setDesc(TII.get(NewOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
case TargetOpcode::G_INTTOPTR:
// The importer is currently unable to import pointer types since they
// didn't exist in SelectionDAG.
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_BITCAST:
// Imported SelectionDAG rules can handle every bitcast except those that
// bitcast from a type to the same type. Ideally, these shouldn't occur
// but we might not run an optimizer that deletes them. The other exception
// is bitcasts involving pointer types, as SelectionDAG has no knowledge
// of them.
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_SELECT: {
if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
<< ", expected: " << LLT::scalar(1) << '\n');
return false;
}
const Register CondReg = I.getOperand(1).getReg();
const Register TReg = I.getOperand(2).getReg();
const Register FReg = I.getOperand(3).getReg();
if (tryOptSelect(I))
return true;
Register CSelOpc = selectSelectOpc(I, MRI, RBI);
MachineInstr &TstMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
.addDef(AArch64::WZR)
.addUse(CondReg)
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
.addDef(I.getOperand(0).getReg())
.addUse(TReg)
.addUse(FReg)
.addImm(AArch64CC::NE);
constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_ICMP: {
if (Ty.isVector())
return selectVectorICmp(I, MRI);
if (Ty != LLT::scalar(32)) {
LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
<< ", expected: " << LLT::scalar(32) << '\n');
return false;
}
MachineIRBuilder MIRBuilder(I);
if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
MIRBuilder))
return false;
emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
MIRBuilder);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_FCMP: {
if (Ty != LLT::scalar(32)) {
LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
<< ", expected: " << LLT::scalar(32) << '\n');
return false;
}
unsigned CmpOpc = selectFCMPOpc(I, MRI);
if (!CmpOpc)
return false;
// FIXME: regbank
AArch64CC::CondCode CC1, CC2;
changeFCMPPredToAArch64CC(
(CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
// Partially build the compare. Decide if we need to add a use for the
// third operand based off whether or not we're comparing against 0.0.
auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
.addUse(I.getOperand(2).getReg());
// If we don't have an immediate compare, then we need to add a use of the
// register which wasn't used for the immediate.
// Note that the immediate will always be the last operand.
if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
const Register DefReg = I.getOperand(0).getReg();
Register Def1Reg = DefReg;
if (CC2 != AArch64CC::AL)
Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineInstr &CSetMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
.addDef(Def1Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
.addImm(getInvertedCondCode(CC1));
if (CC2 != AArch64CC::AL) {
Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineInstr &CSet2MI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
.addDef(Def2Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
.addImm(getInvertedCondCode(CC2));
MachineInstr &OrMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
.addDef(DefReg)
.addUse(Def1Reg)
.addUse(Def2Reg);
constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
}
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_VASTART:
return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
: selectVaStartAAPCS(I, MF, MRI);
case TargetOpcode::G_INTRINSIC:
return selectIntrinsic(I, MRI);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectIntrinsicWithSideEffects(I, MRI);
case TargetOpcode::G_IMPLICIT_DEF: {
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const Register DstReg = I.getOperand(0).getReg();
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
const TargetRegisterClass *DstRC =
getRegClassForTypeOnBank(DstTy, DstRB, RBI);
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
return true;
}
case TargetOpcode::G_BLOCK_ADDR: {
if (TM.getCodeModel() == CodeModel::Large) {
materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
I.eraseFromParent();
return true;
} else {
I.setDesc(TII.get(AArch64::MOVaddrBA));
auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
I.getOperand(0).getReg())
.addBlockAddress(I.getOperand(1).getBlockAddress(),
/* Offset */ 0, AArch64II::MO_PAGE)
.addBlockAddress(
I.getOperand(1).getBlockAddress(), /* Offset */ 0,
AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
case TargetOpcode::G_INTRINSIC_TRUNC:
return selectIntrinsicTrunc(I, MRI);
case TargetOpcode::G_INTRINSIC_ROUND:
return selectIntrinsicRound(I, MRI);
case TargetOpcode::G_BUILD_VECTOR:
return selectBuildVector(I, MRI);
case TargetOpcode::G_MERGE_VALUES:
return selectMergeValues(I, MRI);
case TargetOpcode::G_UNMERGE_VALUES:
return selectUnmergeValues(I, MRI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return selectShuffleVector(I, MRI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return selectExtractElt(I, MRI);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return selectInsertElt(I, MRI);
case TargetOpcode::G_CONCAT_VECTORS:
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
}
return false;
}
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
Register JTAddr = I.getOperand(0).getReg();
unsigned JTI = I.getOperand(1).getIndex();
Register Index = I.getOperand(2).getReg();
MachineIRBuilder MIB(I);
Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
{JTAddr, Index})
.addJumpTableIndex(JTI);
// Build the indirect branch.
MIB.buildInstr(AArch64::BR, {}, {TargetReg});
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectJumpTable(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
Register DstReg = I.getOperand(0).getReg();
unsigned JTI = I.getOperand(1).getIndex();
// We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
MachineIRBuilder MIB(I);
auto MovMI =
MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
.addJumpTableIndex(JTI, AArch64II::MO_PAGE)
.addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
bool AArch64InstructionSelector::selectTLSGlobalValue(
MachineInstr &I, MachineRegisterInfo &MRI) const {
if (!STI.isTargetMachO())
return false;
MachineFunction &MF = *I.getParent()->getParent();
MF.getFrameInfo().setAdjustsStack(true);
const GlobalValue &GV = *I.getOperand(1).getGlobal();
MachineIRBuilder MIB(I);
MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
.addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
{Register(AArch64::X0)})
.addImm(0);
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
MIB.buildInstr(AArch64::BLR, {}, {Load})
.addDef(AArch64::X0, RegState::Implicit)
.addRegMask(TRI.getTLSCallPreservedMask());
MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
MRI);
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectIntrinsicTrunc(
MachineInstr &I, MachineRegisterInfo &MRI) const {
const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
// Select the correct opcode.
unsigned Opc = 0;
if (!SrcTy.isVector()) {
switch (SrcTy.getSizeInBits()) {
default:
case 16:
Opc = AArch64::FRINTZHr;
break;
case 32:
Opc = AArch64::FRINTZSr;
break;
case 64:
Opc = AArch64::FRINTZDr;
break;
}
} else {
unsigned NumElts = SrcTy.getNumElements();
switch (SrcTy.getElementType().getSizeInBits()) {
default:
break;
case 16:
if (NumElts == 4)
Opc = AArch64::FRINTZv4f16;
else if (NumElts == 8)
Opc = AArch64::FRINTZv8f16;
break;
case 32:
if (NumElts == 2)
Opc = AArch64::FRINTZv2f32;
else if (NumElts == 4)
Opc = AArch64::FRINTZv4f32;
break;
case 64:
if (NumElts == 2)
Opc = AArch64::FRINTZv2f64;
break;
}
}
if (!Opc) {
// Didn't get an opcode above, bail.
LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
return false;
}
// Legalization would have set us up perfectly for this; we just need to
// set the opcode and move on.
I.setDesc(TII.get(Opc));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool AArch64InstructionSelector::selectIntrinsicRound(
MachineInstr &I, MachineRegisterInfo &MRI) const {
const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
// Select the correct opcode.
unsigned Opc = 0;
if (!SrcTy.isVector()) {
switch (SrcTy.getSizeInBits()) {
default:
case 16:
Opc = AArch64::FRINTAHr;
break;
case 32:
Opc = AArch64::FRINTASr;
break;
case 64:
Opc = AArch64::FRINTADr;
break;
}
} else {
unsigned NumElts = SrcTy.getNumElements();
switch (SrcTy.getElementType().getSizeInBits()) {
default:
break;
case 16:
if (NumElts == 4)
Opc = AArch64::FRINTAv4f16;
else if (NumElts == 8)
Opc = AArch64::FRINTAv8f16;
break;
case 32:
if (NumElts == 2)
Opc = AArch64::FRINTAv2f32;
else if (NumElts == 4)
Opc = AArch64::FRINTAv4f32;
break;
case 64:
if (NumElts == 2)
Opc = AArch64::FRINTAv2f64;
break;
}
}
if (!Opc) {
// Didn't get an opcode above, bail.
LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
return false;
}
// Legalization would have set us up perfectly for this; we just need to
// set the opcode and move on.
I.setDesc(TII.get(Opc));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool AArch64InstructionSelector::selectVectorICmp(
MachineInstr &I, MachineRegisterInfo &MRI) const {
Register DstReg = I.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
Register SrcReg = I.getOperand(2).getReg();
Register Src2Reg = I.getOperand(3).getReg();
LLT SrcTy = MRI.getType(SrcReg);
unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
unsigned NumElts = DstTy.getNumElements();
// First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
// Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
// Third index is cc opcode:
// 0 == eq
// 1 == ugt
// 2 == uge
// 3 == ult
// 4 == ule
// 5 == sgt
// 6 == sge
// 7 == slt
// 8 == sle
// ne is done by negating 'eq' result.
// This table below assumes that for some comparisons the operands will be
// commuted.
// ult op == commute + ugt op
// ule op == commute + uge op
// slt op == commute + sgt op
// sle op == commute + sge op
unsigned PredIdx = 0;
bool SwapOperands = false;
CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
switch (Pred) {
case CmpInst::ICMP_NE:
case CmpInst::ICMP_EQ:
PredIdx = 0;
break;
case CmpInst::ICMP_UGT:
PredIdx = 1;
break;
case CmpInst::ICMP_UGE:
PredIdx = 2;
break;
case CmpInst::ICMP_ULT:
PredIdx = 3;
SwapOperands = true;
break;
case CmpInst::ICMP_ULE:
PredIdx = 4;
SwapOperands = true;
break;
case CmpInst::ICMP_SGT:
PredIdx = 5;
break;
case CmpInst::ICMP_SGE:
PredIdx = 6;
break;
case CmpInst::ICMP_SLT:
PredIdx = 7;
SwapOperands = true;
break;
case CmpInst::ICMP_SLE:
PredIdx = 8;
SwapOperands = true;
break;
default:
llvm_unreachable("Unhandled icmp predicate");
return false;
}
// This table obviously should be tablegen'd when we have our GISel native
// tablegen selector.
static const unsigned OpcTable[4][4][9] = {
{
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */},
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */},
{AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
{AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
},
{
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */},
{AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
{AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */}
},
{
{AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
{AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */},
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */}
},
{
{AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */},
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */},
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
0 /* invalid */}
},
};
unsigned EltIdx = Log2_32(SrcEltSize / 8);
unsigned NumEltsIdx = Log2_32(NumElts / 2);
unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
if (!Opc) {
LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
return false;
}
const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
const TargetRegisterClass *SrcRC =
getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
if (!SrcRC) {
LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
return false;
}
unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
if (SrcTy.getSizeInBits() == 128)
NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
if (SwapOperands)
std::swap(SrcReg, Src2Reg);
MachineIRBuilder MIB(I);
auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
// Invert if we had a 'ne' cc.
if (NotOpc) {
Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
} else {
MIB.buildCopy(DstReg, Cmp.getReg(0));
}
RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
I.eraseFromParent();
return true;
}
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
MachineIRBuilder &MIRBuilder) const {
auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
auto BuildFn = [&](unsigned SubregIndex) {
auto Ins =
MIRBuilder
.buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
.addImm(SubregIndex);
constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
return &*Ins;
};
switch (EltSize) {
case 16:
return BuildFn(AArch64::hsub);
case 32:
return BuildFn(AArch64::ssub);
case 64:
return BuildFn(AArch64::dsub);
default:
return nullptr;
}
}
bool AArch64InstructionSelector::selectMergeValues(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
if (I.getNumOperands() != 3)
return false;
// Merging 2 s64s into an s128.
if (DstTy == LLT::scalar(128)) {
if (SrcTy.getSizeInBits() != 64)
return false;
MachineIRBuilder MIB(I);
Register DstReg = I.getOperand(0).getReg();
Register Src1Reg = I.getOperand(1).getReg();
Register Src2Reg = I.getOperand(2).getReg();
auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
MachineInstr *InsMI =
emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
if (!InsMI)
return false;
MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
Src2Reg, /* LaneIdx */ 1, RB, MIB);
if (!Ins2MI)
return false;
constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
if (RB.getID() != AArch64::GPRRegBankID)
return false;
if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
return false;
auto *DstRC = &AArch64::GPR64RegClass;
Register SubToRegDef = MRI.createVirtualRegister(DstRC);
MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(TargetOpcode::SUBREG_TO_REG))
.addDef(SubToRegDef)
.addImm(0)
.addUse(I.getOperand(1).getReg())
.addImm(AArch64::sub_32);
Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
// Need to anyext the second scalar before we can use bfm
MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(TargetOpcode::SUBREG_TO_REG))
.addDef(SubToRegDef2)
.addImm(0)
.addUse(I.getOperand(2).getReg())
.addImm(AArch64::sub_32);
MachineInstr &BFM =
*BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
.addDef(I.getOperand(0).getReg())
.addUse(SubToRegDef)
.addUse(SubToRegDef2)
.addImm(32)
.addImm(31);
constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
const unsigned EltSize) {
// Choose a lane copy opcode and subregister based off of the size of the
// vector's elements.
switch (EltSize) {
case 16:
CopyOpc = AArch64::CPYi16;
ExtractSubReg = AArch64::hsub;
break;
case 32:
CopyOpc = AArch64::CPYi32;
ExtractSubReg = AArch64::ssub;
break;
case 64:
CopyOpc = AArch64::CPYi64;
ExtractSubReg = AArch64::dsub;
break;
default:
// Unknown size, bail out.
LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
return false;
}
return true;
}
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
unsigned CopyOpc = 0;
unsigned ExtractSubReg = 0;
if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
LLVM_DEBUG(
dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
return nullptr;
}
const TargetRegisterClass *DstRC =
getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
if (!DstRC) {
LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
return nullptr;
}
const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
const LLT &VecTy = MRI.getType(VecReg);
const TargetRegisterClass *VecRC =
getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
if (!VecRC) {
LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
return nullptr;
}
// The register that we're going to copy into.
Register InsertReg = VecReg;
if (!DstReg)
DstReg = MRI.createVirtualRegister(DstRC);
// If the lane index is 0, we just use a subregister COPY.
if (LaneIdx == 0) {
auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
.addReg(VecReg, 0, ExtractSubReg);
RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
return &*Copy;
}
// Lane copies require 128-bit wide registers. If we're dealing with an
// unpacked vector, then we need to move up to that width. Insert an implicit
// def and a subregister insert to get us there.
if (VecTy.getSizeInBits() != 128) {
MachineInstr *ScalarToVector = emitScalarToVector(
VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
if (!ScalarToVector)
return nullptr;
InsertReg = ScalarToVector->getOperand(0).getReg();
}
MachineInstr *LaneCopyMI =
MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
// Make sure that we actually constrain the initial copy.
RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
return LaneCopyMI;
}
bool AArch64InstructionSelector::selectExtractElt(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
"unexpected opcode!");
Register DstReg = I.getOperand(0).getReg();
const LLT NarrowTy = MRI.getType(DstReg);
const Register SrcReg = I.getOperand(1).getReg();
const LLT WideTy = MRI.getType(SrcReg);
(void)WideTy;
assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
"source register size too small!");
assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
// Need the lane index to determine the correct copy opcode.
MachineOperand &LaneIdxOp = I.getOperand(2);
assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
return false;
}
// Find the index to extract from.
auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
if (!VRegAndVal)
return false;
unsigned LaneIdx = VRegAndVal->Value;
MachineIRBuilder MIRBuilder(I);
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
LaneIdx, MIRBuilder);
if (!Extract)
return false;
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
MachineInstr &I, MachineRegisterInfo &MRI) const {
unsigned NumElts = I.getNumOperands() - 1;
Register SrcReg = I.getOperand(NumElts).getReg();
const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
const LLT SrcTy = MRI.getType(SrcReg);
assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
if (SrcTy.getSizeInBits() > 128) {
LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
return false;
}
MachineIRBuilder MIB(I);
// We implement a split vector operation by treating the sub-vectors as
// scalars and extracting them.
const RegisterBank &DstRB =
*RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
Register Dst = I.getOperand(OpIdx).getReg();
MachineInstr *Extract =
emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
if (!Extract)
return false;
}
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectUnmergeValues(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"unexpected opcode");
// TODO: Handle unmerging into GPRs and from scalars to scalars.
if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
AArch64::FPRRegBankID ||
RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
AArch64::FPRRegBankID) {
LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n");
return false;
}
// The last operand is the vector source register, and every other operand is
// a register to unpack into.
unsigned NumElts = I.getNumOperands() - 1;
Register SrcReg = I.getOperand(NumElts).getReg();
const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
const LLT WideTy = MRI.getType(SrcReg);
(void)WideTy;
assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
"can only unmerge from vector or s128 types!");
assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
"source register size too small!");
if (!NarrowTy.isScalar())
return selectSplitVectorUnmerge(I, MRI);
MachineIRBuilder MIB(I);
// Choose a lane copy opcode and subregister based off of the size of the
// vector's elements.
unsigned CopyOpc = 0;
unsigned ExtractSubReg = 0;
if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
return false;
// Set up for the lane copies.
MachineBasicBlock &MBB = *I.getParent();
// Stores the registers we'll be copying from.
SmallVector<Register, 4> InsertRegs;
// We'll use the first register twice, so we only need NumElts-1 registers.
unsigned NumInsertRegs = NumElts - 1;
// If our elements fit into exactly 128 bits, then we can copy from the source
// directly. Otherwise, we need to do a bit of setup with some subregister
// inserts.
if (NarrowTy.getSizeInBits() * NumElts == 128) {
InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
} else {
// No. We have to perform subregister inserts. For each insert, create an
// implicit def and a subregister insert, and save the register we create.
for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
MachineInstr &ImpDefMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
ImpDefReg);
// Now, create the subregister insert from SrcReg.
Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
MachineInstr &InsMI =
*BuildMI(MBB, I, I.getDebugLoc(),
TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
.addUse(ImpDefReg)
.addUse(SrcReg)
.addImm(AArch64::dsub);
constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
// Save the register so that we can copy from it after.
InsertRegs.push_back(InsertReg);
}
}
// Now that we've created any necessary subregister inserts, we can
// create the copies.
//
// Perform the first copy separately as a subregister copy.
Register CopyTo = I.getOperand(0).getReg();
auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
.addReg(InsertRegs[0], 0, ExtractSubReg);
constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
// Now, perform the remaining copies as vector lane copies.
unsigned LaneIdx = 1;
for (Register InsReg : InsertRegs) {
Register CopyTo = I.getOperand(LaneIdx).getReg();
MachineInstr &CopyInst =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
.addUse(InsReg)
.addImm(LaneIdx);
constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
++LaneIdx;
}
// Separately constrain the first copy's destination. Because of the
// limitation in constrainOperandRegClass, we can't guarantee that this will
// actually be constrained. So, do it ourselves using the second operand.
const TargetRegisterClass *RC =
MRI.getRegClassOrNull(I.getOperand(1).getReg());
if (!RC) {
LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
return false;
}
RBI.constrainGenericRegister(CopyTo, *RC, MRI);
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectConcatVectors(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
"Unexpected opcode");
Register Dst = I.getOperand(0).getReg();
Register Op1 = I.getOperand(1).getReg();
Register Op2 = I.getOperand(2).getReg();
MachineIRBuilder MIRBuilder(I);
MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
if (!ConcatMI)
return false;
I.eraseFromParent();
return true;
}
unsigned
AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
MachineFunction &MF) const {
Type *CPTy = CPVal->getType();
unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
if (Align == 0)
Align = MF.getDataLayout().getTypeAllocSize(CPTy);
MachineConstantPool *MCP = MF.getConstantPool();
return MCP->getConstantPoolIndex(CPVal, Align);
}
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
auto Adrp =
MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
.addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
MachineInstr *LoadMI = nullptr;
switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
case 16:
LoadMI =
&*MIRBuilder
.buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
.addConstantPoolIndex(CPIdx, 0,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
case 8:
LoadMI = &*MIRBuilder
.buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
.addConstantPoolIndex(
CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
default:
LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType());
return nullptr;
}
constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
return LoadMI;
}
/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
/// size and RB.
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
unsigned Opc, SubregIdx;
if (RB.getID() == AArch64::GPRRegBankID) {
if (EltSize == 32) {
Opc = AArch64::INSvi32gpr;
SubregIdx = AArch64::ssub;
} else if (EltSize == 64) {
Opc = AArch64::INSvi64gpr;
SubregIdx = AArch64::dsub;
} else {
llvm_unreachable("invalid elt size!");
}
} else {
if (EltSize == 8) {
Opc = AArch64::INSvi8lane;
SubregIdx = AArch64::bsub;
} else if (EltSize == 16) {
Opc = AArch64::INSvi16lane;
SubregIdx = AArch64::hsub;
} else if (EltSize == 32) {
Opc = AArch64::INSvi32lane;
SubregIdx = AArch64::ssub;
} else if (EltSize == 64) {
Opc = AArch64::INSvi64lane;
SubregIdx = AArch64::dsub;
} else {
llvm_unreachable("invalid elt size!");
}
}
return std::make_pair(Opc, SubregIdx);
}
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
{AArch64::ADDWrr, AArch64::ADDWri}};
bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
auto ImmFns = selectArithImmed(RHS);
unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});
// If we matched a valid constant immediate, add those operands.
if (ImmFns) {
for (auto &RenderFn : *ImmFns)
RenderFn(AddMI);
} else {
AddMI.addUse(RHS.getReg());
}
constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
return &*AddMI;
}
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
{AArch64::ADDSWrr, AArch64::ADDSWri}};
bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
auto ImmFns = selectArithImmed(RHS);
unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
// If we matched a valid constant immediate, add those operands.
if (ImmFns) {
for (auto &RenderFn : *ImmFns)
RenderFn(CmpMI);
} else {
CmpMI.addUse(RHS.getReg());
}
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
return &*CmpMI;
}
MachineInstr *
AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
MachineIRBuilder &MIRBuilder) const {
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
unsigned RegSize = MRI.getType(LHS).getSizeInBits();
bool Is32Bit = (RegSize == 32);
static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
{AArch64::ANDSWrr, AArch64::ANDSWri}};
Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
// We might be able to fold in an immediate into the TST. We need to make sure
// it's a logical immediate though, since ANDS requires that.
auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
bool IsImmForm = ValAndVReg.hasValue() &&
AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
unsigned Opc = OpcTable[Is32Bit][IsImmForm];
auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
if (IsImmForm)
TstMI.addImm(
AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
else
TstMI.addUse(RHS);
constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
return &*TstMI;
}
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
// Fold the compare if possible.
MachineInstr *FoldCmp =
tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
if (FoldCmp)
return FoldCmp;
// Can't fold into a CMN. Just emit a normal compare.
unsigned CmpOpc = 0;
Register ZReg;
LLT CmpTy = MRI.getType(LHS.getReg());
assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
"Expected scalar or pointer");
if (CmpTy == LLT::scalar(32)) {
CmpOpc = AArch64::SUBSWrr;
ZReg = AArch64::WZR;
} else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
CmpOpc = AArch64::SUBSXrr;
ZReg = AArch64::XZR;
} else {
return nullptr;
}
// Try to match immediate forms.
auto ImmFns = selectArithImmed(RHS);
if (ImmFns)
CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
// If we matched a valid constant immediate, add those operands.
if (ImmFns) {
for (auto &RenderFn : *ImmFns)
RenderFn(CmpMI);
} else {
CmpMI.addUse(RHS.getReg());
}
// Make sure that we can constrain the compare that we emitted.
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
return &*CmpMI;
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Optional<Register> Dst, Register Op1, Register Op2,
MachineIRBuilder &MIRBuilder) const {
// We implement a vector concat by:
// 1. Use scalar_to_vector to insert the lower vector into the larger dest
// 2. Insert the upper vector into the destination's upper element
// TODO: some of this code is common with G_BUILD_VECTOR handling.
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
const LLT Op1Ty = MRI.getType(Op1);
const LLT Op2Ty = MRI.getType(Op2);
if (Op1Ty != Op2Ty) {
LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
return nullptr;
}
assert(Op1Ty.isVector() && "Expected a vector for vector concat");
if (Op1Ty.getSizeInBits() >= 128) {
LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
return nullptr;
}
// At the moment we just support 64 bit vector concats.
if (Op1Ty.getSizeInBits() != 64) {
LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
return nullptr;
}
const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
const TargetRegisterClass *DstRC =
getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
MachineInstr *WidenedOp1 =
emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
MachineInstr *WidenedOp2 =
emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
if (!WidenedOp1 || !WidenedOp2) {
LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
return nullptr;
}
// Now do the insert of the upper element.
unsigned InsertOpc, InsSubRegIdx;
std::tie(InsertOpc, InsSubRegIdx) =
getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
if (!Dst)
Dst = MRI.createVirtualRegister(DstRC);
auto InsElt =
MIRBuilder
.buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
.addImm(1) /* Lane index */
.addUse(WidenedOp2->getOperand(0).getReg())
.addImm(0);
constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
return &*InsElt;
}
MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
"Expected a G_FCONSTANT!");
MachineOperand &ImmOp = I.getOperand(1);
unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
// Only handle 32 and 64 bit defs for now.
if (DefSize != 32 && DefSize != 64)
return nullptr;
// Don't handle null values using FMOV.
if (ImmOp.getFPImm()->isNullValue())
return nullptr;
// Get the immediate representation for the FMOV.
const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
: AArch64_AM::getFP64Imm(ImmValAPF);
// If this is -1, it means the immediate can't be represented as the requested
// floating point value. Bail.
if (Imm == -1)
return nullptr;
// Update MI to represent the new FMOV instruction, constrain it, and return.
ImmOp.ChangeToImmediate(Imm);
unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
I.setDesc(TII.get(MovOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return &I;
}
MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder) const {
// CSINC increments the result when the predicate is false. Invert it.
const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
auto I =
MIRBuilder
.buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
.addImm(InvCC);
constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
return &*I;
}
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
MachineIRBuilder MIB(I);
MachineRegisterInfo &MRI = *MIB.getMRI();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
// We want to recognize this pattern:
//
// $z = G_FCMP pred, $x, $y
// ...
// $w = G_SELECT $z, $a, $b
//
// Where the value of $z is *only* ever used by the G_SELECT (possibly with
// some copies/truncs in between.)
//
// If we see this, then we can emit something like this:
//
// fcmp $x, $y
// fcsel $w, $a, $b, pred
//
// Rather than emitting both of the rather long sequences in the standard
// G_FCMP/G_SELECT select methods.
// First, check if the condition is defined by a compare.
MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
while (CondDef) {
// We can only fold if all of the defs have one use.
if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
return false;
// We can skip over G_TRUNC since the condition is 1-bit.
// Truncating/extending can have no impact on the value.
unsigned Opc = CondDef->getOpcode();
if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
break;
// Can't see past copies from physregs.
if (Opc == TargetOpcode::COPY &&
Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
return false;
CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
}
// Is the condition defined by a compare?
if (!CondDef)
return false;
unsigned CondOpc = CondDef->getOpcode();
if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
return false;
AArch64CC::CondCode CondCode;
if (CondOpc == TargetOpcode::G_ICMP) {
CondCode = changeICMPPredToAArch64CC(
(CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
CondDef->getOperand(1), MIB)) {
LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
return false;
}
} else {
// Get the condition code for the select.
AArch64CC::CondCode CondCode2;
changeFCMPPredToAArch64CC(
(CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
CondCode2);
// changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
// instructions to emit the comparison.
// TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
// unnecessary.
if (CondCode2 != AArch64CC::AL)
return false;
// Make sure we'll be able to select the compare.
unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
if (!CmpOpc)
return false;
// Emit a new compare.
auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
Cmp.addUse(CondDef->getOperand(3).getReg());
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
}
// Emit the select.
unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
auto CSel =
MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
{I.getOperand(2).getReg(), I.getOperand(3).getReg()})
.addImm(CondCode);
constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
"Unexpected MachineOperand");
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
// We want to find this sort of thing:
// x = G_SUB 0, y
// G_ICMP z, x
//
// In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
// e.g:
//
// cmn z, y
// Helper lambda to detect the subtract followed by the compare.
// Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
return false;
// Need to make sure NZCV is the same at the end of the transformation.
if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
return false;
// We want to match against SUBs.
if (DefMI->getOpcode() != TargetOpcode::G_SUB)
return false;
// Make sure that we're getting
// x = G_SUB 0, y
auto ValAndVReg =
getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
if (!ValAndVReg || ValAndVReg->Value != 0)
return false;
// This can safely be represented as a CMN.
return true;
};
// Check if the RHS or LHS of the G_ICMP is defined by a SUB
MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
// Given this:
//
// x = G_SUB 0, y
// G_ICMP x, z
//
// Produce this:
//
// cmn y, z
if (IsCMN(LHSDef, CC))
return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
// Same idea here, but with the RHS of the compare instead:
//
// Given this:
//
// x = G_SUB 0, y
// G_ICMP z, x
//
// Produce this:
//
// cmn z, y
if (IsCMN(RHSDef, CC))
return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
// Given this:
//
// z = G_AND x, y
// G_ICMP z, 0
//
// Produce this if the compare is signed:
//
// tst x, y
if (!isUnsignedICMPPred(P) && LHSDef &&
LHSDef->getOpcode() == TargetOpcode::G_AND) {
// Make sure that the RHS is 0.
auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!ValAndVReg || ValAndVReg->Value != 0)
return nullptr;
return emitTST(LHSDef->getOperand(1).getReg(),
LHSDef->getOperand(2).getReg(), MIRBuilder);
}
return nullptr;
}
bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
// Try to match a vector splat operation into a dup instruction.
// We're looking for this pattern:
// %scalar:gpr(s64) = COPY $x0
// %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
// %cst0:gpr(s32) = G_CONSTANT i32 0
// %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
// %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
// %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
// %zerovec(<2 x s32>)
//
// ...into:
// %splat = DUP %scalar
// We use the regbank of the scalar to determine which kind of dup to use.
MachineIRBuilder MIB(I);
MachineRegisterInfo &MRI = *MIB.getMRI();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
using namespace TargetOpcode;
using namespace MIPatternMatch;
// Begin matching the insert.
auto *InsMI =
getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
if (!InsMI)
return false;
// Match the undef vector operand.
auto *UndefMI =
getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
if (!UndefMI)
return false;
// Match the scalar being splatted.
Register ScalarReg = InsMI->getOperand(2).getReg();
const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
// Match the index constant 0.
int64_t Index = 0;
if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
return false;
// The shuffle's second operand doesn't matter if the mask is all zero.
ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
if (!all_of(Mask, [](int Elem) { return Elem == 0; }))
return false;
// We're done, now find out what kind of splat we need.
LLT VecTy = MRI.getType(I.getOperand(0).getReg());
LLT EltTy = VecTy.getElementType();
if (EltTy.getSizeInBits() < 32) {
LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
return false;
}
bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
unsigned Opc = 0;
if (IsFP) {
switch (EltTy.getSizeInBits()) {
case 32:
if (VecTy.getNumElements() == 2) {
Opc = AArch64::DUPv2i32lane;
} else {
Opc = AArch64::DUPv4i32lane;
assert(VecTy.getNumElements() == 4);
}
break;
case 64:
assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
Opc = AArch64::DUPv2i64lane;
break;
}
} else {
switch (EltTy.getSizeInBits()) {
case 32:
if (VecTy.getNumElements() == 2) {
Opc = AArch64::DUPv2i32gpr;
} else {
Opc = AArch64::DUPv4i32gpr;
assert(VecTy.getNumElements() == 4);
}
break;
case 64:
assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
Opc = AArch64::DUPv2i64gpr;
break;
}
}
assert(Opc && "Did not compute an opcode for a dup");
// For FP splats, we need to widen the scalar reg via undef too.
if (IsFP) {
MachineInstr *Widen = emitScalarToVector(
EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
if (!Widen)
return false;
ScalarReg = Widen->getOperand(0).getReg();
}
auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
if (IsFP)
Dup.addImm(0);
constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
if (TM.getOptLevel() == CodeGenOpt::None)
return false;
if (tryOptVectorDup(I))
return true;
return false;
}
bool AArch64InstructionSelector::selectShuffleVector(
MachineInstr &I, MachineRegisterInfo &MRI) const {
if (tryOptVectorShuffle(I))
return true;
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Register Src1Reg = I.getOperand(1).getReg();
const LLT Src1Ty = MRI.getType(Src1Reg);
Register Src2Reg = I.getOperand(2).getReg();
const LLT Src2Ty = MRI.getType(Src2Reg);
ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
LLVMContext &Ctx = MF.getFunction().getContext();
// G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
// it's originated from a <1 x T> type. Those should have been lowered into
// G_BUILD_VECTOR earlier.
if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
return false;
}
unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
SmallVector<Constant *, 64> CstIdxs;
for (int Val : Mask) {
// For now, any undef indexes we'll just assume to be 0. This should be
// optimized in future, e.g. to select DUP etc.
Val = Val < 0 ? 0 : Val;
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
}
}
MachineIRBuilder MIRBuilder(I);
// Use a constant pool to load the index vector for TBL.
Constant *CPVal = ConstantVector::get(CstIdxs);
MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
if (!IndexLoad) {
LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
return false;
}
if (DstTy.getSizeInBits() != 128) {
assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
// This case can be done with TBL1.
MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
if (!Concat) {
LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
return false;
}
// The constant pool load will be 64 bits, so need to convert to FPR128 reg.
IndexLoad =
emitScalarToVector(64, &AArch64::FPR128RegClass,
IndexLoad->getOperand(0).getReg(), MIRBuilder);
auto TBL1 = MIRBuilder.buildInstr(
AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
{Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
auto Copy =
MIRBuilder
.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
.addReg(TBL1.getReg(0), 0, AArch64::dsub);
RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
I.eraseFromParent();
return true;
}
// For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
// Q registers for regalloc.
auto RegSeq = MIRBuilder
.buildInstr(TargetOpcode::REG_SEQUENCE,
{&AArch64::QQRegClass}, {Src1Reg})
.addImm(AArch64::qsub0)
.addUse(Src2Reg)
.addImm(AArch64::qsub1);
auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
{RegSeq, IndexLoad->getOperand(0)});
constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
Optional<Register> DstReg, Register SrcReg, Register EltReg,
unsigned LaneIdx, const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const {
MachineInstr *InsElt = nullptr;
const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
// Create a register to define with the insert if one wasn't passed in.
if (!DstReg)
DstReg = MRI.createVirtualRegister(DstRC);
unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
if (RB.getID() == AArch64::FPRRegBankID) {
auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
.addImm(LaneIdx)
.addUse(InsSub->getOperand(0).getReg())
.addImm(0);
} else {
InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
.addImm(LaneIdx)
.addUse(EltReg);
}
constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
return InsElt;
}
bool AArch64InstructionSelector::selectInsertElt(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
// Get information on the destination.
Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
unsigned VecSize = DstTy.getSizeInBits();
// Get information on the element we want to insert into the destination.
Register EltReg = I.getOperand(2).getReg();
const LLT EltTy = MRI.getType(EltReg);
unsigned EltSize = EltTy.getSizeInBits();
if (EltSize < 16 || EltSize > 64)
return false; // Don't support all element types yet.
// Find the definition of the index. Bail out if it's not defined by a
// G_CONSTANT.
Register IdxReg = I.getOperand(3).getReg();
auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
if (!VRegAndVal)
return false;
unsigned LaneIdx = VRegAndVal->Value;
// Perform the lane insert.
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
MachineIRBuilder MIRBuilder(I);
if (VecSize < 128) {
// If the vector we're inserting into is smaller than 128 bits, widen it
// to 128 to do the insert.
MachineInstr *ScalarToVec = emitScalarToVector(
VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
if (!ScalarToVec)
return false;
SrcReg = ScalarToVec->getOperand(0).getReg();
}
// Create an insert into a new FPR128 register.
// Note that if our vector is already 128 bits, we end up emitting an extra
// register.
MachineInstr *InsMI =
emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
if (VecSize < 128) {
// If we had to widen to perform the insert, then we have to demote back to
// the original size to get the result we want.
Register DemoteVec = InsMI->getOperand(0).getReg();
const TargetRegisterClass *RC =
getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
return false;
}
unsigned SubReg = 0;
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
<< "\n");
return false;
}
MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
.addReg(DemoteVec, 0, SubReg);
RBI.constrainGenericRegister(DstReg, *RC, MRI);
} else {
// No widening needed.
InsMI->getOperand(0).setReg(DstReg);
constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
}
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectBuildVector(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
// Until we port more of the optimized selections, for now just use a vector
// insert sequence.
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
unsigned EltSize = EltTy.getSizeInBits();
if (EltSize < 16 || EltSize > 64)
return false; // Don't support all element types yet.
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
MachineIRBuilder MIRBuilder(I);
const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
MachineInstr *ScalarToVec =
emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
I.getOperand(1).getReg(), MIRBuilder);
if (!ScalarToVec)
return false;
Register DstVec = ScalarToVec->getOperand(0).getReg();
unsigned DstSize = DstTy.getSizeInBits();
// Keep track of the last MI we inserted. Later on, we might be able to save
// a copy using it.
MachineInstr *PrevMI = nullptr;
for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
// Note that if we don't do a subregister copy, we can end up making an
// extra register.
PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
MIRBuilder);
DstVec = PrevMI->getOperand(0).getReg();
}
// If DstTy's size in bits is less than 128, then emit a subregister copy
// from DstVec to the last register we've defined.
if (DstSize < 128) {
// Force this to be FPR using the destination vector.
const TargetRegisterClass *RC =
getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
if (!RC)
return false;
if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
return false;
}
unsigned SubReg = 0;
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
<< "\n");
return false;
}
Register Reg = MRI.createVirtualRegister(RC);
Register DstReg = I.getOperand(0).getReg();
MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
.addReg(DstVec, 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(Reg);
RBI.constrainGenericRegister(DstReg, *RC, MRI);
} else {
// We don't need a subregister copy. Save a copy by re-using the
// destination register on the final insert.
assert(PrevMI && "PrevMI was null?");
PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
}
I.eraseFromParent();
return true;
}
/// Helper function to find an intrinsic ID on an a MachineInstr. Returns the
/// ID if it exists, and 0 otherwise.
static unsigned findIntrinsicID(MachineInstr &I) {
auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
return Op.isIntrinsicID();
});
if (IntrinOp == I.operands_end())
return 0;
return IntrinOp->getIntrinsicID();
}
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineRegisterInfo &MRI) const {
// Find the intrinsic ID.
unsigned IntrinID = findIntrinsicID(I);
if (!IntrinID)
return false;
MachineIRBuilder MIRBuilder(I);
// Select the instruction.
switch (IntrinID) {
default:
return false;
case Intrinsic::trap:
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
break;
case Intrinsic::debugtrap:
if (!STI.isTargetWindows())
return false;
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
break;
}
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MachineRegisterInfo &MRI) {
unsigned IntrinID = findIntrinsicID(I);
if (!IntrinID)
return false;
MachineIRBuilder MIRBuilder(I);
switch (IntrinID) {
default:
break;
case Intrinsic::aarch64_crypto_sha1h: {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(2).getReg();
// FIXME: Should this be an assert?
if (MRI.getType(DstReg).getSizeInBits() != 32 ||
MRI.getType(SrcReg).getSizeInBits() != 32)
return false;
// The operation has to happen on FPRs. Set up some new FPR registers for
// the source and destination if they are on GPRs.
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
// Make sure the copy ends up getting constrained properly.
RBI.constrainGenericRegister(I.getOperand(2).getReg(),
AArch64::GPR32RegClass, MRI);
}
if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
// Actually insert the instruction.
auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
// Did we create a new register for the destination?
if (DstReg != I.getOperand(0).getReg()) {
// Yep. Copy the result of the instruction back into the original
// destination.
MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
AArch64::GPR32RegClass, MRI);
}
I.eraseFromParent();
return true;
}
case Intrinsic::frameaddress:
case Intrinsic::returnaddress: {
MachineFunction &MF = *I.getParent()->getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Depth = I.getOperand(2).getImm();
Register DstReg = I.getOperand(0).getReg();
RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
if (MFReturnAddr) {
MIRBuilder.buildCopy({DstReg}, MFReturnAddr);
I.eraseFromParent();
return true;
}
MFI.setReturnAddressIsTaken(true);
MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass);
I.getParent()->addLiveIn(AArch64::LR);
// Insert the copy from LR/X30 into the entry block, before it can be
// clobbered by anything.
MachineIRBuilder EntryBuilder(MF);
EntryBuilder.setInstr(*MF.begin()->begin());
EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
MFReturnAddr = DstReg;
I.eraseFromParent();
return true;
}
MFI.setFrameAddressIsTaken(true);
Register FrameAddr(AArch64::FP);
while (Depth--) {
Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
auto Ldr =
MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
.addImm(0);
constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
FrameAddr = NextFrame;
}
if (IntrinID == Intrinsic::frameaddress)
MIRBuilder.buildCopy({DstReg}, {FrameAddr});
else {
MFI.setReturnAddressIsTaken(true);
MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1);
}
I.eraseFromParent();
return true;
}
}
return false;
}
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
auto &MI = *Root.getParent();
auto &MBB = *MI.getParent();
auto &MF = *MBB.getParent();
auto &MRI = MF.getRegInfo();
uint64_t Immed;
if (Root.isImm())
Immed = Root.getImm();
else if (Root.isCImm())
Immed = Root.getCImm()->getZExtValue();
else if (Root.isReg()) {
auto ValAndVReg =
getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
Immed = ValAndVReg->Value;
} else
return None;
return Immed;
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
auto MaybeImmed = getImmedFromMO(Root);
if (MaybeImmed == None || *MaybeImmed > 31)
return None;
uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
auto MaybeImmed = getImmedFromMO(Root);
if (MaybeImmed == None || *MaybeImmed > 31)
return None;
uint64_t Enc = 31 - *MaybeImmed;
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
auto MaybeImmed = getImmedFromMO(Root);
if (MaybeImmed == None || *MaybeImmed > 63)
return None;
uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
auto MaybeImmed = getImmedFromMO(Root);
if (MaybeImmed == None || *MaybeImmed > 63)
return None;
uint64_t Enc = 63 - *MaybeImmed;
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
/// Helper to select an immediate value that can be represented as a 12-bit
/// value shifted left by either 0 or 12. If it is possible to do so, return
/// the immediate and shift value. If not, return None.
///
/// Used by selectArithImmed and selectNegArithImmed.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
uint64_t Immed) const {
unsigned ShiftAmt;
if (Immed >> 12 == 0) {
ShiftAmt = 0;
} else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
ShiftAmt = 12;
Immed = Immed >> 12;
} else
return None;
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
}};
}
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
// This function is called from the addsub_shifted_imm ComplexPattern,
// which lists [imm] as the list of opcode it's interested in, however
// we still need to check whether the operand is actually an immediate
// here because the ComplexPattern opcode list is only used in
// root-level opcode matching.
auto MaybeImmed = getImmedFromMO(Root);
if (MaybeImmed == None)
return None;
return select12BitValueWithLeftShift(*MaybeImmed);
}
/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
// We need a register here, because we need to know if we have a 64 or 32
// bit immediate.
if (!Root.isReg())
return None;
auto MaybeImmed = getImmedFromMO(Root);
if (MaybeImmed == None)
return None;
uint64_t Immed = *MaybeImmed;
// This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
// have the opposite effect on the C flag, so this pattern mustn't match under
// those circumstances.
if (Immed == 0)
return None;
// Check if we're dealing with a 32-bit type on the root or a 64-bit type on
// the root.
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
Immed = ~((uint32_t)Immed) + 1;
else
Immed = ~Immed + 1ULL;
if (Immed & 0xFFFFFFFFFF000000ULL)
return None;
Immed &= 0xFFFFFFULL;
return select12BitValueWithLeftShift(Immed);
}
/// Return true if it is worth folding MI into an extended register. That is,
/// if it's safe to pull it into the addressing mode of a load or store as a
/// shift.
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
MachineInstr &MI, const MachineRegisterInfo &MRI) const {
// Always fold if there is one use, or if we're optimizing for size.
Register DefReg = MI.getOperand(0).getReg();
if (MRI.hasOneUse(DefReg) ||
MI.getParent()->getParent()->getFunction().hasMinSize())
return true;
// It's better to avoid folding and recomputing shifts when we don't have a
// fastpath.
if (!STI.hasLSLFast())
return false;
// We have a fastpath, so folding a shift in and potentially computing it
// many times may be beneficial. Check if this is only used in memory ops.
// If it is, then we should fold.
return all_of(MRI.use_instructions(DefReg),
[](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
unsigned SizeInBytes, bool WantsExt) const {
assert(Base.isReg() && "Expected base to be a register operand");
assert(Offset.isReg() && "Expected offset to be a register operand");
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
if (!OffsetInst)
return None;
unsigned OffsetOpc = OffsetInst->getOpcode();
if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
return None;
// Make sure that the memory op is a valid size.
int64_t LegalShiftVal = Log2_32(SizeInBytes);
if (LegalShiftVal == 0)
return None;
if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
return None;
// Now, try to find the specific G_CONSTANT. Start by assuming that the
// register we will offset is the LHS, and the register containing the
// constant is the RHS.
Register OffsetReg = OffsetInst->getOperand(1).getReg();
Register ConstantReg = OffsetInst->getOperand(2).getReg();
auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg) {
// We didn't get a constant on the RHS. If the opcode is a shift, then
// we're done.
if (OffsetOpc == TargetOpcode::G_SHL)
return None;
// If we have a G_MUL, we can use either register. Try looking at the RHS.
std::swap(OffsetReg, ConstantReg);
ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg)
return None;
}
// The value must fit into 3 bits, and must be positive. Make sure that is
// true.
int64_t ImmVal = ValAndVReg->Value;
// Since we're going to pull this into a shift, the constant value must be
// a power of 2. If we got a multiply, then we need to check this.
if (OffsetOpc == TargetOpcode::G_MUL) {
if (!isPowerOf2_32(ImmVal))
return None;
// Got a power of 2. So, the amount we'll shift is the log base-2 of that.
ImmVal = Log2_32(ImmVal);
}
if ((ImmVal & 0x7) != ImmVal)
return None;
// We are only allowed to shift by LegalShiftVal. This shift value is built
// into the instruction, so we can't just use whatever we want.
if (ImmVal != LegalShiftVal)
return None;
unsigned SignExtend = 0;
if (WantsExt) {
// Check if the offset is defined by an extend.
MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
if (Ext == AArch64_AM::InvalidShiftExtend)
return None;
SignExtend = Ext == AArch64_AM::SXTW;
// Need a 32-bit wide register here.
MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
OffsetReg = ExtInst->getOperand(1).getReg();
OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
}
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
// offset. Signify that we are shifting by setting the shift flag to 1.
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
[=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
[=](MachineInstrBuilder &MIB) {
// Need to add both immediates here to make sure that they are both
// added to the instruction.
MIB.addImm(SignExtend);
MIB.addImm(1);
}}};
}
/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, we'll never
/// see anything other than a 3 here (which corresponds to the size of the
/// element being loaded.)
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
MachineOperand &Root, unsigned SizeInBytes) const {
if (!Root.isReg())
return None;
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
// We want to find something like this:
//
// val = G_CONSTANT LegalShiftVal
// shift = G_SHL off_reg val
// ptr = G_PTR_ADD base_reg shift
// x = G_LOAD ptr
//
// And fold it into this addressing mode:
//
// ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
// Check if we can find the G_PTR_ADD.
MachineInstr *PtrAdd =
getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
return None;
// Now, try to match an opcode which will match our specific offset.
// We want a G_SHL or a G_MUL.
MachineInstr *OffsetInst =
getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
return selectExtendedSHL(Root, PtrAdd->getOperand(1),
OffsetInst->getOperand(0), SizeInBytes,
/*WantsExt=*/false);
}
/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When possible (or profitable) to fold a G_PTR_ADD into the address calculation,
/// this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
MachineOperand &Root) const {
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
// We need a GEP.
MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
return None;
// If this is used more than once, let's not bother folding.
// TODO: Check if they are memory ops. If they are, then we can still fold
// without having to recompute anything.
if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
return None;
// Base is the GEP's LHS, offset is its RHS.
return {{[=](MachineInstrBuilder &MIB) {
MIB.addUse(Gep->getOperand(1).getReg());
},
[=](MachineInstrBuilder &MIB) {
MIB.addUse(Gep->getOperand(2).getReg());
},
[=](MachineInstrBuilder &MIB) {
// Need to add both immediates here to make sure that they are both
// added to the instruction.
MIB.addImm(0);
MIB.addImm(0);
}}};
}
/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const {
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
// If we have a constant offset, then we probably don't want to match a
// register offset.
if (isBaseWithConstantOffset(Root, MRI))
return None;
// Try to fold shifts into the addressing mode.
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
if (AddrModeFns)
return AddrModeFns;
// If that doesn't work, see if it's possible to fold in registers from
// a GEP.
return selectAddrModeRegisterOffset(Root);
}
/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
unsigned SizeInBytes) const {
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
MachineInstr *PtrAdd =
getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
return None;
MachineOperand &LHS = PtrAdd->getOperand(1);
MachineOperand &RHS = PtrAdd->getOperand(2);
MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
// The first case is the same as selectAddrModeXRO, except we need an extend.
// In this case, we try to find a shift and extend, and fold them into the
// addressing mode.
//
// E.g.
//
// off_reg = G_Z/S/ANYEXT ext_reg
// val = G_CONSTANT LegalShiftVal
// shift = G_SHL off_reg val
// ptr = G_PTR_ADD base_reg shift
// x = G_LOAD ptr
//
// In this case we can get a load like this:
//
// ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
SizeInBytes, /*WantsExt=*/true);
if (ExtendedShl)
return ExtendedShl;
// There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
//
// e.g.
// ldr something, [base_reg, ext_reg, sxtw]
if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
return None;
// Check if this is an extend. We'll get an extend type if it is.
AArch64_AM::ShiftExtendType Ext =
getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
if (Ext == AArch64_AM::InvalidShiftExtend)
return None;
// Need a 32-bit wide register.
MachineIRBuilder MIB(*PtrAdd);
Register ExtReg =
narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
unsigned SignExtend = Ext == AArch64_AM::SXTW;
// Base is LHS, offset is ExtReg.
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
[=](MachineInstrBuilder &MIB) {
MIB.addImm(SignExtend);
MIB.addImm(0);
}}};
}
/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// immediate.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
unsigned Size) const {
MachineRegisterInfo &MRI =
Root.getParent()->getParent()->getParent()->getRegInfo();
if (!Root.isReg())
return None;
if (!isBaseWithConstantOffset(Root, MRI))
return None;
MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
if (!RootDef)
return None;
MachineOperand &OffImm = RootDef->getOperand(2);
if (!OffImm.isReg())
return None;
MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
return None;
int64_t RHSC;
MachineOperand &RHSOp1 = RHS->getOperand(1);
if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
return None;
RHSC = RHSOp1.getCImm()->getSExtValue();
// If the offset is valid as a scaled immediate, don't match here.
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
return None;
if (RHSC >= -256 && RHSC < 256) {
MachineOperand &Base = RootDef->getOperand(1);
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(Base); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
}};
}
return None;
}
/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
unsigned Size) const {
MachineRegisterInfo &MRI =
Root.getParent()->getParent()->getParent()->getRegInfo();
if (!Root.isReg())
return None;
MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
if (!RootDef)
return None;
if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
}};
}
if (isBaseWithConstantOffset(Root, MRI)) {
MachineOperand &LHS = RootDef->getOperand(1);
MachineOperand &RHS = RootDef->getOperand(2);
MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
if (LHSDef && RHSDef) {
int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
unsigned Scale = Log2_32(Size);
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
}};
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
}};
}
}
}
// Before falling back to our general case, check if the unscaled
// instructions can handle this. If so, that's preferable.
if (selectAddrModeUnscaled(Root, Size).hasValue())
return None;
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(Root); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
}};
}
/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
// TODO: Handle AArch64_AM::ROR
switch (MI.getOpcode()) {
default:
return AArch64_AM::InvalidShiftExtend;
case TargetOpcode::G_SHL:
return AArch64_AM::LSL;
case TargetOpcode::G_LSHR:
return AArch64_AM::LSR;
case TargetOpcode::G_ASHR:
return AArch64_AM::ASR;
}
}
/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
///
/// TODO: Allow shifted register to be rotated in logical instructions.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
if (!Root.isReg())
return None;
MachineRegisterInfo &MRI =
Root.getParent()->getParent()->getParent()->getRegInfo();
// Check if the operand is defined by an instruction which corresponds to
// a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
//
// TODO: Handle AArch64_AM::ROR for logical instructions.
MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
if (!ShiftInst)
return None;
AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
if (ShType == AArch64_AM::InvalidShiftExtend)
return None;
if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
return None;
// Need an immediate on the RHS.
MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
auto Immed = getImmedFromMO(ShiftRHS);
if (!Immed)
return None;
// We have something that we can fold. Fold in the shift's LHS and RHS into
// the instruction.
MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
Register ShiftReg = ShiftLHS.getReg();
unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
unsigned Val = *Immed & (NumBits - 1);
unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
unsigned Opc = MI.getOpcode();
// Handle explicit extend instructions first.
if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
assert(Size != 64 && "Extend from 64 bits?");
switch (Size) {
case 8:
return AArch64_AM::SXTB;
case 16:
return AArch64_AM::SXTH;
case 32:
return AArch64_AM::SXTW;
default:
return AArch64_AM::InvalidShiftExtend;
}
}
if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
assert(Size != 64 && "Extend from 64 bits?");
switch (Size) {
case 8:
return AArch64_AM::UXTB;
case 16:
return AArch64_AM::UXTH;
case 32:
return AArch64_AM::UXTW;
default:
return AArch64_AM::InvalidShiftExtend;
}
}
// Don't have an explicit extend. Try to handle a G_AND with a constant mask
// on the RHS.
if (Opc != TargetOpcode::G_AND)
return AArch64_AM::InvalidShiftExtend;
Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
if (!MaybeAndMask)
return AArch64_AM::InvalidShiftExtend;
uint64_t AndMask = *MaybeAndMask;
switch (AndMask) {
default:
return AArch64_AM::InvalidShiftExtend;
case 0xFF:
return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
case 0xFFFF:
return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
case 0xFFFFFFFF:
return AArch64_AM::UXTW;
}
}
Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
Register ExtReg, MachineIRBuilder &MIB) const {
MachineRegisterInfo &MRI = *MIB.getMRI();
if (MRI.getType(ExtReg).getSizeInBits() == 32)
return ExtReg;
// Insert a copy to move ExtReg to GPR32.
Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
// Select the copy into a subregister copy.
selectCopy(*Copy, TII, MRI, TRI, RBI);
return Copy.getReg(0);
}
/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
MachineOperand &Root) const {
if (!Root.isReg())
return None;
MachineRegisterInfo &MRI =
Root.getParent()->getParent()->getParent()->getRegInfo();
uint64_t ShiftVal = 0;
Register ExtReg;
AArch64_AM::ShiftExtendType Ext;
MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
if (!RootDef)
return None;
if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
return None;
// Check if we can fold a shift and an extend.
if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
// Look for a constant on the RHS of the shift.
MachineOperand &RHS = RootDef->getOperand(2);
Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
if (!MaybeShiftVal)
return None;
ShiftVal = *MaybeShiftVal;
if (ShiftVal > 4)
return None;
// Look for a valid extend instruction on the LHS of the shift.
MachineOperand &LHS = RootDef->getOperand(1);
MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
if (!ExtDef)
return None;
Ext = getExtendTypeForInst(*ExtDef, MRI);
if (Ext == AArch64_AM::InvalidShiftExtend)
return None;
ExtReg = ExtDef->getOperand(1).getReg();
} else {
// Didn't get a shift. Try just folding an extend.
Ext = getExtendTypeForInst(*RootDef, MRI);
if (Ext == AArch64_AM::InvalidShiftExtend)
return None;
ExtReg = RootDef->getOperand(1).getReg();
// If we have a 32 bit instruction which zeroes out the high half of a
// register, we get an implicit zero extend for free. Check if we have one.
// FIXME: We actually emit the extend right now even though we don't have
// to.
if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
if (ExtInst && isDef32(*ExtInst))
return None;
}
}
// We require a GPR32 here. Narrow the ExtReg if needed using a subregister
// copy.
MachineIRBuilder MIB(*RootDef);
ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
[=](MachineInstrBuilder &MIB) {
MIB.addImm(getArithExtendImm(Ext, ShiftVal));
}}};
}
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}
void AArch64InstructionSelector::renderLogicalImm32(
MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
MIB.addImm(Enc);
}
void AArch64InstructionSelector::renderLogicalImm64(
MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
MIB.addImm(Enc);
}
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
const MachineInstr &MI, unsigned NumBytes) const {
if (!MI.mayLoadOrStore())
return false;
assert(MI.hasOneMemOperand() &&
"Expected load/store to have only one mem op!");
return (*MI.memoperands_begin())->getSize() == NumBytes;
}
bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
return false;
// Only return true if we know the operation will zero-out the high half of
// the 64-bit register. Truncates can be subregister copies, which don't
// zero out the high bits. Copies and other copy-like instructions can be
// fed by truncates, or could be lowered as subregister copies.
switch (MI.getOpcode()) {
default:
return true;
case TargetOpcode::COPY:
case TargetOpcode::G_BITCAST:
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_PHI:
return false;
}
}
namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
AArch64Subtarget &Subtarget,
AArch64RegisterBankInfo &RBI) {
return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
}