The RISC-V ISA defines RV32E as an alternative "base" instruction set encoding, that differs from RV32I by having only 16 rather than 32 registers. This patch adds basic definitions for RV32E as well as MC layer support (assembling, disassembling) and tests. The only supported ABI on RV32E is ILP32E. Add a new RISCVFeatures::validate() helper to RISCVUtils which can be called from codegen or MC layer libraries to validate the combination of TargetTriple and FeatureBitSet. Other targets have similar checks (e.g. erroring if SPE is enabled on PPC64 or oddspreg + o32 ABI on Mips), but they either duplicate the checks (Mips), or fail to check for both codegen and MC codepaths (PPC). Codegen for the ILP32E ABI support and RV32E codegen are left for a future patch/patches. Differential Revision: https://reviews.llvm.org/D59470 llvm-svn: 356744
2100 lines
79 KiB
C++
2100 lines
79 KiB
C++
//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines the interfaces that RISCV uses to lower LLVM code into a
|
|
// selection DAG.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "RISCVISelLowering.h"
|
|
#include "RISCV.h"
|
|
#include "RISCVMachineFunctionInfo.h"
|
|
#include "RISCVRegisterInfo.h"
|
|
#include "RISCVSubtarget.h"
|
|
#include "RISCVTargetMachine.h"
|
|
#include "llvm/ADT/SmallSet.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/CodeGen/SelectionDAGISel.h"
|
|
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
|
#include "llvm/CodeGen/ValueTypes.h"
|
|
#include "llvm/IR/DiagnosticInfo.h"
|
|
#include "llvm/IR/DiagnosticPrinter.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "riscv-lower"
|
|
|
|
STATISTIC(NumTailCalls, "Number of tail calls");
|
|
|
|
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
|
const RISCVSubtarget &STI)
|
|
: TargetLowering(TM), Subtarget(STI) {
|
|
|
|
if (Subtarget.isRV32E())
|
|
report_fatal_error("Codegen not yet implemented for RV32E");
|
|
|
|
RISCVABI::ABI ABI = Subtarget.getTargetABI();
|
|
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
|
|
|
|
if (ABI != RISCVABI::ABI_ILP32 && ABI != RISCVABI::ABI_LP64)
|
|
report_fatal_error("Don't know how to lower this ABI");
|
|
|
|
MVT XLenVT = Subtarget.getXLenVT();
|
|
|
|
// Set up the register classes.
|
|
addRegisterClass(XLenVT, &RISCV::GPRRegClass);
|
|
|
|
if (Subtarget.hasStdExtF())
|
|
addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
|
|
if (Subtarget.hasStdExtD())
|
|
addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
|
|
|
|
// Compute derived properties from the register classes.
|
|
computeRegisterProperties(STI.getRegisterInfo());
|
|
|
|
setStackPointerRegisterToSaveRestore(RISCV::X2);
|
|
|
|
for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
|
|
setLoadExtAction(N, XLenVT, MVT::i1, Promote);
|
|
|
|
// TODO: add all necessary setOperationAction calls.
|
|
setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
|
|
|
|
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
|
|
setOperationAction(ISD::BR_CC, XLenVT, Expand);
|
|
setOperationAction(ISD::SELECT, XLenVT, Custom);
|
|
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
|
|
|
|
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
|
|
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::VASTART, MVT::Other, Custom);
|
|
setOperationAction(ISD::VAARG, MVT::Other, Expand);
|
|
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
|
|
setOperationAction(ISD::VAEND, MVT::Other, Expand);
|
|
|
|
for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
|
|
|
|
if (Subtarget.is64Bit()) {
|
|
setOperationAction(ISD::SHL, MVT::i32, Custom);
|
|
setOperationAction(ISD::SRA, MVT::i32, Custom);
|
|
setOperationAction(ISD::SRL, MVT::i32, Custom);
|
|
}
|
|
|
|
if (!Subtarget.hasStdExtM()) {
|
|
setOperationAction(ISD::MUL, XLenVT, Expand);
|
|
setOperationAction(ISD::MULHS, XLenVT, Expand);
|
|
setOperationAction(ISD::MULHU, XLenVT, Expand);
|
|
setOperationAction(ISD::SDIV, XLenVT, Expand);
|
|
setOperationAction(ISD::UDIV, XLenVT, Expand);
|
|
setOperationAction(ISD::SREM, XLenVT, Expand);
|
|
setOperationAction(ISD::UREM, XLenVT, Expand);
|
|
}
|
|
|
|
if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
|
|
setOperationAction(ISD::SDIV, MVT::i32, Custom);
|
|
setOperationAction(ISD::UDIV, MVT::i32, Custom);
|
|
setOperationAction(ISD::UREM, MVT::i32, Custom);
|
|
}
|
|
|
|
setOperationAction(ISD::SDIVREM, XLenVT, Expand);
|
|
setOperationAction(ISD::UDIVREM, XLenVT, Expand);
|
|
setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
|
|
setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
|
|
|
|
setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
|
|
setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
|
|
setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
|
|
|
|
setOperationAction(ISD::ROTL, XLenVT, Expand);
|
|
setOperationAction(ISD::ROTR, XLenVT, Expand);
|
|
setOperationAction(ISD::BSWAP, XLenVT, Expand);
|
|
setOperationAction(ISD::CTTZ, XLenVT, Expand);
|
|
setOperationAction(ISD::CTLZ, XLenVT, Expand);
|
|
setOperationAction(ISD::CTPOP, XLenVT, Expand);
|
|
|
|
ISD::CondCode FPCCToExtend[] = {
|
|
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ,
|
|
ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
|
|
ISD::SETGT, ISD::SETGE, ISD::SETNE};
|
|
|
|
ISD::NodeType FPOpToExtend[] = {
|
|
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
|
|
|
|
if (Subtarget.hasStdExtF()) {
|
|
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
|
|
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
|
|
for (auto CC : FPCCToExtend)
|
|
setCondCodeAction(CC, MVT::f32, Expand);
|
|
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
|
|
setOperationAction(ISD::SELECT, MVT::f32, Custom);
|
|
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
|
|
for (auto Op : FPOpToExtend)
|
|
setOperationAction(Op, MVT::f32, Expand);
|
|
}
|
|
|
|
if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
|
|
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
|
|
|
|
if (Subtarget.hasStdExtD()) {
|
|
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
|
|
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
|
|
for (auto CC : FPCCToExtend)
|
|
setCondCodeAction(CC, MVT::f64, Expand);
|
|
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
|
|
setOperationAction(ISD::SELECT, MVT::f64, Custom);
|
|
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
|
|
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
|
|
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
|
|
for (auto Op : FPOpToExtend)
|
|
setOperationAction(Op, MVT::f64, Expand);
|
|
}
|
|
|
|
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
|
|
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
|
|
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
|
|
|
|
if (Subtarget.hasStdExtA()) {
|
|
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
|
|
setMinCmpXchgSizeInBits(32);
|
|
} else {
|
|
setMaxAtomicSizeInBitsSupported(0);
|
|
}
|
|
|
|
setBooleanContents(ZeroOrOneBooleanContent);
|
|
|
|
// Function alignments (log2).
|
|
unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
|
|
setMinFunctionAlignment(FunctionAlignment);
|
|
setPrefFunctionAlignment(FunctionAlignment);
|
|
|
|
// Effectively disable jump table generation.
|
|
setMinimumJumpTableEntries(INT_MAX);
|
|
}
|
|
|
|
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
|
|
EVT VT) const {
|
|
if (!VT.isVector())
|
|
return getPointerTy(DL);
|
|
return VT.changeVectorElementTypeToInteger();
|
|
}
|
|
|
|
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|
const CallInst &I,
|
|
MachineFunction &MF,
|
|
unsigned Intrinsic) const {
|
|
switch (Intrinsic) {
|
|
default:
|
|
return false;
|
|
case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
|
|
case Intrinsic::riscv_masked_atomicrmw_add_i32:
|
|
case Intrinsic::riscv_masked_atomicrmw_sub_i32:
|
|
case Intrinsic::riscv_masked_atomicrmw_nand_i32:
|
|
case Intrinsic::riscv_masked_atomicrmw_max_i32:
|
|
case Intrinsic::riscv_masked_atomicrmw_min_i32:
|
|
case Intrinsic::riscv_masked_atomicrmw_umax_i32:
|
|
case Intrinsic::riscv_masked_atomicrmw_umin_i32:
|
|
case Intrinsic::riscv_masked_cmpxchg_i32:
|
|
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
|
|
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
|
Info.memVT = MVT::getVT(PtrTy->getElementType());
|
|
Info.ptrVal = I.getArgOperand(0);
|
|
Info.offset = 0;
|
|
Info.align = 4;
|
|
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
|
|
MachineMemOperand::MOVolatile;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
|
|
const AddrMode &AM, Type *Ty,
|
|
unsigned AS,
|
|
Instruction *I) const {
|
|
// No global is ever allowed as a base.
|
|
if (AM.BaseGV)
|
|
return false;
|
|
|
|
// Require a 12-bit signed offset.
|
|
if (!isInt<12>(AM.BaseOffs))
|
|
return false;
|
|
|
|
switch (AM.Scale) {
|
|
case 0: // "r+i" or just "i", depending on HasBaseReg.
|
|
break;
|
|
case 1:
|
|
if (!AM.HasBaseReg) // allow "r+i".
|
|
break;
|
|
return false; // disallow "r+r" or "r+r+i".
|
|
default:
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
|
|
return isInt<12>(Imm);
|
|
}
|
|
|
|
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
|
|
return isInt<12>(Imm);
|
|
}
|
|
|
|
// On RV32, 64-bit integers are split into their high and low parts and held
|
|
// in two different registers, so the trunc is free since the low register can
|
|
// just be used.
|
|
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
|
|
if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
|
|
return false;
|
|
unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
|
|
unsigned DestBits = DstTy->getPrimitiveSizeInBits();
|
|
return (SrcBits == 64 && DestBits == 32);
|
|
}
|
|
|
|
bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
|
|
if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
|
|
!SrcVT.isInteger() || !DstVT.isInteger())
|
|
return false;
|
|
unsigned SrcBits = SrcVT.getSizeInBits();
|
|
unsigned DestBits = DstVT.getSizeInBits();
|
|
return (SrcBits == 64 && DestBits == 32);
|
|
}
|
|
|
|
bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
|
|
// Zexts are free if they can be combined with a load.
|
|
if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
|
|
EVT MemVT = LD->getMemoryVT();
|
|
if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
|
|
(Subtarget.is64Bit() && MemVT == MVT::i32)) &&
|
|
(LD->getExtensionType() == ISD::NON_EXTLOAD ||
|
|
LD->getExtensionType() == ISD::ZEXTLOAD))
|
|
return true;
|
|
}
|
|
|
|
return TargetLowering::isZExtFree(Val, VT2);
|
|
}
|
|
|
|
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
|
|
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
|
|
}
|
|
|
|
// Changes the condition code and swaps operands if necessary, so the SetCC
|
|
// operation matches one of the comparisons supported directly in the RISC-V
|
|
// ISA.
|
|
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
|
|
switch (CC) {
|
|
default:
|
|
break;
|
|
case ISD::SETGT:
|
|
case ISD::SETLE:
|
|
case ISD::SETUGT:
|
|
case ISD::SETULE:
|
|
CC = ISD::getSetCCSwappedOperands(CC);
|
|
std::swap(LHS, RHS);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Return the RISC-V branch opcode that matches the given DAG integer
|
|
// condition code. The CondCode must be one of those supported by the RISC-V
|
|
// ISA (see normaliseSetCC).
|
|
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
|
|
switch (CC) {
|
|
default:
|
|
llvm_unreachable("Unsupported CondCode");
|
|
case ISD::SETEQ:
|
|
return RISCV::BEQ;
|
|
case ISD::SETNE:
|
|
return RISCV::BNE;
|
|
case ISD::SETLT:
|
|
return RISCV::BLT;
|
|
case ISD::SETGE:
|
|
return RISCV::BGE;
|
|
case ISD::SETULT:
|
|
return RISCV::BLTU;
|
|
case ISD::SETUGE:
|
|
return RISCV::BGEU;
|
|
}
|
|
}
|
|
|
|
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
|
|
SelectionDAG &DAG) const {
|
|
switch (Op.getOpcode()) {
|
|
default:
|
|
report_fatal_error("unimplemented operand");
|
|
case ISD::GlobalAddress:
|
|
return lowerGlobalAddress(Op, DAG);
|
|
case ISD::BlockAddress:
|
|
return lowerBlockAddress(Op, DAG);
|
|
case ISD::ConstantPool:
|
|
return lowerConstantPool(Op, DAG);
|
|
case ISD::SELECT:
|
|
return lowerSELECT(Op, DAG);
|
|
case ISD::VASTART:
|
|
return lowerVASTART(Op, DAG);
|
|
case ISD::FRAMEADDR:
|
|
return lowerFRAMEADDR(Op, DAG);
|
|
case ISD::RETURNADDR:
|
|
return lowerRETURNADDR(Op, DAG);
|
|
case ISD::BITCAST: {
|
|
assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
|
|
"Unexpected custom legalisation");
|
|
SDLoc DL(Op);
|
|
SDValue Op0 = Op.getOperand(0);
|
|
if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
|
|
return SDValue();
|
|
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
|
|
SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
|
|
return FPConv;
|
|
}
|
|
}
|
|
}
|
|
|
|
SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
|
|
SelectionDAG &DAG) const {
|
|
SDLoc DL(Op);
|
|
EVT Ty = Op.getValueType();
|
|
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
|
|
const GlobalValue *GV = N->getGlobal();
|
|
int64_t Offset = N->getOffset();
|
|
MVT XLenVT = Subtarget.getXLenVT();
|
|
|
|
if (isPositionIndependent())
|
|
report_fatal_error("Unable to lowerGlobalAddress");
|
|
// In order to maximise the opportunity for common subexpression elimination,
|
|
// emit a separate ADD node for the global address offset instead of folding
|
|
// it in the global address node. Later peephole optimisations may choose to
|
|
// fold it back in when profitable.
|
|
SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
|
|
SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
|
|
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
|
|
SDValue MNLo =
|
|
SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
|
|
if (Offset != 0)
|
|
return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
|
|
DAG.getConstant(Offset, DL, XLenVT));
|
|
return MNLo;
|
|
}
|
|
|
|
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
|
|
SelectionDAG &DAG) const {
|
|
SDLoc DL(Op);
|
|
EVT Ty = Op.getValueType();
|
|
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
|
|
const BlockAddress *BA = N->getBlockAddress();
|
|
int64_t Offset = N->getOffset();
|
|
|
|
if (isPositionIndependent())
|
|
report_fatal_error("Unable to lowerBlockAddress");
|
|
|
|
SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
|
|
SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
|
|
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
|
|
SDValue MNLo =
|
|
SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
|
|
return MNLo;
|
|
}
|
|
|
|
SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
|
|
SelectionDAG &DAG) const {
|
|
SDLoc DL(Op);
|
|
EVT Ty = Op.getValueType();
|
|
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
|
|
const Constant *CPA = N->getConstVal();
|
|
int64_t Offset = N->getOffset();
|
|
unsigned Alignment = N->getAlignment();
|
|
|
|
if (!isPositionIndependent()) {
|
|
SDValue CPAHi =
|
|
DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
|
|
SDValue CPALo =
|
|
DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
|
|
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
|
|
SDValue MNLo =
|
|
SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
|
|
return MNLo;
|
|
} else {
|
|
report_fatal_error("Unable to lowerConstantPool");
|
|
}
|
|
}
|
|
|
|
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
|
SDValue CondV = Op.getOperand(0);
|
|
SDValue TrueV = Op.getOperand(1);
|
|
SDValue FalseV = Op.getOperand(2);
|
|
SDLoc DL(Op);
|
|
MVT XLenVT = Subtarget.getXLenVT();
|
|
|
|
// If the result type is XLenVT and CondV is the output of a SETCC node
|
|
// which also operated on XLenVT inputs, then merge the SETCC node into the
|
|
// lowered RISCVISD::SELECT_CC to take advantage of the integer
|
|
// compare+branch instructions. i.e.:
|
|
// (select (setcc lhs, rhs, cc), truev, falsev)
|
|
// -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
|
|
if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
|
|
CondV.getOperand(0).getSimpleValueType() == XLenVT) {
|
|
SDValue LHS = CondV.getOperand(0);
|
|
SDValue RHS = CondV.getOperand(1);
|
|
auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
|
|
ISD::CondCode CCVal = CC->get();
|
|
|
|
normaliseSetCC(LHS, RHS, CCVal);
|
|
|
|
SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
|
|
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
|
|
SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
|
|
return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
|
|
}
|
|
|
|
// Otherwise:
|
|
// (select condv, truev, falsev)
|
|
// -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
|
|
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
|
|
SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
|
|
|
|
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
|
|
SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
|
|
|
|
return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
|
|
}
|
|
|
|
SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
|
|
|
|
SDLoc DL(Op);
|
|
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
|
|
getPointerTy(MF.getDataLayout()));
|
|
|
|
// vastart just stores the address of the VarArgsFrameIndex slot into the
|
|
// memory location argument.
|
|
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
|
|
return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
|
|
MachinePointerInfo(SV));
|
|
}
|
|
|
|
SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
|
|
SelectionDAG &DAG) const {
|
|
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
MFI.setFrameAddressIsTaken(true);
|
|
unsigned FrameReg = RI.getFrameRegister(MF);
|
|
int XLenInBytes = Subtarget.getXLen() / 8;
|
|
|
|
EVT VT = Op.getValueType();
|
|
SDLoc DL(Op);
|
|
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
|
|
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
|
while (Depth--) {
|
|
int Offset = -(XLenInBytes * 2);
|
|
SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
|
|
DAG.getIntPtrConstant(Offset, DL));
|
|
FrameAddr =
|
|
DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
|
|
}
|
|
return FrameAddr;
|
|
}
|
|
|
|
SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
|
|
SelectionDAG &DAG) const {
|
|
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
MFI.setReturnAddressIsTaken(true);
|
|
MVT XLenVT = Subtarget.getXLenVT();
|
|
int XLenInBytes = Subtarget.getXLen() / 8;
|
|
|
|
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
|
|
return SDValue();
|
|
|
|
EVT VT = Op.getValueType();
|
|
SDLoc DL(Op);
|
|
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
|
if (Depth) {
|
|
int Off = -XLenInBytes;
|
|
SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
|
|
SDValue Offset = DAG.getConstant(Off, DL, VT);
|
|
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
|
|
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
|
|
MachinePointerInfo());
|
|
}
|
|
|
|
// Return the value of the return address register, marking it an implicit
|
|
// live-in.
|
|
unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
|
|
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
|
|
}
|
|
|
|
// Returns the opcode of the target-specific SDNode that implements the 32-bit
|
|
// form of the given Opcode.
|
|
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
|
|
switch (Opcode) {
|
|
default:
|
|
llvm_unreachable("Unexpected opcode");
|
|
case ISD::SHL:
|
|
return RISCVISD::SLLW;
|
|
case ISD::SRA:
|
|
return RISCVISD::SRAW;
|
|
case ISD::SRL:
|
|
return RISCVISD::SRLW;
|
|
case ISD::SDIV:
|
|
return RISCVISD::DIVW;
|
|
case ISD::UDIV:
|
|
return RISCVISD::DIVUW;
|
|
case ISD::UREM:
|
|
return RISCVISD::REMUW;
|
|
}
|
|
}
|
|
|
|
// Converts the given 32-bit operation to a target-specific SelectionDAG node.
|
|
// Because i32 isn't a legal type for RV64, these operations would otherwise
|
|
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
|
|
// later one because the fact the operation was originally of type i32 is
|
|
// lost.
|
|
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
|
|
SDLoc DL(N);
|
|
RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
|
|
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
|
|
SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
|
|
SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
|
|
// ReplaceNodeResults requires we maintain the same type for the return value.
|
|
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
|
|
}
|
|
|
|
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
|
|
SmallVectorImpl<SDValue> &Results,
|
|
SelectionDAG &DAG) const {
|
|
SDLoc DL(N);
|
|
switch (N->getOpcode()) {
|
|
default:
|
|
llvm_unreachable("Don't know how to custom type legalize this operation!");
|
|
case ISD::SHL:
|
|
case ISD::SRA:
|
|
case ISD::SRL:
|
|
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
|
|
"Unexpected custom legalisation");
|
|
if (N->getOperand(1).getOpcode() == ISD::Constant)
|
|
return;
|
|
Results.push_back(customLegalizeToWOp(N, DAG));
|
|
break;
|
|
case ISD::SDIV:
|
|
case ISD::UDIV:
|
|
case ISD::UREM:
|
|
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
|
|
Subtarget.hasStdExtM() && "Unexpected custom legalisation");
|
|
if (N->getOperand(0).getOpcode() == ISD::Constant ||
|
|
N->getOperand(1).getOpcode() == ISD::Constant)
|
|
return;
|
|
Results.push_back(customLegalizeToWOp(N, DAG));
|
|
break;
|
|
case ISD::BITCAST: {
|
|
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
|
|
Subtarget.hasStdExtF() && "Unexpected custom legalisation");
|
|
SDLoc DL(N);
|
|
SDValue Op0 = N->getOperand(0);
|
|
if (Op0.getValueType() != MVT::f32)
|
|
return;
|
|
SDValue FPConv =
|
|
DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
|
|
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
|
|
DAGCombinerInfo &DCI) const {
|
|
SelectionDAG &DAG = DCI.DAG;
|
|
|
|
switch (N->getOpcode()) {
|
|
default:
|
|
break;
|
|
case RISCVISD::SplitF64: {
|
|
SDValue Op0 = N->getOperand(0);
|
|
// If the input to SplitF64 is just BuildPairF64 then the operation is
|
|
// redundant. Instead, use BuildPairF64's operands directly.
|
|
if (Op0->getOpcode() == RISCVISD::BuildPairF64)
|
|
return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
|
|
|
|
SDLoc DL(N);
|
|
// This is a target-specific version of a DAGCombine performed in
|
|
// DAGCombiner::visitBITCAST. It performs the equivalent of:
|
|
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
|
|
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
|
|
if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
|
|
!Op0.getNode()->hasOneUse())
|
|
break;
|
|
SDValue NewSplitF64 =
|
|
DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
|
|
Op0.getOperand(0));
|
|
SDValue Lo = NewSplitF64.getValue(0);
|
|
SDValue Hi = NewSplitF64.getValue(1);
|
|
APInt SignBit = APInt::getSignMask(32);
|
|
if (Op0.getOpcode() == ISD::FNEG) {
|
|
SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
|
|
DAG.getConstant(SignBit, DL, MVT::i32));
|
|
return DCI.CombineTo(N, Lo, NewHi);
|
|
}
|
|
assert(Op0.getOpcode() == ISD::FABS);
|
|
SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
|
|
DAG.getConstant(~SignBit, DL, MVT::i32));
|
|
return DCI.CombineTo(N, Lo, NewHi);
|
|
}
|
|
case RISCVISD::SLLW:
|
|
case RISCVISD::SRAW:
|
|
case RISCVISD::SRLW: {
|
|
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
|
|
SDValue LHS = N->getOperand(0);
|
|
SDValue RHS = N->getOperand(1);
|
|
APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
|
|
APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
|
|
if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
|
|
(SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
|
|
return SDValue();
|
|
break;
|
|
}
|
|
case RISCVISD::FMV_X_ANYEXTW_RV64: {
|
|
SDLoc DL(N);
|
|
SDValue Op0 = N->getOperand(0);
|
|
// If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
|
|
// conversion is unnecessary and can be replaced with an ANY_EXTEND
|
|
// of the FMV_W_X_RV64 operand.
|
|
if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
|
|
SDValue AExtOp =
|
|
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
|
|
return DCI.CombineTo(N, AExtOp);
|
|
}
|
|
|
|
// This is a target-specific version of a DAGCombine performed in
|
|
// DAGCombiner::visitBITCAST. It performs the equivalent of:
|
|
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
|
|
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
|
|
if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
|
|
!Op0.getNode()->hasOneUse())
|
|
break;
|
|
SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
|
|
Op0.getOperand(0));
|
|
APInt SignBit = APInt::getSignMask(32).sext(64);
|
|
if (Op0.getOpcode() == ISD::FNEG) {
|
|
return DCI.CombineTo(N,
|
|
DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
|
|
DAG.getConstant(SignBit, DL, MVT::i64)));
|
|
}
|
|
assert(Op0.getOpcode() == ISD::FABS);
|
|
return DCI.CombineTo(N,
|
|
DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
|
|
DAG.getConstant(~SignBit, DL, MVT::i64)));
|
|
}
|
|
}
|
|
|
|
return SDValue();
|
|
}
|
|
|
|
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
|
|
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
|
|
unsigned Depth) const {
|
|
switch (Op.getOpcode()) {
|
|
default:
|
|
break;
|
|
case RISCVISD::SLLW:
|
|
case RISCVISD::SRAW:
|
|
case RISCVISD::SRLW:
|
|
case RISCVISD::DIVW:
|
|
case RISCVISD::DIVUW:
|
|
case RISCVISD::REMUW:
|
|
// TODO: As the result is sign-extended, this is conservatively correct. A
|
|
// more precise answer could be calculated for SRAW depending on known
|
|
// bits in the shift amount.
|
|
return 33;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
|
|
MachineBasicBlock *BB) {
|
|
assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
|
|
|
|
MachineFunction &MF = *BB->getParent();
|
|
DebugLoc DL = MI.getDebugLoc();
|
|
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
|
|
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
|
|
unsigned LoReg = MI.getOperand(0).getReg();
|
|
unsigned HiReg = MI.getOperand(1).getReg();
|
|
unsigned SrcReg = MI.getOperand(2).getReg();
|
|
const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
|
|
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
|
|
|
|
TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
|
|
RI);
|
|
MachineMemOperand *MMO =
|
|
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
|
|
MachineMemOperand::MOLoad, 8, 8);
|
|
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
|
|
.addFrameIndex(FI)
|
|
.addImm(0)
|
|
.addMemOperand(MMO);
|
|
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
|
|
.addFrameIndex(FI)
|
|
.addImm(4)
|
|
.addMemOperand(MMO);
|
|
MI.eraseFromParent(); // The pseudo instruction is gone now.
|
|
return BB;
|
|
}
|
|
|
|
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
|
|
MachineBasicBlock *BB) {
|
|
assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
|
|
"Unexpected instruction");
|
|
|
|
MachineFunction &MF = *BB->getParent();
|
|
DebugLoc DL = MI.getDebugLoc();
|
|
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
|
|
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
|
|
unsigned DstReg = MI.getOperand(0).getReg();
|
|
unsigned LoReg = MI.getOperand(1).getReg();
|
|
unsigned HiReg = MI.getOperand(2).getReg();
|
|
const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
|
|
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
|
|
|
|
MachineMemOperand *MMO =
|
|
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
|
|
MachineMemOperand::MOStore, 8, 8);
|
|
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
|
|
.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
|
|
.addFrameIndex(FI)
|
|
.addImm(0)
|
|
.addMemOperand(MMO);
|
|
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
|
|
.addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
|
|
.addFrameIndex(FI)
|
|
.addImm(4)
|
|
.addMemOperand(MMO);
|
|
TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
|
|
MI.eraseFromParent(); // The pseudo instruction is gone now.
|
|
return BB;
|
|
}
|
|
|
|
static bool isSelectPseudo(MachineInstr &MI) {
|
|
switch (MI.getOpcode()) {
|
|
default:
|
|
return false;
|
|
case RISCV::Select_GPR_Using_CC_GPR:
|
|
case RISCV::Select_FPR32_Using_CC_GPR:
|
|
case RISCV::Select_FPR64_Using_CC_GPR:
|
|
return true;
|
|
}
|
|
}
|
|
|
|
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
|
|
MachineBasicBlock *BB) {
|
|
// To "insert" Select_* instructions, we actually have to insert the triangle
|
|
// control-flow pattern. The incoming instructions know the destination vreg
|
|
// to set, the condition code register to branch on, the true/false values to
|
|
// select between, and the condcode to use to select the appropriate branch.
|
|
//
|
|
// We produce the following control flow:
|
|
// HeadMBB
|
|
// | \
|
|
// | IfFalseMBB
|
|
// | /
|
|
// TailMBB
|
|
//
|
|
// When we find a sequence of selects we attempt to optimize their emission
|
|
// by sharing the control flow. Currently we only handle cases where we have
|
|
// multiple selects with the exact same condition (same LHS, RHS and CC).
|
|
// The selects may be interleaved with other instructions if the other
|
|
// instructions meet some requirements we deem safe:
|
|
// - They are debug instructions. Otherwise,
|
|
// - They do not have side-effects, do not access memory and their inputs do
|
|
// not depend on the results of the select pseudo-instructions.
|
|
// The TrueV/FalseV operands of the selects cannot depend on the result of
|
|
// previous selects in the sequence.
|
|
// These conditions could be further relaxed. See the X86 target for a
|
|
// related approach and more information.
|
|
unsigned LHS = MI.getOperand(1).getReg();
|
|
unsigned RHS = MI.getOperand(2).getReg();
|
|
auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
|
|
|
|
SmallVector<MachineInstr *, 4> SelectDebugValues;
|
|
SmallSet<unsigned, 4> SelectDests;
|
|
SelectDests.insert(MI.getOperand(0).getReg());
|
|
|
|
MachineInstr *LastSelectPseudo = &MI;
|
|
|
|
for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
|
|
SequenceMBBI != E; ++SequenceMBBI) {
|
|
if (SequenceMBBI->isDebugInstr())
|
|
continue;
|
|
else if (isSelectPseudo(*SequenceMBBI)) {
|
|
if (SequenceMBBI->getOperand(1).getReg() != LHS ||
|
|
SequenceMBBI->getOperand(2).getReg() != RHS ||
|
|
SequenceMBBI->getOperand(3).getImm() != CC ||
|
|
SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
|
|
SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
|
|
break;
|
|
LastSelectPseudo = &*SequenceMBBI;
|
|
SequenceMBBI->collectDebugValues(SelectDebugValues);
|
|
SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
|
|
} else {
|
|
if (SequenceMBBI->hasUnmodeledSideEffects() ||
|
|
SequenceMBBI->mayLoadOrStore())
|
|
break;
|
|
if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
|
|
return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
|
|
}))
|
|
break;
|
|
}
|
|
}
|
|
|
|
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
|
|
const BasicBlock *LLVM_BB = BB->getBasicBlock();
|
|
DebugLoc DL = MI.getDebugLoc();
|
|
MachineFunction::iterator I = ++BB->getIterator();
|
|
|
|
MachineBasicBlock *HeadMBB = BB;
|
|
MachineFunction *F = BB->getParent();
|
|
MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
F->insert(I, IfFalseMBB);
|
|
F->insert(I, TailMBB);
|
|
|
|
// Transfer debug instructions associated with the selects to TailMBB.
|
|
for (MachineInstr *DebugInstr : SelectDebugValues) {
|
|
TailMBB->push_back(DebugInstr->removeFromParent());
|
|
}
|
|
|
|
// Move all instructions after the sequence to TailMBB.
|
|
TailMBB->splice(TailMBB->end(), HeadMBB,
|
|
std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
|
|
// Update machine-CFG edges by transferring all successors of the current
|
|
// block to the new block which will contain the Phi nodes for the selects.
|
|
TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
|
|
// Set the successors for HeadMBB.
|
|
HeadMBB->addSuccessor(IfFalseMBB);
|
|
HeadMBB->addSuccessor(TailMBB);
|
|
|
|
// Insert appropriate branch.
|
|
unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
|
|
|
|
BuildMI(HeadMBB, DL, TII.get(Opcode))
|
|
.addReg(LHS)
|
|
.addReg(RHS)
|
|
.addMBB(TailMBB);
|
|
|
|
// IfFalseMBB just falls through to TailMBB.
|
|
IfFalseMBB->addSuccessor(TailMBB);
|
|
|
|
// Create PHIs for all of the select pseudo-instructions.
|
|
auto SelectMBBI = MI.getIterator();
|
|
auto SelectEnd = std::next(LastSelectPseudo->getIterator());
|
|
auto InsertionPoint = TailMBB->begin();
|
|
while (SelectMBBI != SelectEnd) {
|
|
auto Next = std::next(SelectMBBI);
|
|
if (isSelectPseudo(*SelectMBBI)) {
|
|
// %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
|
|
BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
|
|
TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
|
|
.addReg(SelectMBBI->getOperand(4).getReg())
|
|
.addMBB(HeadMBB)
|
|
.addReg(SelectMBBI->getOperand(5).getReg())
|
|
.addMBB(IfFalseMBB);
|
|
SelectMBBI->eraseFromParent();
|
|
}
|
|
SelectMBBI = Next;
|
|
}
|
|
|
|
return TailMBB;
|
|
}
|
|
|
|
MachineBasicBlock *
|
|
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
|
MachineBasicBlock *BB) const {
|
|
switch (MI.getOpcode()) {
|
|
default:
|
|
llvm_unreachable("Unexpected instr type to insert");
|
|
case RISCV::Select_GPR_Using_CC_GPR:
|
|
case RISCV::Select_FPR32_Using_CC_GPR:
|
|
case RISCV::Select_FPR64_Using_CC_GPR:
|
|
return emitSelectPseudo(MI, BB);
|
|
case RISCV::BuildPairF64Pseudo:
|
|
return emitBuildPairF64Pseudo(MI, BB);
|
|
case RISCV::SplitF64Pseudo:
|
|
return emitSplitF64Pseudo(MI, BB);
|
|
}
|
|
}
|
|
|
|
// Calling Convention Implementation.
|
|
// The expectations for frontend ABI lowering vary from target to target.
|
|
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
|
|
// details, but this is a longer term goal. For now, we simply try to keep the
|
|
// role of the frontend as simple and well-defined as possible. The rules can
|
|
// be summarised as:
|
|
// * Never split up large scalar arguments. We handle them here.
|
|
// * If a hardfloat calling convention is being used, and the struct may be
|
|
// passed in a pair of registers (fp+fp, int+fp), and both registers are
|
|
// available, then pass as two separate arguments. If either the GPRs or FPRs
|
|
// are exhausted, then pass according to the rule below.
|
|
// * If a struct could never be passed in registers or directly in a stack
|
|
// slot (as it is larger than 2*XLEN and the floating point rules don't
|
|
// apply), then pass it using a pointer with the byval attribute.
|
|
// * If a struct is less than 2*XLEN, then coerce to either a two-element
|
|
// word-sized array or a 2*XLEN scalar (depending on alignment).
|
|
// * The frontend can determine whether a struct is returned by reference or
|
|
// not based on its size and fields. If it will be returned by reference, the
|
|
// frontend must modify the prototype so a pointer with the sret annotation is
|
|
// passed as the first argument. This is not necessary for large scalar
|
|
// returns.
|
|
// * Struct return values and varargs should be coerced to structs containing
|
|
// register-size fields in the same situations they would be for fixed
|
|
// arguments.
|
|
|
|
static const MCPhysReg ArgGPRs[] = {
|
|
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
|
|
RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
|
|
};
|
|
|
|
// Pass a 2*XLEN argument that has been split into two XLEN values through
|
|
// registers or the stack as necessary.
|
|
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
|
|
ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
|
|
MVT ValVT2, MVT LocVT2,
|
|
ISD::ArgFlagsTy ArgFlags2) {
|
|
unsigned XLenInBytes = XLen / 8;
|
|
if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
|
|
// At least one half can be passed via register.
|
|
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
|
|
VA1.getLocVT(), CCValAssign::Full));
|
|
} else {
|
|
// Both halves must be passed on the stack, with proper alignment.
|
|
unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
|
|
State.addLoc(
|
|
CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
|
|
State.AllocateStack(XLenInBytes, StackAlign),
|
|
VA1.getLocVT(), CCValAssign::Full));
|
|
State.addLoc(CCValAssign::getMem(
|
|
ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
|
|
CCValAssign::Full));
|
|
return false;
|
|
}
|
|
|
|
if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
|
|
// The second half can also be passed via register.
|
|
State.addLoc(
|
|
CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
|
|
} else {
|
|
// The second half is passed via the stack, without additional alignment.
|
|
State.addLoc(CCValAssign::getMem(
|
|
ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
|
|
CCValAssign::Full));
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Implements the RISC-V calling convention. Returns true upon failure.
|
|
static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
|
|
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
|
|
CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
|
|
unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
|
|
assert(XLen == 32 || XLen == 64);
|
|
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
|
|
|
|
// Any return value split in to more than two values can't be returned
|
|
// directly.
|
|
if (IsRet && ValNo > 1)
|
|
return true;
|
|
|
|
if (ValVT == MVT::f32) {
|
|
LocVT = XLenVT;
|
|
LocInfo = CCValAssign::BCvt;
|
|
} else if (XLen == 64 && ValVT == MVT::f64) {
|
|
LocVT = MVT::i64;
|
|
LocInfo = CCValAssign::BCvt;
|
|
}
|
|
|
|
// If this is a variadic argument, the RISC-V calling convention requires
|
|
// that it is assigned an 'even' or 'aligned' register if it has 8-byte
|
|
// alignment (RV32) or 16-byte alignment (RV64). An aligned register should
|
|
// be used regardless of whether the original argument was split during
|
|
// legalisation or not. The argument will not be passed by registers if the
|
|
// original type is larger than 2*XLEN, so the register alignment rule does
|
|
// not apply.
|
|
unsigned TwoXLenInBytes = (2 * XLen) / 8;
|
|
if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
|
|
DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
|
|
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
|
|
// Skip 'odd' register if necessary.
|
|
if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
|
|
State.AllocateReg(ArgGPRs);
|
|
}
|
|
|
|
SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
|
|
SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
|
|
State.getPendingArgFlags();
|
|
|
|
assert(PendingLocs.size() == PendingArgFlags.size() &&
|
|
"PendingLocs and PendingArgFlags out of sync");
|
|
|
|
// Handle passing f64 on RV32D with a soft float ABI.
|
|
if (XLen == 32 && ValVT == MVT::f64) {
|
|
assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
|
|
"Can't lower f64 if it is split");
|
|
// Depending on available argument GPRS, f64 may be passed in a pair of
|
|
// GPRs, split between a GPR and the stack, or passed completely on the
|
|
// stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
|
|
// cases.
|
|
unsigned Reg = State.AllocateReg(ArgGPRs);
|
|
LocVT = MVT::i32;
|
|
if (!Reg) {
|
|
unsigned StackOffset = State.AllocateStack(8, 8);
|
|
State.addLoc(
|
|
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
|
|
return false;
|
|
}
|
|
if (!State.AllocateReg(ArgGPRs))
|
|
State.AllocateStack(4, 4);
|
|
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
|
return false;
|
|
}
|
|
|
|
// Split arguments might be passed indirectly, so keep track of the pending
|
|
// values.
|
|
if (ArgFlags.isSplit() || !PendingLocs.empty()) {
|
|
LocVT = XLenVT;
|
|
LocInfo = CCValAssign::Indirect;
|
|
PendingLocs.push_back(
|
|
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
|
|
PendingArgFlags.push_back(ArgFlags);
|
|
if (!ArgFlags.isSplitEnd()) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// If the split argument only had two elements, it should be passed directly
|
|
// in registers or on the stack.
|
|
if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
|
|
assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
|
|
// Apply the normal calling convention rules to the first half of the
|
|
// split argument.
|
|
CCValAssign VA = PendingLocs[0];
|
|
ISD::ArgFlagsTy AF = PendingArgFlags[0];
|
|
PendingLocs.clear();
|
|
PendingArgFlags.clear();
|
|
return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
|
|
ArgFlags);
|
|
}
|
|
|
|
// Allocate to a register if possible, or else a stack slot.
|
|
unsigned Reg = State.AllocateReg(ArgGPRs);
|
|
unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
|
|
|
|
// If we reach this point and PendingLocs is non-empty, we must be at the
|
|
// end of a split argument that must be passed indirectly.
|
|
if (!PendingLocs.empty()) {
|
|
assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
|
|
assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
|
|
|
|
for (auto &It : PendingLocs) {
|
|
if (Reg)
|
|
It.convertToReg(Reg);
|
|
else
|
|
It.convertToMem(StackOffset);
|
|
State.addLoc(It);
|
|
}
|
|
PendingLocs.clear();
|
|
PendingArgFlags.clear();
|
|
return false;
|
|
}
|
|
|
|
assert(LocVT == XLenVT && "Expected an XLenVT at this stage");
|
|
|
|
if (Reg) {
|
|
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
|
return false;
|
|
}
|
|
|
|
// When an f32 or f64 is passed on the stack, no bit-conversion is needed.
|
|
if (ValVT == MVT::f32 || ValVT == MVT::f64) {
|
|
LocVT = ValVT;
|
|
LocInfo = CCValAssign::Full;
|
|
}
|
|
State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
|
|
return false;
|
|
}
|
|
|
|
void RISCVTargetLowering::analyzeInputArgs(
|
|
MachineFunction &MF, CCState &CCInfo,
|
|
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
|
|
unsigned NumArgs = Ins.size();
|
|
FunctionType *FType = MF.getFunction().getFunctionType();
|
|
|
|
for (unsigned i = 0; i != NumArgs; ++i) {
|
|
MVT ArgVT = Ins[i].VT;
|
|
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
|
|
|
|
Type *ArgTy = nullptr;
|
|
if (IsRet)
|
|
ArgTy = FType->getReturnType();
|
|
else if (Ins[i].isOrigArg())
|
|
ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
|
|
|
|
if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
|
|
ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
|
|
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
|
|
<< EVT(ArgVT).getEVTString() << '\n');
|
|
llvm_unreachable(nullptr);
|
|
}
|
|
}
|
|
}
|
|
|
|
void RISCVTargetLowering::analyzeOutputArgs(
|
|
MachineFunction &MF, CCState &CCInfo,
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
|
|
CallLoweringInfo *CLI) const {
|
|
unsigned NumArgs = Outs.size();
|
|
|
|
for (unsigned i = 0; i != NumArgs; i++) {
|
|
MVT ArgVT = Outs[i].VT;
|
|
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
|
|
Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
|
|
|
|
if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
|
|
ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
|
|
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
|
|
<< EVT(ArgVT).getEVTString() << "\n");
|
|
llvm_unreachable(nullptr);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
|
|
// values.
|
|
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
|
|
const CCValAssign &VA, const SDLoc &DL) {
|
|
switch (VA.getLocInfo()) {
|
|
default:
|
|
llvm_unreachable("Unexpected CCValAssign::LocInfo");
|
|
case CCValAssign::Full:
|
|
break;
|
|
case CCValAssign::BCvt:
|
|
if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
|
|
Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
|
|
break;
|
|
}
|
|
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
|
|
break;
|
|
}
|
|
return Val;
|
|
}
|
|
|
|
// The caller is responsible for loading the full value if the argument is
|
|
// passed with CCValAssign::Indirect.
|
|
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
|
|
const CCValAssign &VA, const SDLoc &DL) {
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
MachineRegisterInfo &RegInfo = MF.getRegInfo();
|
|
EVT LocVT = VA.getLocVT();
|
|
SDValue Val;
|
|
|
|
unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
|
|
RegInfo.addLiveIn(VA.getLocReg(), VReg);
|
|
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
|
|
|
|
if (VA.getLocInfo() == CCValAssign::Indirect)
|
|
return Val;
|
|
|
|
return convertLocVTToValVT(DAG, Val, VA, DL);
|
|
}
|
|
|
|
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
|
|
const CCValAssign &VA, const SDLoc &DL) {
|
|
EVT LocVT = VA.getLocVT();
|
|
|
|
switch (VA.getLocInfo()) {
|
|
default:
|
|
llvm_unreachable("Unexpected CCValAssign::LocInfo");
|
|
case CCValAssign::Full:
|
|
break;
|
|
case CCValAssign::BCvt:
|
|
if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
|
|
Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
|
|
break;
|
|
}
|
|
Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
|
|
break;
|
|
}
|
|
return Val;
|
|
}
|
|
|
|
// The caller is responsible for loading the full value if the argument is
|
|
// passed with CCValAssign::Indirect.
|
|
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
|
|
const CCValAssign &VA, const SDLoc &DL) {
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
EVT LocVT = VA.getLocVT();
|
|
EVT ValVT = VA.getValVT();
|
|
EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
|
|
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
|
|
VA.getLocMemOffset(), /*Immutable=*/true);
|
|
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
|
SDValue Val;
|
|
|
|
ISD::LoadExtType ExtType;
|
|
switch (VA.getLocInfo()) {
|
|
default:
|
|
llvm_unreachable("Unexpected CCValAssign::LocInfo");
|
|
case CCValAssign::Full:
|
|
case CCValAssign::Indirect:
|
|
case CCValAssign::BCvt:
|
|
ExtType = ISD::NON_EXTLOAD;
|
|
break;
|
|
}
|
|
Val = DAG.getExtLoad(
|
|
ExtType, DL, LocVT, Chain, FIN,
|
|
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
|
|
return Val;
|
|
}
|
|
|
|
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
|
|
const CCValAssign &VA, const SDLoc &DL) {
|
|
assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
|
|
"Unexpected VA");
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
MachineRegisterInfo &RegInfo = MF.getRegInfo();
|
|
|
|
if (VA.isMemLoc()) {
|
|
// f64 is passed on the stack.
|
|
int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
|
|
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
|
|
return DAG.getLoad(MVT::f64, DL, Chain, FIN,
|
|
MachinePointerInfo::getFixedStack(MF, FI));
|
|
}
|
|
|
|
assert(VA.isRegLoc() && "Expected register VA assignment");
|
|
|
|
unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
|
|
RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
|
|
SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
|
|
SDValue Hi;
|
|
if (VA.getLocReg() == RISCV::X17) {
|
|
// Second half of f64 is passed on the stack.
|
|
int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
|
|
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
|
|
Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
|
|
MachinePointerInfo::getFixedStack(MF, FI));
|
|
} else {
|
|
// Second half of f64 is passed in another GPR.
|
|
unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
|
|
RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
|
|
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
|
|
}
|
|
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
|
|
}
|
|
|
|
// Transform physical registers into virtual registers.
|
|
SDValue RISCVTargetLowering::LowerFormalArguments(
|
|
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
|
|
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
|
|
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
|
|
|
|
switch (CallConv) {
|
|
default:
|
|
report_fatal_error("Unsupported calling convention");
|
|
case CallingConv::C:
|
|
case CallingConv::Fast:
|
|
break;
|
|
}
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
const Function &Func = MF.getFunction();
|
|
if (Func.hasFnAttribute("interrupt")) {
|
|
if (!Func.arg_empty())
|
|
report_fatal_error(
|
|
"Functions with the interrupt attribute cannot have arguments!");
|
|
|
|
StringRef Kind =
|
|
MF.getFunction().getFnAttribute("interrupt").getValueAsString();
|
|
|
|
if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
|
|
report_fatal_error(
|
|
"Function interrupt attribute argument not supported!");
|
|
}
|
|
|
|
EVT PtrVT = getPointerTy(DAG.getDataLayout());
|
|
MVT XLenVT = Subtarget.getXLenVT();
|
|
unsigned XLenInBytes = Subtarget.getXLen() / 8;
|
|
// Used with vargs to acumulate store chains.
|
|
std::vector<SDValue> OutChains;
|
|
|
|
// Assign locations to all of the incoming arguments.
|
|
SmallVector<CCValAssign, 16> ArgLocs;
|
|
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
|
|
analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
|
|
|
|
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
|
CCValAssign &VA = ArgLocs[i];
|
|
SDValue ArgValue;
|
|
// Passing f64 on RV32D with a soft float ABI must be handled as a special
|
|
// case.
|
|
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
|
|
ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
|
|
else if (VA.isRegLoc())
|
|
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
|
|
else
|
|
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
|
|
|
|
if (VA.getLocInfo() == CCValAssign::Indirect) {
|
|
// If the original argument was split and passed by reference (e.g. i128
|
|
// on RV32), we need to load all parts of it here (using the same
|
|
// address).
|
|
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
|
|
MachinePointerInfo()));
|
|
unsigned ArgIndex = Ins[i].OrigArgIndex;
|
|
assert(Ins[i].PartOffset == 0);
|
|
while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
|
|
CCValAssign &PartVA = ArgLocs[i + 1];
|
|
unsigned PartOffset = Ins[i + 1].PartOffset;
|
|
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
|
|
DAG.getIntPtrConstant(PartOffset, DL));
|
|
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
|
|
MachinePointerInfo()));
|
|
++i;
|
|
}
|
|
continue;
|
|
}
|
|
InVals.push_back(ArgValue);
|
|
}
|
|
|
|
if (IsVarArg) {
|
|
ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
|
|
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
|
|
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
|
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
MachineRegisterInfo &RegInfo = MF.getRegInfo();
|
|
RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
|
|
|
|
// Offset of the first variable argument from stack pointer, and size of
|
|
// the vararg save area. For now, the varargs save area is either zero or
|
|
// large enough to hold a0-a7.
|
|
int VaArgOffset, VarArgsSaveSize;
|
|
|
|
// If all registers are allocated, then all varargs must be passed on the
|
|
// stack and we don't need to save any argregs.
|
|
if (ArgRegs.size() == Idx) {
|
|
VaArgOffset = CCInfo.getNextStackOffset();
|
|
VarArgsSaveSize = 0;
|
|
} else {
|
|
VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
|
|
VaArgOffset = -VarArgsSaveSize;
|
|
}
|
|
|
|
// Record the frame index of the first variable argument
|
|
// which is a value necessary to VASTART.
|
|
int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
|
|
RVFI->setVarArgsFrameIndex(FI);
|
|
|
|
// If saving an odd number of registers then create an extra stack slot to
|
|
// ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
|
|
// offsets to even-numbered registered remain 2*XLEN-aligned.
|
|
if (Idx % 2) {
|
|
FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
|
|
true);
|
|
VarArgsSaveSize += XLenInBytes;
|
|
}
|
|
|
|
// Copy the integer registers that may have been used for passing varargs
|
|
// to the vararg save area.
|
|
for (unsigned I = Idx; I < ArgRegs.size();
|
|
++I, VaArgOffset += XLenInBytes) {
|
|
const unsigned Reg = RegInfo.createVirtualRegister(RC);
|
|
RegInfo.addLiveIn(ArgRegs[I], Reg);
|
|
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
|
|
FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
|
|
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
|
|
SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
|
|
MachinePointerInfo::getFixedStack(MF, FI));
|
|
cast<StoreSDNode>(Store.getNode())
|
|
->getMemOperand()
|
|
->setValue((Value *)nullptr);
|
|
OutChains.push_back(Store);
|
|
}
|
|
RVFI->setVarArgsSaveSize(VarArgsSaveSize);
|
|
}
|
|
|
|
// All stores are grouped in one node to allow the matching between
|
|
// the size of Ins and InVals. This only happens for vararg functions.
|
|
if (!OutChains.empty()) {
|
|
OutChains.push_back(Chain);
|
|
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
|
|
}
|
|
|
|
return Chain;
|
|
}
|
|
|
|
/// isEligibleForTailCallOptimization - Check whether the call is eligible
|
|
/// for tail call optimization.
|
|
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
|
|
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
|
|
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
|
|
const SmallVector<CCValAssign, 16> &ArgLocs) const {
|
|
|
|
auto &Callee = CLI.Callee;
|
|
auto CalleeCC = CLI.CallConv;
|
|
auto IsVarArg = CLI.IsVarArg;
|
|
auto &Outs = CLI.Outs;
|
|
auto &Caller = MF.getFunction();
|
|
auto CallerCC = Caller.getCallingConv();
|
|
|
|
// Do not tail call opt functions with "disable-tail-calls" attribute.
|
|
if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
|
|
return false;
|
|
|
|
// Exception-handling functions need a special set of instructions to
|
|
// indicate a return to the hardware. Tail-calling another function would
|
|
// probably break this.
|
|
// TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
|
|
// should be expanded as new function attributes are introduced.
|
|
if (Caller.hasFnAttribute("interrupt"))
|
|
return false;
|
|
|
|
// Do not tail call opt functions with varargs.
|
|
if (IsVarArg)
|
|
return false;
|
|
|
|
// Do not tail call opt if the stack is used to pass parameters.
|
|
if (CCInfo.getNextStackOffset() != 0)
|
|
return false;
|
|
|
|
// Do not tail call opt if any parameters need to be passed indirectly.
|
|
// Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
|
|
// passed indirectly. So the address of the value will be passed in a
|
|
// register, or if not available, then the address is put on the stack. In
|
|
// order to pass indirectly, space on the stack often needs to be allocated
|
|
// in order to store the value. In this case the CCInfo.getNextStackOffset()
|
|
// != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
|
|
// are passed CCValAssign::Indirect.
|
|
for (auto &VA : ArgLocs)
|
|
if (VA.getLocInfo() == CCValAssign::Indirect)
|
|
return false;
|
|
|
|
// Do not tail call opt if either caller or callee uses struct return
|
|
// semantics.
|
|
auto IsCallerStructRet = Caller.hasStructRetAttr();
|
|
auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
|
|
if (IsCallerStructRet || IsCalleeStructRet)
|
|
return false;
|
|
|
|
// Externally-defined functions with weak linkage should not be
|
|
// tail-called. The behaviour of branch instructions in this situation (as
|
|
// used for tail calls) is implementation-defined, so we cannot rely on the
|
|
// linker replacing the tail call with a return.
|
|
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
|
|
const GlobalValue *GV = G->getGlobal();
|
|
if (GV->hasExternalWeakLinkage())
|
|
return false;
|
|
}
|
|
|
|
// The callee has to preserve all registers the caller needs to preserve.
|
|
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
|
|
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
|
|
if (CalleeCC != CallerCC) {
|
|
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
|
|
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
|
|
return false;
|
|
}
|
|
|
|
// Byval parameters hand the function a pointer directly into the stack area
|
|
// we want to reuse during a tail call. Working around this *is* possible
|
|
// but less efficient and uglier in LowerCall.
|
|
for (auto &Arg : Outs)
|
|
if (Arg.Flags.isByVal())
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
|
|
// and output parameter nodes.
|
|
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
|
|
SmallVectorImpl<SDValue> &InVals) const {
|
|
SelectionDAG &DAG = CLI.DAG;
|
|
SDLoc &DL = CLI.DL;
|
|
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
|
|
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
|
|
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
|
|
SDValue Chain = CLI.Chain;
|
|
SDValue Callee = CLI.Callee;
|
|
bool &IsTailCall = CLI.IsTailCall;
|
|
CallingConv::ID CallConv = CLI.CallConv;
|
|
bool IsVarArg = CLI.IsVarArg;
|
|
EVT PtrVT = getPointerTy(DAG.getDataLayout());
|
|
MVT XLenVT = Subtarget.getXLenVT();
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
// Analyze the operands of the call, assigning locations to each operand.
|
|
SmallVector<CCValAssign, 16> ArgLocs;
|
|
CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
|
|
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
|
|
|
|
// Check if it's really possible to do a tail call.
|
|
if (IsTailCall)
|
|
IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
|
|
|
|
if (IsTailCall)
|
|
++NumTailCalls;
|
|
else if (CLI.CS && CLI.CS.isMustTailCall())
|
|
report_fatal_error("failed to perform tail call elimination on a call "
|
|
"site marked musttail");
|
|
|
|
// Get a count of how many bytes are to be pushed on the stack.
|
|
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
|
|
|
|
// Create local copies for byval args
|
|
SmallVector<SDValue, 8> ByValArgs;
|
|
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
|
|
ISD::ArgFlagsTy Flags = Outs[i].Flags;
|
|
if (!Flags.isByVal())
|
|
continue;
|
|
|
|
SDValue Arg = OutVals[i];
|
|
unsigned Size = Flags.getByValSize();
|
|
unsigned Align = Flags.getByValAlign();
|
|
|
|
int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
|
|
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
|
|
SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
|
|
|
|
Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
|
|
/*IsVolatile=*/false,
|
|
/*AlwaysInline=*/false,
|
|
IsTailCall, MachinePointerInfo(),
|
|
MachinePointerInfo());
|
|
ByValArgs.push_back(FIPtr);
|
|
}
|
|
|
|
if (!IsTailCall)
|
|
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
|
|
|
|
// Copy argument values to their designated locations.
|
|
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
|
|
SmallVector<SDValue, 8> MemOpChains;
|
|
SDValue StackPtr;
|
|
for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
|
|
CCValAssign &VA = ArgLocs[i];
|
|
SDValue ArgValue = OutVals[i];
|
|
ISD::ArgFlagsTy Flags = Outs[i].Flags;
|
|
|
|
// Handle passing f64 on RV32D with a soft float ABI as a special case.
|
|
bool IsF64OnRV32DSoftABI =
|
|
VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
|
|
if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
|
|
SDValue SplitF64 = DAG.getNode(
|
|
RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
|
|
SDValue Lo = SplitF64.getValue(0);
|
|
SDValue Hi = SplitF64.getValue(1);
|
|
|
|
unsigned RegLo = VA.getLocReg();
|
|
RegsToPass.push_back(std::make_pair(RegLo, Lo));
|
|
|
|
if (RegLo == RISCV::X17) {
|
|
// Second half of f64 is passed on the stack.
|
|
// Work out the address of the stack slot.
|
|
if (!StackPtr.getNode())
|
|
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
|
|
// Emit the store.
|
|
MemOpChains.push_back(
|
|
DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
|
|
} else {
|
|
// Second half of f64 is passed in another GPR.
|
|
unsigned RegHigh = RegLo + 1;
|
|
RegsToPass.push_back(std::make_pair(RegHigh, Hi));
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
|
|
// as any other MemLoc.
|
|
|
|
// Promote the value if needed.
|
|
// For now, only handle fully promoted and indirect arguments.
|
|
if (VA.getLocInfo() == CCValAssign::Indirect) {
|
|
// Store the argument in a stack slot and pass its address.
|
|
SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
|
|
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
|
|
MemOpChains.push_back(
|
|
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
|
|
MachinePointerInfo::getFixedStack(MF, FI)));
|
|
// If the original argument was split (e.g. i128), we need
|
|
// to store all parts of it here (and pass just one address).
|
|
unsigned ArgIndex = Outs[i].OrigArgIndex;
|
|
assert(Outs[i].PartOffset == 0);
|
|
while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
|
|
SDValue PartValue = OutVals[i + 1];
|
|
unsigned PartOffset = Outs[i + 1].PartOffset;
|
|
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
|
|
DAG.getIntPtrConstant(PartOffset, DL));
|
|
MemOpChains.push_back(
|
|
DAG.getStore(Chain, DL, PartValue, Address,
|
|
MachinePointerInfo::getFixedStack(MF, FI)));
|
|
++i;
|
|
}
|
|
ArgValue = SpillSlot;
|
|
} else {
|
|
ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
|
|
}
|
|
|
|
// Use local copy if it is a byval arg.
|
|
if (Flags.isByVal())
|
|
ArgValue = ByValArgs[j++];
|
|
|
|
if (VA.isRegLoc()) {
|
|
// Queue up the argument copies and emit them at the end.
|
|
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
|
|
} else {
|
|
assert(VA.isMemLoc() && "Argument not register or memory");
|
|
assert(!IsTailCall && "Tail call not allowed if stack is used "
|
|
"for passing parameters");
|
|
|
|
// Work out the address of the stack slot.
|
|
if (!StackPtr.getNode())
|
|
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
|
|
SDValue Address =
|
|
DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
|
|
DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
|
|
|
|
// Emit the store.
|
|
MemOpChains.push_back(
|
|
DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
|
|
}
|
|
}
|
|
|
|
// Join the stores, which are independent of one another.
|
|
if (!MemOpChains.empty())
|
|
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
|
|
|
|
SDValue Glue;
|
|
|
|
// Build a sequence of copy-to-reg nodes, chained and glued together.
|
|
for (auto &Reg : RegsToPass) {
|
|
Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
|
|
Glue = Chain.getValue(1);
|
|
}
|
|
|
|
// If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
|
|
// TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
|
|
// split it and then direct call can be matched by PseudoCALL.
|
|
if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
|
|
Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
|
|
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
|
|
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
|
|
}
|
|
|
|
// The first call operand is the chain and the second is the target address.
|
|
SmallVector<SDValue, 8> Ops;
|
|
Ops.push_back(Chain);
|
|
Ops.push_back(Callee);
|
|
|
|
// Add argument registers to the end of the list so that they are
|
|
// known live into the call.
|
|
for (auto &Reg : RegsToPass)
|
|
Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
|
|
|
|
if (!IsTailCall) {
|
|
// Add a register mask operand representing the call-preserved registers.
|
|
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
|
|
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
|
|
assert(Mask && "Missing call preserved mask for calling convention");
|
|
Ops.push_back(DAG.getRegisterMask(Mask));
|
|
}
|
|
|
|
// Glue the call to the argument copies, if any.
|
|
if (Glue.getNode())
|
|
Ops.push_back(Glue);
|
|
|
|
// Emit the call.
|
|
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
|
|
|
|
if (IsTailCall) {
|
|
MF.getFrameInfo().setHasTailCall();
|
|
return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
|
|
}
|
|
|
|
Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
|
|
Glue = Chain.getValue(1);
|
|
|
|
// Mark the end of the call, which is glued to the call itself.
|
|
Chain = DAG.getCALLSEQ_END(Chain,
|
|
DAG.getConstant(NumBytes, DL, PtrVT, true),
|
|
DAG.getConstant(0, DL, PtrVT, true),
|
|
Glue, DL);
|
|
Glue = Chain.getValue(1);
|
|
|
|
// Assign locations to each value returned by this call.
|
|
SmallVector<CCValAssign, 16> RVLocs;
|
|
CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
|
|
analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
|
|
|
|
// Copy all of the result registers out of their specified physreg.
|
|
for (auto &VA : RVLocs) {
|
|
// Copy the value out
|
|
SDValue RetValue =
|
|
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
|
|
// Glue the RetValue to the end of the call sequence
|
|
Chain = RetValue.getValue(1);
|
|
Glue = RetValue.getValue(2);
|
|
|
|
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
|
|
assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
|
|
SDValue RetValue2 =
|
|
DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
|
|
Chain = RetValue2.getValue(1);
|
|
Glue = RetValue2.getValue(2);
|
|
RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
|
|
RetValue2);
|
|
}
|
|
|
|
RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
|
|
|
|
InVals.push_back(RetValue);
|
|
}
|
|
|
|
return Chain;
|
|
}
|
|
|
|
bool RISCVTargetLowering::CanLowerReturn(
|
|
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
|
|
SmallVector<CCValAssign, 16> RVLocs;
|
|
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
|
|
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
|
|
MVT VT = Outs[i].VT;
|
|
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
|
|
if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
|
|
CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
SDValue
|
|
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
|
bool IsVarArg,
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
const SmallVectorImpl<SDValue> &OutVals,
|
|
const SDLoc &DL, SelectionDAG &DAG) const {
|
|
// Stores the assignment of the return value to a location.
|
|
SmallVector<CCValAssign, 16> RVLocs;
|
|
|
|
// Info about the registers and stack slot.
|
|
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
|
|
*DAG.getContext());
|
|
|
|
analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
|
|
nullptr);
|
|
|
|
SDValue Glue;
|
|
SmallVector<SDValue, 4> RetOps(1, Chain);
|
|
|
|
// Copy the result values into the output registers.
|
|
for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
|
|
SDValue Val = OutVals[i];
|
|
CCValAssign &VA = RVLocs[i];
|
|
assert(VA.isRegLoc() && "Can only return in registers!");
|
|
|
|
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
|
|
// Handle returning f64 on RV32D with a soft float ABI.
|
|
assert(VA.isRegLoc() && "Expected return via registers");
|
|
SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
|
|
DAG.getVTList(MVT::i32, MVT::i32), Val);
|
|
SDValue Lo = SplitF64.getValue(0);
|
|
SDValue Hi = SplitF64.getValue(1);
|
|
unsigned RegLo = VA.getLocReg();
|
|
unsigned RegHi = RegLo + 1;
|
|
Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
|
|
Glue = Chain.getValue(1);
|
|
RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
|
|
Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
|
|
Glue = Chain.getValue(1);
|
|
RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
|
|
} else {
|
|
// Handle a 'normal' return.
|
|
Val = convertValVTToLocVT(DAG, Val, VA, DL);
|
|
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
|
|
|
|
// Guarantee that all emitted copies are stuck together.
|
|
Glue = Chain.getValue(1);
|
|
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
|
|
}
|
|
}
|
|
|
|
RetOps[0] = Chain; // Update chain.
|
|
|
|
// Add the glue node if we have it.
|
|
if (Glue.getNode()) {
|
|
RetOps.push_back(Glue);
|
|
}
|
|
|
|
// Interrupt service routines use different return instructions.
|
|
const Function &Func = DAG.getMachineFunction().getFunction();
|
|
if (Func.hasFnAttribute("interrupt")) {
|
|
if (!Func.getReturnType()->isVoidTy())
|
|
report_fatal_error(
|
|
"Functions with the interrupt attribute must have void return type!");
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
StringRef Kind =
|
|
MF.getFunction().getFnAttribute("interrupt").getValueAsString();
|
|
|
|
unsigned RetOpc;
|
|
if (Kind == "user")
|
|
RetOpc = RISCVISD::URET_FLAG;
|
|
else if (Kind == "supervisor")
|
|
RetOpc = RISCVISD::SRET_FLAG;
|
|
else
|
|
RetOpc = RISCVISD::MRET_FLAG;
|
|
|
|
return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
|
|
}
|
|
|
|
return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
|
|
}
|
|
|
|
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|
switch ((RISCVISD::NodeType)Opcode) {
|
|
case RISCVISD::FIRST_NUMBER:
|
|
break;
|
|
case RISCVISD::RET_FLAG:
|
|
return "RISCVISD::RET_FLAG";
|
|
case RISCVISD::URET_FLAG:
|
|
return "RISCVISD::URET_FLAG";
|
|
case RISCVISD::SRET_FLAG:
|
|
return "RISCVISD::SRET_FLAG";
|
|
case RISCVISD::MRET_FLAG:
|
|
return "RISCVISD::MRET_FLAG";
|
|
case RISCVISD::CALL:
|
|
return "RISCVISD::CALL";
|
|
case RISCVISD::SELECT_CC:
|
|
return "RISCVISD::SELECT_CC";
|
|
case RISCVISD::BuildPairF64:
|
|
return "RISCVISD::BuildPairF64";
|
|
case RISCVISD::SplitF64:
|
|
return "RISCVISD::SplitF64";
|
|
case RISCVISD::TAIL:
|
|
return "RISCVISD::TAIL";
|
|
case RISCVISD::SLLW:
|
|
return "RISCVISD::SLLW";
|
|
case RISCVISD::SRAW:
|
|
return "RISCVISD::SRAW";
|
|
case RISCVISD::SRLW:
|
|
return "RISCVISD::SRLW";
|
|
case RISCVISD::DIVW:
|
|
return "RISCVISD::DIVW";
|
|
case RISCVISD::DIVUW:
|
|
return "RISCVISD::DIVUW";
|
|
case RISCVISD::REMUW:
|
|
return "RISCVISD::REMUW";
|
|
case RISCVISD::FMV_W_X_RV64:
|
|
return "RISCVISD::FMV_W_X_RV64";
|
|
case RISCVISD::FMV_X_ANYEXTW_RV64:
|
|
return "RISCVISD::FMV_X_ANYEXTW_RV64";
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
std::pair<unsigned, const TargetRegisterClass *>
|
|
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
|
StringRef Constraint,
|
|
MVT VT) const {
|
|
// First, see if this is a constraint that directly corresponds to a
|
|
// RISCV register class.
|
|
if (Constraint.size() == 1) {
|
|
switch (Constraint[0]) {
|
|
case 'r':
|
|
return std::make_pair(0U, &RISCV::GPRRegClass);
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
|
|
}
|
|
|
|
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
|
|
Instruction *Inst,
|
|
AtomicOrdering Ord) const {
|
|
if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
|
|
return Builder.CreateFence(Ord);
|
|
if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
|
|
return Builder.CreateFence(AtomicOrdering::Release);
|
|
return nullptr;
|
|
}
|
|
|
|
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
|
|
Instruction *Inst,
|
|
AtomicOrdering Ord) const {
|
|
if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
|
|
return Builder.CreateFence(AtomicOrdering::Acquire);
|
|
return nullptr;
|
|
}
|
|
|
|
TargetLowering::AtomicExpansionKind
|
|
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
|
|
// atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
|
|
// point operations can't be used in an lr/sc sequence without breaking the
|
|
// forward-progress guarantee.
|
|
if (AI->isFloatingPointOperation())
|
|
return AtomicExpansionKind::CmpXChg;
|
|
|
|
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
|
|
if (Size == 8 || Size == 16)
|
|
return AtomicExpansionKind::MaskedIntrinsic;
|
|
return AtomicExpansionKind::None;
|
|
}
|
|
|
|
static Intrinsic::ID
|
|
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
|
|
if (XLen == 32) {
|
|
switch (BinOp) {
|
|
default:
|
|
llvm_unreachable("Unexpected AtomicRMW BinOp");
|
|
case AtomicRMWInst::Xchg:
|
|
return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
|
|
case AtomicRMWInst::Add:
|
|
return Intrinsic::riscv_masked_atomicrmw_add_i32;
|
|
case AtomicRMWInst::Sub:
|
|
return Intrinsic::riscv_masked_atomicrmw_sub_i32;
|
|
case AtomicRMWInst::Nand:
|
|
return Intrinsic::riscv_masked_atomicrmw_nand_i32;
|
|
case AtomicRMWInst::Max:
|
|
return Intrinsic::riscv_masked_atomicrmw_max_i32;
|
|
case AtomicRMWInst::Min:
|
|
return Intrinsic::riscv_masked_atomicrmw_min_i32;
|
|
case AtomicRMWInst::UMax:
|
|
return Intrinsic::riscv_masked_atomicrmw_umax_i32;
|
|
case AtomicRMWInst::UMin:
|
|
return Intrinsic::riscv_masked_atomicrmw_umin_i32;
|
|
}
|
|
}
|
|
|
|
if (XLen == 64) {
|
|
switch (BinOp) {
|
|
default:
|
|
llvm_unreachable("Unexpected AtomicRMW BinOp");
|
|
case AtomicRMWInst::Xchg:
|
|
return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
|
|
case AtomicRMWInst::Add:
|
|
return Intrinsic::riscv_masked_atomicrmw_add_i64;
|
|
case AtomicRMWInst::Sub:
|
|
return Intrinsic::riscv_masked_atomicrmw_sub_i64;
|
|
case AtomicRMWInst::Nand:
|
|
return Intrinsic::riscv_masked_atomicrmw_nand_i64;
|
|
case AtomicRMWInst::Max:
|
|
return Intrinsic::riscv_masked_atomicrmw_max_i64;
|
|
case AtomicRMWInst::Min:
|
|
return Intrinsic::riscv_masked_atomicrmw_min_i64;
|
|
case AtomicRMWInst::UMax:
|
|
return Intrinsic::riscv_masked_atomicrmw_umax_i64;
|
|
case AtomicRMWInst::UMin:
|
|
return Intrinsic::riscv_masked_atomicrmw_umin_i64;
|
|
}
|
|
}
|
|
|
|
llvm_unreachable("Unexpected XLen\n");
|
|
}
|
|
|
|
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
|
|
IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
|
|
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
|
|
unsigned XLen = Subtarget.getXLen();
|
|
Value *Ordering =
|
|
Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
|
|
Type *Tys[] = {AlignedAddr->getType()};
|
|
Function *LrwOpScwLoop = Intrinsic::getDeclaration(
|
|
AI->getModule(),
|
|
getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
|
|
|
|
if (XLen == 64) {
|
|
Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
|
|
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
|
|
ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
|
|
}
|
|
|
|
Value *Result;
|
|
|
|
// Must pass the shift amount needed to sign extend the loaded value prior
|
|
// to performing a signed comparison for min/max. ShiftAmt is the number of
|
|
// bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
|
|
// is the number of bits to left+right shift the value in order to
|
|
// sign-extend.
|
|
if (AI->getOperation() == AtomicRMWInst::Min ||
|
|
AI->getOperation() == AtomicRMWInst::Max) {
|
|
const DataLayout &DL = AI->getModule()->getDataLayout();
|
|
unsigned ValWidth =
|
|
DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
|
|
Value *SextShamt =
|
|
Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
|
|
Result = Builder.CreateCall(LrwOpScwLoop,
|
|
{AlignedAddr, Incr, Mask, SextShamt, Ordering});
|
|
} else {
|
|
Result =
|
|
Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
|
|
}
|
|
|
|
if (XLen == 64)
|
|
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
|
|
return Result;
|
|
}
|
|
|
|
TargetLowering::AtomicExpansionKind
|
|
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
|
|
AtomicCmpXchgInst *CI) const {
|
|
unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
|
|
if (Size == 8 || Size == 16)
|
|
return AtomicExpansionKind::MaskedIntrinsic;
|
|
return AtomicExpansionKind::None;
|
|
}
|
|
|
|
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
|
|
IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
|
|
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
|
|
unsigned XLen = Subtarget.getXLen();
|
|
Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
|
|
Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
|
|
if (XLen == 64) {
|
|
CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
|
|
NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
|
|
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
|
|
CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
|
|
}
|
|
Type *Tys[] = {AlignedAddr->getType()};
|
|
Function *MaskedCmpXchg =
|
|
Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
|
|
Value *Result = Builder.CreateCall(
|
|
MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
|
|
if (XLen == 64)
|
|
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
|
|
return Result;
|
|
}
|