This patch legalizes G_ZEXT, G_SEXT, and G_ANYEXT. If the source is a legal mask type, the instruction is legalized as an element-wise select, where the condition is the mask-typed source operand, the false value is zero, and the true value is 1 for zero- and any-extension or -1 for sign-extension. If the type is a legal integer or vector integer type, the instruction is marked as legal.

The legalization of the extends may introduce a G_SPLAT_VECTOR, which needs to be legalized in this patch for the extend test cases to pass. A G_SPLAT_VECTOR is legal if the vector type is a legal integer or floating-point vector type and the source operand is of type sXLen. This is because the SelectionDAG patterns only support sXLen-typed ISD::SPLAT_VECTORs, and we'd like to reuse those patterns. A G_SPLAT_VECTOR is custom legalized if it has a legal s1-element vector type and an s1 scalar operand: it is legalized to G_VMSET_VL or G_VMCLR_VL if the splat value is all ones or all zeros, respectively. A non-constant mask splat is legalized by promoting the scalar value to s8; to turn the resulting s8-element vector back into an s1 vector, we use a G_ICMP.

For the splat vector and extend tests to pass, we also need to legalize G_ICMP in this patch. A G_ICMP is legal if the destination type is a legal bool vector and the LHS and RHS are legal integer vector types.
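For example, sign-extending a mask vector is legalized roughly as follows (an illustrative MIR sketch; the element count and virtual register names are hypothetical):

  %1:_(<vscale x 4 x s32>) = G_SEXT %0:_(<vscale x 4 x s1>)

becomes

  %zero:_(s32) = G_CONSTANT i32 0
  %splat_zero:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR %zero(s32)
  %neg_one:_(s32) = G_CONSTANT i32 -1
  %splat_true:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR %neg_one(s32)
  %1:_(<vscale x 4 x s32>) = G_SELECT %0(<vscale x 4 x s1>), %splat_true, %splat_zero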
//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "RISCVLegalizerInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;
using namespace LegalityPredicates;
using namespace LegalizeMutations;

// Is this type supported by scalar FP arithmetic operations given the current
// subtarget?
static LegalityPredicate typeIsScalarFPArith(unsigned TypeIdx,
                                             const RISCVSubtarget &ST) {
  return [=, &ST](const LegalityQuery &Query) {
    return Query.Types[TypeIdx].isScalar() &&
           ((ST.hasStdExtF() && Query.Types[TypeIdx].getSizeInBits() == 32) ||
            (ST.hasStdExtD() && Query.Types[TypeIdx].getSizeInBits() == 64));
  };
}

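// Is the type in IntOrFPVecTys legal for the current subtarget? Vector
// extensions must be present; s64 element vectors additionally require i64
// vector support, and an element count with a known minimum of 1 (e.g.
// nxv1s8) is only legal when ELEN is 64.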
static LegalityPredicate
typeIsLegalIntOrFPVec(unsigned TypeIdx,
                      std::initializer_list<LLT> IntOrFPVecTys,
                      const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
            ST.hasVInstructionsI64()) &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };

  return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
}

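// Is the type in BoolVecTys legal for the current subtarget? As above, an
// element count with a known minimum of 1 is only legal when ELEN is 64.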
static LegalityPredicate
typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
                   const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };
  return all(typeInSet(TypeIdx, BoolVecTys), P);
}

RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
    : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
  const LLT sDoubleXLen = LLT::scalar(2 * XLen);
  const LLT p0 = LLT::pointer(0, XLen);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  const LLT nxv1s1 = LLT::scalable_vector(1, s1);
  const LLT nxv2s1 = LLT::scalable_vector(2, s1);
  const LLT nxv4s1 = LLT::scalable_vector(4, s1);
  const LLT nxv8s1 = LLT::scalable_vector(8, s1);
  const LLT nxv16s1 = LLT::scalable_vector(16, s1);
  const LLT nxv32s1 = LLT::scalable_vector(32, s1);
  const LLT nxv64s1 = LLT::scalable_vector(64, s1);

  const LLT nxv1s8 = LLT::scalable_vector(1, s8);
  const LLT nxv2s8 = LLT::scalable_vector(2, s8);
  const LLT nxv4s8 = LLT::scalable_vector(4, s8);
  const LLT nxv8s8 = LLT::scalable_vector(8, s8);
  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
  const LLT nxv32s8 = LLT::scalable_vector(32, s8);
  const LLT nxv64s8 = LLT::scalable_vector(64, s8);

  const LLT nxv1s16 = LLT::scalable_vector(1, s16);
  const LLT nxv2s16 = LLT::scalable_vector(2, s16);
  const LLT nxv4s16 = LLT::scalable_vector(4, s16);
  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
  const LLT nxv16s16 = LLT::scalable_vector(16, s16);
  const LLT nxv32s16 = LLT::scalable_vector(32, s16);

  const LLT nxv1s32 = LLT::scalable_vector(1, s32);
  const LLT nxv2s32 = LLT::scalable_vector(2, s32);
  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
  const LLT nxv8s32 = LLT::scalable_vector(8, s32);
  const LLT nxv16s32 = LLT::scalable_vector(16, s32);

  const LLT nxv1s64 = LLT::scalable_vector(1, s64);
  const LLT nxv2s64 = LLT::scalable_vector(2, s64);
  const LLT nxv4s64 = LLT::scalable_vector(4, s64);
  const LLT nxv8s64 = LLT::scalable_vector(8, s64);

  using namespace TargetOpcode;

  auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};

  auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
                        nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
                        nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
                        nxv1s64, nxv2s64, nxv4s64, nxv8s64};

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
      .legalFor({s32, sXLen})
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder(
      {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();

  getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();

  auto &ShiftActions = getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL});
  if (ST.is64Bit())
    ShiftActions.customFor({{s32, s32}});
  ShiftActions.legalFor({{s32, s32}, {s32, sXLen}, {sXLen, sXLen}})
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, sXLen)
      .clampScalar(0, s32, sXLen)
      .minScalarSameAs(1, 0);

  auto &ExtActions =
      getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
  if (ST.is64Bit()) {
    ExtActions.legalFor({{sXLen, s32}});
    getActionDefinitionsBuilder(G_SEXT_INREG)
        .customFor({sXLen})
        .maxScalar(0, sXLen)
        .lower();
  } else {
    getActionDefinitionsBuilder(G_SEXT_INREG).maxScalar(0, sXLen).lower();
  }
  ExtActions.customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
      .maxScalar(0, sXLen);

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    if (XLen == 32 && ST.hasStdExtD()) {
      MergeUnmergeActions.legalIf(
          all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
    }
    MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
        .widenScalarToNextPow2(BigTyIdx, XLen)
        .clampScalar(LitTyIdx, sXLen, sXLen)
        .clampScalar(BigTyIdx, sXLen, sXLen);
  }

  getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();

  auto &RotateActions = getActionDefinitionsBuilder({G_ROTL, G_ROTR});
  if (ST.hasStdExtZbb() || ST.hasStdExtZbkb()) {
    RotateActions.legalFor({{s32, sXLen}, {sXLen, sXLen}});
    // Widen s32 rotate amount to s64 so SDAG patterns will match.
    if (ST.is64Bit())
      RotateActions.widenScalarIf(all(typeIs(0, s32), typeIs(1, s32)),
                                  changeTo(1, sXLen));
  }
  RotateActions.lower();

  getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();

  getActionDefinitionsBuilder(G_BITCAST).legalIf(
      all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(0, BoolVecTys, ST)),
          LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(1, BoolVecTys, ST))));

  auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
  if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
    BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
  else
    BSWAPActions.maxScalar(0, sXLen).lower();

  auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
  auto &CountZerosUndefActions =
      getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
  if (ST.hasStdExtZbb()) {
    CountZerosActions.legalFor({{s32, s32}, {sXLen, sXLen}})
        .clampScalar(0, s32, sXLen)
        .widenScalarToNextPow2(0)
        .scalarSameSizeAs(1, 0);
  } else {
    CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
    CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
  }
  CountZerosUndefActions.lower();

  auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
  if (ST.hasStdExtZbb()) {
    CTPOPActions.legalFor({{s32, s32}, {sXLen, sXLen}})
        .clampScalar(0, s32, sXLen)
        .widenScalarToNextPow2(0)
        .scalarSameSizeAs(1, 0);
  } else {
    CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
  }

  auto &ConstantActions = getActionDefinitionsBuilder(G_CONSTANT);
  ConstantActions.legalFor({s32, p0});
  if (ST.is64Bit())
    ConstantActions.customFor({s64});
  ConstantActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen);

  // TODO: transform illegal vector types into legal vector types
  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({s32, sXLen, p0})
      .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{sXLen, sXLen}, {sXLen, p0}})
      .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
                   typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
      .widenScalarOrEltToNextPow2OrMinSize(1, 8)
      .clampScalar(1, sXLen, sXLen)
      .clampScalar(0, sXLen, sXLen);

  auto &SelectActions =
      getActionDefinitionsBuilder(G_SELECT)
          .legalFor({{s32, sXLen}, {p0, sXLen}})
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIsLegalBoolVec(1, BoolVecTys, ST)));
  if (XLen == 64 || ST.hasStdExtD())
    SelectActions.legalFor({{s64, sXLen}});
  SelectActions.widenScalarToNextPow2(0)
      .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
      .clampScalar(1, sXLen, sXLen);

  auto &LoadStoreActions =
      getActionDefinitionsBuilder({G_LOAD, G_STORE})
          .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                     {s32, p0, s16, 16},
                                     {s32, p0, s32, 32},
                                     {p0, p0, sXLen, XLen}});
  auto &ExtLoadActions =
      getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
          .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 16}});
  if (XLen == 64) {
    LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s8, 8},
                                               {s64, p0, s16, 16},
                                               {s64, p0, s32, 32},
                                               {s64, p0, s64, 64}});
    ExtLoadActions.legalForTypesWithMemDesc(
        {{s64, p0, s8, 8}, {s64, p0, s16, 16}, {s64, p0, s32, 32}});
  } else if (ST.hasStdExtD()) {
    LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}});
  }
  LoadStoreActions.clampScalar(0, s32, sXLen).lower();
  ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen).lower();

  getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{sXLen, p0}})
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .legalFor({{p0, sXLen}})
      .clampScalar(1, sXLen, sXLen);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);

  getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, sXLen})
      .widenScalarToNextPow2(0)
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
      .legalFor({p0});

  if (ST.hasStdExtM() || ST.hasStdExtZmmul()) {
    getActionDefinitionsBuilder(G_MUL)
        .legalFor({s32, sXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, s32, sXLen);

    // clang-format off
    getActionDefinitionsBuilder({G_SMULH, G_UMULH})
        .legalFor({sXLen})
        .lower();
    // clang-format on

    getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
  } else {
    getActionDefinitionsBuilder(G_MUL)
        .libcallFor({sXLen, sDoubleXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, sXLen, sDoubleXLen);

    getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});

    getActionDefinitionsBuilder({G_SMULO, G_UMULO})
        .minScalar(0, sXLen)
        // Widen sXLen to sDoubleXLen so we can use a single libcall to get
        // the low bits for the mul result and high bits to do the overflow
        // check.
        .widenScalarIf(typeIs(0, sXLen),
                       LegalizeMutations::changeTo(0, sDoubleXLen))
        .lower();
  }

  if (ST.hasStdExtM()) {
    getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
        .legalFor({s32, sXLen})
        .libcallFor({sDoubleXLen})
        .clampScalar(0, s32, sDoubleXLen)
        .widenScalarToNextPow2(0);
  } else {
    getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
        .libcallFor({sXLen, sDoubleXLen})
        .clampScalar(0, sXLen, sDoubleXLen)
        .widenScalarToNextPow2(0);
  }

  auto &AbsActions = getActionDefinitionsBuilder(G_ABS);
  if (ST.hasStdExtZbb())
    AbsActions.customFor({s32, sXLen}).minScalar(0, sXLen);
  AbsActions.lower();

  auto &MinMaxActions =
      getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN});
  if (ST.hasStdExtZbb())
    MinMaxActions.legalFor({sXLen}).minScalar(0, sXLen);
  MinMaxActions.lower();

  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  // FP Operations

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
                               G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM})
      .legalIf(typeIsScalarFPArith(0, ST));

  getActionDefinitionsBuilder(G_FCOPYSIGN)
      .legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST)));

  getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
      [=, &ST](const LegalityQuery &Query) -> bool {
        return (ST.hasStdExtD() && typeIs(0, s32)(Query) &&
                typeIs(1, s64)(Query));
      });
  getActionDefinitionsBuilder(G_FPEXT).legalIf(
      [=, &ST](const LegalityQuery &Query) -> bool {
        return (ST.hasStdExtD() && typeIs(0, s64)(Query) &&
                typeIs(1, s32)(Query));
      });

  getActionDefinitionsBuilder(G_FCMP)
      .legalIf(all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST)))
      .clampScalar(0, sXLen, sXLen);

  // TODO: Support vector version of G_IS_FPCLASS.
  getActionDefinitionsBuilder(G_IS_FPCLASS)
      .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST)));

  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalIf(typeIsScalarFPArith(0, ST))
      .lowerFor({s32, s64});

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST)))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen})))
      .widenScalarToNextPow2(1)
      .clampScalar(1, s32, sXLen);

  // FIXME: We can do custom inline expansion like SelectionDAG.
  // FIXME: Legal with Zfa.
  getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
      .libcallFor({s32, s64});

  getActionDefinitionsBuilder(G_VASTART).customFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
      // outside the [s32, sXLen] range.
      .clampScalar(0, s32, sXLen)
      .lowerForCartesianProduct({s32, sXLen, p0}, {p0});

  getActionDefinitionsBuilder(G_VSCALE)
      .clampScalar(0, sXLen, sXLen)
      .customFor({sXLen});

  auto &SplatActions =
      getActionDefinitionsBuilder(G_SPLAT_VECTOR)
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIs(1, sXLen)))
          .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
  // Handle the case of s64 element vectors on RV32. If the subtarget does not
  // have f64, then try to lower to G_SPLAT_VECTOR_SPLIT_I64_VL. If the
  // subtarget does have f64, then we don't know whether the type is an f64 or
  // an i64, so mark the G_SPLAT_VECTOR as legal and decide later what to do
  // with it, depending on how the instructions it consumes are legalized.
  // They are not legalized yet since legalization is in reverse postorder, so
  // we cannot make the decision at this moment.
  if (XLen == 32) {
    if (ST.hasVInstructionsF64() && ST.hasStdExtD())
      SplatActions.legalIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
    else if (ST.hasVInstructionsI64())
      SplatActions.customIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
  }

  SplatActions.clampScalar(1, sXLen, sXLen);

  getLegacyLegalizerInfo().computeTables();
}

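/// Map an LLT to an equivalent IR integer (or vector-of-integer) type so
/// that DataLayout queries such as the ABI alignment can be made.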
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}

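/// Legalize intrinsics whose action is Custom. Currently only llvm.vacopy
/// is handled here.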
bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                           MachineInstr &MI) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    return false;
  case Intrinsic::vacopy: {
    // vacopy arguments must be legal because of the intrinsic signature.
    // No need to check here.

    MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
    MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
    MachineFunction &MF = *MI.getMF();
    const DataLayout &DL = MIRBuilder.getDataLayout();
    LLVMContext &Ctx = MF.getFunction().getContext();

    Register DstLst = MI.getOperand(1).getReg();
    LLT PtrTy = MRI.getType(DstLst);

    // Load the source va_list
    Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
    MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
    auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);

    // Store the result in the destination va_list
    MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
    MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);

    MI.eraseFromParent();
    return true;
  }
  }
}

bool RISCVLegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
  if (!VRegAndVal)
    return true;
  // Check the shift amount is in range for an immediate form.
  uint64_t Amount = VRegAndVal->Value.getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
                                         MachineIRBuilder &MIRBuilder) const {
  // Stores the address of the VarArgsFrameIndex slot into the memory location
  assert(MI.getOpcode() == TargetOpcode::G_VASTART);
  MachineFunction *MF = MI.getParent()->getParent();
  RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
  int FI = FuncInfo->getVarArgsFrameIndex();
  LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
  auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
  assert(MI.hasOneMemOperand());
  MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
                        *MI.memoperands()[0]);
  MI.eraseFromParent();
  return true;
}

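/// Return true if the immediate is too expensive to materialize with an
/// instruction sequence and should be loaded from the constant pool instead.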
bool RISCVLegalizerInfo::shouldBeInConstantPool(APInt APImm,
                                                bool ShouldOptForSize) const {
  assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
  int64_t Imm = APImm.getSExtValue();
  // All simm32 constants should be handled by isel.
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so this check
  // should have better compile time.
  if (isInt<32>(Imm))
    return false;

  // We only need to cost the immediate if constant pool lowering is enabled.
  if (!STI.useConstantPoolForLargeInts())
    return false;

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  if (Seq.size() <= STI.getMaxBuildIntsCost())
    return false;

  // Optimizations below are disabled for opt size. If we're optimizing for
  // size, use a constant pool.
  if (ShouldOptForSize)
    return true;

  // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool. It will require an extra
  // temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
  unsigned ShiftAmt, AddOpc;
  RISCVMatInt::InstSeq SeqLo =
      RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
  return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
}

bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
                                        MachineIRBuilder &MIB) const {
  const LLT XLenTy(STI.getXLenVT());
  Register Dst = MI.getOperand(0).getReg();

  // We define our scalable vector types for lmul=1 to use a 64 bit known
  // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
  // vscale as VLENB / 8.
  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
  if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
    // Support for VLEN==32 is incomplete.
    return false;

  // We assume VLENB is a multiple of 8. We manually choose the best shift
  // here because SimplifyDemandedBits isn't always able to simplify it.
  uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
  if (isPowerOf2_64(Val)) {
    uint64_t Log2 = Log2_64(Val);
    if (Log2 < 3) {
      // Multiplier is less than 8: shift VLENB right.
      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
      MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
    } else if (Log2 > 3) {
      // Multiplier is greater than 8: shift VLENB left.
      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
      MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
    } else {
      // Multiplier is exactly 8: vscale * 8 == VLENB.
      MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
    }
  } else if ((Val % 8) == 0) {
    // If the multiplier is a multiple of 8, scale it down to avoid needing
    // to shift the VLENB value.
    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
    MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
  } else {
    // Otherwise compute vscale = VLENB >> 3 and multiply it by Val.
    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
    auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
    MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
  }
  MI.eraseFromParent();
  return true;
}

// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
                                     MachineIRBuilder &MIB) const {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
         Opc == TargetOpcode::G_ANYEXT);

  MachineRegisterInfo &MRI = *MIB.getMRI();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
  LLT DstEltTy = DstTy.getElementType();
  auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
  auto SplatTrue =
      MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
  MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);

  MI.eraseFromParent();
  return true;
}

/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1 element type vector of the same (possibly scalable)
/// length.
static LLT getMaskTypeFor(LLT VecTy) {
  assert(VecTy.isVector());
  ElementCount EC = VecTy.getElementCount();
  return LLT::vector(EC, LLT::scalar(1));
}

/// Creates an all-ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
                                            MachineIRBuilder &MIB,
                                            MachineRegisterInfo &MRI) {
  LLT MaskTy = getMaskTypeFor(VecTy);
  return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
}

/// Gets the two common "VL" operands: an all-ones mask and the vector length.
/// VecTy is a scalable vector type.
static std::pair<MachineInstrBuilder, Register>
buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
                  MachineRegisterInfo &MRI) {
  LLT VecTy = Dst.getLLTTy(MRI);
  assert(VecTy.isScalableVector() && "Expecting scalable container type");
  Register VL(RISCV::X0); // X0 as the VL operand means VLMAX.
  MachineInstrBuilder Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
  return {Mask, VL};
}

static MachineInstrBuilder
buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
                         Register Hi, Register VL, MachineIRBuilder &MIB,
                         MachineRegisterInfo &MRI) {
  // TODO: If the Hi bits of the splat are undefined, then it's fine to just
  // splat Lo even if it might be sign extended. I don't think we have
  // introduced a case yet where we build an s64 whose upper bits are undef.

  // Fall back to a stack store and stride x0 vector load.
  // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64_VL. This is done in
  // preprocessDAG in SDAG.
  return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
                        {Passthru, Lo, Hi, VL});
}

static MachineInstrBuilder
buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
                         const SrcOp &Scalar, Register VL,
                         MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
  assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
  auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
  return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
                                  Unmerge.getReg(1), VL, MIB, MRI);
}

// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
// Splats of s1 types that have a constant value can be legalized as
// G_VMSET_VL or G_VMCLR_VL.
bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
                                             MachineIRBuilder &MIB) const {
  assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = MI.getOperand(0).getReg();
  Register SplatVal = MI.getOperand(1).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT XLenTy(STI.getXLenVT());

  // Handle case of s64 element vectors on RV32.
  if (XLenTy.getSizeInBits() == 32 &&
      VecTy.getElementType().getSizeInBits() == 64) {
    auto [_, VL] = buildDefaultVLOps(Dst, MIB, MRI);
    buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
                             MRI);
    MI.eraseFromParent();
    return true;
  }

  // All-zeros or all-ones splats are handled specially.
  MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
  if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
    auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
    MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
    MI.eraseFromParent();
    return true;
  }
  if (isNullOrNullSplat(SplatValMI, MRI)) {
    auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
    MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
    MI.eraseFromParent();
    return true;
  }

  // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
  // ones) by promoting it to an s8 splat.
  LLT InterEltTy = LLT::scalar(8);
  LLT InterTy = VecTy.changeElementType(InterEltTy);
  auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
  auto And =
      MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
  auto LHS = MIB.buildSplatVector(InterTy, And);
  auto ZeroSplat =
      MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
  MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
  MI.eraseFromParent();
  return true;
}

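/// Dispatch instructions whose action is Custom to the opcode-specific
/// legalizers above. Returns false for anything unhandled.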
bool RISCVLegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  GISelChangeObserver &Observer = Helper.Observer;
  MachineFunction &MF = *MI.getParent()->getParent();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToMaxNeg(MI);
  // TODO: G_FCONSTANT
  case TargetOpcode::G_CONSTANT: {
    const Function &F = MF.getFunction();
    // TODO: if PSI and BFI are present, add " ||
    // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
    bool ShouldOptForSize = F.hasOptSize() || F.hasMinSize();
    const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
    if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
      return true;
    return Helper.lowerConstant(MI);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MIRBuilder, Observer);
  case TargetOpcode::G_SEXT_INREG: {
    // Source size of 32 is sext.w.
    int64_t SizeInBits = MI.getOperand(2).getImm();
    if (SizeInBits == 32)
      return true;

    return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
           LegalizerHelper::Legalized;
  }
  case TargetOpcode::G_IS_FPCLASS: {
    Register GISFPCLASS = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    const MachineOperand &ImmOp = MI.getOperand(2);
    MachineIRBuilder MIB(MI);

    // Turn LLVM IR's floating-point classes into RISC-V's, by simply
    // rotating the 10-bit immediate right by two bits.
    APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
    auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
    auto ConstZero = MIB.buildConstant(sXLen, 0);

    auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
    auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
    MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);

    MI.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_VASTART:
    return legalizeVAStart(MI, MIRBuilder);
  case TargetOpcode::G_VSCALE:
    return legalizeVScale(MI, MIRBuilder);
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ANYEXT:
    return legalizeExt(MI, MIRBuilder);
  case TargetOpcode::G_SPLAT_VECTOR:
    return legalizeSplatVector(MI, MIRBuilder);
  }

  llvm_unreachable("expected switch to return");
}