[NVPTX] Further cleanup call isel (#146411)
This change continues rewriting and cleanup around DAG ISel for formal arguments, return values, and function calls. This causes some incidental changes, mostly to instruction ordering and register naming, but also a couple of improvements caused by using scalar types earlier in the lowering.
This commit is contained in:
@@ -28,6 +28,7 @@
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineJumpTableInfo.h"
|
||||
#include "llvm/CodeGen/MachineMemOperand.h"
|
||||
#include "llvm/CodeGen/Register.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
||||
#include "llvm/CodeGen/TargetCallingConv.h"
|
||||
@@ -390,35 +391,27 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
|
||||
/// and promote them to a larger size if they're not.
|
||||
///
|
||||
/// The promoted type is delivered through the function's return value.
|
||||
static std::optional<MVT> PromoteScalarIntegerPTX(const EVT &VT) {
|
||||
static EVT promoteScalarIntegerPTX(const EVT VT) {
|
||||
if (VT.isScalarInteger()) {
|
||||
MVT PromotedVT;
|
||||
switch (PowerOf2Ceil(VT.getFixedSizeInBits())) {
|
||||
default:
|
||||
llvm_unreachable(
|
||||
"Promotion is not suitable for scalars of size larger than 64-bits");
|
||||
case 1:
|
||||
PromotedVT = MVT::i1;
|
||||
break;
|
||||
return MVT::i1;
|
||||
case 2:
|
||||
case 4:
|
||||
case 8:
|
||||
PromotedVT = MVT::i8;
|
||||
break;
|
||||
return MVT::i8;
|
||||
case 16:
|
||||
PromotedVT = MVT::i16;
|
||||
break;
|
||||
return MVT::i16;
|
||||
case 32:
|
||||
PromotedVT = MVT::i32;
|
||||
break;
|
||||
return MVT::i32;
|
||||
case 64:
|
||||
PromotedVT = MVT::i64;
|
||||
break;
|
||||
return MVT::i64;
|
||||
}
|
||||
if (VT != PromotedVT)
|
||||
return PromotedVT;
|
||||
}
|
||||
return std::nullopt;
|
||||
return VT;
|
||||
}
|
||||
|
||||
// Check whether we can merge loads/stores of some of the pieces of a
|
||||
@@ -1053,10 +1046,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
break;
|
||||
|
||||
MAKE_CASE(NVPTXISD::RET_GLUE)
|
||||
MAKE_CASE(NVPTXISD::DeclareParam)
|
||||
MAKE_CASE(NVPTXISD::DeclareArrayParam)
|
||||
MAKE_CASE(NVPTXISD::DeclareScalarParam)
|
||||
MAKE_CASE(NVPTXISD::DeclareRet)
|
||||
MAKE_CASE(NVPTXISD::DeclareRetParam)
|
||||
MAKE_CASE(NVPTXISD::CALL)
|
||||
MAKE_CASE(NVPTXISD::LoadParam)
|
||||
MAKE_CASE(NVPTXISD::LoadParamV2)
|
||||
@@ -1162,8 +1153,8 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
|
||||
}
|
||||
|
||||
std::string NVPTXTargetLowering::getPrototype(
|
||||
const DataLayout &DL, Type *retTy, const ArgListTy &Args,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign RetAlign,
|
||||
const DataLayout &DL, Type *RetTy, const ArgListTy &Args,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
std::optional<unsigned> FirstVAArg, const CallBase &CB,
|
||||
unsigned UniqueCallSite) const {
|
||||
auto PtrVT = getPointerTy(DL);
|
||||
@@ -1172,22 +1163,22 @@ std::string NVPTXTargetLowering::getPrototype(
|
||||
raw_string_ostream O(Prototype);
|
||||
O << "prototype_" << UniqueCallSite << " : .callprototype ";
|
||||
|
||||
if (retTy->isVoidTy()) {
|
||||
if (RetTy->isVoidTy()) {
|
||||
O << "()";
|
||||
} else {
|
||||
O << "(";
|
||||
if (shouldPassAsArray(retTy)) {
|
||||
assert(RetAlign && "RetAlign must be set for non-void return types");
|
||||
O << ".param .align " << RetAlign->value() << " .b8 _["
|
||||
<< DL.getTypeAllocSize(retTy) << "]";
|
||||
} else if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
|
||||
if (shouldPassAsArray(RetTy)) {
|
||||
const Align RetAlign = getArgumentAlignment(&CB, RetTy, 0, DL);
|
||||
O << ".param .align " << RetAlign.value() << " .b8 _["
|
||||
<< DL.getTypeAllocSize(RetTy) << "]";
|
||||
} else if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy()) {
|
||||
unsigned size = 0;
|
||||
if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
|
||||
if (auto *ITy = dyn_cast<IntegerType>(RetTy)) {
|
||||
size = ITy->getBitWidth();
|
||||
} else {
|
||||
assert(retTy->isFloatingPointTy() &&
|
||||
assert(RetTy->isFloatingPointTy() &&
|
||||
"Floating point type expected here");
|
||||
size = retTy->getPrimitiveSizeInBits();
|
||||
size = RetTy->getPrimitiveSizeInBits();
|
||||
}
|
||||
// PTX ABI requires all scalar return values to be at least 32
|
||||
// bits in size. fp16 normally uses .b16 as its storage type in
|
||||
@@ -1195,7 +1186,7 @@ std::string NVPTXTargetLowering::getPrototype(
|
||||
size = promoteScalarArgumentSize(size);
|
||||
|
||||
O << ".param .b" << size << " _";
|
||||
} else if (isa<PointerType>(retTy)) {
|
||||
} else if (isa<PointerType>(RetTy)) {
|
||||
O << ".param .b" << PtrVT.getSizeInBits() << " _";
|
||||
} else {
|
||||
llvm_unreachable("Unknown return type");
|
||||
@@ -1256,7 +1247,7 @@ std::string NVPTXTargetLowering::getPrototype(
|
||||
|
||||
if (FirstVAArg)
|
||||
O << (first ? "" : ",") << " .param .align "
|
||||
<< STI.getMaxRequiredAlignment() << " .b8 _[]\n";
|
||||
<< STI.getMaxRequiredAlignment() << " .b8 _[]";
|
||||
O << ")";
|
||||
if (shouldEmitPTXNoReturn(&CB, *nvTM))
|
||||
O << " .noreturn";
|
||||
@@ -1442,6 +1433,21 @@ static ISD::NodeType getExtOpcode(const ISD::ArgFlagsTy &Flags) {
|
||||
return ISD::ANY_EXTEND;
|
||||
}
|
||||
|
||||
static SDValue correctParamType(SDValue V, EVT ExpectedVT,
|
||||
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
|
||||
SDLoc dl) {
|
||||
const EVT ActualVT = V.getValueType();
|
||||
assert((ActualVT == ExpectedVT ||
|
||||
(ExpectedVT.isInteger() && ActualVT.isInteger())) &&
|
||||
"Non-integer argument type size mismatch");
|
||||
if (ExpectedVT.bitsGT(ActualVT))
|
||||
return DAG.getNode(getExtOpcode(Flags), dl, ExpectedVT, V);
|
||||
if (ExpectedVT.bitsLT(ActualVT))
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, ExpectedVT, V);
|
||||
|
||||
return V;
|
||||
}
|
||||
|
||||
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
SmallVectorImpl<SDValue> &InVals) const {
|
||||
|
||||
@@ -1505,9 +1511,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
"Outs and OutVals must be the same size");
|
||||
// Declare the .params or .reg needed to pass values
|
||||
// to the function
|
||||
for (const auto [ArgI, Arg] : llvm::enumerate(Args)) {
|
||||
const auto ArgOuts = AllOuts.take_while(
|
||||
[ArgI = ArgI](auto O) { return O.OrigArgIndex == ArgI; });
|
||||
for (const auto E : llvm::enumerate(Args)) {
|
||||
const auto ArgI = E.index();
|
||||
const auto Arg = E.value();
|
||||
const auto ArgOuts =
|
||||
AllOuts.take_while([&](auto O) { return O.OrigArgIndex == ArgI; });
|
||||
const auto ArgOutVals = AllOutVals.take_front(ArgOuts.size());
|
||||
AllOuts = AllOuts.drop_front(ArgOuts.size());
|
||||
AllOutVals = AllOutVals.drop_front(ArgOuts.size());
|
||||
@@ -1515,6 +1523,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
const bool IsVAArg = (ArgI >= FirstVAArg);
|
||||
const bool IsByVal = Arg.IsByVal;
|
||||
|
||||
const SDValue ParamSymbol =
|
||||
getCallParamSymbol(DAG, IsVAArg ? FirstVAArg : ArgI, MVT::i32);
|
||||
|
||||
SmallVector<EVT, 16> VTs;
|
||||
SmallVector<uint64_t, 16> Offsets;
|
||||
|
||||
@@ -1525,38 +1536,43 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
assert(VTs.size() == Offsets.size() && "Size mismatch");
|
||||
assert((IsByVal || VTs.size() == ArgOuts.size()) && "Size mismatch");
|
||||
|
||||
Align ArgAlign;
|
||||
if (IsByVal) {
|
||||
// The ByValAlign in the Outs[OIdx].Flags is always set at this point,
|
||||
// so we don't need to worry whether it's naturally aligned or not.
|
||||
// See TargetLowering::LowerCallTo().
|
||||
Align InitialAlign = ArgOuts[0].Flags.getNonZeroByValAlign();
|
||||
ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy,
|
||||
InitialAlign, DL);
|
||||
if (IsVAArg)
|
||||
VAOffset = alignTo(VAOffset, ArgAlign);
|
||||
} else {
|
||||
ArgAlign = getArgumentAlignment(CB, Arg.Ty, ArgI + 1, DL);
|
||||
}
|
||||
const Align ArgAlign = [&]() {
|
||||
if (IsByVal) {
|
||||
// The ByValAlign in the Outs[OIdx].Flags is always set at this point,
|
||||
// so we don't need to worry whether it's naturally aligned or not.
|
||||
// See TargetLowering::LowerCallTo().
|
||||
const Align InitialAlign = ArgOuts[0].Flags.getNonZeroByValAlign();
|
||||
const Align ByValAlign = getFunctionByValParamAlign(
|
||||
CB->getCalledFunction(), ETy, InitialAlign, DL);
|
||||
if (IsVAArg)
|
||||
VAOffset = alignTo(VAOffset, ByValAlign);
|
||||
return ByValAlign;
|
||||
}
|
||||
return getArgumentAlignment(CB, Arg.Ty, ArgI + 1, DL);
|
||||
}();
|
||||
|
||||
const unsigned TypeSize = DL.getTypeAllocSize(ETy);
|
||||
assert((!IsByVal || TypeSize == ArgOuts[0].Flags.getByValSize()) &&
|
||||
"type size mismatch");
|
||||
|
||||
const bool PassAsArray = IsByVal || shouldPassAsArray(Arg.Ty);
|
||||
if (IsVAArg) {
|
||||
if (ArgI == FirstVAArg) {
|
||||
VADeclareParam = Chain =
|
||||
DAG.getNode(NVPTXISD::DeclareParam, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, GetI32(STI.getMaxRequiredAlignment()),
|
||||
GetI32(ArgI), GetI32(1), InGlue});
|
||||
const std::optional<SDValue> ArgDeclare = [&]() -> std::optional<SDValue> {
|
||||
if (IsVAArg) {
|
||||
if (ArgI == FirstVAArg) {
|
||||
VADeclareParam = DAG.getNode(
|
||||
NVPTXISD::DeclareArrayParam, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, ParamSymbol, GetI32(STI.getMaxRequiredAlignment()),
|
||||
GetI32(0), InGlue});
|
||||
return VADeclareParam;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
if (IsByVal || shouldPassAsArray(Arg.Ty)) {
|
||||
// declare .param .align <align> .b8 .param<n>[<size>];
|
||||
return DAG.getNode(NVPTXISD::DeclareArrayParam, dl,
|
||||
{MVT::Other, MVT::Glue},
|
||||
{Chain, ParamSymbol, GetI32(ArgAlign.value()),
|
||||
GetI32(TypeSize), InGlue});
|
||||
}
|
||||
} else if (PassAsArray) {
|
||||
// declare .param .align <align> .b8 .param<n>[<size>];
|
||||
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, GetI32(ArgAlign.value()), GetI32(ArgI),
|
||||
GetI32(TypeSize), InGlue});
|
||||
} else {
|
||||
assert(ArgOuts.size() == 1 && "We must pass only one value as non-array");
|
||||
// declare .param .b<size> .param<n>;
|
||||
|
||||
@@ -1568,11 +1584,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
? promoteScalarArgumentSize(TypeSize * 8)
|
||||
: TypeSize * 8;
|
||||
|
||||
Chain =
|
||||
DAG.getNode(NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, GetI32(ArgI), GetI32(PromotedSize), InGlue});
|
||||
return DAG.getNode(NVPTXISD::DeclareScalarParam, dl,
|
||||
{MVT::Other, MVT::Glue},
|
||||
{Chain, ParamSymbol, GetI32(PromotedSize), InGlue});
|
||||
}();
|
||||
if (ArgDeclare) {
|
||||
Chain = ArgDeclare->getValue(0);
|
||||
InGlue = ArgDeclare->getValue(1);
|
||||
}
|
||||
InGlue = Chain.getValue(1);
|
||||
|
||||
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter
|
||||
// than 32-bits are sign extended or zero extended, depending on
|
||||
@@ -1594,8 +1613,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
} else {
|
||||
StVal = ArgOutVals[I];
|
||||
|
||||
if (auto PromotedVT = PromoteScalarIntegerPTX(StVal.getValueType())) {
|
||||
StVal = DAG.getNode(getExtOpcode(ArgOuts[I].Flags), dl, *PromotedVT,
|
||||
auto PromotedVT = promoteScalarIntegerPTX(StVal.getValueType());
|
||||
if (PromotedVT != StVal.getValueType()) {
|
||||
StVal = DAG.getNode(getExtOpcode(ArgOuts[I].Flags), dl, PromotedVT,
|
||||
StVal);
|
||||
}
|
||||
}
|
||||
@@ -1619,12 +1639,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
unsigned J = 0;
|
||||
for (const unsigned NumElts : VectorInfo) {
|
||||
const int CurOffset = Offsets[J];
|
||||
EVT EltVT = VTs[J];
|
||||
EVT EltVT = promoteScalarIntegerPTX(VTs[J]);
|
||||
const Align PartAlign = commonAlignment(ArgAlign, CurOffset);
|
||||
|
||||
if (auto PromotedVT = PromoteScalarIntegerPTX(EltVT))
|
||||
EltVT = *PromotedVT;
|
||||
|
||||
// If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a
|
||||
// scalar store. In such cases, fall back to byte stores.
|
||||
if (NumElts == 1 && !IsVAArg && PartAlign < DAG.getEVTAlign(EltVT)) {
|
||||
@@ -1695,27 +1712,26 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
}
|
||||
|
||||
GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
|
||||
MaybeAlign RetAlign = std::nullopt;
|
||||
|
||||
// Handle Result
|
||||
if (!Ins.empty()) {
|
||||
RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
|
||||
|
||||
// Declare
|
||||
// .param .align N .b8 retval0[<size-in-bytes>], or
|
||||
// .param .b<size-in-bits> retval0
|
||||
const unsigned ResultSize = DL.getTypeAllocSizeInBits(RetTy);
|
||||
if (!shouldPassAsArray(RetTy)) {
|
||||
const unsigned PromotedResultSize = promoteScalarArgumentSize(ResultSize);
|
||||
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, GetI32(PromotedResultSize), InGlue});
|
||||
InGlue = Chain.getValue(1);
|
||||
} else {
|
||||
Chain = DAG.getNode(
|
||||
NVPTXISD::DeclareRetParam, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, GetI32(RetAlign->value()), GetI32(ResultSize / 8), InGlue});
|
||||
InGlue = Chain.getValue(1);
|
||||
}
|
||||
const SDValue RetDeclare = [&]() {
|
||||
const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
|
||||
const unsigned ResultSize = DL.getTypeAllocSizeInBits(RetTy);
|
||||
if (shouldPassAsArray(RetTy)) {
|
||||
const Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
|
||||
return DAG.getNode(NVPTXISD::DeclareArrayParam, dl,
|
||||
{MVT::Other, MVT::Glue},
|
||||
{Chain, RetSymbol, GetI32(RetAlign.value()),
|
||||
GetI32(ResultSize / 8), InGlue});
|
||||
}
|
||||
const auto PromotedResultSize = promoteScalarArgumentSize(ResultSize);
|
||||
return DAG.getNode(
|
||||
NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, RetSymbol, GetI32(PromotedResultSize), InGlue});
|
||||
}();
|
||||
Chain = RetDeclare.getValue(0);
|
||||
InGlue = RetDeclare.getValue(1);
|
||||
}
|
||||
|
||||
const bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
|
||||
@@ -1760,7 +1776,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
// The prototype is embedded in a string and put as the operand for a
|
||||
// CallPrototype SDNode which will print out to the value of the string.
|
||||
std::string Proto =
|
||||
getPrototype(DL, RetTy, Args, CLI.Outs, RetAlign,
|
||||
getPrototype(DL, RetTy, Args, CLI.Outs,
|
||||
HasVAArgs ? std::optional(FirstVAArg) : std::nullopt, *CB,
|
||||
UniqueCallSite);
|
||||
const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
|
||||
@@ -1773,11 +1789,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
if (ConvertToIndirectCall) {
|
||||
// Copy the function ptr to a ptx register and use the register to call the
|
||||
// function.
|
||||
EVT DestVT = Callee.getValueType();
|
||||
MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
|
||||
const MVT DestVT = Callee.getValueType().getSimpleVT();
|
||||
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
unsigned DestReg =
|
||||
RegInfo.createVirtualRegister(TLI.getRegClassFor(DestVT.getSimpleVT()));
|
||||
Register DestReg = MRI.createVirtualRegister(TLI.getRegClassFor(DestVT));
|
||||
auto RegCopy = DAG.getCopyToReg(DAG.getEntryNode(), dl, DestReg, Callee);
|
||||
Callee = DAG.getCopyFromReg(RegCopy, dl, DestReg, DestVT);
|
||||
}
|
||||
@@ -1810,7 +1825,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
|
||||
assert(VTs.size() == Ins.size() && "Bad value decomposition");
|
||||
|
||||
assert(RetAlign && "RetAlign is guaranteed to be set");
|
||||
const Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
|
||||
|
||||
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
|
||||
// 32-bits are sign extended or zero extended, depending on whether
|
||||
@@ -1818,17 +1833,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
const bool ExtendIntegerRetVal =
|
||||
RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
|
||||
|
||||
const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, *RetAlign);
|
||||
const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
|
||||
unsigned I = 0;
|
||||
for (const unsigned VectorizedSize : VectorInfo) {
|
||||
EVT TheLoadType = VTs[I];
|
||||
EVT TheLoadType = promoteScalarIntegerPTX(VTs[I]);
|
||||
EVT EltType = Ins[I].VT;
|
||||
const Align EltAlign = commonAlignment(*RetAlign, Offsets[I]);
|
||||
const Align EltAlign = commonAlignment(RetAlign, Offsets[I]);
|
||||
|
||||
if (auto PromotedVT = PromoteScalarIntegerPTX(TheLoadType)) {
|
||||
TheLoadType = *PromotedVT;
|
||||
EltType = *PromotedVT;
|
||||
}
|
||||
if (TheLoadType != VTs[I])
|
||||
EltType = TheLoadType;
|
||||
|
||||
if (ExtendIntegerRetVal) {
|
||||
TheLoadType = MVT::i32;
|
||||
@@ -1898,13 +1911,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
continue;
|
||||
}
|
||||
|
||||
SDValue Ret = DAG.getNode(
|
||||
NVPTXISD::ProxyReg, dl,
|
||||
{ProxyRegOps[I].getSimpleValueType(), MVT::Other, MVT::Glue},
|
||||
{Chain, ProxyRegOps[I], InGlue});
|
||||
|
||||
Chain = Ret.getValue(1);
|
||||
InGlue = Ret.getValue(2);
|
||||
SDValue Ret =
|
||||
DAG.getNode(NVPTXISD::ProxyReg, dl, ProxyRegOps[I].getSimpleValueType(),
|
||||
{Chain, ProxyRegOps[I]});
|
||||
|
||||
const EVT ExpectedVT = Ins[I].VT;
|
||||
if (!Ret.getValueType().bitsEq(ExpectedVT)) {
|
||||
@@ -1914,14 +1923,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
}
|
||||
|
||||
for (SDValue &T : TempProxyRegOps) {
|
||||
SDValue Repl = DAG.getNode(NVPTXISD::ProxyReg, dl,
|
||||
{T.getSimpleValueType(), MVT::Other, MVT::Glue},
|
||||
{Chain, T.getOperand(0), InGlue});
|
||||
SDValue Repl = DAG.getNode(NVPTXISD::ProxyReg, dl, T.getSimpleValueType(),
|
||||
{Chain, T.getOperand(0)});
|
||||
DAG.ReplaceAllUsesWith(T, Repl);
|
||||
DAG.RemoveDeadNode(T.getNode());
|
||||
|
||||
Chain = Repl.getValue(1);
|
||||
InGlue = Repl.getValue(2);
|
||||
}
|
||||
|
||||
// set isTailCall to false for now, until we figure out how to express
|
||||
@@ -3293,11 +3298,17 @@ bool NVPTXTargetLowering::splitValueIntoRegisterParts(
|
||||
// Name of the symbol is composed from its index and the function name.
|
||||
// Negative index corresponds to special parameter (unsized array) used for
|
||||
// passing variable arguments.
|
||||
SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
|
||||
EVT v) const {
|
||||
SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int I,
|
||||
EVT T) const {
|
||||
StringRef SavedStr = nvTM->getStrPool().save(
|
||||
getParamName(&DAG.getMachineFunction().getFunction(), idx));
|
||||
return DAG.getExternalSymbol(SavedStr.data(), v);
|
||||
getParamName(&DAG.getMachineFunction().getFunction(), I));
|
||||
return DAG.getExternalSymbol(SavedStr.data(), T);
|
||||
}
|
||||
|
||||
SDValue NVPTXTargetLowering::getCallParamSymbol(SelectionDAG &DAG, int I,
|
||||
EVT T) const {
|
||||
const StringRef SavedStr = nvTM->getStrPool().save("param" + Twine(I));
|
||||
return DAG.getExternalSymbol(SavedStr.data(), T);
|
||||
}
|
||||
|
||||
SDValue NVPTXTargetLowering::LowerFormalArguments(
|
||||
@@ -3394,8 +3405,11 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
|
||||
const unsigned PackingAmt =
|
||||
LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
|
||||
|
||||
const EVT VecVT = EVT::getVectorVT(
|
||||
F->getContext(), LoadVT.getScalarType(), NumElts * PackingAmt);
|
||||
const EVT VecVT =
|
||||
NumElts == 1
|
||||
? LoadVT
|
||||
: EVT::getVectorVT(F->getContext(), LoadVT.getScalarType(),
|
||||
NumElts * PackingAmt);
|
||||
|
||||
SDValue VecAddr = DAG.getObjectPtrOffset(
|
||||
dl, ArgSymbol, TypeSize::getFixed(Offsets[I]));
|
||||
@@ -3409,22 +3423,16 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
|
||||
if (P.getNode())
|
||||
P.getNode()->setIROrder(Arg.getArgNo() + 1);
|
||||
for (const unsigned J : llvm::seq(NumElts)) {
|
||||
SDValue Elt = DAG.getNode(
|
||||
LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
|
||||
: ISD::EXTRACT_VECTOR_ELT,
|
||||
dl, LoadVT, P, DAG.getVectorIdxConstant(J * PackingAmt, dl));
|
||||
SDValue Elt =
|
||||
NumElts == 1
|
||||
? P
|
||||
: DAG.getNode(LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
|
||||
: ISD::EXTRACT_VECTOR_ELT,
|
||||
dl, LoadVT, P,
|
||||
DAG.getVectorIdxConstant(J * PackingAmt, dl));
|
||||
|
||||
// Extend or truncate the element if necessary (e.g. an i8 is loaded
|
||||
// into an i16 register)
|
||||
const EVT ExpectedVT = ArgIns[I + J].VT;
|
||||
assert((Elt.getValueType() == ExpectedVT ||
|
||||
(ExpectedVT.isInteger() && Elt.getValueType().isInteger())) &&
|
||||
"Non-integer argument type size mismatch");
|
||||
if (ExpectedVT.bitsGT(Elt.getValueType()))
|
||||
Elt = DAG.getNode(getExtOpcode(ArgIns[I + J].Flags), dl, ExpectedVT,
|
||||
Elt);
|
||||
else if (ExpectedVT.bitsLT(Elt.getValueType()))
|
||||
Elt = DAG.getNode(ISD::TRUNCATE, dl, ExpectedVT, Elt);
|
||||
Elt = correctParamType(Elt, ArgIns[I + J].VT, ArgIns[I + J].Flags,
|
||||
DAG, dl);
|
||||
InVals.push_back(Elt);
|
||||
}
|
||||
I += NumElts;
|
||||
@@ -3467,25 +3475,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
||||
|
||||
const auto GetRetVal = [&](unsigned I) -> SDValue {
|
||||
SDValue RetVal = OutVals[I];
|
||||
assert(!PromoteScalarIntegerPTX(RetVal.getValueType()) &&
|
||||
assert(promoteScalarIntegerPTX(RetVal.getValueType()) ==
|
||||
RetVal.getValueType() &&
|
||||
"OutVal type should always be legal");
|
||||
|
||||
EVT VTI = VTs[I];
|
||||
if (const auto PromotedVT = PromoteScalarIntegerPTX(VTI))
|
||||
VTI = *PromotedVT;
|
||||
|
||||
const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
|
||||
const EVT StoreVT =
|
||||
ExtendIntegerRetVal ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
|
||||
|
||||
assert((RetVal.getValueType() == StoreVT ||
|
||||
(StoreVT.isInteger() && RetVal.getValueType().isInteger())) &&
|
||||
"Non-integer argument type size mismatch");
|
||||
if (StoreVT.bitsGT(RetVal.getValueType())) {
|
||||
RetVal = DAG.getNode(getExtOpcode(Outs[I].Flags), dl, StoreVT, RetVal);
|
||||
} else if (StoreVT.bitsLT(RetVal.getValueType())) {
|
||||
RetVal = DAG.getNode(ISD::TRUNCATE, dl, StoreVT, RetVal);
|
||||
}
|
||||
return RetVal;
|
||||
return correctParamType(RetVal, StoreVT, Outs[I].Flags, DAG, dl);
|
||||
};
|
||||
|
||||
const auto RetAlign = getFunctionParamOptimizedAlign(&F, RetTy, DL);
|
||||
@@ -3500,7 +3497,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
||||
if (NumElts == 1) {
|
||||
Val = GetRetVal(I);
|
||||
} else {
|
||||
SmallVector<SDValue, 6> StoreVals;
|
||||
SmallVector<SDValue, 4> StoreVals;
|
||||
for (const unsigned J : llvm::seq(NumElts)) {
|
||||
SDValue ValJ = GetRetVal(I + J);
|
||||
if (ValJ.getValueType().isVector())
|
||||
@@ -3514,7 +3511,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
||||
Val = DAG.getBuildVector(VT, dl, StoreVals);
|
||||
}
|
||||
|
||||
SDValue RetSymbol = DAG.getExternalSymbol("func_retval0", MVT::i32);
|
||||
const SDValue RetSymbol = DAG.getExternalSymbol("func_retval0", MVT::i32);
|
||||
SDValue Ptr =
|
||||
DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I]));
|
||||
|
||||
|
||||
@@ -25,10 +25,15 @@ enum NodeType : unsigned {
|
||||
// Start the numbering from where ISD NodeType finishes.
|
||||
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
||||
RET_GLUE,
|
||||
DeclareParam,
|
||||
|
||||
/// These nodes represent a parameter declaration. In PTX this will look like:
|
||||
/// .param .align 16 .b8 param0[1024];
|
||||
/// .param .b32 retval0;
|
||||
///
|
||||
/// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
|
||||
/// DeclareScalarParam(Chain, Externalsym, Size, Glue)
|
||||
DeclareScalarParam,
|
||||
DeclareRetParam,
|
||||
DeclareRet,
|
||||
DeclareArrayParam,
|
||||
|
||||
/// This node represents a PTX call instruction. It's operands are as follows:
|
||||
///
|
||||
@@ -174,7 +179,6 @@ public:
|
||||
|
||||
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
|
||||
const SmallVectorImpl<ISD::OutputArg> &,
|
||||
MaybeAlign RetAlign,
|
||||
std::optional<unsigned> FirstVAArg,
|
||||
const CallBase &CB, unsigned UniqueCallSite) const;
|
||||
|
||||
@@ -272,8 +276,8 @@ private:
|
||||
const NVPTXSubtarget &STI; // cache the subtarget here
|
||||
mutable unsigned GlobalUniqueCallSite;
|
||||
|
||||
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
|
||||
|
||||
SDValue getParamSymbol(SelectionDAG &DAG, int I, EVT T) const;
|
||||
SDValue getCallParamSymbol(SelectionDAG &DAG, int I, EVT T) const;
|
||||
SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
|
||||
@@ -1990,9 +1990,9 @@ defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;
|
||||
defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
|
||||
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
|
||||
|
||||
def SDTDeclareParamProfile :
|
||||
def SDTDeclareArrayParam :
|
||||
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
|
||||
def SDTDeclareScalarParamProfile :
|
||||
def SDTDeclareScalarParam :
|
||||
SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
|
||||
def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
|
||||
def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
|
||||
@@ -2001,22 +2001,17 @@ def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
|
||||
def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
|
||||
def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
|
||||
def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
|
||||
def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
|
||||
|
||||
def DeclareParam :
|
||||
SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
|
||||
def SDTProxyReg : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
|
||||
|
||||
|
||||
def declare_array_param :
|
||||
SDNode<"NVPTXISD::DeclareArrayParam", SDTDeclareArrayParam,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
def DeclareScalarParam :
|
||||
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
def DeclareRetParam :
|
||||
SDNode<"NVPTXISD::DeclareRetParam",
|
||||
SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
def DeclareRet :
|
||||
SDNode<"NVPTXISD::DeclareRet",
|
||||
SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>,
|
||||
def declare_scalar_param :
|
||||
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParam,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
|
||||
def LoadParam :
|
||||
SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
|
||||
@@ -2037,9 +2032,8 @@ def StoreParamV4 :
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
def MoveParam :
|
||||
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
|
||||
def ProxyReg :
|
||||
SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
def proxy_reg :
|
||||
SDNode<"NVPTXISD::ProxyReg", SDTProxyReg, [SDNPHasChain]>;
|
||||
|
||||
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
|
||||
/// NumParams, Callee, Proto, InGlue)
|
||||
@@ -2188,23 +2182,17 @@ defm StoreParamV2F64 : StoreParamV2Inst<B64, f64imm, ".b64">;
|
||||
|
||||
defm StoreParamV4F32 : StoreParamV4Inst<B32, f32imm, ".b32">;
|
||||
|
||||
def DeclareRetMemInst :
|
||||
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size),
|
||||
".param .align $align .b8 retval0[$size];",
|
||||
[(DeclareRetParam imm:$align, imm:$size)]>;
|
||||
def DeclareRetScalarInst :
|
||||
NVPTXInst<(outs), (ins i32imm:$size),
|
||||
".param .b$size retval0;",
|
||||
[(DeclareRet imm:$size)]>;
|
||||
|
||||
def DeclareParamInst :
|
||||
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
|
||||
".param .align $align .b8 param$a[$size];",
|
||||
[(DeclareParam imm:$align, imm:$a, imm:$size)]>;
|
||||
def DeclareScalarParamInst :
|
||||
def DECLARE_PARAM_array :
|
||||
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size),
|
||||
".param .align $align .b8 \t$a[$size];", []>;
|
||||
def DECLARE_PARAM_scalar :
|
||||
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
|
||||
".param .b$size param$a;",
|
||||
[(DeclareScalarParam imm:$a, imm:$size)]>;
|
||||
".param .b$size \t$a;", []>;
|
||||
|
||||
def : Pat<(declare_array_param externalsym:$a, imm:$align, imm:$size),
|
||||
(DECLARE_PARAM_array (to_texternsym $a), imm:$align, imm:$size)>;
|
||||
def : Pat<(declare_scalar_param externalsym:$a, imm:$size),
|
||||
(DECLARE_PARAM_scalar (to_texternsym $a), imm:$size)>;
|
||||
|
||||
foreach t = [I32RT, I64RT] in {
|
||||
defvar inst_name = "MOV" # t.Size # "_PARAM";
|
||||
@@ -2217,7 +2205,7 @@ multiclass ProxyRegInst<string SzStr, NVPTXRegClass rc> {
|
||||
def NAME : BasicNVPTXInst<(outs rc:$dst), (ins rc:$src),
|
||||
"mov." # SzStr>;
|
||||
foreach vt = rc.RegTypes in
|
||||
def : Pat<(vt (ProxyReg vt:$src)), (!cast<NVPTXInst>(NAME) $src)>;
|
||||
def : Pat<(vt (proxy_reg vt:$src)), (!cast<NVPTXInst>(NAME) $src)>;
|
||||
}
|
||||
|
||||
defm ProxyRegB1 : ProxyRegInst<"pred", B1>;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -21,17 +21,17 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: ld.param.b8 %rs1, [relaxed_sys_i8_param_2];
|
||||
; SM30-NEXT: ld.param.b64 %rd2, [relaxed_sys_i8_param_0];
|
||||
; SM30-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM30-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM30-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM30-NEXT: mov.b32 %r11, 255;
|
||||
; SM30-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r12;
|
||||
; SM30-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM30-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM30-NEXT: ld.param.b8 %r15, [relaxed_sys_i8_param_1];
|
||||
; SM30-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM30-NEXT: ld.param.b8 %r9, [relaxed_sys_i8_param_1];
|
||||
; SM30-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM30-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM30-NEXT: mov.b32 %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r13;
|
||||
; SM30-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM30-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM30-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM30-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
|
||||
@@ -48,7 +48,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: mov.b32 %r20, %r8;
|
||||
; SM30-NEXT: @%p2 bra $L__BB0_1;
|
||||
; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM30-NEXT: ret;
|
||||
;
|
||||
; SM70-LABEL: relaxed_sys_i8(
|
||||
@@ -62,17 +62,17 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: ld.param.b8 %rs1, [relaxed_sys_i8_param_2];
|
||||
; SM70-NEXT: ld.param.b64 %rd2, [relaxed_sys_i8_param_0];
|
||||
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM70-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM70-NEXT: mov.b32 %r11, 255;
|
||||
; SM70-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r12;
|
||||
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM70-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM70-NEXT: ld.param.b8 %r15, [relaxed_sys_i8_param_1];
|
||||
; SM70-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM70-NEXT: ld.param.b8 %r9, [relaxed_sys_i8_param_1];
|
||||
; SM70-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM70-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM70-NEXT: mov.b32 %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r13;
|
||||
; SM70-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM70-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM70-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM70-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
|
||||
@@ -89,7 +89,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: mov.b32 %r20, %r8;
|
||||
; SM70-NEXT: @%p2 bra $L__BB0_1;
|
||||
; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM70-NEXT: ret;
|
||||
; SM90-LABEL: relaxed_sys_i8(
|
||||
; SM90: {
|
||||
@@ -147,17 +147,17 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: ld.param.b8 %rs1, [acquire_sys_i8_param_2];
|
||||
; SM30-NEXT: ld.param.b64 %rd2, [acquire_sys_i8_param_0];
|
||||
; SM30-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM30-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM30-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM30-NEXT: mov.b32 %r11, 255;
|
||||
; SM30-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r12;
|
||||
; SM30-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM30-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM30-NEXT: ld.param.b8 %r15, [acquire_sys_i8_param_1];
|
||||
; SM30-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM30-NEXT: ld.param.b8 %r9, [acquire_sys_i8_param_1];
|
||||
; SM30-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM30-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM30-NEXT: mov.b32 %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r13;
|
||||
; SM30-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM30-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM30-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM30-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
|
||||
@@ -175,7 +175,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: @%p2 bra $L__BB1_1;
|
||||
; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: membar.sys;
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM30-NEXT: ret;
|
||||
;
|
||||
; SM70-LABEL: acquire_sys_i8(
|
||||
@@ -189,17 +189,17 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: ld.param.b8 %rs1, [acquire_sys_i8_param_2];
|
||||
; SM70-NEXT: ld.param.b64 %rd2, [acquire_sys_i8_param_0];
|
||||
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM70-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM70-NEXT: mov.b32 %r11, 255;
|
||||
; SM70-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r12;
|
||||
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM70-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM70-NEXT: ld.param.b8 %r15, [acquire_sys_i8_param_1];
|
||||
; SM70-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM70-NEXT: ld.param.b8 %r9, [acquire_sys_i8_param_1];
|
||||
; SM70-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM70-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM70-NEXT: mov.b32 %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r13;
|
||||
; SM70-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM70-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM70-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM70-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
|
||||
@@ -217,7 +217,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: @%p2 bra $L__BB1_1;
|
||||
; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.sys;
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM70-NEXT: ret;
|
||||
; SM90-LABEL: acquire_sys_i8(
|
||||
; SM90: {
|
||||
@@ -276,18 +276,18 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: ld.param.b8 %rs1, [release_sys_i8_param_2];
|
||||
; SM30-NEXT: ld.param.b64 %rd2, [release_sys_i8_param_0];
|
||||
; SM30-NEXT: membar.sys;
|
||||
; SM30-NEXT: ld.param.b8 %r9, [release_sys_i8_param_1];
|
||||
; SM30-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM30-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM30-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM30-NEXT: mov.b32 %r11, 255;
|
||||
; SM30-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r12;
|
||||
; SM30-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM30-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM30-NEXT: ld.param.b8 %r15, [release_sys_i8_param_1];
|
||||
; SM30-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM30-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM30-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM30-NEXT: mov.b32 %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r13;
|
||||
; SM30-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM30-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM30-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM30-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
|
||||
@@ -304,7 +304,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: mov.b32 %r20, %r8;
|
||||
; SM30-NEXT: @%p2 bra $L__BB2_1;
|
||||
; SM30-NEXT: $L__BB2_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM30-NEXT: ret;
|
||||
;
|
||||
; SM70-LABEL: release_sys_i8(
|
||||
@@ -318,18 +318,18 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: ld.param.b8 %rs1, [release_sys_i8_param_2];
|
||||
; SM70-NEXT: ld.param.b64 %rd2, [release_sys_i8_param_0];
|
||||
; SM70-NEXT: fence.acq_rel.sys;
|
||||
; SM70-NEXT: ld.param.b8 %r9, [release_sys_i8_param_1];
|
||||
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM70-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM70-NEXT: mov.b32 %r11, 255;
|
||||
; SM70-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r12;
|
||||
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM70-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM70-NEXT: ld.param.b8 %r15, [release_sys_i8_param_1];
|
||||
; SM70-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM70-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM70-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM70-NEXT: mov.b32 %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r13;
|
||||
; SM70-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM70-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM70-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM70-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
|
||||
@@ -346,7 +346,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: mov.b32 %r20, %r8;
|
||||
; SM70-NEXT: @%p2 bra $L__BB2_1;
|
||||
; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM70-NEXT: ret;
|
||||
; SM90-LABEL: release_sys_i8(
|
||||
; SM90: {
|
||||
@@ -405,18 +405,18 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: ld.param.b8 %rs1, [acq_rel_sys_i8_param_2];
|
||||
; SM30-NEXT: ld.param.b64 %rd2, [acq_rel_sys_i8_param_0];
|
||||
; SM30-NEXT: membar.sys;
|
||||
; SM30-NEXT: ld.param.b8 %r9, [acq_rel_sys_i8_param_1];
|
||||
; SM30-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM30-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM30-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM30-NEXT: mov.b32 %r11, 255;
|
||||
; SM30-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r12;
|
||||
; SM30-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM30-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM30-NEXT: ld.param.b8 %r15, [acq_rel_sys_i8_param_1];
|
||||
; SM30-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM30-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM30-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM30-NEXT: mov.b32 %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r13;
|
||||
; SM30-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM30-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM30-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM30-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
|
||||
@@ -434,7 +434,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: @%p2 bra $L__BB3_1;
|
||||
; SM30-NEXT: $L__BB3_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: membar.sys;
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM30-NEXT: ret;
|
||||
;
|
||||
; SM70-LABEL: acq_rel_sys_i8(
|
||||
@@ -448,18 +448,18 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: ld.param.b8 %rs1, [acq_rel_sys_i8_param_2];
|
||||
; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_sys_i8_param_0];
|
||||
; SM70-NEXT: fence.acq_rel.sys;
|
||||
; SM70-NEXT: ld.param.b8 %r9, [acq_rel_sys_i8_param_1];
|
||||
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM70-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM70-NEXT: mov.b32 %r11, 255;
|
||||
; SM70-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r12;
|
||||
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM70-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM70-NEXT: ld.param.b8 %r15, [acq_rel_sys_i8_param_1];
|
||||
; SM70-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM70-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM70-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM70-NEXT: mov.b32 %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r13;
|
||||
; SM70-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM70-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM70-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM70-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
|
||||
@@ -477,7 +477,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: @%p2 bra $L__BB3_1;
|
||||
; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.sys;
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM70-NEXT: ret;
|
||||
; SM90-LABEL: acq_rel_sys_i8(
|
||||
; SM90: {
|
||||
@@ -537,18 +537,18 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: ld.param.b8 %rs1, [seq_cst_sys_i8_param_2];
|
||||
; SM30-NEXT: ld.param.b64 %rd2, [seq_cst_sys_i8_param_0];
|
||||
; SM30-NEXT: membar.sys;
|
||||
; SM30-NEXT: ld.param.b8 %r9, [seq_cst_sys_i8_param_1];
|
||||
; SM30-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM30-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM30-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM30-NEXT: mov.b32 %r11, 255;
|
||||
; SM30-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r12;
|
||||
; SM30-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM30-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM30-NEXT: ld.param.b8 %r15, [seq_cst_sys_i8_param_1];
|
||||
; SM30-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM30-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM30-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM30-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM30-NEXT: mov.b32 %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r13;
|
||||
; SM30-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM30-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM30-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM30-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
|
||||
@@ -566,7 +566,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: @%p2 bra $L__BB4_1;
|
||||
; SM30-NEXT: $L__BB4_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: membar.sys;
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM30-NEXT: ret;
|
||||
;
|
||||
; SM70-LABEL: seq_cst_sys_i8(
|
||||
@@ -580,18 +580,18 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: ld.param.b8 %rs1, [seq_cst_sys_i8_param_2];
|
||||
; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_sys_i8_param_0];
|
||||
; SM70-NEXT: fence.sc.sys;
|
||||
; SM70-NEXT: ld.param.b8 %r9, [seq_cst_sys_i8_param_1];
|
||||
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
|
||||
; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
|
||||
; SM70-NEXT: and.b32 %r10, %r9, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r10, 3;
|
||||
; SM70-NEXT: mov.b32 %r11, 255;
|
||||
; SM70-NEXT: shl.b32 %r12, %r11, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r12;
|
||||
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
|
||||
; SM70-NEXT: and.b32 %r14, %r13, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r14, %r1;
|
||||
; SM70-NEXT: ld.param.b8 %r15, [seq_cst_sys_i8_param_1];
|
||||
; SM70-NEXT: shl.b32 %r4, %r15, %r1;
|
||||
; SM70-NEXT: cvt.u32.u64 %r10, %rd2;
|
||||
; SM70-NEXT: and.b32 %r11, %r10, 3;
|
||||
; SM70-NEXT: shl.b32 %r1, %r11, 3;
|
||||
; SM70-NEXT: mov.b32 %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r13, %r12, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r13;
|
||||
; SM70-NEXT: cvt.u32.u16 %r14, %rs1;
|
||||
; SM70-NEXT: and.b32 %r15, %r14, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r15, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r9, %r1;
|
||||
; SM70-NEXT: ld.b32 %r16, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r20, %r16, %r2;
|
||||
; SM70-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
|
||||
@@ -609,7 +609,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: @%p2 bra $L__BB4_1;
|
||||
; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.sys;
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; SM70-NEXT: ret;
|
||||
; SM90-LABEL: seq_cst_sys_i8(
|
||||
; SM90: {
|
||||
|
||||
@@ -24,11 +24,11 @@ define i16 @cvt_i16_i32(i32 %x) {
|
||||
define i16 @cvt_i16_i64(i64 %x) {
|
||||
; CHECK-LABEL: cvt_i16_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [cvt_i16_i64_param_0];
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
|
||||
; CHECK-NEXT: ld.param.b16 %r1, [cvt_i16_i64_param_0];
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i64 %x to i16
|
||||
ret i16 %a
|
||||
|
||||
@@ -158,27 +158,24 @@ define i16 @test_v8i8(i64 %a) {
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<16>;
|
||||
; CHECK-NEXT: .reg .b32 %r<12>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_v8i8_param_0];
|
||||
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %r1}, %rd1; }
|
||||
; CHECK-NEXT: cvt.u32.u64 %r2, %rd1;
|
||||
; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8;
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_v8i8_param_0];
|
||||
; CHECK-NEXT: bfe.s32 %r3, %r1, 0, 8;
|
||||
; CHECK-NEXT: cvt.s8.s32 %rs1, %r3;
|
||||
; CHECK-NEXT: bfe.s32 %r4, %r2, 8, 8;
|
||||
; CHECK-NEXT: bfe.s32 %r4, %r1, 8, 8;
|
||||
; CHECK-NEXT: cvt.s8.s32 %rs2, %r4;
|
||||
; CHECK-NEXT: bfe.s32 %r5, %r2, 16, 8;
|
||||
; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8;
|
||||
; CHECK-NEXT: cvt.s8.s32 %rs3, %r5;
|
||||
; CHECK-NEXT: bfe.s32 %r6, %r2, 24, 8;
|
||||
; CHECK-NEXT: bfe.s32 %r6, %r1, 24, 8;
|
||||
; CHECK-NEXT: cvt.s8.s32 %rs4, %r6;
|
||||
; CHECK-NEXT: bfe.s32 %r7, %r1, 0, 8;
|
||||
; CHECK-NEXT: bfe.s32 %r7, %r2, 0, 8;
|
||||
; CHECK-NEXT: cvt.s8.s32 %rs5, %r7;
|
||||
; CHECK-NEXT: bfe.s32 %r8, %r1, 8, 8;
|
||||
; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8;
|
||||
; CHECK-NEXT: cvt.s8.s32 %rs6, %r8;
|
||||
; CHECK-NEXT: bfe.s32 %r9, %r1, 16, 8;
|
||||
; CHECK-NEXT: bfe.s32 %r9, %r2, 16, 8;
|
||||
; CHECK-NEXT: cvt.s8.s32 %rs7, %r9;
|
||||
; CHECK-NEXT: bfe.s32 %r10, %r1, 24, 8;
|
||||
; CHECK-NEXT: bfe.s32 %r10, %r2, 24, 8;
|
||||
; CHECK-NEXT: cvt.s8.s32 %rs8, %r10;
|
||||
; CHECK-NEXT: add.s16 %rs9, %rs1, %rs2;
|
||||
; CHECK-NEXT: add.s16 %rs10, %rs3, %rs4;
|
||||
|
||||
@@ -121,7 +121,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
|
||||
define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
|
||||
; PTX-LABEL: grid_const_escape(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b32 %r<3>;
|
||||
; PTX-NEXT: .reg .b32 %r<2>;
|
||||
; PTX-NEXT: .reg .b64 %rd<4>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
@@ -153,7 +153,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
|
||||
; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
|
||||
; PTX-NEXT: .reg .b64 %SP;
|
||||
; PTX-NEXT: .reg .b64 %SPL;
|
||||
; PTX-NEXT: .reg .b32 %r<4>;
|
||||
; PTX-NEXT: .reg .b32 %r<3>;
|
||||
; PTX-NEXT: .reg .b64 %rd<8>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
@@ -255,7 +255,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
|
||||
define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
|
||||
; PTX-LABEL: grid_const_partial_escape(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b32 %r<5>;
|
||||
; PTX-NEXT: .reg .b32 %r<4>;
|
||||
; PTX-NEXT: .reg .b64 %rd<6>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
@@ -295,7 +295,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
|
||||
define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) {
|
||||
; PTX-LABEL: grid_const_partial_escapemem(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b32 %r<6>;
|
||||
; PTX-NEXT: .reg .b32 %r<5>;
|
||||
; PTX-NEXT: .reg .b64 %rd<6>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
|
||||
@@ -31,7 +31,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
|
||||
; PTX-LABEL: load_alignment(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b32 %r<4>;
|
||||
; PTX-NEXT: .reg .b64 %rd<8>;
|
||||
; PTX-NEXT: .reg .b64 %rd<7>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0: // %entry
|
||||
; PTX-NEXT: mov.b64 %rd1, load_alignment_param_0;
|
||||
@@ -76,7 +76,7 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
|
||||
;
|
||||
; PTX-LABEL: load_padding(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b64 %rd<5>;
|
||||
; PTX-NEXT: .reg .b64 %rd<4>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
; PTX-NEXT: mov.b64 %rd1, load_padding_param_0;
|
||||
|
||||
@@ -8,7 +8,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
|
||||
; CHECK-LABEL: wombat(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<11>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<7>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<6>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0: // %bb
|
||||
; CHECK-NEXT: ld.param.b32 %r4, [wombat_param_2];
|
||||
@@ -27,11 +27,11 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
|
||||
; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3;
|
||||
; CHECK-NEXT: or.b32 %r8, %r4, %r7;
|
||||
; CHECK-NEXT: mul.lo.s32 %r9, %r2, %r8;
|
||||
; CHECK-NEXT: cvt.rn.f64.s32 %rd3, %r9;
|
||||
; CHECK-NEXT: cvt.rn.f64.u32 %rd4, %r10;
|
||||
; CHECK-NEXT: add.rn.f64 %rd5, %rd4, %rd3;
|
||||
; CHECK-NEXT: mov.b64 %rd6, 0;
|
||||
; CHECK-NEXT: st.global.b64 [%rd6], %rd5;
|
||||
; CHECK-NEXT: cvt.rn.f64.s32 %rd2, %r9;
|
||||
; CHECK-NEXT: cvt.rn.f64.u32 %rd3, %r10;
|
||||
; CHECK-NEXT: add.rn.f64 %rd4, %rd3, %rd2;
|
||||
; CHECK-NEXT: mov.b64 %rd5, 0;
|
||||
; CHECK-NEXT: st.global.b64 [%rd5], %rd4;
|
||||
; CHECK-NEXT: mov.b32 %r10, 1;
|
||||
; CHECK-NEXT: bra.uni $L__BB0_1;
|
||||
bb:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=nvptx64 -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc -mtriple=nvptx64 -verify-machineinstrs < %s | %ptxas-verify %}
|
||||
|
||||
@@ -10,9 +11,19 @@
|
||||
; value will be identical regardless of the boolean representation.
|
||||
; Check that the optimization triggers in this case.
|
||||
|
||||
; CHECK-LABEL: @pow2_mask_cmp
|
||||
; CHECK: bfe.u32 {{%r[0-9]+}}, {{%r[0-9]+}}, 3, 1
|
||||
define i32 @pow2_mask_cmp(i32 %x) {
|
||||
; CHECK-LABEL: pow2_mask_cmp(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [pow2_mask_cmp_param_0];
|
||||
; CHECK-NEXT: shr.u16 %rs2, %rs1, 3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
|
||||
; CHECK-NEXT: and.b32 %r2, %r1, 1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = and i32 %x, 8
|
||||
%cmp = icmp ne i32 %a, 0
|
||||
%r = zext i1 %cmp to i32
|
||||
|
||||
@@ -445,12 +445,12 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irrr_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irrr_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irrr_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_irrr_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_irrr_param_0];
|
||||
; CHECK-NEXT: { // callseq 24, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, %rs2, %rs3};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs3, %rs2, %rs1};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 24
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -467,12 +467,12 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rirr_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rirr_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rirr_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rirr_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rirr_param_0];
|
||||
; CHECK-NEXT: { // callseq 25, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, %rs2, %rs3};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, 2, %rs2, %rs1};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 25
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -489,12 +489,12 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrir_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrir_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrir_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrir_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrir_param_0];
|
||||
; CHECK-NEXT: { // callseq 26, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, 3, %rs3};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, 3, %rs1};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 26
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -511,12 +511,12 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrri_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrri_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrri_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrri_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrri_param_0];
|
||||
; CHECK-NEXT: { // callseq 27, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, %rs3, 4};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, %rs1, 4};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 27
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -533,11 +533,11 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iirr_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_iirr_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iirr_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_iirr_param_0];
|
||||
; CHECK-NEXT: { // callseq 28, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, %rs2};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs2, %rs1};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 28
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -554,11 +554,11 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irir_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irir_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irir_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irir_param_0];
|
||||
; CHECK-NEXT: { // callseq 29, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, %rs2};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, 3, %rs1};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 29
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -575,11 +575,11 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irri_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irri_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irri_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irri_param_0];
|
||||
; CHECK-NEXT: { // callseq 30, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, %rs2, 4};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, %rs1, 4};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 30
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -596,11 +596,11 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riir_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riir_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riir_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riir_param_0];
|
||||
; CHECK-NEXT: { // callseq 31, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, %rs2};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, 3, %rs1};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 31
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -617,11 +617,11 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riri_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riri_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riri_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riri_param_0];
|
||||
; CHECK-NEXT: { // callseq 32, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, %rs2, 4};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, %rs1, 4};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 32
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -638,11 +638,11 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrii_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrii_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrii_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrii_param_0];
|
||||
; CHECK-NEXT: { // callseq 33, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, 3, 4};
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, %rs1, 3, 4};
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 33
|
||||
; CHECK-NEXT: ret;
|
||||
|
||||
@@ -103,16 +103,16 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
; CHECK-NEXT: cvt.u32.u16 %r16, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r17, %rs4;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r18, %rs5;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r19, %rs6;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r19;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r18;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r17;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
|
||||
@@ -185,48 +185,48 @@ define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
|
||||
; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
|
||||
; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
|
||||
; CHECK-NEXT: } // callseq 2
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd35, %rs3;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3;
|
||||
; CHECK-NEXT: and.b64 %rd34, %rd33, 255;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4;
|
||||
; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd37, %rs4;
|
||||
; CHECK-NEXT: and.b64 %rd38, %rd37, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd39, %rd38, 8;
|
||||
; CHECK-NEXT: or.b64 %rd40, %rd36, %rd39;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd41, %rs5;
|
||||
; CHECK-NEXT: and.b64 %rd42, %rd41, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd43, %rd42, 16;
|
||||
; CHECK-NEXT: or.b64 %rd44, %rd40, %rd43;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd45, %rs6;
|
||||
; CHECK-NEXT: and.b64 %rd46, %rd45, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd47, %rd46, 24;
|
||||
; CHECK-NEXT: or.b64 %rd48, %rd44, %rd47;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd49, %rs7;
|
||||
; CHECK-NEXT: and.b64 %rd50, %rd49, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd51, %rd50, 32;
|
||||
; CHECK-NEXT: or.b64 %rd52, %rd48, %rd51;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd53, %rs8;
|
||||
; CHECK-NEXT: and.b64 %rd54, %rd53, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd55, %rd54, 40;
|
||||
; CHECK-NEXT: or.b64 %rd56, %rd52, %rd55;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd57, %rs9;
|
||||
; CHECK-NEXT: and.b64 %rd58, %rd57, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd59, %rd58, 48;
|
||||
; CHECK-NEXT: or.b64 %rd60, %rd56, %rd59;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd61, %rs10;
|
||||
; CHECK-NEXT: shl.b64 %rd62, %rd61, 56;
|
||||
; CHECK-NEXT: or.b64 %rd63, %rd60, %rd62;
|
||||
; CHECK-NEXT: shl.b64 %rd37, %rd36, 8;
|
||||
; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5;
|
||||
; CHECK-NEXT: and.b64 %rd40, %rd39, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd41, %rd40, 16;
|
||||
; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6;
|
||||
; CHECK-NEXT: and.b64 %rd44, %rd43, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd45, %rd44, 24;
|
||||
; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7;
|
||||
; CHECK-NEXT: and.b64 %rd48, %rd47, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd49, %rd48, 32;
|
||||
; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8;
|
||||
; CHECK-NEXT: and.b64 %rd52, %rd51, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd53, %rd52, 40;
|
||||
; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9;
|
||||
; CHECK-NEXT: and.b64 %rd56, %rd55, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd57, %rd56, 48;
|
||||
; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10;
|
||||
; CHECK-NEXT: shl.b64 %rd60, %rd59, 56;
|
||||
; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd45;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd41;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd37;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd35;
|
||||
; CHECK-NEXT: shr.u64 %rd64, %rd52, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33;
|
||||
; CHECK-NEXT: shr.u64 %rd64, %rd50, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
|
||||
; CHECK-NEXT: shr.u64 %rd65, %rd56, 40;
|
||||
; CHECK-NEXT: shr.u64 %rd65, %rd54, 40;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
|
||||
; CHECK-NEXT: shr.u64 %rd66, %rd60, 48;
|
||||
; CHECK-NEXT: shr.u64 %rd66, %rd58, 48;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
|
||||
; CHECK-NEXT: shr.u64 %rd67, %rd63, 56;
|
||||
; CHECK-NEXT: shr.u64 %rd67, %rd61, 56;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
|
||||
; CHECK-NEXT: ret;
|
||||
@@ -317,16 +317,16 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
|
||||
; CHECK-NEXT: } // callseq 4
|
||||
; CHECK-NEXT: cvt.u32.u16 %r16, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r17, %rs4;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r18, %rs5;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r19, %rs6;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r19;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r18;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r17;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
|
||||
@@ -376,16 +376,16 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
|
||||
; CHECK-NEXT: } // callseq 5
|
||||
; CHECK-NEXT: cvt.u32.u16 %r16, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r17, %rs4;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r18, %rs5;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r19, %rs6;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r19;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r18;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r17;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
|
||||
@@ -458,48 +458,48 @@ define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
|
||||
; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
|
||||
; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
|
||||
; CHECK-NEXT: } // callseq 6
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd35, %rs3;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3;
|
||||
; CHECK-NEXT: and.b64 %rd34, %rd33, 255;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4;
|
||||
; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd37, %rs4;
|
||||
; CHECK-NEXT: and.b64 %rd38, %rd37, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd39, %rd38, 8;
|
||||
; CHECK-NEXT: or.b64 %rd40, %rd36, %rd39;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd41, %rs5;
|
||||
; CHECK-NEXT: and.b64 %rd42, %rd41, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd43, %rd42, 16;
|
||||
; CHECK-NEXT: or.b64 %rd44, %rd40, %rd43;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd45, %rs6;
|
||||
; CHECK-NEXT: and.b64 %rd46, %rd45, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd47, %rd46, 24;
|
||||
; CHECK-NEXT: or.b64 %rd48, %rd44, %rd47;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd49, %rs7;
|
||||
; CHECK-NEXT: and.b64 %rd50, %rd49, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd51, %rd50, 32;
|
||||
; CHECK-NEXT: or.b64 %rd52, %rd48, %rd51;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd53, %rs8;
|
||||
; CHECK-NEXT: and.b64 %rd54, %rd53, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd55, %rd54, 40;
|
||||
; CHECK-NEXT: or.b64 %rd56, %rd52, %rd55;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd57, %rs9;
|
||||
; CHECK-NEXT: and.b64 %rd58, %rd57, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd59, %rd58, 48;
|
||||
; CHECK-NEXT: or.b64 %rd60, %rd56, %rd59;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd61, %rs10;
|
||||
; CHECK-NEXT: shl.b64 %rd62, %rd61, 56;
|
||||
; CHECK-NEXT: or.b64 %rd63, %rd60, %rd62;
|
||||
; CHECK-NEXT: shl.b64 %rd37, %rd36, 8;
|
||||
; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5;
|
||||
; CHECK-NEXT: and.b64 %rd40, %rd39, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd41, %rd40, 16;
|
||||
; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6;
|
||||
; CHECK-NEXT: and.b64 %rd44, %rd43, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd45, %rd44, 24;
|
||||
; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7;
|
||||
; CHECK-NEXT: and.b64 %rd48, %rd47, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd49, %rd48, 32;
|
||||
; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8;
|
||||
; CHECK-NEXT: and.b64 %rd52, %rd51, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd53, %rd52, 40;
|
||||
; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9;
|
||||
; CHECK-NEXT: and.b64 %rd56, %rd55, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd57, %rd56, 48;
|
||||
; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10;
|
||||
; CHECK-NEXT: shl.b64 %rd60, %rd59, 56;
|
||||
; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd45;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd41;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd37;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd35;
|
||||
; CHECK-NEXT: shr.u64 %rd64, %rd52, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33;
|
||||
; CHECK-NEXT: shr.u64 %rd64, %rd50, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
|
||||
; CHECK-NEXT: shr.u64 %rd65, %rd56, 40;
|
||||
; CHECK-NEXT: shr.u64 %rd65, %rd54, 40;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
|
||||
; CHECK-NEXT: shr.u64 %rd66, %rd60, 48;
|
||||
; CHECK-NEXT: shr.u64 %rd66, %rd58, 48;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
|
||||
; CHECK-NEXT: shr.u64 %rd67, %rd63, 56;
|
||||
; CHECK-NEXT: shr.u64 %rd67, %rd61, 56;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
|
||||
; CHECK-NEXT: ret;
|
||||
|
||||
@@ -348,7 +348,7 @@ define dso_local void @qux() {
|
||||
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
|
||||
; CHECK-PTX-NEXT: .reg .b64 %SP;
|
||||
; CHECK-PTX-NEXT: .reg .b64 %SPL;
|
||||
; CHECK-PTX-NEXT: .reg .b32 %r<3>;
|
||||
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
|
||||
; CHECK-PTX-EMPTY:
|
||||
; CHECK-PTX-NEXT: // %bb.0: // %entry
|
||||
|
||||
Reference in New Issue
Block a user