[NVPTX] Consolidate and cleanup various NVPTXISD nodes (NFC) (#145581)

This change consolidates and cleans up various NVPTXISD target-specific
nodes in order to simplify SDAG ISel. While there are some whitespace
changes in the emitted PTX it is otherwise a non-functional change.

NVPTXISD::Wrapper - This node was used to wrap external-symbol and
global-address nodes. It is redundant and has been removed. Instead we
use the non-target versions of these nodes and convert them
appropriately during ISel.

NVPTXISD::CALL - Much of the family of nodes used to represent a PTX
call instruction have been replaced by this new single node. It
corresponds to a single instruction and is therefore much simpler to
create and lower.
This commit is contained in:
Alex MacLean
2025-06-25 11:42:21 -07:00
committed by GitHub
parent 5f8e7ed5a3
commit 70333de6cf
46 changed files with 430 additions and 1428 deletions

View File

@@ -37,11 +37,7 @@ __device__ __bf16 test_call( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
// CHECK: st.param.b16 [param0], %[[R]];
// CHECK: .param .align 2 .b8 retval0[2];
// CHECK: call.uni (retval0),
// CHECK-NEXT: _Z13external_funcDF16b,
// CHECK-NEXT: (
// CHECK-NEXT: param0
// CHECK-NEXT );
// CHECK: call.uni (retval0), _Z13external_funcDF16b, (param0);
// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0];
return external_func(in);
// CHECK: st.param.b16 [func_retval0], %[[RET]]

View File

@@ -457,3 +457,25 @@ void NVPTXInstPrinter::printCTAGroup(const MCInst *MI, int OpNum,
}
llvm_unreachable("Invalid cta_group in printCTAGroup");
}
void NVPTXInstPrinter::printCallOperand(const MCInst *MI, int OpNum,
raw_ostream &O, StringRef Modifier) {
const MCOperand &MO = MI->getOperand(OpNum);
assert(MO.isImm() && "Invalid operand");
const auto Imm = MO.getImm();
if (Modifier == "RetList") {
assert((Imm == 1 || Imm == 0) && "Invalid return list");
if (Imm)
O << " (retval0),";
return;
}
if (Modifier == "ParamList") {
assert(Imm >= 0 && "Invalid parameter list");
interleaveComma(llvm::seq(Imm), O,
[&](const auto &I) { O << "param" << I; });
return;
}
llvm_unreachable("Invalid modifier");
}

View File

@@ -52,6 +52,8 @@ public:
void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O);
void printTmaReductionMode(const MCInst *MI, int OpNum, raw_ostream &O);
void printCTAGroup(const MCInst *MI, int OpNum, raw_ostream &O);
void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O,
StringRef Modifier = {});
};
}

View File

@@ -160,15 +160,9 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::StoreParam:
case NVPTXISD::StoreParamV2:
case NVPTXISD::StoreParamV4:
case NVPTXISD::StoreParamS32:
case NVPTXISD::StoreParamU32:
if (tryStoreParam(N))
return;
break;
case ISD::INTRINSIC_WO_CHAIN:
if (tryIntrinsicNoChain(N))
return;
break;
case ISD::INTRINSIC_W_CHAIN:
if (tryIntrinsicChain(N))
return;
@@ -904,25 +898,6 @@ NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
return {InstructionOrdering, Scope};
}
bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
unsigned IID = N->getConstantOperandVal(0);
switch (IID) {
default:
return false;
case Intrinsic::nvvm_texsurf_handle_internal:
SelectTexSurfHandle(N);
return true;
}
}
void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
// Op 0 is the intrinsic ID
SDValue Wrapper = N->getOperand(1);
SDValue GlobalVal = Wrapper.getOperand(0);
ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
MVT::i64, GlobalVal));
}
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
SDValue Src = N->getOperand(0);
AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
@@ -1717,8 +1692,6 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
switch (N->getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
case NVPTXISD::StoreParamU32:
case NVPTXISD::StoreParamS32:
case NVPTXISD::StoreParam:
NumElts = 1;
break;
@@ -1796,27 +1769,6 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
}
}
break;
// Special case: if we have a sign-extend/zero-extend node, insert the
// conversion instruction first, and use that as the value operand to
// the selected StoreParam node.
case NVPTXISD::StoreParamU32: {
Opcode = NVPTX::StoreParamI32_r;
SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
MVT::i32);
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
MVT::i32, Ops[0], CvtNone);
Ops[0] = SDValue(Cvt, 0);
break;
}
case NVPTXISD::StoreParamS32: {
Opcode = NVPTX::StoreParamI32_r;
SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
MVT::i32);
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
MVT::i32, Ops[0], CvtNone);
Ops[0] = SDValue(Cvt, 0);
break;
}
}
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
@@ -2105,22 +2057,14 @@ static inline bool isAddLike(const SDValue V) {
// selectBaseADDR - Match a dag node which will serve as the base address for an
// ADDR operand pair.
static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
// Return true if TGA or ES.
if (N.getOpcode() == ISD::TargetGlobalAddress ||
N.getOpcode() == ISD::TargetExternalSymbol)
return N;
if (N.getOpcode() == NVPTXISD::Wrapper)
return N.getOperand(0);
// addrspacecast(Wrapper(arg_symbol) to addrspace(PARAM)) -> arg_symbol
if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N))
if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
CastN->getOperand(0).getOpcode() == NVPTXISD::Wrapper)
return selectBaseADDR(CastN->getOperand(0).getOperand(0), DAG);
if (auto *FIN = dyn_cast<FrameIndexSDNode>(N))
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
GA->getValueType(0), GA->getOffset(),
GA->getTargetFlags());
if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
ES->getTargetFlags());
if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
return N;

View File

@@ -69,7 +69,6 @@ private:
#include "NVPTXGenDAGISel.inc"
void Select(SDNode *N) override;
bool tryIntrinsicNoChain(SDNode *N);
bool tryIntrinsicChain(SDNode *N);
bool tryIntrinsicVoid(SDNode *N);
void SelectTexSurfHandle(SDNode *N);

View File

@@ -702,9 +702,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
// We want to legalize constant related memmove and memcopy
// intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -1055,45 +1052,24 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::FIRST_NUMBER:
break;
MAKE_CASE(NVPTXISD::CALL)
MAKE_CASE(NVPTXISD::RET_GLUE)
MAKE_CASE(NVPTXISD::LOAD_PARAM)
MAKE_CASE(NVPTXISD::Wrapper)
MAKE_CASE(NVPTXISD::DeclareParam)
MAKE_CASE(NVPTXISD::DeclareScalarParam)
MAKE_CASE(NVPTXISD::DeclareRet)
MAKE_CASE(NVPTXISD::DeclareScalarRet)
MAKE_CASE(NVPTXISD::DeclareRetParam)
MAKE_CASE(NVPTXISD::PrintCall)
MAKE_CASE(NVPTXISD::PrintConvergentCall)
MAKE_CASE(NVPTXISD::PrintCallUni)
MAKE_CASE(NVPTXISD::PrintConvergentCallUni)
MAKE_CASE(NVPTXISD::CALL)
MAKE_CASE(NVPTXISD::LoadParam)
MAKE_CASE(NVPTXISD::LoadParamV2)
MAKE_CASE(NVPTXISD::LoadParamV4)
MAKE_CASE(NVPTXISD::StoreParam)
MAKE_CASE(NVPTXISD::StoreParamV2)
MAKE_CASE(NVPTXISD::StoreParamV4)
MAKE_CASE(NVPTXISD::StoreParamS32)
MAKE_CASE(NVPTXISD::StoreParamU32)
MAKE_CASE(NVPTXISD::CallArgBegin)
MAKE_CASE(NVPTXISD::CallArg)
MAKE_CASE(NVPTXISD::LastCallArg)
MAKE_CASE(NVPTXISD::CallArgEnd)
MAKE_CASE(NVPTXISD::CallVoid)
MAKE_CASE(NVPTXISD::CallVal)
MAKE_CASE(NVPTXISD::CallSymbol)
MAKE_CASE(NVPTXISD::Prototype)
MAKE_CASE(NVPTXISD::MoveParam)
MAKE_CASE(NVPTXISD::StoreRetval)
MAKE_CASE(NVPTXISD::StoreRetvalV2)
MAKE_CASE(NVPTXISD::StoreRetvalV4)
MAKE_CASE(NVPTXISD::PseudoUseParam)
MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
MAKE_CASE(NVPTXISD::BUILD_VECTOR)
MAKE_CASE(NVPTXISD::RETURN)
MAKE_CASE(NVPTXISD::CallSeqBegin)
MAKE_CASE(NVPTXISD::CallSeqEnd)
MAKE_CASE(NVPTXISD::CallPrototype)
MAKE_CASE(NVPTXISD::ProxyReg)
MAKE_CASE(NVPTXISD::LoadV2)
@@ -1115,7 +1091,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(NVPTXISD::STACKSAVE)
MAKE_CASE(NVPTXISD::SETP_F16X2)
MAKE_CASE(NVPTXISD::SETP_BF16X2)
MAKE_CASE(NVPTXISD::Dummy)
MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
MAKE_CASE(NVPTXISD::BrxEnd)
@@ -1189,15 +1164,6 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
}
}
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}
std::string NVPTXTargetLowering::getPrototype(
const DataLayout &DL, Type *retTy, const ArgListTy &Args,
const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign RetAlign,
@@ -1601,9 +1567,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
? promoteScalarArgumentSize(TypeSize * 8)
: TypeSize * 8;
Chain = DAG.getNode(
NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
{Chain, GetI32(ArgI), GetI32(PromotedSize), GetI32(0), InGlue});
Chain =
DAG.getNode(NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
{Chain, GetI32(ArgI), GetI32(PromotedSize), InGlue});
}
InGlue = Chain.getValue(1);
@@ -1740,16 +1706,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const unsigned ResultSize = DL.getTypeAllocSizeInBits(RetTy);
if (!shouldPassAsArray(RetTy)) {
const unsigned PromotedResultSize = promoteScalarArgumentSize(ResultSize);
SDValue DeclareRetOps[] = {Chain, GetI32(1), GetI32(PromotedResultSize),
GetI32(0), InGlue};
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, {MVT::Other, MVT::Glue},
DeclareRetOps);
{Chain, GetI32(PromotedResultSize), InGlue});
InGlue = Chain.getValue(1);
} else {
SDValue DeclareRetOps[] = {Chain, GetI32(RetAlign->value()),
GetI32(ResultSize / 8), GetI32(0), InGlue};
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl,
{MVT::Other, MVT::Glue}, DeclareRetOps);
Chain = DAG.getNode(
NVPTXISD::DeclareRetParam, dl, {MVT::Other, MVT::Glue},
{Chain, GetI32(RetAlign->value()), GetI32(ResultSize / 8), InGlue});
InGlue = Chain.getValue(1);
}
}
@@ -1800,25 +1763,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
HasVAArgs ? std::optional(FirstVAArg) : std::nullopt, *CB,
UniqueCallSite);
const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
SDValue ProtoOps[] = {
Chain,
DAG.getTargetExternalSymbol(ProtoStr, MVT::i32),
InGlue,
};
Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue},
ProtoOps);
Chain = DAG.getNode(
NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue},
{Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InGlue});
InGlue = Chain.getValue(1);
}
// Op to just print "call"
SDValue PrintCallOps[] = {Chain, GetI32(Ins.empty() ? 0 : 1), InGlue};
// We model convergent calls as separate opcodes.
unsigned Opcode =
IsIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
if (CLI.IsConvergent)
Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
: NVPTXISD::PrintConvergentCall;
Chain = DAG.getNode(Opcode, dl, {MVT::Other, MVT::Glue}, PrintCallOps);
InGlue = Chain.getValue(1);
if (ConvertToIndirectCall) {
// Copy the function ptr to a ptx register and use the register to call the
@@ -1832,38 +1781,17 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee = DAG.getCopyFromReg(RegCopy, dl, DestReg, DestVT);
}
// Ops to print out the function name
SDValue CallVoidOps[] = { Chain, Callee, InGlue };
Chain =
DAG.getNode(NVPTXISD::CallVoid, dl, {MVT::Other, MVT::Glue}, CallVoidOps);
const unsigned Proto = IsIndirectCall ? UniqueCallSite : 0;
const unsigned NumArgs =
std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
/// NumParams, Callee, Proto, InGlue)
Chain = DAG.getNode(NVPTXISD::CALL, dl, {MVT::Other, MVT::Glue},
{Chain, GetI32(CLI.IsConvergent), GetI32(IsIndirectCall),
GetI32(Ins.empty() ? 0 : 1), GetI32(NumArgs), Callee,
GetI32(Proto), InGlue});
InGlue = Chain.getValue(1);
// Ops to print out the param list
SDValue CallArgBeginOps[] = { Chain, InGlue };
Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, {MVT::Other, MVT::Glue},
CallArgBeginOps);
InGlue = Chain.getValue(1);
const unsigned E = std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
for (const unsigned I : llvm::seq(E)) {
const unsigned Opcode =
I == (E - 1) ? NVPTXISD::LastCallArg : NVPTXISD::CallArg;
SDValue CallArgOps[] = {Chain, GetI32(1), GetI32(I), InGlue};
Chain = DAG.getNode(Opcode, dl, {MVT::Other, MVT::Glue}, CallArgOps);
InGlue = Chain.getValue(1);
}
SDValue CallArgEndOps[] = {Chain, GetI32(IsIndirectCall ? 0 : 1), InGlue};
Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, {MVT::Other, MVT::Glue},
CallArgEndOps);
InGlue = Chain.getValue(1);
if (IsIndirectCall) {
SDValue PrototypeOps[] = {Chain, GetI32(UniqueCallSite), InGlue};
Chain = DAG.getNode(NVPTXISD::Prototype, dl, {MVT::Other, MVT::Glue},
PrototypeOps);
InGlue = Chain.getValue(1);
}
SmallVector<SDValue, 16> ProxyRegOps;
// An item of the vector is filled if the element does not need a ProxyReg
// operation on it and should be added to InVals as is. ProxyRegOps and
@@ -2919,8 +2847,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
case ISD::ADDRSPACECAST:
return LowerADDRSPACECAST(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
return Op;
case ISD::INTRINSIC_WO_CHAIN:
@@ -3129,8 +3055,7 @@ SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout());
// Store the address of unsized array <function>_vararg[] in the ap object.
SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg);
SDValue VAReg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),
@@ -3370,7 +3295,7 @@ SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
EVT v) const {
StringRef SavedStr = nvTM->getStrPool().save(
getParamName(&DAG.getMachineFunction().getFunction(), idx));
return DAG.getTargetExternalSymbol(SavedStr.data(), v);
return DAG.getExternalSymbol(SavedStr.data(), v);
}
SDValue NVPTXTargetLowering::LowerFormalArguments(
@@ -3438,7 +3363,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue P;
if (isKernelFunction(*F)) {
P = DAG.getNode(NVPTXISD::Wrapper, dl, ByvalIn.VT, ArgSymbol);
P = ArgSymbol;
P.getNode()->setIROrder(Arg.getArgNo() + 1);
} else {
P = DAG.getNode(NVPTXISD::MoveParam, dl, ByvalIn.VT, ArgSymbol);

View File

@@ -24,32 +24,19 @@ namespace NVPTXISD {
enum NodeType : unsigned {
// Start the numbering from where ISD NodeType finishes.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
Wrapper,
CALL,
RET_GLUE,
LOAD_PARAM,
DeclareParam,
DeclareScalarParam,
DeclareRetParam,
DeclareRet,
DeclareScalarRet,
PrintCall,
PrintConvergentCall,
PrintCallUni,
PrintConvergentCallUni,
CallArgBegin,
CallArg,
LastCallArg,
CallArgEnd,
CallVoid,
CallVal,
CallSymbol,
Prototype,
/// This node represents a PTX call instruction. It's operands are as follows:
///
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
/// NumParams, Callee, Proto, InGlue)
CALL,
MoveParam,
PseudoUseParam,
RETURN,
CallSeqBegin,
CallSeqEnd,
CallPrototype,
ProxyReg,
FSHL_CLAMP,
@@ -83,7 +70,6 @@ enum NodeType : unsigned {
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
Dummy,
FIRST_MEMORY_OPCODE,
LoadV2 = FIRST_MEMORY_OPCODE,
@@ -100,8 +86,6 @@ enum NodeType : unsigned {
StoreParam,
StoreParamV2,
StoreParamV4,
StoreParamS32, // to sext and store a <32bit value, not used currently
StoreParamU32, // to zext and store a <32bit value, not used currently
StoreRetval,
StoreRetvalV2,
StoreRetvalV4,
@@ -120,8 +104,6 @@ public:
const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
const char *getTargetNodeName(unsigned Opcode) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,

View File

@@ -190,22 +190,4 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
return 2;
}
bool NVPTXInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
// Prevent the scheduler from reordering & splitting up MachineInstrs
// which must stick together (in initially set order) to
// comprise a valid PTX function call sequence.
switch (MI.getOpcode()) {
case NVPTX::CallUniPrintCallRetInst1:
case NVPTX::CallArgBeginInst:
case NVPTX::CallArgParam:
case NVPTX::LastCallArgParam:
case NVPTX::CallArgEndInst1:
return true;
}
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
}
}

View File

@@ -66,9 +66,6 @@ public:
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
bool isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const override;
};
} // namespace llvm

View File

@@ -1700,17 +1700,6 @@ def Offseti32imm : Operand<i32> {
let PrintMethod = "printOffseti32imm";
}
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
// Load a memory address into a u32 or u64 register.
def MOV_ADDR : BasicNVPTXInst<(outs B32:$dst), (ins ADDR_base:$a),
"mov.b32",
[(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
def MOV_ADDR64 : BasicNVPTXInst<(outs B64:$dst), (ins ADDR_base:$a),
"mov.b64",
[(set i64:$dst, (Wrapper tglobaladdr:$a))]>;
// Get pointer to local stack.
let hasSideEffects = false in {
def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num),
@@ -1750,8 +1739,27 @@ def BFMOV16i : MOVi<B16, "b16", bf16, bf16imm, fpimm>;
def FMOV32i : MOVi<B32, "b32", f32, f32imm, fpimm>;
def FMOV64i : MOVi<B64, "b64", f64, f64imm, fpimm>;
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>;
def to_tglobaladdr : SDNodeXForm<globaladdr, [{
return CurDAG->getTargetGlobalAddress(N->getGlobal(), SDLoc(N),
N->getValueType(0), N->getOffset(),
N->getTargetFlags());
}]>;
def to_texternsym : SDNodeXForm<externalsym, [{
return CurDAG->getTargetExternalSymbol(N->getSymbol(), N->getValueType(0),
N->getTargetFlags());
}]>;
def to_tframeindex : SDNodeXForm<frameindex, [{
return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
}]>;
def : Pat<(i32 globaladdr:$dst), (IMOV32i (to_tglobaladdr $dst))>;
def : Pat<(i64 globaladdr:$dst), (IMOV64i (to_tglobaladdr $dst))>;
def : Pat<(i32 externalsym:$dst), (IMOV32i (to_texternsym $dst))>;
def : Pat<(i64 externalsym:$dst), (IMOV64i (to_texternsym $dst))>;
//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
@@ -1759,10 +1767,6 @@ def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
"add.u64 \t$dst, ${addr:add};", []>;
def to_tframeindex : SDNodeXForm<frameindex, [{
return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
}]>;
def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
@@ -1975,26 +1979,19 @@ defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
def SDTDeclareParamProfile :
SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
def SDTDeclareScalarParamProfile :
SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
def SDTCallValProfile : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
def DeclareParam :
@@ -2004,10 +2001,12 @@ def DeclareScalarParam :
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam :
SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
SDNode<"NVPTXISD::DeclareRetParam",
SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet :
SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
SDNode<"NVPTXISD::DeclareRet",
SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LoadParam :
SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
@@ -2018,18 +2017,6 @@ def LoadParamV2 :
def LoadParamV4 :
SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
[SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def PrintCall :
SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintConvergentCall :
SDNode<"NVPTXISD::PrintConvergentCall", SDTPrintCallProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni :
SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintConvergentCallUni :
SDNode<"NVPTXISD::PrintConvergentCallUni", SDTPrintCallProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParam :
SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
@@ -2039,33 +2026,6 @@ def StoreParamV2 :
def StoreParamV4 :
SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32 :
SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32 :
SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgBegin :
SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg :
SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg :
SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd :
SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid :
SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype :
SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal :
SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveParam :
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
def StoreRetval :
@@ -2077,16 +2037,19 @@ def StoreRetvalV2 :
def StoreRetvalV4 :
SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
[SDNPHasChain, SDNPSideEffect]>;
def PseudoUseParam :
SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode :
SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
[SDNPHasChain, SDNPSideEffect]>;
def ProxyReg :
SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
/// NumParams, Callee, Proto, InGlue)
def SDTCallProfile : SDTypeProfile<0, 6,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>,
SDTCisVT<3, i32>, SDTCisVT<5, i32>]>;
def call :
SDNode<"NVPTXISD::CALL", SDTCallProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
let mayLoad = true in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
@@ -2107,11 +2070,6 @@ let mayLoad = true in {
[]>;
}
class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
!strconcat("mov", opstr, " \t$dst, retval$b;"),
[(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
let mayStore = true in {
multiclass StoreParamInst<NVPTXRegClass regclass, Operand IMMType, string opstr, bit support_imm = true> {
@@ -2174,23 +2132,42 @@ let mayStore = true in {
[]>;
}
let isCall=1 in {
multiclass CALL<string OpcStr, SDNode OpNode> {
def PrintCallNoRetInst : NVPTXInst<(outs), (ins),
OpcStr # " ", [(OpNode 0)]>;
def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
OpcStr # " (retval0), ", [(OpNode 1)]>;
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
/// NumParams, Callee, Proto, InGlue)
def CallOperand : Operand<i32> { let PrintMethod = "printCallOperand"; }
foreach is_convergent = [0, 1] in {
defvar convergent_suffix = !if(is_convergent, "_conv", "");
let isCall = 1, isConvergent = is_convergent in {
def CALL # convergent_suffix :
NVPTXInst<(outs),
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params,
i32imm:$proto),
"call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>;
def CALL_UNI # convergent_suffix :
NVPTXInst<(outs),
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
"call.uni${rets:RetList} $addr, (${params:ParamList});", []>;
}
}
defm Call : CALL<"call", PrintCall>;
defm CallUni : CALL<"call.uni", PrintCallUni>;
defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, globaladdr:$addr, imm:$proto),
(call_inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>;
def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i32:$addr, imm:$proto),
(call_inst $addr, imm:$rets, imm:$params, imm:$proto)>;
def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i64:$addr, imm:$proto),
(call_inst $addr, imm:$rets, imm:$params, imm:$proto)>;
// Convergent call instructions. These are identical to regular calls, except
// they have the isConvergent bit set.
let isConvergent=1 in {
defm ConvergentCall : CALL<"call", PrintConvergentCall>;
defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>;
defvar call_uni_inst = !cast<NVPTXInst>("CALL_UNI" # convergent_suffix);
def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, globaladdr:$addr, 0),
(call_uni_inst (to_tglobaladdr $addr), imm:$rets, imm:$params)>;
def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i32:$addr, 0),
(call_uni_inst $addr, imm:$rets, imm:$params)>;
def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i64:$addr, 0),
(call_uni_inst $addr, imm:$rets, imm:$params)>;
}
def LoadParamMemI64 : LoadParamMemInst<B64, ".b64">;
@@ -2244,69 +2221,30 @@ def StoreRetvalV4I32 : StoreRetvalV4Inst<B32, ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<B16, ".b16">;
def StoreRetvalV4I8 : StoreRetvalV4Inst<B16, ".b8">;
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
[(CallArg 1, imm:$a)]>;
def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
[(LastCallArg 1, imm:$a)]>;
def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ",
[(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg : NVPTXInst<(outs), (ins B32:$addr), "$addr, ",
[(CallVoid i32:$addr)]>;
def CallVoidInstReg64 : NVPTXInst<(outs), (ins B64:$addr), "$addr, ",
[(CallVoid i64:$addr)]>;
def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
[(Prototype (i32 imm:$val))]>;
def DeclareRetMemInst :
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
".param .align $align .b8 retval$num[$size];",
[(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size),
".param .align $align .b8 retval0[$size];",
[(DeclareRetParam imm:$align, imm:$size)]>;
def DeclareRetScalarInst :
NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
".param .b$size retval$num;",
[(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst :
NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
".reg .b$size retval$num;",
[(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
NVPTXInst<(outs), (ins i32imm:$size),
".param .b$size retval0;",
[(DeclareRet imm:$size)]>;
def DeclareParamInst :
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
".param .align $align .b8 param$a[$size];",
[(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
[(DeclareParam imm:$align, imm:$a, imm:$size)]>;
def DeclareScalarParamInst :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
".param .b$size param$a;",
[(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
".reg .b$size param$a;",
[(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
[(DeclareScalarParam imm:$a, imm:$size)]>;
class MoveParamSymbolInst<RegTyInfo t> :
BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src),
"mov.b" # t.Size,
[(set t.Ty:$dst, (MoveParam texternalsym:$src))]>;
def MOV64_PARAM : MoveParamSymbolInst<I64RT>;
def MOV32_PARAM : MoveParamSymbolInst<I32RT>;
class PseudoUseParamInst<NVPTXRegClass regclass, ValueType vt> :
NVPTXInst<(outs), (ins regclass:$src),
"// Pseudo use of $src",
[(PseudoUseParam vt:$src)]>;
def PseudoUseParamI64 : PseudoUseParamInst<B64, i64>;
def PseudoUseParamI32 : PseudoUseParamInst<B32, i32>;
def PseudoUseParamI16 : PseudoUseParamInst<B16, i16>;
def PseudoUseParamF64 : PseudoUseParamInst<B64, f64>;
def PseudoUseParamF32 : PseudoUseParamInst<B32, f32>;
foreach t = [I32RT, I64RT] in {
defvar inst_name = "MOV" # t.Size # "_PARAM";
def inst_name : BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), "mov.b" # t.Size>;
def : Pat<(MoveParam (t.Ty externalsym:$src)),
(!cast<NVPTXInst>(inst_name) (t.Ty (to_texternsym $src)))>;
}
multiclass ProxyRegInst<string SzStr, NVPTXRegClass rc> {
def NAME : BasicNVPTXInst<(outs rc:$dst), (ins rc:$src),
@@ -2861,21 +2799,6 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPSideEffect]>;
def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def calltarget : Operand<i32>;
let isCall=1 in {
def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>;
}
def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>;
def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;
// Pseudo instructions.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
: NVPTXInst<outs, ins, asmstr, pattern>;
def Callseq_Start :
NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
"\\{ // callseq $amt1, $amt2",

View File

@@ -2224,6 +2224,8 @@ def nvvm_move_sym64 : NVPTXInst<(outs B64:$r), (ins ADDR_base:$s),
def texsurf_handles
: BasicNVPTXInst<(outs B64:$result), (ins ADDR_base:$src), "mov.u64">;
def : Pat<(int_nvvm_texsurf_handle_internal globaladdr:$src),
(texsurf_handles (to_tglobaladdr $src))>;
//-----------------------------------
// Compiler Error Warn

View File

@@ -56,8 +56,7 @@ attributes #0 = { noreturn }
; CHECK-NEXT: .noreturn
; CHECK: .visible .func (.param .b32 func_retval0) z()
; CHECK: call.uni (retval0),
; CHECK-NEXT: b,
; CHECK: call.uni (retval0), b,
; CHECK: .alias b, a;

View File

@@ -216,12 +216,7 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;

View File

@@ -19,11 +19,7 @@ define void @foo() {
; CHECK-NEXT: .param .align 8 .b8 param0[16];
; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
; CHECK-NEXT: call.uni
; CHECK-NEXT: bar,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni bar, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @bar(ptr byval(%struct) @G)

View File

@@ -48,8 +48,7 @@ entry:
; CHECK-NEXT: st.param.b64 [param0], %rd[[A_REG]]
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: st.param.b64 [param1], %rd[[SP_REG]]
; CHECK-NEXT: call.uni
; CHECK-NEXT: callee,
; CHECK-NEXT: call.uni callee,
call void @callee(ptr %a, ptr %buf) #2
ret void

View File

@@ -203,12 +203,7 @@ define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: .param .b32 param1;
; CHECK-NEXT: st.param.b32 [param1], %r5;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: use,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), use, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;

View File

@@ -9,18 +9,16 @@ declare void @conv() convergent
declare void @not_conv()
define void @test(ptr %f) {
; CHECK: ConvergentCallUniPrintCall
; CHECK-NEXT: @conv
; CHECK: CALL_UNI_conv @conv
call void @conv()
; CHECK: CallUniPrintCall
; CHECK-NEXT: @not_conv
; CHECK: CALL_UNI @not_conv
call void @not_conv()
; CHECK: ConvergentCallPrintCall
; CHECK: CALL_conv %{{[0-9]+}}
call void %f() convergent
; CHECK: CallPrintCall
; CHECK: CALL %{{[0-9]+}}
call void %f()
ret void

View File

@@ -9,12 +9,7 @@ define %struct.64 @test_return_type_mismatch(ptr %p) {
; CHECK-LABEL: test_return_type_mismatch(
; CHECK: .param .align 1 .b8 retval0[8];
; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
; CHECK-NEXT: call (retval0),
; CHECK-NEXT: %rd
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: )
; CHECK-NEXT: , prototype_0;
; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_0;
%ret = call %struct.64 @callee(ptr %p)
ret %struct.64 %ret
}
@@ -23,12 +18,7 @@ define i64 @test_param_type_mismatch(ptr %p) {
; CHECK-LABEL: test_param_type_mismatch(
; CHECK: .param .b64 retval0;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b64 _) _ (.param .b64 _);
; CHECK-NEXT: call (retval0),
; CHECK-NEXT: %rd
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: )
; CHECK-NEXT: , prototype_1;
; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_1;
%ret = call i64 @callee(i64 7)
ret i64 %ret
}
@@ -37,13 +27,7 @@ define i64 @test_param_count_mismatch(ptr %p) {
; CHECK-LABEL: test_param_count_mismatch(
; CHECK: .param .b64 retval0;
; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _);
; CHECK-NEXT: call (retval0),
; CHECK-NEXT: %rd
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: )
; CHECK-NEXT: , prototype_2;
; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0, param1), prototype_2;
%ret = call i64 @callee(ptr %p, i64 7)
ret i64 %ret
}
@@ -52,12 +36,7 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_return_type_mismatch_variadic(
; CHECK: .param .align 1 .b8 retval0[8];
; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
; CHECK-NEXT: call (retval0),
; CHECK-NEXT: %rd
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: )
; CHECK-NEXT: , prototype_3;
; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_3;
%ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p)
ret %struct.64 %ret
}
@@ -65,12 +44,7 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
define i64 @test_param_type_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_param_type_mismatch_variadic(
; CHECK: .param .b64 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: callee_variadic
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: )
; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
ret i64 %ret
}
@@ -78,12 +52,7 @@ define i64 @test_param_type_mismatch_variadic(ptr %p) {
define i64 @test_param_count_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_param_count_mismatch_variadic(
; CHECK: .param .b64 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: callee_variadic
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: )
; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
ret i64 %ret
}

View File

@@ -24,11 +24,7 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-32-NEXT: .param .b32 param0;
; CHECK-32-NEXT: st.param.b32 [param0], %r5;
; CHECK-32-NEXT: .param .b32 retval0;
; CHECK-32-NEXT: call.uni (retval0),
; CHECK-32-NEXT: bar,
; CHECK-32-NEXT: (
; CHECK-32-NEXT: param0
; CHECK-32-NEXT: );
; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-32-NEXT: } // callseq 0
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
@@ -49,11 +45,7 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-64-NEXT: .param .b64 param0;
; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
; CHECK-64-NEXT: .param .b32 retval0;
; CHECK-64-NEXT: call.uni (retval0),
; CHECK-64-NEXT: bar,
; CHECK-64-NEXT: (
; CHECK-64-NEXT: param0
; CHECK-64-NEXT: );
; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-64-NEXT: } // callseq 0
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;

View File

@@ -263,12 +263,7 @@ declare half @test_callee(half %a, half %b) #0
; CHECK-DAG: st.param.b16 [param0], [[A]];
; CHECK-DAG: st.param.b16 [param1], [[B]];
; CHECK-DAG: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
@@ -287,12 +282,7 @@ define half @test_call(half %a, half %b) #0 {
; CHECK-DAG: st.param.b16 [param0], [[B]];
; CHECK-DAG: st.param.b16 [param1], [[A]];
; CHECK-DAG: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
@@ -311,12 +301,7 @@ define half @test_call_flipped(half %a, half %b) #0 {
; CHECK-DAG: st.param.b16 [param0], [[B]];
; CHECK-DAG: st.param.b16 [param1], [[A]];
; CHECK-DAG: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
@@ -650,8 +635,7 @@ else:
; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]];
; CHECK: {
; CHECK: st.param.b64 [param0], %[[P1]];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_dummy
; CHECK: call.uni (retval0), test_dummy
; CHECK: }
; CHECK: setp.ne.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 0;
; CHECK: @[[PRED]] bra [[LOOP]];

View File

@@ -467,12 +467,7 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
@@ -495,12 +490,7 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
@@ -523,12 +513,7 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 2
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;

View File

@@ -40,12 +40,7 @@ define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
; CHECK-NEXT: .param .b32 param1;
; CHECK-NEXT: st.param.b32 [param1], %r6;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: dummy_f32,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), dummy_f32, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r7, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
@@ -92,12 +87,7 @@ define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: st.param.b64 [param1], %rd6;
; CHECK-NEXT: .param .b64 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: dummy_f64,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), dummy_f64, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd7, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;

View File

@@ -50,11 +50,7 @@ define void @test_ld_param_escaping(ptr byval(i32) %a) {
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni
; CHECK-NEXT: escape,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni escape, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @escape(ptr %a)
@@ -72,11 +68,7 @@ define void @test_ld_param_byval(ptr byval(i32) %a) {
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni
; CHECK-NEXT: byval_user,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni byval_user, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: ret;
call void @byval_user(ptr %a)

View File

@@ -42,11 +42,7 @@ define void @call(fp128 %x) {
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd1, %rd2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni call, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @call(fp128 %x)

View File

@@ -647,12 +647,7 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: .param .align 4 .b8 param1[4];
; COMMON-NEXT: st.param.b32 [param1], %r2;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
; COMMON-NEXT: call.uni (retval0),
; COMMON-NEXT: test_callee,
; COMMON-NEXT: (
; COMMON-NEXT: param0,
; COMMON-NEXT: param1
; COMMON-NEXT: );
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 0
; COMMON-NEXT: st.param.b32 [func_retval0], %r3;
@@ -675,12 +670,7 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: .param .align 4 .b8 param1[4];
; COMMON-NEXT: st.param.b32 [param1], %r1;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
; COMMON-NEXT: call.uni (retval0),
; COMMON-NEXT: test_callee,
; COMMON-NEXT: (
; COMMON-NEXT: param0,
; COMMON-NEXT: param1
; COMMON-NEXT: );
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 1
; COMMON-NEXT: st.param.b32 [func_retval0], %r3;
@@ -703,12 +693,7 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: .param .align 4 .b8 param1[4];
; COMMON-NEXT: st.param.b32 [param1], %r1;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
; COMMON-NEXT: call.uni (retval0),
; COMMON-NEXT: test_callee,
; COMMON-NEXT: (
; COMMON-NEXT: param0,
; COMMON-NEXT: param1
; COMMON-NEXT: );
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 2
; COMMON-NEXT: st.param.b32 [func_retval0], %r3;

View File

@@ -833,12 +833,7 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
@@ -861,12 +856,7 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
@@ -889,12 +879,7 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 2
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;

View File

@@ -33,13 +33,7 @@ define internal i32 @foo() {
; CHECK-NEXT: st.param.b64 [param1], %rd4;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
; CHECK-NEXT: call (retval0),
; CHECK-NEXT: %rd1,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: )
; CHECK-NEXT: , prototype_0;
; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_0;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
@@ -76,13 +70,7 @@ define internal i32 @bar() {
; CHECK-NEXT: st.param.b64 [param1], %rd5;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
; CHECK-NEXT: call (retval0),
; CHECK-NEXT: %rd1,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: )
; CHECK-NEXT: , prototype_1;
; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_1;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;

View File

@@ -14,10 +14,7 @@ define void @foo(ptr %ptr) {
; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 16 .b8 retval0[16];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: bar,
; CHECK-NEXT: (
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), bar, ();
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.v4.b32 [%rd1], {%r1, %r2, %r3, %r4};

View File

@@ -58,11 +58,7 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX32-NEXT: { // callseq 0, 0
; PTX32-NEXT: .param .b32 param0;
; PTX32-NEXT: st.param.b32 [param0], %r2;
; PTX32-NEXT: call.uni
; PTX32-NEXT: bar,
; PTX32-NEXT: (
; PTX32-NEXT: param0
; PTX32-NEXT: );
; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 0
; PTX32-NEXT: ret;
;
@@ -84,11 +80,7 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX64-NEXT: { // callseq 0, 0
; PTX64-NEXT: .param .b64 param0;
; PTX64-NEXT: st.param.b64 [param0], %rd1;
; PTX64-NEXT: call.uni
; PTX64-NEXT: bar,
; PTX64-NEXT: (
; PTX64-NEXT: param0
; PTX64-NEXT: );
; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 0
; PTX64-NEXT: ret;
%local = alloca i32, align 4
@@ -159,20 +151,12 @@ define void @foo4() {
; PTX32-NEXT: { // callseq 1, 0
; PTX32-NEXT: .param .b32 param0;
; PTX32-NEXT: st.param.b32 [param0], %r1;
; PTX32-NEXT: call.uni
; PTX32-NEXT: bar,
; PTX32-NEXT: (
; PTX32-NEXT: param0
; PTX32-NEXT: );
; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 1
; PTX32-NEXT: { // callseq 2, 0
; PTX32-NEXT: .param .b32 param0;
; PTX32-NEXT: st.param.b32 [param0], %r3;
; PTX32-NEXT: call.uni
; PTX32-NEXT: bar,
; PTX32-NEXT: (
; PTX32-NEXT: param0
; PTX32-NEXT: );
; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 2
; PTX32-NEXT: ret;
;
@@ -197,20 +181,12 @@ define void @foo4() {
; PTX64-NEXT: { // callseq 1, 0
; PTX64-NEXT: .param .b64 param0;
; PTX64-NEXT: st.param.b64 [param0], %rd1;
; PTX64-NEXT: call.uni
; PTX64-NEXT: bar,
; PTX64-NEXT: (
; PTX64-NEXT: param0
; PTX64-NEXT: );
; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 1
; PTX64-NEXT: { // callseq 2, 0
; PTX64-NEXT: .param .b64 param0;
; PTX64-NEXT: st.param.b64 [param0], %rd3;
; PTX64-NEXT: call.uni
; PTX64-NEXT: bar,
; PTX64-NEXT: (
; PTX64-NEXT: param0
; PTX64-NEXT: );
; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 2
; PTX64-NEXT: ret;
%A = alloca i32

View File

@@ -133,12 +133,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: st.param.b64 [param0], %rd3;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
; PTX-NEXT: call (retval0),
; PTX-NEXT: %rd1,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: )
; PTX-NEXT: , prototype_0;
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0;
; PTX-NEXT: ld.param.b32 %r1, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
@@ -182,14 +177,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: st.param.b64 [param2], %rd4;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
; PTX-NEXT: call (retval0),
; PTX-NEXT: %rd1,
; PTX-NEXT: (
; PTX-NEXT: param0,
; PTX-NEXT: param1,
; PTX-NEXT: param2
; PTX-NEXT: )
; PTX-NEXT: , prototype_1;
; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1;
; PTX-NEXT: ld.param.b32 %r2, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
@@ -284,12 +272,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
; PTX-NEXT: call (retval0),
; PTX-NEXT: %rd1,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: )
; PTX-NEXT: , prototype_2;
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_2;
; PTX-NEXT: ld.param.b32 %r3, [retval0];
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
@@ -330,12 +313,7 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
; PTX-NEXT: call (retval0),
; PTX-NEXT: %rd1,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: )
; PTX-NEXT: , prototype_3;
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3;
; PTX-NEXT: ld.param.b32 %r4, [retval0];
; PTX-NEXT: } // callseq 3
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
@@ -561,11 +539,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; PTX-NEXT: { // callseq 4, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
; PTX-NEXT: st.param.b32 [param0], %r1;
; PTX-NEXT: call.uni
; PTX-NEXT: device_func,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 4
; PTX-NEXT: ret;
call void @device_func(ptr byval(i32) align 4 %input)

View File

@@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes IR,IRC
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes IR,IRO
; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX,PTXC
@@ -47,11 +47,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b64 retval0;
; PTX-NEXT: call.uni (retval0),
; PTX-NEXT: escape,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
; PTX-NEXT: call.uni (retval0), escape, (param0);
; PTX-NEXT: ld.param.b64 %rd6, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
@@ -89,11 +85,7 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: .param .b64 retval0;
; PTX-NEXT: call.uni (retval0),
; PTX-NEXT: escape,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
; PTX-NEXT: call.uni (retval0), escape, (param0);
; PTX-NEXT: ld.param.b64 %rd3, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;

View File

@@ -153,11 +153,7 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd1;
; PTX-NEXT: call.uni
; PTX-NEXT: _Z6escapePv,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
; PTX-NEXT: call.uni _Z6escapePv, (param0);
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
entry:
@@ -198,11 +194,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd3;
; PTX-NEXT: call.uni
; PTX-NEXT: _Z6escapePv,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
; PTX-NEXT: call.uni _Z6escapePv, (param0);
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
entry:
@@ -902,11 +894,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; PTX-NEXT: { // callseq 2, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
; PTX-NEXT: st.param.b32 [param0], %r1;
; PTX-NEXT: call.uni
; PTX-NEXT: device_func,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
call void @device_func(ptr byval(i32) align 4 %input)
@@ -929,11 +917,7 @@ define void @device_func(ptr byval(i32) align 4 %input) {
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
; PTX-NEXT: st.param.b32 [param0], %r1;
; PTX-NEXT: call.uni
; PTX-NEXT: device_func,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 3
; PTX-NEXT: ret;
call void @device_func(ptr byval(i32) align 4 %input)

View File

@@ -21,11 +21,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], 0d0000000000000000;
; CHECK-NEXT: .param .b64 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: quux,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), quux, (param0);
; CHECK-NEXT: ld.param.b64 %rd1, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3;

View File

@@ -11,10 +11,7 @@ define dso_local void @naked() naked "frame-pointer"="all" {
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: { // callseq 0, 0
; CHECK-32-NEXT: call.uni
; CHECK-32-NEXT: main,
; CHECK-32-NEXT: (
; CHECK-32-NEXT: );
; CHECK-32-NEXT: call.uni main, ();
; CHECK-32-NEXT: } // callseq 0
; CHECK-32-NEXT: // begin inline asm
; CHECK-32-NEXT: exit;
@@ -26,10 +23,7 @@ define dso_local void @naked() naked "frame-pointer"="all" {
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: { // callseq 0, 0
; CHECK-64-NEXT: call.uni
; CHECK-64-NEXT: main,
; CHECK-64-NEXT: (
; CHECK-64-NEXT: );
; CHECK-64-NEXT: call.uni main, ();
; CHECK-64-NEXT: } // callseq 0
; CHECK-64-NEXT: // begin inline asm
; CHECK-64-NEXT: exit;
@@ -45,10 +39,7 @@ define dso_local void @normal() "frame-pointer"="all" {
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: { // callseq 1, 0
; CHECK-32-NEXT: call.uni
; CHECK-32-NEXT: main,
; CHECK-32-NEXT: (
; CHECK-32-NEXT: );
; CHECK-32-NEXT: call.uni main, ();
; CHECK-32-NEXT: } // callseq 1
; CHECK-32-NEXT: // begin inline asm
; CHECK-32-NEXT: exit;
@@ -60,10 +51,7 @@ define dso_local void @normal() "frame-pointer"="all" {
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: { // callseq 1, 0
; CHECK-64-NEXT: call.uni
; CHECK-64-NEXT: main,
; CHECK-64-NEXT: (
; CHECK-64-NEXT: );
; CHECK-64-NEXT: call.uni main, ();
; CHECK-64-NEXT: } // callseq 1
; CHECK-64-NEXT: // begin inline asm
; CHECK-64-NEXT: exit;

View File

@@ -37,11 +37,7 @@ define i32 @test(%struct.1float alignstack(32) %data) {
; CHECK-NEXT: st.param.b8 [param0+2], %r12;
; CHECK-NEXT: st.param.b8 [param0+3], %r13;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), callee, (param0);
; CHECK-NEXT: ld.param.b32 %r14, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r14;

View File

@@ -32,8 +32,7 @@
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[C]]
; CHECK: .param .b32 retval0;
; CHECK: call.uni
; CHECK-NEXT: test_i1,
; CHECK: call.uni (retval0), test_i1,
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -76,8 +75,7 @@ define signext i1 @test_i1s(i1 signext %a) {
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i1,
; CHECK: call.uni (retval0), test_v3i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]]
@@ -95,8 +93,7 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) {
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0], [[E0]];
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0),
; CHECK: test_v4i1,
; CHECK: call.uni (retval0), test_v4i1,
; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
@@ -120,8 +117,7 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) {
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i1,
; CHECK: call.uni (retval0), test_v5i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]]
@@ -139,8 +135,7 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i2,
; CHECK: call.uni (retval0), test_i2,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -156,8 +151,7 @@ define i2 @test_i2(i2 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i3,
; CHECK: call.uni (retval0), test_i3,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -176,8 +170,7 @@ define i3 @test_i3(i3 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i8,
; CHECK: call.uni (retval0), test_i8,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -196,8 +189,7 @@ define i8 @test_i8(i8 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i8s,
; CHECK: call.uni (retval0), test_i8s,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]];
@@ -216,8 +208,7 @@ define signext i8 @test_i8s(i8 signext %a) {
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[R]]
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i8,
; CHECK: call.uni (retval0), test_v3i8,
; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
; interesting here, so it's skipped.
@@ -235,8 +226,7 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) {
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[R]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i8,
; CHECK: call.uni (retval0), test_v4i8,
; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[RET]];
; CHECK-NEXT: ret;
@@ -254,8 +244,7 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) {
; CHECK-DAG: st.param.v4.b8 [param0],
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i8,
; CHECK: call.uni (retval0), test_v5i8,
; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v4.b8 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -272,8 +261,7 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) {
; CHECK: ld.param.b16 {{%rs[0-9]+}}, [test_i11_param_0];
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i11,
; CHECK: call.uni (retval0), test_i11,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -290,8 +278,7 @@ define i11 @test_i11(i11 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i16,
; CHECK: call.uni (retval0), test_i16,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -309,8 +296,7 @@ define i16 @test_i16(i16 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i16s,
; CHECK: call.uni (retval0), test_i16s,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -329,8 +315,7 @@ define signext i16 @test_i16s(i16 signext %a) {
; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i16,
; CHECK: call.uni (retval0), test_v3i16,
; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0];
; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]};
@@ -348,8 +333,7 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) {
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i16,
; CHECK: call.uni (retval0), test_v4i16,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
; CHECK-NEXT: ret;
@@ -367,8 +351,7 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) {
; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i16,
; CHECK: call.uni (retval0), test_v5i16,
; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -386,8 +369,7 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) {
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_f16,
; CHECK: call.uni (retval0), test_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
; CHECK-NEXT: ret;
@@ -403,8 +385,7 @@ define half @test_f16(half %a) {
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v2f16,
; CHECK: call.uni (retval0), test_v2f16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
; CHECK-NEXT: ret;
@@ -420,8 +401,7 @@ define <2 x half> @test_v2f16(<2 x half> %a) {
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_bf16,
; CHECK: call.uni (retval0), test_bf16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
; CHECK-NEXT: ret;
@@ -437,8 +417,7 @@ define bfloat @test_bf16(bfloat %a) {
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v2bf16,
; CHECK: call.uni (retval0), test_v2bf16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
; CHECK-NEXT: ret;
@@ -457,8 +436,7 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v3f16,
; CHECK: call.uni (retval0), test_v3f16,
; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]};
@@ -476,8 +454,7 @@ define <3 x half> @test_v3f16(<3 x half> %a) {
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v4f16,
; CHECK: call.uni (retval0), test_v4f16,
; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]};
; CHECK: ret;
@@ -495,8 +472,7 @@ define <4 x half> @test_v4f16(<4 x half> %a) {
; CHECK-DAG: st.param.v4.b16 [param0],
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v5f16,
; CHECK: call.uni (retval0), test_v5f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
@@ -514,8 +490,7 @@ define <5 x half> @test_v5f16(<5 x half> %a) {
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v8f16,
; CHECK: call.uni (retval0), test_v8f16,
; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
; CHECK: ret;
@@ -535,8 +510,7 @@ define <8 x half> @test_v8f16(<8 x half> %a) {
; CHECK-DAG: st.param.v4.b16 [param0+8],
; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK: test_v9f16,
; CHECK: call.uni (retval0), test_v9f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
@@ -557,8 +531,7 @@ define <9 x half> @test_v9f16(<9 x half> %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i19,
; CHECK: call.uni (retval0), test_i19,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -575,8 +548,7 @@ define i19 @test_i19(i19 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i23,
; CHECK: call.uni (retval0), test_i23,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -593,8 +565,7 @@ define i23 @test_i23(i23 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i24,
; CHECK: call.uni (retval0), test_i24,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -610,8 +581,7 @@ define i24 @test_i24(i24 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i29,
; CHECK: call.uni (retval0), test_i29,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -627,8 +597,7 @@ define i29 @test_i29(i29 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i32,
; CHECK: call.uni (retval0), test_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -646,8 +615,7 @@ define i32 @test_i32(i32 %a) {
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b32 [param0+8], [[E2]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i32,
; CHECK: call.uni (retval0), test_v3i32,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]};
@@ -665,8 +633,7 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) {
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i32,
; CHECK: call.uni (retval0), test_v4i32,
; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-NEXT: ret;
@@ -684,8 +651,7 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) {
; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i32,
; CHECK: call.uni (retval0), test_v5i32,
; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -703,8 +669,7 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_f32,
; CHECK: call.uni (retval0), test_f32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -721,8 +686,7 @@ define float @test_f32(float %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i40,
; CHECK: call.uni (retval0), test_i40,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -739,8 +703,7 @@ define i40 @test_i40(i40 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i47,
; CHECK: call.uni (retval0), test_i47,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -757,8 +720,7 @@ define i47 @test_i47(i47 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i48,
; CHECK: call.uni (retval0), test_i48,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -776,8 +738,7 @@ define i48 @test_i48(i48 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i51,
; CHECK: call.uni (retval0), test_i51,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -795,8 +756,7 @@ define i51 @test_i51(i51 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i56,
; CHECK: call.uni (retval0), test_i56,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -812,8 +772,7 @@ define i56 @test_i56(i56 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i57,
; CHECK: call.uni (retval0), test_i57,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -829,8 +788,7 @@ define i57 @test_i57(i57 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], [[E]];
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i64,
; CHECK: call.uni (retval0), test_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
; CHECK: st.param.b64 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -848,8 +806,7 @@ define i64 @test_i64(i64 %a) {
; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b64 [param0+16], [[E2]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i64,
; CHECK: call.uni (retval0), test_v3i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RE0]], [[RE1]]};
@@ -872,8 +829,7 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) {
; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i64,
; CHECK: call.uni (retval0), test_v4i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]};
@@ -893,8 +849,7 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) {
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni
; CHECK-NEXT: test_s_i1,
; CHECK: call.uni (retval0), test_s_i1,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -910,8 +865,7 @@ define %s_i1 @test_s_i1(%s_i1 %a) {
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni
; CHECK-NEXT: test_s_i8,
; CHECK: call.uni (retval0), test_s_i8,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -927,8 +881,7 @@ define %s_i8 @test_s_i8(%s_i8 %a) {
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni
; CHECK-NEXT: test_s_i16,
; CHECK: call.uni (retval0), test_s_i16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -944,8 +897,7 @@ define %s_i16 @test_s_i16(%s_i16 %a) {
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni
; CHECK-NEXT: test_s_f16,
; CHECK: call.uni (retval0), test_s_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -961,8 +913,7 @@ define %s_f16 @test_s_f16(%s_f16 %a) {
; CHECK: .param .align 4 .b8 param0[4]
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i32,
; CHECK: call.uni (retval0), test_s_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -978,8 +929,7 @@ define %s_i32 @test_s_i32(%s_i32 %a) {
; CHECK: .param .align 4 .b8 param0[4]
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_f32,
; CHECK: call.uni (retval0), test_s_f32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -995,8 +945,7 @@ define %s_f32 @test_s_f32(%s_f32 %a) {
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.b64 [param0], [[E]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i64,
; CHECK: call.uni (retval0), test_s_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
; CHECK: st.param.b64 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -1021,8 +970,7 @@ define %s_i64 @test_s_i64(%s_i64 %a) {
; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i32f32,
; CHECK: call.uni (retval0), test_s_i32f32,
; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b32 [[RE1:%r[0-9]+]], [retval0+4];
; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
@@ -1051,8 +999,7 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
; CHECK: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i32x4,
; CHECK: call.uni (retval0), test_s_i32x4,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
@@ -1081,8 +1028,7 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
; CHECK: st.param.b32 [param0+16], [[E4]];
; CHECK: st.param.b64 [param0+24], [[E5]];
; CHECK: .param .align 8 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK: test_s_i1i32x4,
; CHECK: call.uni (retval0), test_s_i1i32x4,
; CHECK: (
; CHECK: param0
; CHECK: );
@@ -1160,8 +1106,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
; CHECK-DAG: st.param.b8 [param0+23],
; CHECK-DAG: st.param.b8 [param0+24],
; CHECK: .param .align 1 .b8 retval0[25];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i1i32x4p,
; CHECK: call.uni (retval0), test_s_i1i32x4p,
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2];
@@ -1237,8 +1182,7 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
; CHECK: st.param.b32 [param0+64], [[E15]];
; CHECK: .param .align 16 .b8 retval0[80];
; CHECK: call.uni (retval0),
; CHECK: test_s_crossfield,
; CHECK: call.uni (retval0), test_s_crossfield,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];

View File

@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
@@ -18,27 +19,23 @@ target triple = "nvptx64-nvidia-cuda"
; CHECK-NEXT: ;
define float @caller_md(float %a, float %b) {
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) caller_md(
; CHECK-NEXT: .param .b32 caller_md_param_0,
; CHECK-NEXT: .param .b32 caller_md_param_1
; CHECK-NEXT: )
; CHECK-NEXT: {
; CHECK: ld.param.b32 %r1, [caller_md_param_0];
; CHECK-LABEL: caller_md(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [caller_md_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [caller_md_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: callee_md,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), callee_md, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%s1 = insertvalue %struct.float2 poison, float %a, 0
%s2 = insertvalue %struct.float2 %s1, float %b, 1
%r = call float @callee_md(%struct.float2 %s2)
@@ -46,15 +43,16 @@ define float @caller_md(float %a, float %b) {
}
define float @callee_md(%struct.float2 alignstack(8) %a) {
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee_md(
; CHECK-NEXT: .param .align 8 .b8 callee_md_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: {
; CHECK: ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0];
; CHECK-LABEL: callee_md(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0];
; CHECK-NEXT: add.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%v0 = extractvalue %struct.float2 %a, 0
%v1 = extractvalue %struct.float2 %a, 1
%2 = fadd float %v0, %v1
@@ -62,27 +60,23 @@ define float @callee_md(%struct.float2 alignstack(8) %a) {
}
define float @caller(float %a, float %b) {
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) caller(
; CHECK-NEXT: .param .b32 caller_param_0,
; CHECK-NEXT: .param .b32 caller_param_1
; CHECK-NEXT: )
; CHECK-NEXT: {
; CHECK: ld.param.b32 %r1, [caller_param_0];
; CHECK-LABEL: caller(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [caller_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [caller_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), callee, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%s1 = insertvalue %struct.float2 poison, float %a, 0
%s2 = insertvalue %struct.float2 %s1, float %b, 1
%r = call float @callee(%struct.float2 %s2)
@@ -90,15 +84,16 @@ define float @caller(float %a, float %b) {
}
define float @callee(%struct.float2 alignstack(8) %a ) {
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee(
; CHECK-NEXT: .param .align 8 .b8 callee_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: {
; CHECK: ld.param.v2.b32 {%r1, %r2}, [callee_param_0];
; CHECK-LABEL: callee(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [callee_param_0];
; CHECK-NEXT: add.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%v0 = extractvalue %struct.float2 %a, 0
%v1 = extractvalue %struct.float2 %a, 1
%2 = fadd float %v0, %v1
@@ -106,9 +101,15 @@ define float @callee(%struct.float2 alignstack(8) %a ) {
}
define alignstack(8) %struct.float2 @aligned_return(%struct.float2 %a ) {
; CHECK-LABEL: .visible .func (.param .align 8 .b8 func_retval0[8]) aligned_return(
; CHECK-NEXT: .param .align 4 .b8 aligned_return_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: {
; CHECK-LABEL: aligned_return(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [aligned_return_param_0+4];
; CHECK-NEXT: ld.param.b32 %r2, [aligned_return_param_0];
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: st.param.b32 [func_retval0+4], %r1;
; CHECK-NEXT: ret;
ret %struct.float2 %a
}

View File

@@ -86,11 +86,7 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St4x1,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St4x1, (param0);
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
%1 = load i32, ptr %in, align 4
%call = tail call fastcc [1 x i32] @callee_St4x1(i32 %1) #2
@@ -118,11 +114,7 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .align 16 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St4x2,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St4x2, (param0);
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
%agg.tmp = alloca %struct.St4x2, align 8
%1 = load i64, ptr %in, align 4
@@ -160,11 +152,7 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[12];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St4x3,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St4x3, (param0);
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8];
%call = tail call fastcc [3 x i32] @callee_St4x3(ptr noundef nonnull byval(%struct.St4x3) align 4 %in) #2
@@ -207,11 +195,7 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St4x4,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St4x4, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
%call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2
%.fca.0.extract = extractvalue [4 x i32] %call, 0
@@ -258,11 +242,7 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[20];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St4x5,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St4x5, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16];
%call = tail call fastcc [5 x i32] @callee_St4x5(ptr noundef nonnull byval(%struct.St4x5) align 4 %in) #2
@@ -318,11 +298,7 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St4x6,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St4x6, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
%call = tail call fastcc [6 x i32] @callee_St4x6(ptr noundef nonnull byval(%struct.St4x6) align 4 %in) #2
@@ -385,11 +361,7 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[28];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St4x7,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St4x7, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+24];
@@ -460,11 +432,7 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St4x8,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St4x8, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
%call = tail call fastcc [8 x i32] @callee_St4x8(ptr noundef nonnull byval(%struct.St4x8) align 4 %in) #2
@@ -537,11 +505,7 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St8x1,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St8x1, (param0);
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
%1 = load i64, ptr %in, align 8
%call = tail call fastcc [1 x i64] @callee_St8x1(i64 %1) #2
@@ -569,11 +533,7 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St8x2,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St8x2, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
%call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2
%.fca.0.extract = extractvalue [2 x i64] %call, 0
@@ -608,11 +568,7 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St8x3,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St8x3, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16];
%call = tail call fastcc [3 x i64] @callee_St8x3(ptr noundef nonnull byval(%struct.St8x3) align 8 %in) #2
@@ -656,11 +612,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: callee_St8x4,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK: call.uni (retval0), callee_St8x4, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16];
%call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2

View File

@@ -131,11 +131,7 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: call.uni
; CHECK-NEXT: use,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni use, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
@@ -164,11 +160,7 @@ define i64 @test_negative_use_shl(i64 %x, i32 %y) {
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni
; CHECK-NEXT: use,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni use, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;

File diff suppressed because it is too large Load Diff

View File

@@ -16,11 +16,7 @@ define void @test_store_param_undef() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 16 .b8 param0[32];
; CHECK-NEXT: call.uni
; CHECK-NEXT: test_call,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni test_call, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @test_call(%struct.T undef)
@@ -41,11 +37,7 @@ define void @test_store_param_def(i64 %param0, i32 %param1) {
; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1};
; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
; CHECK-NEXT: call.uni
; CHECK-NEXT: test_call,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni test_call, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: ret;
%V2 = insertelement <2 x i32> undef, i32 %param1, i32 1

View File

@@ -71,11 +71,7 @@ define ptx_kernel void @baz(ptr %red, i32 %idx) {
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: texfunc,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), texfunc, (param0);
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: add.rn.f32 %r8, %r2, %r6;

View File

@@ -33,11 +33,7 @@
; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
; CHECK: .param .align 8 .b8 retval0[16];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8i16p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0);
; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+3];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+4];
@@ -80,11 +76,7 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8i32p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0);
; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
@@ -147,11 +139,7 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]];
; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]];
; CHECK: .param .align 8 .b8 retval0[32];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8i64p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0);
; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];
@@ -192,11 +180,7 @@ define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
; CHECK: .param .align 8 .b8 retval0[16];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8f16p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0);
; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2I_0:%rs[0-9]+]], [retval0+3];
; CHECK-DAG: ld.param.b8 [[R2I_1:%rs[0-9]+]], [retval0+4];
@@ -239,11 +223,7 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8f16x2p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0);
; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
@@ -286,11 +266,7 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8f32p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0);
; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
@@ -353,11 +329,7 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]];
; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]];
; CHECK: .param .align 8 .b8 retval0[32];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8f64p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0);
; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];

View File

@@ -28,10 +28,7 @@ define ptx_kernel void @kernel_func() {
; NO-TRAP-UNREACHABLE-EMPTY:
; NO-TRAP-UNREACHABLE-NEXT: // %bb.0:
; NO-TRAP-UNREACHABLE-NEXT: { // callseq 0, 0
; NO-TRAP-UNREACHABLE-NEXT: call.uni
; NO-TRAP-UNREACHABLE-NEXT: throw,
; NO-TRAP-UNREACHABLE-NEXT: (
; NO-TRAP-UNREACHABLE-NEXT: );
; NO-TRAP-UNREACHABLE-NEXT: call.uni throw, ();
; NO-TRAP-UNREACHABLE-NEXT: } // callseq 0
; NO-TRAP-UNREACHABLE-NEXT: // begin inline asm
; NO-TRAP-UNREACHABLE-NEXT: exit;
@@ -43,10 +40,7 @@ define ptx_kernel void @kernel_func() {
; NO-TRAP-AFTER-NORETURN-EMPTY:
; NO-TRAP-AFTER-NORETURN-NEXT: // %bb.0:
; NO-TRAP-AFTER-NORETURN-NEXT: { // callseq 0, 0
; NO-TRAP-AFTER-NORETURN-NEXT: call.uni
; NO-TRAP-AFTER-NORETURN-NEXT: throw,
; NO-TRAP-AFTER-NORETURN-NEXT: (
; NO-TRAP-AFTER-NORETURN-NEXT: );
; NO-TRAP-AFTER-NORETURN-NEXT: call.uni throw, ();
; NO-TRAP-AFTER-NORETURN-NEXT: } // callseq 0
; NO-TRAP-AFTER-NORETURN-NEXT: // begin inline asm
; NO-TRAP-AFTER-NORETURN-NEXT: exit;
@@ -59,10 +53,7 @@ define ptx_kernel void @kernel_func() {
; TRAP-EMPTY:
; TRAP-NEXT: // %bb.0:
; TRAP-NEXT: { // callseq 0, 0
; TRAP-NEXT: call.uni
; TRAP-NEXT: throw,
; TRAP-NEXT: (
; TRAP-NEXT: );
; TRAP-NEXT: call.uni throw, ();
; TRAP-NEXT: } // callseq 0
; TRAP-NEXT: trap; exit;
;
@@ -72,10 +63,7 @@ define ptx_kernel void @kernel_func() {
; BUG-FIXED-EMPTY:
; BUG-FIXED-NEXT: // %bb.0:
; BUG-FIXED-NEXT: { // callseq 0, 0
; BUG-FIXED-NEXT: call.uni
; BUG-FIXED-NEXT: throw,
; BUG-FIXED-NEXT: (
; BUG-FIXED-NEXT: );
; BUG-FIXED-NEXT: call.uni throw, ();
; BUG-FIXED-NEXT: } // callseq 0
; BUG-FIXED-NEXT: trap;
call void @throw()

View File

@@ -126,12 +126,7 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd4;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics1,
; CHECK-PTX-NEXT: (
; CHECK-PTX-NEXT: param0,
; CHECK-PTX-NEXT: param1
; CHECK-PTX-NEXT: );
; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT: } // callseq 0
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2;
@@ -238,12 +233,7 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd4;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics2,
; CHECK-PTX-NEXT: (
; CHECK-PTX-NEXT: param0,
; CHECK-PTX-NEXT: param1
; CHECK-PTX-NEXT: );
; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT: } // callseq 1
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2;
@@ -315,12 +305,7 @@ define dso_local i32 @baz() {
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics3,
; CHECK-PTX-NEXT: (
; CHECK-PTX-NEXT: param0,
; CHECK-PTX-NEXT: param1
; CHECK-PTX-NEXT: );
; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT: } // callseq 2
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2;
@@ -397,12 +382,7 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd8;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics4,
; CHECK-PTX-NEXT: (
; CHECK-PTX-NEXT: param0,
; CHECK-PTX-NEXT: param1
; CHECK-PTX-NEXT: );
; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 3
; CHECK-PTX-NEXT: ret;

View File

@@ -19,11 +19,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd2, %rd1};
; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd4, %rd3};
; CHECK-NEXT: .param .align 16 .b8 retval0[32];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: callee_St8x4,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: call.uni (retval0), callee_St8x4, (param0);
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0];
; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [retval0+16];
; CHECK-NEXT: } // callseq 0