[PowerPC][Future] Add Support For Functions That Do Not Use A TOC.

On PowerPC most functions require a valid TOC pointer.

This is the case because either the function itself needs to use this
pointer to access the TOC or because other functions that are called
from that function expect a valid TOC pointer in the register R2.
The main exception to this is leaf functions that do not access the TOC
since they are guaranteed not to need a valid TOC pointer.

This patch introduces a feature that will allow more functions to not
require a valid TOC pointer in R2.

Differential Revision: https://reviews.llvm.org/D73664
This commit is contained in:
Stefan Pintilie
2020-04-08 08:07:35 -05:00
committed by Kamau Bridgeman
parent f3bf25eb66
commit 6c4b40def7
25 changed files with 950 additions and 35 deletions

View File

@@ -393,12 +393,6 @@ static inline int64_t decodePPC64LocalEntryOffset(unsigned Other) {
unsigned Val = (Other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
return ((1 << Val) >> 2) << 2;
}
/// Encode a local entry point offset, given in bytes, as an st_other value.
///
/// The offset is bucketed by threshold rather than matched exactly: offsets of
/// at least 64, 32, 16, 8 and 4 bytes yield codes 6, 5, 4, 3 and 2
/// respectively, and anything below 4 yields code 0. The code is returned
/// already shifted left by STO_PPC64_LOCAL_BIT.
static inline unsigned encodePPC64LocalEntryOffset(int64_t Offset) {
  unsigned Code = 0;
  if (Offset >= 16 * 4)
    Code = 6;
  else if (Offset >= 8 * 4)
    Code = 5;
  else if (Offset >= 4 * 4)
    Code = 4;
  else if (Offset >= 2 * 4)
    Code = 3;
  else if (Offset >= 1 * 4)
    Code = 2;
  return Code << STO_PPC64_LOCAL_BIT;
}
// ELF Relocation types for PPC64
enum {

View File

@@ -96,6 +96,7 @@
#undef R_PPC64_TPREL16_HIGHA
#undef R_PPC64_DTPREL16_HIGH
#undef R_PPC64_DTPREL16_HIGHA
#undef R_PPC64_REL24_NOTOC
#undef R_PPC64_IRELATIVE
#undef R_PPC64_REL16
#undef R_PPC64_REL16_LO
@@ -190,6 +191,7 @@ ELF_RELOC(R_PPC64_TPREL16_HIGH, 112)
ELF_RELOC(R_PPC64_TPREL16_HIGHA, 113)
ELF_RELOC(R_PPC64_DTPREL16_HIGH, 114)
ELF_RELOC(R_PPC64_DTPREL16_HIGHA, 115)
ELF_RELOC(R_PPC64_REL24_NOTOC, 116)
ELF_RELOC(R_PPC64_IRELATIVE, 248)
ELF_RELOC(R_PPC64_REL16, 249)
ELF_RELOC(R_PPC64_REL16_LO, 250)

View File

@@ -284,6 +284,7 @@ public:
VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha
VK_PPC_TLSLD, // symbol@tlsld
VK_PPC_LOCAL, // symbol@local
VK_PPC_NOTOC, // symbol@notoc
VK_COFF_IMGREL32, // symbol@imgrel (image-relative)

View File

@@ -319,6 +319,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha";
case VK_PPC_TLSLD: return "tlsld";
case VK_PPC_LOCAL: return "local";
case VK_PPC_NOTOC: return "notoc";
case VK_COFF_IMGREL32: return "IMGREL";
case VK_Hexagon_LO16: return "LO16";
case VK_Hexagon_HI16: return "HI16";
@@ -432,6 +433,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO)
.Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI)
.Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
.Case("notoc", VK_PPC_NOTOC)
.Case("gdgot", VK_Hexagon_GD_GOT)
.Case("gdplt", VK_Hexagon_GD_PLT)
.Case("iegot", VK_Hexagon_IE_GOT)

View File

@@ -39,6 +39,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
return Value & 0xfffc;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
case PPC::fixup_ppc_br24_notoc:
return Value & 0x3fffffc;
case PPC::fixup_ppc_half16:
return Value & 0xffff;
@@ -62,6 +63,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case PPC::fixup_ppc_brcond14abs:
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
case PPC::fixup_ppc_br24_notoc:
return 4;
case FK_Data_8:
return 8;
@@ -88,6 +90,7 @@ public:
const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24_notoc", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24abs", 6, 24, 0 },
{ "fixup_ppc_brcond14abs", 16, 14, 0 },
@@ -98,6 +101,7 @@ public:
const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24_notoc", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24abs", 2, 24, 0 },
{ "fixup_ppc_brcond14abs", 2, 14, 0 },
@@ -151,6 +155,7 @@ public:
return Kind >= FirstLiteralRelocationKind;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
case PPC::fixup_ppc_br24_notoc:
// If the target symbol has a local entry point we must not attempt
// to resolve the fixup directly. Emit a relocation and leave
// resolution of the final target address to the linker.

View File

@@ -86,6 +86,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
case PPC::fixup_ppc_br24_notoc:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_None:
@@ -97,6 +98,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_LOCAL:
Type = ELF::R_PPC_LOCAL24PC;
break;
case MCSymbolRefExpr::VK_PPC_NOTOC:
Type = ELF::R_PPC64_REL24_NOTOC;
break;
}
break;
case PPC::fixup_ppc_brcond14:
@@ -431,6 +435,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
return false;
case ELF::R_PPC_REL24:
case ELF::R_PPC64_REL24_NOTOC:
// If the target symbol has a local entry point, we must keep the
// target symbol to preserve that information for the linker.
// The "other" values are stored in the last 6 bits of the second byte.

View File

@@ -19,6 +19,10 @@ enum Fixups {
// 24-bit PC relative relocation for direct branches like 'b' and 'bl'.
fixup_ppc_br24 = FirstTargetFixupKind,
// 24-bit PC relative relocation for direct branches like 'b' and 'bl' where
// the caller does not use the TOC.
fixup_ppc_br24_notoc,
/// 14-bit PC relative relocation for conditional branches.
fixup_ppc_brcond14,

View File

@@ -48,7 +48,9 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
((MI.getOpcode() == PPC::BL8_NOTOC)
? (MCFixupKind)PPC::fixup_ppc_br24_notoc
: (MCFixupKind)PPC::fixup_ppc_br24)));
return 0;
}

View File

@@ -179,13 +179,9 @@ public:
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
MCAssembler &MCA = getStreamer().getAssembler();
int64_t Res;
if (!LocalOffset->evaluateAsAbsolute(Res, MCA))
report_fatal_error(".localentry expression must be absolute.");
unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
report_fatal_error(".localentry expression cannot be encoded.");
// encodePPC64LocalEntryOffset will report an error if it cannot
// encode LocalOffset.
unsigned Encoded = encodePPC64LocalEntryOffset(LocalOffset);
unsigned Other = S->getOther();
Other &= ~ELF::STO_PPC64_LOCAL_MASK;
@@ -230,6 +226,31 @@ private:
D->setOther(Other);
return true;
}
unsigned encodePPC64LocalEntryOffset(const MCExpr *LocalOffset) {
MCAssembler &MCA = getStreamer().getAssembler();
int64_t Offset;
if (!LocalOffset->evaluateAsAbsolute(Offset, MCA))
MCA.getContext().reportFatalError(
LocalOffset->getLoc(), ".localentry expression must be absolute.");
switch (Offset) {
default:
MCA.getContext().reportFatalError(
LocalOffset->getLoc(),
".localentry expression is not a valid power of 2.");
case 0:
return 0;
case 1:
return 1 << ELF::STO_PPC64_LOCAL_BIT;
case 4:
case 8:
case 16:
case 32:
case 64:
return (int)Log2(Offset) << (int)ELF::STO_PPC64_LOCAL_BIT;
}
}
};
class PPCTargetMachOStreamer : public PPCTargetStreamer {

View File

@@ -1460,14 +1460,16 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
//
// This ensures we have r2 set up correctly while executing the function
// body, no matter which entry point is called.
if (Subtarget->isELFv2ABI()
// Only do all that if the function uses r2 in the first place.
&& !MF->getRegInfo().use_empty(PPC::X2)) {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
const bool UsesX2OrR2 = !MF->getRegInfo().use_empty(PPC::X2) ||
!MF->getRegInfo().use_empty(PPC::R2);
// Only do all that if the function uses R2 as the TOC pointer
// in the first place. We don't need the global entry point if the
// function uses R2 as an allocatable register.
if (Subtarget->isELFv2ABI() && UsesX2OrR2 && PPCFI->usesTOCBasePtr()) {
// Note: The logic here must be synchronized with the code in the
// branch-selection pass which sets the offset of the first block in the
// function. This matters because it affects the alignment.
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol();
OutStreamer->emitLabel(GlobalEntryLabel);
const MCSymbolRefExpr *GlobalEntryLabelExp =
@@ -1519,6 +1521,35 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
if (TS)
TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp);
} else if (Subtarget->isELFv2ABI()) {
// When generating the entry point for a function we have a few scenarios
// based on whether or not that function uses R2 and whether or not that
// function makes calls (or is a leaf function).
// 1) A leaf function that does not use R2 (or treats it as callee-saved
// and preserves it). In this case st_other=0 and both
// the local and global entry points for the function are the same.
// No special entry point code is required.
// 2) A function uses the TOC pointer R2. This function may or may not have
// calls. In this case st_other=[2,6] and the global and local entry
// points are different. Code to correctly setup the TOC pointer in R2
// is put between the global and local entry points. This case is
// covered by the if statement above.
// 3) A function does not use the TOC pointer R2 but does have calls.
// In this case st_other=1 since we do not know whether or not any
// of the callees clobber R2. This case is dealt with in this else if
// block.
// 4) The function does not use the TOC pointer but R2 is used inside
// the function. In this case st_other=1 once again.
// 5) This function uses inline asm. We mark R2 as reserved if the function
// has inline asm so we have to assume that it may be used.
if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() ||
(!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (TS)
TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym),
MCConstantExpr::create(1, OutContext));
}
}
}

View File

@@ -1404,6 +1404,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
@@ -4689,6 +4690,16 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
SelectionDAG& DAG) const {
bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
// FIXME: Tail calls are currently disabled when using PC Relative addressing.
// The issue is that PC Relative is only partially implemented and so there
// is currently a mix of functions that require the TOC and functions that do
// not require it. If we have A calls B calls C and both A and B require the
// TOC and C does not and is marked as clobbering R2 then it is not safe for
// B to tail call C. Since we do not have the information of whether or not
// a function needs to use the TOC here in this function we need to be
// conservatively safe and disable all tail calls for now.
if (Subtarget.isUsingPCRelativeCalls()) return false;
if (DisableSCO && !TailCallOpt) return false;
// Variadic argument functions are not supported.
@@ -5085,6 +5096,17 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
return PPCISD::BCTRL;
}
// FIXME: At this moment indirect calls are treated ahead of the
// PC Relative condition because binaries can still contain a possible
// mix of functions that use a TOC and functions that do not use a TOC.
// Once the PC Relative feature is complete this condition should be moved
// up ahead of the indirect calls and should return a PPCISD::BCTRL for
// that case.
if (Subtarget.isUsingPCRelativeCalls()) {
assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
return PPCISD::CALL_NOTOC;
}
// The ABIs that maintain a TOC pointer across calls need to have a nop
// immediately following the call instruction if the caller and callee may
// have different TOC bases. At link time if the linker determines the calls
@@ -5094,8 +5116,8 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
// will rewrite the nop to be a load of the TOC pointer from the linkage area
// into gpr2.
if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
: PPCISD::CALL_NOP;
return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
: PPCISD::CALL_NOP;
return PPCISD::CALL;
}
@@ -5372,7 +5394,7 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops,
// no way to mark dependencies as implicit here.
// We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
!CFlags.IsPatchPoint)
!CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
@@ -5398,7 +5420,8 @@ SDValue PPCTargetLowering::FinishCall(
unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI())
if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
Subtarget.isAIXABI())
setUsesTOCBasePtr(DAG);
unsigned CallOpc =
@@ -11373,7 +11396,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
if (Subtarget.is64BitELFABI() &&
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
MI.getOpcode() == TargetOpcode::PATCHPOINT &&
!Subtarget.isUsingPCRelativeCalls()) {
// Call lowering should have added an r2 operand to indicate a dependence
// on the TOC base pointer value. It can't however, because there is no
// way to mark the dependence as implicit there, and so the stackmap code

View File

@@ -165,9 +165,11 @@ namespace llvm {
/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls and 32-bit/64-bit AIX calls.
/// CALL_NOTOC the caller does not use the TOC.
CALL,
CALL_NOP,
CALL_NOTOC,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.

View File

@@ -140,6 +140,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
(outs), (ins abscalltarget:$func),
"bla $func\n\tnop", IIC_BrB,
[(PPCcall_nop (i64 imm:$func))]>;
// BL8_NOTOC is only defined when the PCRelativeMemops predicate holds.
let Predicates = [PCRelativeMemops] in {
// BL8_NOTOC means that the caller does not use the TOC pointer and if
// it does use R2 then it is just a caller saved register. Therefore it is
// safe to emit only the bl and not the nop for this instruction. The
// linker will not try to restore R2 after the call.
// The instruction carries an empty selection pattern list; it is printed as
// a plain "bl $func" with no trailing nop.
def BL8_NOTOC : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs),
(ins calltarget:$func),
"bl $func", IIC_BrB, []>;
}
}
let Uses = [CTR8, RM] in {
let isPredicable = 1 in
@@ -194,6 +203,11 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)),
(BL8_NOTOC tglobaladdr:$dst)>;
def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)),
(BL8_NOTOC texternalsym:$dst)>;
// Calls for AIX
def : Pat<(PPCcall (i64 mcsym:$dst)),
(BL8 mcsym:$dst)>;

View File

@@ -252,6 +252,9 @@ def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
@@ -994,6 +997,7 @@ def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">;
def HasFPU : Predicate<"PPCSubTarget->hasFPU()">;
def PCRelativeMemops : Predicate<"PPCSubTarget->hasPCRelativeMemops()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.

View File

@@ -81,7 +81,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
if (MO.getTargetFlags() == PPCII::MO_PLT)
RefKind = MCSymbolRefExpr::VK_PLT;
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
const MachineInstr *MI = MO.getParent();
if (MI->getOpcode() == PPC::BL8_NOTOC)
RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
const MachineFunction *MF = MI->getMF();
const Module *M = MF->getFunction().getParent();
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
const TargetMachine &TM = Printer.TM;

View File

@@ -57,6 +57,8 @@ STATISTIC(NumRotatesCollapsed,
"Number of pairs of rotate left, clear left/right collapsed");
STATISTIC(NumEXTSWAndSLDICombined,
"Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
STATISTIC(NumX2FoundForPCRel, "Number of times the X2 TOC pointer has been "
"found when PC relative NOTOC is being used.");
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -99,6 +101,11 @@ private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
// Perform peepholes that cannot be skipped.
// Some peephole simplifications are required for correctness and will not
// be skipped even if skipFunction(MF.getFunction()) returns true.
void unskipableSimplifyCode(void);
// Perform peepholes.
bool simplifyCode(void);
@@ -124,9 +131,14 @@ public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
initialize(MF);
// FIXME: This introduces another complete traversal of the instructions
// in the function in the common case (function is not skipped). Although
// this is less than ideal for compile time, this code will go away once
// our PC-Rel implementation is complete.
unskipableSimplifyCode();
if (skipFunction(MF.getFunction()))
return false;
initialize(MF);
return simplifyCode();
}
};
@@ -260,6 +272,41 @@ void PPCMIPeephole::UpdateTOCSaves(
TOCSaves[MI] = Keep;
}
void PPCMIPeephole::unskipableSimplifyCode(void) {
// If this function has no uses of R2 there is nothing to do here.
if(MF->getRegInfo().use_empty(PPC::X2))
return;
// This is only for PCRelative calls.
if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) {
return;
}
// This function has R2 so we need to mark an implicit def for it.
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
FuncInfo->setUsesTOCBasePtr();
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() == PPC::BL8_NOTOC) {
// At this point the BL8_NOTOC instruction is not really safe because it
// assumes that the caller does not need the TOC. It will be safe
// later once the full PC relative implementation is complete but it is
// not now.
// Here we are looking for X2. Since this is Pre-RA the only uses of X2
// would indicate the use of the TOC. We want to detect all uses of the
// TOC. Once the work is done we should not see any uses of the TOC.
// TODO: Once the implementation is complete this should be turned into
// an assert
Register Reg = MF->getSubtarget<PPCSubtarget>().getTOCPointerRegister();
MachineOperand MO = MachineOperand::CreateReg(Reg, false, true);
MI.addOperand(*MF, MO);
MI.setDesc(TII->get(PPC::BL8_NOP));
++NumX2FoundForPCRel;
}
}
}
}
// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
bool Simplified = false;

View File

@@ -153,7 +153,14 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SRV464_TLS_PE_SaveList;
// On PPC64, we might need to save r2 (but only if it is not reserved).
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
// We do not need to treat R2 as callee-saved when using PC-Relative calls
// because any direct uses of R2 will cause it to be reserved. If the function
// is a leaf or the only uses of R2 are implicit uses for calls, the calls
// will use the @notoc relocation which will cause this function to set the
// st_other bit to 1, thereby communicating to its caller that it arbitrarily
// clobbers the TOC.
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2) &&
!Subtarget.isUsingPCRelativeCalls();
// Cold calling convention CSRs.
if (MF->getFunction().getCallingConv() == CallingConv::Cold) {

View File

@@ -41,8 +41,8 @@ def P9Model : SchedMachineModel {
let CompleteModel = 1;
// Do not support QPX (Quad Processing eXtension), SPE (Signal Processing
// Engine) or prefixed instructions on Power 9.
let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs];
// Engine), prefixed instructions on Power 9 or PC relative mem ops.
let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops];
}

View File

@@ -227,3 +227,8 @@ bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
// PC-relative calls are used only on 64-bit targets that have PC-relative
// memory operations, follow the ELFv2 ABI and use the medium code model.
bool PPCSubtarget::isUsingPCRelativeCalls() const {
  if (!isPPC64())
    return false;
  if (!hasPCRelativeMemops())
    return false;
  if (!isELFv2ABI())
    return false;
  return CodeModel::Medium == getTargetMachine().getCodeModel();
}

View File

@@ -333,6 +333,7 @@ public:
bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); }
bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); }
bool isUsingPCRelativeCalls() const;
/// Originally, this function return hasISEL(). Now we always enable it,
/// but may expand the ISEL instruction later.

View File

@@ -0,0 +1,176 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=future -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=CHECK-S
@global_int = common dso_local local_unnamed_addr global i32 0, align 4
; Leaf function whose body only returns the constant 42. The checks require
; that no .localentry directive is emitted for it.
define dso_local signext i32 @NoTOC() local_unnamed_addr {
; CHECK-S-LABEL: NoTOC:
; CHECK-S-NOT: .localentry
; CHECK-S: li r3, 42
; CHECK-S-NEXT: blr
entry:
ret i32 42
}
; Leaf function containing inline asm that lists r2 as a clobber. The checks
; expect the function to be marked with ".localentry AsmClobberX2, 1".
define dso_local signext i32 @AsmClobberX2(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: AsmClobberX2:
; CHECK-S: .localentry AsmClobberX2, 1
; CHECK-S: add r3, r4, r3
; CHECK-S: #APP
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: #NO_APP
; CHECK-S: blr
entry:
%add = add nsw i32 %b, %a
tail call void asm sideeffect "nop", "~{r2}"()
ret i32 %add
}
; FIXME: This is actually a test case that shows a bug. On power9 and earlier
; this test should not compile. On later CPUs (like this test) the @toc
; should be replaced with @pcrel and we won't need R2 and so the problem
; goes away.
define dso_local signext i32 @AsmClobberX2WithTOC(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: AsmClobberX2WithTOC:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep2@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep2@l
; CHECK-S: .localentry AsmClobberX2WithTOC, .Lfunc_lep2-.Lfunc_gep2
; CHECK-S: #APP
; CHECK-S-NEXT: li r2, 0
; CHECK-S-NEXT: #NO_APP
; CHECK-S-NEXT: addis r5, r2, global_int@toc@ha
; CHECK-S-NEXT: lwz r5, global_int@toc@l(r5)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: add r3, r3, r5
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
tail call void asm sideeffect "li 2, 0", "~{r2}"()
%0 = load i32, i32* @global_int, align 4
%add1 = add nsw i32 %add, %0
ret i32 %add1
}
; Leaf function containing inline asm that clobbers only r5. The checks still
; expect ".localentry AsmClobberX5, 1" even though r2 is not in the clobber
; list.
define dso_local signext i32 @AsmClobberX5(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: AsmClobberX5:
; CHECK-S: .localentry AsmClobberX5, 1
; CHECK-S-NEXT: # %bb.0: # %entry
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: #APP
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: #NO_APP
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
tail call void asm sideeffect "nop", "~{r5}"()
ret i32 %add
}
; Clobber all GPRs except R2.
define dso_local signext i32 @AsmClobberNotR2(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: AsmClobberNotR2:
; CHECK-S: .localentry AsmClobberNotR2, 1
; CHECK-S: add r3, r4, r3
; CHECK-S: stw r3, -148(r1) # 4-byte Folded Spill
; CHECK-S-NEXT: #APP
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: #NO_APP
; CHECK-S-NEXT: lwz r3, -148(r1) # 4-byte Folded Reload
; CHECK-S: blr
entry:
%add = add nsw i32 %b, %a
tail call void asm sideeffect "nop", "~{r0},~{r1},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"()
ret i32 %add
}
; Increase register pressure enough to force the register allocator to
; make use of R2.
define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) local_unnamed_addr {
; CHECK-S-LABEL: X2IsCallerSaved:
; CHECK-S: .localentry X2IsCallerSaved, 1
; CHECK-S-NEXT: # %bb.0: # %entry
; CHECK-S-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-S-NEXT: add r11, r4, r3
; CHECK-S-NEXT: subf r29, r9, r8
; CHECK-S-NEXT: add r9, r10, r9
; CHECK-S-NEXT: subf r10, r3, r10
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: mullw r3, r3, r11
; CHECK-S-NEXT: mullw r3, r3, r5
; CHECK-S-NEXT: subf r12, r5, r4
; CHECK-S-NEXT: mullw r3, r3, r6
; CHECK-S-NEXT: add r0, r6, r5
; CHECK-S-NEXT: mullw r3, r3, r12
; CHECK-S-NEXT: mullw r3, r3, r0
; CHECK-S-NEXT: mullw r3, r3, r7
; CHECK-S-NEXT: subf r2, r7, r6
; CHECK-S-NEXT: mullw r3, r3, r8
; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-S-NEXT: add r30, r8, r7
; CHECK-S-NEXT: mullw r3, r3, r2
; CHECK-S-NEXT: mullw r3, r3, r30
; CHECK-S-NEXT: mullw r3, r3, r29
; CHECK-S-NEXT: mullw r3, r3, r9
; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-S-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-S-NEXT: mullw r3, r3, r10
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
%sub = sub nsw i32 %b, %c
%add1 = add nsw i32 %d, %c
%sub2 = sub nsw i32 %d, %e
%add3 = add nsw i32 %f, %e
%sub4 = sub nsw i32 %f, %g
%add5 = add nsw i32 %h, %g
%sub6 = sub nsw i32 %h, %a
%mul = mul i32 %b, %a
%mul7 = mul i32 %mul, %add
%mul8 = mul i32 %mul7, %c
%mul9 = mul i32 %mul8, %d
%mul10 = mul i32 %mul9, %sub
%mul11 = mul i32 %mul10, %add1
%mul12 = mul i32 %mul11, %e
%mul13 = mul i32 %mul12, %f
%mul14 = mul i32 %mul13, %sub2
%mul15 = mul i32 %mul14, %add3
%mul16 = mul i32 %mul15, %sub4
%mul17 = mul i32 %mul16, %add5
%mul18 = mul i32 %mul17, %sub6
ret i32 %mul18
}
; Loads @global_int through the TOC. The checks expect the r2 setup sequence
; at the global entry point and a .localentry directive whose operand is the
; distance between the local and global entry labels.
define dso_local signext i32 @UsesX2AsTOC() local_unnamed_addr {
; CHECK-S-LABEL: UsesX2AsTOC:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep6@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep6@l
; CHECK-S: .localentry UsesX2AsTOC, .Lfunc_lep6-.Lfunc_gep6
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: addis r3, r2, global_int@toc@ha
; CHECK-S-NEXT: lwa r3, global_int@toc@l(r3)
; CHECK-S-NEXT: blr
entry:
%0 = load i32, i32* @global_int, align 4
ret i32 %0
}
; Returns a floating-point constant. The checks expect it to be loaded from a
; constant-pool entry addressed relative to r2, with the r2 setup sequence and
; a label-difference .localentry directive emitted for the function.
define dso_local double @UsesX2AsConstPoolTOC() local_unnamed_addr {
; CHECK-S-LABEL: UsesX2AsConstPoolTOC:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep7@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep7@l
; CHECK-S: .localentry UsesX2AsConstPoolTOC, .Lfunc_lep7-.Lfunc_gep7
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: addis r3, r2, .LCPI7_0@toc@ha
; CHECK-S-NEXT: lfd f1, .LCPI7_0@toc@l(r3)
; CHECK-S-NEXT: blr
entry:
ret double 0x404124A4EBDD334C
}

View File

@@ -0,0 +1,42 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=future -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=CHECK-S
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=future -ppc-asm-full-reg-names --filetype=obj < %s | \
; RUN: llvm-objdump -dr - | FileCheck %s --check-prefix=CHECK-O
; CHECK-S-LABEL: caller
; CHECK-S: bl callee@notoc
; CHECK-S: blr
; CHECK-O-LABEL: caller
; CHECK-O: bl
; CHECK-O-NEXT: R_PPC64_REL24_NOTOC callee
; CHECK-O: blr
; Direct call to the externally declared @callee; the surrounding checks
; expect it to be emitted with the @notoc modifier and the
; R_PPC64_REL24_NOTOC relocation.
define dso_local signext i32 @caller() local_unnamed_addr {
entry:
%call = tail call signext i32 bitcast (i32 (...)* @callee to i32 ()*)()
ret i32 %call
}
declare signext i32 @callee(...) local_unnamed_addr
; Some calls can be considered External Symbols.
; CHECK-S-LABEL: ExternalSymbol
; CHECK-S: bl memcpy@notoc
; CHECK-S: blr
; CHECK-O-LABEL: ExternalSymbol
; CHECK-O: bl
; CHECK-O-NEXT: R_PPC64_REL24_NOTOC memcpy
; CHECK-O: blr
define dso_local void @ExternalSymbol(i8* nocapture %out, i8* nocapture readonly %in, i64 %num) local_unnamed_addr {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %in, i64 %num, i1 false)
ret void
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)

View File

@@ -0,0 +1,521 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=future -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=CHECK-S
; Globals referenced by the tests below:
;   globalVar       - dso_local; the expected code reads it directly with
;                     globalVar@toc@ha / globalVar@toc@l.
;   externGlobalVar - external and not dso_local; the expected code first loads
;                     a pointer through .LC0@toc and then reads the value.
;   indirectCall    - dso_local function pointer loaded by IndirectCall1 and
;                     IndirectCall2.
@globalVar = common dso_local local_unnamed_addr global i32 0, align 4
@externGlobalVar = external local_unnamed_addr global i32, align 4
@indirectCall = common dso_local local_unnamed_addr global i32 (i32)* null, align 8
; This function needs to remain as noinline.
; The compiler needs to know this function is local but must be forced to call
; it. The only thing we really need to check here is that st_other=0 and
; so we make sure that there is no .localentry.
; Local callee used by the direct and tail call tests in this file. #0 refers
; to the noinline attribute group declared at the end of the file. The checks
; expect no .localentry directive and a body made of the add, the sign
; extension and the return.
define dso_local signext i32 @localCall(i32 signext %a) local_unnamed_addr #0 {
; CHECK-S-LABEL: localCall:
; CHECK-S-NOT: .localentry
; CHECK-S: addi r3, r3, 5
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %a, 5
ret i32 %add
}
; Calls the local function localCall and then reads the dso_local globalVar.
; With a TOC-relative global access in the body, the expected code sets up r2
; from r12, emits a .localentry computed from the gep/lep labels, and uses a
; plain `bl localCall` followed by a nop.
define dso_local signext i32 @DirectCallLocal1(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: DirectCallLocal1:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep1@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep1@l
; CHECK-S: .localentry DirectCallLocal1, .Lfunc_lep1-.Lfunc_gep1
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl localCall
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha
; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
%call = tail call signext i32 @localCall(i32 signext %add)
%0 = load i32, i32* @globalVar, align 4
%mul = mul nsw i32 %0, %call
ret i32 %mul
}
; Same shape as DirectCallLocal1, but the global read after the call is the
; external externGlobalVar. The expected code loads a pointer through .LC0@toc
; and then the value, and still uses r2 setup, a gep/lep .localentry and a
; plain `bl localCall` followed by a nop.
define dso_local signext i32 @DirectCallLocal2(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: DirectCallLocal2:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep2@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep2@l
; CHECK-S: .localentry DirectCallLocal2, .Lfunc_lep2-.Lfunc_gep2
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl localCall
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
%call = tail call signext i32 @localCall(i32 signext %add)
%0 = load i32, i32* @externGlobalVar, align 4
%mul = mul nsw i32 %0, %call
ret i32 %mul
}
; Calls localCall and references no global. The expected code has
; `.localentry DirectCallLocalNoGlobal, 1` and `bl localCall@notoc`, with the
; add following the call directly (no nop). %b is kept in r30 across the call.
define dso_local signext i32 @DirectCallLocalNoGlobal(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: DirectCallLocalNoGlobal:
; CHECK-S: .localentry DirectCallLocalNoGlobal, 1
; CHECK-S-NEXT: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: .cfi_def_cfa_offset 48
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: .cfi_offset r30, -16
; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -48(r1)
; CHECK-S-NEXT: mr r30, r4
; CHECK-S-NEXT: bl localCall@notoc
; CHECK-S-NEXT: add r3, r3, r30
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 48
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%call = tail call signext i32 @localCall(i32 signext %a)
%add = add nsw i32 %call, %b
ret i32 %add
}
; Calls the external function externCall and then reads the dso_local
; globalVar. The expected code sets up r2 from r12, emits a gep/lep
; .localentry, and uses a plain `bl externCall` followed by a nop.
define dso_local signext i32 @DirectCallExtern1(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: DirectCallExtern1:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep4@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep4@l
; CHECK-S: .localentry DirectCallExtern1, .Lfunc_lep4-.Lfunc_gep4
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl externCall
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha
; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
%call = tail call signext i32 @externCall(i32 signext %add)
%0 = load i32, i32* @globalVar, align 4
%mul = mul nsw i32 %0, %call
ret i32 %mul
}
declare signext i32 @externCall(i32 signext) local_unnamed_addr
; Calls the external function externCall and then reads the external
; externGlobalVar. The expected code loads a pointer through .LC0@toc before
; reading the value, and uses r2 setup, a gep/lep .localentry and a plain
; `bl externCall` followed by a nop.
define dso_local signext i32 @DirectCallExtern2(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: DirectCallExtern2:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep5@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep5@l
; CHECK-S: .localentry DirectCallExtern2, .Lfunc_lep5-.Lfunc_gep5
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl externCall
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
%call = tail call signext i32 @externCall(i32 signext %add)
%0 = load i32, i32* @externGlobalVar, align 4
%mul = mul nsw i32 %0, %call
ret i32 %mul
}
; Calls the external function externCall and references no global. The
; expected code has `.localentry DirectCallExternNoGlobal, 1` and
; `bl externCall@notoc`, with the add following the call directly (no nop).
define dso_local signext i32 @DirectCallExternNoGlobal(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: DirectCallExternNoGlobal:
; CHECK-S: .localentry DirectCallExternNoGlobal, 1
; CHECK-S-NEXT: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: .cfi_def_cfa_offset 48
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: .cfi_offset r30, -16
; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -48(r1)
; CHECK-S-NEXT: mr r30, r4
; CHECK-S-NEXT: bl externCall@notoc
; CHECK-S-NEXT: add r3, r3, r30
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 48
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%call = tail call signext i32 @externCall(i32 signext %a)
%add = add nsw i32 %call, %b
ret i32 %add
}
; Reads the dso_local globalVar and then returns the result of a call to
; localCall. The call is marked `tail` in the IR, but the expected code still
; uses `bl localCall` plus a nop followed by the normal epilogue, together with
; r2 setup and a gep/lep .localentry.
define dso_local signext i32 @TailCallLocal1(i32 signext %a) local_unnamed_addr {
; CHECK-S-LABEL: TailCallLocal1:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep7@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep7@l
; CHECK-S: .localentry TailCallLocal1, .Lfunc_lep7-.Lfunc_gep7
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha
; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl localCall
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%0 = load i32, i32* @globalVar, align 4
%add = add nsw i32 %0, %a
%call = tail call signext i32 @localCall(i32 signext %add)
ret i32 %call
}
; Same shape as TailCallLocal1, but the global read before the call is the
; external externGlobalVar, which the expected code reaches through .LC0@toc.
; The expected call is still `bl localCall` followed by a nop.
define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr {
; CHECK-S-LABEL: TailCallLocal2:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep8@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep8@l
; CHECK-S: .localentry TailCallLocal2, .Lfunc_lep8-.Lfunc_gep8
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl localCall
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%0 = load i32, i32* @externGlobalVar, align 4
%add = add nsw i32 %0, %a
%call = tail call signext i32 @localCall(i32 signext %add)
ret i32 %call
}
; Returns the result of a call to localCall and references no global. The
; expected code has `.localentry TailCallLocalNoGlobal, 1` and
; `bl localCall@notoc`, with the epilogue following the call directly (no nop).
define dso_local signext i32 @TailCallLocalNoGlobal(i32 signext %a) local_unnamed_addr {
; CHECK-S-LABEL: TailCallLocalNoGlobal:
; CHECK-S: .localentry TailCallLocalNoGlobal, 1
; CHECK-S-NEXT: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: bl localCall@notoc
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%call = tail call signext i32 @localCall(i32 signext %a)
ret i32 %call
}
; Reads the dso_local globalVar and then returns the result of a call to the
; external function externCall. The expected code uses r2 setup, a gep/lep
; .localentry, and `bl externCall` followed by a nop.
define dso_local signext i32 @TailCallExtern1(i32 signext %a) local_unnamed_addr {
; CHECK-S-LABEL: TailCallExtern1:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep10@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep10@l
; CHECK-S: .localentry TailCallExtern1, .Lfunc_lep10-.Lfunc_gep10
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha
; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl externCall
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%0 = load i32, i32* @globalVar, align 4
%add = add nsw i32 %0, %a
%call = tail call signext i32 @externCall(i32 signext %add)
ret i32 %call
}
; Same shape as TailCallExtern1, but the global read before the call is the
; external externGlobalVar, which the expected code reaches through .LC0@toc.
; The expected call is still `bl externCall` followed by a nop.
define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr {
; CHECK-S-LABEL: TailCallExtern2:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep11@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep11@l
; CHECK-S: .localentry TailCallExtern2, .Lfunc_lep11-.Lfunc_gep11
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl externCall
; CHECK-S-NEXT: nop
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%0 = load i32, i32* @externGlobalVar, align 4
%add = add nsw i32 %0, %a
%call = tail call signext i32 @externCall(i32 signext %add)
ret i32 %call
}
; Returns the result of a call to the external function externCall and
; references no global. The expected code has
; `.localentry TailCallExternNoGlobal, 1` and `bl externCall@notoc`, with the
; epilogue following the call directly (no nop).
define dso_local signext i32 @TailCallExternNoGlobal(i32 signext %a) local_unnamed_addr {
; CHECK-S-LABEL: TailCallExternNoGlobal:
; CHECK-S: .localentry TailCallExternNoGlobal, 1
; CHECK-S-NEXT: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: bl externCall@notoc
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%call = tail call signext i32 @externCall(i32 signext %a)
ret i32 %call
}
; Calls through the function pointer stored in the global indirectCall and
; then reads the dso_local globalVar. The expected code saves r2 at 24(r1),
; loads the target into r12, moves it to ctr, calls with bctrl and reloads r2
; from 24(r1) afterwards. r2 setup and a gep/lep .localentry are expected.
define dso_local signext i32 @IndirectCall1(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: IndirectCall1:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep13@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep13@l
; CHECK-S: .localentry IndirectCall1, .Lfunc_lep13-.Lfunc_gep13
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: std r2, 24(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: addis r5, r2, indirectCall@toc@ha
; CHECK-S-NEXT: ld r12, indirectCall@toc@l(r5)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: mtctr r12
; CHECK-S-NEXT: bctrl
; CHECK-S-NEXT: ld 2, 24(r1)
; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha
; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
%0 = load i32 (i32)*, i32 (i32)** @indirectCall, align 8
%call = tail call signext i32 %0(i32 signext %add)
%1 = load i32, i32* @globalVar, align 4
%mul = mul nsw i32 %1, %call
ret i32 %mul
}
; Same shape as IndirectCall1, but the global read after the call is the
; external externGlobalVar, which the expected code reaches through .LC0@toc.
; r2 is still expected to be saved before and reloaded after the bctrl.
define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) local_unnamed_addr {
; CHECK-S-LABEL: IndirectCall2:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep14@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep14@l
; CHECK-S: .localentry IndirectCall2, .Lfunc_lep14-.Lfunc_gep14
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: std r2, 24(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: addis r5, r2, indirectCall@toc@ha
; CHECK-S-NEXT: ld r12, indirectCall@toc@l(r5)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: mtctr r12
; CHECK-S-NEXT: bctrl
; CHECK-S-NEXT: ld 2, 24(r1)
; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
%0 = load i32 (i32)*, i32 (i32)** @indirectCall, align 8
%call = tail call signext i32 %0(i32 signext %add)
%1 = load i32, i32* @externGlobalVar, align 4
%mul = mul nsw i32 %1, %call
ret i32 %mul
}
; Calls through the function pointer passed as %call_param and then reads the
; dso_local globalVar. The expected code takes the target from r5 (mtctr r5 and
; mr r12, r5), saves r2 at 24(r1) and reloads it after the bctrl.
define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr {
; CHECK-S-LABEL: IndirectCall3:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep15@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep15@l
; CHECK-S: .localentry IndirectCall3, .Lfunc_lep15-.Lfunc_gep15
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: std r2, 24(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: mtctr r5
; CHECK-S-NEXT: mr r12, r5
; CHECK-S-NEXT: bctrl
; CHECK-S-NEXT: ld 2, 24(r1)
; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha
; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%add = add nsw i32 %b, %a
%call = tail call signext i32 %call_param(i32 signext %add)
%0 = load i32, i32* @globalVar, align 4
%mul = mul nsw i32 %0, %call
ret i32 %mul
}
; Calls through the function pointer passed as %call_param and references no
; global. Unlike the direct-call NoGlobal tests, the expected code still sets
; up r2 from r12, emits a gep/lep .localentry, and saves and reloads r2 around
; the bctrl. %b is kept in r30 across the call.
define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr {
; CHECK-S-LABEL: IndirectCallNoGlobal:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep16@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep16@l
; CHECK-S: .localentry IndirectCallNoGlobal, .Lfunc_lep16-.Lfunc_gep16
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: .cfi_def_cfa_offset 48
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: .cfi_offset r30, -16
; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -48(r1)
; CHECK-S-NEXT: mtctr r5
; CHECK-S-NEXT: mr r12, r5
; CHECK-S-NEXT: std r2, 24(r1)
; CHECK-S-NEXT: mr r30, r4
; CHECK-S-NEXT: bctrl
; CHECK-S-NEXT: ld 2, 24(r1)
; CHECK-S-NEXT: add r3, r3, r30
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: addi r1, r1, 48
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%call = tail call signext i32 %call_param(i32 signext %a)
%add = add nsw i32 %call, %b
ret i32 %add
}
; The body is only an indirect call through %call_param whose result is
; returned. The expected code still sets up r2 from r12, emits a gep/lep
; .localentry, takes the target from r4, and saves and reloads r2 around the
; bctrl.
define dso_local signext i32 @IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr {
; CHECK-S-LABEL: IndirectCallOnly:
; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep17@ha
; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep17@l
; CHECK-S: .localentry IndirectCallOnly, .Lfunc_lep17-.Lfunc_gep17
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: std r2, 24(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: mtctr r4
; CHECK-S-NEXT: mr r12, r4
; CHECK-S-NEXT: bctrl
; CHECK-S-NEXT: ld 2, 24(r1)
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
entry:
%call = tail call signext i32 %call_param(i32 signext %a)
ret i32 %call
}
attributes #0 = { noinline }

View File

@@ -1,11 +1,11 @@
# RUN: not --crash llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t
# RUN: not llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t
# RUN: FileCheck < %t %s
# RUN: not --crash llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t
# RUN: not llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t
# RUN: FileCheck < %t %s
# 123 is deliberately a .localentry value the assembler cannot accept; the
# expected diagnostic for it follows the directive.
sym:
.localentry sym, 123
# CHECK: LLVM ERROR: .localentry expression cannot be encoded.
# CHECK: error: .localentry expression is not a valid power of 2.

View File

@@ -1,12 +1,12 @@
# RUN: not --crash llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t
# RUN: not llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t
# RUN: FileCheck < %t %s
# RUN: not --crash llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t
# RUN: not llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t
# RUN: FileCheck < %t %s
# remote_sym is only declared .globl and has no definition in the visible test,
# so the .localentry operand below is not an absolute expression; the expected
# diagnostic for it follows the directive.
.globl remote_sym
sym:
.localentry sym, remote_sym
# CHECK: LLVM ERROR: .localentry expression must be absolute.
# CHECK: error: .localentry expression must be absolute.