Files
clang-p2996/bolt/lib/Passes/ShrinkWrapping.cpp
Rafael Auler c09cd64e5c [BOLT] Fix AND evaluation bug in shrink wrapping
Fix a bug where shrink-wrapping would use wrong stack offsets
because the stack was being aligned with an AND instruction, hence,
making its true offsets only available at runtime (we can't
statically determine where the stack elements are and we must give up
on this case).

Reviewed By: Amir

Differential Revision: https://reviews.llvm.org/D126110
2022-05-26 14:59:28 -07:00

2075 lines
71 KiB
C++

//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//
#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Core/MCPlus.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include <numeric>
#include <stack>
#define DEBUG_TYPE "shrinkwrapping"
using namespace llvm;
namespace opts {
extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;
// Hotness threshold, as a percentage of the prologue's execution count: a
// basic block is cold enough to host moved spills when its count is below
// this fraction of the original saving cost (see isBestSavePosCold()).
static cl::opt<unsigned> ShrinkWrappingThreshold(
"shrink-wrapping-threshold",
cl::desc("Percentage of prologue execution count to use as threshold when"
" evaluating whether a block is cold enough to be profitable to"
" move eligible spills there"),
cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
namespace bolt {
/// Pass 1 of callee-saved register discovery: scan every basic block for
/// stores that spill a register to the local frame and pass all safety
/// checks. Surviving candidates are recorded in CalleeSaved, annotated with
/// the save tag, and their offset/cost bookkeeping updated; any register
/// that trips a check is blacklisted for the rest of the scan.
void CalleeSavedAnalysis::analyzeSaves() {
  ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
  StackReachingUses &SRU = Info.getStackReachingUses();
  auto &InsnToBB = Info.getInsnToBBMap();
  BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);
  LLVM_DEBUG(dbgs() << "Checking spill locations\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : BB) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Blacklist weird stores we don't understand: non-simple accesses
        // or stores into the caller's frame (offset >= 0).
        if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
            FIE->IsStoreFromReg) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        if (!FIE->IsStore || !FIE->IsStoreFromReg ||
            BlacklistedRegs[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }
        // If this reg is defined locally, it is not a callee-saved reg
        if (RD.isReachedBy(FIE->RegOrImm,
                           Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed in a stack -- do not mess
        // with it.
        // BUGFIX: the closing parenthesis previously sat after the second
        // argument, making the condition the comma-expression
        // (isStoreUsed(...), false) -- always false, so this check never
        // fired. The third argument now correctly goes inside the call,
        // matching the identical call in classifyStackAccesses().
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                            /*IncludeLocalAccesses=*/false)) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If this stack position is loaded elsewhere in another reg, we can't
        // update it, so blacklist it.
        if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
                                                  : SRU.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // Ignore regs with multiple saves
        if (CalleeSaved[FIE->RegOrImm]) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // Record the candidate: where it is saved, how expensive the save
        // is (execution count of its block), and its stack offset.
        CalleeSaved.set(FIE->RegOrImm);
        SaveFIEByReg[FIE->RegOrImm] = &*FIE;
        SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
        BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
        OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
        LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
                          << FIE->RegOrImm << "\n");
      }
      Prev = &Inst;
    }
  }
}
/// Pass 2 of callee-saved register discovery: walk each basic block in
/// reverse looking for loads that restore a register previously identified
/// as callee-saved by analyzeSaves(). Unsafe or mismatching restores demote
/// the register from CalleeSaved; matching ones get the restore annotation
/// and set the corresponding HasRestores bit.
void CalleeSavedAnalysis::analyzeRestores() {
ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();
// Now compute all restores of these callee-saved regs
for (BinaryBasicBlock &BB : BF) {
const MCInst *Prev = nullptr;
for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
MCInst &Inst = *I;
if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
// Only frame loads of registers still considered callee-saved matter.
if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
Prev = &Inst;
continue;
}
// If this reg is used locally after a restore, then we are probably
// not dealing with a callee-saved reg. Except if this use is by
// another store, but we don't cover this case yet.
// Also not callee-saved if this load accesses caller stack or isn't
// simple.
if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
RU.isReachedBy(FIE->RegOrImm,
Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
CalleeSaved.reset(FIE->RegOrImm);
Prev = &Inst;
continue;
}
// If stack offsets between saves/store don't agree with each other,
// we don't completely understand what's happening here
if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
CalleeSaved.reset(FIE->RegOrImm);
LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
"mismatching restore: "
<< FIE->RegOrImm << "\n");
Prev = &Inst;
continue;
}
LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
<< "\n");
// Keep only the first matching restore FIE encountered per register.
if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
LoadFIEByReg[FIE->RegOrImm] = &*FIE;
BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
AllocatorId);
HasRestores.set(FIE->RegOrImm);
}
Prev = &Inst;
}
}
}
std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
std::vector<MCInst *> Results;
for (BinaryBasicBlock &BB : BF)
for (MCInst &Inst : BB)
if (getSavedReg(Inst) == Reg)
Results.push_back(&Inst);
return Results;
}
std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
std::vector<MCInst *> Results;
for (BinaryBasicBlock &BB : BF)
for (MCInst &Inst : BB)
if (getRestoredReg(Inst) == Reg)
Results.push_back(&Inst);
return Results;
}
/// Strip the save/restore annotations this analysis attached to
/// instructions, leaving the function in its pre-analysis state.
CalleeSavedAnalysis::~CalleeSavedAnalysis() {
  for (BinaryBasicBlock &Block : BF)
    for (MCInst &Instr : Block) {
      BC.MIB->removeAnnotation(Instr, getSaveTag());
      BC.MIB->removeAnnotation(Instr, getRestoreTag());
    }
}
/// Mark the stack region starting at \p Offset as untouchable, widening the
/// recorded size if a larger region for the same offset is reported later.
void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
  int64_t &RecordedSize = BlacklistedRegions[Offset];
  if (RecordedSize < Size)
    RecordedSize = Size;
}
/// Return true if [Offset, Offset+Size) overlaps any blacklisted region.
bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
  for (const std::pair<const int64_t, int64_t> &Region : BlacklistedRegions) {
    const bool Overlaps =
        Offset + Size > Region.first && Offset < Region.first + Region.second;
    if (Overlaps)
      return true;
  }
  return false;
}
/// Erase from AvailableRegions every region that partially overlaps
/// [Offset, Offset+Size) without being exactly equal to it. If any such
/// conflict existed, the incoming region itself is blacklisted and true is
/// returned.
bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
                                                     int64_t Size) {
  bool ErasedAny = false;
  auto It = AvailableRegions.begin();
  while (It != AvailableRegions.end()) {
    const int64_t RegionOff = It->first;
    const int64_t RegionSz = It->second;
    const bool Overlaps =
        Offset + Size > RegionOff && Offset < RegionOff + RegionSz;
    const bool ExactMatch = RegionOff == Offset && RegionSz == Size;
    if (Overlaps && !ExactMatch) {
      It = AvailableRegions.erase(It);
      ErasedAny = true;
    } else {
      ++It;
    }
  }
  if (!ErasedAny)
    return false;
  blacklistRegion(Offset, Size);
  return true;
}
/// Inspect an instruction that defines the frame pointer. If the FP is being
/// set to anything other than the current SP value (as tracked by
/// StackPointerTracking), we don't understand the frame setup; a zero-size
/// blacklisted region at offset 0 is recorded, which makes the whole layout
/// non-modifiable (canCollapseRegion/canInsertRegion bail on any non-empty
/// blacklist).
void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
StackPointerTracking &SPT = Info.getStackPointerTracking();
if (!BC.MII->get(Point.getOpcode())
.hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
return;
// Build (register, value) pairs for the evaluator; unknown values are
// passed as the null pair (0, 0).
int SPVal, FPVal;
std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
std::pair<MCPhysReg, int64_t> FP;
if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
else
FP = std::make_pair(0, 0);
std::pair<MCPhysReg, int64_t> SP;
if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
else
SP = std::make_pair(0, 0);
int64_t Output;
if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
return;
// Not your regular frame pointer initialization... bail
if (Output != SPVal)
blacklistRegion(0, 0);
}
/// Inspect an instruction that redefines the stack pointer. When the new SP
/// value is computed from the frame pointer, classify it: restoring SP to a
/// value above the current SP gets a slot annotation so it can be fixed up
/// after layout changes; anything resembling an allocation through FP makes
/// the layout non-modifiable (zero-size blacklist marker).
void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
StackPointerTracking &SPT = Info.getStackPointerTracking();
if (!BC.MII->get(Point.getOpcode())
.hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
return;
// Check if the definition of SP comes from FP -- in this case, this
// value may need to be updated depending on our stack layout changes
const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode());
unsigned NumDefs = InstInfo.getNumDefs();
// Scan the source operands (those after the defs) for a use of FP.
bool UsesFP = false;
for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E;
++I) {
MCOperand &Operand = Point.getOperand(I);
if (!Operand.isReg())
continue;
if (Operand.getReg() == BC.MIB->getFramePointer()) {
UsesFP = true;
break;
}
}
if (!UsesFP)
return;
// Setting up evaluation
int SPVal, FPVal;
std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
std::pair<MCPhysReg, int64_t> FP;
if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
else
FP = std::make_pair(0, 0);
std::pair<MCPhysReg, int64_t> SP;
if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
else
SP = std::make_pair(0, 0);
int64_t Output;
if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
return;
// If the value is the same of FP, no need to adjust it
if (Output == FPVal)
return;
// If an allocation happened through FP, bail
if (Output <= SPVal) {
blacklistRegion(0, 0);
return;
}
// We are restoring SP to an old value based on FP. Mark it as a stack
// access to be fixed later.
BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}
/// Classify every stack access in the function: unsafe or externally visible
/// slots are blacklisted, while well-understood local slots are recorded in
/// AvailableRegions (and annotated with the slot tag) so they can later be
/// shifted by collapseRegion()/insertRegion().
void StackLayoutModifier::classifyStackAccesses() {
// Understand when stack slots are being used non-locally
StackReachingUses &SRU = Info.getStackReachingUses();
for (BinaryBasicBlock &BB : BF) {
const MCInst *Prev = nullptr;
for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
MCInst &Inst = *I;
// Side analyses that can blacklist the whole layout or annotate
// FP-based SP restores.
checkFramePointerInitialization(Inst);
checkStackPointerRestore(Inst);
ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
if (!FIEX) {
Prev = &Inst;
continue;
}
// Non-simple accesses and stores of immediates are not relocatable.
if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
blacklistRegion(FIEX->StackOffset, FIEX->Size);
Prev = &Inst;
continue;
}
// If this stack position is accessed in another function, we are
// probably dealing with a parameter passed in a stack -- do not mess
// with it
if (SRU.isStoreUsed(*FIEX,
Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
/*IncludeLocalAccesses=*/false)) {
blacklistRegion(FIEX->StackOffset, FIEX->Size);
Prev = &Inst;
continue;
}
// Now we have a clear stack slot access. Check if its blacklisted or if
// it conflicts with another chunk.
if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
Prev = &Inst;
continue;
}
// We are free to go. Add it as available stack slot which we know how
// to move it.
AvailableRegions[FIEX->StackOffset] = FIEX->Size;
BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
<< (int)FIEX->Size << "\n");
}
}
}
/// Walk CFI instructions in layout order while tracking the current CFA
/// (register, offset) state, and annotate stack-relative CFIs with the slot
/// tag so performChanges() can adjust their offsets. A CFA based on any
/// register other than SP/FP marks the function as non-simple.
void StackLayoutModifier::classifyCFIs() {
// Stack of saved (CfaOffset, CfaReg) states for remember/restore_state.
std::stack<std::pair<int64_t, uint16_t>> CFIStack;
// Initial CFA state: DWARF register 7 at offset -8 -- presumably the
// x86-64 entry state (CFA = RSP + 8); TODO confirm for other targets.
int64_t CfaOffset = -8;
uint16_t CfaReg = 7;
auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
} else {
// CFA tracked through some other register -- give up on modifying
// this function's layout.
IsSimple = false;
return;
}
};
for (BinaryBasicBlock *&BB : BF.layout()) {
for (MCInst &Inst : *BB) {
if (!BC.MIB->isCFI(Inst))
continue;
const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
switch (CFI->getOperation()) {
// def_cfa sets both offset and register; fall through for the register.
case MCCFIInstruction::OpDefCfa:
CfaOffset = -CFI->getOffset();
recordAccess(&Inst, CfaOffset);
LLVM_FALLTHROUGH;
case MCCFIInstruction::OpDefCfaRegister:
CfaReg = CFI->getRegister();
break;
case MCCFIInstruction::OpDefCfaOffset:
CfaOffset = -CFI->getOffset();
recordAccess(&Inst, CfaOffset);
break;
case MCCFIInstruction::OpOffset:
// Register saved at CFA-relative offset; remember which register so
// insertRegion/collapseRegion adjustments can be mapped back.
recordAccess(&Inst, CFI->getOffset());
BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
BC.MRI->getLLVMRegNum(CFI->getRegister(),
/*isEH=*/false),
AllocatorId);
break;
case MCCFIInstruction::OpSameValue:
BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
BC.MRI->getLLVMRegNum(CFI->getRegister(),
/*isEH=*/false),
AllocatorId);
break;
case MCCFIInstruction::OpRememberState:
CFIStack.push(std::make_pair(CfaOffset, CfaReg));
break;
case MCCFIInstruction::OpRestoreState: {
assert(!CFIStack.empty() && "Corrupt CFI stack");
std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
CFIStack.pop();
CfaOffset = Elem.first;
CfaReg = Elem.second;
break;
}
case MCCFIInstruction::OpRelOffset:
case MCCFIInstruction::OpAdjustCfaOffset:
llvm_unreachable("Unhandled AdjustCfaOffset");
break;
default:
break;
}
}
}
}
/// Queue a layout-change request on \p Inst. Requests accumulate in the
/// instruction's todo-tag annotation (created on first use) and are applied
/// in bulk by performChanges().
void StackLayoutModifier::scheduleChange(
    MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
  auto &Worklist = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
      Inst, getTodoTag(), AllocatorId);
  Worklist.push_back(Item);
}
/// Convenience overload: check whether the stack slot written by
/// \p DeletedPush (which must be a push) can be collapsed.
bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
  if (!IsSimple)
    return false;
  if (!BC.MIB->isPush(*DeletedPush))
    return false;
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  return FIE ? canCollapseRegion(FIE->StackOffset) : false;
}
/// Report whether the region at \p RegionAddr can be removed. Collapsing
/// shifts every access below the region, so any blacklisted region at all
/// makes the adjustment unsafe.
bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;
  // Already collapsed regions stay collapsible.
  if (CollapsedRegions.count(RegionAddr))
    return true;
  return BlacklistedRegions.empty();
}
/// Convenience overload: collapse the stack slot written by \p DeletedPush,
/// taking address and size from its frame-index entry.
bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;
  return collapseRegion(DeletedPush, FIE->StackOffset, FIE->Size);
}
/// Remove the stack region [RegionAddr, RegionAddr+RegionSz) freed by the
/// deleted allocation \p Alloc: every annotated stack access reached by the
/// allocation is scheduled for a +RegionSz offset/CFI adjustment. Accesses
/// hitting exactly the deleted slot are tagged "AccessesDeletedPos" to be
/// re-pointed later via setOffsetForCollapsedAccesses().
bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
int64_t RegionSz) {
if (!canCollapseRegion(RegionAddr))
return false;
assert(IsInitialized);
StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();
for (BinaryBasicBlock &BB : BF) {
for (MCInst &Inst : BB) {
if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
continue;
auto Slot =
BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
Inst, getSlotTag());
if (!AvailableRegions.count(Slot))
continue;
// We need to ensure this access is affected by the deleted push
if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
continue;
if (BC.MIB->isCFI(Inst)) {
if (Slot > RegionAddr)
continue;
scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
continue;
}
ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
if (!FIE) {
if (Slot > RegionAddr)
continue;
// SP update based on frame pointer
scheduleChange(
Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
continue;
}
// Accesses to the deleted slot itself are retargeted later.
if (Slot == RegionAddr) {
BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
continue;
}
// Push/pop offsets track SP implicitly; nothing to rewrite.
if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
continue;
// Only accesses on the far side of the removed region (relative to the
// base register) actually shift.
if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
continue;
if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
continue;
scheduleChange(
Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
}
}
CollapsedRegions.insert(RegionAddr);
return true;
}
void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
for (BinaryBasicBlock &BB : BF) {
for (MCInst &Inst : BB) {
if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
continue;
BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
scheduleChange(
Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
}
}
}
/// Report whether a new stack region may be allocated at program point \p P.
/// Requires a known SP value there; any blacklisted region (e.g. stack-passed
/// call parameters) makes insertion unsafe.
bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  const int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;
  // A region we already inserted stays insertable.
  if (InsertedRegions.count(RegionAddr))
    return true;
  return BlacklistedRegions.empty();
}
/// Make room for a new RegionSz-byte allocation at program point \p P: every
/// annotated stack access dominated by P whose slot lies on the moving side
/// of the insertion point is scheduled for a -RegionSz offset adjustment
/// (and matching CFIs updated accordingly).
bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
if (!canInsertRegion(P))
return false;
assert(IsInitialized);
StackPointerTracking &SPT = Info.getStackPointerTracking();
// This RegionAddr is slightly different from the one seen in collapseRegion
// This is the value of SP before the allocation the user wants to make.
int64_t RegionAddr = SPT.getStateBefore(P)->first;
if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
return false;
DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
for (BinaryBasicBlock &BB : BF) {
for (MCInst &Inst : BB) {
if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
continue;
auto Slot =
BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
Inst, getSlotTag());
if (!AvailableRegions.count(Slot))
continue;
// Only accesses dominated by the insertion point see the new region.
if (!(DA.doesADominateB(P, Inst)))
continue;
if (BC.MIB->isCFI(Inst)) {
if (Slot >= RegionAddr)
continue;
scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
continue;
}
ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
if (!FIE) {
if (Slot >= RegionAddr)
continue;
// FP-based SP restore: adjust its immediate instead.
scheduleChange(
Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
continue;
}
// Only slots on the far side of the insertion point (relative to the
// base register) shift; push/pop offsets track SP implicitly.
if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
continue;
if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
continue;
if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
continue;
scheduleChange(
Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
}
}
InsertedRegions.insert(RegionAddr);
return true;
}
/// Apply every adjustment queued by scheduleChange(): CFI offsets are
/// mutated in place (each CFI index at most once), and load/store/SP-update
/// instructions are rewritten with their new stack offsets.
void StackLayoutModifier::performChanges() {
std::set<uint32_t> ModifiedCFIIndices;
for (BinaryBasicBlock &BB : BF) {
for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
MCInst &Inst = *I;
// Leftover marker from a collapseRegion whose accesses were not
// retargeted; only push/pop may still carry it.
if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
}
if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
continue;
auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
Inst, getTodoTag());
// Fold all queued items into a single net adjustment; mixing CFI and
// load/store adjustments on one instruction is a bug.
int64_t Adjustment = 0;
WorklistItem::ActionType AdjustmentType = WorklistItem::None;
for (WorklistItem &WI : WList) {
if (WI.Action == WorklistItem::None)
continue;
assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
WI.Action == WorklistItem::AdjustCFI);
assert((AdjustmentType == WorklistItem::None ||
AdjustmentType == WI.Action) &&
"Conflicting actions requested at the same program point");
AdjustmentType = WI.Action;
Adjustment += WI.OffsetUpdate;
}
if (!Adjustment)
continue;
if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
assert(BC.MIB->isCFI(Inst));
uint32_t CFINum = Inst.getOperand(0).getImm();
// Each underlying CFI entry is shared; mutate it only once.
if (ModifiedCFIIndices.count(CFINum))
continue;
ModifiedCFIIndices.insert(CFINum);
const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
const MCCFIInstruction::OpType Operation = CFI->getOperation();
// def_cfa offsets grow the opposite way from reg-save offsets.
if (Operation == MCCFIInstruction::OpDefCfa ||
Operation == MCCFIInstruction::OpDefCfaOffset)
Adjustment = 0 - Adjustment;
LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
<< " to " << (CFI->getOffset() + Adjustment) << "\n");
BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
continue;
}
// Decompose the memory access so it can be recreated with the shifted
// offset.
int32_t SrcImm = 0;
MCPhysReg Reg = 0;
MCPhysReg StackPtrReg = 0;
int64_t StackOffset = 0;
bool IsIndexed = false;
bool IsLoad = false;
bool IsStore = false;
bool IsSimple = false;
bool IsStoreFromReg = false;
uint8_t Size = 0;
bool Success = false;
Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
Reg, SrcImm, StackPtrReg, StackOffset,
Size, IsSimple, IsIndexed);
if (!Success) {
// SP update based on FP value
Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
assert(Success);
continue;
}
assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
// SP-relative offsets shift in the opposite direction to FP-relative.
if (StackPtrReg != BC.MIB->getFramePointer())
Adjustment = -Adjustment;
if (IsLoad)
Success = BC.MIB->createRestoreFromStack(
Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
else if (IsStore)
Success = BC.MIB->createSaveToStack(
Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
LLVM_DEBUG({
dbgs() << "Adjusted instruction: ";
Inst.dump();
});
assert(Success);
}
}
}
/// Lazy one-time setup: classify stack accesses and CFIs before the first
/// query that needs them (called from canCollapseRegion/canInsertRegion).
void StackLayoutModifier::initialize() {
classifyStackAccesses();
classifyCFIs();
IsInitialized = true;
}
// Global counters of spills relocated in each mode; atomic, presumably
// because functions may be processed concurrently -- confirm with callers.
std::atomic_uint64_t ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic_uint64_t ShrinkWrapping::SpillsMovedPushPopMode{0};
// Shorthand for an iterator over a basic block's instructions.
using BBIterTy = BinaryBasicBlock::iterator;
/// Build UsesByReg: for every callee-saved register, the set of instructions
/// (indexed by the dominator analysis' expression numbering) that touch it,
/// excluding that register's own save/restore instructions. When the
/// function keeps a frame pointer, calls conservatively count as uses of all
/// FP-aliasing CSRs.
void ShrinkWrapping::classifyCSRUses() {
DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
StackPointerTracking &SPT = Info.getStackPointerTracking();
UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
BitVector(DA.NumInstrs, false));
const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
for (BinaryBasicBlock &BB : BF) {
for (MCInst &Inst : BB) {
if (BC.MIB->isCFI(Inst))
continue;
BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
BC.MIB->getTouchedRegs(Inst, BV);
BV &= CSA.CalleeSaved;
for (int I : BV.set_bits()) {
if (I == 0)
continue;
// The save/restore of a register is not a "use" of it.
if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
UsesByReg[I].set(DA.ExprToIdx[&Inst]);
}
if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
continue;
// Calls may implicitly rely on FP: mark them as uses of FP aliases.
BV = CSA.CalleeSaved;
BV &= FPAliases;
for (int I : BV.set_bits())
UsesByReg[I].set(DA.ExprToIdx[&Inst]);
}
}
}
void ShrinkWrapping::pruneUnwantedCSRs() {
BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
if (!CSA.CalleeSaved[I])
continue;
if (ParamRegs[I]) {
CSA.CalleeSaved.reset(I);
continue;
}
if (UsesByReg[I].empty()) {
LLVM_DEBUG(
dbgs()
<< "Dismissing Callee-Saved Reg because we found no uses of it:" << I
<< "\n");
CSA.CalleeSaved.reset(I);
continue;
}
if (!CSA.HasRestores[I]) {
LLVM_DEBUG(
dbgs() << "Dismissing Callee-Saved Reg because it does not have "
"restores:"
<< I << "\n");
CSA.CalleeSaved.reset(I);
}
}
}
/// For every callee-saved register, find the basic blocks whose entry point
/// dominates all of that register's uses (valid save placements), then pick
/// the coldest such point as BestSavePos/BestSaveCount.
void ShrinkWrapping::computeSaveLocations() {
SavePos = std::vector<SmallSetVector<MCInst *, 4>>(BC.MRI->getNumRegs());
ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
StackPointerTracking &SPT = Info.getStackPointerTracking();
LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
for (BinaryBasicBlock &BB : BF) {
LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
if (!First)
continue;
// Use reaching instructions to detect if we are inside a loop - if we
// are, do not consider this BB as valid placement for saves.
if (RI.isInLoop(BB))
continue;
const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
// If we don't know stack state at this point, bail
if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
(SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
continue;
for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
if (!CSA.CalleeSaved[I])
continue;
// A BB is a valid save position for reg I only if its first
// instruction dominates every use of I.
BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
for (int J : UsesByReg[I].set_bits())
if (DA.doesADominateB(*First, J))
BBDominatedUses.set(J);
LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
<< BBDominatedUses.count() << " uses for reg " << I
<< ". Total uses for reg is " << UsesByReg[I].count()
<< "\n");
BBDominatedUses &= UsesByReg[I];
if (BBDominatedUses == UsesByReg[I]) {
LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
<< " as a save pos for " << I << "\n");
SavePos[I].insert(First);
LLVM_DEBUG({
dbgs() << "Dominated uses are:\n";
for (int J : UsesByReg[I].set_bits()) {
dbgs() << "Idx " << J << ": ";
DA.Expressions[J]->dump();
}
});
}
}
}
// Second phase: among the candidate positions, keep the one with the
// lowest profiled execution count for each register.
BestSaveCount = std::vector<uint64_t>(BC.MRI->getNumRegs(),
std::numeric_limits<uint64_t>::max());
BestSavePos = std::vector<MCInst *>(BC.MRI->getNumRegs(), nullptr);
auto &InsnToBB = Info.getInsnToBBMap();
for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
if (!CSA.CalleeSaved[I])
continue;
for (MCInst *Pos : SavePos[I]) {
BinaryBasicBlock *BB = InsnToBB[Pos];
uint64_t Count = BB->getExecutionCount();
if (Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
Count < BestSaveCount[I]) {
BestSavePos[I] = Pos;
BestSaveCount[I] = Count;
}
}
}
}
void ShrinkWrapping::computeDomOrder() {
std::vector<MCPhysReg> Order;
for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
Order.push_back(I);
}
DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
auto &InsnToBB = Info.getInsnToBBMap();
std::sort(Order.begin(), Order.end(),
[&](const MCPhysReg &A, const MCPhysReg &B) {
BinaryBasicBlock *BBA =
BestSavePos[A] ? InsnToBB[BestSavePos[A]] : nullptr;
BinaryBasicBlock *BBB =
BestSavePos[B] ? InsnToBB[BestSavePos[B]] : nullptr;
if (BBA == BBB)
return A < B;
if (!BBA && BBB)
return false;
if (BBA && !BBB)
return true;
if (DA.doesADominateB(*BestSavePos[A], *BestSavePos[B]))
return true;
if (DA.doesADominateB(*BestSavePos[B], *BestSavePos[A]))
return false;
return A < B;
});
for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
DomOrder[Order[I]] = I;
}
/// Decide whether the best save position found for callee-saved register
/// \p CSR is cold enough to be worth moving the save there: its execution
/// count must be below -shrink-wrapping-threshold percent of the current
/// saving cost. On success, \p BestPosSave receives the insertion point and
/// \p TotalEstimatedWin accumulates the estimated frequency reduction.
bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                                       uint64_t &TotalEstimatedWin) {
  const uint64_t CurSavingCost = CSA.SavingCost[CSR];
  if (!CSA.CalleeSaved[CSR])
    return false;

  uint64_t BestCount = BestSaveCount[CSR];
  BestPosSave = BestSavePos[CSR];
  bool ShouldMove = false;
  if (BestCount != std::numeric_limits<uint64_t>::max() &&
      BestCount < (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) {
    LLVM_DEBUG({
      auto &InsnToBB = Info.getInsnToBBMap();
      // BUGFIX: the message used to contain the literal " count << ", which
      // printed a stray "<<" into the debug log.
      dbgs() << "Better position for saves found in func " << BF.getPrintName()
             << " count: " << BF.getKnownExecutionCount() << "\n";
      dbgs() << "Reg: " << CSR
             << "; New BB: " << InsnToBB[BestPosSave]->getName()
             << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
    });
    TotalEstimatedWin += CurSavingCost - BestCount;
    ShouldMove = true;
  }

  if (!ShouldMove)
    return false;
  // A profitable count with no concrete instruction to anchor the save on is
  // unusable.
  if (!BestPosSave) {
    LLVM_DEBUG({
      dbgs() << "Dropping opportunity because we don't know where to put "
                "stores -- total est. freq reduc: "
             << TotalEstimatedWin << "\n";
    });
    return false;
  }
  return true;
}
/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations.
void ShrinkWrapping::splitFrontierCritEdges(
BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
const SmallVector<bool, 4> &IsCritEdge,
const SmallVector<BinaryBasicBlock *, 4> &From,
const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
<< BF.getPrintName() << "\n");
// For every FromBB, there might be one or more critical edges, with
// To[I] containing destination BBs. It's important to memorize
// the original size of the Frontier as we may append to it while splitting
// critical edges originating with blocks with multiple destinations.
for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
if (!IsCritEdge[I])
continue;
if (To[I].empty())
continue;
BinaryBasicBlock *FromBB = From[I];
LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
<< "\n");
// Split edge for every DestinationBBs
for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
BinaryBasicBlock *DestinationBB = To[I][DI];
LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
// Insert dummy instruction so this BB is never empty (we need this for
// PredictiveStackPointerTracking to work, since it annotates instructions
// and not BBs).
if (NewBB->empty()) {
MCInst NewInst;
BC.MIB->createNoop(NewInst);
NewBB->addInstruction(std::move(NewInst));
scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
}
// Update frontier
ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
if (DI == 0) {
// Update frontier inplace
Frontier[I] = NewFrontierPP;
LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
<< '\n');
} else {
// Append new frontier to the end of the list
Frontier.push_back(NewFrontierPP);
LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
<< '\n');
}
}
}
}
/// Compute where restores must be placed if the save of \p CSR moves to
/// \p BestPosSave: the dominance frontier of the save point. Critical edges
/// in the frontier are split (with new BBs hosting the restores); an empty
/// result means placement failed and the opportunity must be dropped.
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
uint64_t TotalEstimatedWin) {
SmallVector<ProgramPoint, 4> Frontier;
SmallVector<bool, 4> IsCritEdge;
bool CannotPlace = false;
DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
// In case of a critical edge, we need to create extra BBs to host restores
// into edges transitioning to the dominance frontier, otherwise we pull these
// restores to inside the dominated area.
Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
LLVM_DEBUG({
dbgs() << "Dumping dominance frontier for ";
BC.printInstruction(dbgs(), *BestPosSave);
for (ProgramPoint &PP : Frontier)
if (PP.isInst())
BC.printInstruction(dbgs(), *PP.getInst());
else
dbgs() << PP.getBB()->getName() << "\n";
});
for (ProgramPoint &PP : Frontier) {
bool HasCritEdges = false;
// A terminator at the frontier that itself uses the CSR leaves no place
// to insert the restore before the use.
if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
doesInstUsesCSR(*PP.getInst(), CSR))
CannotPlace = true;
BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
CritEdgesFrom.emplace_back(FrontierBB);
CritEdgesTo.emplace_back(0);
SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
// Check for invoke instructions at the dominance frontier, which indicates
// the landing pad is not dominated.
if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
LLVM_DEBUG(
dbgs() << "Bailing on restore placement to avoid LP splitting\n");
Frontier.clear();
return Frontier;
}
// Successors not dominated by the save point are the critical-edge
// destinations needing their own restore-hosting block.
doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
if (!DA.doesADominateB(*BestPosSave, P)) {
Dests.emplace_back(Info.getParentBB(P));
return;
}
HasCritEdges = true;
});
IsCritEdge.push_back(HasCritEdges);
}
// Restores cannot be placed in empty BBs because we have a dataflow
// analysis that depends on insertions happening before real instructions
// (PredictiveStackPointerTracking). Detect now for empty BBs and add a
// dummy nop that is scheduled to be removed later.
bool InvalidateRequired = false;
for (BinaryBasicBlock *&BB : BF.layout()) {
if (BB->size() != 0)
continue;
MCInst NewInst;
BC.MIB->createNoop(NewInst);
auto II = BB->addInstruction(std::move(NewInst));
scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
InvalidateRequired = true;
}
if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
LLVM_DEBUG({
dbgs() << "Now detected critical edges in the following frontier:\n";
for (ProgramPoint &PP : Frontier) {
if (PP.isBB()) {
dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
} else {
dbgs() << "  Inst: ";
PP.getInst()->dump();
}
}
});
splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
CritEdgesTo);
InvalidateRequired = true;
}
if (InvalidateRequired) {
// BitVectors that represent all insns of the function are invalid now
// since we changed BBs/Insts. Re-run steps that depend on pointers being
// valid
Info.invalidateAll();
classifyCSRUses();
}
if (CannotPlace) {
LLVM_DEBUG({
dbgs() << "Dropping opportunity because restore placement failed"
" -- total est. freq reduc: "
<< TotalEstimatedWin << "\n";
});
Frontier.clear();
return Frontier;
}
return Frontier;
}
/// Check whether push/pop mode is a valid way to move the save/restore of
/// callee-saved register \p CSR to \p BestPosSave, whose pre-insertion SP
/// offset is \p SaveOffset. Push/pops change the SP value, so this returns
/// false whenever the stack layout cannot be safely reshuffled, forcing the
/// caller to fall back to load/store mode.
bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  // Changing SP with new pushes/pops would break alignment requirements.
  if (FA.requiresAlignment(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  // The stack region of every original save must be removable.
  for (MCInst *Save : CSA.getSavesByReg(CSR)) {
    if (!SLM.canCollapseRegion(Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
    if (!BC.MIB->isPop(*Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}
/// Adjust the placement of restore (pop) points for register \p CSR so each
/// pop executes at the same stack offset (\p SaveOffset) as its matching
/// push -- a requirement for push/pop mode to be correct. Returns the fixed
/// list, or an empty list if no valid position exists for some restore point
/// (the caller then falls back to load/store mode).
SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    // First try the end of the block: the SP offset must equal SaveOffset
    // and no use of CSR may reach that point (backwards analysis).
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    // Otherwise scan the block bottom-up for an instruction whose incoming
    // SP offset matches and where CSR is similarly unused.
    for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) {
      if (SPT.getStateBefore(*RIt)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(*RIt);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &*RIt;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}
/// Schedule the removal (or conversion to plain SP adjustments) of all
/// existing saves/restores of register \p CSR along with their associated
/// CFI directives. With \p UsePushPops, the original instructions are
/// erased outright; otherwise they become stack-pointer adjustments so the
/// frame layout stays valid in load/store mode.
void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {
  for (BinaryBasicBlock *&BB : BF.layout()) {
    std::vector<MCInst *> CFIs;
    // Walk bottom-up: CFIs that appear after an instruction in layout order
    // are collected before the instruction they belong to is visited.
    for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }
      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      // Any unrelated instruction breaks the association between pending
      // CFIs and a save/restore further up the block.
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }
      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));
      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        // Remember the deleted CFIs (by pseudo-instruction index) so they
        // can be replayed at the new save/restore locations.
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}
bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
CSA.getRestoredReg(Inst) == CSR)
return false;
BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
BC.MIB->getTouchedRegs(Inst, BV);
return BV[CSR];
}
/// Schedule insertion of the new save of \p CSR at \p BestPosSave and of its
/// restores at each point in \p RestorePoints. \p UsePushPops selects
/// between inserting push/pop instructions and regular stack stores/loads.
void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");
  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });
  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);
  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    // The restore must land before the block's terminator, if any.
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    // Never insert between an instruction prefix and the instruction it
    // modifies.
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    // If the chosen instruction itself uses CSR (or follows a prefix),
    // attach the restore to the end of the containing basic block instead.
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}
/// Top-level driver for relocating callee-saved register spills: for each
/// CSR with a profitable cold save position, schedule removal of its old
/// saves/restores and insertion of new ones, choosing between push/pop and
/// load/store operation modes. Push/pop mode is all-or-nothing per function:
/// if it fails for any CSR, already-scheduled changes are downgraded.
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t TotalEstimatedWin = 0;
    if (!isBestSavePosCold(I, BestPosSave, TotalEstimatedWin))
      continue;
    SmallVector<ProgramPoint, 4> RestorePoints =
        doRestorePlacement(BestPosSave, I, TotalEstimatedWin);
    if (RestorePoints.empty())
      continue;
    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;
    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);
    if (UsePushPops) {
      // Pops must execute at the same SP offset as the push; demote to
      // load/store mode if no valid pop positions exist.
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }
    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;
    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
  }
  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      // Downgrade BB-level (end-of-block) scheduled insertions.
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      // Downgrade per-instruction annotations. Scheduled erasures of real
      // (non-CFI) instructions become SP adjustments instead, since in
      // load/store mode the original frame allocation must be preserved.
      for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
        MCInst &Inst = *I;
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }
  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}
namespace {
/// Helper function to identify whether two basic blocks created by splitting
/// a critical edge have the same contents.
bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
const BinaryBasicBlock &B) {
if (A.succ_size() != B.succ_size())
return false;
if (A.succ_size() != 1)
return false;
if (*A.succ_begin() != *B.succ_begin())
return false;
if (A.size() != B.size())
return false;
// Compare instructions
auto I = A.begin(), E = A.end();
auto OtherI = B.begin(), OtherE = B.end();
while (I != E && OtherI != OtherE) {
if (I->getOpcode() != OtherI->getOpcode())
return false;
if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
return true;
}))
return false;
++I;
++OtherI;
}
return true;
}
} // namespace
/// Merge split-edge BBs created by shrink wrapping that ended up with
/// identical contents, redirecting their predecessors to a single survivor.
/// Returns true if any block was folded; the caller is then responsible for
/// erasing invalidated BBs from the function.
bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    // Only blocks introduced by critical-edge splitting are candidates.
    if (!BB.getName().startswith(".LSplitEdge"))
      continue;
    // Scan backwards and stop upon reaching BB itself, so each unordered
    // pair of blocks is examined only once.
    for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) {
      BinaryBasicBlock &RBB = *RIter;
      if (&RBB == &BB)
        break;
      if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      // Retarget the predecessor's edge (and any jump table entries) from
      // the duplicate RBB to the surviving BB, preserving profile counts.
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }
  return Changed;
}
namespace {
// A special StackPointerTracking that compensates for our future plans
// in removing/adding insn.
/// StackPointerTracking variant that also accounts for the pending
/// shrink-wrapping worklist: it computes the SP offset as it will be *after*
/// scheduled erasures and push/pop insertions are materialized, so insertion
/// code can reason about the final stack state before it exists.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  // Scheduled end-of-block changes, keyed by basic block.
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;
  Optional<unsigned> AnnotationIndex;
protected:
  // Fold the SP effect of each scheduled change attached to \p Point into
  // \p Res: erasing a push cancels its SP decrement (+size), erasing a pop
  // cancels its increment (-size); inserting a push/pop has the opposite
  // effect of the corresponding erasure.
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }
  // Transfer function: start from the base SP tracking result, then apply
  // any per-instruction worklist items and, for the last instruction of a
  // block, the block-level items from TodoMap.
  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    // Block-level items only apply after the block's last instruction.
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }
  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }
public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}
  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};
} // end anonymous namespace
/// Insert CFI directives reflecting the new placement of the save/restore of
/// register \p CSR. \p SPValPush / \p SPValPop are the SP offsets recorded
/// when the new push and pop were materialized. The region covered by the
/// spill (the "affected zone") is derived from dominance by the save point.
void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  // Locate the (newly inserted) simple store of CSR to the stack; it is the
  // anchor for the dominance queries below.
  for (BinaryBasicBlock &BB : BF) {
    for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter;
         ++InstIter) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg,
                                 Reg, SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &*InstIter;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  // Walk the layout and emit CFIs wherever we cross a boundary of the zone
  // dominated by the save point -- both inside blocks and between blocks.
  for (BinaryBasicBlock *BB : BF.layout()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        // Insert after the transition instruction, then rewind InstIter so
        // the loop increment lands on the right position.
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      // Zone changed on a fall-through edge: patch the end of the previous
      // block in layout order.
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}
/// Recreate all def_cfa_offset CFI directives for an SP-based CFA: existing
/// ones are tagged for deletion, then fresh directives are emitted wherever
/// the tracked SP offset changes, including across block boundaries and the
/// hot/cold split point.
void ShrinkWrapping::rebuildCFIForSP() {
  // Tag every existing def_cfa_offset for removal; the actual erase happens
  // at the end so iteration over instructions stays stable.
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }
  // NOTE(review): -8 presumably accounts for the pushed return address at
  // function entry (x86-64 ABI) -- confirm for other targets.
  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.layout()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    // Emit a new directive right after every instruction that changes SP.
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    // Re-establish the CFA offset at the hot/cold boundary, or patch the
    // previous block when the offset changes on a fall-through edge.
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }
  // Finally erase all directives tagged above.
  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}
/// Build a stack load or store for \p FIE. The access is based on SP when
/// its value (\p SPVal) is known, otherwise on FP (\p FPVal). SP-based
/// accesses may additionally be converted into push/pop instructions when
/// \p CreatePushOrPop is set.
MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
                                         const FrameIndexEntry &FIE,
                                         bool CreatePushOrPop) {
  MCInst NewInst;
  // Materialize the access relative to a base register whose runtime value
  // we know; the immediate is rebased accordingly.
  auto MaterializeAccess = [&](auto BaseReg, int BaseVal) {
    if (FIE.IsLoad) {
      if (!BC.MIB->createRestoreFromStack(NewInst, BaseReg,
                                          FIE.StackOffset - BaseVal,
                                          FIE.RegOrImm, FIE.Size)) {
        errs() << "createRestoreFromStack: not supported on this platform\n";
        abort();
      }
    } else {
      if (!BC.MIB->createSaveToStack(NewInst, BaseReg,
                                     FIE.StackOffset - BaseVal, FIE.RegOrImm,
                                     FIE.Size)) {
        errs() << "createSaveToStack: not supported on this platform\n";
        abort();
      }
    }
  };
  if (SPVal != StackPointerTracking::SUPERPOSITION &&
      SPVal != StackPointerTracking::EMPTY) {
    MaterializeAccess(BC.MIB->getStackPointer(), SPVal);
    // Only SP-based accesses can be turned into pushes/pops.
    if (CreatePushOrPop)
      BC.MIB->changeToPushOrPop(NewInst);
    return NewInst;
  }
  assert(FPVal != StackPointerTracking::SUPERPOSITION &&
         FPVal != StackPointerTracking::EMPTY);
  MaterializeAccess(BC.MIB->getFramePointer(), FPVal);
  return NewInst;
}
/// Rewrite the offset of the CFI attached to \p Inst to \p NewOffset if it
/// is a CFA-defining directive; other directives are left untouched. Each
/// CFI is processed at most once.
void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
  const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
  if (UpdatedCFIs.count(CFI))
    return;
  // Only CFA-defining operations carry an offset that needs rebasing;
  // everything else (e.g. OpOffset) is just recorded as already updated.
  const auto Op = CFI->getOperation();
  if (Op == MCCFIInstruction::OpDefCfa ||
      Op == MCCFIInstruction::OpDefCfaRegister ||
      Op == MCCFIInstruction::OpDefCfaOffset)
    CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
  UpdatedCFIs.insert(CFI);
}
/// Replay at \p Pos the CFIs deleted together with the original save (when
/// \p isPush) or restore of register \p Reg, rebasing their offsets to
/// \p NewOffset, and re-emit the register-location directive if one was
/// removed. Returns the position after the inserted CFIs.
BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
                                                BBIterTy Pos, unsigned Reg,
                                                bool isPush, int Sz,
                                                int64_t NewOffset) {
  // Re-insert the recorded CFI pseudos for this register, rebased.
  const auto &ReplayList = isPush ? DeletedPushCFIs[Reg] : DeletedPopCFIs[Reg];
  for (uint32_t Idx : ReplayList) {
    Pos = BF.addCFIPseudo(&BB, Pos, Idx);
    updateCFIInstOffset(*Pos++, NewOffset);
  }
  if (HasDeletedOffsetCFIs[Reg]) {
    // After a push, the register is saved at NewOffset; after a pop, it
    // holds its original value again.
    const auto DwarfReg = BC.MRI->getDwarfRegNum(Reg, false);
    Pos = BF.addCFIInstruction(
        &BB, Pos,
        isPush ? MCCFIInstruction::createOffset(nullptr, DwarfReg, NewOffset)
               : MCCFIInstruction::createSameValue(nullptr, DwarfReg));
    ++Pos;
  }
  return Pos;
}
/// Materialize one scheduled insertion (\p Item) before \p InsertionPoint in
/// \p CurBB, given the predicted SP/FP values \p SPVal / \p FPVal at that
/// location. Also records the push/pop offsets that later drive CFI
/// reconstruction. Returns the iterator after the inserted instruction.
BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                          BinaryBasicBlock *CurBB,
                                          const WorklistItem &Item,
                                          int64_t SPVal, int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      // For push/pops the relevant offset is the SP value at the access.
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      // For loads/stores it is the fixed stack slot being accessed.
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }
  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });
  MCInst NewInst =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  // end() means "append to the block".
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}
/// Apply every insertion in \p TodoList at \p InsertionPoint in \p CurBB
/// (CurBB->end() means "append"), given the predicted SP/FP values
/// \p SPVal / \p FPVal at that point. Pops are reordered to respect the
/// dominance relation between the affected registers. Returns the iterator
/// after the last inserted instruction.
BBIterTy ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = false;
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;
    HasInsertions = true;
    break;
  }
  if (!HasInsertions)
    return InsertionPoint;
  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");
  // Revert the effect of PSPT for this location, we want SP Value before
  // insertions
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }
  // Reorder POPs to obey the correct dominance relation between them
  // (non-pop items keep their relative order and sort before the pops).
  std::stable_sort(TodoList.begin(), TodoList.end(),
                   [&](const WorklistItem &A, const WorklistItem &B) {
                     if ((A.Action != WorklistItem::InsertPushOrPop ||
                          !A.FIEToInsert.IsLoad) &&
                         (B.Action != WorklistItem::InsertPushOrPop ||
                          !B.FIEToInsert.IsLoad))
                       return false;
                     if ((A.Action != WorklistItem::InsertPushOrPop ||
                          !A.FIEToInsert.IsLoad))
                       return true;
                     if ((B.Action != WorklistItem::InsertPushOrPop ||
                          !B.FIEToInsert.IsLoad))
                       return false;
                     return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
                   });
  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;
    InsertionPoint =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    // Track the SP effect of each inserted push/pop for subsequent items.
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}
/// Execute all scheduled insertions across the function, using
/// PredictiveStackPointerTracking to obtain the SP/FP values the code will
/// have once all pending changes are applied. Returns true if anything was
/// inserted.
bool ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();
  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions before some inst.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      // Copy the list: insertions invalidate annotation references.
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      // State before Inst: at BB entry for the first instruction, otherwise
      // the state right after the previous instruction.
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
    }
    // Process insertions at the end of bb
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first,
                            SPTState.second);
      Changes = true;
    }
  }
  return Changes;
}
/// Execute all scheduled deletions and push/pop-to-SP-adjustment
/// conversions recorded in the instruction annotations.
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;
        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is flag reg alive across this func?
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          // Rewrite the push/pop in place as an equivalent SP adjustment,
          // in a flags-preserving form when the flags are live here.
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
        }
        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        // NOTE(review): std::prev on the iterator returned by
        // eraseInstruction presumes the erased instruction is not the first
        // element of the block's container -- confirm this invariant.
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}
/// Rebuild all CFI information invalidated by moving saves/restores: the
/// SP-based CFA offsets (when no frame pointer is in use) and the
/// per-register save-location CFIs for every moved CSR.
void ShrinkWrapping::rebuildCFI() {
  // Read HasFramePointer before invalidating the analysis that holds it.
  const bool HasFP = Info.getStackPointerTracking().HasFramePointer;
  Info.invalidateAll();
  if (!HasFP) {
    // SP-based CFA: regenerate def_cfa_offset directives from scratch.
    rebuildCFIForSP();
    Info.invalidateAll();
  }
  // Emit per-register CFIs for every CSR whose push AND pop were recorded.
  const unsigned NumRegs = BC.MRI->getNumRegs();
  for (unsigned Reg = 0; Reg != NumRegs; ++Reg) {
    const int64_t SPValPush = PushOffsetByReg[Reg];
    const int64_t SPValPop = PopOffsetByReg[Reg];
    if (SPValPush == 0 || SPValPop == 0)
      continue;
    insertUpdatedCFI(Reg, SPValPush, SPValPop);
    // Insertion changed the function body; analyses are stale again.
    Info.invalidateAll();
  }
}
/// Run the shrink-wrapping optimization on the current function. Returns
/// true if the function was modified.
bool ShrinkWrapping::perform() {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not worth
    // the cost (of duplicating potentially large jump tables that could regress
    // dcache misses). Moreover, ambiguous JTs are rare and coming from code
    // written in assembly language. Just bail.
    return false;
  }
  // Analysis phase: identify CSR saves/restores and candidate positions.
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  computeDomOrder();
  // Scheduling phase: record intended changes as worklist items.
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  if (!processInsertions())
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}
void ShrinkWrapping::printStats() {
outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
<< " spills inserting load/stores and " << SpillsMovedPushPopMode
<< " spills inserting push/pops\n";
}
// Operators necessary as a result of using MCAnnotation
/// Print a shrink-wrapping todo list as "SWTodo[Action, Action, ...]".
raw_ostream &operator<<(raw_ostream &OS,
                        const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
  // Translate an action into its printable name.
  auto ActionName = [](const ShrinkWrapping::WorklistItem &Item) {
    switch (Item.Action) {
    case ShrinkWrapping::WorklistItem::Erase:
      return "Erase";
    case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
      return "ChangeToAdjustment";
    case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
      return "InsertLoadOrStore";
    case ShrinkWrapping::WorklistItem::InsertPushOrPop:
      return "InsertPushOrPop";
    }
    return ""; // Unreachable: all enum values handled above.
  };
  OS << "SWTodo[";
  const char *Sep = "";
  for (const ShrinkWrapping::WorklistItem &Item : Vec) {
    OS << Sep << ActionName(Item);
    Sep = ", ";
  }
  OS << "]";
  return OS;
}
/// Print a stack-layout-modifier todo list as "SLMTodo[Action, ...]".
raw_ostream &
operator<<(raw_ostream &OS,
           const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
  // Translate an action into its printable name.
  auto ActionName = [](const StackLayoutModifier::WorklistItem &Item) {
    switch (Item.Action) {
    case StackLayoutModifier::WorklistItem::None:
      return "None";
    case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
      return "AdjustLoadStoreOffset";
    case StackLayoutModifier::WorklistItem::AdjustCFI:
      return "AdjustCFI";
    }
    return ""; // Unreachable: all enum values handled above.
  };
  OS << "SLMTodo[";
  const char *Sep = "";
  for (const StackLayoutModifier::WorklistItem &Item : Vec) {
    OS << Sep << ActionName(Item);
    Sep = ", ";
  }
  OS << "]";
  return OS;
}
/// Field-wise equality for shrink-wrapping worklist items (required by the
/// MCAnnotation machinery).
bool operator==(const ShrinkWrapping::WorklistItem &A,
                const ShrinkWrapping::WorklistItem &B) {
  // Compare the item's own fields first, then the embedded FIE field by
  // field.
  if (A.Action != B.Action || A.AffectedReg != B.AffectedReg ||
      A.Adjustment != B.Adjustment)
    return false;
  const FrameIndexEntry &FA = A.FIEToInsert;
  const FrameIndexEntry &FB = B.FIEToInsert;
  return FA.IsLoad == FB.IsLoad && FA.IsStore == FB.IsStore &&
         FA.RegOrImm == FB.RegOrImm && FA.Size == FB.Size &&
         FA.IsSimple == FB.IsSimple && FA.StackOffset == FB.StackOffset;
}
/// Field-wise equality for stack-layout-modifier worklist items (required
/// by the MCAnnotation machinery).
bool operator==(const StackLayoutModifier::WorklistItem &A,
                const StackLayoutModifier::WorklistItem &B) {
  if (A.Action != B.Action)
    return false;
  return A.OffsetUpdate == B.OffsetUpdate;
}
} // end namespace bolt
} // end namespace llvm