[BOLT] Support POSSIBLE_PIC_FIXED_BRANCH

Detect and support fixed PIC indirect jumps of the following form:
```
movslq  En(%rip), %r1
leaq  PIC_JUMP_TABLE(%rip), %r2
addq  %r2, %r1
jmpq  *%r1
```

with PIC_JUMP_TABLE that looks like following:

```
  JT:  ----------
   E1:| L1 - JT  |
      |----------|
   E2:| L2 - JT  |
      |----------|
      |          |
         ......
   En:| Ln - JT  |
       ----------
```

The code could be produced by compilers, see
https://github.com/llvm/llvm-project/issues/91648.

Test Plan: updated jump-table-fixed-ref-pic.test

Reviewers: maksfb, ayermolo, dcci, rafaelauler

Reviewed By: rafaelauler

Pull Request: https://github.com/llvm/llvm-project/pull/91667
This commit is contained in:
Amir Ayupov
2024-05-09 12:35:45 -07:00
parent 401d7bcabc
commit 3023b15fb1
10 changed files with 153 additions and 59 deletions

View File

@@ -431,6 +431,9 @@ public:
return nullptr;
}
/// Deregister JumpTable registered at a given \p Address and delete it.
void deleteJumpTable(uint64_t Address);
unsigned getDWARFEncodingSize(unsigned Encoding) {
if (Encoding == dwarf::DW_EH_PE_omit)
return 0;

View File

@@ -58,6 +58,8 @@ enum class IndirectBranchType : char {
POSSIBLE_PIC_JUMP_TABLE, /// Possibly a jump table for PIC.
POSSIBLE_GOTO, /// Possibly a gcc's computed goto.
POSSIBLE_FIXED_BRANCH, /// Possibly an indirect branch to a fixed location.
POSSIBLE_PIC_FIXED_BRANCH, /// Possibly an indirect jump to a fixed entry in a
/// PIC jump table.
};
class MCPlusBuilder {
@@ -1474,12 +1476,11 @@ public:
/// will be set to the different components of the branch. \p MemLocInstr
/// is the instruction that loads up the indirect function pointer. It may
/// or may not be same as \p Instruction.
virtual IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue,
const MCExpr *&DispExpr, MCInst *&PCRelBaseOut) const {
virtual IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr,
MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const {
llvm_unreachable("not implemented");
return IndirectBranchType::UNKNOWN;
}

View File

@@ -2523,6 +2523,16 @@ BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
return nullptr;
}
/// Deregister JumpTable registered at a given \p Address and delete it.
void BinaryContext::deleteJumpTable(uint64_t Address) {
assert(JumpTables.count(Address) && "Must have a jump table at address");
JumpTable *JT = JumpTables.at(Address);
for (BinaryFunction *Parent : JT->Parents)
Parent->JumpTables.erase(Address);
JumpTables.erase(Address);
delete JT;
}
DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
const DWARFAddressRangesVector &InputRanges) const {
DebugAddressRangesVector OutputRanges;

View File

@@ -780,6 +780,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
// setting the value of the register used by the branch.
MCInst *MemLocInstr;
// The instruction loading the fixed PIC jump table entry value.
MCInst *FixedEntryLoadInstr;
// Address of the table referenced by MemLocInstr. Could be either an
// array of function pointers, or a jump table.
uint64_t ArrayStart = 0;
@@ -811,7 +814,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr, FixedEntryLoadInstr);
if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
return BranchType;
@@ -877,6 +880,43 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
if (BaseRegNum == BC.MRI->getProgramCounter())
ArrayStart += getAddress() + Offset + Size;
if (FixedEntryLoadInstr) {
assert(BranchType == IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH &&
"Invalid IndirectBranch type");
MCInst::iterator FixedEntryDispOperand =
BC.MIB->getMemOperandDisp(*FixedEntryLoadInstr);
assert(FixedEntryDispOperand != FixedEntryLoadInstr->end() &&
"Invalid memory instruction");
const MCExpr *FixedEntryDispExpr = FixedEntryDispOperand->getExpr();
const uint64_t EntryAddress = getExprValue(FixedEntryDispExpr);
uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_PIC);
ErrorOr<int64_t> Value =
BC.getSignedValueAtAddress(EntryAddress, EntrySize);
if (!Value)
return IndirectBranchType::UNKNOWN;
BC.outs() << "BOLT-INFO: fixed PIC indirect branch detected in " << *this
<< " at 0x" << Twine::utohexstr(getAddress() + Offset)
<< " referencing data at 0x" << Twine::utohexstr(EntryAddress)
<< " the destination value is 0x"
<< Twine::utohexstr(ArrayStart + *Value) << '\n';
TargetAddress = ArrayStart + *Value;
// Remove spurious JumpTable at EntryAddress caused by PIC reference from
// the load instruction.
BC.deleteJumpTable(EntryAddress);
// Replace FixedEntryDispExpr used in target address calculation with outer
// jump table reference.
JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart);
assert(JT && "Must have a containing jump table for PIC fixed branch");
BC.MIB->replaceMemOperandDisp(*FixedEntryLoadInstr, JT->getFirstLabel(),
EntryAddress - ArrayStart, &*BC.Ctx);
return BranchType;
}
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
<< Twine::utohexstr(ArrayStart) << '\n');
@@ -1126,6 +1166,7 @@ void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size,
}
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
if (opts::JumpTables == JTS_NONE)
IsSimple = false;
break;
@@ -1878,9 +1919,11 @@ bool BinaryFunction::postProcessIndirectBranches(
int64_t DispValue;
const MCExpr *DispExpr;
MCInst *PCRelBaseInstr;
MCInst *FixedEntryLoadInstr;
IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr,
FixedEntryLoadInstr);
if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
continue;

View File

@@ -386,13 +386,15 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets(BinaryBasicBlock &BB,
JumpTableInfoType HotTargets;
MCInst *MemLocInstr;
MCInst *PCRelBaseOut;
MCInst *FixedEntryLoadInstr;
unsigned BaseReg, IndexReg;
int64_t DispValue;
const MCExpr *DispExpr;
MutableArrayRef<MCInst> Insts(&BB.front(), &CallInst);
const IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
CallInst, Insts.begin(), Insts.end(), BC.AsmInfo->getCodePointerSize(),
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut);
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut,
FixedEntryLoadInstr);
assert(MemLocInstr && "There should always be a load for jump tables");
if (!MemLocInstr)

View File

@@ -852,16 +852,19 @@ public:
return Uses;
}
IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
MCInst *&FixedEntryLoadInstr) const override {
MemLocInstrOut = nullptr;
BaseRegNumOut = AArch64::NoRegister;
IndexRegNumOut = AArch64::NoRegister;
DispValueOut = 0;
DispExprOut = nullptr;
FixedEntryLoadInstr = nullptr;
// An instruction referencing memory used by jump instruction (directly or
// via register). This location could be an array of function pointers

View File

@@ -176,13 +176,14 @@ public:
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr,
MCInst *&PCRelBaseOut) const override {
MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const override {
MemLocInstr = nullptr;
BaseRegNum = 0;
IndexRegNum = 0;
DispValue = 0;
DispExpr = nullptr;
PCRelBaseOut = nullptr;
FixedEntryLoadInst = nullptr;
// Check for the following long tail call sequence:
// 1: auipc xi, %pcrel_hi(sym)

View File

@@ -1866,8 +1866,11 @@ public:
return true;
}
/// Analyzes PIC-style jump table code template and return identified
/// IndirectBranchType, MemLocInstr (all cases) and FixedEntryLoadInstr
/// (POSSIBLE_PIC_FIXED_BRANCH case).
template <typename Itr>
std::pair<IndirectBranchType, MCInst *>
std::tuple<IndirectBranchType, MCInst *, MCInst *>
analyzePICJumpTable(Itr II, Itr IE, MCPhysReg R1, MCPhysReg R2) const {
// Analyze PIC-style jump table code template:
//
@@ -1876,6 +1879,13 @@ public:
// add %r2, %r1
// jmp *%r1
//
// or a fixed indirect jump template:
//
// movslq En(%rip), {%r2|%r1} <- FixedEntryLoadInstr
// lea PIC_JUMP_TABLE(%rip), {%r1|%r2} <- MemLocInstr
// add %r2, %r1
// jmp *%r1
//
// (with any irrelevant instructions in-between)
//
// When we call this helper we've already determined %r1 and %r2, and
@@ -1916,8 +1926,13 @@ public:
MO.SegRegNum == X86::NoRegister;
};
LLVM_DEBUG(dbgs() << "Checking for PIC jump table\n");
MCInst *MemLocInstr = nullptr;
const MCInst *MovInstr = nullptr;
MCInst *FirstInstr = nullptr;
MCInst *SecondInstr = nullptr;
enum {
NOMATCH = 0,
MATCH_JUMP_TABLE,
MATCH_FIXED_BRANCH,
} MatchingState = NOMATCH;
while (++II != IE) {
MCInst &Instr = *II;
const MCInstrDesc &InstrDesc = Info->get(Instr.getOpcode());
@@ -1926,68 +1941,76 @@ public:
// Ignore instructions that don't affect R1, R2 registers.
continue;
}
if (!MovInstr) {
// Expect to see MOV instruction.
if (!isMOVSX64rm32(Instr)) {
LLVM_DEBUG(dbgs() << "MOV instruction expected.\n");
const bool IsMOVSXInstr = isMOVSX64rm32(Instr);
const bool IsLEAInstr = isLEA64r(Instr);
if (MatchingState == NOMATCH) {
if (IsMOVSXInstr)
MatchingState = MATCH_JUMP_TABLE;
else if (IsLEAInstr)
MatchingState = MATCH_FIXED_BRANCH;
else
break;
}
// Check if it's setting %r1 or %r2. In canonical form it sets %r2.
// If it sets %r1 - rename the registers so we have to only check
// a single form.
unsigned MovDestReg = Instr.getOperand(0).getReg();
if (MovDestReg != R2)
// Check if the first instruction is setting %r1 or %r2. In canonical
// form lea sets %r1 and mov sets %r2. If it's the opposite - rename so
// we have to only check a single form.
unsigned DestReg = Instr.getOperand(0).getReg();
MCPhysReg &ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R2 : R1;
if (DestReg != ExpectReg)
std::swap(R1, R2);
if (MovDestReg != R2) {
LLVM_DEBUG(dbgs() << "MOV instruction expected to set %r2\n");
if (DestReg != ExpectReg)
break;
}
// Verify operands for MOV.
// Verify operands
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
if (!MO)
break;
if (!isIndexed(*MO, R1))
// POSSIBLE_PIC_JUMP_TABLE
if ((MatchingState == MATCH_JUMP_TABLE && isIndexed(*MO, R1)) ||
(MatchingState == MATCH_FIXED_BRANCH && isRIPRel(*MO)))
FirstInstr = &Instr;
else
break;
MovInstr = &Instr;
} else {
if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *RegInfo))
unsigned ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R1 : R2;
if (!InstrDesc.hasDefOfPhysReg(Instr, ExpectReg, *RegInfo))
continue;
if (!isLEA64r(Instr)) {
LLVM_DEBUG(dbgs() << "LEA instruction expected\n");
if ((MatchingState == MATCH_JUMP_TABLE && !IsLEAInstr) ||
(MatchingState == MATCH_FIXED_BRANCH && !IsMOVSXInstr))
break;
}
if (Instr.getOperand(0).getReg() != R1) {
LLVM_DEBUG(dbgs() << "LEA instruction expected to set %r1\n");
if (Instr.getOperand(0).getReg() != ExpectReg)
break;
}
// Verify operands for LEA.
// Verify operands.
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
if (!MO)
break;
if (!isRIPRel(*MO))
break;
MemLocInstr = &Instr;
SecondInstr = &Instr;
break;
}
}
if (!MemLocInstr)
return std::make_pair(IndirectBranchType::UNKNOWN, nullptr);
if (!SecondInstr)
return std::make_tuple(IndirectBranchType::UNKNOWN, nullptr, nullptr);
if (MatchingState == MATCH_FIXED_BRANCH) {
LLVM_DEBUG(dbgs() << "checking potential fixed indirect branch\n");
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH,
FirstInstr, SecondInstr);
}
LLVM_DEBUG(dbgs() << "checking potential PIC jump table\n");
return std::make_pair(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
MemLocInstr);
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
SecondInstr, nullptr);
}
IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
MCInst *&FixedEntryLoadInst) const override {
// Try to find a (base) memory location from where the address for
// the indirect branch is loaded. For X86-64 the memory will be specified
// in the following format:
@@ -2014,6 +2037,7 @@ public:
IndexRegNumOut = X86::NoRegister;
DispValueOut = 0;
DispExprOut = nullptr;
FixedEntryLoadInst = nullptr;
std::reverse_iterator<InstructionIterator> II(End);
std::reverse_iterator<InstructionIterator> IE(Begin);
@@ -2046,7 +2070,8 @@ public:
unsigned R2 = PrevInstr.getOperand(2).getReg();
if (R1 == R2)
return IndirectBranchType::UNKNOWN;
std::tie(Type, MemLocInstr) = analyzePICJumpTable(PrevII, IE, R1, R2);
std::tie(Type, MemLocInstr, FixedEntryLoadInst) =
analyzePICJumpTable(PrevII, IE, R1, R2);
break;
}
return IndirectBranchType::UNKNOWN;
@@ -2090,6 +2115,8 @@ public:
if (MO->ScaleImm != 1 || MO->BaseRegNum != RIPRegister)
return IndirectBranchType::UNKNOWN;
break;
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
break;
default:
if (MO->ScaleImm != PtrSize)
return IndirectBranchType::UNKNOWN;

View File

@@ -6,7 +6,7 @@ main:
jae .L4
cmpq $0x1, %rdi
jne .L4
mov .Ljt_pic+8(%rip), %rax
movslq .Ljt_pic+8(%rip), %rax
lea .Ljt_pic(%rip), %rdx
add %rdx, %rax
jmpq *%rax

View File

@@ -1,9 +1,13 @@
## Verify that BOLT detects fixed destination of indirect jump for PIC
## case.
XFAIL: *
RUN: %clang %cflags -no-pie %S/Inputs/jump-table-fixed-ref-pic.s -Wl,-q -o %t
RUN: llvm-bolt %t --relocs -o %t.null 2>&1 | FileCheck %s
RUN: llvm-bolt %t --relocs -o %t.null -print-cfg 2>&1 | FileCheck %s
CHECK: BOLT-INFO: fixed indirect branch detected in main
CHECK: BOLT-INFO: fixed PIC indirect branch detected in main {{.*}} the destination value is 0x[[#TGT:]]
CHECK: Binary Function "main" after building cfg
CHECK: movslq ".rodata/1"+8(%rip), %rax
CHECK-NEXT: leaq ".rodata/1"(%rip), %rdx
CHECK-NEXT: addq %rdx, %rax
CHECK-NEXT: jmpq *%rax # UNKNOWN CONTROL FLOW