Files
clang-p2996/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
Joao Moreira 82af95029e [X86] Enable ibt-seal optimization when LTO is used in Kernel
Intel's CET/IBT requires every indirect branch target to be an ENDBR instruction. Because of that, the compiler needs to correctly emit these instructions in function prologues. Because this is a security feature, it is desirable that ENDBRs are emitted only in functions that are actually targeted by indirect branches. While it is possible to identify address-taken functions through LTO, minimizing these ENDBR instructions remains a hard task for user-space binaries because exported functions may end up being reachable through PLT entries, which use an indirect branch to reach them. Because this cannot be determined at compile time, the compiler currently emits ENDBRs in every function that does not have local linkage.

Despite the challenge presented for user-space, the kernel landscape is different, as no PLTs are used. With the intent of providing the best-fitting ENDBR emission for the kernel, kernel developers proposed an optimization named "ibt-seal", which replaces the ENDBRs with NOPs directly in the binary. The discussion of this feature can be seen in [1].

This diff enables the flag -mibt-seal, which in combination with LTO enforces a different policy for ENDBR placement when the code model is set to "kernel". In this scenario, the compiler will only emit ENDBRs in address-taken functions, ignoring non-address-taken functions that do not have local linkage.

A comparison between LTO-compiled kernel binaries built without and with the -mibt-seal feature enabled shows that when -mibt-seal was used, the number of ENDBRs in the vmlinux.o binary patched by objtool decreased from 44383 to 33192, and that the number of superfluous ENDBR instructions nopped-out decreased from 11730 to 540.

The 540 missed superfluous ENDBRs need to be investigated further, but the hypotheses are: assembly code not being taken care of by the compiler, the kernel's exported-symbol mechanisms creating bogus address-taken situations, or even these being removed due to other binary optimizations like the kernel's static_calls. For now, I assume that the large drop in the number of ENDBR instructions already justifies the feature being merged.

[1] - https://lkml.org/lkml/2021/11/22/591

Reviewed By: xiangzhangllvm

Differential Revision: https://reviews.llvm.org/D116070
2022-01-21 10:55:34 +08:00

204 lines
6.9 KiB
C++

//===---- X86IndirectBranchTracking.cpp - Enables CET IBT mechanism -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pass that enables Indirect Branch Tracking (IBT) as part
// of Control-Flow Enforcement Technology (CET).
// The pass adds ENDBR (End Branch) machine instructions at the beginning of
// each basic block or function that is referenced by an indirect jump/call
// instruction.
// The ENDBR instructions have a NOP encoding and as such are ignored in
// targets that do not support CET IBT mechanism.
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
using namespace llvm;
// Debug type tag for this pass.
#define DEBUG_TYPE "x86-indirect-branch-tracking"
// Hidden command-line switch, off by default. runOnMachineFunction treats a
// set value as one of the conditions under which the pass does its work.
cl::opt<bool> IndirectBranchTracking(
"x86-indirect-branch-tracking", cl::init(false), cl::Hidden,
cl::desc("Enable X86 indirect branch tracking pass."));
// Counter bumped by addENDBR each time it inserts an ENDBR instruction.
STATISTIC(NumEndBranchAdded, "Number of ENDBR instructions added");
namespace {

/// Machine function pass that places ENDBR instructions for CET indirect
/// branch tracking.
class X86IndirectBranchTrackingPass : public MachineFunctionPass {
public:
  X86IndirectBranchTrackingPass() : MachineFunctionPass(ID) {}

  /// Human-readable name of the pass.
  StringRef getPassName() const override {
    return "X86 Indirect Branch Tracking";
  }

  /// Entry point invoked for every machine function.
  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  /// Pass identification.
  static char ID;

  /// Instruction info of the current subtarget; assigned in
  /// runOnMachineFunction before any ENDBR is built.
  const X86InstrInfo *TII = nullptr;

  /// ENDBR opcode (ENDBR32 or ENDBR64) chosen for the function being
  /// processed.
  unsigned EndbrOpcode = 0;

  /// Inserts an ENDBR immediately before \p I in \p MBB, unless \p I already
  /// points at an ENDBR of the selected opcode.
  /// \returns true when an instruction was inserted, false otherwise.
  bool addENDBR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
};

} // end anonymous namespace
// Storage for the pass identifier handed to the MachineFunctionPass base
// class constructor.
char X86IndirectBranchTrackingPass::ID = 0;
/// Factory for the pass: returns a newly heap-allocated
/// X86IndirectBranchTrackingPass as a FunctionPass pointer.
FunctionPass *llvm::createX86IndirectBranchTrackingPass() {
return new X86IndirectBranchTrackingPass();
}
/// Places an ENDBR (of the opcode selected for the current function) in
/// \p MBB directly before \p I.
///
/// Nothing is inserted when \p I already designates an ENDBR with that
/// opcode. An end iterator (empty block or end of block) always triggers an
/// insertion.
///
/// \returns true if a new instruction was built, false if one was already
/// in place.
bool X86IndirectBranchTrackingPass::addENDBR(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
  assert(TII && "Target instruction info was not initialized");
  assert((X86::ENDBR64 == EndbrOpcode || X86::ENDBR32 == EndbrOpcode) &&
         "Unexpected Endbr opcode");

  // Guard: an ENDBR is already sitting at the requested position.
  const bool AlreadyPresent = I != MBB.end() && I->getOpcode() == EndbrOpcode;
  if (AlreadyPresent)
    return false;

  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(EndbrOpcode));
  ++NumEndBranchAdded;
  return true;
}
/// Tells whether \p MOp names a global that is a Function carrying the
/// ReturnsTwice function attribute. Any other kind of operand, or a global
/// that is not a Function, yields false.
static bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
  if (MOp.isGlobal()) {
    if (auto *Callee = dyn_cast<Function>(MOp.getGlobal()))
      return Callee->getAttributes().hasFnAttr(Attribute::ReturnsTwice);
  }
  return false;
}
/// Decides whether the entry of \p MF has to start with an ENDBR.
///
/// - Functions marked as not needing CF checks never get one.
/// - Under the large code model every function gets one.
/// - Under the kernel code model with the "ibt-seal" module flag present
///   (which implies LTO), only address-taken functions get one.
/// - Otherwise, address-taken functions and functions without local linkage
///   get one.
static bool needsPrologueENDBR(MachineFunction &MF, const Module *M) {
  const Function &Fn = MF.getFunction();
  if (Fn.doesNoCfCheck())
    return false;

  const bool SealEnabled = M->getModuleFlag("ibt-seal") != nullptr;
  const auto Model = MF.getTarget().getCodeModel();

  // Large code model functions are always reachable through indirect calls.
  if (Model == CodeModel::Large)
    return true;

  const bool AddressTaken = Fn.hasAddressTaken();

  // In an LTO'ed kernel with ibt-seal, only address-taken functions are
  // reachable indirectly.
  if (Model == CodeModel::Kernel && SealEnabled)
    return AddressTaken;

  // Address-taken or externally linked functions may be reachable.
  return AddressTaken || !Fn.hasLocalLinkage();
}
/// Inserts ENDBR instructions into \p MF.
///
/// The pass does nothing unless the module carries the
/// "cf-protection-branch" flag, the x86-indirect-branch-tracking option is
/// set, or (only when this file is compiled with __CET__ defined) the target
/// machine reports that it is a JIT.
///
/// When active, an ENDBR is requested at:
///  - the start of the first block, if needsPrologueENDBR says so;
///  - the start of every block whose address is taken;
///  - the position right after every call whose first operand is a
///    returns_twice function;
///  - exception landing positions, chosen differently for the SjLj exception
///    model and for other models (see the comments in the body).
///
/// \returns true if at least one ENDBR was inserted.
bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &SubTarget = MF.getSubtarget<X86Subtarget>();
const Module *M = MF.getMMI().getModule();
// Check that the cf-protection-branch is enabled.
Metadata *isCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
// NB: We need to enable IBT in jitted code if JIT compiler is CET
// enabled.
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF.getTarget());
#ifdef __CET__
bool isJITwithCET = TM->isJIT();
#else
bool isJITwithCET = false;
#endif
// Bail out without touching the function when none of the three enabling
// conditions holds.
if (!isCFProtectionSupported && !IndirectBranchTracking && !isJITwithCET)
return false;
// True if the current MF was changed and false otherwise.
bool Changed = false;
// Cache per-function state that addENDBR relies on (it asserts both are set).
TII = SubTarget.getInstrInfo();
EndbrOpcode = SubTarget.is64Bit() ? X86::ENDBR64 : X86::ENDBR32;
// If function is reachable indirectly, mark the first BB with ENDBR.
if (needsPrologueENDBR(MF, M)) {
auto MBB = MF.begin();
Changed |= addENDBR(*MBB, MBB->begin());
}
for (auto &MBB : MF) {
// Find all basic blocks that their address was taken (for example
// in the case of indirect jump) and add ENDBR instruction.
if (MBB.hasAddressTaken())
Changed |= addENDBR(MBB, MBB.begin());
// Request an ENDBR directly after each call to a returns_twice function.
// NOTE(review): presumably because control can come back to that point via
// an indirect branch (e.g. setjmp/longjmp) -- confirm.
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
if (I->isCall() && I->getNumOperands() > 0 &&
IsCallReturnTwice(I->getOperand(0))) {
Changed |= addENDBR(MBB, std::next(I));
}
}
// Exception handle may indirectly jump to catch pad, So we should add
// ENDBR before catch pad instructions. For SjLj exception model, it will
// create a new BB(new landingpad) indirectly jump to the old landingpad.
if (TM->Options.ExceptionModel == ExceptionHandling::SjLj) {
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
// New Landingpad BB without EHLabel.
if (MBB.isEHPad()) {
// Skip leading debug instructions so the ENDBR lands before the first
// non-debug instruction of the pad; only one ENDBR is requested per block.
if (I->isDebugInstr())
continue;
Changed |= addENDBR(MBB, I);
break;
} else if (I->isEHLabel()) {
// Old Landingpad BB (is not Landingpad now) with
// the old "callee" EHLabel.
MCSymbol *Sym = I->getOperand(0).getMCSymbol();
// Only labels the function records as call-site landing pads qualify;
// other EH labels are skipped and the scan continues.
if (!MF.hasCallSiteLandingPad(Sym))
continue;
Changed |= addENDBR(MBB, std::next(I));
break;
}
}
} else if (MBB.isEHPad()){
// Non-SjLj models: request the ENDBR right after the first EH label found
// in the pad; a pad without any EH label is left untouched by this branch.
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
if (!I->isEHLabel())
continue;
Changed |= addENDBR(MBB, std::next(I));
break;
}
}
}
return Changed;
}