The KCFI sanitizer, enabled with `-fsanitize=kcfi`, implements a
forward-edge control flow integrity scheme for indirect calls. It
uses a !kcfi_type metadata node to attach a type identifier for each
function and injects verification code before indirect calls.
Unlike the current CFI schemes implemented in LLVM, KCFI does not
require LTO, does not alter function references to point to a jump
table, and never breaks function address equality. KCFI is intended
to be used in low-level code, such as operating system kernels,
where the existing schemes can cause undue complications because
of the aforementioned properties. However, unlike the existing
schemes, KCFI is limited to validating only function pointers and is
not compatible with executable-only memory.
KCFI does not provide runtime support, but always traps when a
type mismatch is encountered. Users of the scheme are expected
to handle the trap. With `-fsanitize=kcfi`, Clang emits a `kcfi`
operand bundle to indirect calls, and LLVM lowers this to a
known architecture-specific sequence of instructions for each
callsite to make runtime patching easier for users who require this
functionality.
A KCFI type identifier is a 32-bit constant produced by taking the
lower half of xxHash64 from a C++ mangled typename. If a program
contains indirect calls to assembly functions, they must be
manually annotated with the expected type identifiers to prevent
errors. To make this easier, Clang generates a weak SHN_ABS
`__kcfi_typeid_<function>` symbol for each address-taken function
declaration, which can be used to annotate functions in assembly
as long as at least one C translation unit linked into the program
takes the function address. For example, on AArch64, we might have
the following code:
```
.c:
int f(void);
int (*p)(void) = f;
p();
.s:
.4byte __kcfi_typeid_f
.global f
f:
...
```
Note that X86 uses a different preamble format for compatibility
with Linux kernel tooling. See the comments in
`X86AsmPrinter::emitKCFITypeId` for details.
As users of KCFI may need to locate trap locations for binary
validation and error handling, LLVM can additionally emit the
locations of traps to a `.kcfi_traps` section.
Similarly to other sanitizers, KCFI checking can be disabled for a
function with a `no_sanitize("kcfi")` function attribute.
Relands 67504c9549 with a fix for
32-bit builds.
Reviewed By: nickdesaulniers, kees, joaomoreira, MaskRay
Differential Revision: https://reviews.llvm.org/D119296
250 lines
5.7 KiB
C++
250 lines
5.7 KiB
C++
//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the function that lexes the machine instruction source
// string.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
|
|
#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
|
|
|
|
#include "llvm/ADT/APSInt.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include <string>
|
|
|
|
namespace llvm {
|
|
|
|
class Twine;
|
|
|
|
/// A token produced by the machine instruction lexer.
|
|
struct MIToken {
|
|
enum TokenKind {
|
|
// Markers
|
|
Eof,
|
|
Error,
|
|
Newline,
|
|
|
|
// Tokens with no info.
|
|
comma,
|
|
equal,
|
|
underscore,
|
|
colon,
|
|
coloncolon,
|
|
dot,
|
|
exclaim,
|
|
lparen,
|
|
rparen,
|
|
lbrace,
|
|
rbrace,
|
|
plus,
|
|
minus,
|
|
less,
|
|
greater,
|
|
|
|
// Keywords
|
|
kw_implicit,
|
|
kw_implicit_define,
|
|
kw_def,
|
|
kw_dead,
|
|
kw_dereferenceable,
|
|
kw_killed,
|
|
kw_undef,
|
|
kw_internal,
|
|
kw_early_clobber,
|
|
kw_debug_use,
|
|
kw_renamable,
|
|
kw_tied_def,
|
|
kw_frame_setup,
|
|
kw_frame_destroy,
|
|
kw_nnan,
|
|
kw_ninf,
|
|
kw_nsz,
|
|
kw_arcp,
|
|
kw_contract,
|
|
kw_afn,
|
|
kw_reassoc,
|
|
kw_nuw,
|
|
kw_nsw,
|
|
kw_exact,
|
|
kw_nofpexcept,
|
|
kw_debug_location,
|
|
kw_debug_instr_number,
|
|
kw_cfi_same_value,
|
|
kw_cfi_offset,
|
|
kw_cfi_rel_offset,
|
|
kw_cfi_def_cfa_register,
|
|
kw_cfi_def_cfa_offset,
|
|
kw_cfi_adjust_cfa_offset,
|
|
kw_cfi_escape,
|
|
kw_cfi_def_cfa,
|
|
kw_cfi_llvm_def_aspace_cfa,
|
|
kw_cfi_register,
|
|
kw_cfi_remember_state,
|
|
kw_cfi_restore,
|
|
kw_cfi_restore_state,
|
|
kw_cfi_undefined,
|
|
kw_cfi_window_save,
|
|
kw_cfi_aarch64_negate_ra_sign_state,
|
|
kw_blockaddress,
|
|
kw_intrinsic,
|
|
kw_target_index,
|
|
kw_half,
|
|
kw_float,
|
|
kw_double,
|
|
kw_x86_fp80,
|
|
kw_fp128,
|
|
kw_ppc_fp128,
|
|
kw_target_flags,
|
|
kw_volatile,
|
|
kw_non_temporal,
|
|
kw_invariant,
|
|
kw_align,
|
|
kw_basealign,
|
|
kw_addrspace,
|
|
kw_stack,
|
|
kw_got,
|
|
kw_jump_table,
|
|
kw_constant_pool,
|
|
kw_call_entry,
|
|
kw_custom,
|
|
kw_liveout,
|
|
kw_landing_pad,
|
|
kw_inlineasm_br_indirect_target,
|
|
kw_ehfunclet_entry,
|
|
kw_liveins,
|
|
kw_successors,
|
|
kw_floatpred,
|
|
kw_intpred,
|
|
kw_shufflemask,
|
|
kw_pre_instr_symbol,
|
|
kw_post_instr_symbol,
|
|
kw_heap_alloc_marker,
|
|
kw_cfi_type,
|
|
kw_bbsections,
|
|
kw_unknown_size,
|
|
kw_unknown_address,
|
|
kw_ir_block_address_taken,
|
|
kw_machine_block_address_taken,
|
|
|
|
// Metadata types.
|
|
kw_distinct,
|
|
|
|
// Named metadata keywords
|
|
md_tbaa,
|
|
md_alias_scope,
|
|
md_noalias,
|
|
md_range,
|
|
md_diexpr,
|
|
md_dilocation,
|
|
|
|
// Identifier tokens
|
|
Identifier,
|
|
NamedRegister,
|
|
NamedVirtualRegister,
|
|
MachineBasicBlockLabel,
|
|
MachineBasicBlock,
|
|
StackObject,
|
|
FixedStackObject,
|
|
NamedGlobalValue,
|
|
GlobalValue,
|
|
ExternalSymbol,
|
|
MCSymbol,
|
|
|
|
// Other tokens
|
|
IntegerLiteral,
|
|
FloatingPointLiteral,
|
|
HexLiteral,
|
|
VectorLiteral,
|
|
VirtualRegister,
|
|
ConstantPoolItem,
|
|
JumpTableIndex,
|
|
NamedIRBlock,
|
|
IRBlock,
|
|
NamedIRValue,
|
|
IRValue,
|
|
QuotedIRValue, // `<constant value>`
|
|
SubRegisterIndex,
|
|
StringConstant
|
|
};
|
|
|
|
private:
|
|
TokenKind Kind = Error;
|
|
StringRef Range;
|
|
StringRef StringValue;
|
|
std::string StringValueStorage;
|
|
APSInt IntVal;
|
|
|
|
public:
|
|
MIToken() = default;
|
|
|
|
MIToken &reset(TokenKind Kind, StringRef Range);
|
|
|
|
MIToken &setStringValue(StringRef StrVal);
|
|
MIToken &setOwnedStringValue(std::string StrVal);
|
|
MIToken &setIntegerValue(APSInt IntVal);
|
|
|
|
TokenKind kind() const { return Kind; }
|
|
|
|
bool isError() const { return Kind == Error; }
|
|
|
|
bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
|
|
|
|
bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
|
|
|
|
bool isRegister() const {
|
|
return Kind == NamedRegister || Kind == underscore ||
|
|
Kind == NamedVirtualRegister || Kind == VirtualRegister;
|
|
}
|
|
|
|
bool isRegisterFlag() const {
|
|
return Kind == kw_implicit || Kind == kw_implicit_define ||
|
|
Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
|
|
Kind == kw_undef || Kind == kw_internal ||
|
|
Kind == kw_early_clobber || Kind == kw_debug_use ||
|
|
Kind == kw_renamable;
|
|
}
|
|
|
|
bool isMemoryOperandFlag() const {
|
|
return Kind == kw_volatile || Kind == kw_non_temporal ||
|
|
Kind == kw_dereferenceable || Kind == kw_invariant ||
|
|
Kind == StringConstant;
|
|
}
|
|
|
|
bool is(TokenKind K) const { return Kind == K; }
|
|
|
|
bool isNot(TokenKind K) const { return Kind != K; }
|
|
|
|
StringRef::iterator location() const { return Range.begin(); }
|
|
|
|
StringRef range() const { return Range; }
|
|
|
|
/// Return the token's string value.
|
|
StringRef stringValue() const { return StringValue; }
|
|
|
|
const APSInt &integerValue() const { return IntVal; }
|
|
|
|
bool hasIntegerValue() const {
|
|
return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
|
|
Kind == MachineBasicBlockLabel || Kind == StackObject ||
|
|
Kind == FixedStackObject || Kind == GlobalValue ||
|
|
Kind == VirtualRegister || Kind == ConstantPoolItem ||
|
|
Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
|
|
}
|
|
};
|
|
|
|
/// Consume a single machine instruction token in the given source and return
|
|
/// the remaining source string.
|
|
StringRef lexMIToken(
|
|
StringRef Source, MIToken &Token,
|
|
function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
|