Files
clang-p2996/llvm/test/CodeGen/X86/retpoline.ll
Chandler Carruth ae0cafece8 [x86/retpoline] Split the LLVM concept of retpolines into separate
subtarget features for indirect calls and indirect branches.

This is in preparation for enabling *only* the call retpolines when
using speculative load hardening.

I've continued to use subtarget features for now as they continue to
seem the best fit given the lack of other retpoline-like constructs so
far.

The LLVM side is pretty simple. I'd like to eventually get rid of the
old feature, but not sure what backwards compatibility issues that will
cause.

This does remove the "implies" from requesting an external thunk. This
always seemed somewhat questionable and is now clearly not desirable --
you specify a thunk the same way no matter which set of things are
getting retpolines.

I really want to keep this nicely isolated from end users and just an
LLVM implementation detail, so I've moved the `-mretpoline` flag in
Clang to no longer rely on a specific subtarget feature by that name and
instead to be directly handled. In some ways this is simpler, but in
order to preserve existing behavior I've had to add some fallback code
so that users who relied on merely passing -mretpoline-external-thunk
continue to get the same behavior. I suspect we should eventually remove
this fallback (we have never tested that it works!), but I've not done that
in this patch.

Differential Revision: https://reviews.llvm.org/D51150

llvm-svn: 340515
2018-08-23 06:06:38 +00:00

512 lines
17 KiB
LLVM

; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
; External function used as the direct-call counterpart to the indirect calls
; in @icall_reg.
declare void @bar(i32)
; Test a simple indirect call and tail call.
; The body alternates direct calls to @bar with indirect calls through the
; %fp argument, so it contains both an indirect call in the middle of the
; function and one immediately before the return. Attribute group #0 enables
; only the retpoline-indirect-calls target feature.
define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
entry:
tail call void @bar(i32 %x)
; Indirect call that is followed by more work, so it cannot be the final jump.
tail call void %fp(i32 %x)
tail call void @bar(i32 %x)
; Indirect call in tail position. The x86-64 checks below expect a TAILCALL
; jump to the thunk here, while the optimized i686 checks forbid TAILCALL.
tail call void %fp(i32 %x)
ret void
}
; X64-LABEL: icall_reg:
; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
; X64-DAG: movl %esi, %[[x:[^ ]*]]
; X64: movl %esi, %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movl %[[x]], %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: icall_reg:
; X64FAST: callq bar
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: callq bar
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: icall_reg:
; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86-NOT: # TAILCALL
; X86FAST-LABEL: icall_reg:
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax
; Function pointer declared here without an initializer (external global).
@global_fp = external global void (i32)*
; Test an indirect call through a global variable.
; The pointer is loaded from @global_fp twice: once for a normal call and once
; for a call in tail position.
; NOTE(review): %fpp is never referenced in the body -- presumably a leftover;
; confirm before removing, since the checks depend on the argument layout.
define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
; First load feeds a non-tail indirect call.
%fp1 = load void (i32)*, void (i32)** @global_fp
call void %fp1(i32 %x)
; Second, separate load feeds the indirect call in tail position.
%fp2 = load void (i32)*, void (i32)** @global_fp
tail call void %fp2(i32 %x)
ret void
}
; X64-LABEL: icall_global_fp:
; X64-DAG: movl %edi, %[[x:[^ ]*]]
; X64-DAG: movq global_fp(%rip), %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq global_fp(%rip), %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: icall_global_fp:
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: icall_global_fp:
; X86: movl global_fp, %eax
; X86: pushl 4(%esp)
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl global_fp, %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL
; X86FAST-LABEL: icall_global_fp:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL
; Minimal object layout: the single field points to a table of function
; pointers, each taking a %struct.Foo*.
%struct.Foo = type { void (%struct.Foo*)** }
; Test an indirect call through a vtable.
; Reads the table pointer from field 0 of %obj, loads the function pointer in
; slot 1 once, and then calls it twice with %obj as the argument; the second
; call is in tail position.
define void @vcall(%struct.Foo* %obj) #0 {
; Address of field 0 (the table pointer) inside the object.
%vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
%vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
; Slot index 1 of the table (the checks expect offset 8 on x86-64, 4 on i686).
%vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
%fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
; The same loaded pointer is reused for both calls.
tail call void %fp(%struct.Foo* %obj)
tail call void %fp(%struct.Foo* %obj)
ret void
}
; X64-LABEL: vcall:
; X64: movq %rdi, %[[obj:[^ ]*]]
; X64: movq (%rdi), %[[vptr:[^ ]*]]
; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movq %[[obj]], %rdi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: vcall:
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: vcall:
; X86: movl 8(%esp), %[[obj:[^ ]*]]
; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]
; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86: movl %[[fp]], %eax
; X86: pushl %[[obj]]
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl %[[fp]], %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL
; X86FAST-LABEL: vcall:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL
; Target of the direct tail call in @direct_tail.
declare void @direct_callee()
; Test a direct tail call with call retpolines enabled (attribute group #0).
; The checks below expect a plain `jmp direct_callee` TAILCALL in all four
; configurations, i.e. no thunk is involved for a direct call.
define void @direct_tail() #0 {
tail call void @direct_callee()
ret void
}
; X64-LABEL: direct_tail:
; X64: jmp direct_callee # TAILCALL
; X64FAST-LABEL: direct_tail:
; X64FAST: jmp direct_callee # TAILCALL
; X86-LABEL: direct_tail:
; X86: jmp direct_callee # TAILCALL
; X86FAST-LABEL: direct_tail:
; X86FAST: jmp direct_callee # TAILCALL
; Callee carrying attribute group #2, which is `nonlazybind`.
declare void @nonlazybind_callee() #2
; Test direct calls to a nonlazybind function: one normal call followed by one
; call in tail position. The checks below expect the x86-64 configurations to
; load the callee address via @GOTPCREL and go through the r11 thunk, and the
; i686 configurations to call and jump through @PLT.
define void @nonlazybind_caller() #0 {
call void @nonlazybind_callee()
tail call void @nonlazybind_callee()
ret void
}
; X64-LABEL: nonlazybind_caller:
; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
; X64: movq %[[REG]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movq %[[REG]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: nonlazybind_caller:
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: nonlazybind_caller:
; X86: calll nonlazybind_callee@PLT
; X86: jmp nonlazybind_callee@PLT # TAILCALL
; X86FAST-LABEL: nonlazybind_caller:
; X86FAST: calll nonlazybind_callee@PLT
; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL
; Check that a switch gets lowered using a jump table when retpolines are only
; enabled for calls.
; The function uses attribute group #0 (call retpolines only). It loops
; forever: each iteration loads a selector from %ptr, dispatches over the
; values 1 through 9 (with %bb0 as the default), performs a distinct store to
; %sink, and branches back to %header. Only the optimized x86-64 and i686
; configurations have explicit checks here; they expect an indirect jump.
define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
; X64-LABEL: switch_jumptable:
; X64: jmpq *
; X86-LABEL: switch_jumptable:
; X86: jmpl *
entry:
br label %header
header:
; Volatile load so the selector is re-read on every iteration.
%i = load volatile i32, i32* %ptr
; Nine contiguous case values plus a default.
switch i32 %i, label %bb0 [
i32 1, label %bb1
i32 2, label %bb2
i32 3, label %bb3
i32 4, label %bb4
i32 5, label %bb5
i32 6, label %bb6
i32 7, label %bb7
i32 8, label %bb8
i32 9, label %bb9
]
; Each destination stores a different constant, so the blocks stay distinct.
bb0:
store volatile i64 0, i64* %sink
br label %header
bb1:
store volatile i64 1, i64* %sink
br label %header
bb2:
store volatile i64 2, i64* %sink
br label %header
bb3:
store volatile i64 3, i64* %sink
br label %header
bb4:
store volatile i64 4, i64* %sink
br label %header
bb5:
store volatile i64 5, i64* %sink
br label %header
bb6:
store volatile i64 6, i64* %sink
br label %header
bb7:
store volatile i64 7, i64* %sink
br label %header
bb8:
store volatile i64 8, i64* %sink
br label %header
bb9:
store volatile i64 9, i64* %sink
br label %header
}
; Constant table holding the addresses of the ten blocks %bb0..%bb9 of
; @indirectbr_preserved; the function indexes it to pick an indirectbr target.
@indirectbr_preserved.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_preserved, %bb0),
i8* blockaddress(@indirectbr_preserved, %bb1),
i8* blockaddress(@indirectbr_preserved, %bb2),
i8* blockaddress(@indirectbr_preserved, %bb3),
i8* blockaddress(@indirectbr_preserved, %bb4),
i8* blockaddress(@indirectbr_preserved, %bb5),
i8* blockaddress(@indirectbr_preserved, %bb6),
i8* blockaddress(@indirectbr_preserved, %bb7),
i8* blockaddress(@indirectbr_preserved, %bb8),
i8* blockaddress(@indirectbr_preserved, %bb9)]
; Check that we preserve indirectbr when only calls are retpolined.
; The function uses attribute group #0 (call retpolines only). It loads an
; index from %p, looks up a block address in the table above, and jumps to it
; with indirectbr; every destination block stores a distinct constant to %sink
; and falls into %latch, which repeats the lookup and jump.
define void @indirectbr_preserved(i64* readonly %p, i64* %sink) #0 {
; X64-LABEL: indirectbr_preserved:
; X64: jmpq *
; X86-LABEL: indirectbr_preserved:
; X86: jmpl *
entry:
%i0 = load i64, i64* %p
%target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i0
%target0 = load i8*, i8** %target.i0
; The entry dispatch lists only two possible successors.
indirectbr i8* %target0, [label %bb1, label %bb3]
bb0:
store volatile i64 0, i64* %sink
br label %latch
bb1:
store volatile i64 1, i64* %sink
br label %latch
bb2:
store volatile i64 2, i64* %sink
br label %latch
bb3:
store volatile i64 3, i64* %sink
br label %latch
bb4:
store volatile i64 4, i64* %sink
br label %latch
bb5:
store volatile i64 5, i64* %sink
br label %latch
bb6:
store volatile i64 6, i64* %sink
br label %latch
bb7:
store volatile i64 7, i64* %sink
br label %latch
bb8:
store volatile i64 8, i64* %sink
br label %latch
bb9:
store volatile i64 9, i64* %sink
br label %latch
latch:
; Re-read the index and look up the next destination in the table.
%i.next = load i64, i64* %p
%target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i.next
%target.next = load i8*, i8** %target.i.next
; Potentially hit a full 10 successors here so that even if we rewrite as
; a switch it will try to be lowered with a jump table.
indirectbr i8* %target.next, [label %bb0,
label %bb1,
label %bb2,
label %bb3,
label %bb4,
label %bb5,
label %bb6,
label %bb7,
label %bb8,
label %bb9]
}
; Constant table holding the addresses of the ten blocks %bb0..%bb9 of
; @indirectbr_rewrite; the function indexes it to pick an indirectbr target.
@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
i8* blockaddress(@indirectbr_rewrite, %bb1),
i8* blockaddress(@indirectbr_rewrite, %bb2),
i8* blockaddress(@indirectbr_rewrite, %bb3),
i8* blockaddress(@indirectbr_rewrite, %bb4),
i8* blockaddress(@indirectbr_rewrite, %bb5),
i8* blockaddress(@indirectbr_rewrite, %bb6),
i8* blockaddress(@indirectbr_rewrite, %bb7),
i8* blockaddress(@indirectbr_rewrite, %bb8),
i8* blockaddress(@indirectbr_rewrite, %bb9)]
; Check that when retpolines are enabled for indirect branches the indirectbr
; instruction gets rewritten to use switch, and that in turn doesn't get lowered
; as a jump table.
; The body mirrors @indirectbr_preserved; the difference is attribute group
; #1, which enables retpolines for indirect branches as well as indirect
; calls. The negative checks below forbid jmpq/jmpl after the function label
; in the optimized configurations.
define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #1 {
; X64-LABEL: indirectbr_rewrite:
; X64-NOT: jmpq
; X86-LABEL: indirectbr_rewrite:
; X86-NOT: jmpl
entry:
%i0 = load i64, i64* %p
%target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
%target0 = load i8*, i8** %target.i0
; The entry dispatch lists only two possible successors.
indirectbr i8* %target0, [label %bb1, label %bb3]
bb0:
store volatile i64 0, i64* %sink
br label %latch
bb1:
store volatile i64 1, i64* %sink
br label %latch
bb2:
store volatile i64 2, i64* %sink
br label %latch
bb3:
store volatile i64 3, i64* %sink
br label %latch
bb4:
store volatile i64 4, i64* %sink
br label %latch
bb5:
store volatile i64 5, i64* %sink
br label %latch
bb6:
store volatile i64 6, i64* %sink
br label %latch
bb7:
store volatile i64 7, i64* %sink
br label %latch
bb8:
store volatile i64 8, i64* %sink
br label %latch
bb9:
store volatile i64 9, i64* %sink
br label %latch
latch:
; Re-read the index and look up the next destination in the table.
%i.next = load i64, i64* %p
%target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
%target.next = load i8*, i8** %target.i.next
; Potentially hit a full 10 successors here so that even if we rewrite as
; a switch it will try to be lowered with a jump table.
indirectbr i8* %target.next, [label %bb0,
label %bb1,
label %bb2,
label %bb3,
label %bb4,
label %bb5,
label %bb6,
label %bb7,
label %bb8,
label %bb9]
}
; Lastly check that the necessary thunks were emitted.
;
; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
; X64-NEXT: .hidden __llvm_retpoline_r11
; X64-NEXT: .weak __llvm_retpoline_r11
; X64: __llvm_retpoline_r11:
; X64-NEXT: # {{.*}} # %entry
; X64-NEXT: callq [[CALL_TARGET:.*]]
; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X64-NEXT: # %entry
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: pause
; X64-NEXT: lfence
; X64-NEXT: jmp [[CAPTURE_SPEC]]
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: [[CALL_TARGET]]: # Block address taken
; X64-NEXT: # %entry
; X64-NEXT: movq %r11, (%rsp)
; X64-NEXT: retq
;
; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
; X86-NEXT: .hidden __llvm_retpoline_eax
; X86-NEXT: .weak __llvm_retpoline_eax
; X86: __llvm_retpoline_eax:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
; X86-NEXT: .hidden __llvm_retpoline_ecx
; X86-NEXT: .weak __llvm_retpoline_ecx
; X86: __llvm_retpoline_ecx:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %ecx, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
; X86-NEXT: .hidden __llvm_retpoline_edx
; X86-NEXT: .weak __llvm_retpoline_edx
; X86: __llvm_retpoline_edx:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
; X86-NEXT: .hidden __llvm_retpoline_edi
; X86-NEXT: .weak __llvm_retpoline_edi
; X86: __llvm_retpoline_edi:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %edi, (%esp)
; X86-NEXT: retl
; Group #0: retpolines for indirect calls only. Used by every test function in
; this file except @indirectbr_rewrite.
attributes #0 = { "target-features"="+retpoline-indirect-calls" }
; Group #1: retpolines for both indirect calls and indirect branches. Used by
; @indirectbr_rewrite.
attributes #1 = { "target-features"="+retpoline-indirect-calls,+retpoline-indirect-branches" }
; Group #2: nonlazybind, applied to the @nonlazybind_callee declaration.
attributes #2 = { nonlazybind }