Files
clang-p2996/llvm/test/CodeGen/X86/statepoint-stack-usage.ll
Denis Antrushin c08d48fc2d [Statepoints] Change statepoint machine instr format to better suit VReg lowering.
The current STATEPOINT machine instruction format is this:

   STATEPOINT
   <id>, <num patch bytes>, <num call arguments>, <call target>,
   [call arguments...],
   <StackMaps::ConstantOp>, <calling convention>,
   <StackMaps::ConstantOp>, <statepoint flags>,
   <StackMaps::ConstantOp>, <num deopt args>, [deopt args...],
   <gc base/derived pairs...> <gc allocas...>

Note that GC pointers are listed in <base, derived> pairs.
This causes base pointers to appear multiple times (at least twice) in
the instruction, which is a problem when VReg lowering is enabled.
The problem is that machine operand tiedness is a one-to-one relation,
so the instruction might look like this:

  %vr2 = STATEPOINT ... %vr1, %vr1(tied-def0)

Since only one instance of %vr1 is tied, this may lead to incorrect
codegen (see PR46917 for more details), so we always have to spill
base pointers. This mostly defeats the new VReg lowering scheme.

This patch changes the statepoint instruction format so that every
GC pointer appears only once in the operand list. That way they can all
be tied. An additional set of operands is added to preserve the
base-derived relation required to build the stackmap.
The new statepoint has the following format:

  STATEPOINT
  <id>, <num patch bytes>, <num call arguments>, <call target>,
  [call arguments...],
  <StackMaps::ConstantOp>, <calling convention>,
  <StackMaps::ConstantOp>, <statepoint flags>,
  <StackMaps::ConstantOp>, <num deopt args>, [deopt args...],
  <StackMaps::ConstantOp>, <num gc pointers>, [gc pointers...],
  <StackMaps::ConstantOp>, <num gc allocas>,  [gc allocas...]
  <StackMaps::ConstantOp>, <num entries in gc map>, [base/derived indices...]

Changes are:
  - every GC pointer is listed only once, in a flat length-prefixed list;
  - the alloca list is now prefixed with its length too;
  - the alloca list is followed by a length-prefixed list of base-derived
    indices into the GC pointer list. Note that the indices are logical
    (ordinal number of the pointer), not absolute (index of the machine operand).

Differential Revision: https://reviews.llvm.org/D87154
2020-10-06 17:40:29 +07:00

137 lines
8.5 KiB
LLVM

; Lit run line: this file is fed to llc on stdin and llc's output is piped
; into FileCheck, which matches it against the check comments in this file.
; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 < %s | FileCheck %s
; Module-wide target settings: the triple below selects x86-64 Linux.
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; This test checks that we reuse the same stack slots for GC values
; spilled over two different call sites. Since the order of the GC
; arguments differs between the two statepoints, naive lowering code would
; insert loads and stores to rearrange items on the stack. We need to make
; sure (for performance) that this doesn't happen.
define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
; CHECK-LABEL: back_to_back_calls
; The exact stores don't matter, but there need to be three stack slots created
; CHECK-DAG: movq %rdi, {{[0-9]*}}(%rsp)
; CHECK-DAG: movq %rdx, {{[0-9]*}}(%rsp)
; CHECK-DAG: movq %rsi, {{[0-9]*}}(%rsp)
; There should be no more than three moves
; CHECK-NOT: movq
; First statepoint: the "gc-live" bundle lists the pointers as (%a, %b, %c);
; the "deopt" bundle carries only integer constants.
%safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)]
; The relocates use final indices 0, 1 and 2, i.e. the positions of %a, %b
; and %c in the "gc-live" list above.
%a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0)
%b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 1)
%c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 2)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
; Second statepoint: the same three (now relocated) pointers, listed in the
; reverse order (%c1, %b1, %a1).
%safepoint_token2 = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)]
; The final indices are mirrored to follow the reversed list: %a2 uses 2
; (where %a1 sits) and %c2 uses 0 (where %c1 sits).
%a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 0, i32 2)
%b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 0, i32 1)
%c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 0, i32 0)
; CHECK: callq
ret i32 1
}
; This test simply checks that minor changes in VM state don't prevent slots
; from being reused for GC values. It has the same shape as
; @back_to_back_calls, except that the "deopt" bundle of the second
; statepoint also references two of the relocated pointers.
define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
; CHECK-LABEL: reserve_first
; The exact stores don't matter, but there need to be three stack slots created
; CHECK-DAG: movq %rdi, {{[0-9]*}}(%rsp)
; CHECK-DAG: movq %rdx, {{[0-9]*}}(%rsp)
; CHECK-DAG: movq %rsi, {{[0-9]*}}(%rsp)
; First statepoint: "gc-live" is (%a, %b, %c) and "deopt" holds only constants.
%safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)]
%a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0)
%b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 1)
%c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 2)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
; Second statepoint: "gc-live" is reversed to (%c1, %b1, %a1), and the "deopt"
; bundle now also mentions %a1 and %c1 -- this is the changed VM state.
%safepoint_token2 = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1), "deopt" (i32 addrspace(1)* %a1, i32 0, i32 addrspace(1)* %c1, i32 0, i32 0)]
; Final indices follow the reversed "gc-live" order (2 for %a1, 0 for %c1).
%a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 0, i32 2)
%b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 0, i32 1)
%c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 0, i32 0)
; CHECK: callq
ret i32 1
}
; Check that we reuse the same stack slot across multiple calls. The use of
; more than two calls here is critical. We've had a bug which allowed reuse
; exactly once which went undetected for a long time.
; The function takes three plain i32 arguments (no GC pointers) and passes
; them only through the "deopt" bundle of four consecutive statepoints.
define i32 @back_to_back_deopt(i32 %a, i32 %b, i32 %c) #1
gc "statepoint-example" {
; CHECK-LABEL: back_to_back_deopt
; The exact stores don't matter, but there need to be three stack slots created
; Before the first call the values are stored from %edi/%esi/%edx; the same
; three offsets (12, 8 and 4 from %rsp) are expected before every later call.
; CHECK-DAG: movl %edi, 12(%rsp)
; CHECK-DAG: movl %esi, 8(%rsp)
; CHECK-DAG: movl %edx, 4(%rsp)
; CHECK: callq
; From the second call on, the stores are expected to come from
; %ebx/%ebp/%r14d (presumably copies of the arguments kept in registers that
; survive the call -- confirm against the generated code).
; CHECK-DAG: movl %ebx, 12(%rsp)
; CHECK-DAG: movl %ebp, 8(%rsp)
; CHECK-DAG: movl %r14d, 4(%rsp)
; CHECK: callq
; CHECK-DAG: movl %ebx, 12(%rsp)
; CHECK-DAG: movl %ebp, 8(%rsp)
; CHECK-DAG: movl %r14d, 4(%rsp)
; CHECK: callq
; CHECK-DAG: movl %ebx, 12(%rsp)
; CHECK-DAG: movl %ebp, 8(%rsp)
; CHECK-DAG: movl %r14d, 4(%rsp)
; CHECK: callq
; Four identical statepoints; each one carries (%a, %b, %c) as deopt state and
; its token result is unused.
call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 %a, i32 %b, i32 %c)]
call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 %a, i32 %b, i32 %c)]
call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 %a, i32 %b, i32 %c)]
call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 %a, i32 %b, i32 %c)]
ret i32 1
}
; Test that stack slots are reused for invokes.
; Same idea as @back_to_back_calls, but both statepoints are invoked (each
; with its own unwind destination) and one pointer is bitcast between them.
define i32 @back_to_back_invokes(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" personality i32 ()* @"personality_function" {
; CHECK-LABEL: back_to_back_invokes
entry:
; The exact stores don't matter, but there need to be three stack slots created
; CHECK-DAG: movq %rdi, {{[0-9]*}}(%rsp)
; CHECK-DAG: movq %rdx, {{[0-9]*}}(%rsp)
; CHECK-DAG: movq %rsi, {{[0-9]*}}(%rsp)
; CHECK: callq
; First statepoint: "gc-live" is (%a, %b, %c); unwinds to %exceptional_return.
%safepoint_token = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)]
to label %normal_return unwind label %exceptional_return
normal_return:
%a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0)
%b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 1)
%c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 2)
; Should work even through bitcasts
%c1.casted = bitcast i32 addrspace(1)* %c1 to i8 addrspace(1)*
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
; CHECK: callq
; Second statepoint: "gc-live" is reversed to (%c1.casted, %b1, %a1), with the
; first entry being the bitcast i8 pointer; unwinds to %exceptional_return2.
%safepoint_token2 = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i8 addrspace(1)* %c1.casted, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)]
to label %normal_return2 unwind label %exceptional_return2
normal_return2:
; %c2 is relocated through the i8 flavour of the relocate intrinsic because
; the entry at index 0 is the bitcast pointer.
%a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 0, i32 2)
%b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 0, i32 1)
%c2 = tail call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 0, i32 0)
ret i32 1
; Both unwind destinations are cleanup landing pads that simply return 0.
exceptional_return:
%landing_pad = landingpad { i8*, i32 }
cleanup
ret i32 0
exceptional_return2:
%landing_pad2 = landingpad { i8*, i32 }
cleanup
ret i32 0
}
; Declarations of the intrinsics and the personality routine used by the
; tests in this file.
; Function Attrs: nounwind
; NOTE(review): attribute group #3 is referenced by the two relocate
; declarations, but no "attributes #3" definition is visible in this file
; view -- confirm whether it is defined elsewhere or intentionally left empty.
declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3
declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) #3
; Variadic statepoint intrinsic; every test above passes an undef void()*
; call target.
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
; Personality routine named by @back_to_back_invokes.
declare i32 @"personality_function"()
; Attribute group applied to all four test functions.
attributes #1 = { uwtable }