Relying on any MachineFunction state in the MachineFunctionInfo constructor is hazardous, because the construction time is unclear and determined by the first use. The function may be only partially constructed, which is part of why we have many of these hacky string attributes to track what we need for ABI lowering. For SelectionDAG, all stack objects are created up-front before calling convention lowering, so stack objects are visible at construction time. For GlobalISel, none of the IR function has been visited at that point, so the allocas haven't been added to the MachineFrameInfo yet. This should fix failing to set flat_scratch_init in GlobalISel when needed. This pass really needs to be turned into some kind of analysis, but I haven't found a nice way to use one here.
28 lines
917 B
LLVM
28 lines
917 B
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
|
|
|
|
; Make sure flat_scratch_init is set
|
|
|
|
; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
|
|
; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; Kernel with a single addrspace(5) stack slot that is written through a
; default-address-space (flat) pointer produced by an addrspacecast. The kernel
; makes no calls. The FileCheck directives above expect flat_scratch_init to be
; enabled for it.
define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
entry:
  %slot = alloca i32, addrspace(5)
  ; Reinterpret the addrspace(5) slot as a default-address-space pointer.
  %slot.flat = addrspacecast i32 addrspace(5)* %slot to i32*
  ; Volatile so the store is kept even though the slot is never read back.
  store volatile i32 0, i32* %slot.flat
  ret void
}
|
|
|
|
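; TODO: flat_scratch_init could be optimized out in this case, since the stack
; object is only accessed through its addrspace(5) pointer and never through a
; flat pointer.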
|
|
; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
|
|
; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; Kernel with a single stack slot that is only accessed directly through its
; addrspace(5) pointer (no addrspacecast) and that makes no calls. The
; FileCheck directives above still expect flat_scratch_init to be enabled.
define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
entry:
  %scratch.slot = alloca i32, addrspace(5)
  ; Volatile so the store is kept even though the slot is never read back.
  store volatile i32 0, i32 addrspace(5)* %scratch.slot
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
|
|
; GCN: .amdhsa_user_sgpr_flat_scratch_init 0
|
|
; Baseline kernel: no allocas and no calls, only a return. The FileCheck
; directives above expect flat_scratch_init to remain disabled (0) here.
define amdgpu_kernel void @kernel_no_calls_no_stack() {
entry:
  ret void
}
|