Files
clang-p2996/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
Matt Arsenault 9719f17011 AMDGPU: Move handling of allocation of fixed ABI inputs
For the fixed ABI, set this in the initial argument constructor,
rather than relying on the allocation logic to set the values. Also
stop passing them for amdgpu_gfx, since the DAG path seems to skip
these. I'm unclear on what amdgpu_gfx's expectations are.  This will
allow moving the special input registers out of the normal argument
range.
2021-02-03 09:27:59 -05:00

187 lines
6.8 KiB
LLVM

; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
; Make sure this interacts well with -amdgpu-fixed-function-abi
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
declare float @extern_func(float) #0
declare float @extern_func_many_args(<64 x float>) #0
@funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4
define amdgpu_gfx float @no_stack(float %arg0) #0 {
%add = fadd float %arg0, 1.0
ret float %add
}
define amdgpu_gfx float @simple_stack(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, float addrspace(5)* %stack
%val = load volatile float, float addrspace(5)* %stack
%add = fadd float %arg0, %val
ret float %add
}
define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, float addrspace(5)* %stack
%val = load volatile float, float addrspace(5)* %stack
%add = fadd float %arg0, %val
%stack2 = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, float addrspace(5)* %stack2
%val2 = load volatile float, float addrspace(5)* %stack2
%add2 = fadd float %add, %val2
ret float %add2
}
define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
bb0:
%cmp = fcmp ogt float %arg0, 0.0
br i1 %cmp, label %bb1, label %bb2
bb1:
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, float addrspace(5)* %stack
%val = load volatile float, float addrspace(5)* %stack
%add = fadd float %arg0, %val
br label %bb2
bb2:
%res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
ret float %res
}
define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
bb0:
br label %bb1
bb1:
%ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, float addrspace(5)* %stack
%val = load volatile float, float addrspace(5)* %stack
%add = fadd float %arg0, %val
%cmp = icmp sgt i32 %ctr, 0
%newctr = sub i32 %ctr, 1
br i1 %cmp, label %bb1, label %bb2
bb2:
ret float %add
}
define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
%res = call amdgpu_gfx float @simple_stack(float %arg0)
ret float %res
}
define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, float addrspace(5)* %stack
%val = load volatile float, float addrspace(5)* %stack
%res = call amdgpu_gfx float @simple_stack(float %arg0)
%add = fadd float %res, %val
ret float %add
}
define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
%res = call amdgpu_gfx float @extern_func(float %arg0)
ret float %res
}
define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, float addrspace(5)* %stack
%val = load volatile float, float addrspace(5)* %stack
%res = call amdgpu_gfx float @extern_func(float %arg0)
%add = fadd float %res, %val
ret float %add
}
define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
%res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
ret float %res
}
define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
%fptr = load void()*, void()* addrspace(4)* @funcptr
call amdgpu_gfx void %fptr()
ret float %arg0
}
define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, float addrspace(5)* %stack
%val = load volatile float, float addrspace(5)* %stack
%fptr = load void()*, void()* addrspace(4)* @funcptr
call amdgpu_gfx void %fptr()
%add = fadd float %arg0, %val
ret float %add
}
define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, float addrspace(5)* %stack
%val = load volatile float, float addrspace(5)* %stack
%res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
%add = fadd float %res, %val
ret float %add
}
@lds = internal addrspace(3) global [64 x float] undef
define amdgpu_gfx float @simple_lds(float %arg0) #0 {
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
%val = load float, float addrspace(3)* %lds_ptr
ret float %val
}
define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
%val = load float, float addrspace(3)* %lds_ptr
%res = call amdgpu_gfx float @simple_lds_recurse(float %val)
ret float %res
}
attributes #0 = { nounwind }
; GCN: amdpal.pipelines:
; GCN-NEXT: - .registers:
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: dynamic_stack_loop:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: multiple_stack:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
; GCN-NEXT: no_stack:
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: no_stack_call:
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: no_stack_extern_call:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: no_stack_extern_call_many_args:
; SDAG-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
; GISEL-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
; GCN-NEXT: no_stack_indirect_call:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: simple_lds:
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: simple_lds_recurse:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: simple_stack:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
; GCN-NEXT: simple_stack_call:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: simple_stack_extern_call:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: simple_stack_indirect_call:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: simple_stack_recurse:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: ...