Files
clang-p2996/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
Vitaly Buka a496c8be6e Revert "[CodeGen]Allow targets to use target specific COPY instructions for live range splitting"
And dependent commits.

Details in D150388.

This reverts commit 825b7f0ca5.
This reverts commit 7a98f084c4.
This reverts commit b4a62b1fa5.
This reverts commit b7836d8562.

No conflicts in the code, few tests had conflicts in autogenerated CHECKs:
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll

Reviewed By: alexfh

Differential Revision: https://reviews.llvm.org/D156381
2023-07-26 22:13:32 -07:00

232 lines
8.6 KiB
LLVM

; RUN: llc -mtriple=amdgcn--amdpal -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mattr=-xnack -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
declare amdgpu_gfx float @extern_func(float) #0
declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
@funcptr = external hidden unnamed_addr addrspace(4) constant ptr, align 4
define amdgpu_gfx float @no_stack(float %arg0) #0 {
%add = fadd float %arg0, 1.0
ret float %add
}
define amdgpu_gfx float @simple_stack(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
%add = fadd float %arg0, %val
ret float %add
}
define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
%add = fadd float %arg0, %val
%stack2 = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack2
%val2 = load volatile float, ptr addrspace(5) %stack2
%add2 = fadd float %add, %val2
ret float %add2
}
define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
bb0:
%cmp = fcmp ogt float %arg0, 0.0
br i1 %cmp, label %bb1, label %bb2
bb1:
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
%add = fadd float %arg0, %val
br label %bb2
bb2:
%res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
ret float %res
}
define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
bb0:
br label %bb1
bb1:
%ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
%add = fadd float %arg0, %val
%cmp = icmp sgt i32 %ctr, 0
%newctr = sub i32 %ctr, 1
br i1 %cmp, label %bb1, label %bb2
bb2:
ret float %add
}
define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
%res = call amdgpu_gfx float @simple_stack(float %arg0)
ret float %res
}
define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
%res = call amdgpu_gfx float @simple_stack(float %arg0)
%add = fadd float %res, %val
ret float %add
}
define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
%res = call amdgpu_gfx float @extern_func(float %arg0)
ret float %res
}
define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
%res = call amdgpu_gfx float @extern_func(float %arg0)
%add = fadd float %res, %val
ret float %add
}
define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
%res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
ret float %res
}
define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
%fptr = load ptr, ptr addrspace(4) @funcptr
call amdgpu_gfx void %fptr()
ret float %arg0
}
define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
%fptr = load ptr, ptr addrspace(4) @funcptr
call amdgpu_gfx void %fptr()
%add = fadd float %arg0, %val
ret float %add
}
define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
%res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
%add = fadd float %res, %val
ret float %add
}
@lds = internal addrspace(3) global [64 x float] undef
define amdgpu_gfx float @simple_lds(float %arg0) #0 {
%val = load float, ptr addrspace(3) @lds
ret float %val
}
define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
%val = load float, ptr addrspace(3) @lds
%res = call amdgpu_gfx float @simple_lds_recurse(float %val)
ret float %res
}
attributes #0 = { nounwind }
; GCN: amdpal.pipelines:
; GCN-NEXT: - .registers:
; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x28{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: dynamic_stack_loop:
; GCN-NEXT: .lds_size: 0{{$}}
; SDAG-NEXT: .sgpr_count: 0x25{{$}}
; GISEL-NEXT: .sgpr_count: 0x26{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
; GCN-NEXT: multiple_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: no_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: no_stack_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x25{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: no_stack_extern_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x2c{{$}}
; GCN-NEXT: no_stack_extern_call_many_args:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
; GCN-NEXT: .vgpr_count: 0x2c{{$}}
; GCN-NEXT: no_stack_indirect_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x2c{{$}}
; GCN-NEXT: simple_lds:
; GCN-NEXT: .lds_size: 0x100{{$}}
; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: simple_lds_recurse:
; GCN-NEXT: .lds_size: 0x100{{$}}
; GCN-NEXT: .sgpr_count: 0x28{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x2a{{$}}
; GCN-NEXT: simple_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
; GCN-NEXT: .vgpr_count: 0x2{{$}}
; GCN-NEXT: simple_stack_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x25{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x4{{$}}
; GCN-NEXT: simple_stack_extern_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x2c{{$}}
; GCN-NEXT: simple_stack_indirect_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x30{{$}}
; GCN-NEXT: .vgpr_count: 0x2c{{$}}
; GCN-NEXT: simple_stack_recurse:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x28{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: ...