Files
clang-p2996/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
Matt Arsenault 722b8e0e5a AMDGPU: Invert ABI attribute handling
Previously we assumed all callable functions did not need any
implicitly passed inputs, and added attributes to functions to
indicate when they were necessary. Requiring attributes for
correctness is pretty ugly, and it makes supporting indirect and
external calls more complicated.

This inverts the direction of the attributes, so an undecorated
function is assumed to need all implicit imputs. This enables
AMDGPUAttributor by default to mark when functions are proven to not
need a given input. This strips the equivalent functionality from the
legacy AMDGPUAnnotateKernelFeatures pass.

However, AMDGPUAnnotateKernelFeatures is not fully removed at this
point although it should be in the future. It is still necessary for
the two hacky amdgpu-calls and amdgpu-stack-objects attributes, which
would be better served by a trivial analysis on the IR during
selection. Additionally, AMDGPUAnnotateKernelFeatures still
redundantly handles the uniform-work-group-size attribute to be
removed in a future commit.

At this point when not using -amdgpu-fixed-function-abi, we are still
modifying the ABI based on these newly negated attributes. In the
future, this option will be removed and the locations for implicit
inputs will always be fixed. We will then use the new attributes to
avoid passing the values when unnecessary.
2021-09-09 18:24:28 -04:00

79 lines
3.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=AKF_GCN %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
target datalayout = "A5"
define internal void @indirect() {
; AKF_GCN-LABEL: define {{[^@]+}}@indirect() {
; AKF_GCN-NEXT: ret void
;
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@indirect
; ATTRIBUTOR_GCN-SAME: () #[[ATTR0:[0-9]+]] {
; ATTRIBUTOR_GCN-NEXT: ret void
;
; GFX9-LABEL: indirect:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
ret void
}
define amdgpu_kernel void @test_simple_indirect_call() {
; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5)
; AKF_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast void ()* addrspace(5)* [[FPTR]] to void ()**
; AKF_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR_CAST]], align 8
; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR_CAST]], align 8
; AKF_GCN-NEXT: call void [[FP]]()
; AKF_GCN-NEXT: ret void
;
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5)
; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast void ()* addrspace(5)* [[FPTR]] to void ()**
; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR_CAST]], align 8
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR_CAST]], align 8
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
; ATTRIBUTOR_GCN-NEXT: ret void
;
; GFX9-LABEL: test_simple_indirect_call:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x4
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s6, s9
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
; GFX9-NEXT: s_add_u32 s0, s0, s9
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_lshr_b32 s4, s4, 16
; GFX9-NEXT: s_mul_i32 s4, s4, s5
; GFX9-NEXT: v_mul_lo_u32 v0, s4, v0
; GFX9-NEXT: s_getpc_b64 s[6:7]
; GFX9-NEXT: s_add_u32 s6, s6, indirect@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s7, s7, indirect@rel32@hi+12
; GFX9-NEXT: v_mov_b32_e32 v3, s6
; GFX9-NEXT: v_mov_b32_e32 v4, s7
; GFX9-NEXT: v_mad_u32_u24 v0, v1, s5, v0
; GFX9-NEXT: v_add_lshl_u32 v0, v0, v2, 3
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: ds_write_b64 v0, v[3:4]
; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX9-NEXT: s_endpgm
%fptr = alloca void()*, addrspace(5)
%fptr.cast = addrspacecast void()* addrspace(5)* %fptr to void()**
store void()* @indirect, void()** %fptr.cast
%fp = load void()*, void()** %fptr.cast
call void %fp()
ret void
}
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.