If a function has the `amdgpu-flat-work-group-size` attribute, `initialize` honors it by taking its value directly; otherwise, the default range is used as the starting point. We no longer manipulate the known range. Doing so can cause problems because the known range acts as a "throttle" on the assumed range: if the known range is not set properly, for whatever reason, the assumed range cannot be widened properly in `updateImpl`. Another benefit of not touching the known range is that indicating a pessimistic state also invalidates the AA, so `manifest` will not be called. Since we honor the attribute, we do not want to, and will not, add a half-baked attribute to a function.
91 lines
4.4 KiB
LLVM
91 lines
4.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=AKF_GCN %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
|
|
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
|
|
|
|
; Datalayout fragment "A5": objects created by alloca live in address space 5,
; which matches the `alloca ptr, addrspace(5)` in @test_simple_indirect_call.
target datalayout = "A5"
|
|
|
|
; Trivial internal callee: takes no arguments and its body is a single
; `ret void`. Its address is what @test_simple_indirect_call stores into its
; alloca'd pointer slot and later calls through.
; The autogenerated checks below pin what each RUN configuration produces for
; this function: the AKF_GCN run expects the definition without an attribute
; group, the ATTRIBUTOR_GCN run expects it to carry attribute group ATTR0, and
; the GFX9 run expects only s_waitcnt followed by s_setpc_b64 s[30:31].
define internal void @indirect() {
|
|
; AKF_GCN-LABEL: define {{[^@]+}}@indirect() {
|
|
; AKF_GCN-NEXT: ret void
|
|
;
|
|
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@indirect
|
|
; ATTRIBUTOR_GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
|
; ATTRIBUTOR_GCN-NEXT: ret void
|
|
;
|
|
; GFX9-LABEL: indirect:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
ret void
|
|
}
|
|
|
|
; Kernel that calls @indirect through a function pointer round-tripped via
; memory: it allocas a pointer slot in addrspace(5), addrspacecasts the slot to
; a generic pointer, stores @indirect through it, loads the pointer back, and
; calls the loaded value.
; What the autogenerated checks expect from each RUN configuration:
;  - AKF_GCN run: the instructions stay as written (the call still goes
;    through the loaded pointer) and the kernel gets that run's attribute
;    group ATTR0.
;  - ATTRIBUTOR_GCN run: the addrspacecast is gone (the store and load use the
;    addrspace(5) pointer directly), the call names @indirect directly, the
;    load is still present, and the kernel gets attribute group ATTR1.
;  - GFX9 run: the exact gfx900 instruction sequence, which forms the callee
;    address from indirect@rel32 in s[18:19] and ends with s_swappc_b64 to
;    that address followed by s_endpgm.
define amdgpu_kernel void @test_simple_indirect_call() {
|
|
; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
|
; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
|
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
; AKF_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
|
|
; AKF_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8
|
|
; AKF_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
|
|
; AKF_GCN-NEXT: call void [[FP]]()
|
|
; AKF_GCN-NEXT: ret void
|
|
;
|
|
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
|
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
|
|
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
|
|
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
|
|
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
|
|
; ATTRIBUTOR_GCN-NEXT: ret void
|
|
;
|
|
; GFX9-LABEL: test_simple_indirect_call:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
|
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
|
; GFX9-NEXT: s_mov_b32 s13, s15
|
|
; GFX9-NEXT: s_mov_b32 s12, s14
|
|
; GFX9-NEXT: s_load_dwordx2 s[14:15], s[4:5], 0x4
|
|
; GFX9-NEXT: s_add_u32 s0, s0, s17
|
|
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
|
; GFX9-NEXT: s_mov_b32 s32, 0
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_lshr_b32 s14, s14, 16
|
|
; GFX9-NEXT: s_mul_i32 s14, s14, s15
|
|
; GFX9-NEXT: v_mul_lo_u32 v3, s14, v0
|
|
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
|
; GFX9-NEXT: s_add_u32 s18, s18, indirect@rel32@lo+4
|
|
; GFX9-NEXT: s_addc_u32 s19, s19, indirect@rel32@hi+12
|
|
; GFX9-NEXT: s_mov_b32 s14, s16
|
|
; GFX9-NEXT: v_mad_u32_u24 v3, v1, s15, v3
|
|
; GFX9-NEXT: v_add_lshl_u32 v5, v3, v2, 3
|
|
; GFX9-NEXT: v_mov_b32_e32 v3, s18
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
|
; GFX9-NEXT: v_mov_b32_e32 v4, s19
|
|
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
|
; GFX9-NEXT: ds_write_b64 v5, v[3:4]
|
|
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
|
; GFX9-NEXT: s_endpgm
|
|
; Input IR of the kernel starts here; everything above inside this function is
; autogenerated check lines.
%fptr = alloca ptr, addrspace(5)
|
|
%fptr.cast = addrspacecast ptr addrspace(5) %fptr to ptr
|
|
store ptr @indirect, ptr %fptr.cast
|
|
%fp = load ptr, ptr %fptr.cast
|
|
call void %fp()
|
|
ret void
|
|
}
|
|
|
|
|
|
; Module flags: the single entry !0 sets "amdhsa_code_object_version" to 500,
; with i32 1 in the flag-behavior field. The metadata checks at the end of the
; file expect the opt runs to print this node unchanged.
!llvm.module.flags = !{!0}
|
|
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
|
|
;.
|
|
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
|
|
;.
|
|
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
|
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
|
;.
|
|
; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
|
|
;.
|
|
; ATTRIBUTOR_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
|
|
;.
|