If a function has the `amdgpu-flat-work-group-size` attribute, `initialize` honors it by taking its value directly; otherwise, the default range is used as a starting point. We no longer manipulate the known range. Doing so can cause issues because the known range acts as a "throttle" on the assumed range: if the known range is not set properly for whatever reason, the assumed range cannot be widened properly in `updateImpl`. Another benefit of not touching the known range is that indicating a pessimistic state also invalidates the AA, so `manifest` will not be called. Since we honor the attribute, we do not want to add, and will not add, any half-baked attribute to a function.
48 lines
2.8 KiB
LLVM
48 lines
2.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=AKF_GCN %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
|
|
|
|
; Internal callee with an empty body. @test_simple_indirect_call stores its
; address to memory and calls it through the reloaded pointer.
define internal void @indirect() {
|
|
; AKF_GCN-LABEL: define {{[^@]+}}@indirect() {
|
|
; AKF_GCN-NEXT: ret void
|
|
;
|
|
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@indirect
|
|
; ATTRIBUTOR_GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
|
; ATTRIBUTOR_GCN-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
; Kernel that round-trips the address of @indirect through an addrspace(5)
; alloca and then calls the loaded pointer. The ATTRIBUTOR_GCN checks expect
; the call to be rewritten as a direct call to @indirect, while the AKF_GCN
; checks expect the call through the loaded pointer to remain.
define amdgpu_kernel void @test_simple_indirect_call() #0 {
|
|
; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
|
; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
|
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
; AKF_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
|
|
; AKF_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
|
|
; AKF_GCN-NEXT: call void [[FP]]()
|
|
; AKF_GCN-NEXT: ret void
|
|
;
|
|
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
|
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
|
|
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
|
|
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
|
|
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
|
|
; ATTRIBUTOR_GCN-NEXT: ret void
|
|
;
|
|
; Slot that holds the function pointer.
%fptr = alloca ptr, addrspace(5)
|
|
; Write the address of @indirect into the slot ...
store ptr @indirect, ptr addrspace(5) %fptr
|
|
; ... and read it straight back, so the only stored value is @indirect.
%fp = load ptr, ptr addrspace(5) %fptr
|
|
; Call through the loaded pointer; this is the call site the checks above inspect.
call void %fp()
|
|
ret void
|
|
}
|
|
|
|
; Input attribute group #0, referenced by @test_simple_indirect_call.
attributes #0 = { "amdgpu-no-dispatch-id" }
|
|
|
|
;.
|
|
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
|
|
;.
|
|
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
|
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
|
;.
|