SIMachineFunctionInfo scans the function body for inline asm that may
use AGPRs, and for callees. Move this scan into the attributor, so it
actually works interprocedurally.
Most of the test churn could probably be avoided if the attributor did
not add the attribute on subtargets without AGPRs. We should also
probably try to delete the MIR scan in usesAGPRs, but that seems
trickier to eliminate.
256 lines
10 KiB
LLVM
256 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4
|
|
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
|
|
|
|
; Kernel containing inline asm with one i32 input constrained to "a"
; (the AMDGPU AGPR register-class constraint). The expected attribute group,
; captured as ATTR0, does not contain "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_uses_asm_virtreg() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg(
|
|
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: call void asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void asm sideeffect "; use $0", "a"(i32 poison)
|
|
ret void
|
|
}
|
|
|
|
; Kernel containing inline asm that produces an i32 result through the "=a"
; output constraint (AGPR register class). The expected attribute group is
; ATTR0, which does not contain "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def(
|
|
; CHECK-SAME: ) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[DEF:%.*]] = call i32 asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%def = call i32 asm sideeffect "; def $0", "=a"()
|
|
ret void
|
|
}
|
|
|
|
; Kernel containing inline asm that produces an i64 result in the explicitly
; named register range "={a[0:1]}". The expected attribute group is ATTR0,
; which does not contain "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
|
|
; CHECK-SAME: ) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%def = call i64 asm sideeffect "; def $0", "={a[0:1]}"()
|
|
ret void
|
|
}
|
|
|
|
; Kernel containing inline asm with two i32 inputs constrained "v,a": the "a"
; (AGPR) constraint is on the second operand, not the first. The expected
; attribute group is ATTR0, which does not contain "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg(
|
|
; CHECK-SAME: ) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison)
|
|
ret void
|
|
}
|
|
|
|
; Kernel containing inline asm whose only operand uses the "v" constraint, so
; no "a" constraint is present. The expected attribute group, captured as
; ATTR1, contains "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_uses_non_agpr_asm() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm(
|
|
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
|
|
; CHECK-NEXT: call void asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void asm sideeffect "; use $0", "v"(i32 poison)
|
|
ret void
|
|
}
|
|
|
|
; Kernel containing inline asm with one i32 input pinned to the explicitly
; named register "{a0}". The expected attribute group is ATTR0, which does not
; contain "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_uses_asm_physreg() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg(
|
|
; CHECK-SAME: ) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void asm sideeffect "; use $0", "{a0}"(i32 poison)
|
|
ret void
|
|
}
|
|
|
|
; Kernel containing inline asm with one i64 input pinned to the explicitly
; named register range "{a[0:1]}". The expected attribute group is ATTR0,
; which does not contain "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
|
|
; CHECK-SAME: ) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
|
|
ret void
|
|
}
|
|
|
|
; Non-kernel function (no amdgpu_kernel calling convention) containing inline
; asm with one i32 input constrained to "a". The expected attribute group,
; captured as ATTR2, does not contain "amdgpu-no-agpr"; unlike the kernel
; groups it also carries "amdgpu-waves-per-eu"="4,8".
define void @func_uses_asm_virtreg_agpr() {
|
|
; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr(
|
|
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
|
|
; CHECK-NEXT: call void asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void asm sideeffect "; use $0", "a"(i32 poison)
|
|
ret void
|
|
}
|
|
|
|
; Non-kernel function containing inline asm with one i32 input pinned to the
; explicitly named register "{a0}". The expected attribute group is ATTR2,
; which does not contain "amdgpu-no-agpr". This function is also the callee
; used by @kernel_transitively_uses_agpr_asm and
; @kernel_calls_non_agpr_and_agpr.
define void @func_uses_asm_physreg_agpr() {
|
|
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr(
|
|
; CHECK-SAME: ) #[[ATTR2]] {
|
|
; CHECK-NEXT: call void asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void asm sideeffect "; use $0", "{a0}"(i32 poison)
|
|
ret void
|
|
}
|
|
|
|
; Non-kernel function containing inline asm with one i64 input pinned to the
; explicitly named register range "{a[0:1]}". The expected attribute group is
; ATTR2, which does not contain "amdgpu-no-agpr".
define void @func_uses_asm_physreg_agpr_tuple() {
|
|
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
|
|
; CHECK-SAME: ) #[[ATTR2]] {
|
|
; CHECK-NEXT: call void asm sideeffect "
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
|
|
ret void
|
|
}
|
|
|
|
; Declaration only: no body for @unknown is defined in this file. It is the
; callee in @kernel_calls_extern and @kernel_calls_extern_marked_callsite.
declare void @unknown()
|
|
|
|
; Kernel that directly calls @unknown, which is only declared in this file.
; The expected attribute group, captured as ATTR4, holds just "target-cpu"
; and "uniform-work-group-size"; in particular it has no "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_calls_extern() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
|
|
; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
|
|
; CHECK-NEXT: call void @unknown()
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void @unknown()
|
|
ret void
|
|
}
|
|
|
|
; Same as @kernel_calls_extern, except the call site carries attribute group
; #0 ("amdgpu-no-agpr"). The kernel is still expected to get ATTR4, which has
; no "amdgpu-no-agpr"; the call-site group is expected to remain in the
; output, captured as ATTR9.
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
|
|
; CHECK-SAME: ) #[[ATTR4]] {
|
|
; CHECK-NEXT: call void @unknown() #[[ATTR9:[0-9]+]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void @unknown() #0
|
|
ret void
|
|
}
|
|
|
|
; Kernel that makes an indirect call through its pointer argument %indirect.
; The expected attribute group is ATTR4, which holds just "target-cpu" and
; "uniform-work-group-size" and therefore has no "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
|
|
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] {
|
|
; CHECK-NEXT: call void [[INDIRECT]]()
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void %indirect()
|
|
ret void
|
|
}
|
|
|
|
; Same as @kernel_calls_indirect, except the indirect call site carries
; attribute group #0 ("amdgpu-no-agpr"). The kernel is still expected to get
; ATTR4, which has no "amdgpu-no-agpr"; the call-site group is expected to
; remain in the output as ATTR9.
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
|
|
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] {
|
|
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR9]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void %indirect() #0
|
|
ret void
|
|
}
|
|
|
|
; Kernel with no inline asm of its own; it calls @func_uses_asm_physreg_agpr,
; whose body contains inline asm with a "{a0}" operand. The expected attribute
; group is ATTR0, which does not contain "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
|
|
; CHECK-SAME: ) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void @func_uses_asm_physreg_agpr()
|
|
ret void
|
|
}
|
|
|
|
; Non-kernel function whose body is a single return. The expected attribute
; group, captured as ATTR5, contains "amdgpu-no-agpr". It is called by
; @kernel_calls_empty and @kernel_calls_non_agpr_and_agpr, and is one of the
; two select operands in @indirect_calls_none_agpr.
define void @empty() {
|
|
; CHECK-LABEL: define void @empty(
|
|
; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
; Second non-kernel function whose body is a single return; expected to get
; the same attribute group as @empty (ATTR5, which contains "amdgpu-no-agpr").
; It is the other select operand in @indirect_calls_none_agpr.
define void @also_empty() {
|
|
; CHECK-LABEL: define void @also_empty(
|
|
; CHECK-SAME: ) #[[ATTR5]] {
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
; Kernel whose only call is a direct call to @empty. The expected attribute
; group is ATTR1, which contains "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_calls_empty() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty(
|
|
; CHECK-SAME: ) #[[ATTR1]] {
|
|
; CHECK-NEXT: call void @empty()
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void @empty()
|
|
ret void
|
|
}
|
|
|
|
; Kernel that directly calls both @empty (expected to carry "amdgpu-no-agpr")
; and @func_uses_asm_physreg_agpr (expected not to carry it). The expected
; attribute group for the kernel is ATTR0, which does not contain
; "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
|
|
; CHECK-SAME: ) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void @empty()
|
|
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void @empty()
|
|
|
|
call void @func_uses_asm_physreg_agpr()
|
|
ret void
|
|
}
|
|
|
|
; Kernel whose only call is to the target-independent intrinsic
; @llvm.memcpy.p0.p0.i64. The expected attribute group is ATTR1, which
; contains "amdgpu-no-agpr".
define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic(
|
|
; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Declaration of the AMDGPU MFMA intrinsic called by
; @kernel_calls_mfma.f32.32x32x1f32; the last three i32 operands are immarg.
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32 immarg, i32 immarg, i32 immarg)
|
|
|
|
; Kernel that calls the @llvm.amdgcn.mfma.f32.32x32x1f32 intrinsic and stores
; the <32 x float> result to %out. The expected attribute group is ATTR1,
; which contains "amdgpu-no-agpr". The expected output also shows the store
; with an explicit "align 128".
define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(
|
|
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0)
|
|
; CHECK-NEXT: store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
|
|
store <32 x float> %result, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Kernel that calls the @llvm.amdgcn.workitem.id.x intrinsic and stores the
; i32 result to %out. The expected attribute group is ATTR1, which contains
; "amdgpu-no-agpr". The expected output also shows the store with an explicit
; "align 4".
define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x(
|
|
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
|
|
; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%result = call i32 @llvm.amdgcn.workitem.id.x()
|
|
store i32 %result, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Kernel that makes an indirect call through a pointer selected between
; @empty and @also_empty. Both of those functions are expected to carry
; "amdgpu-no-agpr" (ATTR5), yet the expected group for this kernel is ATTR0,
; which does not contain "amdgpu-no-agpr".
define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
|
|
; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
|
|
; CHECK-NEXT: call void [[FPTR]]()
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%fptr = select i1 %cond, ptr @empty, ptr @also_empty
|
|
call void %fptr()
|
|
ret void
|
|
}
|
|
|
|
|
|
; Input attribute group applied only to the call sites in
; @kernel_calls_extern_marked_callsite and
; @kernel_calls_indirect_marked_callsite; no function definition in this file
; uses it.
attributes #0 = { "amdgpu-no-agpr" }
|
|
;.
|
|
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
|
|
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
|
|
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
|
|
; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
|
|
; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
|
|
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
|
|
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
|
|
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
|
|
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
|
|
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" }
|
|
;.
|