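This patch removes the conservative uniformity check from the indirect call specialization callback, because whether the function pointer is uniform matters little in practice. Instead, specialization is controlled by a new argument; the test below exercises it through the `-amdgpu-indirect-call-specialization-threshold` command-line option.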
114 lines
5.6 KiB
LLVM
114 lines
5.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
;
; Exercises the amdgpu-attributor pass on a kernel that performs an indirect
; call, under three configurations. Each configuration has its own FileCheck
; prefix in addition to the shared one:
;   prefix OW  - the pass is invoked without parameters;
;   prefix CW  - the pass is invoked with the closed-world parameter;
;   prefix NO  - closed-world, plus -amdgpu-indirect-call-specialization-threshold=0.
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck --check-prefixes=CHECK,OW %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='amdgpu-attributor<closed-world>' %s | FileCheck --check-prefixes=CHECK,CW %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='amdgpu-attributor<closed-world>' -amdgpu-indirect-call-specialization-threshold=0 %s | FileCheck --check-prefixes=CHECK,NO %s

; "A5" makes address space 5 the alloca address space; the alloca in @foo is
; declared in addrspace(5) accordingly.
target datalayout = "A5"

; Global written by @bar1 (value 1) and @bar2 (value 2).
@G = global i32 0, align 4

;.
; CHECK: @G = global i32 0, align 4
;.
; Candidate callee for the indirect call in @foo: stores 1 to @G and returns.
; All three configurations expect the body unchanged and attribute group
; ATTR0 attached.
define void @bar1() {
; CHECK-LABEL: define {{[^@]+}}@bar1
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 1, ptr @G, align 4
; CHECK-NEXT:    ret void
;
entry:
  store i32 1, ptr @G, align 4
  ret void
}

; Candidate callee for the indirect call in @foo: stores 2 to @G and returns.
; All three configurations expect the body unchanged and attribute group
; ATTR0 attached.
define void @bar2() {
; CHECK-LABEL: define {{[^@]+}}@bar2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 2, ptr @G, align 4
; CHECK-NEXT:    ret void
;
entry:
  store i32 2, ptr @G, align 4
  ret void
}

; Returns the address of @bar1. Nothing in this test calls @helper1; its
; visible effect on the module is that @bar1 has its address taken
; (presumably this is what makes @bar1 a possible target of the indirect
; call in @foo - confirm against the pass).
define ptr @helper1() {
; CHECK-LABEL: define {{[^@]+}}@helper1
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret ptr @bar1
;
entry:
  ret ptr @bar1
}

; Returns the address of @bar2. Nothing in this test calls @helper2; its
; visible effect on the module is that @bar2 has its address taken
; (presumably this is what makes @bar2 a possible target of the indirect
; call in @foo - confirm against the pass).
define ptr @helper2() {
; CHECK-LABEL: define {{[^@]+}}@helper2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret ptr @bar2
;
entry:
  ret ptr @bar2
}

; Kernel under test. It stores the incoming function pointer %fp into an
; addrspace(5) alloca, reloads it, and calls through the reloaded value.
;
; Expected output per configuration (in every case the reload is gone and the
; call uses %fp directly):
;   prefix OW  - the call remains a plain indirect call;
;   prefix CW  - the call is specialized: %fp is compared against @bar1 and,
;                on a match, @bar1 is called directly; otherwise a
;                "br i1 true" leads to a direct call of @bar2, with an
;                unreachable block as the remaining successor;
;   prefix NO  - the call remains indirect but carries !callees metadata
;                listing @bar1 and @bar2.
define amdgpu_kernel void @foo(ptr noundef %fp) {
; OW-LABEL: define {{[^@]+}}@foo
; OW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
; OW-NEXT:  entry:
; OW-NEXT:    [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; OW-NEXT:    store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
; OW-NEXT:    call void [[FP]]()
; OW-NEXT:    ret void
;
; CW-LABEL: define {{[^@]+}}@foo
; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
; CW-NEXT:  entry:
; CW-NEXT:    [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CW-NEXT:    store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
; CW-NEXT:    [[TMP0:%.*]] = icmp eq ptr [[FP]], @bar1
; CW-NEXT:    br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]]
; CW:       1:
; CW-NEXT:    call void @bar1()
; CW-NEXT:    br label [[TMP5:%.*]]
; CW:       2:
; CW-NEXT:    br i1 true, label [[TMP3:%.*]], label [[TMP4:%.*]]
; CW:       3:
; CW-NEXT:    call void @bar2()
; CW-NEXT:    br label [[TMP5]]
; CW:       4:
; CW-NEXT:    unreachable
; CW:       5:
; CW-NEXT:    ret void
;
; NO-LABEL: define {{[^@]+}}@foo
; NO-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
; NO-NEXT:  entry:
; NO-NEXT:    [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NO-NEXT:    store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
; NO-NEXT:    call void [[FP]](), !callees [[META0:![0-9]+]]
; NO-NEXT:    ret void
;
entry:
  %fp.addr = alloca ptr, addrspace(5)
  store ptr %fp, ptr addrspace(5) %fp.addr
  %load = load ptr, ptr addrspace(5) %fp.addr
  call void %load()
  ret void
}

; Attribute groups and metadata expected for each configuration; these
; sections are emitted by update_test_checks.py because of --check-globals.
;.
; NO: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; NO: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
; OW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
; CW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
; NO: [[META0]] = !{ptr @bar1, ptr @bar2}
;.