Summary: This patch introduces a mechanism to check the code object version from the module flag. This avoids checking it from the command line. If the module flag is missing, we use the current default code object version supported by the compiler. For tools whose inputs are not IR, we may need another approach (a directive, for example) to check the code object version; that will be in a separate patch later. For the LIT test updates, we directly add the module flag if there is only a single code object version associated with all checks in one file. In case of multiple code object versions in one file, we use the "sed" method to "clone" the checks to achieve the goal. Reviewer: arsenm Differential Revision: https://reviews.llvm.org/D14313
298 lines
9.0 KiB
LLVM
298 lines
9.0 KiB
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s
|
|
|
|
; External function with no attributes and no body in this file; called by
; test_kernel20 and test_kernel21.
declare void @function1()
|
|
|
|
; External function declared with attribute group #0 ("amdgpu-no-queue-ptr");
; called by test_kernel22.
declare void @function2() #0
|
|
|
|
; Function Attrs: noinline
|
|
; Helper: stores the pointer value %argptr itself into the memory at %sink.
; test_kernel71 calls it with a pointer derived from the implicit-argument pointer.
define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #2 {
|
|
store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: noinline
|
|
; Helper: writes the i64 %arg through %a.
; test_kernel60 calls it with a value loaded at offset 200 from the
; implicit-argument pointer.
define void @function4(i64 %arg, ptr %a) #2 {
|
|
store i64 %arg, ptr %a
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: noinline
|
|
; Helper: reads the i64 located 168 bytes past %ptr and writes it to %sink.
; test_kernel61 passes the implicit-argument pointer plus 32 as %ptr, so the
; load there covers bytes 200..207 of the implicit-argument area.
define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
  %field.addr = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 168
  %field = load i64, ptr addrspace(4) %field.addr
  store i64 %field, ptr %sink
  ret void
}
|
|
|
|
; Function Attrs: nounwind readnone speculatable willreturn
|
|
; Intrinsic returning an addrspace(4) pointer (return alignment 4). The kernels
; in this file add byte offsets to it to reach individual implicit arguments.
declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
|
|
|
|
; CHECK: amdhsa.kernels:
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel10
|
|
; Baseline kernel: only stores the constant 3 through %a. It makes no calls and
; never reads the implicit-argument pointer.
define amdgpu_kernel void @test_kernel10(ptr %a) {
|
|
store i8 3, ptr %a, align 1
|
|
ret void
|
|
}
|
|
|
|
; Call to an extern function
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel20
|
|
; Calls @function1, which is only declared (no body, no attributes) in this
; file, then stores the constant 3 through %a.
define amdgpu_kernel void @test_kernel20(ptr %a) {
|
|
call void @function1()
|
|
store i8 3, ptr %a, align 1
|
|
ret void
|
|
}
|
|
|
|
; Explicit attribute on kernel
|
|
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel21
|
|
; Same body as test_kernel20 (call to the external @function1, then a store),
; but the kernel itself carries attribute group #0 ("amdgpu-no-queue-ptr").
define amdgpu_kernel void @test_kernel21(ptr %a) #0 {
|
|
call void @function1()
|
|
store i8 3, ptr %a, align 1
|
|
ret void
|
|
}
|
|
|
|
; Explicit attribute on extern callee
|
|
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel22
|
|
; Calls the external @function2, whose declaration carries attribute group #0
; ("amdgpu-no-queue-ptr"); the kernel itself has no attributes.
define amdgpu_kernel void @test_kernel22(ptr %a) {
|
|
call void @function2()
|
|
store i8 3, ptr %a, align 1
|
|
ret void
|
|
}
|
|
|
|
; Access more bytes than the pointer size
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel30
|
|
define amdgpu_kernel void @test_kernel30(ptr %a) {
  ; Read 16 bytes (one i128) starting at byte 192 past the implicit-argument
  ; pointer, i.e. bytes 192..207, and copy them out through %a.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %wide.addr = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 192
  %wide = load i128, ptr addrspace(4) %wide.addr
  store i128 %wide, ptr %a
  ret void
}
|
|
|
|
; Typical load of queue pointer
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel40
|
|
define amdgpu_kernel void @test_kernel40(ptr %a) {
  ; Load the i64 at byte offset 200 past the implicit-argument pointer
  ; (bytes 200..207) and write it out through %a.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %slot = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 200
  %val = load i64, ptr addrspace(4) %slot
  store i64 %val, ptr %a
  ret void
}
|
|
|
|
; Typical usage, overridden by explicit attribute on kernel
|
|
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel41
|
|
define amdgpu_kernel void @test_kernel41(ptr %a) #0 {
  ; Same access as test_kernel40 (i64 load at byte offset 200), but this kernel
  ; is explicitly tagged with attribute group #0 ("amdgpu-no-queue-ptr").
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %slot = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 200
  %val = load i64, ptr addrspace(4) %slot
  store i64 %val, ptr %a
  ret void
}
|
|
|
|
; Access to implicit arg before the queue pointer
|
|
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel42
|
|
define amdgpu_kernel void @test_kernel42(ptr %a) {
  ; i64 load at byte offset 192: touches bytes 192..199 only, ending right
  ; before offset 200.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %slot = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 192
  %val = load i64, ptr addrspace(4) %slot
  store i64 %val, ptr %a
  ret void
}
|
|
|
|
; Access to implicit arg after the queue pointer
|
|
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel43
|
|
define amdgpu_kernel void @test_kernel43(ptr %a) {
  ; i64 load at byte offset 208: touches bytes 208..215, starting right after
  ; byte 207.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %slot = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 208
  %val = load i64, ptr addrspace(4) %slot
  store i64 %val, ptr %a
  ret void
}
|
|
|
|
; Accessing a byte just before the queue pointer
|
|
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel44
|
|
define amdgpu_kernel void @test_kernel44(ptr %a) {
  ; Single-byte load at offset 199, one byte below offset 200.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %byte.addr = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 199
  %byte = load i8, ptr addrspace(4) %byte.addr, align 1
  store i8 %byte, ptr %a, align 1
  ret void
}
|
|
|
|
; Accessing the first byte of the queue pointer (offset 200)
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel45
|
|
define amdgpu_kernel void @test_kernel45(ptr %a) {
  ; Single-byte load at offset 200.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %byte.addr = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 200
  %byte = load i8, ptr addrspace(4) %byte.addr, align 1
  store i8 %byte, ptr %a, align 1
  ret void
}
|
|
|
|
; Accessing the last byte inside the queue pointer (offset 207)
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel46
|
|
define amdgpu_kernel void @test_kernel46(ptr %a) {
  ; Single-byte load at offset 207, one byte below offset 208.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %byte.addr = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 207
  %byte = load i8, ptr addrspace(4) %byte.addr, align 1
  store i8 %byte, ptr %a, align 1
  ret void
}
|
|
|
|
; Accessing a byte just after the queue pointer
|
|
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel47
|
|
define amdgpu_kernel void @test_kernel47(ptr %a) {
  ; Single-byte load at offset 208.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %byte.addr = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 208
  %byte = load i8, ptr addrspace(4) %byte.addr, align 1
  store i8 %byte, ptr %a, align 1
  ret void
}
|
|
|
|
; Access with an unknown offset
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel50
|
|
define amdgpu_kernel void @test_kernel50(ptr %a, i32 %b) {
  ; The byte offset is the kernel argument %b, so it is not a compile-time
  ; constant; one byte is loaded from that location and stored through %a.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %dyn.addr = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i32 %b
  %byte = load i8, ptr addrspace(4) %dyn.addr, align 1
  store i8 %byte, ptr %a, align 1
  ret void
}
|
|
|
|
; Multiple geps reaching the queue pointer argument.
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel51
|
|
define amdgpu_kernel void @test_kernel51(ptr %a) {
  ; Two chained constant offsets: 16 + 184 = 200, so the byte load lands at
  ; offset 200 past the implicit-argument pointer.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %step1 = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 16
  %step2 = getelementptr inbounds i8, ptr addrspace(4) %step1, i64 184
  %byte = load i8, ptr addrspace(4) %step2, align 1
  store i8 %byte, ptr %a, align 1
  ret void
}
|
|
|
|
; Multiple geps not reaching the queue pointer argument.
|
|
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel52
|
|
define amdgpu_kernel void @test_kernel52(ptr %a) {
  ; Two chained constant offsets: 16 + 16 = 32, so the byte load lands at
  ; offset 32 past the implicit-argument pointer.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %step1 = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 16
  %step2 = getelementptr inbounds i8, ptr addrspace(4) %step1, i64 16
  %byte = load i8, ptr addrspace(4) %step2, align 1
  store i8 %byte, ptr %a, align 1
  ret void
}
|
|
|
|
; Queue pointer used inside a function call
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel60
|
|
define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
  ; The i64 at byte offset 200 is loaded here in the kernel; only the loaded
  ; value (not the pointer) is handed to @function4, which stores it via %a.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %slot = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 200
  %val = load i64, ptr addrspace(4) %slot
  call void @function4(i64 %val, ptr %a)
  ret void
}
|
|
|
|
; Queue pointer retrieved inside a function call; chain of geps
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel61
|
|
define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
  ; Passes (implicit-argument pointer + 32) to @function5, which adds a further
  ; 168 bytes before loading, so the load happens in the callee at offset 200.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %partial = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i64 32
  call void @function5(ptr addrspace(4) %partial, ptr %a)
  ret void
}
|
|
|
|
; Pointer captured
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel70
|
|
define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
  ; Nothing is loaded through the implicit-argument pointer; instead the
  ; derived pointer itself (base + 42) is written to memory at %sink.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %derived = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i32 42
  store ptr addrspace(4) %derived, ptr addrspace(1) %sink, align 8
  ret void
}
|
|
|
|
; Pointer captured inside function call
|
|
|
|
; CHECK: - .args:
|
|
; CHECK: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel71
|
|
define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
  ; The derived pointer (base + 42) is handed to @function3, which stores the
  ; pointer value into %sink.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %derived = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i32 42
  call void @function3(ptr addrspace(4) %derived, ptr addrspace(1) %sink)
  ret void
}
|
|
|
|
; Ineffective pointer capture
|
|
|
|
; CHECK: - .args:
|
|
; CHECK-NOT: hidden_queue_ptr
|
|
; CHECK-LABEL: .name: test_kernel72
|
|
define amdgpu_kernel void @test_kernel72() #2 {
  ; The derived pointer (base + 42) is stored, but the destination address is
  ; undef rather than a real kernel argument.
  %implicitarg = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %derived = getelementptr inbounds i8, ptr addrspace(4) %implicitarg, i32 42
  store ptr addrspace(4) %derived, ptr addrspace(1) undef, align 8
  ret void
}
|
|
|
|
; Attribute groups referenced by the declarations and definitions in this file.
; #0: used on @function2, @test_kernel21 and @test_kernel41.
attributes #0 = { "amdgpu-no-queue-ptr" }
|
|
; #1: used on the @llvm.amdgcn.implicitarg.ptr declaration.
attributes #1 = { nounwind readnone speculatable willreturn }
|
|
; #2: used on the helper functions and on the kernels that call them or
; store the derived pointer (test_kernel60 through test_kernel72).
attributes #2 = { noinline }
|
|
|
|
; Module flags: this module carries a single flag, !0.
!llvm.module.flags = !{!0}
|
|
; Sets "amdgpu_code_object_version" to 500. The leading i32 1 is the flag's
; merge behavior (1 = Error in the LLVM LangRef module-flags table).
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
|