If a function already has the `amdgpu-flat-work-group-size` attribute, honor it in `initialize` by taking its value directly; otherwise, use the default range as the starting point. We no longer manipulate the known range, which could cause problems: the known range acts as a "throttle" on the assumed range, so if the known range is not set correctly for whatever reason, the assumed range cannot be widened properly in `updateImpl`. Another benefit of leaving the known range alone is that indicating a pessimistic state also invalidates the AA, so `manifest` will not be called. Since we honor the attribute, we neither want nor will add any half-baked attribute to a function.
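To make the intended behavior concrete, here is a minimal toy sketch of the flow described above. This is a model for exposition only: `Range`, `FlatWorkGroupSizeAA`, and the field and method names are invented stand-ins, not the actual `Attributor` API.

```cpp
#include <cstdint>
#include <optional>

// Hypothetical stand-in for the real LLVM range state, for exposition only.
struct Range {
  uint32_t Min, Max;
};

struct FlatWorkGroupSizeAA {
  Range Assumed{};    // updateImpl() refines this range during iteration.
  bool Valid = true;  // once invalidated, manifest() is never invoked.

  // If the function already carries amdgpu-flat-work-group-size, take its
  // value verbatim; otherwise seed the assumed range with the default.
  // The known range is deliberately left untouched in both cases.
  void initialize(std::optional<Range> AttrValue, Range DefaultRange) {
    Assumed = AttrValue.value_or(DefaultRange);
  }

  // Indicating a pessimistic state invalidates the AA, so manifest() is
  // skipped and no half-baked attribute is written back to the function.
  void indicatePessimisticFixpoint() { Valid = false; }
};
```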
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V6 %s

declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0
declare i32 @llvm.amdgcn.lds.kernel.id() #0
declare i64 @llvm.amdgcn.dispatch.id() #0

declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0

; Avoid adding all of these to the output attribute sets
define void @use_everything_else() {
; CHECK-LABEL: define {{[^@]+}}@use_everything_else
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[VAL3:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; CHECK-NEXT:    [[VAL4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; CHECK-NEXT:    [[VAL5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) null, align 4
; CHECK-NEXT:    store volatile i32 [[VAL1]], ptr addrspace(1) null, align 4
; CHECK-NEXT:    store volatile i32 [[VAL2]], ptr addrspace(1) null, align 4
; CHECK-NEXT:    store volatile i32 [[VAL3]], ptr addrspace(1) null, align 4
; CHECK-NEXT:    store volatile i32 [[VAL4]], ptr addrspace(1) null, align 4
; CHECK-NEXT:    store volatile i32 [[VAL5]], ptr addrspace(1) null, align 4
; CHECK-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[QUEUE_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
; CHECK-NEXT:    [[VAL6:%.*]] = load volatile ptr, ptr addrspace(4) [[DISPATCH_PTR]], align 8
; CHECK-NEXT:    [[VAL7:%.*]] = load volatile ptr, ptr addrspace(4) [[QUEUE_PTR]], align 8
; CHECK-NEXT:    [[VAL8:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT:    store volatile i32 [[VAL8]], ptr addrspace(1) null, align 4
; CHECK-NEXT:    [[VAL9:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
; CHECK-NEXT:    store volatile i64 [[VAL9]], ptr addrspace(1) null, align 8
; CHECK-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, ptr addrspace(1) null
  store volatile i32 %val1, ptr addrspace(1) null
  store volatile i32 %val2, ptr addrspace(1) null
  store volatile i32 %val3, ptr addrspace(1) null
  store volatile i32 %val4, ptr addrspace(1) null
  store volatile i32 %val5, ptr addrspace(1) null
  %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
  %val6 = load volatile ptr, ptr addrspace(4) %dispatch.ptr
  %val7 = load volatile ptr, ptr addrspace(4) %queue.ptr
  %val8 = call i32 @llvm.amdgcn.lds.kernel.id()
  store volatile i32 %val8, ptr addrspace(1) null
  %val9 = call i64 @llvm.amdgcn.dispatch.id()
  store volatile i64 %val9, ptr addrspace(1) null
  ret void
}

define amdgpu_kernel void @test_default_queue_offset_v4_0(ptr addrspace(1) %kernarg) {
; V4-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0
; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] {
; V4-NEXT:    call void @use_everything_else()
; V4-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V4-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
; V4-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V4-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V4-NEXT:    ret void
;
; V5-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0
; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
; V5-NEXT:    call void @use_everything_else()
; V5-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V5-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
; V5-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V5-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V5-NEXT:    ret void
;
; V6-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0
; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
; V6-NEXT:    call void @use_everything_else()
; V6-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V6-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
; V6-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V6-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V6-NEXT:    ret void
;
  call void @use_everything_else()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 32
  %load = load ptr, ptr addrspace(4) %gep
  store ptr %load, ptr addrspace(1) %kernarg
  ret void
}

define amdgpu_kernel void @test_default_queue_offset_v5_0(ptr addrspace(1) %kernarg) {
; V4-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0
; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] {
; V4-NEXT:    call void @use_everything_else()
; V4-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V4-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
; V4-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V4-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V4-NEXT:    ret void
;
; V5-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0
; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] {
; V5-NEXT:    call void @use_everything_else()
; V5-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V5-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
; V5-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V5-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V5-NEXT:    ret void
;
; V6-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0
; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] {
; V6-NEXT:    call void @use_everything_else()
; V6-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V6-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
; V6-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V6-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V6-NEXT:    ret void
;
  call void @use_everything_else()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 104
  %load = load ptr, ptr addrspace(4) %gep
  store ptr %load, ptr addrspace(1) %kernarg
  ret void
}

define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) %kernarg) {
; V4-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0
; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
; V4-NEXT:    call void @use_everything_else()
; V4-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V4-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40
; V4-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V4-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V4-NEXT:    ret void
;
; V5-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0
; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
; V5-NEXT:    call void @use_everything_else()
; V5-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V5-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40
; V5-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V5-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V5-NEXT:    ret void
;
; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0
; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
; V6-NEXT:    call void @use_everything_else()
; V6-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V6-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40
; V6-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V6-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V6-NEXT:    ret void
;
  call void @use_everything_else()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 40
  %load = load ptr, ptr addrspace(4) %gep
  store ptr %load, ptr addrspace(1) %kernarg
  ret void
}

define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %kernarg) {
; CHECK-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0
; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT:    call void @use_everything_else()
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112
; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; CHECK-NEXT:    store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; CHECK-NEXT:    ret void
;
  call void @use_everything_else()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 112
  %load = load ptr, ptr addrspace(4) %gep
  store ptr %load, ptr addrspace(1) %kernarg
  ret void
}

define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr addrspace(1) %kernarg) {
; V4-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0
; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] {
; V4-NEXT:    call void @use_everything_else()
; V4-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V4-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
; V4-NEXT:    [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V4-NEXT:    store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V4-NEXT:    ret void
;
; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0
; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
; V5-NEXT:    call void @use_everything_else()
; V5-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V5-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
; V5-NEXT:    [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V5-NEXT:    store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V5-NEXT:    ret void
;
; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0
; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
; V6-NEXT:    call void @use_everything_else()
; V6-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V6-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
; V6-NEXT:    [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V6-NEXT:    store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V6-NEXT:    ret void
;
  call void @use_everything_else()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 32
  %load = load <2 x ptr>, ptr addrspace(4) %gep
  store <2 x ptr> %load, ptr addrspace(1) %kernarg
  ret void
}

define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr addrspace(1) %kernarg) {
; V4-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0
; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3]] {
; V4-NEXT:    call void @use_everything_else()
; V4-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V4-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
; V4-NEXT:    [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V4-NEXT:    store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V4-NEXT:    ret void
;
; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0
; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
; V5-NEXT:    call void @use_everything_else()
; V5-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V5-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
; V5-NEXT:    [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V5-NEXT:    store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V5-NEXT:    ret void
;
; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0
; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
; V6-NEXT:    call void @use_everything_else()
; V6-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; V6-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
; V6-NEXT:    [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V6-NEXT:    store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V6-NEXT:    ret void
;
  call void @use_everything_else()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 104
  %load = load <2 x ptr>, ptr addrspace(4) %gep
  store <2 x ptr> %load, ptr addrspace(1) %kernarg
  ret void
}

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}

;.
; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; V4: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V4: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V4: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V4: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V4: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
; V5: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; V5: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V5: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V5: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V5: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; V6: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V6: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V6: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V6: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
; V4: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
; V5: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
; V6: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
;.