Summary: This patch introduces a mechanism to check the code object version from the module flag, which avoids checking it from the command line. If the module flag is missing, we use the current default code object version supported by the compiler. For tools whose inputs are not IR, we may need another approach (a directive, for example) to check the code object version; that will be done in a separate patch later. For the LIT test updates, we directly add the module flag if there is only a single code object version associated with all checks in one file. In case of multiple code object versions in one file, we use the "sed" method to "clone" the checks to achieve the goal. Reviewer: arsenm Differential Revision: https://reviews.llvm.org/D14313
204 lines
10 KiB
LLVM
204 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) #0 {
|
|
; GCN-LABEL: @get_local_size_x(
|
|
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; GCN-NEXT: [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
|
|
; GCN-NEXT: [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 4
|
|
; GCN-NEXT: store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%group.id = tail call i32 @llvm.amdgcn.workgroup.id.x()
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%block.count.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
|
|
%cmp.id.count = icmp ult i32 %group.id, %block.count.x
|
|
%local.size.offset = select i1 %cmp.id.count, i64 12, i64 18
|
|
%gep.local.size = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 %local.size.offset
|
|
%local.size = load i16, ptr addrspace(4) %gep.local.size, align 2
|
|
store i16 %local.size, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_local_size_y(ptr addrspace(1) %out) #0 {
|
|
; GCN-LABEL: @get_local_size_y(
|
|
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; GCN-NEXT: [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
|
|
; GCN-NEXT: [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 2
|
|
; GCN-NEXT: store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%group.id = tail call i32 @llvm.amdgcn.workgroup.id.y()
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.block.count.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 4
|
|
%block.count.y = load i32, ptr addrspace(4) %gep.block.count.y, align 4
|
|
%cmp.id.count = icmp ult i32 %group.id, %block.count.y
|
|
%local.size.offset = select i1 %cmp.id.count, i64 14, i64 20
|
|
%gep.local.size = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 %local.size.offset
|
|
%local.size = load i16, ptr addrspace(4) %gep.local.size, align 2
|
|
store i16 %local.size, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_local_size_z(ptr addrspace(1) %out) #0 {
|
|
; GCN-LABEL: @get_local_size_z(
|
|
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; GCN-NEXT: [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
|
|
; GCN-NEXT: [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 4
|
|
; GCN-NEXT: store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%group.id = tail call i32 @llvm.amdgcn.workgroup.id.z()
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.block.count.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 8
|
|
%block.count.z = load i32, ptr addrspace(4) %gep.block.count.z, align 4
|
|
%cmp.id.count = icmp ult i32 %group.id, %block.count.z
|
|
%local.size.offset = select i1 %cmp.id.count, i64 16, i64 22
|
|
%gep.local.size = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 %local.size.offset
|
|
%local.size = load i16, ptr addrspace(4) %gep.local.size, align 2
|
|
store i16 %local.size, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_remainder_x(ptr addrspace(1) %out) #0 {
|
|
; GCN-LABEL: @get_remainder_x(
|
|
; GCN-NEXT: store i16 0, ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.x = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 18
|
|
%remainder.x = load i16, ptr addrspace(4) %gep.x, align 2
|
|
store i16 %remainder.x, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_remainder_y(ptr addrspace(1) %out) #0 {
|
|
; GCN-LABEL: @get_remainder_y(
|
|
; GCN-NEXT: store i16 0, ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 18
|
|
%remainder.y = load i16, ptr addrspace(4) %gep.y, align 2
|
|
store i16 %remainder.y, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_remainder_z(ptr addrspace(1) %out) #0 {
|
|
; GCN-LABEL: @get_remainder_z(
|
|
; GCN-NEXT: store i16 0, ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 18
|
|
%remainder.z = load i16, ptr addrspace(4) %gep.z, align 2
|
|
store i16 %remainder.z, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_work_group_size_x(ptr addrspace(1) %out) #0 {
|
|
; GCN-LABEL: @get_work_group_size_x(
|
|
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; GCN-NEXT: [[GEP_X:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
|
|
; GCN-NEXT: [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP_X]], align 4
|
|
; GCN-NEXT: store i16 [[GROUP_SIZE_X]], ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.x = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 12
|
|
%group.size.x = load i16, ptr addrspace(4) %gep.x, align 2
|
|
store i16 %group.size.x, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_work_group_size_y(ptr addrspace(1) %out) #0 {
|
|
; GCN-LABEL: @get_work_group_size_y(
|
|
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; GCN-NEXT: [[GEP_Y:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
|
|
; GCN-NEXT: [[GROUP_SIZE_Y:%.*]] = load i16, ptr addrspace(4) [[GEP_Y]], align 2
|
|
; GCN-NEXT: store i16 [[GROUP_SIZE_Y]], ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 14
|
|
%group.size.y = load i16, ptr addrspace(4) %gep.y, align 2
|
|
store i16 %group.size.y, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_work_group_size_z(ptr addrspace(1) %out) #0 {
|
|
; GCN-LABEL: @get_work_group_size_z(
|
|
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; GCN-NEXT: [[GEP_Z:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
|
|
; GCN-NEXT: [[GROUP_SIZE_Z:%.*]] = load i16, ptr addrspace(4) [[GEP_Z]], align 4
|
|
; GCN-NEXT: store i16 [[GROUP_SIZE_Z]], ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 16
|
|
%group.size.z = load i16, ptr addrspace(4) %gep.z, align 2
|
|
store i16 %group.size.z, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_work_group_size_x_reqd(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
|
|
; GCN-LABEL: @get_work_group_size_x_reqd(
|
|
; GCN-NEXT: store i16 8, ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.x = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 12
|
|
%group.size.x = load i16, ptr addrspace(4) %gep.x, align 2
|
|
store i16 %group.size.x, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_work_group_size_y_reqd(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
|
|
; GCN-LABEL: @get_work_group_size_y_reqd(
|
|
; GCN-NEXT: store i16 16, ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 14
|
|
%group.size.y = load i16, ptr addrspace(4) %gep.y, align 2
|
|
store i16 %group.size.y, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
|
|
define amdgpu_kernel void @get_work_group_size_z_reqd(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
|
|
; GCN-LABEL: @get_work_group_size_z_reqd(
|
|
; GCN-NEXT: store i16 2, ptr addrspace(1) [[OUT:%.*]], align 2
|
|
; GCN-NEXT: ret void
|
|
;
|
|
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%gep.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 16
|
|
%group.size.z = load i16, ptr addrspace(4) %gep.z, align 2
|
|
store i16 %group.size.z, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
|
|
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
|
|
declare i32 @llvm.amdgcn.workgroup.id.x() #1
|
|
declare i32 @llvm.amdgcn.workgroup.id.y() #1
|
|
declare i32 @llvm.amdgcn.workgroup.id.z() #1
|
|
|
|
!llvm.module.flags = !{!1}
|
|
|
|
attributes #0 = { nounwind "uniform-work-group-size"="true" }
|
|
attributes #1 = { nounwind readnone speculatable }
|
|
!0 = !{i32 8, i32 16, i32 2}
|
|
!1 = !{i32 1, !"amdgpu_code_object_version", i32 500}
|