There are many tests that specify a target triple/CPU flags but no DataLayout, which can lead to IR being generated that has unusual behaviour. This commit attempts to use the default DataLayout based on the relevant flags if there is no explicit override on the command line or in the IR file. One thing that is not currently possible is to differentiate an explicit `target datalayout = ""` in the IR file from a missing datalayout, since the current APIs don't allow detecting this case. If it is considered useful to support this case (instead of passing "-data-layout=" on the command line), I can change the IR parsers to track whether they have seen such a directive and change the callback type. Differential Revision: https://reviews.llvm.org/D141060
81 lines
5.0 KiB
LLVM
81 lines
5.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3
|
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
|
|
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
|
|
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
|
|
|
|
; Chain of single-member wrapper structs. Each level wraps the next one, and
; the innermost type holds a single [3 x i8] array.
%vec_type = type { %vec_base }
|
|
%vec_base = type { %union.anon }
|
|
%union.anon = type { %"vec_base<char, 3>::n_vec_" }
|
|
%"vec_base<char, 3>::n_vec_" = type { [3 x i8] }
|
|
|
|
; Comdat groups and the two addrspace(3) globals that the kernel @test reads
; and writes. Both globals are of type %vec_type, are declared with align 1 and
; have a poison initializer.
$_f1 = comdat any
|
|
$_f2 = comdat any
|
|
@_f1 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type poison, comdat, align 1
|
|
@_f2 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type poison, comdat, align 1
|
|
|
|
;.
|
|
; CHECK: @[[LLVM_AMDGCN_KERNEL_TEST_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]] poison, align 4, !absolute_symbol !0
|
|
;.
|
|
; Kernel under test. It writes a byte to @_f1, copies 3 bytes from @_f1 to
; @_f2 with llvm.memcpy, reads the first byte of @_f2 back and compares it with
; the value that was just written. This is done once with the value 3 and once
; with the value 2. The two compare results are ANDed, zero-extended to i8 and
; stored through %ptr.coerce.
; The opt assertions in this function expect both globals to be accessed
; through @llvm.amdgcn.kernel.test.lds after lowering, with @_f1 at the start
; of that variable and @_f2 at field index 2.
define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 {
|
|
; GCN-LABEL: test:
|
|
; GCN: ; %bb.0: ; %entry
|
|
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
|
; GCN-NEXT: v_mov_b32_e32 v1, 2
|
|
; GCN-NEXT: ds_write_b8 v0, v1
|
|
; GCN-NEXT: ds_read_u8 v2, v0 offset:2
|
|
; GCN-NEXT: ds_read_u16 v3, v0
|
|
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
|
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NEXT: ds_write_b8 v0, v2 offset:6
|
|
; GCN-NEXT: ds_write_b16 v0, v3 offset:4
|
|
; GCN-NEXT: v_cmp_eq_u16_sdwa s[2:3], v3, v1 src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GCN-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
|
|
; GCN-NEXT: global_store_byte v0, v1, s[0:1]
|
|
; GCN-NEXT: s_endpgm
|
|
; CHECK-LABEL: define protected amdgpu_kernel void @test(
|
|
; CHECK-SAME: ptr addrspace(1) nocapture [[PTR_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
|
|
; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
|
|
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 3
|
|
; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
|
|
; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
|
|
; CHECK-NEXT: [[CMP_I_I19:%.*]] = icmp eq i8 [[TMP1]], 2
|
|
; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[CMP_I_I19]], [[CMP_I_I]]
|
|
; CHECK-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP2]] to i8
|
|
; CHECK-NEXT: store i8 [[FROMBOOL8]], ptr addrspace(1) [[PTR_COERCE]], align 1
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; Round 1: write 3 to @_f1, copy @_f1 into @_f2, then read @_f2 back.
store i8 3, ptr addrspace(3) @_f1, align 1
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false)
|
|
%0 = load i8, ptr addrspace(3) @_f2, align 1
|
|
%cmp.i.i = icmp eq i8 %0, 3
|
|
; Round 2: the same store / copy / load sequence with the value 2.
store i8 2, ptr addrspace(3) @_f1, align 1
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false)
|
|
%1 = load i8, ptr addrspace(3) @_f2, align 1
|
|
%cmp.i.i19 = icmp eq i8 %1, 2
|
|
; Both round trips must have matched; the i1 result is widened to i8 and
; written to the kernel's pointer argument.
%2 = and i1 %cmp.i.i19, %cmp.i.i
|
|
%frombool8 = zext i1 %2 to i8
|
|
store i8 %frombool8, ptr addrspace(1) %ptr.coerce, align 1
|
|
ret void
|
|
}
|
|
|
|
; memcpy intrinsic used by @test: both pointer operands are in addrspace(3)
; and the length operand is an i64.
declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #1
|
|
|
|
;.
|
|
; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="7" }
|
|
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
|
;.
|
|
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 1}
|
|
; CHECK: [[META1:![0-9]+]] = !{!2}
|
|
; CHECK: [[META2:![0-9]+]] = distinct !{!2, !3}
|
|
; CHECK: [[META3:![0-9]+]] = distinct !{!3}
|
|
; CHECK: [[META4:![0-9]+]] = !{!5}
|
|
; CHECK: [[META5:![0-9]+]] = distinct !{!5, !3}
|
|
; CHECK: [[META6:![0-9]+]] = !{!5, !2}
|
|
; CHECK: [[META7:![0-9]+]] = !{}
|
|
;.
|