This is still relying on the manual code for splitting 64-bit constants, and handling pointers. We were missing some of the tablegen patterns for all immediate types, so this has some side effect DAG path improvements. This also reduces the diff in the 2 selector outputs.
665 lines
26 KiB
LLVM
665 lines
26 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
|
|
|
|
; LDS is allocated per-kernel. Module scope variables are gathered into a struct which is
; allocated at address zero, if used by the kernel. Kernel scope variables are gathered into
; a per-kernel struct and allocated immediately after the module scope.
; This test checks that the module and kernel scope variables are allocated in deterministic
; order without spurious alignment padding between the two.

; External LDS is checked because it influences LDS padding in general and because it will
; not be moved into either module or kernel struct.
|
|
|
|
; 2-byte LDS (addrspace(3)) variable. It is stored to by the non-kernel function
; @use_module and by every module_1_* kernel in this file.
@module_variable = addrspace(3) global i16 undef
|
|
|
|
; Variables are allocated into module scope block when used by a non-kernel function
|
|
; Non-kernel helper (attribute group #0) that stores i16 0 to @module_variable.
; The checks reuse v0 (= 0) as both operands of ds_write_b16, i.e. the store is
; expected to target LDS address 0 with value 0.
define void @use_module() #0 {
; CHECK-LABEL: use_module:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: ds_write_b16 v0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
  store i16 0, ptr addrspace(3) @module_variable
  ret void
}
|
|
|
|
; Variables only used by kernels are specialised and allocated per-kernel
|
|
; Two i16 LDS variables that are only stored to from kernels in this file: one
; with default alignment and one explicitly over-aligned to 4 bytes.
@kernel_normal = addrspace(3) global i16 undef
@kernel_overalign = addrspace(3) global i16 undef, align 4

; External LDS shall not introduce padding between module and kernel scope variables
; Both externals are zero-length float arrays; they differ only in the explicit
; `align 8` on the second.
@extern_normal = external addrspace(3) global [0 x float]
@extern_overalign = external addrspace(3) global [0 x float], align 8
|
|
|
|
|
|
; External LDS does not influence the frame when called indirectly either
|
|
; Non-kernel helper that stores float 3.14 (0x4048f5c3 in the checks) to
; element 0 of @extern_normal.
; The checks expect the LDS address to be looked up at run time: s15 is
; sign-extended to 64 bits, scaled by 4, added to the pc-relative address of
; llvm.amdgcn.dynlds.offset.table, and the dword loaded from there is used as
; the ds_write_b32 address. s15 is presumably a per-kernel id set by the calling
; kernel (see the `s_mov_b32 s15, N` checks in the *_indirect_* kernels).
define void @use_extern_normal() #0 {
; CHECK-LABEL: use_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
; CHECK-NEXT: s_mov_b32 s4, s15
; CHECK-NEXT: s_ashr_i32 s5, s15, 31
; CHECK-NEXT: v_mov_b32_e32 v0, 0x4048f5c3
; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; CHECK-NEXT: s_add_u32 s4, s4, s6
; CHECK-NEXT: s_addc_u32 s5, s5, s7
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s4
; CHECK-NEXT: ds_write_b32 v1, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 0
  store float 0x40091EB860000000, ptr addrspace(3) %arrayidx
  ret void
}
|
|
|
|
; Non-kernel helper that stores float 42.0 (0x42280000 in the checks) to
; element 1 of @extern_overalign.
; As in @use_extern_normal, the checks expect the base address to be loaded
; from llvm.amdgcn.dynlds.offset.table indexed by s15; the element index 1 shows
; up as `offset:4` on the ds_write_b32.
define void @use_extern_overalign() #0 {
; CHECK-LABEL: use_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
; CHECK-NEXT: s_mov_b32 s4, s15
; CHECK-NEXT: s_ashr_i32 s5, s15, 31
; CHECK-NEXT: v_mov_b32_e32 v0, 0x42280000
; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; CHECK-NEXT: s_add_u32 s4, s4, s6
; CHECK-NEXT: s_addc_u32 s5, s5, s7
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s4
; CHECK-NEXT: ds_write_b32 v1, v0 offset:4
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 1
  store float 4.200000e+01, ptr addrspace(3) %arrayidx
  ret void
}
|
|
|
|
|
|
; First 2^3 of the 2^4 cases; each case is encoded in the function name:
; no use of extern variable from nested function
; module_variable used/not-used
; kernel variable normal/overaligned
; extern variable normal/overaligned
|
|
|
|
; Case: module variable unused, @kernel_normal, direct use of @extern_normal.
; Stores i16 2 to @kernel_normal and float 0.0 to @extern_normal[%idx].
; The checks expect the i16 store through an address register holding 0 and the
; float store at (%idx << 2) + 4, i.e. the external array is placed at byte 4.
define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 2
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 4
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
  store i16 2, ptr addrspace(3) @kernel_normal

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}
|
|
|
|
; Case: module variable used, @kernel_normal, direct use of @extern_normal.
; Calls @use_module, then stores i16 1 to @module_variable, i16 2 to
; @kernel_normal and float 0.0 to @extern_normal[%idx].
; After the call sequence the checks expect: module store at address 0, kernel
; store at offset:2, and the float store at (%idx << 2) + 4.
define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[18:19], s[16:17], 0x0
; CHECK-NEXT: s_load_dword s15, s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 4
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_normal

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}
|
|
|
|
; Case: module variable unused, @kernel_overalign, direct use of @extern_normal.
; Stores i16 2 to @kernel_overalign and float 0.0 to @extern_normal[%idx].
; The checks expect the i16 store through an address register holding 0 and the
; float store at (%idx << 2) + 4.
define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 2
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 4
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
  store i16 2, ptr addrspace(3) @kernel_overalign

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}
|
|
|
|
; Case: module variable used, @kernel_overalign, direct use of @extern_normal.
; Calls @use_module, then stores i16 1 to @module_variable, i16 2 to
; @kernel_overalign and float 0.0 to @extern_normal[%idx].
; After the call sequence the checks expect: module store at address 0, kernel
; store at offset:4 (the align-4 variable), and the float store at
; (%idx << 2) + 8.
define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[18:19], s[16:17], 0x0
; CHECK-NEXT: s_load_dword s15, s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 8
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_overalign

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}
|
|
|
|
; Case: module variable unused, @kernel_normal, direct use of @extern_overalign.
; Stores i16 2 to @kernel_normal and float 0.0 to @extern_overalign[%idx].
; The checks expect the i16 store through an address register holding 0 and the
; float store at (%idx << 2) + 8, i.e. the align-8 external array at byte 8.
define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 2
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 8
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
  store i16 2, ptr addrspace(3) @kernel_normal

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}
|
|
|
|
; Case: module variable used, @kernel_normal, direct use of @extern_overalign.
; Calls @use_module, then stores i16 1 to @module_variable, i16 2 to
; @kernel_normal and float 0.0 to @extern_overalign[%idx].
; After the call sequence the checks expect: module store at address 0, kernel
; store at offset:2, and the float store at (%idx << 2) + 8.
define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[18:19], s[16:17], 0x0
; CHECK-NEXT: s_load_dword s15, s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 8
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_normal

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}
|
|
|
|
; Case: module variable unused, @kernel_overalign, direct use of
; @extern_overalign.
; Stores i16 2 to @kernel_overalign and float 0.0 to @extern_overalign[%idx].
; The checks expect the i16 store through an address register holding 0 and the
; float store at (%idx << 2) + 8.
define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 2
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 8
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
  store i16 2, ptr addrspace(3) @kernel_overalign

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}
|
|
|
|
; Case: module variable used, @kernel_overalign, direct use of
; @extern_overalign.
; Calls @use_module, then stores i16 1 to @module_variable, i16 2 to
; @kernel_overalign and float 0.0 to @extern_overalign[%idx].
; After the call sequence the checks expect: module store at address 0, kernel
; store at offset:4, and the float store at (%idx << 2) + 8.
define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[18:19], s[16:17], 0x0
; CHECK-NEXT: s_load_dword s15, s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 8
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_overalign

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}
|
|
|
|
|
|
; Second 2^3 of the 2^4 cases; each case is encoded in the function name:
; with extern variable from nested function
; module_variable used/not-used
; kernel variable normal/overaligned
; extern variable normal/overaligned
|
|
|
|
; Case: module variable unused, @kernel_normal, @extern_normal reached only
; through the call to @use_extern_normal.
; Stores i16 2 to @kernel_normal, then calls @use_extern_normal.
; The checks expect the i16 store through an address register holding 0 and
; `s_mov_b32 s15, 0` before the call; the callee's checks read s15 as the index
; into llvm.amdgcn.dynlds.offset.table.
define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_indirect_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_mov_b32_e32 v3, 2
; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 0
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
  store i16 2, ptr addrspace(3) @kernel_normal

  call void @use_extern_normal()
  ret void
}
|
|
|
|
; Case: module variable used, @kernel_normal, @extern_normal reached only
; through the call to @use_extern_normal.
; Calls @use_module, stores i16 1 to @module_variable and i16 2 to
; @kernel_normal, then calls @use_extern_normal.
; The checks expect `s_mov_b32 s15, 4` before the first call, the module store
; at address 0 and the kernel store at offset:2.
define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_indirect_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: s_mov_b32 s15, 4
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_normal

  call void @use_extern_normal()
  ret void
}
|
|
|
|
; Case: module variable unused, @kernel_overalign, @extern_normal reached only
; through the call to @use_extern_normal.
; Stores i16 2 to @kernel_overalign, then calls @use_extern_normal.
; The checks expect the i16 store through an address register holding 0 and
; `s_mov_b32 s15, 2` before the call.
define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_mov_b32_e32 v3, 2
; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 2
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
  store i16 2, ptr addrspace(3) @kernel_overalign

  call void @use_extern_normal()
  ret void
}
|
|
|
|
; Case: module variable used, @kernel_overalign, @extern_normal reached only
; through the call to @use_extern_normal.
; Calls @use_module, stores i16 1 to @module_variable and i16 2 to
; @kernel_overalign, then calls @use_extern_normal.
; The checks expect `s_mov_b32 s15, 6` before the first call, the module store
; at address 0 and the kernel store at offset:4.
define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: s_mov_b32 s15, 6
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_overalign

  call void @use_extern_normal()
  ret void
}
|
|
|
|
; Case: module variable unused, @kernel_normal, @extern_overalign reached only
; through the call to @use_extern_overalign.
; Stores i16 2 to @kernel_normal, then calls @use_extern_overalign.
; The checks expect the i16 store through an address register holding 0 and
; `s_mov_b32 s15, 1` before the call.
define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_indirect_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_mov_b32_e32 v3, 2
; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 1
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
  store i16 2, ptr addrspace(3) @kernel_normal

  call void @use_extern_overalign()
  ret void
}
|
|
|
|
; Case: module variable used, @kernel_normal, @extern_overalign reached only
; through the call to @use_extern_overalign.
; Calls @use_module, stores i16 1 to @module_variable and i16 2 to
; @kernel_normal, then calls @use_extern_overalign.
; The checks expect `s_mov_b32 s15, 5` before the first call, the module store
; at address 0 and the kernel store at offset:2.
define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_indirect_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: s_mov_b32 s15, 5
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_normal

  call void @use_extern_overalign()
  ret void
}
|
|
|
|
; Case: module variable unused, @kernel_overalign, @extern_overalign reached
; only through the call to @use_extern_overalign.
; Stores i16 2 to @kernel_overalign, then calls @use_extern_overalign.
; The checks expect the i16 store through an address register holding 0 and
; `s_mov_b32 s15, 3` before the call.
define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_mov_b32_e32 v3, 2
; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 3
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
  store i16 2, ptr addrspace(3) @kernel_overalign

  call void @use_extern_overalign()
  ret void
}
|
|
|
|
; Case: module variable used, @kernel_overalign, @extern_overalign reached only
; through the call to @use_extern_overalign.
; Calls @use_module, stores i16 1 to @module_variable and i16 2 to
; @kernel_overalign, then calls @use_extern_overalign.
; The checks expect `s_mov_b32 s15, 7` before the first call, the module store
; at address 0 and the kernel store at offset:4.
define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s11, s11, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: s_add_u32 s8, s6, 8
; CHECK-NEXT: s_addc_u32 s9, s7, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: s_mov_b32 s15, 7
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_overalign

  call void @use_extern_overalign()
  ret void
}
|
|
|
|
|
|
; Attribute group applied to the three non-kernel helpers (@use_module,
; @use_extern_normal, @use_extern_overalign). NOTE(review): presumably noinline
; is what keeps them as real calls in the kernels' checked output - confirm.
attributes #0 = { noinline }

; Module flag setting amdhsa_code_object_version to 500.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
|