Fix bitcast test, which was splitting apart phis intended to force bitcasts that survive all the way to selection. Disable the amdgpu-codegenprepare phi splitting, which defeats the technique of using a phi to ensure a bitcast reaches all the way to selection. Also add a variety of bfloat tests. These probably need revisiting to avoid the cast folding into argument loads. Also round out the set of bfloat bitcast and ABI tests. Add codegen tests for more bf16 operations. The promotion of these works contrary to the comment.
2019 lines
81 KiB
LLVM
2019 lines
81 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s

; i1 passed inreg: the expected code reads the argument from a scalar register
; (s4 on gfx900, s0 on gfx1100), masks it down to bit 0, copies it to v0 and
; emits a byte store.
define void @void_func_i1_inreg(i1 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i1_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s4, s4, 1
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, s0, 1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i1 %arg0, ptr addrspace(1) undef
  ret void
}

; i8 passed inreg: the expected code copies the scalar argument register
; (s4 on gfx900, s0 on gfx1100) into v0 and emits a byte store, with no masking.
define void @void_func_i8_inreg(i8 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i8_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i8 %arg0, ptr addrspace(1) undef
  ret void
}

; i16 passed inreg: the expected code copies the scalar argument register
; (s4 on gfx900, s0 on gfx1100) into v0 and emits a 16-bit store.
define void @void_func_i16_inreg(i16 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i16 %arg0, ptr addrspace(1) undef
  ret void
}

; i32 passed inreg: the expected code copies the scalar argument register
; (s4 on gfx900, s0 on gfx1100) into v0 and emits a 32-bit store.
define void @void_func_i32_inreg(i32 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i32 %arg0, ptr addrspace(1) undef
  ret void
}

; i64 passed inreg: the expected code reads the value from a pair of scalar
; registers (s4/s5 on gfx900, s0/s1 on gfx1100), copies both halves into
; v[0:1] and emits a single 64-bit store.
define void @void_func_i64_inreg(i64 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i64 %arg0, ptr addrspace(1) undef
  ret void
}

; half passed inreg: the expected code matches the i16 case, a copy from the
; scalar argument register (s4 on gfx900, s0 on gfx1100) followed by a 16-bit
; store.
define void @void_func_f16_inreg(half inreg %arg0) #0 {
; GFX9-LABEL: void_func_f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store half %arg0, ptr addrspace(1) undef
  ret void
}

; float passed inreg: the expected code matches the i32 case, a copy from the
; scalar argument register (s4 on gfx900, s0 on gfx1100) followed by a 32-bit
; store.
define void @void_func_f32_inreg(float inreg %arg0) #0 {
; GFX9-LABEL: void_func_f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store float %arg0, ptr addrspace(1) undef
  ret void
}

; double passed inreg: the expected code matches the i64 case, reading a pair
; of scalar registers (s4/s5 on gfx900, s0/s1 on gfx1100) and emitting one
; 64-bit store.
define void @void_func_f64_inreg(double inreg %arg0) #0 {
; GFX9-LABEL: void_func_f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store double %arg0, ptr addrspace(1) undef
  ret void
}

; <2 x i16> passed inreg: the expected code treats the vector as one 32-bit
; scalar register (s4 on gfx900, s0 on gfx1100) and stores it with a single
; 32-bit store.
define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

; <3 x i16> passed inreg: the expected code reads two scalar registers
; (s4/s5 on gfx900, s0/s1 on gfx1100) and splits the store into a 16-bit store
; of the second register plus a 32-bit store of the first.
define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s5
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <3 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

; <4 x i16> passed inreg: the expected code reads two scalar registers
; (s4/s5 on gfx900, s0/s1 on gfx1100) and emits a single 64-bit store.
; Only the two prefixes enabled by this file's llc invocations are checked
; here; the leftover block for a prefix that no invocation enables was dropped.
define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <4 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

; <5 x i16> passed inreg: the expected code reads three scalar registers
; (s4-s6 on gfx900, s0-s2 on gfx1100) and splits the store into a 16-bit store
; of the last register plus a 64-bit store of the first two.
define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <5 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

; <8 x i16> passed inreg: the expected code reads four scalar registers
; (s4-s7 on gfx900, s0-s3 on gfx1100) and emits a single 128-bit store.
define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <8 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

; <2 x i32> passed inreg: the expected code reads two scalar registers
; (s4/s5 on gfx900, s0/s1 on gfx1100) and emits a single 64-bit store.
define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <3 x i32> passed inreg: the expected code reads three scalar registers
; (s4-s6 on gfx900, s0-s2 on gfx1100) and emits a single 96-bit store.
define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <3 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <4 x i32> passed inreg: the expected code reads four scalar registers
; (s4-s7 on gfx900, s0-s3 on gfx1100) and emits a single 128-bit store.
define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <4 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <5 x i32> passed inreg: the expected code reads five scalar registers
; (s4-s8 on gfx900, s0-s4 on gfx1100) and splits the store into a 32-bit store
; of the last register plus a 128-bit store of the first four.
define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v4, off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <5 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <8 x i32> passed inreg: the expected code reads eight scalar registers
; (s4-s11 on gfx900, s0-s7 on gfx1100) and emits two 128-bit stores, the upper
; four registers first.
define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <8 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <16 x i32> passed inreg: the expected code reads sixteen scalar registers
; (s4-s19 on gfx900, s0-s15 on gfx1100) and emits four 128-bit stores, highest
; registers first.
; Only the two prefixes enabled by this file's llc invocations are checked
; here; the leftover block for a prefix that no invocation enables was dropped.
define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13
; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15
; GFX11-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
; GFX11-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5
; GFX11-NEXT: v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v11, s7
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <16 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <32 x i32> passed inreg: the vector does not fit entirely in the scalar
; registers the expected code reads. On gfx900 the code reads s4-s29 and takes
; the remaining six elements from v0-v5; on gfx1100 it reads s0-s29 and takes
; the remaining two elements from v0-v1. The incoming vector-register values
; are moved out of the way before v0.. are reused as store data.
define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v32i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v7, v1
; GFX9-NEXT: v_mov_b32_e32 v6, v0
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v4, s28
; GFX9-NEXT: v_mov_b32_e32 v5, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
; GFX11-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v1, s29
; GFX11-NEXT: v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v5, s25
; GFX11-NEXT: v_dual_mov_b32 v6, s26 :: v_dual_mov_b32 v7, s27
; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21
; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13
; GFX11-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15
; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; GFX11-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5
; GFX11-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7
; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <32 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <2 x i64> passed inreg: the expected code reads four scalar registers
; (s4-s7 on gfx900, s0-s3 on gfx1100) and emits a single 128-bit store.
define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <3 x i64> passed inreg: the expected code reads six scalar registers
; (s4-s9 on gfx900, s0-s5 on gfx1100) and splits the store into a 64-bit store
; of the last two registers plus a 128-bit store of the first four.
define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <3 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <4 x i64> passed inreg: the expected code reads eight scalar registers
; (s4-s11 on gfx900, s0-s7 on gfx1100) and emits two 128-bit stores, the upper
; four registers first.
define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <4 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <5 x i64> passed inreg: the expected code reads ten scalar registers
; (s4-s13 on gfx900, s0-s9 on gfx1100) and emits two 128-bit stores followed by
; a 64-bit store of the last two registers.
define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <5 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <8 x i64> passed inreg: the expected code reads sixteen scalar registers
; (s4-s19 on gfx900, s0-s15 on gfx1100) and emits four 128-bit stores, highest
; registers first.
define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13
; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15
; GFX11-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
; GFX11-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5
; GFX11-NEXT: v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v11, s7
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <8 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; Stores a <16 x i64> inreg argument (32 dwords) to an undef global pointer.
; The gfx900 expectations read the value from s4-s29 plus v0-v5; the gfx1100
; expectations read it from s0-s29 plus v0-v1.
define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v7, v1
; GFX9-NEXT: v_mov_b32_e32 v6, v0
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v4, s28
; GFX9-NEXT: v_mov_b32_e32 v5, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
; GFX11-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v1, s29
; GFX11-NEXT: v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v5, s25
; GFX11-NEXT: v_dual_mov_b32 v6, s26 :: v_dual_mov_b32 v7, s27
; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21
; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13
; GFX11-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15
; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; GFX11-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5
; GFX11-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7
; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <16 x i64> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <2 x half> inreg argument to an undef global pointer. The expected
; code copies one SGPR (s4 on gfx900, s0 on gfx1100) into v0 and issues a
; single 32-bit global store.
define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x half> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <3 x half> inreg argument to an undef global pointer. The expected
; code issues a 16-bit store sourced from the second argument SGPR and a
; 32-bit store sourced from the first (s5/s4 on gfx900, s1/s0 on gfx1100).
define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s5
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <3 x half> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <4 x half> inreg argument to an undef global pointer. The expected
; code copies two SGPRs (s4-s5 on gfx900, s0-s1 on gfx1100) into v[0:1] and
; issues a single 64-bit global store.
define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <4 x half> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores an <8 x half> inreg argument to an undef global pointer. The expected
; code copies four SGPRs (s4-s7 on gfx900, s0-s3 on gfx1100) into v[0:3] and
; issues a single 128-bit global store.
define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <8 x half> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <16 x half> inreg argument to an undef global pointer. The expected
; code reads eight SGPRs (s4-s11 on gfx900, s0-s7 on gfx1100) and issues two
; 128-bit global stores.
define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <16 x half> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <2 x float> inreg argument to an undef global pointer. The expected
; code copies two SGPRs (s4-s5 on gfx900, s0-s1 on gfx1100) into v[0:1] and
; issues a single 64-bit global store.
define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x float> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <3 x float> inreg argument to an undef global pointer. The expected
; code copies three SGPRs (s4-s6 on gfx900, s0-s2 on gfx1100) into v[0:2] and
; issues a single 96-bit global store.
define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <3 x float> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <4 x float> inreg argument to an undef global pointer. The expected
; code copies four SGPRs (s4-s7 on gfx900, s0-s3 on gfx1100) into v[0:3] and
; issues a single 128-bit global store.
define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <4 x float> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores an <8 x float> inreg argument to an undef global pointer. The
; expected code reads eight SGPRs (s4-s11 on gfx900, s0-s7 on gfx1100) and
; issues two 128-bit global stores.
define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <8 x float> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <16 x float> inreg argument to an undef global pointer. The
; expected code reads sixteen SGPRs (s4-s19 on gfx900, s0-s15 on gfx1100) and
; issues four 128-bit global stores.
define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13
; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15
; GFX11-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
; GFX11-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5
; GFX11-NEXT: v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v11, s7
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <16 x float> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <2 x double> inreg argument to an undef global pointer. The
; expected code copies four SGPRs (s4-s7 on gfx900, s0-s3 on gfx1100) into
; v[0:3] and issues a single 128-bit global store.
define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x double> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <3 x double> inreg argument to an undef global pointer. The
; expected code issues a 64-bit store sourced from the last two argument SGPRs
; (s8-s9 on gfx900, s4-s5 on gfx1100) and a 128-bit store sourced from the
; first four.
define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <3 x double> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <4 x double> inreg argument to an undef global pointer. The
; expected code reads eight SGPRs (s4-s11 on gfx900, s0-s7 on gfx1100) and
; issues two 128-bit global stores.
define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <4 x double> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores an <8 x double> inreg argument to an undef global pointer. The
; expected code reads sixteen SGPRs (s4-s19 on gfx900, s0-s15 on gfx1100) and
; issues four 128-bit global stores.
define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13
; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15
; GFX11-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
; GFX11-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5
; GFX11-NEXT: v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v11, s7
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <8 x double> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <16 x double> inreg argument (32 dwords) to an undef global
; pointer. The gfx900 expectations read the value from s4-s29 plus v0-v5; the
; gfx1100 expectations read it from s0-s29 plus v0-v1.
define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v7, v1
; GFX9-NEXT: v_mov_b32_e32 v6, v0
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v4, s28
; GFX9-NEXT: v_mov_b32_e32 v5, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
; GFX11-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v1, s29
; GFX11-NEXT: v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v5, s25
; GFX11-NEXT: v_dual_mov_b32 v6, s26 :: v_dual_mov_b32 v7, s27
; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21
; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13
; GFX11-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15
; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; GFX11-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5
; GFX11-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7
; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <16 x double> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Volatile-stores a <32 x i32> inreg argument followed by i1, i8, i16 and half
; inreg scalars, each to an undef global pointer. The vector is expected in
; s4-s29 plus v0-v5 on gfx900 and in s0-s29 plus v0-v1 on gfx1100; the four
; scalars are then expected in the next VGPRs (v6-v9 on gfx900, v2-v5 on
; gfx1100), with the i1 masked to its low bit before being stored as a byte.
; NOTE: the function name says f32, but %arg4 is declared as half.
define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inreg %arg1, i8 inreg %arg2, i16 inreg %arg3, half inreg %arg4) #0 {
; GFX9-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v13, v1
; GFX9-NEXT: v_mov_b32_e32 v12, v0
; GFX9-NEXT: v_mov_b32_e32 v10, s28
; GFX9-NEXT: v_mov_b32_e32 v11, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 1, v6
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v7, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v[0:1], v8, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v[0:1], v9, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v8, v0
; GFX11-NEXT: v_dual_mov_b32 v6, s28 :: v_dual_mov_b32 v7, s29
; GFX11-NEXT: v_dual_mov_b32 v10, s24 :: v_dual_mov_b32 v11, s25
; GFX11-NEXT: v_dual_mov_b32 v12, s26 :: v_dual_mov_b32 v13, s27
; GFX11-NEXT: v_dual_mov_b32 v14, s20 :: v_dual_mov_b32 v15, s21
; GFX11-NEXT: v_dual_mov_b32 v16, s22 :: v_dual_mov_b32 v17, s23
; GFX11-NEXT: v_dual_mov_b32 v18, s16 :: v_dual_mov_b32 v19, s17
; GFX11-NEXT: v_dual_mov_b32 v20, s18 :: v_dual_mov_b32 v21, s19
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v7, s13
; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v9, s15
; GFX11-NEXT: v_and_b32_e32 v0, 1, v2
; GFX11-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v11, s9
; GFX11-NEXT: v_dual_mov_b32 v12, s10 :: v_dual_mov_b32 v13, s11
; GFX11-NEXT: v_dual_mov_b32 v14, s4 :: v_dual_mov_b32 v15, s5
; GFX11-NEXT: v_dual_mov_b32 v16, s6 :: v_dual_mov_b32 v17, s7
; GFX11-NEXT: v_dual_mov_b32 v18, s0 :: v_dual_mov_b32 v19, s1
; GFX11-NEXT: v_dual_mov_b32 v20, s2 :: v_dual_mov_b32 v21, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b8 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b8 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b16 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b16 v[0:1], v5, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
  store volatile i1 %arg1, ptr addrspace(1) undef
  store volatile i8 %arg2, ptr addrspace(1) undef
  store volatile i16 %arg3, ptr addrspace(1) undef
  store volatile half %arg4, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Volatile-stores a <32 x i32> inreg argument followed by <2 x i32> and
; <2 x float> inreg arguments, each to an undef global pointer. The 32-element
; vector is expected in s4-s29 plus v0-v5 on gfx900 and in s0-s29 plus v0-v1
; on gfx1100; the two trailing vectors are expected in the next VGPR pairs
; (v[6:7] and v[8:9] on gfx900, v[2:3] and v[4:5] on gfx1100).
define void @void_func_v32i32_v2i32_v2f32_inreg(<32 x i32> inreg %arg0, <2 x i32> inreg %arg1, <2 x float> inreg %arg2) #0 {
; GFX9-LABEL: void_func_v32i32_v2i32_v2f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v13, v1
; GFX9-NEXT: v_mov_b32_e32 v12, v0
; GFX9-NEXT: v_mov_b32_e32 v10, s28
; GFX9-NEXT: v_mov_b32_e32 v11, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX9-NEXT: v_mov_b32_e32 v2, s14
; GFX9-NEXT: v_mov_b32_e32 v3, s15
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX9-NEXT: v_mov_b32_e32 v2, s10
; GFX9-NEXT: v_mov_b32_e32 v3, s11
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[6:7], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[8:9], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v2i32_v2f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v8, v0
; GFX11-NEXT: v_dual_mov_b32 v6, s28 :: v_dual_mov_b32 v7, s29
; GFX11-NEXT: v_dual_mov_b32 v10, s24 :: v_dual_mov_b32 v11, s25
; GFX11-NEXT: v_dual_mov_b32 v12, s26 :: v_dual_mov_b32 v13, s27
; GFX11-NEXT: v_dual_mov_b32 v14, s20 :: v_dual_mov_b32 v15, s21
; GFX11-NEXT: v_dual_mov_b32 v16, s22 :: v_dual_mov_b32 v17, s23
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v7, s17
; GFX11-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v9, s19
; GFX11-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v11, s13
; GFX11-NEXT: v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v13, s15
; GFX11-NEXT: v_dual_mov_b32 v14, s8 :: v_dual_mov_b32 v15, s9
; GFX11-NEXT: v_dual_mov_b32 v16, s10 :: v_dual_mov_b32 v17, s11
; GFX11-NEXT: v_dual_mov_b32 v18, s4 :: v_dual_mov_b32 v19, s5
; GFX11-NEXT: v_dual_mov_b32 v20, s6 :: v_dual_mov_b32 v21, s7
; GFX11-NEXT: v_dual_mov_b32 v22, s0 :: v_dual_mov_b32 v23, s1
; GFX11-NEXT: v_dual_mov_b32 v24, s2 :: v_dual_mov_b32 v25, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[22:25], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
  store volatile <2 x i32> %arg1, ptr addrspace(1) undef
  store volatile <2 x float> %arg2, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Takes 32 `i32 inreg` arguments and volatile-stores each one, in order, to an
; undef addrspace(1) pointer. In the checked output below, gfx900 copies the
; first 26 values out of s4-s29 and stores the last six straight from v0-v5;
; gfx1100 copies the first 30 out of s0-s29 and stores the last two from v0-v1.
; NOTE(review): the workitem-id read that the function name refers to is
; commented out in the body, and this definition carries no attribute group --
; confirm whether both are intentional.
define void @too_many_args_use_workitem_id_x_inreg(
|
|
; GFX9-LABEL: too_many_args_use_workitem_id_x_inreg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s4
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s5
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s6
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s7
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s8
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s9
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s10
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s11
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s12
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s13
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s14
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s15
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s16
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s17
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s18
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s19
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s20
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s21
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s22
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s23
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s24
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s25
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s26
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s27
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s28
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v6, s29
|
|
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[0:1], v1, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[0:1], v2, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[0:1], v3, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[0:1], v4, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[0:1], v5, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: too_many_args_use_workitem_id_x_inreg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
|
|
; GFX11-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
|
|
; GFX11-NEXT: v_mov_b32_e32 v6, s4
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: v_dual_mov_b32 v3, s6 :: v_dual_mov_b32 v2, s5
|
|
; GFX11-NEXT: v_dual_mov_b32 v5, s8 :: v_dual_mov_b32 v4, s7
|
|
; GFX11-NEXT: v_mov_b32_e32 v6, s9
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: v_dual_mov_b32 v5, s13 :: v_dual_mov_b32 v2, s10
|
|
; GFX11-NEXT: v_dual_mov_b32 v3, s11 :: v_dual_mov_b32 v4, s12
|
|
; GFX11-NEXT: v_mov_b32_e32 v6, s14
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v3, s16
|
|
; GFX11-NEXT: v_dual_mov_b32 v4, s17 :: v_dual_mov_b32 v5, s18
|
|
; GFX11-NEXT: v_mov_b32_e32 v6, s19
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v3, s21
|
|
; GFX11-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v5, s23
|
|
; GFX11-NEXT: v_mov_b32_e32 v6, s24
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: v_dual_mov_b32 v5, s28 :: v_dual_mov_b32 v2, s25
|
|
; GFX11-NEXT: v_dual_mov_b32 v3, s26 :: v_dual_mov_b32 v4, s27
|
|
; GFX11-NEXT: v_mov_b32_e32 v6, s29
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7,
|
|
i32 inreg %arg8, i32 inreg %arg9, i32 inreg %arg10, i32 inreg %arg11, i32 inreg %arg12, i32 inreg %arg13, i32 inreg %arg14, i32 inreg %arg15,
|
|
i32 inreg %arg16, i32 inreg %arg17, i32 inreg %arg18, i32 inreg %arg19, i32 inreg %arg20, i32 inreg %arg21, i32 inreg %arg22, i32 inreg %arg23,
|
|
i32 inreg %arg24, i32 inreg %arg25, i32 inreg %arg26, i32 inreg %arg27, i32 inreg %arg28, i32 inreg %arg29, i32 inreg %arg30, i32 inreg %arg31) {
|
|
;%val = call i32 @llvm.amdgcn.workitem.id.x()
|
|
;store volatile i32 %val, ptr addrspace(1) undef
|
|
|
|
store volatile i32 %arg0, ptr addrspace(1) undef
|
|
store volatile i32 %arg1, ptr addrspace(1) undef
|
|
store volatile i32 %arg2, ptr addrspace(1) undef
|
|
store volatile i32 %arg3, ptr addrspace(1) undef
|
|
store volatile i32 %arg4, ptr addrspace(1) undef
|
|
store volatile i32 %arg5, ptr addrspace(1) undef
|
|
store volatile i32 %arg6, ptr addrspace(1) undef
|
|
store volatile i32 %arg7, ptr addrspace(1) undef
|
|
|
|
store volatile i32 %arg8, ptr addrspace(1) undef
|
|
store volatile i32 %arg9, ptr addrspace(1) undef
|
|
store volatile i32 %arg10, ptr addrspace(1) undef
|
|
store volatile i32 %arg11, ptr addrspace(1) undef
|
|
store volatile i32 %arg12, ptr addrspace(1) undef
|
|
store volatile i32 %arg13, ptr addrspace(1) undef
|
|
store volatile i32 %arg14, ptr addrspace(1) undef
|
|
store volatile i32 %arg15, ptr addrspace(1) undef
|
|
|
|
store volatile i32 %arg16, ptr addrspace(1) undef
|
|
store volatile i32 %arg17, ptr addrspace(1) undef
|
|
store volatile i32 %arg18, ptr addrspace(1) undef
|
|
store volatile i32 %arg19, ptr addrspace(1) undef
|
|
store volatile i32 %arg20, ptr addrspace(1) undef
|
|
store volatile i32 %arg21, ptr addrspace(1) undef
|
|
store volatile i32 %arg22, ptr addrspace(1) undef
|
|
store volatile i32 %arg23, ptr addrspace(1) undef
|
|
|
|
store volatile i32 %arg24, ptr addrspace(1) undef
|
|
store volatile i32 %arg25, ptr addrspace(1) undef
|
|
store volatile i32 %arg26, ptr addrspace(1) undef
|
|
store volatile i32 %arg27, ptr addrspace(1) undef
|
|
store volatile i32 %arg28, ptr addrspace(1) undef
|
|
store volatile i32 %arg29, ptr addrspace(1) undef
|
|
store volatile i32 %arg30, ptr addrspace(1) undef
|
|
store volatile i32 %arg31, ptr addrspace(1) undef
|
|
|
|
ret void
|
|
}
|
|
|
|
; Stores an `i32 inreg` argument and a `<2 x float> inreg` argument to an undef
; addrspace(1) pointer (plain, non-volatile stores). The checked output reads
; the i32 from s4 and the vector from s5/s6 on gfx900, and from s0 and s1/s2 on
; gfx1100.
define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
|
|
; GFX9-LABEL: void_func_i32_v2float_inreg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i32_v2float_inreg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
|
|
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store i32 %arg0, ptr addrspace(1) undef
|
|
store <2 x float> %arg1, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Forwards its `i32 inreg` and `<2 x float> inreg` arguments to a call, so the
; checked output covers the caller side of the convention: frame setup, the
; callee address loaded through a gotpcrel32 relocation, return-address and
; frame-pointer saves in lanes of v40, and (on gfx900) copies of s4-s6 into
; s0-s2 before s_swappc_b64.
; NOTE(review): the call targets this function itself, not
; @void_func_i32_v2float_inreg -- confirm that is intended. Changing the callee
; requires regenerating the assertions with update_llc_test_checks.py.
define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
|
|
; GFX9-LABEL: caller_void_func_i32_v2float_inreg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: s_mov_b32 s7, s33
|
|
; GFX9-NEXT: s_mov_b32 s33, s32
|
|
; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
|
|
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
|
|
; GFX9-NEXT: s_mov_b64 exec, s[8:9]
|
|
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
|
; GFX9-NEXT: s_getpc_b64 s[8:9]
|
|
; GFX9-NEXT: s_add_u32 s8, s8, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
|
|
; GFX9-NEXT: s_addc_u32 s9, s9, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
|
|
; GFX9-NEXT: s_load_dwordx2 s[8:9], s[8:9], 0x0
|
|
; GFX9-NEXT: v_writelane_b32 v40, s7, 2
|
|
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
|
; GFX9-NEXT: s_mov_b32 s2, s6
|
|
; GFX9-NEXT: s_mov_b32 s1, s5
|
|
; GFX9-NEXT: s_mov_b32 s0, s4
|
|
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
|
|
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
|
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
|
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
|
|
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
|
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
|
|
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
|
|
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
|
; GFX9-NEXT: s_mov_b32 s33, s4
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: caller_void_func_i32_v2float_inreg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, s33
|
|
; GFX11-NEXT: s_mov_b32 s33, s32
|
|
; GFX11-NEXT: s_or_saveexec_b32 s4, -1
|
|
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
|
; GFX11-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX11-NEXT: s_add_i32 s32, s32, 16
|
|
; GFX11-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX11-NEXT: s_add_u32 s4, s4, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
|
|
; GFX11-NEXT: s_addc_u32 s5, s5, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
|
|
; GFX11-NEXT: v_writelane_b32 v40, s3, 2
|
|
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
|
|
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
|
|
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
|
|
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
|
|
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
|
|
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
|
|
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
|
; GFX11-NEXT: s_mov_b32 exec_lo, s1
|
|
; GFX11-NEXT: s_add_i32 s32, s32, -16
|
|
; GFX11-NEXT: s_mov_b32 s33, s0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
call void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1)
|
|
ret void
|
|
}
|
|
|
|
; Stores a scalar `bfloat inreg` argument to an undef addrspace(1) pointer. The
; checked output copies s4 (gfx900) or s0 (gfx1100) into v0 and uses the
; `d16_hi` form of the 16-bit store.
; NOTE(review): the d16_hi store suggests the value is carried in the upper half
; of the 32-bit register -- confirm against the ISA documentation.
define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
|
|
; GFX9-LABEL: void_func_bf16_inreg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_bf16_inreg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-NEXT: global_store_d16_hi_b16 v[0:1], v0, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store bfloat %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a `<2 x bfloat> inreg` argument to an undef addrspace(1) pointer. The
; checked output copies one SGPR (s4 on gfx900, s0 on gfx1100) into v0 and
; writes it with a single 32-bit store.
define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
|
|
; GFX9-LABEL: void_func_v2bf16_inreg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2bf16_inreg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a `<3 x bfloat> inreg` argument to an undef addrspace(1) pointer. The
; checked output splits the write in two: a 16-bit store of the second SGPR
; (s5 on gfx900, s1 on gfx1100) and a 32-bit store of the first (s4 / s0).
define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) #0 {
|
|
; GFX9-LABEL: void_func_v3bf16_inreg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX9-NEXT: global_store_short v[0:1], v0, off
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3bf16_inreg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a `<4 x bfloat> inreg` argument to an undef addrspace(1) pointer. The
; checked output copies two SGPRs (s4-s5 on gfx900, s0-s1 on gfx1100) into
; v0-v1 and writes them with one 64-bit store.
define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) #0 {
|
|
; GFX9-LABEL: void_func_v4bf16_inreg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4bf16_inreg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
|
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores an `<8 x bfloat> inreg` argument to an undef addrspace(1) pointer. The
; checked output copies four SGPRs (s4-s7 on gfx900, s0-s3 on gfx1100) into
; v0-v3 and writes them with one 128-bit store.
define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) #0 {
|
|
; GFX9-LABEL: void_func_v8bf16_inreg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, s6
|
|
; GFX9-NEXT: v_mov_b32_e32 v3, s7
|
|
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8bf16_inreg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
|
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
|
|
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a `<16 x bfloat> inreg` argument to an undef addrspace(1) pointer. The
; checked output uses two 128-bit stores: gfx900 writes s8-s11 and then s4-s7
; through v0-v3; gfx1100 writes s4-s7 through v0-v3 and s0-s3 through v4-v7
; inside one s_clause.
define void @void_func_v16bf16_inreg(<16 x bfloat> inreg %arg0) #0 {
|
|
; GFX9-LABEL: void_func_v16bf16_inreg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s8
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, s9
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, s10
|
|
; GFX9-NEXT: v_mov_b32_e32 v3, s11
|
|
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, s6
|
|
; GFX9-NEXT: v_mov_b32_e32 v3, s7
|
|
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16bf16_inreg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
|
|
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
|
|
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
|
|
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
|
|
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Attribute groups referenced by the function definitions in this file.
; #0 (nounwind) is the group attached to the `#0`-suffixed definitions.
; NOTE(review): no definition in this part of the file references #1 -- confirm
; it is still used elsewhere before keeping it.
attributes #0 = { nounwind }
|
|
attributes #1 = { nounwind noinline }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|