Files
clang-p2996/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
Brox Chen 6dbc01e801 [AMDGPU][True16][CodeGen] update GFX11Plus codegen test with true16 flag (#135078)
This is an NFC patch.

This patch runs a bulk update on the CodeGen tests that are impacted by the
true16 features. This patch applies:
1. duplicate GFX11plus runlines and apply them with
"+mattr=+real-true16" and "+mattr=-real-true16"
2. update the test with the update script

For some GISEL runlines, the current CodeGen does not fully support the
true16 version. We still update the runlines, but comment out the failing
ones and add a "FIXME-TRUE16" comment to each such test for easier
tracking. These tests will be fixed in follow-up patches.

We are in a transition state in which the code base supports both
"+real-true16" and "-real-true16". We plan to move to
"+real-true16" as the default, and finally remove the "-real-true16" mode and
its test lines.
2025-04-23 13:06:52 -04:00

2300 lines
94 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
; i1 passed inreg and stored to a poison addrspace(1) pointer. The checks show
; the value arriving in s16 (gfx900) or s0 (gfx1100), being masked to its low
; bit with s_and_b32, copied to v0 and written with a byte store. Both gfx1100
; run lines share one set of checks here.
define void @void_func_i1_inreg(i1 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i1_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s4, s16, 1
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, s0, 1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i1 %arg0, ptr addrspace(1) poison
ret void
}
; i8 passed inreg: copied from s16 (gfx900) or s0 (gfx1100) into v0 and written
; with a byte store. The two gfx1100 run lines diverge: with +real-true16 the
; copy is a 16-bit v_mov_b16 into v0.l, with -real-true16 it is a 32-bit
; v_mov_b32 into v0.
define void @void_func_i8_inreg(i8 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i8_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: void_func_i8_inreg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
; GFX11-TRUE16-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: void_func_i8_inreg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
; GFX11-FAKE16-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
store i8 %arg0, ptr addrspace(1) poison
ret void
}
; i16 passed inreg: copied from s16 (gfx900) or s0 (gfx1100) into v0 and written
; with a 16-bit store. With +real-true16 the copy is a v_mov_b16 into v0.l;
; with -real-true16 it is a v_mov_b32 into v0.
define void @void_func_i16_inreg(i16 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: void_func_i16_inreg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
; GFX11-TRUE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: void_func_i16_inreg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
; GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
store i16 %arg0, ptr addrspace(1) poison
ret void
}
; i32 passed inreg: copied from s16 (gfx900) or s0 (gfx1100) into v0 and written
; with a dword store. Both gfx1100 run lines share one set of checks.
define void @void_func_i32_inreg(i32 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i32 %arg0, ptr addrspace(1) poison
ret void
}
; i64 passed inreg: arrives in two scalar registers, s16-s17 (gfx900) or s0-s1
; (gfx1100), which are copied to v0-v1 and written with one 64-bit store.
define void @void_func_i64_inreg(i64 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i64 %arg0, ptr addrspace(1) poison
ret void
}
; half passed inreg: copied from s16 (gfx900) or s0 (gfx1100) into v0 and
; written with a 16-bit store. With +real-true16 the copy is a v_mov_b16 into
; v0.l; with -real-true16 it is a v_mov_b32 into v0.
define void @void_func_f16_inreg(half inreg %arg0) #0 {
; GFX9-LABEL: void_func_f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: void_func_f16_inreg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
; GFX11-TRUE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: void_func_f16_inreg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
; GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
store half %arg0, ptr addrspace(1) poison
ret void
}
; float passed inreg: copied from s16 (gfx900) or s0 (gfx1100) into v0 and
; written with a dword store; the expected code matches the i32 case.
define void @void_func_f32_inreg(float inreg %arg0) #0 {
; GFX9-LABEL: void_func_f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store float %arg0, ptr addrspace(1) poison
ret void
}
; double passed inreg: arrives in s16-s17 (gfx900) or s0-s1 (gfx1100), is
; copied to v0-v1 and written with one 64-bit store; the expected code matches
; the i64 case.
define void @void_func_f64_inreg(double inreg %arg0) #0 {
; GFX9-LABEL: void_func_f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store double %arg0, ptr addrspace(1) poison
ret void
}
; <2 x i16> passed inreg: both elements arrive in a single scalar register,
; s16 (gfx900) or s0 (gfx1100), and are written with one dword store.
define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i16> %arg0, ptr addrspace(1) poison
ret void
}
; <3 x i16> passed inreg: uses two scalar registers, s16-s17 (gfx900) or s0-s1
; (gfx1100). The second register is written with a 16-bit store and the first
; with a dword store; on gfx1100 the two stores are grouped under s_clause.
define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i16> %arg0, ptr addrspace(1) poison
ret void
}
; <4 x i16> passed inreg: arrives in two scalar registers, s16-s17 (gfx900) or
; s0-s1 (gfx1100), which are copied to v0-v1 and written with one 64-bit store.
; A stale block of assertions under a prefix that no RUN line enables used to
; sit here; it was never checked and named different registers, so it has
; been dropped.
define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i16> %arg0, ptr addrspace(1) poison
ret void
}
; <5 x i16> passed inreg: uses three scalar registers, s16-s18 (gfx900) or
; s0-s2 (gfx1100). The third register is written with a 16-bit store and the
; first two with one 64-bit store.
define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s18
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i16> %arg0, ptr addrspace(1) poison
ret void
}
; <8 x i16> passed inreg: arrives in four scalar registers, s16-s19 (gfx900) or
; s0-s3 (gfx1100), which are copied to v0-v3 and written with one 128-bit store.
define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i16> %arg0, ptr addrspace(1) poison
ret void
}
; <2 x i32> passed inreg: arrives in s16-s17 (gfx900) or s0-s1 (gfx1100), is
; copied to v0-v1 and written with one 64-bit store.
define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i32> %arg0, ptr addrspace(1) poison
ret void
}
; <3 x i32> passed inreg: arrives in s16-s18 (gfx900) or s0-s2 (gfx1100), is
; copied to v0-v2 and written with one 96-bit store.
define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i32> %arg0, ptr addrspace(1) poison
ret void
}
; <4 x i32> passed inreg: arrives in s16-s19 (gfx900) or s0-s3 (gfx1100), is
; copied to v0-v3 and written with one 128-bit store.
define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i32> %arg0, ptr addrspace(1) poison
ret void
}
; <5 x i32> passed inreg: gfx900 reads s16-s20; gfx1100 reads s0-s3 and then
; s16 for the fifth element. The fifth element is written with a dword store
; and the first four with one 128-bit store.
define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v4, off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i32> %arg0, ptr addrspace(1) poison
ret void
}
; <8 x i32> passed inreg: gfx900 reads s16-s23; gfx1100 reads s0-s3 followed by
; s16-s19. Each target writes the value with two 128-bit stores.
define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i32> %arg0, ptr addrspace(1) poison
ret void
}
; <16 x i32> passed inreg: gfx900 reads s16-s29 and takes the last two elements
; from v0-v1; gfx1100 reads s0-s3 followed by s16-s27. Each target writes the
; value with four 128-bit stores.
; A stale block of assertions under a prefix that no RUN line enables used to
; sit here; it was never checked and described a different register
; assignment, so it has been dropped.
define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s28
; GFX9-NEXT: v_mov_b32_e32 v1, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i32> %arg0, ptr addrspace(1) poison
ret void
}
; <32 x i32> passed inreg: the scalar registers do not cover the whole value.
; gfx900 reads s16-s29 and takes the remaining elements from v0-v17; gfx1100
; reads s0-s3 and s16-s29 and takes the remaining elements from v0-v13. Each
; target writes the value with eight 128-bit stores.
define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v32i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v19, v1
; GFX9-NEXT: v_mov_b32_e32 v18, v0
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: v_mov_b32_e32 v16, s28
; GFX9-NEXT: v_mov_b32_e32 v17, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[16:19], off
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <32 x i32> %arg0, ptr addrspace(1) poison
ret void
}
; <2 x i64> passed inreg: arrives in s16-s19 (gfx900) or s0-s3 (gfx1100), is
; copied to v0-v3 and written with one 128-bit store.
define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i64> %arg0, ptr addrspace(1) poison
ret void
}
; <3 x i64> passed inreg: gfx900 reads s16-s21; gfx1100 reads s0-s3 followed by
; s16-s17. The last element is written with a 64-bit store and the first two
; with one 128-bit store.
define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i64> %arg0, ptr addrspace(1) poison
ret void
}
; <4 x i64> passed inreg: gfx900 reads s16-s23; gfx1100 reads s0-s3 followed by
; s16-s19. Each target writes the value with two 128-bit stores.
define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i64> %arg0, ptr addrspace(1) poison
ret void
}
; <5 x i64> passed inreg: gfx900 reads s16-s25; gfx1100 reads s0-s3 followed by
; s16-s21. Each target writes the value with two 128-bit stores plus one
; 64-bit store for the last element.
define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i64> %arg0, ptr addrspace(1) poison
ret void
}
; <8 x i64> passed inreg: gfx900 reads s16-s29 and takes the final 64-bit
; element from v0-v1; gfx1100 reads s0-s3 followed by s16-s27. Each target
; writes the value with four 128-bit stores.
define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s28
; GFX9-NEXT: v_mov_b32_e32 v1, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i64> %arg0, ptr addrspace(1) poison
ret void
}
; <16 x i64> passed inreg: the scalar registers do not cover the whole value.
; gfx900 reads s16-s29 and takes the remaining dwords from v0-v17; gfx1100
; reads s0-s3 and s16-s29 and takes the remaining dwords from v0-v13. Each
; target writes the value with eight 128-bit stores.
define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v19, v1
; GFX9-NEXT: v_mov_b32_e32 v18, v0
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: v_mov_b32_e32 v16, s28
; GFX9-NEXT: v_mov_b32_e32 v17, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[16:19], off
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i64> %arg0, ptr addrspace(1) poison
ret void
}
; <2 x half> passed inreg: both elements arrive in a single scalar register,
; s16 (gfx900) or s0 (gfx1100), and are written with one dword store.
define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x half> %arg0, ptr addrspace(1) poison
ret void
}
; <3 x half> passed inreg: uses two scalar registers, s16-s17 (gfx900) or s0-s1
; (gfx1100). The second register is written with a 16-bit store and the first
; with a dword store; the expected code matches the <3 x i16> case.
define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x half> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <4 x half> inreg argument and stores it to a poison global pointer.
; The expected code copies two SGPRs (s16/s17 on GFX9, s0/s1 on GFX11) into
; v[0:1] and emits a single 64-bit store.
define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x half> %arg0, ptr addrspace(1) poison
ret void
}
; Takes an <8 x half> inreg argument and stores it to a poison global pointer.
; The expected code copies four SGPRs (s16-s19 on GFX9, s0-s3 on GFX11) into
; v[0:3] and emits a single 128-bit store.
define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x half> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <16 x half> inreg argument and stores it to a poison global pointer.
; The expected code reads eight SGPRs (s16-s23 on GFX9; s0-s3 and s16-s19 on
; GFX11) and emits two 128-bit stores. On GFX11 the two stores are grouped
; under s_clause 0x1.
define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x half> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <2 x float> inreg argument and stores it to a poison global pointer.
; The expected code copies two SGPRs (s16/s17 on GFX9, s0/s1 on GFX11) into
; v[0:1] and emits a single 64-bit store.
define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x float> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <3 x float> inreg argument and stores it to a poison global pointer.
; The expected code copies three SGPRs (s16-s18 on GFX9, s0-s2 on GFX11) into
; v[0:2] and emits a single 96-bit store.
define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x float> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <4 x float> inreg argument and stores it to a poison global pointer.
; The expected code copies four SGPRs (s16-s19 on GFX9, s0-s3 on GFX11) into
; v[0:3] and emits a single 128-bit store.
define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x float> %arg0, ptr addrspace(1) poison
ret void
}
; Takes an <8 x float> inreg argument and stores it to a poison global pointer.
; The expected code reads eight SGPRs (s16-s23 on GFX9; s0-s3 and s16-s19 on
; GFX11) and emits two 128-bit stores.
define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x float> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <16 x float> inreg argument and stores it to a poison global pointer
; as four 128-bit stores. In the expected GFX9 code the data comes from
; s16-s29 plus the incoming v0/v1 (which are moved to v2/v3 before v0/v1 are
; overwritten). In the expected GFX11 code all sixteen dwords come from SGPRs
; (s0-s3 and s16-s27).
define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s28
; GFX9-NEXT: v_mov_b32_e32 v1, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x float> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <2 x double> inreg argument and stores it to a poison global pointer.
; The expected code copies four SGPRs (s16-s19 on GFX9, s0-s3 on GFX11) into
; v[0:3] and emits a single 128-bit store.
define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x double> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <3 x double> inreg argument and stores it to a poison global pointer.
; The expected code reads six SGPRs (s16-s21 on GFX9; s0-s3 and s16-s17 on
; GFX11) and emits one 64-bit store followed by one 128-bit store.
define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x double> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <4 x double> inreg argument and stores it to a poison global pointer.
; The expected code reads eight SGPRs (s16-s23 on GFX9; s0-s3 and s16-s19 on
; GFX11) and emits two 128-bit stores.
define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x double> %arg0, ptr addrspace(1) poison
ret void
}
; Takes an <8 x double> inreg argument and stores it to a poison global pointer
; as four 128-bit stores. In the expected GFX9 code the data comes from
; s16-s29 plus the incoming v0/v1 (moved to v2/v3 before v0/v1 are reused).
; In the expected GFX11 code all sixteen dwords come from SGPRs (s0-s3 and
; s16-s27).
define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s28
; GFX9-NEXT: v_mov_b32_e32 v1, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x double> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <16 x double> inreg argument (32 dwords) and stores it to a poison
; global pointer as eight 128-bit stores. In the expected GFX9 code the data
; comes from s16-s29 plus incoming v0-v17; in the expected GFX11 code it comes
; from s0-s3, s16-s29 and incoming v0-v13. On both targets the stores of the
; VGPR-resident part that does not need repacking are issued first, before
; the SGPR values are copied into VGPRs.
define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v19, v1
; GFX9-NEXT: v_mov_b32_e32 v18, v0
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: v_mov_b32_e32 v16, s28
; GFX9-NEXT: v_mov_b32_e32 v17, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[16:19], off
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x double> %arg0, ptr addrspace(1) poison
ret void
}
; Takes a <32 x i32> inreg argument followed by four small inreg scalars
; (i1, i8, i16, half) and stores each of them, in order, with a volatile store
; to a poison global pointer. Despite the "f32" in the function name, the last
; parameter (%arg4) is declared as half and is written with a 16-bit store.
;
; In the expected code the four trailing scalars are read from VGPRs
; (v18-v21 on GFX9, v14-v17 on GFX11); the i1 value is masked with 1 before
; its byte store. Every store is followed by a wait (s_waitcnt vmcnt(0) on
; GFX9, s_waitcnt_vscnt null, 0x0 on GFX11). The +real-true16 and
; -real-true16 run lines have separate expectations for this function, under
; the GFX11-TRUE16 and GFX11-FAKE16 prefixes respectively.
define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inreg %arg1, i8 inreg %arg2, i16 inreg %arg3, half inreg %arg4) #0 {
; GFX9-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v25, v1
; GFX9-NEXT: v_mov_b32_e32 v24, v0
; GFX9-NEXT: v_mov_b32_e32 v22, s28
; GFX9-NEXT: v_mov_b32_e32 v23, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[22:25], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 1, v18
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v19, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v[0:1], v20, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v[0:1], v21, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29
; GFX11-TRUE16-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s24 :: v_dual_mov_b32 v7, s25
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s26 :: v_dual_mov_b32 v9, s27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s20 :: v_dual_mov_b32 v11, s21
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s22 :: v_dual_mov_b32 v13, s23
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s16 :: v_dual_mov_b32 v23, s17
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s18 :: v_dual_mov_b32 v25, s19
; GFX11-TRUE16-NEXT: global_store_b128 v[0:1], v[2:5], off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: global_store_b128 v[0:1], v[22:25], off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 1, v14
; GFX11-TRUE16-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: global_store_b8 v[0:1], v4, off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: global_store_b8 v[0:1], v15, off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: global_store_b16 v[0:1], v16, off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: global_store_b16 v[0:1], v17, off dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29
; GFX11-FAKE16-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b128 v[0:1], v[2:5], off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v18, s0 :: v_dual_mov_b32 v19, s1
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v20, s2 :: v_dual_mov_b32 v21, s3
; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 1, v14
; GFX11-FAKE16-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b8 v[0:1], v12, off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b8 v[0:1], v15, off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v16, off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v17, off dlc
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
store volatile i1 %arg1, ptr addrspace(1) poison
store volatile i8 %arg2, ptr addrspace(1) poison
store volatile i16 %arg3, ptr addrspace(1) poison
store volatile half %arg4, ptr addrspace(1) poison
ret void
}
; Takes a <32 x i32> inreg argument followed by <2 x i32> and <2 x float>
; inreg arguments and stores each of them, in order, with a volatile store to
; a poison global pointer.
;
; In the expected code the two trailing 2-element vectors are read from VGPR
; pairs (v[18:19] and v[20:21] on GFX9, v[14:15] and v[16:17] on GFX11) and
; written with 64-bit stores. Every store is followed by a wait
; (s_waitcnt vmcnt(0) on GFX9, s_waitcnt_vscnt null, 0x0 on GFX11).
define void @void_func_v32i32_v2i32_v2f32_inreg(<32 x i32> inreg %arg0, <2 x i32> inreg %arg1, <2 x float> inreg %arg2) #0 {
; GFX9-LABEL: void_func_v32i32_v2i32_v2f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v25, v1
; GFX9-NEXT: v_mov_b32_e32 v24, v0
; GFX9-NEXT: v_mov_b32_e32 v22, s28
; GFX9-NEXT: v_mov_b32_e32 v23, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[22:25], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[18:19], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[20:21], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v2i32_v2f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v22, s0 :: v_dual_mov_b32 v23, s1
; GFX11-NEXT: v_dual_mov_b32 v24, s2 :: v_dual_mov_b32 v25, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[22:25], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[14:15], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[16:17], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
store volatile <2 x i32> %arg1, ptr addrspace(1) poison
store volatile <2 x float> %arg2, ptr addrspace(1) poison
ret void
}
define void @too_many_args_use_workitem_id_x_inreg(
; GFX9-LABEL: too_many_args_use_workitem_id_x_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s16
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s17
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s18
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s19
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s20
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s21
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s22
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s23
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s24
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s25
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s26
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s27
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s28
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s29
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v1, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v3, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v4, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v5, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v6, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v7, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v8, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v9, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v10, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v11, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v12, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v13, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v14, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v15, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v16, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v17, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: too_many_args_use_workitem_id_x_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v14, s0 :: v_dual_mov_b32 v15, s1
; GFX11-NEXT: v_mov_b32_e32 v16, s2
; GFX11-NEXT: v_mov_b32_e32 v18, s19
; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v17, s18
; GFX11-NEXT: v_dual_mov_b32 v15, s16 :: v_dual_mov_b32 v16, s17
; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v15, s21 :: v_dual_mov_b32 v14, s20
; GFX11-NEXT: v_dual_mov_b32 v16, s22 :: v_dual_mov_b32 v17, s23
; GFX11-NEXT: v_mov_b32_e32 v18, s24
; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v17, s28 :: v_dual_mov_b32 v14, s25
; GFX11-NEXT: v_dual_mov_b32 v15, s26 :: v_dual_mov_b32 v16, s27
; GFX11-NEXT: v_mov_b32_e32 v18, s29
; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v7, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v8, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v9, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v10, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v11, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v12, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v13, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7,
i32 inreg %arg8, i32 inreg %arg9, i32 inreg %arg10, i32 inreg %arg11, i32 inreg %arg12, i32 inreg %arg13, i32 inreg %arg14, i32 inreg %arg15,
i32 inreg %arg16, i32 inreg %arg17, i32 inreg %arg18, i32 inreg %arg19, i32 inreg %arg20, i32 inreg %arg21, i32 inreg %arg22, i32 inreg %arg23,
i32 inreg %arg24, i32 inreg %arg25, i32 inreg %arg26, i32 inreg %arg27, i32 inreg %arg28, i32 inreg %arg29, i32 inreg %arg30, i32 inreg %arg31) {
;%val = call i32 @llvm.amdgcn.workitem.id.x()
;store volatile i32 %val, ptr addrspace(1) poison
store volatile i32 %arg0, ptr addrspace(1) poison
store volatile i32 %arg1, ptr addrspace(1) poison
store volatile i32 %arg2, ptr addrspace(1) poison
store volatile i32 %arg3, ptr addrspace(1) poison
store volatile i32 %arg4, ptr addrspace(1) poison
store volatile i32 %arg5, ptr addrspace(1) poison
store volatile i32 %arg6, ptr addrspace(1) poison
store volatile i32 %arg7, ptr addrspace(1) poison
store volatile i32 %arg8, ptr addrspace(1) poison
store volatile i32 %arg9, ptr addrspace(1) poison
store volatile i32 %arg10, ptr addrspace(1) poison
store volatile i32 %arg11, ptr addrspace(1) poison
store volatile i32 %arg12, ptr addrspace(1) poison
store volatile i32 %arg13, ptr addrspace(1) poison
store volatile i32 %arg14, ptr addrspace(1) poison
store volatile i32 %arg15, ptr addrspace(1) poison
store volatile i32 %arg16, ptr addrspace(1) poison
store volatile i32 %arg17, ptr addrspace(1) poison
store volatile i32 %arg18, ptr addrspace(1) poison
store volatile i32 %arg19, ptr addrspace(1) poison
store volatile i32 %arg20, ptr addrspace(1) poison
store volatile i32 %arg21, ptr addrspace(1) poison
store volatile i32 %arg22, ptr addrspace(1) poison
store volatile i32 %arg23, ptr addrspace(1) poison
store volatile i32 %arg24, ptr addrspace(1) poison
store volatile i32 %arg25, ptr addrspace(1) poison
store volatile i32 %arg26, ptr addrspace(1) poison
store volatile i32 %arg27, ptr addrspace(1) poison
store volatile i32 %arg28, ptr addrspace(1) poison
store volatile i32 %arg29, ptr addrspace(1) poison
store volatile i32 %arg30, ptr addrspace(1) poison
store volatile i32 %arg31, ptr addrspace(1) poison
ret void
}
; Mixed scalar/vector case: an i32 followed by a <2 x float>, both inreg.
; Each argument is stored (non-volatile) to a poison addrspace(1) pointer.
; The gfx900 assertions read the i32 from s16 and the vector from s17/s18;
; the gfx1100 assertions read them from s0 and s1/s2.
define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
; GFX9-LABEL: void_func_i32_v2float_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: v_mov_b32_e32 v1, s18
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32_v2float_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: v_mov_b32_e32 v0, s1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i32 %arg0, ptr addrspace(1) poison
store <2 x float> %arg1, ptr addrspace(1) poison
ret void
}
; Call-site case: receives an i32 and a <2 x float> as inreg arguments and
; passes them on, again as inreg arguments, in a call.
; NOTE(review): the call target below is this function itself rather than
; void_func_i32_v2float_inreg, and the assertions accordingly reference
; caller_void_func_i32_v2float_inreg@gotpcrel32. Possibly unintended - confirm
; before changing, since the assertions are autogenerated from this IR.
; NOTE(review): the gfx900 assertions copy s16-s18 into s0-s2 before the
; s_swappc_b64, while the gfx1100 assertions show no argument copies.
define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
; GFX9-LABEL: caller_void_func_i32_v2float_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s19, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[20:21]
; GFX9-NEXT: s_add_u32 s20, s20, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s21, s21, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0
; GFX9-NEXT: v_writelane_b32 v40, s19, 2
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_mov_b32 s32, s33
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: caller_void_func_i32_v2float_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s16, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s16
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: s_getpc_b64 s[16:17]
; GFX11-NEXT: s_add_u32 s16, s16, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s17, s17, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s3, 2
; GFX11-NEXT: s_load_b64 s[16:17], s[16:17], 0x0
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1)
ret void
}
; Single bfloat inreg argument, stored to a poison addrspace(1) pointer.
; This is one of the cases where the two gfx1100 run lines differ: the
; +real-true16 assertions move the value with v_mov_b16_e32 into v0.l, while
; the -real-true16 assertions use a full v_mov_b32_e32 into v0.
define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
; GFX9-LABEL: void_func_bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: void_func_bf16_inreg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
; GFX11-TRUE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: void_func_bf16_inreg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
; GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
store bfloat %arg0, ptr addrspace(1) poison
ret void
}
; <2 x bfloat> inreg argument, stored to a poison addrspace(1) pointer.
; The assertions show the whole vector arriving in one 32-bit SGPR
; (s16 for gfx900, s0 for gfx1100) and being written with a single dword store.
define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x bfloat> %arg0, ptr addrspace(1) poison
ret void
}
; <3 x bfloat> inreg argument, stored to a poison addrspace(1) pointer.
; The assertions show two SGPRs being consumed (s16/s17 for gfx900, s0/s1 for
; gfx1100) and the store being split into a 16-bit store of the second
; register plus a 32-bit store of the first.
define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x bfloat> %arg0, ptr addrspace(1) poison
ret void
}
; <4 x bfloat> inreg argument, stored to a poison addrspace(1) pointer.
; The assertions show the vector arriving in two SGPRs (s16/s17 for gfx900,
; s0/s1 for gfx1100) and being written with one 64-bit store.
define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x bfloat> %arg0, ptr addrspace(1) poison
ret void
}
; <8 x bfloat> inreg argument, stored to a poison addrspace(1) pointer.
; The assertions show the vector arriving in four SGPRs (s16-s19 for gfx900,
; s0-s3 for gfx1100) and being written with one 128-bit store.
define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x bfloat> %arg0, ptr addrspace(1) poison
ret void
}
; <16 x bfloat> inreg argument, stored to a poison addrspace(1) pointer.
; The assertions show eight SGPRs being consumed: s16-s23 for gfx900, and
; s0-s3 followed by s16-s19 for gfx1100. The store is emitted as two 128-bit
; stores in both outputs.
define void @void_func_v16bf16_inreg(<16 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x bfloat> %arg0, ptr addrspace(1) poison
ret void
}
; Two i32 inreg arguments followed by a pointer that is not marked inreg.
; Both values are stored with volatile stores through %ptr, so the assertions
; contain one store per argument, each followed by a wait. The assertions use
; v[0:1] as the store address; the i32 values come from s16/s17 for gfx900
; and s0/s1 for gfx1100.
define void @void_func_2_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_2_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s16
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s17
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_2_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile i32 %arg0, ptr addrspace(1) %ptr
store volatile i32 %arg1, ptr addrspace(1) %ptr
ret void
}
; Two i64 inreg arguments followed by a pointer that is not marked inreg.
; Both values are stored with volatile stores through %ptr (address in v[0:1]
; in the assertions). Each i64 occupies a pair of SGPRs: s16/s17 and s18/s19
; for gfx900, s0/s1 and s2/s3 for gfx1100.
define void @void_func_2_i64_inreg(i64 inreg %arg0, i64 inreg %arg1, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_2_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s16
; GFX9-NEXT: v_mov_b32_e32 v5, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_2_i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile i64 %arg0, ptr addrspace(1) %ptr
store volatile i64 %arg1, ptr addrspace(1) %ptr
ret void
}
; i64, i32, i64 inreg arguments followed by a pointer that is not marked
; inreg; each value is stored with a volatile store through %ptr.
; In the assertions the second i64 starts in the SGPR immediately after the
; i32, with no register skipped: s16/s17, s18, s19/s20 for gfx900 and
; s0/s1, s2, s3/s16 for gfx1100 (the last pair spans s3 and s16).
define void @void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s16
; GFX9-NEXT: v_mov_b32_e32 v5, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s18
; GFX9-NEXT: v_mov_b32_e32 v2, s19
; GFX9-NEXT: v_mov_b32_e32 v3, s20
; GFX9-NEXT: global_store_dword v[0:1], v4, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s16
; GFX11-NEXT: v_mov_b32_e32 v6, s2
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile i64 %arg0, ptr addrspace(1) %ptr
store volatile i32 %arg1, ptr addrspace(1) %ptr
store volatile i64 %arg2, ptr addrspace(1) %ptr
ret void
}
; Five separate i32 inreg arguments followed by a pointer that is not marked
; inreg; each value is stored with a volatile store through %ptr.
; The assertions read the values from s16-s20 for gfx900, and from s0-s3
; followed by s16 for gfx1100.
define void @void_func_5_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_5_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s16
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s17
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s19
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s20
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_5_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
; GFX11-NEXT: v_mov_b32_e32 v6, s16
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile i32 %arg0, ptr addrspace(1) %ptr
store volatile i32 %arg1, ptr addrspace(1) %ptr
store volatile i32 %arg2, ptr addrspace(1) %ptr
store volatile i32 %arg3, ptr addrspace(1) %ptr
store volatile i32 %arg4, ptr addrspace(1) %ptr
ret void
}
; Array form of the five-i32 case: a single [5 x i32] inreg argument is
; stored (non-volatile) through %ptr. The assertions read the elements from
; the same SGPRs as five separate i32 arguments would use (s16-s20 for
; gfx900; s0-s3 and s16 for gfx1100) and emit one 128-bit store plus one
; 32-bit store at offset 16.
define void @void_func_a5i32_inreg([5 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_a5i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s20
; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:16
; GFX9-NEXT: v_mov_b32_e32 v5, s19
; GFX9-NEXT: v_mov_b32_e32 v4, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s16
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_a5i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v5, s3
; GFX11-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v6, off offset:16
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store [5 x i32] %arg0, ptr addrspace(1) %ptr
ret void
}
; Force all implicit inputs to be required
; @extern is only declared here (no body in this file); the array tests that
; follow call it after storing their argument.
declare void @extern()
; [13 x i32] inreg argument stored through %ptr, followed by a call to the
; external function @extern.
; The assertions read the thirteen elements from s16-s28 for gfx900, and
; from s0-s3 followed by s16-s24 for gfx1100. The next SGPR after the
; argument (s29 for gfx900, s25 for gfx1100) is used to hold the old s33
; value while the frame for the call is set up.
define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_a13i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s29, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[40:41], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[40:41]
; GFX9-NEXT: v_mov_b32_e32 v2, s28
; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48
; GFX9-NEXT: v_mov_b32_e32 v5, s27
; GFX9-NEXT: v_mov_b32_e32 v4, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s24
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:32
; GFX9-NEXT: v_writelane_b32 v40, s29, 2
; GFX9-NEXT: v_mov_b32_e32 v5, s23
; GFX9-NEXT: v_mov_b32_e32 v4, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s20
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v3, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s16
; GFX9-NEXT: s_getpc_b64 s[16:17]
; GFX9-NEXT: s_add_u32 s16, s16, extern@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s17, s17, extern@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX9-NEXT: v_mov_b32_e32 v5, s19
; GFX9-NEXT: v_mov_b32_e32 v4, s18
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_mov_b32 s32, s33
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_a13i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s25, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s26, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s26
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v3, s21
; GFX11-NEXT: v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v9, s19
; GFX11-NEXT: s_getpc_b64 s[20:21]
; GFX11-NEXT: s_add_u32 s20, s20, extern@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s21, s21, extern@gotpcrel32@hi+12
; GFX11-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v7, s17
; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v13, s3
; GFX11-NEXT: s_load_b64 s[16:17], s[20:21], 0x0
; GFX11-NEXT: v_writelane_b32 v40, s25, 2
; GFX11-NEXT: v_dual_mov_b32 v14, s24 :: v_dual_mov_b32 v5, s23
; GFX11-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_mov_b32_e32 v10, s0
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b32 v[0:1], v14, off offset:48
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:32
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
store [13 x i32] %arg0, ptr addrspace(1) %ptr
call void @extern()
ret void
}
; define void @void_func_a14i32_inreg([14 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; store [14 x i32] %arg0, ptr addrspace(1) %ptr
; call void @extern()
; ret void
; }
; FIXME: The [15 x i32] variant below is commented out; the reason is not recorded here.
; define void @void_func_a15i32_inreg([15 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; store [15 x i32] %arg0, ptr addrspace(1) %ptr
; call void @extern()
; ret void
; }
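; FIXME: The [16 x i32] variant (with the call to @extern) below is commented out; the reason is not recorded here.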
; define void @void_func_a16i32_inreg([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; store [16 x i32] %arg0, ptr addrspace(1) %ptr
; call void @extern()
; ret void
; }
; FIXME: Should still fail
; [16 x i32] inreg argument stored through %ptr, with no call in the body
; (hence the __noimplicit suffix).
; In the gfx900 assertions only the first fourteen elements come from SGPRs
; (s16-s29); the last two are taken from v0/v1 and the store address moves
; to v[2:3]. In the gfx1100 assertions all sixteen elements come from SGPRs
; (s0-s3 and s16-s27) and the store address stays in v[0:1].
define void @void_func_a16i32_inreg__noimplicit([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_a16i32_inreg__noimplicit:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v7, v1
; GFX9-NEXT: v_mov_b32_e32 v6, v0
; GFX9-NEXT: v_mov_b32_e32 v5, s29
; GFX9-NEXT: v_mov_b32_e32 v4, s28
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:48
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v7, s27
; GFX9-NEXT: v_mov_b32_e32 v6, s26
; GFX9-NEXT: v_mov_b32_e32 v5, s25
; GFX9-NEXT: v_mov_b32_e32 v4, s24
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v7, s23
; GFX9-NEXT: v_mov_b32_e32 v6, s22
; GFX9-NEXT: v_mov_b32_e32 v5, s21
; GFX9-NEXT: v_mov_b32_e32 v4, s20
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v7, s19
; GFX9-NEXT: v_mov_b32_e32 v6, s18
; GFX9-NEXT: v_mov_b32_e32 v5, s17
; GFX9-NEXT: v_mov_b32_e32 v4, s16
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_a16i32_inreg__noimplicit:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v5, s27 :: v_dual_mov_b32 v4, s26
; GFX11-NEXT: v_dual_mov_b32 v3, s25 :: v_dual_mov_b32 v2, s24
; GFX11-NEXT: v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v8, s22
; GFX11-NEXT: v_dual_mov_b32 v7, s21 :: v_dual_mov_b32 v6, s20
; GFX11-NEXT: v_dual_mov_b32 v13, s19 :: v_dual_mov_b32 v12, s18
; GFX11-NEXT: v_dual_mov_b32 v11, s17 :: v_dual_mov_b32 v10, s16
; GFX11-NEXT: v_dual_mov_b32 v17, s3 :: v_dual_mov_b32 v16, s2
; GFX11-NEXT: v_dual_mov_b32 v15, s1 :: v_dual_mov_b32 v14, s0
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:48
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:32
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off offset:16
; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store [16 x i32] %arg0, ptr addrspace(1) %ptr
ret void
}
; Attribute groups referenced by the functions above.
; #0 (nounwind) is attached to the scalar/vector inreg tests.
attributes #0 = { nounwind }
; NOTE(review): no use of #1 is visible in the part of the file reviewed
; here - confirm it is still referenced before keeping or removing it.
attributes #1 = { nounwind noinline }