Files
clang-p2996/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
Aaditya 11b0401926 [AMDGPU] Restore SP from saved-FP or saved-BP (#124007)
Currently, the AMDGPU backend bumps the Stack Pointer 
by fixed size offsets in the prolog of device functions, and 
restores it by the same amount in the epilog.
Prolog:
sp += frameSize

Epilog:
sp -= frameSize

If a function has dynamic stack realignment,
Prolog:
sp += frameSize + max_alignment

Epilog:
sp -= frameSize + max_alignment

These calculations are not optimal in case of dynamic 
stack realignment, and completely fail in case of 
dynamic stack readjustment.
This patch uses the saved Frame Pointer to restore SP. 
Prolog:
fp = sp
sp += frameSize

Epilog:
sp = fp

In case of dynamic stack realignment, SP is restored from 
the saved Base Pointer. 
Prolog:
fp = sp + (max_alignment - 1)
fp = fp & (-max_alignment)
bp = sp
sp += frameSize + max_alignment

Epilog:
sp = bp

(Note: The presence of BP has been enforced in case of any 
dynamic stack realignment.)

---------

Co-authored-by: Pravin Jagtap <Pravin.Jagtap@amd.com>
Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
2025-01-24 19:13:40 +05:30

2231 lines
90 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
; i1 inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the incoming SGPR (s16 on gfx900, s0 on gfx1100) to be
; masked with 1 before it is copied to v0 and stored as a byte.
define void @void_func_i1_inreg(i1 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i1_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s4, s16, 1
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, s0, 1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i1 %arg0, ptr addrspace(1) undef
ret void
}
; i8 inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the value to be copied from s16 (gfx900) or s0 (gfx1100)
; into v0 and written with a byte store.
define void @void_func_i8_inreg(i8 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i8_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i8 %arg0, ptr addrspace(1) undef
ret void
}
; i16 inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the value to be copied from s16 (gfx900) or s0 (gfx1100)
; into v0 and written with a 16-bit store.
define void @void_func_i16_inreg(i16 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i16 %arg0, ptr addrspace(1) undef
ret void
}
; i32 inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the value to be copied from s16 (gfx900) or s0 (gfx1100)
; into v0 and written with a single dword store.
define void @void_func_i32_inreg(i32 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i32 %arg0, ptr addrspace(1) undef
ret void
}
; i64 inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the two halves to be copied from s16-s17 (gfx900) or
; s0-s1 (gfx1100) into v0-v1 and written with one 64-bit store.
define void @void_func_i64_inreg(i64 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i64 %arg0, ptr addrspace(1) undef
ret void
}
; half inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the value to be copied from s16 (gfx900) or s0 (gfx1100)
; into v0 and written with a 16-bit store.
define void @void_func_f16_inreg(half inreg %arg0) #0 {
; GFX9-LABEL: void_func_f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store half %arg0, ptr addrspace(1) undef
ret void
}
; float inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the value to be copied from s16 (gfx900) or s0 (gfx1100)
; into v0 and written with a single dword store.
define void @void_func_f32_inreg(float inreg %arg0) #0 {
; GFX9-LABEL: void_func_f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store float %arg0, ptr addrspace(1) undef
ret void
}
; double inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the two halves to be copied from s16-s17 (gfx900) or
; s0-s1 (gfx1100) into v0-v1 and written with one 64-bit store.
define void @void_func_f64_inreg(double inreg %arg0) #0 {
; GFX9-LABEL: void_func_f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store double %arg0, ptr addrspace(1) undef
ret void
}
; <2 x i16> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the whole vector in one SGPR (s16 on gfx900, s0 on
; gfx1100), copied to v0 and written with a single dword store.
define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; <3 x i16> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the vector in s16-s17 (gfx900) or s0-s1 (gfx1100) and the
; store to be split into a 16-bit store of the second register followed by a
; dword store of the first.
define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; <4 x i16> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the vector in s16-s17 (gfx900) or s0-s1 (gfx1100), copied
; to v0-v1 and written with one 64-bit store.
define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; <5 x i16> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the vector in s16-s18 (gfx900) or s0-s2 (gfx1100) and the
; store to be split into a 16-bit store of the third register plus a 64-bit
; store of the first two.
define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s18
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; <8 x i16> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the vector in s16-s19 (gfx900) or s0-s3 (gfx1100), copied
; to v0-v3 and written with one 128-bit store.
define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; <2 x i32> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the elements in s16-s17 (gfx900) or s0-s1 (gfx1100),
; copied to v0-v1 and written with one 64-bit store.
define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <3 x i32> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the elements in s16-s18 (gfx900) or s0-s2 (gfx1100),
; copied to v0-v2 and written with one 96-bit store.
define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <4 x i32> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the elements in s16-s19 (gfx900) or s0-s3 (gfx1100),
; copied to v0-v3 and written with one 128-bit store.
define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <5 x i32> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the elements in s16-s20 on gfx900, and in s0-s3 plus s16
; on gfx1100. Both targets are expected to emit a dword store of the last
; element and a 128-bit store of the first four.
define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v4, off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <8 x i32> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the elements in s16-s23 on gfx900, and in s0-s3 plus
; s16-s19 on gfx1100, written out as two 128-bit stores.
define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <16 x i32> inreg argument stored to an undef addrspace(1) pointer.
; On gfx900 the checks expect the elements in s16-s29 with the last two
; arriving in v0-v1; on gfx1100 they expect s0-s3 plus s16-s27. Both targets
; are expected to write the vector as four 128-bit stores.
define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s28
; GFX9-NEXT: v_mov_b32_e32 v1, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <32 x i32> inreg argument stored to an undef addrspace(1) pointer.
; On gfx900 the checks expect the elements in s16-s29 with the rest arriving
; in v0-v17; on gfx1100 they expect s0-s3 and s16-s29 with the rest in v0-v13.
; Both targets are expected to write the vector as eight 128-bit stores, and
; to move v0/v1 aside before stores that reuse them as data registers.
define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v32i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v19, v1
; GFX9-NEXT: v_mov_b32_e32 v18, v0
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: v_mov_b32_e32 v16, s28
; GFX9-NEXT: v_mov_b32_e32 v17, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[16:19], off
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <32 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <2 x i64> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the four dwords in s16-s19 (gfx900) or s0-s3 (gfx1100),
; copied to v0-v3 and written with one 128-bit store.
define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <3 x i64> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the six dwords in s16-s21 on gfx900, and in s0-s3 plus
; s16-s17 on gfx1100. Both targets are expected to emit a 64-bit store of the
; last element and a 128-bit store of the first two.
define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <4 x i64> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the eight dwords in s16-s23 on gfx900, and in s0-s3 plus
; s16-s19 on gfx1100, written out as two 128-bit stores.
define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <5 x i64> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the ten dwords in s16-s25 on gfx900, and in s0-s3 plus
; s16-s21 on gfx1100. Both targets are expected to emit two 128-bit stores
; followed by a 64-bit store of the last element.
define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <8 x i64> inreg argument stored to an undef addrspace(1) pointer.
; On gfx900 the checks expect the sixteen dwords in s16-s29 with the last two
; arriving in v0-v1; on gfx1100 they expect s0-s3 plus s16-s27. Both targets
; are expected to write the vector as four 128-bit stores.
define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s28
; GFX9-NEXT: v_mov_b32_e32 v1, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <16 x i64> inreg argument stored to an undef addrspace(1) pointer.
; On gfx900 the checks expect the dwords in s16-s29 with the rest arriving in
; v0-v17; on gfx1100 they expect s0-s3 and s16-s29 with the rest in v0-v13.
; Both targets are expected to write the vector as eight 128-bit stores, and
; to move v0/v1 aside before stores that reuse them as data registers.
define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v19, v1
; GFX9-NEXT: v_mov_b32_e32 v18, v0
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: v_mov_b32_e32 v16, s28
; GFX9-NEXT: v_mov_b32_e32 v17, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[16:19], off
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <2 x half> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the whole vector in one SGPR (s16 on gfx900, s0 on
; gfx1100), copied to v0 and written with a single dword store.
define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x half> %arg0, ptr addrspace(1) undef
ret void
}
; <3 x half> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the vector in s16-s17 (gfx900) or s0-s1 (gfx1100) and the
; store to be split into a 16-bit store of the second register followed by a
; dword store of the first.
define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x half> %arg0, ptr addrspace(1) undef
ret void
}
; <4 x half> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the vector in s16-s17 (gfx900) or s0-s1 (gfx1100), copied
; to v0-v1 and written with one 64-bit store.
define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x half> %arg0, ptr addrspace(1) undef
ret void
}
; <8 x half> inreg argument stored to an undef addrspace(1) pointer.
; The checks expect the vector in s16-s19 (gfx900) or s0-s3 (gfx1100), copied
; to v0-v3 and written with one 128-bit store.
define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x half> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <16 x half> argument to an undef addrspace(1) pointer.
; The expected code writes eight SGPRs as two 128-bit global stores:
; s16-s23 on gfx900, and s0-s3 plus s16-s19 on gfx1100.
define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x half> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <2 x float> argument to an undef addrspace(1) pointer.
; The expected code copies two SGPRs (s16-s17 on gfx900, s0-s1 on gfx1100)
; into v0-v1 and writes them with a single 64-bit global store.
define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <3 x float> argument to an undef addrspace(1) pointer.
; The expected code copies three SGPRs (s16-s18 on gfx900, s0-s2 on gfx1100)
; into v0-v2 and writes them with a single 96-bit global store.
define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <4 x float> argument to an undef addrspace(1) pointer.
; The expected code copies four SGPRs (s16-s19 on gfx900, s0-s3 on gfx1100)
; into v0-v3 and writes them with a single 128-bit global store.
define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <8 x float> argument to an undef addrspace(1) pointer.
; The expected code writes eight SGPRs as two 128-bit global stores:
; s16-s23 on gfx900, and s0-s3 plus s16-s19 on gfx1100.
define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <16 x float> argument to an undef addrspace(1) pointer as
; four 128-bit global stores.
; On gfx900 the expected code reads s16-s29 and takes the last two dwords from
; v0-v1; on gfx1100 all sixteen dwords are read from SGPRs (s0-s3, s16-s27).
define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s28
; GFX9-NEXT: v_mov_b32_e32 v1, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <2 x double> argument to an undef addrspace(1) pointer.
; The expected code copies four SGPRs (s16-s19 on gfx900, s0-s3 on gfx1100)
; into v0-v3 and writes them with a single 128-bit global store.
define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <3 x double> argument to an undef addrspace(1) pointer.
; The expected code splits the six dwords into a 64-bit store and a 128-bit
; store, reading s16-s21 on gfx900 and s0-s3 plus s16-s17 on gfx1100.
define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <4 x double> argument to an undef addrspace(1) pointer.
; The expected code writes eight SGPRs as two 128-bit global stores:
; s16-s23 on gfx900, and s0-s3 plus s16-s19 on gfx1100.
define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <8 x double> argument to an undef addrspace(1) pointer as
; four 128-bit global stores.
; On gfx900 the expected code reads s16-s29 and takes the last two dwords from
; v0-v1; on gfx1100 all sixteen dwords are read from SGPRs (s0-s3, s16-s27).
define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s28
; GFX9-NEXT: v_mov_b32_e32 v1, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <16 x double> argument (32 dwords) to an undef addrspace(1)
; pointer as eight 128-bit global stores.
; The expected gfx900 code reads s16-s29 and v0-v17; the expected gfx1100 code
; reads s0-s3, s16-s29 and v0-v13. v0-v1 are copied aside first so the
; SGPR-sourced dwords that share a store with them can be moved into adjacent
; VGPRs.
define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v19, v1
; GFX9-NEXT: v_mov_b32_e32 v18, v0
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: v_mov_b32_e32 v16, s28
; GFX9-NEXT: v_mov_b32_e32 v17, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[16:19], off
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Passes an inreg <32 x i32> followed by inreg i1, i8, i16 and half scalars,
; and volatile-stores each argument to an undef addrspace(1) pointer.
; In the expected code the trailing scalars are read from VGPRs (v18-v21 on
; gfx900, v14-v17 on gfx1100); the i1 is masked with 1 before its byte store.
; Every volatile store is followed by a wait (s_waitcnt vmcnt(0) on gfx900,
; s_waitcnt_vscnt on gfx1100).
; Review note: the function name ends in "f32", but %arg4 is declared and
; stored as half.
define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inreg %arg1, i8 inreg %arg2, i16 inreg %arg3, half inreg %arg4) #0 {
; GFX9-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v25, v1
; GFX9-NEXT: v_mov_b32_e32 v24, v0
; GFX9-NEXT: v_mov_b32_e32 v22, s28
; GFX9-NEXT: v_mov_b32_e32 v23, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[22:25], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 1, v18
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v19, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v[0:1], v20, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v[0:1], v21, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0
; GFX11-NEXT: v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v18, s0 :: v_dual_mov_b32 v19, s1
; GFX11-NEXT: v_dual_mov_b32 v20, s2 :: v_dual_mov_b32 v21, s3
; GFX11-NEXT: v_and_b32_e32 v12, 1, v14
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b8 v[0:1], v12, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b8 v[0:1], v15, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b16 v[0:1], v16, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b16 v[0:1], v17, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile i1 %arg1, ptr addrspace(1) undef
store volatile i8 %arg2, ptr addrspace(1) undef
store volatile i16 %arg3, ptr addrspace(1) undef
store volatile half %arg4, ptr addrspace(1) undef
ret void
}
; Passes an inreg <32 x i32> followed by inreg <2 x i32> and <2 x float>
; vectors, and volatile-stores each argument to an undef addrspace(1) pointer.
; In the expected code the two trailing vectors are stored directly from VGPR
; pairs (v[18:19] and v[20:21] on gfx900, v[14:15] and v[16:17] on gfx1100).
; Every volatile store is followed by a wait.
define void @void_func_v32i32_v2i32_v2f32_inreg(<32 x i32> inreg %arg0, <2 x i32> inreg %arg1, <2 x float> inreg %arg2) #0 {
; GFX9-LABEL: void_func_v32i32_v2i32_v2f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v25, v1
; GFX9-NEXT: v_mov_b32_e32 v24, v0
; GFX9-NEXT: v_mov_b32_e32 v22, s28
; GFX9-NEXT: v_mov_b32_e32 v23, s29
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[22:25], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s24
; GFX9-NEXT: v_mov_b32_e32 v1, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s27
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[18:19], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[20:21], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v2i32_v2f32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29
; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT: v_dual_mov_b32 v22, s0 :: v_dual_mov_b32 v23, s1
; GFX11-NEXT: v_dual_mov_b32 v24, s2 :: v_dual_mov_b32 v25, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[22:25], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[14:15], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[16:17], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile <2 x i32> %arg1, ptr addrspace(1) undef
store volatile <2 x float> %arg2, ptr addrspace(1) undef
ret void
}
; Takes 32 inreg i32 arguments and volatile-stores each one, in order, to an
; undef addrspace(1) pointer.
; The expected gfx900 code stores from s16-s29 for the first 14 arguments and
; from v0-v17 for the remaining 18; the expected gfx1100 code stores from
; s0-s3 and s16-s29 for the first 18 arguments and from v0-v13 for the
; remaining 14.
; Review note: unlike the neighbouring functions, this definition carries no
; #0 attribute group reference - confirm whether that is intentional.
define void @too_many_args_use_workitem_id_x_inreg(
; GFX9-LABEL: too_many_args_use_workitem_id_x_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s16
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s17
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s18
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s19
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s20
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s21
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s22
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s23
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s24
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s25
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s26
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s27
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s28
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v18, s29
; GFX9-NEXT: global_store_dword v[0:1], v18, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v1, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v3, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v4, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v5, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v6, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v7, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v8, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v9, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v10, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v11, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v12, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v13, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v14, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v15, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v16, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v17, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: too_many_args_use_workitem_id_x_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v14, s0 :: v_dual_mov_b32 v15, s1
; GFX11-NEXT: v_mov_b32_e32 v16, s2
; GFX11-NEXT: v_mov_b32_e32 v18, s19
; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v17, s18
; GFX11-NEXT: v_dual_mov_b32 v15, s16 :: v_dual_mov_b32 v16, s17
; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v15, s21 :: v_dual_mov_b32 v14, s20
; GFX11-NEXT: v_dual_mov_b32 v16, s22 :: v_dual_mov_b32 v17, s23
; GFX11-NEXT: v_mov_b32_e32 v18, s24
; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v17, s28 :: v_dual_mov_b32 v14, s25
; GFX11-NEXT: v_dual_mov_b32 v15, s26 :: v_dual_mov_b32 v16, s27
; GFX11-NEXT: v_mov_b32_e32 v18, s29
; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v7, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v8, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v9, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v10, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v11, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v12, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v13, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7,
i32 inreg %arg8, i32 inreg %arg9, i32 inreg %arg10, i32 inreg %arg11, i32 inreg %arg12, i32 inreg %arg13, i32 inreg %arg14, i32 inreg %arg15,
i32 inreg %arg16, i32 inreg %arg17, i32 inreg %arg18, i32 inreg %arg19, i32 inreg %arg20, i32 inreg %arg21, i32 inreg %arg22, i32 inreg %arg23,
i32 inreg %arg24, i32 inreg %arg25, i32 inreg %arg26, i32 inreg %arg27, i32 inreg %arg28, i32 inreg %arg29, i32 inreg %arg30, i32 inreg %arg31) {
; Review note: the workitem-id read and its store are commented out below, so
; despite its name this function currently never uses the workitem id.
;%val = call i32 @llvm.amdgcn.workitem.id.x()
;store volatile i32 %val, ptr addrspace(1) undef
store volatile i32 %arg0, ptr addrspace(1) undef
store volatile i32 %arg1, ptr addrspace(1) undef
store volatile i32 %arg2, ptr addrspace(1) undef
store volatile i32 %arg3, ptr addrspace(1) undef
store volatile i32 %arg4, ptr addrspace(1) undef
store volatile i32 %arg5, ptr addrspace(1) undef
store volatile i32 %arg6, ptr addrspace(1) undef
store volatile i32 %arg7, ptr addrspace(1) undef
store volatile i32 %arg8, ptr addrspace(1) undef
store volatile i32 %arg9, ptr addrspace(1) undef
store volatile i32 %arg10, ptr addrspace(1) undef
store volatile i32 %arg11, ptr addrspace(1) undef
store volatile i32 %arg12, ptr addrspace(1) undef
store volatile i32 %arg13, ptr addrspace(1) undef
store volatile i32 %arg14, ptr addrspace(1) undef
store volatile i32 %arg15, ptr addrspace(1) undef
store volatile i32 %arg16, ptr addrspace(1) undef
store volatile i32 %arg17, ptr addrspace(1) undef
store volatile i32 %arg18, ptr addrspace(1) undef
store volatile i32 %arg19, ptr addrspace(1) undef
store volatile i32 %arg20, ptr addrspace(1) undef
store volatile i32 %arg21, ptr addrspace(1) undef
store volatile i32 %arg22, ptr addrspace(1) undef
store volatile i32 %arg23, ptr addrspace(1) undef
store volatile i32 %arg24, ptr addrspace(1) undef
store volatile i32 %arg25, ptr addrspace(1) undef
store volatile i32 %arg26, ptr addrspace(1) undef
store volatile i32 %arg27, ptr addrspace(1) undef
store volatile i32 %arg28, ptr addrspace(1) undef
store volatile i32 %arg29, ptr addrspace(1) undef
store volatile i32 %arg30, ptr addrspace(1) undef
store volatile i32 %arg31, ptr addrspace(1) undef
ret void
}
; Stores an inreg i32 followed by an inreg <2 x float> to an undef global
; (addrspace(1)) pointer. The checks below expect the three dwords to be read
; from s16-s18 on gfx900 and from s0-s2 on gfx1100.
define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
; GFX9-LABEL: void_func_i32_v2float_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: v_mov_b32_e32 v1, s18
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32_v2float_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: v_mov_b32_e32 v0, s1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i32 %arg0, ptr addrspace(1) undef
store <2 x float> %arg1, ptr addrspace(1) undef
ret void
}
; Non-leaf function that forwards its inreg i32 and inreg <2 x float>
; arguments through a call, so the checks cover the frame setup and teardown:
; the old s33 is saved in an SGPR and then in lane 2 of v40, s33 is set from
; s32, and the epilogue restores s32 with "s_mov_b32 s32, s33" before s33 is
; reloaded from the saved value.
; NOTE(review): the call target is this function itself, not
; @void_func_i32_v2float_inreg - confirm the self-recursion is intentional.
define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
; GFX9-LABEL: caller_void_func_i32_v2float_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s19, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[20:21]
; GFX9-NEXT: s_add_u32 s20, s20, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s21, s21, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0
; GFX9-NEXT: v_writelane_b32 v40, s19, 2
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_mov_b32 s32, s33
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: caller_void_func_i32_v2float_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s16, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s16
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: s_getpc_b64 s[16:17]
; GFX11-NEXT: s_add_u32 s16, s16, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s17, s17, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s3, 2
; GFX11-NEXT: s_load_b64 s[16:17], s[16:17], 0x0
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1)
ret void
}
; Stores a single inreg bfloat to an undef global pointer. The checks expect
; a 16-bit store whose data comes from s16 on gfx900 and s0 on gfx1100.
define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
; GFX9-LABEL: void_func_bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store bfloat %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <2 x bfloat> to an undef global pointer. The checks expect
; one 32-bit store whose data comes from s16 on gfx900 and s0 on gfx1100.
define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <3 x bfloat> to an undef global pointer. The checks expect
; the store to be split in two: a 16-bit store sourced from s17 (s1 on
; gfx1100) and a 32-bit store sourced from s16 (s0 on gfx1100).
define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: global_store_short v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <4 x bfloat> to an undef global pointer. The checks expect
; one 64-bit store whose data comes from s16-s17 on gfx900 and s0-s1 on
; gfx1100.
define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <8 x bfloat> to an undef global pointer. The checks expect
; one 128-bit store whose data comes from s16-s19 on gfx900 and s0-s3 on
; gfx1100.
define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; Stores an inreg <16 x bfloat> to an undef global pointer. The checks expect
; two 128-bit stores; the eight source dwords are s16-s23 on gfx900, and
; s0-s3 plus s16-s19 on gfx1100.
define void @void_func_v16bf16_inreg(<16 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s20
; GFX9-NEXT: v_mov_b32_e32 v1, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s23
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; Mixes two inreg i32 arguments with a non-inreg global pointer and stores
; each i32 to %ptr with a volatile store. The checks expect the data in
; s16-s17 on gfx900 and s0-s1 on gfx1100, with v[0:1] used as the store
; address on both targets.
define void @void_func_2_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_2_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s16
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s17
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_2_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile i32 %arg0, ptr addrspace(1) %ptr
store volatile i32 %arg1, ptr addrspace(1) %ptr
ret void
}
; Mixes two inreg i64 arguments with a non-inreg global pointer and stores
; each i64 to %ptr with a volatile store. The checks expect the data in
; s16-s19 on gfx900 and s0-s3 on gfx1100, with v[0:1] used as the store
; address on both targets.
define void @void_func_2_i64_inreg(i64 inreg %arg0, i64 inreg %arg1, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_2_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s16
; GFX9-NEXT: v_mov_b32_e32 v5, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_2_i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile i64 %arg0, ptr addrspace(1) %ptr
store volatile i64 %arg1, ptr addrspace(1) %ptr
ret void
}
; Passes an i64, an i32 and another i64 as inreg arguments (five dwords in
; total) next to a non-inreg global pointer, and stores each value to %ptr
; with a volatile store. The checks expect the data in s16-s20 on gfx900; on
; gfx1100 they expect s0-s3 followed by s16 for the fifth dword.
define void @void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s16
; GFX9-NEXT: v_mov_b32_e32 v5, s17
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s18
; GFX9-NEXT: v_mov_b32_e32 v2, s19
; GFX9-NEXT: v_mov_b32_e32 v3, s20
; GFX9-NEXT: global_store_dword v[0:1], v4, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s16
; GFX11-NEXT: v_mov_b32_e32 v6, s2
; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile i64 %arg0, ptr addrspace(1) %ptr
store volatile i32 %arg1, ptr addrspace(1) %ptr
store volatile i64 %arg2, ptr addrspace(1) %ptr
ret void
}
; Passes five separate inreg i32 arguments next to a non-inreg global pointer
; and stores each one to %ptr with a volatile store. The checks expect the
; data in s16-s20 on gfx900; on gfx1100 they expect s0-s3 followed by s16 for
; the fifth argument.
define void @void_func_5_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_5_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s16
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s17
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s19
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s20
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_5_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
; GFX11-NEXT: v_mov_b32_e32 v6, s16
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile i32 %arg0, ptr addrspace(1) %ptr
store volatile i32 %arg1, ptr addrspace(1) %ptr
store volatile i32 %arg2, ptr addrspace(1) %ptr
store volatile i32 %arg3, ptr addrspace(1) %ptr
store volatile i32 %arg4, ptr addrspace(1) %ptr
ret void
}
; Same five dwords as @void_func_5_i32_inreg, but passed as one inreg
; [5 x i32] aggregate and written with a single (non-volatile) store to %ptr.
; The checks expect the same source registers (s16-s20 on gfx900; s0-s3 and
; s16 on gfx1100), emitted as a 128-bit store plus a 32-bit store at
; offset 16.
define void @void_func_a5i32_inreg([5 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_a5i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s20
; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:16
; GFX9-NEXT: v_mov_b32_e32 v5, s19
; GFX9-NEXT: v_mov_b32_e32 v4, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s16
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_a5i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v5, s3
; GFX11-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v6, off offset:16
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store [5 x i32] %arg0, ptr addrspace(1) %ptr
ret void
}
; External function with no body in this file. Calling it forces all implicit
; inputs to be required in the caller.
declare void @extern()
; Stores an inreg [13 x i32] aggregate to %ptr and then calls @extern, so the
; function is a non-leaf. The checks expect the thirteen dwords in s16-s28 on
; gfx900, and in s0-s3 plus s16-s24 on gfx1100. The epilogue checks expect s32
; to be restored with "s_mov_b32 s32, s33" before s33 itself is reloaded from
; lane 2 of v40.
define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_a13i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s29, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 vcc, -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, vcc
; GFX9-NEXT: v_mov_b32_e32 v2, s28
; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48
; GFX9-NEXT: v_mov_b32_e32 v5, s27
; GFX9-NEXT: v_mov_b32_e32 v4, s26
; GFX9-NEXT: v_mov_b32_e32 v3, s25
; GFX9-NEXT: v_mov_b32_e32 v2, s24
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:32
; GFX9-NEXT: v_writelane_b32 v40, s29, 2
; GFX9-NEXT: v_mov_b32_e32 v5, s23
; GFX9-NEXT: v_mov_b32_e32 v4, s22
; GFX9-NEXT: v_mov_b32_e32 v3, s21
; GFX9-NEXT: v_mov_b32_e32 v2, s20
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v3, s17
; GFX9-NEXT: v_mov_b32_e32 v2, s16
; GFX9-NEXT: s_getpc_b64 s[16:17]
; GFX9-NEXT: s_add_u32 s16, s16, extern@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s17, s17, extern@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX9-NEXT: v_mov_b32_e32 v5, s19
; GFX9-NEXT: v_mov_b32_e32 v4, s18
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_mov_b32 s32, s33
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_a13i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s25, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s26, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s26
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v3, s21
; GFX11-NEXT: v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v9, s19
; GFX11-NEXT: s_getpc_b64 s[20:21]
; GFX11-NEXT: s_add_u32 s20, s20, extern@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s21, s21, extern@gotpcrel32@hi+12
; GFX11-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v7, s17
; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v13, s3
; GFX11-NEXT: s_load_b64 s[16:17], s[20:21], 0x0
; GFX11-NEXT: v_writelane_b32 v40, s25, 2
; GFX11-NEXT: v_dual_mov_b32 v14, s24 :: v_dual_mov_b32 v5, s23
; GFX11-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_mov_b32_e32 v10, s0
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b32 v[0:1], v14, off offset:48
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:32
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
store [13 x i32] %arg0, ptr addrspace(1) %ptr
call void @extern()
ret void
}
; define void @void_func_a14i32_inreg([14 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; store [14 x i32] %arg0, ptr addrspace(1) %ptr
; call void @extern()
; ret void
; }
; FIXME: The [15 x i32] inreg variant below is disabled (commented out).
; define void @void_func_a15i32_inreg([15 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; store [15 x i32] %arg0, ptr addrspace(1) %ptr
; call void @extern()
; ret void
; }
; FIXME: The [16 x i32] inreg variant below is disabled (commented out).
; define void @void_func_a16i32_inreg([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; store [16 x i32] %arg0, ptr addrspace(1) %ptr
; call void @extern()
; ret void
; }
; FIXME: Should still fail
; Stores an inreg [16 x i32] aggregate to %ptr without calling anything. The
; gfx900 checks expect fourteen dwords in s16-s29 and the last two in v0-v1,
; with v[2:3] used as the store address. The gfx1100 checks expect all
; sixteen dwords in SGPRs (s0-s3 and s16-s27), with v[0:1] used as the store
; address.
define void @void_func_a16i32_inreg__noimplicit([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_a16i32_inreg__noimplicit:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v7, v1
; GFX9-NEXT: v_mov_b32_e32 v6, v0
; GFX9-NEXT: v_mov_b32_e32 v5, s29
; GFX9-NEXT: v_mov_b32_e32 v4, s28
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:48
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v7, s27
; GFX9-NEXT: v_mov_b32_e32 v6, s26
; GFX9-NEXT: v_mov_b32_e32 v5, s25
; GFX9-NEXT: v_mov_b32_e32 v4, s24
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v7, s23
; GFX9-NEXT: v_mov_b32_e32 v6, s22
; GFX9-NEXT: v_mov_b32_e32 v5, s21
; GFX9-NEXT: v_mov_b32_e32 v4, s20
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_mov_b32_e32 v7, s19
; GFX9-NEXT: v_mov_b32_e32 v6, s18
; GFX9-NEXT: v_mov_b32_e32 v5, s17
; GFX9-NEXT: v_mov_b32_e32 v4, s16
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_a16i32_inreg__noimplicit:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v5, s27 :: v_dual_mov_b32 v4, s26
; GFX11-NEXT: v_dual_mov_b32 v3, s25 :: v_dual_mov_b32 v2, s24
; GFX11-NEXT: v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v8, s22
; GFX11-NEXT: v_dual_mov_b32 v7, s21 :: v_dual_mov_b32 v6, s20
; GFX11-NEXT: v_dual_mov_b32 v13, s19 :: v_dual_mov_b32 v12, s18
; GFX11-NEXT: v_dual_mov_b32 v11, s17 :: v_dual_mov_b32 v10, s16
; GFX11-NEXT: v_dual_mov_b32 v17, s3 :: v_dual_mov_b32 v16, s2
; GFX11-NEXT: v_dual_mov_b32 v15, s1 :: v_dual_mov_b32 v14, s0
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:48
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:32
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off offset:16
; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store [16 x i32] %arg0, ptr addrspace(1) %ptr
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }